Add support for hypernyms and hyponyms.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sun, 23 Oct 2016 20:01:22 +0300
changeset 618 6ad7203ac9dc
parent 617 ec1c2838feae
child 619 72ed960bcf10
Add support for hypernyms and hyponyms.
contrib/gadict.el
py/gadict.py
py/gadict_c5.py
py/gadict_srs_anki.py
www/HACKING.rst
--- a/contrib/gadict.el	Sun Oct 23 19:57:02 2016 +0300
+++ b/contrib/gadict.el	Sun Oct 23 20:01:22 2016 +0300
@@ -23,7 +23,7 @@
 ;;; Code:
 
 (defconst gadict--art-lang-regex (regexp-opt '("en" "ru" "uk" "la")))
-(defconst gadict--art-rel-regex (regexp-opt '("ant" "syn" "rel" "topic")))
+(defconst gadict--art-rel-regex (regexp-opt '("ant" "syn" "rel" "topic" "hyper" "hypo")))
 (defconst gadict--art-var-regex (regexp-opt '("v1" "v2" "v3" "s" "pl" "male" "female" "abbr" "comp" "super" "Am" "Br" "Au")))
 (defconst gadict--art-pos-regex (regexp-opt '("n" "v" "adj" "adv" "pron" "prep" "num" "conj" "int" "phr" "phr.v" "contr" "abbr" "prefix")))
 
--- a/py/gadict.py	Sun Oct 23 19:57:02 2016 +0300
+++ b/py/gadict.py	Sun Oct 23 20:01:22 2016 +0300
@@ -45,7 +45,7 @@
 
 class Sense:
 
-    def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None):
+    def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None):
         if not pos:
             raise ParseException("Part of speech expected...\n")
         self.pos = pos
@@ -56,6 +56,8 @@
         self.syn_list = syn_list
         self.rel_list = rel_list
         self.topic_list = topic_list
+        self.hyper_list = hyper_list
+        self.hypo_list = hypo_list
 
     def add_tr(self, tr):
         if self.tr_list:
@@ -99,6 +101,18 @@
         else:
             self.topic_list = [topic]
 
+    def add_hyper(self, hyper):
+        if self.hyper_list:
+            self.hyper_list.append(hyper)
+        else:
+            self.hyper_list = [hyper]
+
+    def add_hypo(self, hypo):
+        if self.hypo_list:
+            self.hypo_list.append(hypo)
+        else:
+            self.hypo_list = [hypo]
+
     def __str__(self):
         if tr_list:
             (lang, text) = self.tr_list[0]
@@ -118,12 +132,14 @@
     HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$")
     TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
     TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
-    TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
+    TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""")
     TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$")
     TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
     SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
     ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
     REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
+    HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$")
+    HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$")
 
     CONT_RE = regex.compile(u"^ +(.*)")
 
@@ -340,6 +356,24 @@
                         raise ParseException("""Empty relation...""")
                     sense.add_rel(rel)
                 continue
+            m = self.HYPER_RE.match(self.line)
+            if m is not None:
+                hypers = m.group(1).split(";")
+                for hyper in hypers:
+                    hyper = hyper.strip()
+                    if len(hyper) == 0:
+                        raise ParseException("""Empty hypernym...""")
+                    sense.add_hyper(hyper)
+                continue
+            m = self.HYPO_RE.match(self.line)
+            if m is not None:
+                hypos = m.group(1).split(";")
+                for hypo in hypos:
+                    hypo = hypo.strip()
+                    if len(hypo) == 0:
+                        raise ParseException("""Empty hyponym...""")
+                    sense.add_hypo(hypo)
+                continue
             m = self.TRANSL_RE.match(self.line)
             if m is not None:
                 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))
--- a/py/gadict_c5.py	Sun Oct 23 19:57:02 2016 +0300
+++ b/py/gadict_c5.py	Sun Oct 23 20:01:22 2016 +0300
@@ -102,6 +102,18 @@
                 FOUT.write(" syn: ")
                 FOUT.write("; ".join(["{"+s+"}" for s in sense.syn_list]))
                 need_sep = True
+            if sense.hyper_list and len(sense.hyper_list) > 0:
+                if need_sep:
+                    FOUT.write(" |")
+                FOUT.write(" hyper: ")
+                FOUT.write("; ".join(["{"+s+"}" for s in sense.hyper_list]))
+                need_sep = True
+            if sense.hypo_list and len(sense.hypo_list) > 0:
+                if need_sep:
+                    FOUT.write(" |")
+                FOUT.write(" hypo: ")
+                FOUT.write("; ".join(["{"+s+"}" for s in sense.hypo_list]))
+                need_sep = True
             if sense.rel_list and len(sense.rel_list) > 0:
                 if need_sep:
                     FOUT.write(" |")
--- a/py/gadict_srs_anki.py	Sun Oct 23 19:57:02 2016 +0300
+++ b/py/gadict_srs_anki.py	Sun Oct 23 20:01:22 2016 +0300
@@ -103,6 +103,9 @@
   color: red;
   font-style: italic;
 }
+.topic {
+  color: #B04080;
+}
 .ant {
   color: #404080;
 }
@@ -110,7 +113,13 @@
   color: #804040;
 }
 .rel {
-  color: #804080;
+  color: #604080;
+}
+.hyper {
+  color: #600080;
+}
+.hypo {
+  color: #606080;
 }
 .attrs {
   color: blue;
@@ -192,6 +201,16 @@
         buf.append(" <span class='syn'>syn: ")
         buf.append("; ".join(sense.syn_list))
         buf.append("</span>")
+    if sense.hyper_list and len(sense.hyper_list) > 0:
+        have_ref = True
+        buf.append(" <span class='hyper'>hyper: ")
+        buf.append("; ".join(sense.hyper_list))
+        buf.append("</span>")
+    if sense.hypo_list and len(sense.hypo_list) > 0:
+        have_ref = True
+        buf.append(" <span class='hypo'>hypo: ")
+        buf.append("; ".join(sense.hypo_list))
+        buf.append("</span>")
     if sense.rel_list and len(sense.rel_list) > 0:
         have_ref = True
         buf.append(" <span class='rel'>rel: ")
--- a/www/HACKING.rst	Sun Oct 23 19:57:02 2016 +0300
+++ b/www/HACKING.rst	Sun Oct 23 20:01:22 2016 +0300
@@ -118,9 +118,16 @@
 * ``music``
 * ``meal``, ``office``, etc
 * ``size``, ``shape``, ``age``, ``color``
+* ``archaic`` - old fashioned, no longer used
 
-Synonyms marked by ``syn:``, antonyms marked by ``ant:``, related (see also)
-terms marked by ``rel:``, topics/tags marked by ``topic:``.
+Word relations:
+
+* ``syn:`` - synonyms
+* ``ant:`` - antonyms
+* ``hyper:`` - hypernyms
+* ``hypo:`` - hyponyms
+* ``rel:`` - related (see also) terms
+* ``topic:`` - topics/tags
 
 Translation marked by lowercase ISO 639-1 code with ``:`` (colon) character,
 like: