py/gadict.py
changeset 618 6ad7203ac9dc
parent 594 910efcf51ac0
child 629 6a862ea41c00
--- a/py/gadict.py	Sun Oct 23 19:57:02 2016 +0300
+++ b/py/gadict.py	Sun Oct 23 20:01:22 2016 +0300
@@ -45,7 +45,7 @@
 
 class Sense:
 
-    def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None):
+    def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None):
         if not pos:
             raise ParseException("Part of speech expected...\n")
         self.pos = pos
@@ -56,6 +56,8 @@
         self.syn_list = syn_list
         self.rel_list = rel_list
         self.topic_list = topic_list
+        self.hyper_list = hyper_list
+        self.hypo_list = hypo_list
 
     def add_tr(self, tr):
         if self.tr_list:
@@ -99,6 +101,18 @@
         else:
             self.topic_list = [topic]
 
+    def add_hyper(self, hyper):
+        """Append hypernym ``hyper``; a fresh list is started when hyper_list is empty or None."""
+        if not self.hyper_list:
+            self.hyper_list = []
+        self.hyper_list.append(hyper)
+
+    def add_hypo(self, hypo):
+        """Append hyponym ``hypo``; a fresh list is started when hypo_list is empty or None."""
+        if not self.hypo_list:
+            self.hypo_list = []
+        self.hypo_list.append(hypo)
+
     def __str__(self):
         if tr_list:
             (lang, text) = self.tr_list[0]
@@ -118,12 +132,14 @@
     HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$")
     TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
     TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
-    TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
+    TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""")
     TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$")
     TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
     SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
     ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
     REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
+    HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$")
+    HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$")
 
     CONT_RE = regex.compile(u"^ +(.*)")
 
@@ -340,6 +356,24 @@
                         raise ParseException("""Empty relation...""")
                     sense.add_rel(rel)
                 continue
+            m = self.HYPER_RE.match(self.line)
+            if m is not None:
+                hypers = m.group(1).split(";")
+                for hyper in hypers:
+                    hyper = hyper.strip()
+                    if len(hyper) == 0:
+                        raise ParseException("""Empty hypernym...""")
+                    sense.add_hyper(hyper)
+                continue
+            m = self.HYPO_RE.match(self.line)
+            if m is not None:
+                hypos = m.group(1).split(";")
+                for hypo in hypos:
+                    hypo = hypo.strip()
+                    if len(hypo) == 0:
+                        raise ParseException("""Empty hyponym...""")
+                    sense.add_hypo(hypo)
+                continue
             m = self.TRANSL_RE.match(self.line)
             if m is not None:
                 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))