py/gadict.py
changeset 931 9a5f97027ee7
parent 892 0c298fe6739e
child 937 981839c72b64
--- a/py/gadict.py	Sun Oct 08 19:20:44 2017 +0300
+++ b/py/gadict.py	Fri Oct 13 01:44:07 2017 +0300
@@ -34,10 +34,11 @@
 
 class Headword:
 
-    def __init__(self, headword, pron = None, attrs = None):
+    def __init__(self, headword, pron = None, attrs = None, homo = None):
         self.headword = headword
         self.pron = pron
         self.attrs = attrs
+        self.homo = homo
 
     def __str__(self):
         return self.headword
@@ -131,6 +132,7 @@
     HEADWORD_RE = re.compile( u"^(\\w.*)$" )
     HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
     HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɐɹʃʧθðɡʒŋɾʔ ]+)\\]$", re.UNICODE)
+    HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w+(?: *; *\\w+)*)$", re.UNICODE)  # one or more words separated by ";"
     TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
     TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
     TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
@@ -255,6 +257,7 @@
         word = m.group(1)
         pron = None
         attrs = set()
+        homo = None
         while True:
             self.readline()
             if self.eof or len(self.line) == 0:
@@ -263,10 +266,11 @@
             if m is not None:
                 if word is None:
                     raise ParseException("""Didn't match previous headword...""")
-                self.words.append(Headword(word, pron, attrs))
+                self.words.append(Headword(word, pron, attrs, homo = homo))
                 word = m.group(1)
                 pron = None
                 attrs = set()
+                homo = None
                 continue
             m = self.HEADWORD_PRON_RE.match(self.line)
             if m is not None:
@@ -278,8 +282,14 @@
             if m is not None:
                 attrs.add(m.group(1))
                 continue
+            m = self.HEADWORD_HOMO_RE.match(self.line)
+            if m is not None:
+                if homo is not None:
+                    raise ParseException("""Homophones are redefined...""")
+                homo = [s.strip() for s in m.group(1).split(";")]
+                continue
             raise ParseException("""Line is not a headword or translation or headword attribute...""")
-        self.words.append(Headword(word, pron, attrs))
+        self.words.append(Headword(word, pron, attrs, homo))
 
     def parse_translation_continuation(self):
         string = ""