Store headword structure as class. Store headwords in list to preserve order
author Oleksandr Gavenko <gavenkoa@gmail.com>
Thu, 15 Sep 2016 17:48:20 +0300
changeset 554 59714b9033bc
parent 553 45a3138c9b4d
child 555 4a3188fc8951
Store headword structure as class. Store headwords in list to preserve order like in source file.
py/gadict.py
py/gadict_c5.py
py/gadict_srs_tab.py
--- a/py/gadict.py	Thu Sep 15 15:42:52 2016 +0300
+++ b/py/gadict.py	Thu Sep 15 17:48:20 2016 +0300
@@ -31,6 +31,18 @@
         else:
             return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8'))
 
+class Headword:
+
+    def __init__(self, headword, pron = None, attrs = None):
+        self.headword = headword
+        self.pron = pron
+        self.attrs = attrs
+
+    def __str__(self):
+        return self.headword
+    def __repr__(self):
+        return "<Headword {}>".format(self.headword)
+
 class Sense:
 
     def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None):
@@ -72,6 +84,14 @@
         else:
             self.topic_list.append(topic)
 
+    def __str__(self):
+        if tr_list:
+            (lang, text) = self.tr_list[0]
+            return "{}: {}".format(lang, text)
+        return "<empy sence>"
+    def __repr__(self):
+        return "<Sence {}>".format(str(self))
+
 class Parser:
     """gadict dictionary format parser."""
 
@@ -190,7 +210,7 @@
 
     def parse_headlines(self):
         """Try to match word variations with attributed. Assume that `self.line` on preceding empty line."""
-        self.words = {}
+        self.words = []
         self.readline()
         if self.eof:
             raise ParseException("""There are no definition after "__" delimiter...""")
@@ -208,7 +228,7 @@
             if m is not None:
                 if word is None:
                     raise ParseException("""Didn't match previous headword...""")
-                self.words[word] = (pron, attrs)
+                self.words.append(Headword(word, pron, attrs))
                 word = m.group(1)
                 pron = None
                 attrs = set()
@@ -224,7 +244,7 @@
                 attrs.add(m.group(1))
                 continue
             raise ParseException("""Line is not a headword or translation or headword attribute...""")
-        self.words[word] = (pron, attrs)
+        self.words.append(Headword(word, pron, attrs))
 
     def parse_translation_continuation(self):
         string = ""
--- a/py/gadict_c5.py	Thu Sep 15 15:42:52 2016 +0300
+++ b/py/gadict_c5.py	Thu Sep 15 17:48:20 2016 +0300
@@ -65,54 +65,28 @@
 FOUT.write("\n")
 
 
-def attr_key(item):
-    (word, (pron, attrs)) = item
-    if not attrs:
-        return "zzz"
-    best_vattr = None
-    for attr in attrs:
-        if attr in ["v1", "v2", "v3"]:
-            if not best_vattr or (best_vattr and best_vattr > attr):
-                best_vattr = attr
-    if best_vattr:
-        return best_vattr
-    for attr in attrs:                      # single/plural
-        if attr in ["s"]:
-            return attr
-    for attr in attrs:                      # comparative/superlative
-        if attr in ["comp"]:
-            return attr
-    for attr in attrs:                      # Am/Br/Au
-        if attr in ["Am"]:
-            return attr
-    return "zzz"
-
-
-for idx in range(1, len(DOM)):
-    article = DOM[idx]
+for (headwords, translations) in DOM[1:]:
     FOUT.write("_____\n\n")
-    title = "; ".join(article[0].keys())
+    title = "; ".join([h.headword for h in headwords])
     FOUT.write(title)
     FOUT.write("\n\n")
-    defs = article[0].items()
-    defs = sorted(defs, key = attr_key)
-    for (word, (pron, attrs)) in defs:
+    for hw in headwords:
         FOUT.write("  ")
-        FOUT.write(word)
-        if pron is not None:
+        FOUT.write(hw.headword)
+        if hw.pron is not None:
             FOUT.write(" [")
-            FOUT.write(pron)
+            FOUT.write(hw.pron)
             FOUT.write("]")
-        if len(attrs) > 0:
+        if len(hw.attrs) > 0:
             FOUT.write(" ")
-            l = ["«"+x+"»" for x in attrs]
+            l = ["«"+x+"»" for x in hw.attrs]
             l.sort()
             FOUT.write(", ".join(l))
         FOUT.write("\n")
     FOUT.write("\n")
-    for sense in article[1]:
+    for sense in translations:
         if not sense:
-            raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())
+            raise Exception("""Empty sense for article: """ + headwords.__iter__().__next__())
         FOUT.write("  ")
         if sense.pos:
             FOUT.write("«")
--- a/py/gadict_srs_tab.py	Thu Sep 15 15:42:52 2016 +0300
+++ b/py/gadict_srs_tab.py	Thu Sep 15 17:48:20 2016 +0300
@@ -38,46 +38,21 @@
 else:
     FOUT = codecs.open(FONAME, "w", "utf-8")
 
-def attr_key(item):
-    (word, (pron, attrs)) = item
-    if not attrs:
-        return "zzz"
-    best_vattr = None
-    for attr in attrs:
-        if attr in ["v1", "v2", "v3"]:
-            if not best_vattr or (best_vattr and best_vattr > attr):
-                best_vattr = attr
-    if best_vattr:
-        return best_vattr
-    for attr in attrs:                      # single/plural
-        if attr in ["s"]:
-            return attr
-    for attr in attrs:                      # comparative/superlative
-        if attr in ["comp"]:
-            return attr
-    for attr in attrs:                      # Am/Br/Au
-        if attr in ["Am"]:
-            return attr
-    return "zzz"
-
-for idx in range(1, len(DOM)):
-    article = DOM[idx]
-    defs = article[0].items()
-    defs = sorted(defs, key = attr_key)
+for (headwords, translations) in DOM[1:]:
     lines = []
-    for (word, (pron, attrs)) in defs:
-        line = "<b>"+word+"</b>"
-        if pron:
-            line += " ["+pron+"]"
-        if len(attrs) > 0:
-            attrs = [" «"+x+"»" for x in attrs]
+    for hw in headwords:
+        line = "<b>"+hw.headword+"</b>"
+        if hw.pron:
+            line += " ["+hw.pron+"]"
+        if len(hw.attrs) > 0:
+            attrs = [" «"+x+"»" for x in hw.attrs]
             attrs.sort()
             line += ",".join(attrs)
         lines.append(line)
     question = "<br>".join(lines)
     FOUT.write(question)
     FOUT.write("\t")
-    for sense in article[1]:
+    for sense in translations:
         if not sense:
             raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())
         if sense.pos: