py/gadict_srs_tab.py
changeset 536 c9f0064d8661
child 554 59714b9033bc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_srs_tab.py	Sat Aug 27 14:32:25 2016 +0300
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""Spaced repetition TAB format writer"""
+
+import io
+import sys
+import codecs
+
+import gadict
+
+
+# Command line: INPUT [OUTPUT [LANG,LANG...]]; omitted options stay None.
+FINAME = sys.argv[1] if len(sys.argv) >= 2 else None
+FONAME = sys.argv[2] if len(sys.argv) >= 3 else None
+LANGS = set(sys.argv[3].split(",")) if len(sys.argv) >= 4 else None
+
+if FINAME is None:
+    # io.open(None) would fail with an unrelated TypeError; report the real
+    # problem instead.  sys.exit(message) prints to stderr and exits with 1.
+    sys.exit("usage: {:s} INPUT [OUTPUT [LANG,...]]".format(sys.argv[0]))
+
+PARSER = gadict.Parser()
+# The context manager closes the input whether or not parsing succeeds.
+with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
+    try:
+        DOM = PARSER.parse(FIN)
+    except gadict.ParseException as ex:
+        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
+        if __debug__:
+            import traceback
+            traceback.print_exc()
+        # sys.exit rather than the bare exit(), which only exists when the
+        # site module has been loaded.
+        sys.exit(1)
+
+# Without an OUTPUT argument the table is written to standard output.
+if FONAME is None:
+    FOUT = sys.stdout
+else:
+    FOUT = codecs.open(FONAME, "w", "utf-8")
+
+def attr_key(item):
+    """Return the string used to order the headwords of one article.
+
+    ``item`` is a ``(word, (pron, attrs))`` pair; only ``attrs`` is read.
+    The key is the lowest of the verb-form markers ``v1``/``v2``/``v3``
+    found in ``attrs``; failing that, the first marker present among
+    ``s``, ``comp`` and ``Am``, tried in that order; otherwise ``"zzz"``.
+    """
+    _, (_, attrs) = item
+    if not attrs:
+        return "zzz"
+    marks = list(attrs)
+    verb_forms = [mark for mark in marks if mark in ("v1", "v2", "v3")]
+    if verb_forms:
+        return min(verb_forms)
+    # Number (single/plural), degree (comparative/superlative) and
+    # regional variant (Am/Br/Au) markers, each tried on its own.
+    for wanted in ("s", "comp", "Am"):
+        if wanted in marks:
+            return wanted
+    return "zzz"
+
+# One row per article: the headwords, a TAB, then the senses as HTML.
+# DOM[0] is deliberately left out -- presumably a preamble; TODO confirm.
+try:
+    for idx in range(1, len(DOM)):
+        article = DOM[idx]
+        lines = []
+        for (word, (pron, attrs)) in sorted(article[0].items(), key=attr_key):
+            line = "<b>" + word + "</b>"
+            if pron:
+                line += " [" + pron + "]"
+            if attrs:
+                line += ",".join(sorted(" «" + x + "»" for x in attrs))
+            lines.append(line)
+        FOUT.write("<br>".join(lines))
+        FOUT.write("\t")
+        for sense in article[1]:
+            if not sense:
+                raise Exception("""Empty sense for article: """
+                                + next(iter(article[0])))
+            if sense.pos:
+                FOUT.write('<i style="color: green;">')
+                FOUT.write(sense.pos)
+                FOUT.write('</i>')
+                # Antonyms and synonyms only appear next to a part of speech.
+                if sense.ant_list:
+                    FOUT.write(" <i>ant: ")
+                    FOUT.write("; ".join(sense.ant_list))
+                    FOUT.write("</i>")
+                if sense.syn_list:
+                    FOUT.write(" <i>syn: ")
+                    FOUT.write("; ".join(sense.syn_list))
+                    FOUT.write("</i>")
+                FOUT.write("<br>" if LANGS and len(LANGS) > 1 else " ")
+            for (lang, tr) in sense.tr_list:
+                if LANGS is not None and lang not in LANGS:
+                    # Filtered-out language: skip it, stray "<br>" included.
+                    continue
+                if LANGS is not None and len(LANGS) > 1:
+                    # Several languages selected: label each translation.
+                    FOUT.write('<i style="color: blue;">')
+                    FOUT.write(lang)
+                    FOUT.write("</i> ")
+                # A newline would end the row early, so flatten it to a space.
+                FOUT.write(tr.replace('\n', ' '))
+                FOUT.write("<br>")
+        FOUT.write("\n")
+finally:
+    # Flush and close a real output file even when an article is rejected.
+    if FOUT is not sys.stdout:
+        FOUT.close()