Generate import file for space repetition software in TAB format.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 27 Aug 2016 14:32:25 +0300
changeset 536 c9f0064d8661
parent 535 e5dd169f4bdb
child 537 a70ab6a33bfa
Generate import file for space repetition software in TAB format.
Makefile
py/gadict_srs_tab.py
www/CHANGES.rst
--- a/Makefile	Sat Aug 27 14:03:30 2016 +0300
+++ b/Makefile	Sat Aug 27 14:32:25 2016 +0300
@@ -142,6 +142,8 @@
 DICTDZ_FILES := $(C5_FILES:.c5=.dict.dz)
 INDEX_FILES := $(C5_FILES:.c5=.index)
 
+SRS_TAB_FILES := $(patsubst %.gadict,dist/srs/%.tab.txt,$(GADICT_FILES))
+
 RST_TMPL_FILE = dist/misc/rst.tmpl
 RST_CSS_FILE = www/tmpl/rst.css
 
@@ -387,6 +389,18 @@
 dist/dictd/:
 	mkdir -p $@
 
+.PHONY: srs  # "srs" names a command, not a file to be built
+srs: $(SRS_TAB_FILES)  # build every file listed in SRS_TAB_FILES
+
+dist/srs/gadict_en-ru+uk.tab.txt: gadict_en-ru+uk.gadict py/gadict.py py/gadict_srs_tab.py $(MAKEFILE_LIST) | dist/srs/  # rebuilt when the dictionary, either script or a makefile changes; dist/srs/ is order-only; the "ru,uk" argument is the language filter
+	python3 -B py/gadict_srs_tab.py  $< $@ ru,uk
+
+dist/srs/gadict_voa.tab.txt: gadict_voa.gadict py/gadict.py py/gadict_srs_tab.py $(MAKEFILE_LIST) | dist/srs/  # same prerequisites; no language argument, so the script keeps every translation
+	python3 -B py/gadict_srs_tab.py  $< $@
+
+dist/srs/:  # output directory, created on demand for the order-only prerequisite above
+	mkdir -p $@
+
 ################################################################
 # Install/uninstall targets.
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_srs_tab.py	Sat Aug 27 14:32:25 2016 +0300
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""Space repetition TAB format writer"""
+
+import io
+import sys
+import codecs
+
+import gadict
+
+
+FINAME = None
+FONAME = None
+if len(sys.argv) >= 2:
+    FINAME = sys.argv[1]
+if len(sys.argv) >= 3:
+    FONAME = sys.argv[2]
+LANGS = None
+if len(sys.argv) >= 4:
+    LANGS = set(sys.argv[3].split(","))
+
+FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
+
+PARSER = gadict.Parser()
+try:
+    DOM = PARSER.parse(FIN)
+except gadict.ParseException as ex:
+    sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
+    if __debug__:
+        import traceback
+        traceback.print_exc()
+    exit(1)
+finally:
+    FIN.close()
+
+
+if FONAME is None:
+    FOUT = sys.stdout
+else:
+    FOUT = codecs.open(FONAME, "w", "utf-8")
+
+def attr_key(item):
+    (word, (pron, attrs)) = item
+    if not attrs:
+        return "zzz"
+    best_vattr = None
+    for attr in attrs:
+        if attr in ["v1", "v2", "v3"]:
+            if not best_vattr or (best_vattr and best_vattr > attr):
+                best_vattr = attr
+    if best_vattr:
+        return best_vattr
+    for attr in attrs:                      # single/plural
+        if attr in ["s"]:
+            return attr
+    for attr in attrs:                      # comparative/superlative
+        if attr in ["comp"]:
+            return attr
+    for attr in attrs:                      # Am/Br/Au
+        if attr in ["Am"]:
+            return attr
+    return "zzz"
+
+for idx in range(1, len(DOM)):
+    article = DOM[idx]
+    defs = article[0].items()
+    defs = sorted(defs, key = attr_key)
+    lines = []
+    for (word, (pron, attrs)) in defs:
+        line = "<b>"+word+"</b>"
+        if pron:
+            line += " ["+pron+"]"
+        if len(attrs) > 0:
+            attrs = [" «"+x+"»" for x in attrs]
+            attrs.sort()
+            line += ",".join(attrs)
+        lines.append(line)
+    question = "<br>".join(lines)
+    FOUT.write(question)
+    FOUT.write("\t")
+    for sense in article[1]:
+        if not sense:
+            raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())
+        if sense.pos:
+            FOUT.write('<i style="color: green;">')
+            FOUT.write(sense.pos)
+            FOUT.write('</i>')
+            if sense.ant_list and len(sense.ant_list) > 0:
+                FOUT.write(" <i>ant: ")
+                FOUT.write("; ".join(sense.ant_list))
+                FOUT.write("</i>")
+            if sense.syn_list and len(sense.syn_list) > 0:
+                FOUT.write(" <i>syn: ")
+                FOUT.write("; ".join(sense.syn_list))
+                FOUT.write("</i>")
+            if LANGS and len(LANGS) > 1:
+                FOUT.write("<br>")
+            else:
+                FOUT.write(" ")
+        for (lang, tr) in sense.tr_list:
+            tr = tr.replace('\n', ' ')
+            if LANGS is None:
+                FOUT.write(tr)
+            elif lang in LANGS:
+                if len(LANGS) == 1:
+                    FOUT.write(tr)
+                else:
+                    FOUT.write('<i style="color: blue;">')
+                    FOUT.write(lang)
+                    FOUT.write("</i> ")
+                    FOUT.write(tr)
+            FOUT.write("<br>")
+    FOUT.write("\n")
--- a/www/CHANGES.rst	Sat Aug 27 14:03:30 2016 +0300
+++ b/www/CHANGES.rst	Sat Aug 27 14:32:25 2016 +0300
@@ -6,6 +6,11 @@
 .. contents::
    :local:
 
+v0.8, 2016-09-01
+================
+
+ * Generate import file for space repetition software in TAB format.
+
 v0.7, 2016-06-01
 ================