Generate import file for spaced repetition software in TAB format.
--- a/Makefile Sat Aug 27 14:03:30 2016 +0300
+++ b/Makefile Sat Aug 27 14:32:25 2016 +0300
@@ -142,6 +142,8 @@
DICTDZ_FILES := $(C5_FILES:.c5=.dict.dz)
INDEX_FILES := $(C5_FILES:.c5=.index)
+SRS_TAB_FILES := $(GADICT_FILES:%.gadict=dist/srs/%.tab.txt)
+
RST_TMPL_FILE = dist/misc/rst.tmpl
RST_CSS_FILE = www/tmpl/rst.css
@@ -387,6 +389,18 @@
dist/dictd/:
mkdir -p $@
+.PHONY: srs
+srs: $(SRS_TAB_FILES)
+
+# Languages passed to the script as its optional 3rd argument; unset means no filter.
+dist/srs/gadict_en-ru+uk.tab.txt: SRS_LANGS := ru,uk
+
+$(SRS_TAB_FILES): dist/srs/%.tab.txt: %.gadict py/gadict.py py/gadict_srs_tab.py $(MAKEFILE_LIST) | dist/srs/
+	python3 -B py/gadict_srs_tab.py $< $@ $(SRS_LANGS)
+
+dist/srs/:
+	mkdir -p $@
+
################################################################
# Install/uninstall targets.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_srs_tab.py Sat Aug 27 14:32:25 2016 +0300
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""Space repetition TAB format writer"""
+
+import io
+import sys
+import codecs
+
+import gadict
+
+
+# Command line: gadict_srs_tab.py INPUT [OUTPUT [LANG,LANG...]]
+if len(sys.argv) < 2:
+    sys.exit("usage: gadict_srs_tab.py INPUT.gadict [OUTPUT [LANG,...]]")
+FINAME = sys.argv[1]
+# Without OUTPUT the result is written to stdout.
+FONAME = sys.argv[2] if len(sys.argv) >= 3 else None
+# Without a language list every translation is kept.
+LANGS = set(sys.argv[3].split(",")) if len(sys.argv) >= 4 else None
+
+FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
+
+PARSER = gadict.Parser()
+try:
+    DOM = PARSER.parse(FIN)
+except gadict.ParseException as ex:
+    sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
+    if __debug__:
+        import traceback
+        traceback.print_exc()
+    # sys.exit() rather than the site-provided exit(), which is not guaranteed to exist.
+    sys.exit(1)
+finally:
+    FIN.close()
+
+
+if FONAME is None:
+    FOUT = sys.stdout
+else:
+    FOUT = codecs.open(FONAME, "w", "utf-8")
+
+def attr_key(item):
+    """Sort key ordering the headword variants of one article.
+
+    ``item`` is a ``(word, (pron, attrs))`` pair.  Verb forms win, the
+    lowest of "v1"/"v2"/"v3" being the key; otherwise the first of "s",
+    "comp", "Am" that is present; anything else sorts last as "zzz".
+    """
+    (_word, (_pron, attrs)) = item
+    if not attrs:
+        return "zzz"
+    # Plain string comparison orders the verb forms: "v1" < "v2" < "v3".
+    present = list(attrs)
+    verb_forms = [tag for tag in present if tag in ("v1", "v2", "v3")]
+    if verb_forms:
+        return min(verb_forms)
+    # Priority order: singular/plural, comparative/superlative, Am/Br/Au.
+    for marker in ("s", "comp", "Am"):
+        if marker in present:
+            return marker
+    # No recognised attribute: sort after every known key.
+    return "zzz"
+
+for idx in range(1, len(DOM)):  # DOM[0] is skipped (presumably a header entry — TODO confirm against gadict.Parser)
+ article = DOM[idx]
+ defs = article[0].items()  # (word, (pron, attrs)) pairs
+ defs = sorted(defs, key = attr_key)
+ lines = []
+ for (word, (pron, attrs)) in defs:
+ line = "<b>"+word+"</b>"
+ if pron:
+ line += " ["+pron+"]"
+ if len(attrs) > 0:  # NOTE(review): attr_key tolerates attrs being None via "not attrs"; len() here would raise on None
+ attrs = [" «"+x+"»" for x in attrs]
+ attrs.sort()
+ line += ",".join(attrs)
+ lines.append(line)
+ question = "<br>".join(lines)  # headword variants joined with HTML line breaks
+ FOUT.write(question)
+ FOUT.write("\t")  # TAB separates the question from the text written for the senses
+ for sense in article[1]:
+ if not sense:
+ raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())  # NOTE(review): next(iter(article[0])) is the idiomatic spelling
+ if sense.pos:
+ FOUT.write('<i style="color: green;">')
+ FOUT.write(sense.pos)
+ FOUT.write('</i>')
+ if sense.ant_list and len(sense.ant_list) > 0:
+ FOUT.write(" <i>ant: ")
+ FOUT.write("; ".join(sense.ant_list))
+ FOUT.write("</i>")
+ if sense.syn_list and len(sense.syn_list) > 0:
+ FOUT.write(" <i>syn: ")
+ FOUT.write("; ".join(sense.syn_list))
+ FOUT.write("</i>")
+ if LANGS and len(LANGS) > 1:  # several languages requested: translations start on a new HTML line
+ FOUT.write("<br>")
+ else:
+ FOUT.write(" ")
+ for (lang, tr) in sense.tr_list:
+ tr = tr.replace('\n', ' ')  # a raw newline would split the record
+ if LANGS is None:  # no language filter: keep every translation
+ FOUT.write(tr)
+ elif lang in LANGS:  # translations in other languages are dropped
+ if len(LANGS) == 1:  # single language: language label omitted
+ FOUT.write(tr)
+ else:
+ FOUT.write('<i style="color: blue;">')
+ FOUT.write(lang)
+ FOUT.write("</i> ")
+ FOUT.write(tr)
+ FOUT.write("<br>")  # NOTE(review): confirm translations are also separated when LANGS is None or has one entry
+ FOUT.write("\n")  # newline terminates the record
--- a/www/CHANGES.rst Sat Aug 27 14:03:30 2016 +0300
+++ b/www/CHANGES.rst Sat Aug 27 14:32:25 2016 +0300
@@ -6,6 +6,11 @@
.. contents::
:local:
+v0.8, 2016-09-01
+================
+
+ * Generate import file for spaced repetition software in TAB format.
+
v0.7, 2016-06-01
================