# HG changeset patch # User Oleksandr Gavenko # Date 1472297545 -10800 # Node ID c9f0064d866116c0025be33b8f87929573370c20 # Parent e5dd169f4bdb5834c117636e381a3a18860055c5 Generate import file for space repetition software in TAB format. diff -r e5dd169f4bdb -r c9f0064d8661 Makefile --- a/Makefile Sat Aug 27 14:03:30 2016 +0300 +++ b/Makefile Sat Aug 27 14:32:25 2016 +0300 @@ -142,6 +142,8 @@ DICTDZ_FILES := $(C5_FILES:.c5=.dict.dz) INDEX_FILES := $(C5_FILES:.c5=.index) +SRS_TAB_FILES := $(patsubst %.gadict,dist/srs/%.tab.txt,$(GADICT_FILES)) + RST_TMPL_FILE = dist/misc/rst.tmpl RST_CSS_FILE = www/tmpl/rst.css @@ -387,6 +389,18 @@ dist/dictd/: mkdir -p $@ +.PHONY: srs +srs: $(SRS_TAB_FILES) + +dist/srs/gadict_en-ru+uk.tab.txt: gadict_en-ru+uk.gadict py/gadict.py py/gadict_srs_tab.py $(MAKEFILE_LIST) | dist/srs/ + python3 -B py/gadict_srs_tab.py $< $@ ru,uk + +dist/srs/gadict_voa.tab.txt: gadict_voa.gadict py/gadict.py py/gadict_srs_tab.py $(MAKEFILE_LIST) | dist/srs/ + python3 -B py/gadict_srs_tab.py $< $@ + +dist/srs/: + mkdir -p $@ + ################################################################ # Install/uninstall targets. 
# -*- coding: utf-8 -*-
# diff -r e5dd169f4bdb -r c9f0064d8661 py/gadict_srs_tab.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/py/gadict_srs_tab.py Sat Aug 27 14:32:25 2016 +0300 @@ -0,0 +1,112 @@
"""Space repetition TAB format writer.

Reads a gadict dictionary and writes one record per article: the headword
lines (question), a TAB, then the formatted senses (answer).

Usage: gadict_srs_tab.py INPUT [OUTPUT [LANG[,LANG...]]]

INPUT  dictionary source, read as UTF-8.
OUTPUT file written as UTF-8; standard output when omitted.
LANG   comma-separated translation languages to keep; all when omitted.
"""

import codecs
import io
import sys


USAGE = "usage: gadict_srs_tab.py INPUT [OUTPUT [LANG[,LANG...]]]\n"

# Separator between lines inside one field.
# NOTE(review): the reviewed text shows a raw line break at the three places
# this constant is used, and empty string literals around the headword, part
# of speech, ant/syn lists and language tag. That looks like markup lost in
# extraction; a raw line break inside a field also collides with the "\n"
# that terminates each record. Confirm against the upstream revision.
LINE_BREAK = "\n"

# Attributes that influence headword ordering; see attr_key().
_VERB_FORMS = ("v1", "v2", "v3")
_FALLBACK_ATTRS = ("s", "comp", "Am")  # single/plural, comparative, Am/Br/Au


def attr_key(item):
    """Return the sort key of one ``(word, (pron, attrs))`` headword entry.

    The smallest of "v1"/"v2"/"v3" wins when present; otherwise the first
    of "s", "comp", "Am" found in ``attrs``; otherwise "zzz" (also used
    when ``attrs`` is empty or None).
    """
    _word, (_pron, attrs) = item
    if not attrs:
        return "zzz"
    verb_forms = [attr for attr in attrs if attr in _VERB_FORMS]
    if verb_forms:
        return min(verb_forms)
    for candidate in _FALLBACK_ATTRS:
        if candidate in attrs:
            return candidate
    return "zzz"


def format_question(headwords):
    """Return the question field for one article.

    ``headwords`` maps each word to ``(pronunciation, attributes)``.
    Entries are ordered by attr_key(); each becomes one line of the form
    ``word [pron] «attr», «attr»`` with the optional parts omitted when
    empty.
    """
    lines = []
    for word, (pron, attrs) in sorted(headwords.items(), key=attr_key):
        line = word
        if pron:
            line += " [" + pron + "]"
        if attrs:
            line += ",".join(sorted(" «" + attr + "»" for attr in attrs))
        lines.append(line)
    return LINE_BREAK.join(lines)


def format_sense(sense, langs):
    """Return the answer text for a single sense.

    ``sense`` must expose ``pos``, ``ant_list``, ``syn_list`` and
    ``tr_list`` (an iterable of ``(lang, translation)`` pairs).  ``langs``
    is None to keep every translation, or a set of language codes to keep;
    the language code is printed only when more than one is selected.
    """
    parts = []
    if sense.pos:
        parts.append(sense.pos)
    if sense.ant_list:
        parts.append(" ant: " + "; ".join(sense.ant_list))
    if sense.syn_list:
        parts.append(" syn: " + "; ".join(sense.syn_list))
    several_langs = bool(langs) and len(langs) > 1
    parts.append(LINE_BREAK if several_langs else " ")
    for lang, tr in sense.tr_list:
        tr = tr.replace("\n", " ")  # a translation must stay on one line
        if langs is None:
            parts.append(tr)
        elif lang in langs:
            parts.append(lang + " " + tr if several_langs else tr)
        # NOTE(review): the break follows every translation, including
        # ones filtered out by ``langs``. The reviewed text had lost its
        # indentation at this statement - confirm the nesting upstream.
        parts.append(LINE_BREAK)
    return "".join(parts)


def format_article(article, langs):
    """Return the complete record (with trailing newline) for one article.

    ``article[0]`` is the headword mapping and ``article[1]`` the senses.

    Raises:
        Exception: when a sense is empty/falsy; the message names the
            first headword of the article.
    """
    headwords, senses = article[0], article[1]
    answer = []
    for sense in senses:
        if not sense:
            raise Exception(
                "Empty sense for article: " + next(iter(headwords)))
        answer.append(format_sense(sense, langs))
    return format_question(headwords) + "\t" + "".join(answer) + "\n"


def write_records(dom, langs, fout):
    """Write one record per article of ``dom`` to ``fout``.

    ``dom[0]`` is skipped, exactly as the original loop did.
    NOTE(review): presumably that element is the dictionary preamble -
    confirm against gadict.Parser.
    """
    # Index loop kept on purpose: only len() and integer indexing are known
    # to be supported by the parser result.
    for idx in range(1, len(dom)):
        fout.write(format_article(dom[idx], langs))


def main(argv):
    """Run the conversion described by ``argv``; return the exit status.

    Returns 0 on success, 1 when the dictionary fails to parse and 2 when
    the INPUT argument is missing.
    """
    if len(argv) < 2:
        sys.stderr.write(USAGE)
        return 2
    finame = argv[1]
    foname = argv[2] if len(argv) >= 3 else None
    langs = set(argv[3].split(",")) if len(argv) >= 4 else None

    # Project-local module; imported here so the pure formatting helpers
    # above stay importable without the project on sys.path.
    import gadict

    try:
        with io.open(finame, mode="r", buffering=1, encoding="utf-8") as fin:
            dom = gadict.Parser().parse(fin)
    except gadict.ParseException as ex:
        # Diagnostics go to stderr: stdout may be the data stream.
        sys.stderr.write("{:s}{:s}\n".format(finame, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        return 1

    if foname is None:
        write_records(dom, langs, sys.stdout)
    else:
        # codecs.open keeps "\n" untranslated; the with-block guarantees
        # the file is flushed and closed even if an article is malformed.
        with codecs.open(foname, "w", "utf-8") as fout:
            write_records(dom, langs, fout)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

# diff -r e5dd169f4bdb -r c9f0064d8661 www/CHANGES.rst --- a/www/CHANGES.rst Sat Aug 27 14:03:30 2016 +0300 +++ b/www/CHANGES.rst Sat Aug 27 14:32:25 2016 +0300 @@ -6,6 +6,11 @@ .. contents:: :local: +v0.8, 2016-09-01 +================ + + * Generate import file for space repetition software in TAB format. + v0.7, 2016-06-01 ================