diff -r 6d4a074cea27 -r 2d488cfc4c0c py/gadict_c5.py --- a/py/gadict_c5.py Tue Nov 08 18:12:50 2016 +0200 +++ b/py/gadict_c5.py Tue Nov 08 19:01:27 2016 +0200 @@ -7,11 +7,13 @@ import regex import gadict +import gadict_freq FINAME = None FONAME = None LANGS = None +FREQ_SOURCES = [] # -lang:ru,uk ARG_LANG_RE = regex.compile("-lang:(.+)") @@ -34,10 +36,18 @@ continue m = ARG_FREQ_RE.match(arg) if m: - LANGS = set(arg.split(",")) - for lang in LANGS: - if len(lang) != 2: - raise Exception("Incorrect language specification: '{:s}'".format(arg)) + mode = m.group(1) + tag = m.group(2) + fname = m.group(3) + with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream: + if mode == "var": + parser = gadict_freq.HeadVarParser(stream) + elif mode == "freq": + parser = gadict_freq.FreqlistParser(stream) + else: + raise Exception("Unsupported mode: '{:s}'".format(mode)) + wlist = parser.parse() + FREQ_SOURCES.append((tag, set(wlist))) continue if arg.startswith("-"): raise Exception("Unsupported option format: '{:s}'".format(arg)) @@ -98,6 +108,7 @@ for (headwords, translations) in DOM[1:]: + identity = headwords[0].headword FOUT.write("_____\n\n") title = "; ".join([h.headword for h in headwords]) FOUT.write(title) @@ -180,3 +191,10 @@ FOUT.write("⇒ ") FOUT.write(tr) FOUT.write("\n") + freqtags = [] + for (freqtag, freqset) in FREQ_SOURCES: + if identity in freqset: + freqtags.append(freqtag) + if len(freqtags) > 0: + FOUT.write(",".join(["{{{:s}}}".format(tag) for tag in freqtags])) + FOUT.write("\n")