py/gadict_c5.py
changeset 646 2d488cfc4c0c
parent 644 e38cd6112193
child 686 a9e3b6050544
--- a/py/gadict_c5.py	Tue Nov 08 18:12:50 2016 +0200
+++ b/py/gadict_c5.py	Tue Nov 08 19:01:27 2016 +0200
@@ -7,11 +7,13 @@
 import regex
 
 import gadict
+import gadict_freq
 
 
 FINAME = None
 FONAME = None
 LANGS = None
+FREQ_SOURCES = []  # one (tag, set of words) pair per argument matched by ARG_FREQ_RE
 
 # -lang:ru,uk
 ARG_LANG_RE = regex.compile("-lang:(.+)")
@@ -34,10 +36,18 @@
             continue
         m = ARG_FREQ_RE.match(arg)
         if m:
-            LANGS = set(arg.split(","))
-            for lang in LANGS:
-                if len(lang) != 2:
-                    raise Exception("Incorrect language specification: '{:s}'".format(arg))
+            mode, tag, fname = m.group(1, 2, 3)
+            # The file is opened before the mode is checked, so a missing file
+            # is reported ahead of an unsupported mode.
+            with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
+                if mode == "var":
+                    parser_cls = gadict_freq.HeadVarParser
+                elif mode == "freq":
+                    parser_cls = gadict_freq.FreqlistParser
+                else:
+                    raise Exception("Unsupported mode: '{:s}'".format(mode))
+                wlist = parser_cls(stream).parse()
+            FREQ_SOURCES.append((tag, set(wlist)))
             continue
         if arg.startswith("-"):
             raise Exception("Unsupported option format: '{:s}'".format(arg))
@@ -98,6 +108,7 @@
 
 
 for (headwords, translations) in DOM[1:]:
+    identity = headwords[0].headword  # first headword; looked up in each FREQ_SOURCES word set below
     FOUT.write("_____\n\n")
     title = "; ".join([h.headword for h in headwords])
     FOUT.write(title)
@@ -180,3 +191,10 @@
             FOUT.write("⇒ ")
             FOUT.write(tr)
             FOUT.write("\n")
+    # Collect the tag of every frequency source whose word set contains this
+    # entry's identity headword; entries found in none get no tag line.
+    freqtags = [freqtag for (freqtag, freqset) in FREQ_SOURCES if identity in freqset]
+    if freqtags:
+        marks = ["{{{:s}}}".format(tag) for tag in freqtags]
+        FOUT.write(",".join(marks))
+        FOUT.write("\n")