Treat the first argument in the same format as the rest of the arguments. Targets to print…
author Oleksandr Gavenko <gavenkoa@gmail.com>
Thu, 29 Dec 2016 01:17:44 +0200
changeset 723 53095b480a73
parent 722 6b214c889e25
child 724 98fd211d27db
Treat the first argument in the same format as the rest of the arguments. Add targets to print statistics about words missing from the wordlist.
Makefile
py/gadict_freq.py
--- a/Makefile	Thu Dec 29 00:44:28 2016 +0200
+++ b/Makefile	Thu Dec 29 01:17:44 2016 +0200
@@ -452,7 +452,34 @@
 
 .PHONY: freq
 freq: dist/wordlist/gadict_en-ru+uk.list py/gadict_freq.py $(FREQ_DEP) $(BUILD_SCRIPTS)
-	python3 -B py/gadict_freq.py dist/wordlist/gadict_en-ru+uk.list $(FREQ_FILTER)
+	python3 -B py/gadict_freq.py +b:dist/wordlist/gadict_en-ru+uk.list $(FREQ_FILTER)
+
+.PHONY: missing
+missing: dist/wordlist/gadict_en-ru+uk.list $(FREQ_DEP) $(BUILD_SCRIPTS)
+	@echo ===== VOA =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +b:dist/wordlist/voa.list
+	@echo ===== GSL =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/gsl.freq
+	@echo ===== AWL =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/awl.freq
+	@echo ===== NGSL =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/ngsl.freq
+	@echo ===== NAWL =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/nawl.freq
+	@echo ===== BSL =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +b:wordlist/bsl.var
+	@echo ===== TSL =====
+	python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +b:wordlist/tsl.var
+
+.PHONY: missing-stat
+missing-stat: dist/wordlist/gadict_en-ru+uk.list $(FREQ_DEP) $(BUILD_SCRIPTS)
+	@printf "%4s: %4s / %4s\n" VOA `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +b:dist/wordlist/voa.list | wc -l` `wc -l <dist/wordlist/voa.list`
+	@printf "%4s: %4s / %4s\n" GSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/gsl.freq | wc -l` `wc -l <wordlist/gsl.freq`
+	@printf "%4s: %4s / %4s\n" AWL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/awl.freq | wc -l` `wc -l <wordlist/awl.freq`
+	@printf "%4s: %4s / %4s\n" NGSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/ngsl.freq | wc -l` `wc -l <wordlist/ngsl.freq`
+	@printf "%4s: %4s / %4s\n" NAWL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +f:wordlist/nawl.freq | wc -l` `wc -l <wordlist/nawl.freq`
+	@printf "%4s: %4s / %4s\n" BSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +b:wordlist/bsl.var | wc -l` `wc -l <wordlist/bsl.var`
+	@printf "%4s: %4s / %4s\n" TSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict_en-ru+uk.list +b:wordlist/tsl.var | wc -l` `wc -l <wordlist/tsl.var`
 
 dist/wordlist/%.list: %.gadict py/gadict_headwords.py $(BUILD_SCRIPTS) | dist/wordlist/
 	python3 -B py/gadict_headwords.py  $< $@
--- a/py/gadict_freq.py	Thu Dec 29 00:44:28 2016 +0200
+++ b/py/gadict_freq.py	Thu Dec 29 01:17:44 2016 +0200
@@ -19,7 +19,7 @@
             wlist.append(line)
         return wlist
 
-class HeadVarParser:
+class WordformParser:
 
     BASEVAR_RE = regex.compile(u"^(\t)?(.*)$")
 
@@ -80,17 +80,12 @@
         raise Exception(USAGE)
     FINAME = sys.argv[1]
 
-    with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as stream:
-        parser = WordlistParser(stream)
-        HEADWORDS = parser.parse()
-
     COMMAND_RE = regex.compile("([-+])([0-9]+)?([bf]):([^:]+)")
 
-
     IN_SET = set()
     EX_SET = set()
 
-    for idx in range(2, len(sys.argv)):
+    for idx in range(1, len(sys.argv)):
         spec = sys.argv[idx]
         m = COMMAND_RE.match(spec)
         if not m:
@@ -102,7 +97,7 @@
             limit = int(limit)
         with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
             if mode == "b":
-                parser = HeadVarParser(stream, limit)
+                parser = WordformParser(stream, limit)
             elif mode == "f":
                 parser = FreqlistParser(stream, limit)
             else:
@@ -112,16 +107,14 @@
             except:
                 print("Error during processing: {:s}".format(fname))
                 raise
-        wlist = set(wlist)
+        wlist = set([w.lower() for w in wlist])
         if m.group(1) == "+":
             IN_SET |= wlist
         else:
             EX_SET |= wlist
 
-    for headword in HEADWORDS:
-        if any(c in headword for c in " '."):
-            continue
-        normilized= headword.lower()
-        if (len(IN_SET) == 0 or normilized in IN_SET) and not normilized in EX_SET:
-            print(headword)
+    for headword in IN_SET - EX_SET:
+        # if any(c in headword for c in " '."):
+        #     continue
+        print(headword)