--- a/py/gadict_freq.py Thu Dec 29 00:44:28 2016 +0200
+++ b/py/gadict_freq.py Thu Dec 29 01:17:44 2016 +0200
@@ -19,7 +19,7 @@
wlist.append(line)
return wlist
-class HeadVarParser:
+class WordformParser:
BASEVAR_RE = regex.compile(u"^(\t)?(.*)$")
@@ -80,17 +80,12 @@
raise Exception(USAGE)
FINAME = sys.argv[1]
- with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as stream:
- parser = WordlistParser(stream)
- HEADWORDS = parser.parse()
-
COMMAND_RE = regex.compile("([-+])([0-9]+)?([bf]):([^:]+)")
-
IN_SET = set()
EX_SET = set()
- for idx in range(2, len(sys.argv)):
+ for idx in range(1, len(sys.argv)):
spec = sys.argv[idx]
m = COMMAND_RE.match(spec)
if not m:
@@ -102,7 +97,7 @@
limit = int(limit)
with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
if mode == "b":
- parser = HeadVarParser(stream, limit)
+ parser = WordformParser(stream, limit)
elif mode == "f":
parser = FreqlistParser(stream, limit)
else:
@@ -112,16 +107,14 @@
except:
print("Error during processing: {:s}".format(fname))
raise
- wlist = set(wlist)
+ wlist = set([w.lower() for w in wlist])
if m.group(1) == "+":
IN_SET |= wlist
else:
EX_SET |= wlist
- for headword in HEADWORDS:
- if any(c in headword for c in " '."):
- continue
- normilized= headword.lower()
- if (len(IN_SET) == 0 or normilized in IN_SET) and not normilized in EX_SET:
- print(headword)
+ for headword in IN_SET - EX_SET:
+ # if any(c in headword for c in " '."):
+ # continue
+ print(headword)