diff -r 6b214c889e25 -r 53095b480a73 py/gadict_freq.py --- a/py/gadict_freq.py Thu Dec 29 00:44:28 2016 +0200 +++ b/py/gadict_freq.py Thu Dec 29 01:17:44 2016 +0200 @@ -19,7 +19,7 @@ wlist.append(line) return wlist -class HeadVarParser: +class WordformParser: BASEVAR_RE = regex.compile(u"^(\t)?(.*)$") @@ -80,17 +80,12 @@ raise Exception(USAGE) FINAME = sys.argv[1] - with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as stream: - parser = WordlistParser(stream) - HEADWORDS = parser.parse() - COMMAND_RE = regex.compile("([-+])([0-9]+)?([bf]):([^:]+)") - IN_SET = set() EX_SET = set() - for idx in range(2, len(sys.argv)): + for idx in range(1, len(sys.argv)): spec = sys.argv[idx] m = COMMAND_RE.match(spec) if not m: @@ -102,7 +97,7 @@ limit = int(limit) with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream: if mode == "b": - parser = HeadVarParser(stream, limit) + parser = WordformParser(stream, limit) elif mode == "f": parser = FreqlistParser(stream, limit) else: @@ -112,16 +107,14 @@ except: print("Error during processing: {:s}".format(fname)) raise - wlist = set(wlist) + wlist = set([w.lower() for w in wlist]) if m.group(1) == "+": IN_SET |= wlist else: EX_SET |= wlist - for headword in HEADWORDS: - if any(c in headword for c in " '."): - continue - normilized= headword.lower() - if (len(IN_SET) == 0 or normilized in IN_SET) and not normilized in EX_SET: - print(headword) + for headword in IN_SET - EX_SET: + # if any(c in headword for c in " '."): + # continue + print(headword)