equal
deleted
inserted
replaced
5 import sys |
5 import sys |
6 import codecs |
6 import codecs |
7 import regex |
7 import regex |
8 |
8 |
9 import gadict |
9 import gadict |
|
10 import gadict_freq |
10 |
11 |
11 |
12 |
12 FINAME = None |
13 FINAME = None |
13 FONAME = None |
14 FONAME = None |
14 LANGS = None |
15 LANGS = None |
|
16 FREQ_SOURCES = [] |
15 |
17 |
16 # -lang:ru,uk |
18 # -lang:ru,uk |
17 ARG_LANG_RE = regex.compile("-lang:(.+)") |
19 ARG_LANG_RE = regex.compile("-lang:(.+)") |
18 # -freq:var:TAG=FILE or -freq:freq:TAG=FILE |
20 # -freq:var:TAG=FILE or -freq:freq:TAG=FILE |
19 ARG_FREQ_RE = regex.compile("-freq:(freq|var):([^=]+)=(.+)") |
21 ARG_FREQ_RE = regex.compile("-freq:(freq|var):([^=]+)=(.+)") |
32 if len(lang) != 2: |
34 if len(lang) != 2: |
33 raise Exception("Incorrect language specification: '{:s}'".format(arg)) |
35 raise Exception("Incorrect language specification: '{:s}'".format(arg)) |
34 continue |
36 continue |
35 m = ARG_FREQ_RE.match(arg) |
37 m = ARG_FREQ_RE.match(arg) |
36 if m: |
38 if m: |
37 LANGS = set(arg.split(",")) |
39 mode = m.group(1) |
38 for lang in LANGS: |
40 tag = m.group(2) |
39 if len(lang) != 2: |
41 fname = m.group(3) |
40 raise Exception("Incorrect language specification: '{:s}'".format(arg)) |
42 with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream: |
|
43 if mode == "var": |
|
44 parser = gadict_freq.HeadVarParser(stream) |
|
45 elif mode == "freq": |
|
46 parser = gadict_freq.FreqlistParser(stream) |
|
47 else: |
|
48 raise Exception("Unsupported mode: '{:s}'".format(mode)) |
|
49 wlist = parser.parse() |
|
50 FREQ_SOURCES.append((tag, set(wlist))) |
41 continue |
51 continue |
42 if arg.startswith("-"): |
52 if arg.startswith("-"): |
43 raise Exception("Unsupported option format: '{:s}'".format(arg)) |
53 raise Exception("Unsupported option format: '{:s}'".format(arg)) |
44 if not FINAME: |
54 if not FINAME: |
45 FINAME = arg |
55 FINAME = arg |
96 FOUT.write(PRELUDE.about) |
106 FOUT.write(PRELUDE.about) |
97 FOUT.write("\n") |
107 FOUT.write("\n") |
98 |
108 |
99 |
109 |
100 for (headwords, translations) in DOM[1:]: |
110 for (headwords, translations) in DOM[1:]: |
|
111 identity = headwords[0].headword |
101 FOUT.write("_____\n\n") |
112 FOUT.write("_____\n\n") |
102 title = "; ".join([h.headword for h in headwords]) |
113 title = "; ".join([h.headword for h in headwords]) |
103 FOUT.write(title) |
114 FOUT.write(title) |
104 FOUT.write("\n\n") |
115 FOUT.write("\n\n") |
105 for hw in headwords: |
116 for hw in headwords: |
178 if not LANGS or len(LANGS) > 1: |
189 if not LANGS or len(LANGS) > 1: |
179 FOUT.write(lang) |
190 FOUT.write(lang) |
180 FOUT.write("⇒ ") |
191 FOUT.write("⇒ ") |
181 FOUT.write(tr) |
192 FOUT.write(tr) |
182 FOUT.write("\n") |
193 FOUT.write("\n") |
|
194 freqtags = [] |
|
195 for (freqtag, freqset) in FREQ_SOURCES: |
|
196 if identity in freqset: |
|
197 freqtags.append(freqtag) |
|
198 if len(freqtags) > 0: |
|
199 FOUT.write(",".join(["{{{:s}}}".format(tag) for tag in freqtags])) |
|
200 FOUT.write("\n") |