author | Oleksandr Gavenko <gavenkoa@gmail.com> |
Sun, 11 Sep 2016 22:48:16 +0300 | |
changeset 550 | 6728ffd79d0c |
parent 536 | c9f0064d8661 |
child 554 | 59714b9033bc |
permissions | -rw-r--r-- |
536
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
1 |
# -*- coding: utf-8 -*- |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
2 |
"""Space repetition TAB format writer""" |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
3 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
4 |
import io |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
5 |
import sys |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
6 |
import codecs |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
7 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
8 |
import gadict |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
9 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
10 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
11 |
FINAME = None |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
12 |
FONAME = None |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
13 |
if len(sys.argv) >= 2: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
14 |
FINAME = sys.argv[1] |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
15 |
if len(sys.argv) >= 3: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
16 |
FONAME = sys.argv[2] |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
17 |
LANGS = None |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
18 |
if len(sys.argv) >= 4: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
19 |
LANGS = set(sys.argv[3].split(",")) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
20 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
21 |
FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
22 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
23 |
PARSER = gadict.Parser() |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
24 |
try: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
25 |
DOM = PARSER.parse(FIN) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
26 |
except gadict.ParseException as ex: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
27 |
sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex))) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
28 |
if __debug__: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
29 |
import traceback |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
30 |
traceback.print_exc() |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
31 |
exit(1) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
32 |
finally: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
33 |
FIN.close() |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
34 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
35 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
36 |
if FONAME is None: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
37 |
FOUT = sys.stdout |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
38 |
else: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
39 |
FOUT = codecs.open(FONAME, "w", "utf-8") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
40 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
41 |
def attr_key(item): |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
42 |
(word, (pron, attrs)) = item |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
43 |
if not attrs: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
44 |
return "zzz" |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
45 |
best_vattr = None |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
46 |
for attr in attrs: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
47 |
if attr in ["v1", "v2", "v3"]: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
48 |
if not best_vattr or (best_vattr and best_vattr > attr): |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
49 |
best_vattr = attr |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
50 |
if best_vattr: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
51 |
return best_vattr |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
52 |
for attr in attrs: # single/plural |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
53 |
if attr in ["s"]: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
54 |
return attr |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
55 |
for attr in attrs: # comparative/superlative |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
56 |
if attr in ["comp"]: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
57 |
return attr |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
58 |
for attr in attrs: # Am/Br/Au |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
59 |
if attr in ["Am"]: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
60 |
return attr |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
61 |
return "zzz" |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
62 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
63 |
for idx in range(1, len(DOM)): |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
64 |
article = DOM[idx] |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
65 |
defs = article[0].items() |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
66 |
defs = sorted(defs, key = attr_key) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
67 |
lines = [] |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
68 |
for (word, (pron, attrs)) in defs: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
69 |
line = "<b>"+word+"</b>" |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
70 |
if pron: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
71 |
line += " ["+pron+"]" |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
72 |
if len(attrs) > 0: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
73 |
attrs = [" «"+x+"»" for x in attrs] |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
74 |
attrs.sort() |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
75 |
line += ",".join(attrs) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
76 |
lines.append(line) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
77 |
question = "<br>".join(lines) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
78 |
FOUT.write(question) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
79 |
FOUT.write("\t") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
80 |
for sense in article[1]: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
81 |
if not sense: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
82 |
raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__()) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
83 |
if sense.pos: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
84 |
FOUT.write('<i style="color: green;">') |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
85 |
FOUT.write(sense.pos) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
86 |
FOUT.write('</i>') |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
87 |
if sense.ant_list and len(sense.ant_list) > 0: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
88 |
FOUT.write(" <i>ant: ") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
89 |
FOUT.write("; ".join(sense.ant_list)) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
90 |
FOUT.write("</i>") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
91 |
if sense.syn_list and len(sense.syn_list) > 0: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
92 |
FOUT.write(" <i>syn: ") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
93 |
FOUT.write("; ".join(sense.syn_list)) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
94 |
FOUT.write("</i>") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
95 |
if LANGS and len(LANGS) > 1: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
96 |
FOUT.write("<br>") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
97 |
else: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
98 |
FOUT.write(" ") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
99 |
for (lang, tr) in sense.tr_list: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
100 |
tr = tr.replace('\n', ' ') |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
101 |
if LANGS is None: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
102 |
FOUT.write(tr) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
103 |
elif lang in LANGS: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
104 |
if len(LANGS) == 1: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
105 |
FOUT.write(tr) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
106 |
else: |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
107 |
FOUT.write('<i style="color: blue;">') |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
108 |
FOUT.write(lang) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
109 |
FOUT.write("</i> ") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
110 |
FOUT.write(tr) |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
111 |
FOUT.write("<br>") |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
112 |
FOUT.write("\n") |