author | Oleksandr Gavenko <gavenkoa@gmail.com> |
Thu, 15 Sep 2016 20:13:18 +0300 | |
changeset 558 | 53fd793e345d |
parent 554 | 59714b9033bc |
child 586 | 1ddf73757bff |
permissions | -rw-r--r-- |
536
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
1 |
# -*- coding: utf-8 -*- |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
2 |
"""Space repetition TAB format writer""" |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
3 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
4 |
import io |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
5 |
import sys |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
6 |
import codecs |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
7 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
8 |
import gadict |
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
9 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
10 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
11 |
def parse_args(argv):
    """Split the command line into (input path, output path, language set).

    Positional arguments, all optional from the script's point of view:

    * ``argv[1]`` -- input file name, ``None`` when absent;
    * ``argv[2]`` -- output file name, ``None`` when absent;
    * ``argv[3]`` -- comma-separated language codes, ``None`` when absent.

    Whitespace around each language code is stripped and empty entries
    (e.g. from a trailing comma) are dropped, so ``"en, ru,"`` selects
    exactly ``{"en", "ru"}``.
    """
    finame = argv[1] if len(argv) >= 2 else None
    foname = argv[2] if len(argv) >= 3 else None
    langs = None
    if len(argv) >= 4:
        langs = {code.strip() for code in argv[3].split(",") if code.strip()}
    return finame, foname, langs


FINAME, FONAME, LANGS = parse_args(sys.argv)
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
20 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
21 |
# Read and parse the input dictionary into DOM.
# Without an input path io.open() would fail with an opaque TypeError, so
# report the problem explicitly instead.
if FINAME is None:
    sys.stderr.write("usage: {:s} INPUT [OUTPUT [LANG,LANG...]]\n".format(sys.argv[0]))
    sys.exit(2)

PARSER = gadict.Parser()
try:
    # The context manager closes the input on success and on any error.
    with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
        DOM = PARSER.parse(FIN)
except gadict.ParseException as ex:
    # Diagnostics go to stderr: stdout may be the generated output stream.
    # NOTE(review): file name and repr(ex) are joined without a separator;
    # presumably ParseException's repr supplies its own leading delimiter --
    # confirm in gadict.
    sys.stderr.write("{:s}{:s}\n".format(FINAME, repr(ex)))
    if __debug__:
        import traceback
        traceback.print_exc()
    # sys.exit() rather than the site-provided exit(), which is not
    # guaranteed to exist (e.g. under ``python -S``).
    sys.exit(1)
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
34 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
35 |
|
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
36 |
# Destination stream: standard output when no output path was given,
# otherwise the named file opened for writing as UTF-8.
FOUT = sys.stdout if FONAME is None else codecs.open(FONAME, "w", "utf-8")
c9f0064d8661
Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
40 |
|
554
59714b9033bc
Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
536
diff
changeset
|
41 |
# Write one card per article as "<question>\t<answer>\n":
#   question -- the article's headwords joined with <br>,
#   answer   -- every sense with its part of speech, antonyms, synonyms
#               and translations, as HTML fragments.
# The first DOM element is skipped (presumably a non-article preamble --
# confirm against gadict.Parser).
# NOTE(review): the listing this was taken from had lost its indentation;
# the nesting below (in particular the per-translation "<br>") is a
# reconstruction -- confirm against the repository.

# Loop-invariant: labels and line breaks are only used with several languages.
MULTI_LANG = bool(LANGS) and len(LANGS) > 1

try:
    for (headwords, translations) in DOM[1:]:
        hw_lines = []
        for hw in headwords:
            text = "<b>" + hw.headword + "</b>"
            if hw.pron:
                text += " [" + hw.pron + "]"
            if hw.attrs:
                # Sorted so the output does not depend on attribute order.
                text += ",".join(sorted(" «" + x + "»" for x in hw.attrs))
            hw_lines.append(text)
        card = ["<br>".join(hw_lines), "\t"]
        for sense in translations:
            if not sense:
                # Name the article by its first headword in the error.
                first = headwords[0].headword if headwords else "<no headword>"
                raise Exception("Empty sense for article: " + first)
            if sense.pos:
                card.append('<i style="color: green;">' + sense.pos + '</i>')
            if sense.ant_list:
                card.append(" <i>ant: " + "; ".join(sense.ant_list) + "</i>")
            if sense.syn_list:
                card.append(" <i>syn: " + "; ".join(sense.syn_list) + "</i>")
            card.append("<br>" if MULTI_LANG else " ")
            for (lang, tr) in sense.tr_list:
                # A card must stay on one physical line in the TAB format.
                tr = tr.replace('\n', ' ')
                if LANGS is None:
                    card.append(tr)
                elif lang in LANGS:
                    if MULTI_LANG:
                        card.append('<i style="color: blue;">' + lang + "</i> ")
                    card.append(tr)
                card.append("<br>")
        card.append("\n")
        FOUT.write("".join(card))
finally:
    # Flush and release the output file; never close the shared stdout.
    if FOUT is not sys.stdout:
        FOUT.close()