# -*- coding: utf-8 -*-
"""Space repetition TAB format writer.

Command line arguments, all positional:

1. input file name (required), parsed with ``gadict.Parser``;
2. output file name (optional); standard output is used when omitted;
3. comma separated list of language codes (optional); when omitted no
   language filtering is applied.
"""

import io
import sys
import codecs

import gadict


FINAME = None
FONAME = None
if len(sys.argv) >= 2:
    FINAME = sys.argv[1]
if len(sys.argv) >= 3:
    FONAME = sys.argv[2]
LANGS = None
if len(sys.argv) >= 4:
    LANGS = set(sys.argv[3].split(","))

# Without an input file there is nothing to do; stop with a readable
# message instead of letting io.open(None) fail with a TypeError.
if FINAME is None:
    sys.exit("usage: {:s} INPUT [OUTPUT [LANG[,LANG...]]]".format(sys.argv[0]))

# The input is only needed while parsing; the ``with`` block closes it on
# success, on a parse error and on any other exception alike.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    PARSER = gadict.Parser()
    try:
        DOM = PARSER.parse(FIN)
    except gadict.ParseException as ex:
        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        # sys.exit() rather than the bare exit() helper: the latter is
        # injected by the ``site`` module and is not always available.
        sys.exit(1)


if FONAME is None:
    FOUT = sys.stdout
else:
    FOUT = codecs.open(FONAME, "w", "utf-8")


def attr_key(item):
    """Return the sort key used to order the headwords of an article.

    ``item`` is a ``(word, (pron, attrs))`` pair; only ``attrs`` (a
    collection of attribute strings, possibly empty or ``None``) is
    inspected.

    The key is chosen as follows:

    * the smallest of ``"v1"``, ``"v2"``, ``"v3"`` present in ``attrs``;
    * otherwise ``"s"`` (single/plural), then ``"comp"``
      (comparative/superlative), then ``"Am"`` (Am/Br/Au), whichever is
      found first in that order;
    * otherwise ``"zzz"``, which is also returned for empty ``attrs``.

    The result is always a plain string.
    """
    (_word, (_pron, attrs)) = item
    if not attrs:
        return "zzz"

    verb_forms = [attr for attr in attrs if attr in ("v1", "v2", "v3")]
    if verb_forms:
        return min(verb_forms)

    # Checked in priority order: single/plural, comparative/superlative,
    # Am/Br/Au.
    for marker in ("s", "comp", "Am"):
        if marker in attrs:
            return marker
    return "zzz"


# Write one line per article: "<question>\t<answer>\n".
#
# NOTE(review): the nesting of this loop was reconstructed from a copy of
# the script whose indentation had been lost.  In particular the "<br>"
# written after the translations is assumed to terminate each sense (not
# each translation) -- confirm against the original file.
try:
    # More than one requested language: translations are labelled with
    # their language code and start on their own line.
    multi_lang = bool(LANGS) and len(LANGS) > 1

    # Index 0 of DOM is deliberately skipped, as in the original loop.
    for idx in range(1, len(DOM)):
        article = DOM[idx]

        # Question side: every headword with its pronunciation and
        # attributes, one per "<br>"-separated line, ordered by attr_key.
        lines = []
        for (word, (pron, attrs)) in sorted(article[0].items(), key=attr_key):
            line = "<b>" + word + "</b>"
            if pron:
                line += " [" + pron + "]"
            # Truthiness test so that attrs=None is tolerated here exactly
            # as attr_key tolerates it.
            if attrs:
                line += ",".join(sorted(" «" + x + "»" for x in attrs))
            lines.append(line)
        FOUT.write("<br>".join(lines))
        FOUT.write("\t")

        # Answer side: part of speech, antonyms, synonyms and translations
        # of every sense.
        for sense in article[1]:
            if not sense:
                raise Exception("""Empty sense for article: """ + next(iter(article[0])))
            if sense.pos:
                FOUT.write('<i style="color: green;">')
                FOUT.write(sense.pos)
                FOUT.write('</i>')
            if sense.ant_list:
                FOUT.write(" <i>ant: ")
                FOUT.write("; ".join(sense.ant_list))
                FOUT.write("</i>")
            if sense.syn_list:
                FOUT.write(" <i>syn: ")
                FOUT.write("; ".join(sense.syn_list))
                FOUT.write("</i>")
            FOUT.write("<br>" if multi_lang else " ")
            for (lang, tr) in sense.tr_list:
                # A record must stay on a single physical line.
                tr = tr.replace('\n', ' ')
                if LANGS is None:
                    FOUT.write(tr)
                elif lang in LANGS:
                    if len(LANGS) == 1:
                        FOUT.write(tr)
                    else:
                        FOUT.write('<i style="color: blue;">')
                        FOUT.write(lang)
                        FOUT.write("</i> ")
                        FOUT.write(tr)
            FOUT.write("<br>")
        FOUT.write("\n")
finally:
    # Flush and release the output file; standard output is left open.
    if FOUT is not sys.stdout:
        FOUT.close()