|
1 # -*- coding: utf-8 -*- |
|
2 """HTML format writer""" |
|
3 |
|
4 import io |
|
5 import sys |
|
6 import codecs |
|
7 import re |
|
8 import html |
|
9 |
|
10 import gadict |
|
11 import gadict_freq |
|
12 |
|
13 |
|
# Input dictionary file and optional output file (positional arguments).
FINAME = None
FONAME = None
# Set of two-letter language codes to keep, or None to keep every language.
LANGS = None
# (tag, set-of-words) pairs, one per -freq: option, in command-line order.
FREQ_SOURCES = []

# -lang:ru,uk
ARG_LANG_RE = re.compile("-lang:(.+)")
# -freq:var:TAG=FILE or -freq:freq:TAG=FILE
ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)")

# Single pass over the command line.  Options are recognised only until a
# bare "--"; after it every argument is taken as a file name, even one
# that starts with "-".
look_for_files = False
for arg in sys.argv[1:]:
    if arg == "--":
        look_for_files = True
        continue

    if not look_for_files:
        lang_match = ARG_LANG_RE.match(arg)
        if lang_match is not None:
            LANGS = set(lang_match.group(1).split(","))
            # Every requested code must be exactly two characters long.
            if any(len(code) != 2 for code in LANGS):
                raise Exception("Incorrect language specification: '{:s}'".format(arg))
            continue

        freq_match = ARG_FREQ_RE.match(arg)
        if freq_match is not None:
            mode, tag, fname = freq_match.groups()
            with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
                if mode == "freq":
                    parser = gadict_freq.FreqlistParser(stream)
                elif mode == "var":
                    parser = gadict_freq.WordformParser(stream)
                else:
                    raise Exception("Unsupported mode: '{:s}'".format(mode))
                wlist = parser.parse()
            FREQ_SOURCES.append((tag, set(wlist)))
            continue

        if arg.startswith("-"):
            raise Exception("Unsupported option format: '{:s}'".format(arg))

    # Positional arguments: first the input file, then the output file.
    if not FINAME:
        FINAME = arg
    elif not FONAME:
        FONAME = arg
    else:
        raise Exception("Unnecessary argument: '{:s}'".format(arg))
|
62 |
|
63 |
|
# Parse the input dictionary into DOM.
#
# A missing input argument is reported explicitly instead of letting
# io.open() fail on None with an unrelated-looking TypeError.
if not FINAME:
    raise Exception("Input file is not specified")

PARSER = gadict.Parser()
# The context manager closes the input on success, on a parse error and on
# any unexpected exception alike.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    try:
        DOM = PARSER.parse(FIN)
    except gadict.ParseException as ex:
        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        # sys.exit() rather than the exit() helper: the latter is installed
        # by the site module for interactive use and may be absent
        # (e.g. under "python -S").
        sys.exit(1)

# DOM[0] is the dictionary prelude; the remaining elements are the articles.
PRELUDE = DOM[0]
|
79 |
|
80 |
|
# Select the output stream: standard output unless an output file was named.
if FONAME is None:
    FOUT = sys.stdout
else:
    # io.open() supersedes codecs.open(), which is deprecated in recent
    # Python releases.  newline="\n" disables newline translation, matching
    # codecs.open(), which always opened the underlying file in binary mode.
    FOUT = io.open(FONAME, mode="w", encoding="utf-8", newline="\n")
|
85 |
|
# Page prologue.  It is a str.format() template with a single "{title}"
# placeholder, which is why every literal CSS brace is doubled.
# The .pos and .lang rules use "font-weight: bold" (the former
# "font: weight: bold" is not valid CSS and was ignored by browsers).
HTML_HEADER = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
<style>
body {{ max-width: 30em; margin: 0 auto; }}
.article {{
  margin: .5em 0;
  border-bottom: 1px solid yellow;
}}
.sense {{ margin-top: .5em; }}
.head {{ color: black; font-weight: bold; }}
.pron {{ color: green; }}
.attr {{ color: magenta; }}
.pos {{ color: green; font-weight: bold; }}
.lang {{ color: brown; font-weight: bold; }}
.freq {{ color: red; }}
.ex {{ font-style: italic; }}
</style>
</head>
<body>
"""
# Page epilogue; written verbatim (not a format template).
HTML_FOOTER = """</body>
</html>
"""
|
112 |
|
# Emit the page prologue, the dictionary heading and the licence line.
# PRELUDE.name is HTML-escaped for <title> just as it is for <h1>, so a name
# containing "&" or "<" cannot break the markup.
FOUT.write(HTML_HEADER.format(title=html.escape(PRELUDE.name)))
FOUT.write("<h1>{} dictionary</h1>\n".format(html.escape(PRELUDE.name)))
# Disabled: home page links.
# FOUT.write("<a href='{}'>Home page</a>")
# FOUT.write(" , ".join(PRELUDE.urls))
FOUT.write("<p>License: ")
FOUT.write(html.escape(", ".join(PRELUDE.licences)))
# Disabled: "about" paragraph.
# FOUT.write("</p>\n<p>")
# FOUT.write(html.escape(PRELUDE.about))
FOUT.write("</p>\n")
|
122 |
|
def link(lst):
    """Return the items of *lst* as in-page anchors joined by "; ".

    Each item is HTML-escaped once and used both as the fragment target
    and as the visible link text.
    """
    escaped = map(html.escape, lst)
    return "; ".join("<a href='#{}'>{}</a>".format(ref, ref) for ref in escaped)
|
130 |
|
def _write_relations(label, words):
    """Write " label: <span class='label'>links</span>" for a relation list.

    Nothing is written when *words* is None or empty.
    """
    if not words:
        return
    FOUT.write(" {0}: <span class='{0}'>".format(label))
    FOUT.write(link(words))
    FOUT.write("</span>")


def _write_lang_entries(entries, css_class):
    """Write one <div> per (lang, text) pair that passes the LANGS filter.

    *entries* may be None.  *css_class* is used for the text span and, next
    to "lang", for the language label span.
    """
    # The language label is omitted when exactly one language was requested.
    show_lang = not LANGS or len(LANGS) > 1
    for (lang, text) in entries or []:
        if LANGS and lang not in LANGS:
            continue
        FOUT.write("<div>")
        if show_lang:
            FOUT.write("<span class='lang {}'>{}</span> ".format(css_class, html.escape(lang)))
        FOUT.write("<span class='{}'>{}</span>".format(css_class, html.escape(text)))
        FOUT.write("</div>")


# Render every article: headword lines, then senses, then frequency tags.
for (headwords, translations) in DOM[1:]:
    # The first headword names the article and serves as its anchor id.
    identity = headwords[0].headword
    FOUT.write("<div id='{}' class='article'>\n".format(html.escape(identity)))

    for hw in headwords:
        FOUT.write("<div>")
        FOUT.write("<span class='head'>{}</span>".format(html.escape(hw.headword)))
        if hw.pron is not None:
            FOUT.write(" <span class='pron'>[")
            FOUT.write(html.escape(hw.pron))
            FOUT.write("]</span>")
        if hw.attrs:
            # Sort the decorated strings (as before), then escape the result
            # so attribute text cannot inject markup.
            decorated = sorted("«" + attr + "»" for attr in hw.attrs)
            FOUT.write(" <span class='attr'>")
            FOUT.write(html.escape(", ".join(decorated)))
            FOUT.write("</span>")
        FOUT.write("</div>\n")

    for sense in translations:
        if not sense:
            # Report the article by its headword string; concatenating the
            # headword object itself would fail while building the message.
            raise Exception("Empty sense for article: " + identity)
        FOUT.write("<div class='sense'>")
        if sense.pos:
            FOUT.write("<span class='pos'>«")
            FOUT.write(html.escape(sense.pos))
            FOUT.write("»</span> ")
        if sense.topic_list:
            FOUT.write("<span class='topic'>")
            FOUT.write(html.escape(", ".join(sense.topic_list)))
            FOUT.write("</span>")
        # Cross references, in fixed output order.
        _write_relations("ant", sense.ant_list)
        _write_relations("syn", sense.syn_list)
        _write_relations("hyper", sense.hyper_list)
        _write_relations("hypo", sense.hypo_list)
        _write_relations("see", sense.rel_list)
        # Translations, glosses and examples, filtered by LANGS.
        _write_lang_entries(sense.tr_list, "tr")
        _write_lang_entries(sense.glos_list, "glos")
        _write_lang_entries(sense.ex_list, "ex")
        FOUT.write("</div>")

    # Tags of every frequency source that lists this article's headword.
    freqtags = [freqtag for (freqtag, freqset) in FREQ_SOURCES if identity in freqset]
    if freqtags:
        FOUT.write("<p class='freq'>")
        FOUT.write(html.escape(",".join(freqtags)))
        FOUT.write("</p>")
    FOUT.write("</div>")

FOUT.write(HTML_FOOTER)
FOUT.close()