diff -r 6aa817943828 -r d57c28843156 py/gadict_html.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/py/gadict_html.py Fri Mar 03 16:13:37 2017 +0200 @@ -0,0 +1,222 @@ +# -*- coding: utf-8 -*- +"""HTML format writer""" + +import io +import sys +import codecs +import re +import html + +import gadict +import gadict_freq + + +FINAME = None +FONAME = None +LANGS = None +FREQ_SOURCES = [] + +# -lang:ru,uk +ARG_LANG_RE = re.compile("-lang:(.+)") +# -freq:var:TAG=FILE or -freq:freq:TAG=FILE +ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)") + +look_for_files = False +for idx in range(1, len(sys.argv)): + arg = sys.argv[idx] + if arg == "--": + look_for_files = True + continue + if not look_for_files: + m = ARG_LANG_RE.match(arg) + if m: + LANGS = set(m.group(1).split(",")) + for lang in LANGS: + if len(lang) != 2: + raise Exception("Incorrect language specification: '{:s}'".format(arg)) + continue + m = ARG_FREQ_RE.match(arg) + if m: + mode = m.group(1) + tag = m.group(2) + fname = m.group(3) + with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream: + if mode == "var": + parser = gadict_freq.WordformParser(stream) + elif mode == "freq": + parser = gadict_freq.FreqlistParser(stream) + else: + raise Exception("Unsupported mode: '{:s}'".format(mode)) + wlist = parser.parse() + FREQ_SOURCES.append((tag, set(wlist))) + continue + if arg.startswith("-"): + raise Exception("Unsupported option format: '{:s}'".format(arg)) + if not FINAME: + FINAME = arg + continue + if not FONAME: + FONAME = arg + continue + raise Exception("Unnecessary argument: '{:s}'".format(arg)) + + +FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8") + +PARSER = gadict.Parser() +try: + DOM = PARSER.parse(FIN) +except gadict.ParseException as ex: + sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex))) + if __debug__: + import traceback + traceback.print_exc() + exit(1) +finally: + FIN.close() + +PRELUDE = DOM[0] + + +if FONAME is None: + FOUT = sys.stdout +else: + FOUT = codecs.open(FONAME, "w", "utf-8") + +HTML_HEADER = """ + +
+ +License: ") +FOUT.write(html.escape(", ".join(PRELUDE.licences))) +# FOUT.write("
\n") +# FOUT.write(html.escape(PRELUDE.about)) +FOUT.write("
\n") + +def link(lst): + buf = [] + for el in lst: + el = html.escape(el) + lnk = "{}".format(el, el) + buf.append(lnk) + return "; ".join(buf) + +for (headwords, translations) in DOM[1:]: + identity = headwords[0].headword + FOUT.write("") + FOUT.write(html.escape(",".join(freqtags))) + FOUT.write("
") + FOUT.write("