Generate HTML form of dictionary for online presentation.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Fri, 03 Mar 2017 16:13:37 +0200
changeset 776 d57c28843156
parent 775 6aa817943828
child 777 6c191fc06fce
Generate HTML form of dictionary for online presentation.
Makefile
py/gadict_html.py
--- a/Makefile	Fri Mar 03 14:50:57 2017 +0200
+++ b/Makefile	Fri Mar 03 16:13:37 2017 +0200
@@ -146,6 +146,8 @@
 SRS_TAB_FILES := $(patsubst %.gadict,dist/srs/%.tab.txt,$(GADICT_FILES))
 SRS_ANKI_FILES := $(patsubst %.gadict,dist/anki/%.apkg,$(GADICT_FILES))
 
+DICT_HTML_FILES := $(patsubst %.gadict,dist/html/%.html,$(GADICT_FILES))
+
 RST_TMPL_FILE = dist/misc/rst.tmpl
 RST_CSS_FILE = www/tmpl/rst.css
 
@@ -598,6 +600,15 @@
 dist/anki/ dist/srs/:
 	mkdir -p $@
 
+.PHONY: html
+html: $(DICT_HTML_FILES)
+
+dist/html/%.html: %.gadict py/gadict.py py/gadict_html.py $(FREQLIST_DEP) $(MAKEFILE_LIST) | dist/html/
+	python3 -B py/gadict_html.py $(FREQLIST_OPT) $< $@
+
+dist/html/:
+	mkdir -p $@
+
 ################################################################
 # Documentation targets.
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_html.py	Fri Mar 03 16:13:37 2017 +0200
@@ -0,0 +1,222 @@
+# -*- coding: utf-8 -*-
+"""HTML format writer"""
+
+import io
+import sys
+import codecs
+import re
+import html
+
+import gadict
+import gadict_freq
+
+
+# Command line: [-lang:LL,..] [-freq:(freq|var):TAG=FILE].. [--] INPUT [OUTPUT]
+FINAME = None  # input path, first positional argument
+FONAME = None  # output path, second positional argument (optional)
+LANGS = None  # set of two-letter codes from -lang:, or None for no filter
+FREQ_SOURCES = []  # (tag, set of words) pairs, one per -freq: option
+
+# -lang:ru,uk
+ARG_LANG_RE = re.compile("-lang:(.+)")
+# -freq:var:TAG=FILE or -freq:freq:TAG=FILE
+ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)")
+
+options_done = False  # becomes True after "--"; later arguments are file names
+for arg in sys.argv[1:]:
+    if arg == "--":
+        options_done = True
+        continue
+    if not options_done:
+        lang_match = ARG_LANG_RE.match(arg)
+        if lang_match:
+            LANGS = set(lang_match.group(1).split(","))
+            for lang in LANGS:
+                if len(lang) != 2:
+                    raise Exception("Incorrect language specification: '{:s}'".format(arg))
+            continue
+        freq_match = ARG_FREQ_RE.match(arg)
+        if freq_match:
+            mode, tag, fname = freq_match.groups()
+            with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
+                if mode == "var":
+                    parser = gadict_freq.WordformParser(stream)
+                elif mode == "freq":
+                    parser = gadict_freq.FreqlistParser(stream)
+                else:
+                    raise Exception("Unsupported mode: '{:s}'".format(mode))
+                wlist = parser.parse()
+            FREQ_SOURCES.append((tag, set(wlist)))
+            continue
+        # Anything else that looks like an option is rejected, not taken as a file name.
+        if arg.startswith("-"):
+            raise Exception("Unsupported option format: '{:s}'".format(arg))
+    # Positional arguments: input first, then output; one more is an error.
+    if not FINAME:
+        FINAME = arg
+        continue
+    if not FONAME:
+        FONAME = arg
+        continue
+    raise Exception("Unnecessary argument: '{:s}'".format(arg))
+
+
+# Read and parse the whole input before any output is produced. On a syntax
+# error the message is printed and the script stops with exit status 1.
+PARSER = gadict.Parser()
+with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
+    try:
+        DOM = PARSER.parse(FIN)
+    except gadict.ParseException as ex:
+        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
+        if __debug__:
+            import traceback
+            traceback.print_exc()
+        # exit() is a helper added by the site module; sys.exit() is the one meant for programs.
+        sys.exit(1)
+
+# DOM[0] is the prelude record; the dictionary entries follow it.
+PRELUDE = DOM[0]
+
+
+# With no output path on the command line the page goes to standard output;
+# otherwise it is written to the named file as UTF-8.
+FOUT = sys.stdout if FONAME is None else codecs.open(FONAME, "w", "utf-8")
+
+# Page prologue. It is a str.format() template: {title} is substituted and the CSS braces are doubled.
+HTML_HEADER = """<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <title>{title}</title>
+<style>
+body {{ max-width: 30em; margin: 0 auto; }}
+.article {{
+  margin: .5em 0;
+  border-bottom: 1px solid yellow;
+}}
+.sense {{ margin-top: .5em; }}
+.head {{ color: black; font-weight: bold; }}
+.pron {{ color: green; }}
+.attr {{ color: magenta; }}
+.pos {{ color: green; font-weight: bold; }}
+.lang {{ color: brown; font-weight: bold; }}
+.freq {{ color: red; }}
+.ex {{ font-style: italic; }}
+</style>
+</head>
+<body>
+"""
+# Closes the elements opened by HTML_HEADER.
+HTML_FOOTER = "</body>\n</html>\n"
+
+# The name lands in <title> as well as <h1>, so it is escaped in both places.
+FOUT.write(HTML_HEADER.format(title=html.escape(PRELUDE.name)))
+FOUT.write("<h1>{} dictionary</h1>\n".format(html.escape(PRELUDE.name)))
+# FOUT.write("<a href='{}'>Home page</a>")
+# FOUT.write(" , ".join(PRELUDE.urls))
+FOUT.write("<p>License: " + html.escape(", ".join(PRELUDE.licences)))
+# FOUT.write("</p>\n<p>")
+# FOUT.write(html.escape(PRELUDE.about))
+FOUT.write("</p>\n")
+
+def link(lst):
+    """Return the items of lst as '; '-joined in-page anchors pointing at '#<item>'."""
+    anchors = []
+    for item in lst:
+        text = html.escape(item)
+        anchors.append("<a href='#{}'>{}</a>".format(text, text))
+    return "; ".join(anchors)
+
+# Write one <div class='article'> per dictionary entry; its id is the first headword.
+for (headwords, translations) in DOM[1:]:
+    identity = headwords[0].headword
+    FOUT.write("<div id='{}' class='article'>\n".format(html.escape(identity)))
+    # One line per headword: spelling, optional pronunciation, optional attributes.
+    for hw in headwords:
+        FOUT.write("<div>")
+        FOUT.write("<span class='head'>{}</span>".format(html.escape(hw.headword)))
+        if hw.pron is not None:
+            FOUT.write(" <span class='pron'>[")
+            FOUT.write(html.escape(hw.pron))
+            FOUT.write("]</span>")
+        if len(hw.attrs) > 0:
+            FOUT.write(" <span class='attr'>")
+            # Keep the old ordering (sort the decorated text), then escape it like all other text.
+            attrs = [html.escape(a) for a in sorted("«" + x + "»" for x in hw.attrs)]
+            FOUT.write(", ".join(attrs))
+            FOUT.write("</span>")
+        FOUT.write("</div>\n")
+    # Each sense: part-of-speech line first, then its per-language text.
+    for sense in translations:
+        if not sense:
+            # Name the article by its headword text; the old code appended the headword record itself.
+            raise Exception("Empty sense for article: " + identity)
+        FOUT.write("<div class='sense'>")
+        if sense.pos:
+            FOUT.write("<span class='pos'>«")
+            FOUT.write(html.escape(sense.pos))
+            FOUT.write("»</span> ")
+            # NOTE(review): topics and relations are only written when pos is set -- confirm intended.
+            # Related words go through link(), which renders them as in-page anchors.
+            if sense.topic_list:
+                FOUT.write("<span class='topic'>")
+                FOUT.write(html.escape(", ".join(sense.topic_list)))
+                FOUT.write("</span>")
+            if sense.ant_list:
+                FOUT.write(" ant: <span class='ant'>")
+                FOUT.write(link(sense.ant_list))
+                FOUT.write("</span>")
+            if sense.syn_list:
+                FOUT.write(" syn: <span class='syn'>")
+                FOUT.write(link(sense.syn_list))
+                FOUT.write("</span>")
+            if sense.hyper_list:
+                FOUT.write(" hyper: <span class='hyper'>")
+                FOUT.write(link(sense.hyper_list))
+                FOUT.write("</span>")
+            if sense.hypo_list:
+                FOUT.write(" hypo: <span class='hypo'>")
+                FOUT.write(link(sense.hypo_list))
+                FOUT.write("</span>")
+            if sense.rel_list:
+                FOUT.write(" see: <span class='see'>")
+                FOUT.write(link(sense.rel_list))
+                FOUT.write("</span>")
+        # Translations, glosses and examples share one layout; the language tag is
+        # omitted only when exactly one language was selected with -lang:.
+        for (lang, tr) in sense.tr_list or []:
+            if LANGS and lang not in LANGS:
+                continue
+            FOUT.write("<div>")
+            if not LANGS or len(LANGS) > 1:
+                FOUT.write("<span class='lang tr'>{}</span> ".format(html.escape(lang)))
+            FOUT.write("<span class='tr'>{}</span>".format(html.escape(tr)))
+            FOUT.write("</div>")
+        for (lang, tr) in sense.glos_list or []:
+            if LANGS and lang not in LANGS:
+                continue
+            FOUT.write("<div>")
+            if not LANGS or len(LANGS) > 1:
+                FOUT.write("<span class='lang glos'>{}</span> ".format(html.escape(lang)))
+            FOUT.write("<span class='glos'>{}</span>".format(html.escape(tr)))
+            FOUT.write("</div>")
+        for (lang, tr) in sense.ex_list or []:
+            if LANGS and lang not in LANGS:
+                continue
+            FOUT.write("<div>")
+            if not LANGS or len(LANGS) > 1:
+                FOUT.write("<span class='lang ex'>{}</span> ".format(html.escape(lang)))
+            FOUT.write("<span class='ex'>{}</span>".format(html.escape(tr)))
+            FOUT.write("</div>")
+        FOUT.write("</div>")
+    # Tags of the -freq: word lists that contain the article's first headword.
+    freqtags = [tag for (tag, words) in FREQ_SOURCES if identity in words]
+    if freqtags:
+        FOUT.write("<p class='freq'>")
+        FOUT.write(html.escape(",".join(freqtags)))
+        FOUT.write("</p>")
+    FOUT.write("</div>")
+
+FOUT.write(HTML_FOOTER)
+FOUT.close()