Generate an HTML version of the dictionary for online presentation.
--- a/Makefile Fri Mar 03 14:50:57 2017 +0200
+++ b/Makefile Fri Mar 03 16:13:37 2017 +0200
@@ -146,6 +146,8 @@
SRS_TAB_FILES := $(patsubst %.gadict,dist/srs/%.tab.txt,$(GADICT_FILES))
SRS_ANKI_FILES := $(patsubst %.gadict,dist/anki/%.apkg,$(GADICT_FILES))
+# HTML renderings, one dist/html/NAME.html per NAME.gadict listed in GADICT_FILES.
+DICT_HTML_FILES := $(patsubst %.gadict,dist/html/%.html,$(GADICT_FILES))
+
RST_TMPL_FILE = dist/misc/rst.tmpl
RST_CSS_FILE = www/tmpl/rst.css
@@ -598,6 +600,15 @@
dist/anki/ dist/srs/:
mkdir -p $@
+.PHONY: html
+html: $(DICT_HTML_FILES)  # build the HTML rendering of every dictionary
+
+dist/html/%.html: %.gadict py/gadict.py py/gadict_html.py $(FREQLIST_DEP) $(MAKEFILE_LIST) | dist/html/
+	python3 -B py/gadict_html.py $(FREQLIST_OPT) $< $@
+
+dist/html/:  # output directory; order-only prerequisite of the pattern rule above
+	mkdir -p $@
+
################################################################
# Documentation targets.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_html.py Fri Mar 03 16:13:37 2017 +0200
@@ -0,0 +1,222 @@
+# -*- coding: utf-8 -*-
+"""HTML format writer"""
+
+import io
+import sys
+import codecs
+import re
+import html
+
+import gadict
+import gadict_freq
+
+
+FINAME = None  # input dictionary path: first positional argument
+FONAME = None  # output path: second positional argument; None means standard output
+LANGS = None  # set of two-letter language codes given with -lang:, None when not given
+FREQ_SOURCES = []  # (tag, set of parsed words) pairs, one per -freq: option
+
+# -lang:ru,uk
+ARG_LANG_RE = re.compile("-lang:(.+)")
+# -freq:var:TAG=FILE or -freq:freq:TAG=FILE
+ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)")
+
+# Command line: [-lang:xx,yy] [-freq:(freq|var):TAG=FILE ...] [--] INPUT [OUTPUT]
+look_for_files = False
+for arg in sys.argv[1:]:
+    if arg == "--":
+        look_for_files = True
+        continue
+    if not look_for_files:
+        # Options are recognised only until a "--" separator has been seen.
+        lang_match = ARG_LANG_RE.match(arg)
+        if lang_match:
+            LANGS = set(lang_match.group(1).split(","))
+            for lang in LANGS:
+                if len(lang) != 2:
+                    raise Exception("Incorrect language specification: '{:s}'".format(arg))
+            continue
+        freq_match = ARG_FREQ_RE.match(arg)
+        if freq_match:
+            mode, tag, fname = freq_match.groups()
+            with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
+                if mode == "var":
+                    parser = gadict_freq.WordformParser(stream)
+                elif mode == "freq":
+                    parser = gadict_freq.FreqlistParser(stream)
+                else:
+                    raise Exception("Unsupported mode: '{:s}'".format(mode))
+                wlist = parser.parse()
+            FREQ_SOURCES.append((tag, set(wlist)))
+            continue
+        if arg.startswith("-"):
+            raise Exception("Unsupported option format: '{:s}'".format(arg))
+    # Positional arguments: the first is the input file, the second the output file.
+    if not FINAME:
+        FINAME = arg
+        continue
+    if not FONAME:
+        FONAME = arg
+        continue
+    raise Exception("Unnecessary argument: '{:s}'".format(arg))
+
+
+if not FINAME:
+    raise Exception("Input file is not specified")
+PARSER = gadict.Parser()
+# Parse the whole dictionary; DOM[0] is the prelude and DOM[1:] are the articles.
+with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
+    try:
+        DOM = PARSER.parse(FIN)
+    except gadict.ParseException as ex:
+        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
+        if __debug__:
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)  # unlike the site-provided exit(), sys.exit is always available
+
+PRELUDE = DOM[0]
+
+
+# Destination: the named file, written as UTF-8, or standard output when the
+# command line gave no output name.
+FOUT = (sys.stdout if FONAME is None
+        else codecs.open(FONAME, "w", "utf-8"))
+
+HTML_HEADER = """<!DOCTYPE html>
+<html>
+<head>
+ <meta charset="utf-8">
+ <title>{title}</title>
+<style>
+body {{ max-width: 30em; margin: 0 auto; }}
+.article {{
+ margin: .5em 0;
+ border-bottom: 1px solid yellow;
+}}
+.sense {{ margin-top: .5em; }}
+.head {{ color: black; font-weight: bold; }}
+.pron {{ color: green; }}
+.attr {{ color: magenta; }}
+.pos {{ color: green; font-weight: bold; }}
+.lang {{ color: brown; font-weight: bold; }}
+.freq {{ color: red; }}
+.ex {{ font-style: italic; }}
+</style>
+</head>
+<body>
+"""  # str.format() template: {title} is the page title, literal CSS braces are doubled
+HTML_FOOTER = """</body>
+</html>
+"""  # closes the <body> and <html> elements left open by HTML_HEADER
+
+FOUT.write(HTML_HEADER.format(title=html.escape(PRELUDE.name)))  # escaped: the name lands inside <title>
+FOUT.write("<h1>{} dictionary</h1>\n".format(html.escape(PRELUDE.name)))
+# FOUT.write("<a href='{}'>Home page</a>")
+# FOUT.write(" , ".join(PRELUDE.urls))
+FOUT.write("<p>License: ")
+FOUT.write(html.escape(", ".join(PRELUDE.licences)))
+# FOUT.write("</p>\n<p>")
+# FOUT.write(html.escape(PRELUDE.about))
+FOUT.write("</p>\n")
+
+def link(lst):
+    """Render each word of lst as an in-page link to the anchor named after
+    that word and return the links joined by "; ".
+
+    Words are HTML-escaped for both the href fragment and the visible label.
+    """
+    return "; ".join("<a href='#{0}'>{0}</a>".format(html.escape(w)) for w in lst)
+
+# Emit one <div class='article'> per entry; the first headword doubles as the
+# anchor id that the cross-reference links built by link() point to.
+for (headwords, translations) in DOM[1:]:
+    identity = headwords[0].headword
+    FOUT.write("<div id='{}' class='article'>\n".format(html.escape(identity)))
+    for hw in headwords:
+        FOUT.write("<div>")
+        FOUT.write("<span class='head'>{}</span>".format(html.escape(hw.headword)))
+        if hw.pron is not None:
+            FOUT.write(" <span class='pron'>[")
+            FOUT.write(html.escape(hw.pron))
+            FOUT.write("]</span>")
+        if len(hw.attrs) > 0:
+            FOUT.write(" <span class='attr'>")
+            l = ["«"+x+"»" for x in hw.attrs]
+            l.sort()
+            FOUT.write(html.escape(", ".join(l)))  # attributes are dictionary text too
+            FOUT.write("</span>")
+        FOUT.write("</div>\n")
+    for sense in translations:
+        if not sense:
+            # Name the article by its first headword string; str + headword object raises TypeError.
+            raise Exception("Empty sense for article: " + identity)
+        FOUT.write("<div class='sense'>")
+        if sense.pos:
+            FOUT.write("<span class='pos'>«")
+            FOUT.write(html.escape(sense.pos))
+            FOUT.write("»</span> ")
+        if sense.topic_list and len(sense.topic_list) > 0:
+            FOUT.write("<span class='topic'>")
+            FOUT.write(html.escape(", ".join(sense.topic_list)))
+            FOUT.write("</span>")
+        # Cross references: each related word becomes an in-page link.
+        if sense.ant_list and len(sense.ant_list) > 0:
+            FOUT.write(" ant: <span class='ant'>")
+            FOUT.write(link(sense.ant_list))
+            FOUT.write("</span>")
+        if sense.syn_list and len(sense.syn_list) > 0:
+            FOUT.write(" syn: <span class='syn'>")
+            FOUT.write(link(sense.syn_list))
+            FOUT.write("</span>")
+        if sense.hyper_list and len(sense.hyper_list) > 0:
+            FOUT.write(" hyper: <span class='hyper'>")
+            FOUT.write(link(sense.hyper_list))
+            FOUT.write("</span>")
+        if sense.hypo_list and len(sense.hypo_list) > 0:
+            FOUT.write(" hypo: <span class='hypo'>")
+            FOUT.write(link(sense.hypo_list))
+            FOUT.write("</span>")
+        if sense.rel_list and len(sense.rel_list) > 0:
+            FOUT.write(" see: <span class='see'>")
+            FOUT.write(link(sense.rel_list))
+            FOUT.write("</span>")
+        # Translations, glosses and examples; LANGS, when set, filters by language.
+        for (lang, tr) in sense.tr_list or []:
+            if LANGS and lang not in LANGS:
+                continue
+            FOUT.write("<div>")
+            if not LANGS or len(LANGS) > 1:
+                FOUT.write("<span class='lang tr'>{}</span> ".format(html.escape(lang)))
+            FOUT.write("<span class='tr'>{}</span>".format(html.escape(tr)))
+            FOUT.write("</div>")
+        for (lang, tr) in sense.glos_list or []:
+            if LANGS and lang not in LANGS:
+                continue
+            FOUT.write("<div>")
+            if not LANGS or len(LANGS) > 1:
+                FOUT.write("<span class='lang glos'>{}</span> ".format(html.escape(lang)))
+            FOUT.write("<span class='glos'>{}</span>".format(html.escape(tr)))
+            FOUT.write("</div>")
+        for (lang, tr) in sense.ex_list or []:
+            if LANGS and lang not in LANGS:
+                continue
+            FOUT.write("<div>")
+            if not LANGS or len(LANGS) > 1:
+                FOUT.write("<span class='lang ex'>{}</span> ".format(html.escape(lang)))
+            FOUT.write("<span class='ex'>{}</span>".format(html.escape(tr)))
+            FOUT.write("</div>")
+        FOUT.write("</div>")
+    # List the tag of every frequency source that contains the first headword.
+    freqtags = []
+    for (freqtag, freqset) in FREQ_SOURCES:
+        if identity in freqset:
+            freqtags.append(freqtag)
+    if len(freqtags) > 0:
+        FOUT.write("<p class='freq'>")
+        FOUT.write(html.escape(",".join(freqtags)))
+        FOUT.write("</p>")
+    FOUT.write("</div>")
+
+FOUT.write(HTML_FOOTER)  # closes the <body> and <html> elements opened by HTML_HEADER
+FOUT.close()