Add frequency markers to dictd dictionary and Anki cards.
--- a/Makefile Tue Nov 08 18:12:50 2016 +0200
+++ b/Makefile Tue Nov 08 19:01:27 2016 +0200
@@ -395,6 +395,9 @@
################################################################
# Word frequency statistic.
+# Frequency word-list options (-freq:MODE:TAG=FILE) shared by the dictd and Anki writers.
+FREQLIST_OPT := -freq:freq:GSL=wordlist/gsl.freq -freq:freq:AWL=wordlist/awl.freq -freq:freq:NGSL=wordlist/ngsl.freq -freq:freq:NAWL=wordlist/nawl.freq
+
FREQ_DEP :=
FREQ_FILTER :=
@@ -482,18 +485,22 @@
# -B suppress __pycache__ dir
-dist/dictd/gadict_en-ru.c5: gadict_en-ru+uk.gadict py/gadict.py py/gadict_c5.py | dist/dictd/
+dist/dictd/gadict_en-ru+uk.c5: gadict_en-ru+uk.gadict py/gadict.py py/gadict_c5.py $(MAKEFILE_LIST) $(filter-out -%,$(subst =, ,$(FREQLIST_OPT))) | dist/dictd/ # word lists named in FREQLIST_OPT are read by the recipe, so they are prerequisites
+	python3 -B py/gadict_c5.py $(FREQLIST_OPT) $< $@
+	echo "gadict En-Ru+Uk"> dist/dictd/gadict_en-ru+uk.c5.name
+
+dist/dictd/gadict_en-ru.c5: gadict_en-ru+uk.gadict py/gadict.py py/gadict_c5.py $(MAKEFILE_LIST) | dist/dictd/
python3 -B py/gadict_c5.py $< $@ -lang:ru
echo "gadict En-Ru"> dist/dictd/gadict_en-ru.c5.name
-dist/dictd/gadict_en-uk.c5: gadict_en-ru+uk.gadict py/gadict.py py/gadict_c5.py | dist/dictd/
+dist/dictd/gadict_en-uk.c5: gadict_en-ru+uk.gadict py/gadict.py py/gadict_c5.py $(MAKEFILE_LIST) | dist/dictd/
python3 -B py/gadict_c5.py $< $@ -lang:uk
echo "gadict En-Uk"> dist/dictd/gadict_en-uk.c5.name
-dist/dictd/gadict_voa.c5: gadict_voa.gadict py/gadict.py py/gadict_c5.py | dist/dictd/
+dist/dictd/gadict_voa.c5: gadict_voa.gadict py/gadict.py py/gadict_c5.py $(MAKEFILE_LIST) | dist/dictd/
python3 -B py/gadict_c5.py $< $@ -lang:en
-dist/dictd/%.c5: %.gadict py/gadict.py py/gadict_c5.py | dist/dictd/
+dist/dictd/%.c5: %.gadict py/gadict.py py/gadict_c5.py $(MAKEFILE_LIST) | dist/dictd/
python3 -B py/gadict_c5.py $< $@
dist/dictd/:
@@ -506,10 +513,10 @@
anki: $(SRS_ANKI_FILES)
dist/srs/%.apkg: %.gadict %.del py/gadict.py py/gadict_srs_anki.py $(MAKEFILE_LIST) | dist/srs/
- PYTHONPATH=/usr/share/anki: python -B py/gadict_srs_anki.py $< $@
+ PYTHONPATH=/usr/share/anki: python -B py/gadict_srs_anki.py $(FREQLIST_OPT) $< $@
dist/srs/%.apkg: %.gadict py/gadict.py py/gadict_srs_anki.py $(MAKEFILE_LIST) | dist/srs/
- PYTHONPATH=/usr/share/anki: python -B py/gadict_srs_anki.py $< $@
+ PYTHONPATH=/usr/share/anki: python -B py/gadict_srs_anki.py $(FREQLIST_OPT) $< $@
dist/srs/gadict_en-ru+uk.tab.txt: gadict_en-ru+uk.gadict py/gadict.py py/gadict_srs_tab.py $(MAKEFILE_LIST) | dist/srs/
python3 -B py/gadict_srs_tab.py $< $@ -lang:ru,uk
--- a/py/gadict_c5.py Tue Nov 08 18:12:50 2016 +0200
+++ b/py/gadict_c5.py Tue Nov 08 19:01:27 2016 +0200
@@ -7,11 +7,13 @@
import regex
import gadict
+import gadict_freq
FINAME = None
FONAME = None
LANGS = None
+FREQ_SOURCES = []
# -lang:ru,uk
ARG_LANG_RE = regex.compile("-lang:(.+)")
@@ -34,10 +36,18 @@
continue
m = ARG_FREQ_RE.match(arg)
if m:
- LANGS = set(arg.split(","))
- for lang in LANGS:
- if len(lang) != 2:
- raise Exception("Incorrect language specification: '{:s}'".format(arg))
+ mode = m.group(1)
+ tag = m.group(2)
+ fname = m.group(3)
+ with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
+ if mode == "var":
+ parser = gadict_freq.HeadVarParser(stream)
+ elif mode == "freq":
+ parser = gadict_freq.FreqlistParser(stream)
+ else:
+ raise Exception("Unsupported mode: '{:s}'".format(mode))
+ wlist = parser.parse()
+ FREQ_SOURCES.append((tag, set(wlist)))
continue
if arg.startswith("-"):
raise Exception("Unsupported option format: '{:s}'".format(arg))
@@ -98,6 +108,7 @@
for (headwords, translations) in DOM[1:]:
+ identity = headwords[0].headword
FOUT.write("_____\n\n")
title = "; ".join([h.headword for h in headwords])
FOUT.write(title)
@@ -180,3 +191,10 @@
FOUT.write("⇒ ")
FOUT.write(tr)
FOUT.write("\n")
+ freqtags = []
+ for (freqtag, freqset) in FREQ_SOURCES:
+ if identity in freqset:
+ freqtags.append(freqtag)
+ if len(freqtags) > 0:
+ FOUT.write(",".join(["{{{:s}}}".format(tag) for tag in freqtags]))
+ FOUT.write("\n")
--- a/py/gadict_freq.py Tue Nov 08 18:12:50 2016 +0200
+++ b/py/gadict_freq.py Tue Nov 08 19:01:27 2016 +0200
@@ -68,7 +68,7 @@
self.lineno += 1
m = self.FREQ_RE.match(line)
if not m:
- raise Exception("Line {:d} is not in NUM WORD format\n".format(self.lineno, line))
+ raise Exception("Line '{:s}' #{:d} is not in NUM WORD format\n".format(line, self.lineno))
headword = m.group(2).strip().lower()
wlist.append(headword)
return wlist
--- a/py/gadict_srs_anki.py Tue Nov 08 18:12:50 2016 +0200
+++ b/py/gadict_srs_anki.py Tue Nov 08 19:01:27 2016 +0200
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
+"""Anki card writer"""
import os
import io
@@ -10,11 +11,13 @@
import regex
import gadict
+import gadict_freq
FINAME = None
FONAME = None
LANGS = None
+FREQ_SOURCES = []
# -lang:ru,uk
ARG_LANG_RE = regex.compile("-lang:(.+)")
@@ -37,10 +40,18 @@
continue
m = ARG_FREQ_RE.match(arg)
if m:
- LANGS = set(arg.split(","))
- for lang in LANGS:
- if len(lang) != 2:
- raise Exception("Incorrect language specification: '{:s}'".format(arg))
+ mode = m.group(1)
+ tag = m.group(2)
+ fname = m.group(3)
+ with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
+ if mode == "var":
+ parser = gadict_freq.HeadVarParser(stream)
+ elif mode == "freq":
+ parser = gadict_freq.FreqlistParser(stream)
+ else:
+ raise Exception("Unsupported mode: '{:s}'".format(mode))
+ wlist = parser.parse()
+ FREQ_SOURCES.append((tag, set(wlist)))
continue
if arg.startswith("-"):
raise Exception("Unsupported option format: '{:s}'".format(arg))
@@ -166,6 +177,10 @@
span.glos {
font-size: .95em;
}
+.freq {
+ color: red;
+ font-weight: bold;
+}
.del {
color: red;
font-weight: bold;
@@ -294,6 +309,14 @@
for (headwords, translations) in DOM[1:]:
identity = headwords[0].headword
+ freqtags = []
+ for (freqtag, freqset) in FREQ_SOURCES:
+ if identity in freqset:
+ freqtags.append(freqtag)
+ freqmsg = None
+ if len(freqtags) > 0:
+ freqmsg = ",".join(freqtags)
+ freqmsg = "<div class='freq'>{:s}</div>".format(freqmsg)
buf = []
v1, v2, v3 = (None, None, None)
singular, plural = (None, None)
@@ -323,6 +346,8 @@
if 'pl' in hw.attrs:
plural = (hw.headword, hw.pron)
buf.append("</div>")
+ if freqmsg:
+ buf.append(freqmsg)
direct_from = "".join(buf)
buf = []
for sense in translations: