Include statistics on wordlist coverage.
--- a/Makefile Sat Mar 11 21:41:57 2017 +0200
+++ b/Makefile Sat Mar 11 22:44:11 2017 +0200
@@ -156,6 +156,7 @@
INDEX_HTML_FILE := dist/www/index.html
+STAT_MISSING_FILE := dist/misc/MISSING.rst
STAT_RST_FILE := dist/misc/STAT.rst
STAT_HTML_FILE := dist/www/STAT.html
@@ -508,14 +509,25 @@
.PHONY: missing-stat
missing-stat: dist/wordlist/gadict.list $(FREQ_DEP) $(BUILD_SCRIPTS)
- @echo ====== Missing words stat =======
- @printf "%4s: %4s / %4s\n" VOA `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +b:dist/wordlist/voa.list | wc -l` `wc -l <dist/wordlist/voa.list`
- @printf "%4s: %4s / %4s\n" GSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +f:wordlist/gsl.freq | wc -l` `wc -l <wordlist/gsl.freq`
- @printf "%4s: %4s / %4s\n" AWL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +f:wordlist/awl.freq | wc -l` `wc -l <wordlist/awl.freq`
- @printf "%4s: %4s / %4s\n" NGSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +f:wordlist/ngsl.freq | wc -l` `wc -l <wordlist/ngsl.freq`
- @printf "%4s: %4s / %4s\n" NAWL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +f:wordlist/nawl.freq | wc -l` `wc -l <wordlist/nawl.freq`
- @printf "%4s: %4s / %4s\n" BSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +b:wordlist/bsl.var | wc -l` `wc -l <wordlist/bsl.var`
- @printf "%4s: %4s / %4s\n" TSL `python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list +b:wordlist/tsl.var | wc -l` `wc -l <wordlist/tsl.var`
+	@ { \
+row() { \
+  absent=$$(python3 -B py/gadict_freq.py -b:dist/wordlist/gadict.list $$2:$$3 \
+    | wc -l); \
+  size=$$(wc -l <$$3); \
+  present=$$((size - absent)); \
+  printf " %4s %5s %5s %4s\n" $$1 $$present $$size $$absent; \
+}; \
+rule=" ==== ===== ===== ===="; \
+printf '%s\n' "$$rule" " List Cover Total Lack" "$$rule"; \
+row VOA +b dist/wordlist/voa.list; \
+row GSL +f wordlist/gsl.freq; \
+row AWL +f wordlist/awl.freq; \
+row NGSL +f wordlist/ngsl.freq; \
+row NAWL +f wordlist/nawl.freq; \
+row BSL +b wordlist/bsl.var; \
+row TSL +b wordlist/tsl.var; \
+echo "$$rule"; \
+} $(REDIR)
dist/wordlist/%.list: %.gadict py/gadict_headwords.py $(BUILD_SCRIPTS) | dist/wordlist/
python3 -B py/gadict_headwords.py $< $@
@@ -666,7 +678,7 @@
$(STAT_HTML_FILE): $(STAT_RST_FILE) $(RST_CSS_FILE) $(RST_TMPL_FILE) | $(dir $(STAT_HTML_FILE))
$(RST2HTML) $(RST2HTML_FLAGS) --stylesheet=$(RST_CSS_FILE) --template=$(RST_TMPL_FILE) $< $@
-$(STAT_RST_FILE): $(GADICT_FILES) $(BUILD_SCRIPTS) | $(dir $(STAT_RST_FILE))
+$(STAT_RST_FILE): $(GADICT_FILES) $(STAT_MISSING_FILE) $(BUILD_SCRIPTS) | $(dir $(STAT_RST_FILE))
{ \
echo '==========================='; \
echo ' gadict project statistics'; \
@@ -674,9 +686,9 @@
echo; \
echo '.. class:: right'; \
echo; \
-echo '=================================== ======== ======== ========'; \
-echo ' Dictionary Articles Meanings Examples'; \
-echo '=================================== ======== ======== ========'; \
+echo '==================== ======== ======== ========'; \
+echo ' Dictionary Articles Meanings Examples'; \
+echo '==================== ======== ======== ========'; \
total=0; \
for dic in $(GADICT_FILES); do \
awk '\
@@ -684,12 +696,20 @@
/^__$$/{ art += 1 };\
/^(en|ru|uk)> /{ ex += 1 };\
/^(n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\.v|contr|abbr|prefix)$$/{ tr += 1 };'\
-"END {printf \"%35s %8s %8s %8s\n\", \"$${dic%.gadict}\", art, tr, ex};"\
+"END {printf \"%20s %8s %8s %8s\n\", \"$${dic%.gadict}\", art, tr, ex};"\
<$$dic; \
done; \
-echo '=================================== ======== ======== ========'; \
+echo '==================== ======== ======== ========'; \
+echo; \
+echo '.. table:: ``gadict`` coverage of famous word lists'; \
+echo ' :class: right'; \
+echo; \
+cat $(STAT_MISSING_FILE); \
} >$@
+# Real file target (no phony prerequisite): refreshed only when missing-stat's declared inputs change; REDIR makes that recipe write here.
+$(STAT_MISSING_FILE): dist/wordlist/gadict.list $(FREQ_DEP) $(BUILD_SCRIPTS) | $(dir $(STAT_MISSING_FILE))
+	$(MAKE) --no-print-directory missing-stat REDIR='>$@'
dist/misc/:
mkdir -p $@