Add spell checker based on hunspell.
--- a/Makefile Thu Nov 10 14:51:49 2016 +0200
+++ b/Makefile Thu Nov 10 16:27:42 2016 +0200
@@ -670,6 +670,12 @@
todo:
grep -nH 'TODO\|XXX' $(RST_FILES) $(C5_FILES)
+# Install:
+# $ sudo apt-get install python3-enchant python3-regex hunspell-en-us hunspell-en-gb hunspell-en-au hunspell-en-ca
+.PHONY: check
+check:
+ python3 -B py/gadict_spellcheck.py gadict_en-ru+uk.gadict
+
################################################################
# Clean targets.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_spellcheck.py Thu Nov 10 16:27:42 2016 +0200
@@ -0,0 +1,91 @@
+
+import sys
+import io
+import regex
+import enchant
+
+################################################################
+
+class EofReached(Exception):
+    """Signal that the input stream has no more lines to read."""
+
+class EmptyChecker:
+    """Stand-in for a missing enchant dictionary: it accepts no word at all."""
+    def check(self, word):
+        """Return False for any word, so callers always get a bool like enchant gives."""
+        return False
+
+class GadictSpellChecker:
+    """Spell-check headword lines of a gadict stream with en_US/GB/AU/CA dictionaries."""
+    SEPARATOR_RE = regex.compile(u"^__$")
+    EMPTY_RE = regex.compile( u"^$" )
+    HEADWORD_ATTR_RE = regex.compile( u"^ " )
+
+    def _get_checker(self, lang):
+        """Return enchant.Dict(lang), or an EmptyChecker when that dictionary is missing."""
+        try:
+            dic = enchant.Dict(lang)
+        except enchant.errors.DictNotFoundError:
+            print("Dictionary '{:s}' is not found...".format(lang))
+            dic = EmptyChecker()
+        return dic
+
+    def __init__(self, stream, fname):
+        """Store the text stream and its name (used in reports) and load the dictionaries."""
+        self.stream = stream
+        self.fname = fname
+        self.lineno = 0
+        self.dict_us = self._get_checker('en_US')
+        self.dict_gb = self._get_checker('en_GB')
+        self.dict_au = self._get_checker('en_AU')
+        self.dict_ca = self._get_checker('en_CA')
+
+    def _readline(self):
+        """Return the next line and advance lineno; raise EofReached when input is exhausted."""
+        line = self.stream.readline()
+        if len(line) == 0:
+            raise EofReached
+        self.lineno += 1
+        return line
+
+    def _is_known(self, word):
+        """Return True if at least one loaded dictionary accepts word."""
+        return any(d.check(word) for d in (self.dict_us, self.dict_gb, self.dict_au, self.dict_ca))
+
+    def _check_body(self):
+        """Print every unknown headword; runs until _readline raises EofReached."""
+        while True:
+            if not self.SEPARATOR_RE.match(self._readline()):
+                continue  # Skip ahead to the next "__" separator line.
+            line = self._readline()
+            if not self.EMPTY_RE.match(line):
+                raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))
+            while True:  # Scan lines up to the first empty one.
+                line = self._readline()
+                if self.EMPTY_RE.match(line):
+                    break
+                if self.HEADWORD_ATTR_RE.match(line):
+                    continue  # Lines starting with a space are not spell-checked.
+                for word in regex.split("[ ,]+", line.strip()):
+                    if word and not self._is_known(word):  # "" (stray comma) makes enchant raise
+                        print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))
+
+    def check(self):
+        """Run the check over the whole stream; reaching end of input ends it normally."""
+        try:
+            self._check_body()
+        except EofReached:
+            pass
+
+################################################################
+
+if len(sys.argv) < 2:
+    raise Exception("Please, supply path to dictionary...")
+if len(sys.argv) > 2:
+    raise Exception("Only one argument necessary...")
+# The only argument is the path of the gadict file; it is decoded as UTF-8.
+FINAME = sys.argv[1]
+with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
+    checker = GadictSpellChecker(FIN, FINAME)
+    checker.check()
+