# Origin: Mercurial changeset 5305f170237d92b51ff169857c03375444399a14
#   User:    Oleksandr Gavenko
#   Date:    1478788062 -7200
#   Parent:  3d4ea0a5928f33ef436ce838c2114928dea6d1df
#   Summary: Add spell checker based on hunspell.
#
# The same changeset added the following target to the Makefile
# (after the ``todo`` target, before "Clean targets"):
#
#   # Install:
#   #   $ sudo apt-get install python3-enchant hunspell-en-us hunspell-en-gb hunspell-en-au hunspell-en-ca
#   .PHONY: check
#   check:
#           python3 -B py/gadict_spellcheck.py gadict_en-ru+uk.gadict
#
# File: py/gadict_spellcheck.py
"""Spell check the headword lines of a gadict dictionary file.

Usage::

    python3 -B py/gadict_spellcheck.py DICTIONARY_FILE

Every word found on a headword line is checked against the en_US, en_GB,
en_AU and en_CA enchant dictionaries; a word is reported on stdout when
none of the available dictionaries accepts it.
"""

import sys
import io
import regex
import enchant

################################################################


class EofReached (Exception):
    """Raised by ``GadictSpellChecker._readline`` when the stream is exhausted."""


class EmptyChecker:
    """Stand-in used when an enchant dictionary is not installed.

    It never accepts a word, so a missing dictionary simply does not
    contribute to the "accepted by any dictionary" decision.
    """

    def __init__(self):
        pass

    def check(self, word):
        """Return False for every ``word``."""
        return False


class GadictSpellChecker:
    """Scan a gadict stream and print every misspelled headword.

    The scanner looks for a separator line (``__``), requires the next
    line to be empty, and then treats the following lines, up to the next
    empty line, as the headword block of an article.
    """

    SEPARATOR_RE = regex.compile(u"^__$")
    EMPTY_RE = regex.compile(u"^$")
    # NOTE(review): whitespace inside this pattern was reconstructed from a
    # whitespace-collapsed copy of the patch; confirm against the repository.
    HEADWORD_ATTR_RE = regex.compile(u"^ ")

    def _get_checker(self, lang):
        """Return an enchant dictionary for ``lang``.

        Falls back to an ``EmptyChecker`` (after printing a notice) when
        enchant has no dictionary for that language tag.
        """
        try:
            dic = enchant.Dict(lang)
        except enchant.errors.DictNotFoundError:
            print("Dictionary '{:s}' is not found...".format(lang))
            dic = EmptyChecker()
        return dic

    def __init__(self, stream, fname):
        """Prepare a checker.

        :param stream: text stream providing ``readline()``.
        :param fname: name printed in front of every report line.
        """
        self.stream = stream
        self.fname = fname
        self.lineno = 0
        self.dict_us = self._get_checker('en_US')
        self.dict_gb = self._get_checker('en_GB')
        self.dict_au = self._get_checker('en_AU')
        self.dict_ca = self._get_checker('en_CA')

    def _readline(self):
        """Return the next line and advance ``self.lineno``.

        :raises EofReached: when ``readline()`` returns an empty string.
        """
        line = self.stream.readline()
        if not line:
            raise EofReached
        self.lineno += 1
        return line

    def _is_known(self, word):
        """Return True when at least one dictionary accepts ``word``."""
        checkers = (self.dict_us, self.dict_gb, self.dict_au, self.dict_ca)
        return any(checker.check(word) for checker in checkers)

    def _check_headword_line(self, line):
        """Report every word of ``line`` that no dictionary accepts."""
        for word in regex.split("[ ,]+", line.strip()):
            # A leading/trailing comma or a whitespace-only line produces
            # empty tokens; enchant raises ValueError for an empty string.
            if not word:
                continue
            if self._is_known(word):
                continue
            print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))

    def _check_body(self):
        """Process articles until ``_readline`` raises ``EofReached``.

        :raises Exception: when a separator line is not followed by an
            empty line.
        """
        while True:
            # Skip everything up to the next article separator.
            line = self._readline()
            if not self.SEPARATOR_RE.match(line):
                continue

            line = self._readline()
            if not self.EMPTY_RE.match(line):
                raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))

            # Headword block: ends at the first empty line.
            while True:
                line = self._readline()
                if self.EMPTY_RE.match(line):
                    break
                # Lines starting with a space are skipped (attribute lines,
                # judging by the pattern name).
                if self.HEADWORD_ATTR_RE.match(line):
                    continue
                self._check_headword_line(line)

    def check(self):
        """Check the whole stream; reaching end of input ends the run normally."""
        try:
            self._check_body()
        except EofReached:
            pass

################################################################


def main(argv):
    """Run the checker on the single dictionary path given in ``argv``.

    :param argv: argument vector in ``sys.argv`` layout.
    :raises Exception: when the number of arguments is not exactly one.
    """
    if len(argv) < 2:
        raise Exception("Please, supply path to dictionary...")
    if len(argv) > 2:
        raise Exception("Only one argument necessary...")

    fname = argv[1]
    with io.open(fname, mode='r', buffering=1, encoding="utf-8") as fin:
        checker = GadictSpellChecker(fin, fname)
        checker.check()


if __name__ == "__main__":
    main(sys.argv)