Add spell checker based on hunspell.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Thu, 10 Nov 2016 16:27:42 +0200
changeset 660 5305f170237d
parent 659 3d4ea0a5928f
child 661 bd74a7baf7dd
Add spell checker based on hunspell.
Makefile
py/gadict_spellcheck.py
--- a/Makefile	Thu Nov 10 14:51:49 2016 +0200
+++ b/Makefile	Thu Nov 10 16:27:42 2016 +0200
@@ -670,6 +670,12 @@
 todo:
 	grep -nH 'TODO\|XXX' $(RST_FILES) $(C5_FILES)
 
+# Spell-check the dictionary source with py/gadict_spellcheck.py.  Install dependencies:
+#   $ sudo apt-get install python3-enchant hunspell-en-us hunspell-en-gb hunspell-en-au hunspell-en-ca
+.PHONY: check
+check:
+	python3 -B py/gadict_spellcheck.py gadict_en-ru+uk.gadict
+
 ################################################################
 # Clean targets.
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict_spellcheck.py	Thu Nov 10 16:27:42 2016 +0200
@@ -0,0 +1,91 @@
+
+import sys
+import io
+import regex
+import enchant
+
+################################################################
+
+class EofReached(Exception):
+    """Signals that the input stream has no more lines to read."""
+
+class EmptyChecker:
+    """Stand-in for a missing enchant dictionary: its check() accepts no word."""
+
+    def check(self, word):
+        return None
+
+class GadictSpellChecker:
+    """Spell-check the word lines that follow each '__' separator of a gadict stream."""
+    SEPARATOR_RE = regex.compile(u"^__$")
+    EMPTY_RE = regex.compile( u"^$" )
+    HEADWORD_ATTR_RE = regex.compile( u"^ " )
+
+    def _get_checker(self, lang):
+        """Return enchant.Dict(lang); report and fall back to EmptyChecker if it is missing."""
+        try:
+            return enchant.Dict(lang)
+        except enchant.errors.DictNotFoundError:
+            print("Dictionary '{:s}' is not found...".format(lang))
+            return EmptyChecker()
+
+    def __init__(self, stream, fname):
+        """Keep the text stream and its name (used in reports); load four English checkers."""
+        self.stream = stream
+        self.fname = fname
+        self.lineno = 0
+        self.dict_us = self._get_checker('en_US')
+        self.dict_gb = self._get_checker('en_GB')
+        self.dict_au = self._get_checker('en_AU')
+        self.dict_ca = self._get_checker('en_CA')
+
+    def _readline(self):
+        """Return the next line, counting it in self.lineno; raise EofReached at end of input."""
+        line = self.stream.readline()
+        if len(line) == 0:
+            raise EofReached
+        self.lineno += 1
+        return line
+
+    def _check_body(self):
+        """Print each word that no checker accepts; loops until _readline raises EofReached."""
+        while True:
+            # Skip everything up to the next "__" separator line.
+            if not self.SEPARATOR_RE.match(self._readline()):
+                continue
+            line = self._readline()  # the line right after a separator must be empty
+            if not self.EMPTY_RE.match(line):
+                raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))
+            # Word lines run until the next empty line.
+            while True:
+                line = self._readline()
+                if self.EMPTY_RE.match(line):
+                    break
+                # Lines starting with a space are skipped unchecked.
+                if self.HEADWORD_ATTR_RE.match(line):
+                    continue
+                for word in regex.split("[ ,]+", line.strip()):
+                    # "" (line that strips to nothing, or a stray comma) makes pyenchant's check() raise ValueError.
+                    if not word or self.dict_us.check(word) or self.dict_gb.check(word) or self.dict_au.check(word) or self.dict_ca.check(word):
+                        continue
+                    print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))
+
+    def check(self):
+        """Run the whole check; reaching the end of input is the normal way to finish."""
+        try:
+            self._check_body()
+        except EofReached:
+            pass
+
+################################################################
+
+if len(sys.argv) < 2:  # Exactly one argument is expected: the dictionary path.
+    raise Exception("Please, supply path to dictionary...")
+if len(sys.argv) > 2:
+    raise Exception("Only one argument necessary...")
+# Read the dictionary as line-buffered UTF-8 text and spell-check it.
+FINAME = sys.argv[1]
+with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
+    checker = GadictSpellChecker(FIN, FINAME)
+    checker.check()
+