# Spell checker based on hunspell.
import sys
import io
import regex
import enchant
################################################################
class EofReached(Exception):
    """Raised to signal that the input stream has no more lines."""
class EmptyChecker:
    """Stand-in checker that recognises no words at all.

    Exposes the same ``check(word)`` method the spell checker calls on
    its dictionaries, so it can be used where a real dictionary could
    not be loaded.
    """

    def __init__(self):
        pass

    def check(self, word):
        """Return ``False`` for every *word*: this checker knows no words.

        An explicit boolean is returned (rather than an implicit ``None``)
        so the result has the same type a real dictionary lookup yields.
        """
        return False
class GadictSpellChecker:
    """Spell-check selected lines of a text stream against English dictionaries.

    The stream is scanned for a separator line (``__``) that must be
    followed by an empty line.  Every subsequent line, up to the next
    empty line, is split into words on spaces and commas and each word is
    looked up in the en_US, en_GB, en_AU and en_CA dictionaries.  Lines in
    that range that start with a space are skipped (presumably headword
    attributes, judging by ``HEADWORD_ATTR_RE`` -- confirm against the
    dictionary format).  Words unknown to all dictionaries are reported
    on standard output.
    """

    SEPARATOR_RE = regex.compile(u"^__$")
    EMPTY_RE = regex.compile(u"^$")
    HEADWORD_ATTR_RE = regex.compile(u"^ ")

    def _get_checker(self, lang):
        """Return an enchant dictionary for *lang*.

        If the dictionary is not installed, a notice is printed and an
        ``EmptyChecker`` is returned instead.
        """
        try:
            return enchant.Dict(lang)
        except enchant.errors.DictNotFoundError:
            print("Dictionary '{:s}' is not found...".format(lang))
            return EmptyChecker()

    def __init__(self, stream, fname):
        """Prepare a checker.

        stream -- object with a ``readline()`` method returning text lines.
        fname  -- name used as the prefix of every reported message.
        """
        self.stream = stream
        self.fname = fname
        self.lineno = 0
        self.dict_us = self._get_checker('en_US')
        self.dict_gb = self._get_checker('en_GB')
        self.dict_au = self._get_checker('en_AU')
        self.dict_ca = self._get_checker('en_CA')

    def _readline(self):
        """Read the next line and advance the line counter.

        Raises ``EofReached`` when the stream returns an empty string.
        """
        line = self.stream.readline()
        if not line:
            raise EofReached
        self.lineno += 1
        return line

    def _is_known(self, word):
        """Return True if any of the loaded dictionaries accepts *word*."""
        checkers = (self.dict_us, self.dict_gb, self.dict_au, self.dict_ca)
        return any(checker.check(word) for checker in checkers)

    def _check_headwords(self):
        """Check the lines following a separator, up to the next empty line."""
        while True:
            line = self._readline()
            if self.EMPTY_RE.match(line):
                return
            if self.HEADWORD_ATTR_RE.match(line):
                continue
            for word in regex.split("[ ,]+", line.strip()):
                # Splitting yields empty tokens for a leading/trailing comma
                # or a whitespace-only line; enchant raises ValueError when
                # asked to check an empty string, so skip them.
                if not word:
                    continue
                if self._is_known(word):
                    continue
                print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))

    def _check_body(self):
        """Scan the whole stream; terminates only via ``EofReached``.

        Raises ``Exception`` if a separator line is not followed by an
        empty line.
        """
        while True:
            if not self.SEPARATOR_RE.match(self._readline()):
                continue
            line = self._readline()
            if not self.EMPTY_RE.match(line):
                raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))
            self._check_headwords()

    def check(self):
        """Check the stream to its end, printing every misspelled word."""
        try:
            self._check_body()
        except EofReached:
            # End of input is the normal way for the scan to finish.
            pass
################################################################
# Command-line entry: exactly one argument is expected, the path of the
# dictionary file to check.
if len(sys.argv) < 2:
    raise Exception("Please, supply path to dictionary...")
if len(sys.argv) > 2:
    raise Exception("Only one argument necessary...")
FINAME = sys.argv[1]
# Open the file as line-buffered UTF-8 text; the file name is also passed
# to the checker so it can prefix its messages with it.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    checker = GadictSpellChecker(FIN, FINAME)
    checker.check()