py/gadict_spellcheck.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sun, 20 Aug 2017 00:17:33 +0300
changeset 911 cb947157f8e2
parent 694 4457721a1a13
permissions -rw-r--r--
Added new and improved existing articles.


import sys
import io
import regex
import enchant

################################################################

class EofReached(Exception):
    """Signal that the input stream has no more lines to read."""

class GadictSpellChecker:
    """Spell-check headword lines of a dictionary source stream.

    The stream is scanned for a separator line (``__``) that must be
    followed by an empty line.  Every following line, up to the next empty
    line, is treated as a headword line unless it starts with a space
    (matched by ``HEADWORD_ATTR_RE``), in which case it is skipped.  Each
    headword line is split on spaces/commas and every word is checked
    against the loaded enchant dictionaries; a word is accepted when at
    least one dictionary accepts it.  Misspellings are printed as
    ``<fname>:<lineno>: "<word>" is misspelled``.
    """

    SEPARATOR_RE = regex.compile(u"^__$")
    EMPTY_RE = regex.compile(u"^$")
    HEADWORD_ATTR_RE = regex.compile(u"^ ")
    # Words on a headword line are separated by runs of spaces and/or commas.
    WORD_SPLIT_RE = regex.compile(u"[ ,]+")

    def _add_checker(self, lang):
        """Load the enchant dictionary for *lang*, if it is installed.

        A missing dictionary is reported on stdout and otherwise ignored so
        that checking can proceed with the remaining dictionaries.
        """
        try:
            self.dicts.append(enchant.Dict(lang))
        except enchant.errors.DictNotFoundError:
            print("Dictionary '{:s}' is not found...".format(lang))

    def __init__(self, stream, fname):
        """Create a checker.

        stream -- object with a ``readline()`` method returning text lines.
        fname  -- name used as the prefix of misspelling reports.
        """
        self.stream = stream
        self.fname = fname
        self.lineno = 0
        self.dicts = []
        self._add_checker('en_US')
        self._add_checker('en_GB')
        # self._add_checker('en_AU')
        # self._add_checker('en_CA')

    def _readline(self):
        """Return the next line and advance ``lineno``.

        Raises EofReached when the stream returns an empty string.
        """
        line = self.stream.readline()
        if not line:
            raise EofReached
        self.lineno += 1
        return line

    def _check_headword_line(self, line):
        """Report every word of *line* that no loaded dictionary accepts."""
        for word in self.WORD_SPLIT_RE.split(line.strip()):
            # Splitting yields empty tokens for a leading/trailing comma or
            # a blank (e.g. tab-only) line; enchant raises ValueError when
            # asked to check an empty string, so skip them.
            if not word:
                continue
            if any(dic.check(word) for dic in self.dicts):
                continue
            print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))

    def _check(self):
        """Scan the whole stream; terminates only by raising EofReached.

        Raises Exception when a separator line is not followed by an empty
        line.
        """
        while True:
            # Skip everything up to the next separator line.
            line = self._readline()
            if not self.SEPARATOR_RE.match(line):
                continue

            line = self._readline()
            if not self.EMPTY_RE.match(line):
                raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))

            # Headword block: runs until the next empty line.
            while True:
                line = self._readline()
                if self.EMPTY_RE.match(line):
                    break
                if self.HEADWORD_ATTR_RE.match(line):
                    continue
                self._check_headword_line(line)

    def check(self):
        """Spell-check the stream until its end is reached."""
        try:
            self._check()
        except EofReached:
            pass

################################################################

def main(argv):
    """Spell-check the file named by the single command-line argument.

    argv -- full argument vector, program name first (as in ``sys.argv``).

    Exits with an error message when the number of arguments is wrong.
    """
    if len(argv) < 2:
        sys.exit("Please, supply path to file...")
    if len(argv) > 2:
        sys.exit("Only one argument is necessary...")

    finame = argv[1]
    with io.open(finame, mode='r', buffering=1, encoding="utf-8") as fin:
        GadictSpellChecker(fin, finame).check()


# Guarded so that importing this module does not trigger a spell-check run.
if __name__ == "__main__":
    main(sys.argv)