py/gadict_spellcheck.py
changeset 660 5305f170237d
child 662 a0ef60715efe
equal deleted inserted replaced
659:3d4ea0a5928f 660:5305f170237d
       
     1 
       
     2 import sys
       
     3 import io
       
     4 import regex
       
     5 import enchant
       
     6 
       
     7 ################################################################
       
     8 
       
     class EofReached(Exception):
         """Raised to signal that the input stream has no more lines."""
       
    11 
       
    class EmptyChecker:
        """Placeholder checker that offers ``check`` but accepts no word.

        Its ``check`` method always returns ``None``, which is falsy, so a
        caller testing the result never treats a word as correctly spelled.
        """

        def check(self, word):
            """Ignore *word* and return ``None``."""
            return None
       
    17 
       
    class GadictSpellChecker:
        """Spell-check selected lines of a dictionary source stream.

        The scanner looks for a separator line (``__``), requires the line
        right after it to be empty, and then reads lines up to the next empty
        line.  Within that run, lines starting with a space are skipped
        (presumably headword attributes, judging by ``HEADWORD_ATTR_RE``);
        every other line is split on spaces and commas and each resulting
        word is checked against the en_US, en_GB, en_AU and en_CA
        dictionaries.  A word rejected by all of them is reported on stdout
        as ``<fname>:<lineno>: "<word>" is misspelled``.
        """

        SEPARATOR_RE = regex.compile(u"^__$")
        EMPTY_RE = regex.compile(u"^$")
        HEADWORD_ATTR_RE = regex.compile(u"^ ")
        # Words on a checked line are separated by runs of spaces and/or commas.
        WORD_SPLIT_RE = regex.compile(u"[ ,]+")

        def _get_checker(self, lang):
            """Return an enchant dictionary for *lang*.

            If enchant has no such dictionary, a notice is printed and an
            ``EmptyChecker`` (which accepts no word) is returned instead.
            """
            try:
                dic = enchant.Dict(lang)
            except enchant.errors.DictNotFoundError:
                print("Dictionary '{:s}' is not found...".format(lang))
                dic = EmptyChecker()
            return dic

        def __init__(self, stream, fname):
            """Prepare a checker for *stream*.

            Args:
                stream: object with a ``readline()`` method returning text
                    lines and an empty string at end of input.
                fname: name printed as the prefix of every report line.
            """
            self.stream = stream
            self.fname = fname
            self.lineno = 0
            self.dict_us = self._get_checker('en_US')
            self.dict_gb = self._get_checker('en_GB')
            self.dict_au = self._get_checker('en_AU')
            self.dict_ca = self._get_checker('en_CA')

        def _readline(self):
            """Read the next line and advance ``self.lineno``.

            Raises:
                EofReached: when ``readline()`` returns an empty string.
            """
            line = self.stream.readline()
            if not line:
                raise EofReached
            self.lineno += 1
            return line

        def _is_known(self, word):
            """Return True if any of the four dictionaries accepts *word*.

            Dictionaries are consulted in the order US, GB, AU, CA and the
            search stops at the first one that accepts the word.
            """
            checkers = (self.dict_us, self.dict_gb, self.dict_au, self.dict_ca)
            return any(checker.check(word) for checker in checkers)

        def _check_body(self):
            """Scan the whole stream, printing a report for each unknown word.

            The loop only ends through ``EofReached`` raised by ``_readline``.

            Raises:
                EofReached: when the stream is exhausted.
                Exception: when a separator line is not followed by an empty
                    line.
            """
            while True:
                # Skip forward to the next separator line.
                line = self._readline()
                if not self.SEPARATOR_RE.match(line):
                    continue

                # The separator must be followed by an empty line.
                line = self._readline()
                if not self.EMPTY_RE.match(line):
                    raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))

                # Process lines up to the next empty line.
                while True:
                    line = self._readline()
                    if self.EMPTY_RE.match(line):
                        break
                    if self.HEADWORD_ATTR_RE.match(line):
                        continue
                    for word in self.WORD_SPLIT_RE.split(line.strip()):
                        # A leading/trailing comma or a whitespace-only line
                        # yields empty tokens.  They are not words, and
                        # enchant's Dict.check() rejects an empty string with
                        # ValueError, so skip them.
                        if not word:
                            continue
                        if self._is_known(word):
                            continue
                        print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))

        def check(self):
            """Run the spell check over the stream until end of input."""
            try:
                self._check_body()
            except EofReached:
                # Normal termination: the input has been fully consumed.
                pass
       
    79 
       
    80 ################################################################
       
    81 
       
    # Script entry: exactly one command-line argument is expected, the path
    # of the dictionary file to spell-check.
    if len(sys.argv) < 2:
        raise Exception("Please, supply path to dictionary...")
    if len(sys.argv) > 2:
        raise Exception("Only one argument necessary...")

    FINAME = sys.argv[1]
    # Open the file as line-buffered UTF-8 text and run the checker over it;
    # the path doubles as the prefix of every report line.
    with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
        checker = GadictSpellChecker(FIN, FINAME)
        checker.check()
       
    91