author | Oleksandr Gavenko <gavenkoa@gmail.com> |
Thu, 10 Nov 2016 16:27:42 +0200 | |
changeset 660 | 5305f170237d |
child 662 | a0ef60715efe |
permissions | -rw-r--r-- |
660
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
1 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
2 |
import sys |
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
3 |
import io |
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
4 |
import regex |
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
5 |
import enchant |
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
6 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
7 |
################################################################ |
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
8 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
9 |
class EofReached(Exception):
    """Raised to signal that the input stream returned no more data."""
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
11 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
12 |
class EmptyChecker:
    """Stand-in checker that does not recognize any word.

    Exposes the same ``check(word)`` method as a real dictionary object so
    callers can treat both uniformly.
    """

    def check(self, word):
        """Return ``False`` for every *word*.

        An explicit boolean is returned (rather than an implicit ``None``)
        so the result has the same type a real checker would give.
        """
        return False
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
17 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
18 |
class GadictSpellChecker:
    """Spell-check the word lines that follow each ``__`` separator in a stream.

    The stream is scanned line by line.  After a separator line (``__``) an
    empty line is required; the lines that follow, up to the next empty line,
    are split into words and each word is checked against the en_US, en_GB,
    en_AU and en_CA dictionaries.  Lines starting with a space are skipped.
    Words unknown to every dictionary are reported on standard output.
    """

    SEPARATOR_RE = regex.compile(u"^__$")
    EMPTY_RE = regex.compile(u"^$")
    HEADWORD_ATTR_RE = regex.compile(u"^ ")

    def _get_checker(self, lang):
        """Return an enchant dictionary for *lang*.

        If the dictionary is not installed, a notice is printed and an
        ``EmptyChecker`` is returned instead.
        """
        try:
            return enchant.Dict(lang)
        except enchant.errors.DictNotFoundError:
            print("Dictionary '{:s}' is not found...".format(lang))
            return EmptyChecker()

    def __init__(self, stream, fname):
        """Remember the input and load the dictionaries.

        stream -- object with a ``readline()`` method returning text lines.
        fname  -- name used as the prefix of every reported message.
        """
        self.stream = stream
        self.fname = fname
        self.lineno = 0
        self.dict_us = self._get_checker('en_US')
        self.dict_gb = self._get_checker('en_GB')
        self.dict_au = self._get_checker('en_AU')
        self.dict_ca = self._get_checker('en_CA')

    def _readline(self):
        """Return the next line and advance ``lineno``.

        Raises ``EofReached`` when ``readline()`` returns an empty string.
        """
        line = self.stream.readline()
        if not line:
            raise EofReached
        self.lineno += 1
        return line

    def _is_known(self, word):
        """Return True if at least one of the dictionaries accepts *word*."""
        # The attributes are read on every call so that a dictionary replaced
        # after construction is still honoured.
        checkers = (self.dict_us, self.dict_gb, self.dict_au, self.dict_ca)
        return any(checker.check(word) for checker in checkers)

    def _check_headwords(self):
        """Check the word lines up to (and including) the next empty line."""
        while True:
            line = self._readline()
            if self.EMPTY_RE.match(line):
                return
            if self.HEADWORD_ATTR_RE.match(line):
                # Lines beginning with a space are not spell-checked.
                continue
            for word in regex.split("[ ,]+", line.strip()):
                # A leading/trailing comma (or a line that strips down to
                # nothing) produces empty tokens; the spell checker must not
                # be asked about an empty string.
                if not word:
                    continue
                if self._is_known(word):
                    continue
                print("""{:s}:{:d}: "{:s}" is misspelled""".format(self.fname, self.lineno, word))

    def _check_body(self):
        """Scan the whole stream; leaves only by raising.

        Raises ``EofReached`` at the end of the stream and ``Exception`` when
        a separator line is not followed by an empty line.
        """
        while True:
            # Skip everything until the next separator line.
            if not self.SEPARATOR_RE.match(self._readline()):
                continue

            line = self._readline()
            if not self.EMPTY_RE.match(line):
                raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))

            self._check_headwords()

    def check(self):
        """Run the spell check over the whole stream."""
        try:
            self._check_body()
        except EofReached:
            # End of input is the normal way for the scan to finish.
            pass
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
79 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
80 |
################################################################ |
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
81 |
|
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
82 |
# Script entry: exactly one command-line argument is expected, the path of
# the file to spell-check.
if len(sys.argv) < 2:
    raise Exception("Please, supply path to dictionary...")
if len(sys.argv) > 2:
    raise Exception("Only one argument necessary...")

FINAME = sys.argv[1]
# The input is read as line-buffered UTF-8 text; the file name is passed along
# so that it can prefix every reported message.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    checker = GadictSpellChecker(FIN, FINAME)
    checker.check()
5305f170237d
Add spell checker based on hunspell.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
91 |