Made parsing rule more strict for homophones.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 14 Oct 2017 00:59:59 +0300
changeset 937 981839c72b64
parent 936 8d6eda4aa795
child 938 d0bb9e2fd893
Made parsing rule more strict for homophones.
py/gadict.py
--- a/py/gadict.py	Sat Oct 14 00:59:18 2017 +0300
+++ b/py/gadict.py	Sat Oct 14 00:59:59 2017 +0300
@@ -132,7 +132,7 @@
     HEADWORD_RE = re.compile( u"^(\\w.*)$" )
     HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
     HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɐɹʃʧθðɡʒŋɾʔ ]+)\\]$", re.UNICODE)
-    HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w+)$", re.UNICODE)
+    HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w|\\w[-'\\w ;]*\\w)$", re.UNICODE)
     TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
     TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
     TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)