--- a/py/gadict.py Sun Oct 08 19:20:44 2017 +0300
+++ b/py/gadict.py Fri Oct 13 01:44:07 2017 +0300
@@ -34,10 +34,11 @@
class Headword:
- def __init__(self, headword, pron = None, attrs = None):
+ def __init__(self, headword, pron = None, attrs = None, homo = None):
self.headword = headword
self.pron = pron
self.attrs = attrs
+ self.homo = homo
def __str__(self):
return self.headword
@@ -131,6 +132,7 @@
HEADWORD_RE = re.compile( u"^(\\w.*)$" )
HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɐɹʃʧθðɡʒŋɾʔ ]+)\\]$", re.UNICODE)
+ HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w+(?: *; *\\w+)*)$", re.UNICODE)  # one or more words separated by ";" (the parser splits the capture on ";")
TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
@@ -255,6 +257,7 @@
word = m.group(1)
pron = None
attrs = set()
+ homo = None
while True:
self.readline()
if self.eof or len(self.line) == 0:
@@ -263,10 +266,11 @@
if m is not None:
if word is None:
raise ParseException("""Didn't match previous headword...""")
- self.words.append(Headword(word, pron, attrs))
+ self.words.append(Headword(word, pron, attrs, homo = homo))
word = m.group(1)
pron = None
attrs = set()
+ homo = None
continue
m = self.HEADWORD_PRON_RE.match(self.line)
if m is not None:
@@ -278,8 +282,14 @@
if m is not None:
attrs.add(m.group(1))
continue
+ m = self.HEADWORD_HOMO_RE.match(self.line)
+ if m is not None:
+ if homo is not None:
+ raise ParseException("""Homophones are redefined...""")
+ homo = [s.strip() for s in m.group(1).split(";")]
+ continue
raise ParseException("""Line is not a headword or translation or headword attribute...""")
- self.words.append(Headword(word, pron, attrs))
+ self.words.append(Headword(word, pron, attrs, homo))
def parse_translation_continuation(self):
string = ""