diff -r 2989d9b90b14 -r 9a5f97027ee7 py/gadict.py
--- a/py/gadict.py	Sun Oct 08 19:20:44 2017 +0300
+++ b/py/gadict.py	Fri Oct 13 01:44:07 2017 +0300
@@ -34,10 +34,11 @@
 
 class Headword:
 
-    def __init__(self, headword, pron = None, attrs = None):
+    def __init__(self, headword, pron = None, attrs = None, homo = None):
         self.headword = headword
         self.pron = pron
         self.attrs = attrs
+        self.homo = homo
 
     def __str__(self):
         return self.headword
@@ -131,6 +132,8 @@
     HEADWORD_RE = re.compile( u"^(\\w.*)$" )
     HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
     HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɐɹʃʧθðɡʒŋɾʔ ]+)\\]$", re.UNICODE)
+    # One or more homophones separated by ";" (the parser splits group 1 on ";").
+    HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w+(?: *; *\\w+)*)$", re.UNICODE)
     TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
     TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
     TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
@@ -255,6 +258,7 @@
         word = m.group(1)
         pron = None
         attrs = set()
+        homo = None
         while True:
             self.readline()
             if self.eof or len(self.line) == 0:
@@ -263,10 +267,11 @@
             if m is not None:
                 if word is None:
                     raise ParseException("""Didn't match previous headword...""")
-                self.words.append(Headword(word, pron, attrs))
+                self.words.append(Headword(word, pron, attrs, homo = homo))
                 word = m.group(1)
                 pron = None
                 attrs = set()
+                homo = None
                 continue
             m = self.HEADWORD_PRON_RE.match(self.line)
             if m is not None:
@@ -278,8 +283,15 @@
             if m is not None:
                 attrs.add(m.group(1))
                 continue
+            m = self.HEADWORD_HOMO_RE.match(self.line)
+            if m is not None:
+                if homo is not None:
+                    raise ParseException("""Homophones are redefined...""")
+                # All homophones of a headword share one line, separated by ";".
+                homo = [s.strip() for s in m.group(1).split(";")]
+                continue
             raise ParseException("""Line is not a headword or translation or headword attribute...""")
-        self.words.append(Headword(word, pron, attrs))
+        self.words.append(Headword(word, pron, attrs, homo = homo))
 
     def parse_translation_continuation(self):
         string = ""