gadict: comparison py/gadict.py

equal deleted inserted replaced

-:2989d9b90b14
+:9a5f97027ee7
 else:
 return u":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line)
 class Headword:
     """One headword of a dictionary article plus its optional annotations.

     Attributes are stored exactly as passed in; no validation is performed.
     """

     def __init__(self, headword, pron=None, attrs=None, homo=None):
         """Store the headword text and its annotations.

         headword -- the headword text itself.
         pron     -- pronunciation, or None when not given.
         attrs    -- headword attributes (the parser in this file passes a set
                     of variant markers such as "pl" or "Am"), or None.
         homo     -- homophones (the parser in this file passes a list of
                     strings), or None when not given.
         """
         self.headword = headword
         self.pron = pron
         self.attrs = attrs
         self.homo = homo

     def __str__(self):
         """Return the bare headword text."""
         return self.headword

     def __repr__(self):
         """Return a debugging representation that names the headword."""
         return "<Headword {}>".format(self.headword)
 # Line-level patterns used by the parser; each is matched against one whole
 # input line.
 # Article delimiter line.
 SEPARATOR_RE = re.compile(u"^__$", re.UNICODE)
 # A headword starts in the first column.
 HEADWORD_RE = re.compile( u"^(\\w.*)$" )
 # Indented headword attribute (variant / usage marker).
 HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
 # Indented pronunciation in square brackets.
 HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɐɹʃʧθðɡʒŋɾʔ ]+)\\]$", re.UNICODE)
 # Indented homophone list: one or more words separated by ";".  The parser
 # splits the captured group on ";" and strips each item, so the group has to
 # admit the separators; a single "\\w+" could never contain one.
 HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w+(?:\\s*;\\s*\\w+)*)$", re.UNICODE)
 # Part-of-speech marker line.
 TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
 # "<lang>: text" translation line.
 TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
 # "<lang>> text" usage example line.
 TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
 # "<lang>= text" glossary line.
 TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE)
 # "topic: text" line.
 TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE)
 if m is None:
 raise ParseException("""There are no headword after "__" delimiter...""")
 word = m.group(1)
 pron = None
 attrs = set()
+homo = None
 while True:
 self.readline()
 if self.eof or len(self.line) == 0:
 break
 m = self.HEADWORD_RE.match(self.line)
 if m is not None:
 if word is None:
 raise ParseException("""Didn't match previous headword...""")
-self.words.append(Headword(word, pron, attrs))
+self.words.append(Headword(word, pron, attrs, homo = homo))
 word = m.group(1)
 pron = None
 attrs = set()
+homo = None
 continue
 m = self.HEADWORD_PRON_RE.match(self.line)
 if m is not None:
 if pron is not None:
 raise ParseException("""Pronunciation is redefined...""")
 continue
 m = self.HEADWORD_VAR_RE.match(self.line)
 if m is not None:
 attrs.add(m.group(1))
 continue
+m = self.HEADWORD_HOMO_RE.match(self.line)
+if m is not None:
+if homo is not None:
+raise ParseException("""Homophones are redefined...""")
+homo = [s.strip() for s in m.group(1).split(";")]
+continue
 raise ParseException("""Line is not a headword or translation or headword attribute...""")
-self.words.append(Headword(word, pron, attrs))
+self.words.append(Headword(word, pron, attrs, homo))
 def parse_translation_continuation(self):
 string = ""
 while True:
 self.readline()

changeset 931	9a5f97027ee7
parent 892	0c298fe6739e
child 937	981839c72b64