diff -r ec1c2838feae -r 6ad7203ac9dc py/gadict.py --- a/py/gadict.py Sun Oct 23 19:57:02 2016 +0300 +++ b/py/gadict.py Sun Oct 23 20:01:22 2016 +0300 @@ -45,7 +45,7 @@ class Sense: - def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None): + def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None): if not pos: raise ParseException("Part of speech expected...\n") self.pos = pos @@ -56,6 +56,8 @@ self.syn_list = syn_list self.rel_list = rel_list self.topic_list = topic_list + self.hyper_list = hyper_list + self.hypo_list = hypo_list def add_tr(self, tr): if self.tr_list: @@ -99,6 +101,18 @@ else: self.topic_list = [topic] + def add_hyper(self, hyper): + if self.hyper_list: + self.hyper_list.append(hyper) + else: + self.hyper_list = [hyper] + + def add_hypo(self, hypo): + if self.hypo_list: + self.hypo_list.append(hypo) + else: + self.hypo_list = [hypo] + def __str__(self): if tr_list: (lang, text) = self.tr_list[0] @@ -118,12 +132,14 @@ HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$") TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$") TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") - TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$") + TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""") TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$") TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") REL_RE = regex.compile(u"^rel: (\\p{L}.*)$") + HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$") + HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$") CONT_RE = regex.compile(u"^ +(.*)") @@ -340,6 +356,24 @@ raise ParseException("""Empty relation...""") sense.add_rel(rel) continue + m = self.HYPER_RE.match(self.line) + if m is not None: + hypers = m.group(1).split(";") + for hyper in hypers: + hyper = hyper.strip() + if len(hyper) == 0: + raise ParseException("""Empty hypernym...""") + sense.add_hyper(hyper) + continue + m = self.HYPO_RE.match(self.line) + if m is not None: + hypos = m.group(1).split(";") + for hypo in hypos: + hypo = hypo.strip() + if len(hypo) == 0: + raise ParseException("""Empty hyponym...""") + sense.add_hypo(hypo) + continue m = self.TRANSL_RE.match(self.line) if m is not None: sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))