py/gadict.py
changeset 618 6ad7203ac9dc
parent 594 910efcf51ac0
child 629 6a862ea41c00
Legend: equal | deleted | inserted | replaced
Comparing 617:ec1c2838feae (old, listed first in each row pair) with 618:6ad7203ac9dc (new, listed second)
    43     def __repr__(self):
    43     def __repr__(self):
    44         return "<Headword {}>".format(self.headword)
    44         return "<Headword {}>".format(self.headword)
    45 
    45 
    46 class Sense:
    46 class Sense:
    47 
    47 
    48     def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None):
    48     def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None):
    49         if not pos:
    49         if not pos:
    50             raise ParseException("Part of speech expected...\n")
    50             raise ParseException("Part of speech expected...\n")
    51         self.pos = pos
    51         self.pos = pos
    52         self.tr_list = tr_list
    52         self.tr_list = tr_list
    53         self.ex_list = ex_list
    53         self.ex_list = ex_list
    54         self.glos_list = glos_list
    54         self.glos_list = glos_list
    55         self.ant_list = ant_list
    55         self.ant_list = ant_list
    56         self.syn_list = syn_list
    56         self.syn_list = syn_list
    57         self.rel_list = rel_list
    57         self.rel_list = rel_list
    58         self.topic_list = topic_list
    58         self.topic_list = topic_list
       
    59         self.hyper_list = hyper_list
       
    60         self.hypo_list = hypo_list
    59 
    61 
    60     def add_tr(self, tr):
    62     def add_tr(self, tr):
    61         if self.tr_list:
    63         if self.tr_list:
    62             self.tr_list.append(tr)
    64             self.tr_list.append(tr)
    63         else:
    65         else:
    96     def add_topic(self, topic):
    98     def add_topic(self, topic):
    97         if self.topic_list:
    99         if self.topic_list:
    98             self.topic_list.append(topic)
   100             self.topic_list.append(topic)
    99         else:
   101         else:
   100             self.topic_list = [topic]
   102             self.topic_list = [topic]
       
   103 
       
   104     def add_hyper(self, hyper):
       
   105         if self.hyper_list:
       
   106             self.hyper_list.append(hyper)
       
   107         else:
       
   108             self.hyper_list = [hyper]
       
   109 
       
   110     def add_hypo(self, hypo):
       
   111         if self.hypo_list:
       
   112             self.hypo_list.append(hypo)
       
   113         else:
       
   114             self.hypo_list = [hypo]
   101 
   115 
   102     def __str__(self):
   116     def __str__(self):
   103         if tr_list:
   117         if tr_list:
   104             (lang, text) = self.tr_list[0]
   118             (lang, text) = self.tr_list[0]
   105             return "{}: {}".format(lang, text)
   119             return "{}: {}".format(lang, text)
   116     HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" )
   130     HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" )
   117     HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
   131     HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
   118     HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$")
   132     HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$")
   119     TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
   133     TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
   120     TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
   134     TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
   121     TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
   135     TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""")
   122     TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$")
   136     TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$")
   123     TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
   137     TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
   124     SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
   138     SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
   125     ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
   139     ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
   126     REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
   140     REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
       
   141     HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$")
       
   142     HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$")
   127 
   143 
   128     CONT_RE = regex.compile(u"^ +(.*)")
   144     CONT_RE = regex.compile(u"^ +(.*)")
   129 
   145 
   130     TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
   146     TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
   131 
   147 
   338                     rel = rel.strip()
   354                     rel = rel.strip()
   339                     if len(rel) == 0:
   355                     if len(rel) == 0:
   340                         raise ParseException("""Empty relation...""")
   356                         raise ParseException("""Empty relation...""")
   341                     sense.add_rel(rel)
   357                     sense.add_rel(rel)
   342                 continue
   358                 continue
       
   359             m = self.HYPER_RE.match(self.line)
       
   360             if m is not None:
       
   361                 hypers = m.group(1).split(";")
       
   362                 for hyper in hypers:
       
   363                     hyper = hyper.strip()
       
   364                     if len(hyper) == 0:
       
   365                         raise ParseException("""Empty hypernym...""")
       
   366                     sense.add_hyper(hyper)
       
   367                 continue
       
   368             m = self.HYPO_RE.match(self.line)
       
   369             if m is not None:
       
   370                 hypos = m.group(1).split(";")
       
   371                 for hypo in hypos:
       
   372                     hypo = hypo.strip()
       
   373                     if len(hypo) == 0:
       
   374                         raise ParseException("""Empty hyponym...""")
       
   375                     sense.add_hypo(hypo)
       
   376                 continue
   343             m = self.TRANSL_RE.match(self.line)
   377             m = self.TRANSL_RE.match(self.line)
   344             if m is not None:
   378             if m is not None:
   345                 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))
   379                 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))
   346                 read = False
   380                 read = False
   347                 continue
   381                 continue