py/gadict.py
changeset 565 ac68f2680ea0
parent 554 59714b9033bc
child 566 0bba61492c37
equal deleted inserted replaced
564:93d0bdb815a1 565:ac68f2680ea0
    43     def __repr__(self):
    43     def __repr__(self):
    44         return "<Headword {}>".format(self.headword)
    44         return "<Headword {}>".format(self.headword)
    45 
    45 
    46 class Sense:
    46 class Sense:
    47 
    47 
    48     def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None):
    48     def __init__(self, pos, tr_list = None, ex_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None):
    49         if not pos:
    49         if not pos:
    50             raise ParseException("Part of speech expected...\n")
    50             raise ParseException("Part of speech expected...\n")
    51         self.pos = pos
    51         self.pos = pos
    52         self.tr_list = tr_list
    52         self.tr_list = tr_list
    53         if not tr_list:
    53         if not tr_list:
    54             self.tr_list = []
    54             self.tr_list = []
    55         self.ex_list = ex_list
    55         self.ex_list = ex_list
       
    56         self.ant_list = ant_list
    56         self.syn_list = syn_list
    57         self.syn_list = syn_list
    57         self.ant_list = ant_list
    58         self.rel_list = rel_list
    58         self.topic_list = topic_list
    59         self.topic_list = topic_list
    59 
    60 
    60     def add_tr(self, tr):
    61     def add_tr(self, tr):
    61         self.tr_list.append(tr)
    62         self.tr_list.append(tr)
    62 
    63 
    63     def add_ex(self, ex):
    64     def add_ex(self, ex):
    64         if not self.ex_list:
    65         if self.ex_list:
       
    66             self.ex_list.append(ex)
       
    67         else:
    65             self.ex_list = [ex]
    68             self.ex_list = [ex]
    66         else:
    69 
    67             self.ex_list.append(ex)
    70     def add_ant(self, ant):
       
    71         if self.ant_list:
       
    72             self.ant_list.append(ant)
       
    73         else:
       
    74             self.ant_list = [ant]
    68 
    75 
    69     def add_syn(self, syn):
    76     def add_syn(self, syn):
    70         if not self.syn_list:
    77         if self.syn_list:
       
    78             self.syn_list.append(syn)
       
    79         else:
    71             self.syn_list = [syn]
    80             self.syn_list = [syn]
    72         else:
    81 
    73             self.syn_list.append(syn)
    82     def add_rel(self, rel):
    74 
    83         if self.rel_list:
    75     def add_ant(self, ant):
    84             self.rel_list.append(rel)
    76         if not self.ant_list:
    85         else:
    77             self.ant_list = [ant]
    86             self.rel_list = [rel]
    78         else:
       
    79             self.ant_list.append(ant)
       
    80 
    87 
    81     def add_topic(self, topic):
    88     def add_topic(self, topic):
    82         if not self.topic_list:
    89         if self.topic_list:
       
    90             self.topic_list.append(topic)
       
    91         else:
    83             self.topic_list = [topic]
    92             self.topic_list = [topic]
    84         else:
       
    85             self.topic_list.append(topic)
       
    86 
    93 
    87     def __str__(self):
    94     def __str__(self):
    88         if tr_list:
    95         if tr_list:
    89             (lang, text) = self.tr_list[0]
    96             (lang, text) = self.tr_list[0]
    90             return "{}: {}".format(lang, text)
    97             return "{}: {}".format(lang, text)
   105     TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
   112     TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
   106     TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
   113     TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
   107     TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
   114     TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
   108     SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
   115     SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
   109     ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
   116     ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
       
   117     REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
   110 
   118 
   111     CONT_RE = regex.compile(u"^ +(.*)")
   119     CONT_RE = regex.compile(u"^ +(.*)")
   112 
   120 
   113     TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
   121     TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
   114 
   122 
   312                     ant = ant.strip()
   320                     ant = ant.strip()
   313                     if len(ant) == 0:
   321                     if len(ant) == 0:
   314                         raise ParseException("""Empty antonym...""")
   322                         raise ParseException("""Empty antonym...""")
   315                     sense.add_ant(ant)
   323                     sense.add_ant(ant)
   316                 continue
   324                 continue
       
   325             m = self.REL_RE.match(self.line)
       
   326             if m is not None:
       
   327                 rels = m.group(1).split(";")
       
   328                 for rel in rels:
       
   329                     rel = rel.strip()
       
   330                     if len(rel) == 0:
       
   331                         raise ParseException("""Empty relation...""")
       
   332                     sense.add_rel(rel)
       
   333                 continue
   317             m = self.TRANSL_RE.match(self.line)
   334             m = self.TRANSL_RE.match(self.line)
   318             if m is not None:
   335             if m is not None:
   319                 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))
   336                 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))
   320                 read = False
   337                 read = False
   321                 continue
   338                 continue