py/gadict.py
changeset 1006 b1f11eff7c70
parent 984 73d6e2631338
child 1011 fdf5640f221a
equal deleted inserted replaced
1005:802f3f9c7ea6 1006:b1f11eff7c70
    45     def __repr__(self):
    45     def __repr__(self):
    46         return "<Headword {}>".format(self.headword)
    46         return "<Headword {}>".format(self.headword)
    47 
    47 
    48 class Sense:
    48 class Sense:
    49 
    49 
    50     def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None, col_list = None):
    50     def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None, col_list = None, countable = None):
    51         if not pos:
    51         if not pos:
    52             raise ParseException("Part of speech expected...\n")
    52             raise ParseException("Part of speech expected...\n")
    53         self.pos = pos
    53         self.pos = pos
    54         self.tr_list = tr_list
    54         self.tr_list = tr_list
    55         self.ex_list = ex_list
    55         self.ex_list = ex_list
    59         self.rel_list = rel_list
    59         self.rel_list = rel_list
    60         self.topic_list = topic_list
    60         self.topic_list = topic_list
    61         self.hyper_list = hyper_list
    61         self.hyper_list = hyper_list
    62         self.hypo_list = hypo_list
    62         self.hypo_list = hypo_list
    63         self.col_list = col_list
    63         self.col_list = col_list
       
    64         self.countable = countable
    64 
    65 
    65     def add_tr(self, tr):
    66     def add_tr(self, tr):
    66         if self.tr_list:
    67         if self.tr_list:
    67             self.tr_list.append(tr)
    68             self.tr_list.append(tr)
    68         else:
    69         else:
   119     def add_col(self, col):
   120     def add_col(self, col):
   120         if self.col_list:
   121         if self.col_list:
   121             self.col_list.append(col)
   122             self.col_list.append(col)
   122         else:
   123         else:
   123             self.col_list = [col]
   124             self.col_list = [col]
       
   125 
       
   126     def set_countable(self, countable):
       
   127         if isinstance(countable, str):
       
   128             if countable == 'yes':
       
   129                 self.countable = True
       
   130             elif countable == 'no':
       
   131                 self.countable = False
       
   132             else:
       
   133                 raise ParseException("Countable can only be yes/no.")
       
   134         elif isinstance(countable, bool):
       
   135             self.countable = countable
       
   136         else:
       
   137             raise ParseException("Countable can only be yes/no or bool.")
   124 
   138 
   125     def __str__(self):
   139     def __str__(self):
   126         if tr_list:
   140         if tr_list:
   127             (lang, text) = self.tr_list[0]
   141             (lang, text) = self.tr_list[0]
   128             return "{}: {}".format(lang, text)
   142             return "{}: {}".format(lang, text)
   142     HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w|\\w[-'\\w ;]*\\w)$", re.UNICODE)
   156     HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w|\\w[-'\\w ;]*\\w)$", re.UNICODE)
   143     TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
   157     TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
   144     TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
   158     TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
   145     TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
   159     TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
   146     TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE)
   160     TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE)
       
   161     CNT_RE = re.compile(u"^cnt: (yes|no)$", re.UNICODE)
   147     TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE)
   162     TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE)
   148     SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE)
   163     SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE)
   149     ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE)
   164     ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE)
   150     REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE)
   165     REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE)
   151     HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE)
   166     HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE)
   340                 pos = m.group(0)
   355                 pos = m.group(0)
   341                 sense = Sense(pos)
   356                 sense = Sense(pos)
   342                 continue
   357                 continue
   343             if not sense:
   358             if not sense:
   344                 raise ParseException("""Missing part of speech marker...""")
   359                 raise ParseException("""Missing part of speech marker...""")
       
   360             m = self.CNT_RE.match(self.line)
       
   361             if m is not None:
       
   362                 sense.set_countable(m.group(1))
       
   363                 continue
   345             m = self.TOPIC_RE.match(self.line)
   364             m = self.TOPIC_RE.match(self.line)
   346             if m is not None:
   365             if m is not None:
   347                 topics = m.group(1).split(";")
   366                 topics = m.group(1).split(";")
   348                 for topic in topics:
   367                 for topic in topics:
   349                     topic = topic.strip()
   368                     topic = topic.strip()