py/gadict.py
changeset 554 59714b9033bc
parent 553 45a3138c9b4d
child 565 ac68f2680ea0
equal deleted inserted replaced
553:45a3138c9b4d 554:59714b9033bc
    28             return self.msg
    28             return self.msg
    29         elif self.line is None:
    29         elif self.line is None:
    30             return ":{:d}:{:s}".format(self.lineno, self.msg.encode('utf-8'))
    30             return ":{:d}:{:s}".format(self.lineno, self.msg.encode('utf-8'))
    31         else:
    31         else:
    32             return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8'))
    32             return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8'))
       
    33 
       
    class Headword:
        """One headword entry: the word plus its optional ``pron`` and ``attrs``.

        The parser in this file creates instances as
        ``Headword(word, pron, attrs)``; the arguments are stored unchanged.
        """

        def __init__(self, headword, pron = None, attrs = None):
            # Plain value holder: no validation or copying is performed.
            self.headword, self.pron, self.attrs = headword, pron, attrs

        def __str__(self):
            """Return the headword itself."""
            return self.headword

        def __repr__(self):
            """Return a short debugging form, ``<Headword WORD>``."""
            template = "<Headword {}>"
            return template.format(self.headword)
    33 
    45 
    34 class Sense:
    46 class Sense:
    35 
    47 
    36     def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None):
    48     def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None):
    37         if not pos:
    49         if not pos:
    69     def add_topic(self, topic):
    81     def add_topic(self, topic):
    70         if not self.topic_list:
    82         if not self.topic_list:
    71             self.topic_list = [topic]
    83             self.topic_list = [topic]
    72         else:
    84         else:
    73             self.topic_list.append(topic)
    85             self.topic_list.append(topic)
       
    86 
       
    87     def __str__(self):
       
    88         if tr_list:
       
    89             (lang, text) = self.tr_list[0]
       
    90             return "{}: {}".format(lang, text)
       
    91         return "<empy sence>"
       
    92     def __repr__(self):
       
    93         return "<Sence {}>".format(str(self))
    74 
    94 
    75 class Parser:
    95 class Parser:
    76     """gadict dictionary format parser."""
    96     """gadict dictionary format parser."""
    77 
    97 
    78     COMMENT_RE = regex.compile(r"^# ")
    98     COMMENT_RE = regex.compile(r"^# ")
   188         if self.eof or len(self.line) != 1:
   208         if self.eof or len(self.line) != 1:
   189             raise ParseException(""""__" delimiter should followed by empty line...""")
   209             raise ParseException(""""__" delimiter should followed by empty line...""")
   190 
   210 
   191     def parse_headlines(self):
   211     def parse_headlines(self):
   192         """Try to match word variations with attributed. Assume that `self.line` on preceding empty line."""
   212         """Try to match word variations with attributed. Assume that `self.line` on preceding empty line."""
   193         self.words = {}
   213         self.words = []
   194         self.readline()
   214         self.readline()
   195         if self.eof:
   215         if self.eof:
   196             raise ParseException("""There are no definition after "__" delimiter...""")
   216             raise ParseException("""There are no definition after "__" delimiter...""")
   197         m = self.HEADWORD_RE.match(self.line)
   217         m = self.HEADWORD_RE.match(self.line)
   198         if m is None:
   218         if m is None:
   206                 break
   226                 break
   207             m = self.HEADWORD_RE.match(self.line)
   227             m = self.HEADWORD_RE.match(self.line)
   208             if m is not None:
   228             if m is not None:
   209                 if word is None:
   229                 if word is None:
   210                     raise ParseException("""Didn't match previous headword...""")
   230                     raise ParseException("""Didn't match previous headword...""")
   211                 self.words[word] = (pron, attrs)
   231                 self.words.append(Headword(word, pron, attrs))
   212                 word = m.group(1)
   232                 word = m.group(1)
   213                 pron = None
   233                 pron = None
   214                 attrs = set()
   234                 attrs = set()
   215                 continue
   235                 continue
   216             m = self.HEADWORD_PRON_RE.match(self.line)
   236             m = self.HEADWORD_PRON_RE.match(self.line)
   222             m = self.HEADWORD_VAR_RE.match(self.line)
   242             m = self.HEADWORD_VAR_RE.match(self.line)
   223             if m is not None:
   243             if m is not None:
   224                 attrs.add(m.group(1))
   244                 attrs.add(m.group(1))
   225                 continue
   245                 continue
   226             raise ParseException("""Line is not a headword or translation or headword attribute...""")
   246             raise ParseException("""Line is not a headword or translation or headword attribute...""")
   227         self.words[word] = (pron, attrs)
   247         self.words.append(Headword(word, pron, attrs))
   228 
   248 
   229     def parse_translation_continuation(self):
   249     def parse_translation_continuation(self):
   230         string = ""
   250         string = ""
   231         while True:
   251         while True:
   232             self.readline()
   252             self.readline()