py/gadialog.py
changeset 1223 d592572cc546
equal deleted inserted replaced
1222:790a5708630d 1223:d592572cc546
       
     1 import re
       
     2 
       
     3 from gadict_util import ParseException
       
     4 
       
     5 class Parser:
       
     6     """
       
     7     Parser of gadialog files of form:
       
     8 
       
     9         # num1
       
    10         - sentence1
       
    11         - sentence2
       
    12         # num2
       
    13         - sentence1
       
    14         ...
       
    15 
       
    16     converting them to map:
       
    17 
       
    18         obj.dom[num1] = [sentence1, sentence2, ...]
       
    19     """
       
    20 
       
    21     COMMENT_RE = re.compile("^; ")
       
    22     NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
       
    23     PHRASE_START_RE = re.compile(u"^- (.*)")
       
    24 
       
    25     def __init__(self):
       
    26         pass
       
    27 
       
    28     def readline(self):
       
    29         while True:
       
    30             self.line = self.stream.readline()
       
    31             self.eof = len(self.line) == 0
       
    32             if self.eof:
       
    33                 break
       
    34             self.lineno += 1
       
    35             if self.COMMENT_RE.search(self.line):
       
    36                 continue
       
    37             self.line = self.line.strip(' \n\t')
       
    38             if len(self.line) > 0:
       
    39                 break
       
    40 
       
    41     def parse(self, stream):
       
    42         self.lineno = 0
       
    43         self.stream = stream
       
    44         self.dom = dict()
       
    45         self.eof = False
       
    46         try:
       
    47             self.parse_prelude()
       
    48             while not self.eof:
       
    49                 self.parse_article()
       
    50         except ParseException as ex:
       
    51             if sys.version_info.major == 2:
       
    52                 import traceback
       
    53                 traceback.print_exc()
       
    54             raise ParseException(ex.msg, self.lineno, self.line)
       
    55         return self.dom
       
    56 
       
    57     def parse_prelude(self):
       
    58         while True:
       
    59             self.readline()
       
    60             if self.eof:
       
    61                 return
       
    62             m = self.NUM_RE.match(self.line)
       
    63             if m:
       
    64                 self.num = m.group(1)
       
    65                 break
       
    66 
       
    67     def parse_article(self):
       
    68         """Assume we are at ``# NUM`` line."""
       
    69         num = self.num
       
    70         phrase_buf = []
       
    71         phrases = []
       
    72         while True:
       
    73             self.readline()
       
    74             if self.eof:
       
    75                 if len(phrase_buf) > 0:
       
    76                     phrases.append(" ".join(phrase_buf))
       
    77                 break
       
    78             m = self.NUM_RE.match(self.line)
       
    79             if m:
       
    80                 if len(phrase_buf) > 0:
       
    81                     phrases.append(" ".join(phrase_buf))
       
    82                 self.num = m.group(1)
       
    83                 break
       
    84             m = self.PHRASE_START_RE.match(self.line)
       
    85             if m:
       
    86                 if len(phrase_buf) > 0:
       
    87                     phrases.append(" ".join(phrase_buf))
       
    88                 phrase_buf = [m.group(1)]
       
    89             else:
       
    90                 phrase_buf.append(self.line)
       
    91         if len(phrases) == 0:
       
    92             raise ParseException("""There are no any phrases...""")
       
    93         if num in self.dom:
       
    94             raise ParseException("""Conflicting key: {}...""".format(num))
       
    95         self.dom[num] = phrases