|
1 import re |
|
2 |
|
3 from gadict_util import ParseException |
|
4 |
|
class Parser:
    """Parser of gadialog files.

    Input has the form::

        # num1
        - sentence1
        - sentence2
        # num2
        - sentence1
        ...

    and is converted to a map::

        obj.dom[num1] = [sentence1, sentence2, ...]

    Keys of the map are the article numbers as strings, exactly as matched
    by ``NUM_RE``.  A line that is neither a ``# NUM`` header nor a
    ``- phrase`` start is treated as a continuation and joined to the
    current phrase with a single space.  Lines whose raw text starts with
    ``"; "`` are comments; blank lines are ignored.
    """

    COMMENT_RE = re.compile("^; ")
    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
    PHRASE_START_RE = re.compile(u"^- (.*)")

    def __init__(self):
        pass

    def readline(self):
        """Advance to the next meaningful line of ``self.stream``.

        Updates ``self.line``, ``self.eof`` and ``self.lineno``.  Comment
        lines and lines that are empty after stripping are skipped, but
        they still count towards ``self.lineno``.  At end of input
        ``self.eof`` is true and ``self.line`` is the empty string.
        """
        while True:
            self.line = self.stream.readline()
            self.eof = not self.line
            if self.eof:
                break
            self.lineno += 1
            # The comment marker is tested against the raw line, i.e. before
            # surrounding whitespace is stripped.
            if self.COMMENT_RE.search(self.line):
                continue
            self.line = self.line.strip(' \n\t')
            if self.line:
                break

    def parse(self, stream):
        """Parse *stream* and return the resulting map.

        :param stream: object providing ``readline()`` that returns text
            lines and an empty string at end of input.
        :return: ``dict`` mapping article number (string) to the list of
            its phrases; the same object is kept in ``self.dom``.
        :raises ParseException: on malformed input; the exception is
            re-created with the current line number and line text.
        """
        self.lineno = 0
        self.stream = stream
        self.dom = dict()
        self.eof = False
        try:
            self.parse_prelude()
            while not self.eof:
                self.parse_article()
        except ParseException as ex:
            # ``sys`` is not imported at module level, so import it here;
            # without it this handler fails with NameError and hides the
            # real parse error.
            import sys
            if sys.version_info.major == 2:
                # Python 2 has no implicit exception chaining, so show the
                # original traceback before it is replaced below.
                import traceback
                traceback.print_exc()
            raise ParseException(ex.msg, self.lineno, self.line)
        return self.dom

    def parse_prelude(self):
        """Skip everything up to the first ``# NUM`` line.

        On success the matched number is stored in ``self.num``.  If the
        input ends first, the method returns with ``self.eof`` set.
        """
        while True:
            self.readline()
            if self.eof:
                return
            m = self.NUM_RE.match(self.line)
            if m:
                self.num = m.group(1)
                break

    def parse_article(self):
        """Assume we are at ``# NUM`` line.

        Collect the phrases that follow, up to the next ``# NUM`` line or
        the end of input, and store them in ``self.dom`` under the current
        number.  When the next header is found its number is stored in
        ``self.num`` for the following call.

        :raises ParseException: if the article has no phrases or its
            number is already present in ``self.dom``.
        """
        num = self.num
        phrase_buf = []
        phrases = []
        while True:
            self.readline()
            if self.eof:
                break
            m = self.NUM_RE.match(self.line)
            if m:
                self.num = m.group(1)
                break
            m = self.PHRASE_START_RE.match(self.line)
            if m:
                # A new phrase starts: flush the one collected so far.
                if phrase_buf:
                    phrases.append(" ".join(phrase_buf))
                phrase_buf = [m.group(1)]
            else:
                # Continuation line of the current phrase.
                phrase_buf.append(self.line)
        # Flush the last phrase of the article (end of input or next header).
        if phrase_buf:
            phrases.append(" ".join(phrase_buf))
        if not phrases:
            raise ParseException("""There are no any phrases...""")
        if num in self.dom:
            raise ParseException("""Conflicting key: {}...""".format(num))
        self.dom[num] = phrases
|
95 self.dom[num] = phrases |