--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadialog.py Fri Aug 21 12:17:35 2020 +0300
@@ -0,0 +1,153 @@
+import re
+import sys
+
+from gadict_util import ParseException
+
+
+class Parser:
+    """
+    Parser for gadialog files of the form::
+
+      # num1
+      - sentence1
+      - sentence2
+      # num2
+      - sentence1
+      ...
+
+    converting them to a map::
+
+      obj.dom[num1] = [sentence1, sentence2, ...]
+
+    Keys are the article numbers kept as the matched strings (they are
+    not converted to ``int``).  Lines starting with ``; `` are comments,
+    blank lines are ignored, and a line inside an article that does not
+    start with ``- `` is appended to the phrase being collected.
+    """
+
+    # Matched against the raw line, before whitespace is stripped, so only
+    # a ``; `` in the first column marks a comment.
+    COMMENT_RE = re.compile("^; ")
+    # Article header; the number is written without leading zeros.
+    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
+    # Start of a phrase; group 1 is the phrase text.
+    PHRASE_START_RE = re.compile(u"^- (.*)")
+
+    def __init__(self):
+        """Create a parser; ``parse`` resets all state on every call."""
+        self.stream = None
+        self.lineno = 0
+        self.line = ""
+        self.eof = False
+        self.num = None
+        self.dom = dict()
+
+    def readline(self):
+        """
+        Advance to the next meaningful line of the stream.
+
+        Comment lines and lines that are empty after stripping are skipped.
+        On return either ``self.eof`` is true (and ``self.line`` is empty)
+        or ``self.line`` holds the stripped text of the line.
+        ``self.lineno`` counts every line read, including skipped ones.
+        """
+        while True:
+            self.line = self.stream.readline()
+            # File-like ``readline()`` returns an empty string only at end of
+            # input; a blank line still carries its newline.
+            self.eof = not self.line
+            if self.eof:
+                return
+            self.lineno += 1
+            if self.COMMENT_RE.search(self.line):
+                continue
+            self.line = self.line.strip(' \n\t')
+            if self.line:
+                return
+
+    def parse(self, stream):
+        """
+        Parse ``stream`` and return the resulting map.
+
+        :param stream: object with a ``readline()`` method returning text
+            lines and an empty string at end of input.
+        :return: dict mapping each article number (a string) to the list
+            of its phrases; empty when the input has no ``# NUM`` line.
+        :raises ParseException: for an article without phrases or a
+            repeated article number; it carries the reader's current line
+            number and line text.
+        """
+        self.stream = stream
+        self.lineno = 0
+        self.line = ""
+        self.eof = False
+        self.num = None
+        self.dom = dict()
+        try:
+            self.parse_prelude()
+            while not self.eof:
+                self.parse_article()
+        except ParseException as ex:
+            # Python 2 does not chain exceptions implicitly, so print the
+            # original traceback before the re-raise below replaces it.
+            if sys.version_info.major == 2:
+                import traceback
+                traceback.print_exc()
+            raise ParseException(ex.msg, self.lineno, self.line)
+        return self.dom
+
+    def parse_prelude(self):
+        """
+        Skip everything before the first ``# NUM`` line.
+
+        Returns with ``self.num`` set to the first article number, or with
+        ``self.eof`` true when the input has no such line.
+        """
+        while True:
+            self.readline()
+            if self.eof:
+                return
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)
+                return
+
+    def parse_article(self):
+        """
+        Parse one article, assuming we are at its ``# NUM`` line.
+
+        Reads up to the next ``# NUM`` line (storing its number in
+        ``self.num`` for the following call) or to end of input, then
+        stores the collected phrases in ``self.dom`` under the number.
+
+        :raises ParseException: if the article has no phrases or its
+            number is already present in ``self.dom``.
+        """
+        num = self.num
+        phrase_buf = []  # pieces of the phrase being collected
+        phrases = []
+        while True:
+            self.readline()
+            if self.eof:
+                break
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)
+                break
+            m = self.PHRASE_START_RE.match(self.line)
+            if m:
+                # A new ``- `` line closes the phrase collected so far.
+                if phrase_buf:
+                    phrases.append(" ".join(phrase_buf))
+                phrase_buf = [m.group(1)]
+            else:
+                # Any other line continues the phrase being collected.
+                phrase_buf.append(self.line)
+        # Both ways out of the loop end the last open phrase.
+        if phrase_buf:
+            phrases.append(" ".join(phrase_buf))
+        if not phrases:
+            raise ParseException("""There are no any phrases...""")
+        if num in self.dom:
+            raise ParseException("""Conflicting key: {}...""".format(num))
+        self.dom[num] = phrases