py/gadialog.py
changeset 1223 d592572cc546
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadialog.py	Fri Aug 21 12:17:35 2020 +0300
@@ -0,0 +1,124 @@
+import re
+import sys
+
+from gadict_util import ParseException
+
+class Parser:
+    """
+    Parser of gadialog files of form:
+
+        # num1
+        - sentence1
+        - sentence2
+        # num2
+        - sentence1
+        ...
+
+    converting them to map:
+
+        obj.dom[num1] = [sentence1, sentence2, ...]
+
+    Keys are the digit strings captured from the ``# num`` headers (they
+    are not converted to ``int``).  A line that is neither a header nor a
+    ``- `` start continues the current sentence and is joined to it with a
+    single space.  Blank lines and lines starting with ``; `` are skipped.
+    """
+
+    COMMENT_RE = re.compile("^; ")
+    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
+    PHRASE_START_RE = re.compile(u"^- (.*)")
+
+    def __init__(self):
+        pass
+
+    def readline(self):
+        """Advance to the next non-blank, non-comment line or to EOF.
+
+        Leaves the stripped line in ``self.line``, sets ``self.eof`` and
+        counts every physical line read in ``self.lineno``.
+        """
+        while True:
+            self.line = self.stream.readline()
+            self.eof = not self.line
+            if self.eof:
+                break
+            self.lineno += 1
+            # Tested before stripping: ``; `` must open the raw line.
+            if self.COMMENT_RE.search(self.line):
+                continue
+            self.line = self.line.strip(' \n\t')
+            if self.line:
+                break
+
+    def parse(self, stream):
+        """Parse ``stream`` and return the ``num -> [sentence, ...]`` dict.
+
+        ``stream`` only needs a ``readline()`` method.  A ParseException
+        raised while parsing is re-raised with the current line number
+        and line text attached.
+        """
+        self.lineno = 0
+        self.stream = stream
+        self.dom = dict()
+        self.eof = False
+        try:
+            self.parse_prelude()
+            while not self.eof:
+                self.parse_article()
+        except ParseException as ex:
+            # Python 2 does not chain exceptions, so print the original
+            # traceback before the re-raise below replaces it.
+            if sys.version_info.major == 2:
+                import traceback
+                traceback.print_exc()
+            raise ParseException(ex.msg, self.lineno, self.line)
+        return self.dom
+
+    def parse_prelude(self):
+        """Skip everything before the first ``# NUM`` header.
+
+        Stores the header number in ``self.num``; returns at EOF if the
+        stream has no header at all.
+        """
+        while True:
+            self.readline()
+            if self.eof:
+                return
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)
+                break
+
+    def parse_article(self):
+        """Assume we are at ``# NUM`` line.
+
+        Collects sentences up to the next header or EOF and stores them
+        in ``self.dom``.  Raises ParseException if the article has no
+        sentences or its number is already present.
+        """
+        num = self.num
+        phrase_buf = []  # lines of the sentence being accumulated
+        phrases = []
+        while True:
+            self.readline()
+            if self.eof:
+                break
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)  # number of the next article
+                break
+            m = self.PHRASE_START_RE.match(self.line)
+            if m:
+                if phrase_buf:
+                    phrases.append(" ".join(phrase_buf))
+                phrase_buf = [m.group(1)]
+            else:
+                phrase_buf.append(self.line)
+        # Both loop exits end the article: flush the pending sentence.
+        if phrase_buf:
+            phrases.append(" ".join(phrase_buf))
+        if not phrases:
+            raise ParseException("""There are no any phrases...""")
+        if num in self.dom:
+            raise ParseException("""Conflicting key: {}...""".format(num))
+        self.dom[num] = phrases