Extracted parser of gadialog files to separate module.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Fri, 21 Aug 2020 12:17:35 +0300
changeset 1223 d592572cc546
parent 1222 790a5708630d
child 1224 23dc533e3dd3
Extracted parser of gadialog files to separate module.
py/gadialog.py
py/gadialog_srs_anki.py
py/gadict_html.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadialog.py	Fri Aug 21 12:17:35 2020 +0300
@@ -0,0 +1,128 @@
+import re
+import sys
+
+from gadict_util import ParseException
+
+class Parser:
+    """
+    Parser of gadialog files of form:
+
+        # num1
+        - sentence1
+        - sentence2
+        # num2
+        - sentence1
+        ...
+
+    converting them to map:
+
+        obj.dom[num1] = [sentence1, sentence2, ...]
+
+    Keys are the digit strings captured from the ``# NUM`` lines.  Lines
+    starting with ``; `` are comments, blank lines are skipped, and a line
+    that does not start a new phrase is joined to the current phrase with
+    a single space.
+    """
+
+    COMMENT_RE = re.compile("^; ")
+    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
+    PHRASE_START_RE = re.compile(u"^- (.*)")
+
+    def __init__(self):
+        pass
+
+    def readline(self):
+        """Advance to the next significant line.
+
+        Comment lines and lines that are empty after stripping are skipped.
+        The stripped text is left in ``self.line``; ``self.eof`` is set when
+        the stream is exhausted.  ``self.lineno`` counts every line read,
+        including skipped ones.
+        """
+        while True:
+            self.line = self.stream.readline()
+            self.eof = len(self.line) == 0
+            if self.eof:
+                break
+            self.lineno += 1
+            # Comments are matched before stripping: ";" must be in column 0.
+            if self.COMMENT_RE.search(self.line):
+                continue
+            self.line = self.line.strip(' \n\t')
+            if self.line:
+                break
+
+    def parse(self, stream):
+        """Parse ``stream`` and return the ``{num: [phrase, ...]}`` map.
+
+        ``stream`` needs a ``readline()`` method that returns an empty string
+        at end of input.  A ParseException raised while parsing is re-raised
+        with the current line number and line text attached.
+        """
+        self.lineno = 0
+        self.stream = stream
+        self.dom = dict()
+        self.eof = False
+        try:
+            self.parse_prelude()
+            while not self.eof:
+                self.parse_article()
+        except ParseException as ex:
+            # ``sys`` must be imported at module level: without it this
+            # handler dies with NameError and hides the real parse error.
+            if sys.version_info.major == 2:
+                import traceback
+                traceback.print_exc()
+            raise ParseException(ex.msg, self.lineno, self.line)
+        return self.dom
+
+    def parse_prelude(self):
+        """Skip input up to the first ``# NUM`` line.
+
+        On return either ``self.eof`` is set or ``self.num`` holds the number
+        of the first article.
+        """
+        while True:
+            self.readline()
+            if self.eof:
+                return
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)
+                break
+
+    def parse_article(self):
+        """Collect the phrases of one article into ``self.dom``.
+
+        Assume we are at ``# NUM`` line.  Reads until the next ``# NUM`` line
+        (stored in ``self.num`` for the following call) or end of input.
+
+        Raises ParseException when the article has no phrases or its number
+        was already seen.
+        """
+        num = self.num
+        phrase_buf = []
+        phrases = []
+        while True:
+            self.readline()
+            if self.eof:
+                break
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)
+                break
+            m = self.PHRASE_START_RE.match(self.line)
+            if m:
+                if phrase_buf:
+                    phrases.append(" ".join(phrase_buf))
+                phrase_buf = [m.group(1)]
+            else:
+                phrase_buf.append(self.line)
+        # Both exits from the loop end the article: flush the pending phrase.
+        if phrase_buf:
+            phrases.append(" ".join(phrase_buf))
+        if not phrases:
+            raise ParseException("""There are no any phrases...""")
+        if num in self.dom:
+            raise ParseException("""Conflicting key: {}...""".format(num))
+        self.dom[num] = phrases
--- a/py/gadialog_srs_anki.py	Fri Aug 21 12:15:51 2020 +0300
+++ b/py/gadialog_srs_anki.py	Fri Aug 21 12:17:35 2020 +0300
@@ -10,7 +10,7 @@
 import tempfile
 import shutil
 
-from gadict_util import ParseException
+from gadialog import Parser
 
 import anki
 from anki.exporting import AnkiPackageExporter
@@ -61,84 +61,6 @@
 
 ################################################################
 
-class Parser:
-
-    COMMENT_RE = re.compile("^; ")
-    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
-    PHRASE_START_RE = re.compile(u"^- (.*)")
-
-    def __init__(self):
-        pass
-
-    def readline(self):
-        while True:
-            self.line = self.stream.readline()
-            self.eof = len(self.line) == 0
-            if self.eof:
-                break
-            self.lineno += 1
-            if self.COMMENT_RE.search(self.line):
-                continue
-            self.line = self.line.strip(' \n\t')
-            if len(self.line) > 0:
-                break
-
-    def parse(self, stream):
-        self.lineno = 0
-        self.stream = stream
-        self.dom = dict()
-        self.eof = False
-        try:
-            self.parse_prelude()
-            while not self.eof:
-                self.parse_article()
-        except ParseException as ex:
-            if sys.version_info.major == 2:
-                import traceback
-                traceback.print_exc()
-            raise ParseException(ex.msg, self.lineno, self.line)
-        return self.dom
-
-    def parse_prelude(self):
-        while True:
-            self.readline()
-            if self.eof:
-                return
-            m = self.NUM_RE.match(self.line)
-            if m:
-                self.num = m.group(1)
-                break
-
-    def parse_article(self):
-        """Assume we are at ``# NUM`` line."""
-        num = self.num
-        phrase_buf = []
-        phrases = []
-        while True:
-            self.readline()
-            if self.eof:
-                if len(phrase_buf) > 0:
-                    phrases.append(" ".join(phrase_buf))
-                break
-            m = self.NUM_RE.match(self.line)
-            if m:
-                if len(phrase_buf) > 0:
-                    phrases.append(" ".join(phrase_buf))
-                self.num = m.group(1)
-                break
-            m = self.PHRASE_START_RE.match(self.line)
-            if m:
-                if len(phrase_buf) > 0:
-                    phrases.append(" ".join(phrase_buf))
-                phrase_buf = [m.group(1)]
-            else:
-                phrase_buf.append(self.line)
-        if len(phrases) == 0:
-            raise ParseException("""There are no any phrases...""")
-        if num in self.dom:
-            raise ParseException("""Conflicting key: {}...""".format(num))
-        self.dom[num] = phrases
-
 FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
 
 PARSER = Parser()
--- a/py/gadict_html.py	Fri Aug 21 12:15:51 2020 +0300
+++ b/py/gadict_html.py	Fri Aug 21 12:17:35 2020 +0300
@@ -58,7 +58,7 @@
     if not FONAME:
         FONAME = arg
         continue
-    raise Exception("Unnecessary argument: '{:s}'".format(arg))
+    raise Exception("Superfluous argument: '{:s}'".format(arg))
 
 
 FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")