# HG changeset patch
# User Oleksandr Gavenko
# Date 1598001455 -10800
# Node ID d592572cc54628ee68d2ca00750419fd9e558b4a
# Parent  790a5708630d604fd01f2e8bbddbad28b78b30d1
Extracted parser of gadialog files to separate module.

diff -r 790a5708630d -r d592572cc546 py/gadialog.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadialog.py	Fri Aug 21 12:17:35 2020 +0300
@@ -0,0 +1,99 @@
+import re
+import sys
+
+from gadict_util import ParseException
+
+class Parser:
+    """
+    Parser of gadialog files of form:
+
+      # num1
+      - sentence1
+      - sentence2
+      # num2
+      - sentence1
+      ...
+
+    converting them to map:
+
+      obj.dom[num1] = [sentence1, sentence2, ...]
+    """
+
+    COMMENT_RE = re.compile("^; ")
+    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
+    PHRASE_START_RE = re.compile(u"^- (.*)")
+
+    def __init__(self):
+        pass
+
+    def readline(self):
+        """Load next non-blank, non-comment line or set ``self.eof``."""
+        while True:
+            self.line = self.stream.readline()
+            self.eof = len(self.line) == 0
+            if self.eof:
+                break
+            self.lineno += 1
+            if self.COMMENT_RE.search(self.line):
+                continue
+            self.line = self.line.strip(' \n\t')
+            if len(self.line) > 0:
+                break
+
+    def parse(self, stream):
+        """Parse ``stream`` and return the ``{num: [phrase, ...]}`` map."""
+        self.lineno = 0
+        self.stream = stream
+        self.dom = dict()
+        self.eof = False
+        try:
+            self.parse_prelude()
+            while not self.eof:
+                self.parse_article()
+        except ParseException as ex:
+            if sys.version_info.major == 2:
+                import traceback
+                traceback.print_exc()
+            raise ParseException(ex.msg, self.lineno, self.line)
+        return self.dom
+
+    def parse_prelude(self):
+        """Skip lines until the first ``# NUM`` header (or end of input)."""
+        while True:
+            self.readline()
+            if self.eof:
+                return
+            m = self.NUM_RE.match(self.line)
+            if m:
+                self.num = m.group(1)
+                break
+
+    def parse_article(self):
+        """Assume we are at ``# NUM`` line."""
+        num = self.num
+        phrase_buf = []
+        phrases = []
+        while True:
+            self.readline()
+            if self.eof:
+                if len(phrase_buf) > 0:
+                    phrases.append(" ".join(phrase_buf))
+                break
+            m = self.NUM_RE.match(self.line)
+            if m:
+                if len(phrase_buf) > 0:
+                    phrases.append(" ".join(phrase_buf))
+                self.num = m.group(1)
+                break
+            m = self.PHRASE_START_RE.match(self.line)
+            if m:
+                if len(phrase_buf) > 0:
+                    phrases.append(" ".join(phrase_buf))
+                phrase_buf = [m.group(1)]
+            else:
+                phrase_buf.append(self.line)
+        if len(phrases) == 0:
+            raise ParseException("""There are no any phrases...""")
+        if num in self.dom:
+            raise ParseException("""Conflicting key: {}...""".format(num))
+        self.dom[num] = phrases
diff -r 790a5708630d -r d592572cc546 py/gadialog_srs_anki.py
--- a/py/gadialog_srs_anki.py	Fri Aug 21 12:15:51 2020 +0300
+++ b/py/gadialog_srs_anki.py	Fri Aug 21 12:17:35 2020 +0300
@@ -10,7 +10,7 @@
 import tempfile
 import shutil
 
-from gadict_util import ParseException
+from gadialog import Parser
 
 import anki
 from anki.exporting import AnkiPackageExporter
@@ -61,84 +61,6 @@
 
 ################################################################
 
-class Parser:
-
-    COMMENT_RE = re.compile("^; ")
-    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
-    PHRASE_START_RE = re.compile(u"^- (.*)")
-
-    def __init__(self):
-        pass
-
-    def readline(self):
-        while True:
-            self.line = self.stream.readline()
-            self.eof = len(self.line) == 0
-            if self.eof:
-                break
-            self.lineno += 1
-            if self.COMMENT_RE.search(self.line):
-                continue
-            self.line = self.line.strip(' \n\t')
-            if len(self.line) > 0:
-                break
-
-    def parse(self, stream):
-        self.lineno = 0
-        self.stream = stream
-        self.dom = dict()
-        self.eof = False
-        try:
-            self.parse_prelude()
-            while not self.eof:
-                self.parse_article()
-        except ParseException as ex:
-            if sys.version_info.major == 2:
-                import traceback
-                traceback.print_exc()
-            raise ParseException(ex.msg, self.lineno, self.line)
-        return self.dom
-
-    def parse_prelude(self):
-        while True:
-            self.readline()
-            if self.eof:
-                return
-            m = self.NUM_RE.match(self.line)
-            if m:
-                self.num = m.group(1)
-                break
-
-    def parse_article(self):
-        """Assume we are at ``# NUM`` line."""
-        num = self.num
-        phrase_buf = []
-        phrases = []
-        while True:
-            self.readline()
-            if self.eof:
-                if len(phrase_buf) > 0:
-                    phrases.append(" ".join(phrase_buf))
-                break
-            m = self.NUM_RE.match(self.line)
-            if m:
-                if len(phrase_buf) > 0:
-                    phrases.append(" ".join(phrase_buf))
-                self.num = m.group(1)
-                break
-            m = self.PHRASE_START_RE.match(self.line)
-            if m:
-                if len(phrase_buf) > 0:
-                    phrases.append(" ".join(phrase_buf))
-                phrase_buf = [m.group(1)]
-            else:
-                phrase_buf.append(self.line)
-        if len(phrases) == 0:
-            raise ParseException("""There are no any phrases...""")
-        if num in self.dom:
-            raise ParseException("""Conflicting key: {}...""".format(num))
-        self.dom[num] = phrases
-
 FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
 
 PARSER = Parser()
diff -r 790a5708630d -r d592572cc546 py/gadict_html.py
--- a/py/gadict_html.py	Fri Aug 21 12:15:51 2020 +0300
+++ b/py/gadict_html.py	Fri Aug 21 12:17:35 2020 +0300
@@ -58,7 +58,7 @@
     if not FONAME:
         FONAME = arg
         continue
-    raise Exception("Unnecessary argument: '{:s}'".format(arg))
+    raise Exception("Superfluous argument: '{:s}'".format(arg))
 
 FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
 