py/gadialog.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Fri, 21 Aug 2020 12:17:35 +0300
changeset 1223 d592572cc546
permissions -rw-r--r--
Extracted parser of gadialog files to separate module.

import re

from gadict_util import ParseException

class Parser:
    """
    Parser of gadialog files of form:

        # num1
        - sentence1
        - sentence2
        # num2
        - sentence1
        ...

    converting them to map:

        obj.dom[num1] = [sentence1, sentence2, ...]

    Keys are the article numbers as strings (exactly as matched in the
    ``# NUM`` line).  Lines starting with ``; `` are comments; lines that
    are empty after stripping are ignored.  Inside an article a line that
    does not start with ``- `` is appended to the phrase being collected,
    separated by a single space.
    """

    COMMENT_RE = re.compile("^; ")
    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
    PHRASE_START_RE = re.compile(u"^- (.*)")

    def __init__(self):
        # Parsing state; all of it is reset at the start of parse().
        self.stream = None
        self.dom = dict()
        self.lineno = 0
        self.line = ""
        self.num = None
        self.eof = False

    def readline(self):
        """
        Advance to the next meaningful line of the stream.

        On return either ``self.eof`` is true (and ``self.line`` is empty)
        or ``self.line`` holds a non-empty line stripped of surrounding
        spaces, tabs and newlines.  ``self.lineno`` counts every line
        read, including skipped comment and blank lines.
        """
        while True:
            self.line = self.stream.readline()
            self.eof = len(self.line) == 0
            if self.eof:
                break
            self.lineno += 1
            # Comments are detected on the raw line: ``; `` must be at
            # column 0 to count.
            if self.COMMENT_RE.search(self.line):
                continue
            self.line = self.line.strip(' \n\t')
            if self.line:
                break

    def parse(self, stream):
        """
        Parse ``stream`` (an object with ``readline()``) and return the map
        of article number to list of phrases.

        Raises ``ParseException`` carrying the original message plus the
        current line number and line text when the input is malformed.
        """
        self.lineno = 0
        self.line = ""
        self.num = None
        self.stream = stream
        self.dom = dict()
        self.eof = False
        try:
            self.parse_prelude()
            while not self.eof:
                self.parse_article()
        except ParseException as ex:
            # ``sys`` is imported locally: the module does not import it at
            # top level, and without it this handler fails with NameError
            # instead of reporting the parse error.
            import sys
            if sys.version_info.major == 2:
                # Python 2 has no implicit exception chaining, so print the
                # original traceback before re-raising.
                import traceback
                traceback.print_exc()
            raise ParseException(ex.msg, self.lineno, self.line)
        return self.dom

    def parse_prelude(self):
        """
        Skip everything before the first ``# NUM`` line.

        On return either ``self.eof`` is true or ``self.num`` holds the
        number of the first article.
        """
        while True:
            self.readline()
            if self.eof:
                return
            m = self.NUM_RE.match(self.line)
            if m:
                self.num = m.group(1)
                break

    def parse_article(self):
        """Assume we are at ``# NUM`` line.

        Collect phrases until the next ``# NUM`` line or end of input and
        store them in ``self.dom`` under the current article number.
        Raises ``ParseException`` if the article has no phrases or its
        number was already used.
        """
        num = self.num
        phrase_buf = []
        phrases = []
        while True:
            self.readline()
            if self.eof:
                break
            m = self.NUM_RE.match(self.line)
            if m:
                # Header of the next article; remember it for the next call.
                self.num = m.group(1)
                break
            m = self.PHRASE_START_RE.match(self.line)
            if m:
                if phrase_buf:
                    phrases.append(" ".join(phrase_buf))
                phrase_buf = [m.group(1)]
            else:
                # Continuation line of the phrase being collected.
                phrase_buf.append(self.line)
        # Both loop exits (EOF and next header) end the pending phrase.
        if phrase_buf:
            phrases.append(" ".join(phrase_buf))
        if not phrases:
            raise ParseException("""There are no any phrases...""")
        if num in self.dom:
            raise ParseException("""Conflicting key: {}...""".format(num))
        self.dom[num] = phrases