py/gadialog.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Mon, 27 Feb 2023 00:55:27 +0200
changeset 1342 d6413e1d20b0
parent 1223 d592572cc546
permissions -rw-r--r--
Added new articles.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1223
d592572cc546 Extracted parser of gadialog files to separate module.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     1
import re
d592572cc546 Extracted parser of gadialog files to separate module.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     2
d592572cc546 Extracted parser of gadialog files to separate module.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     3
from gadict_util import ParseException
d592572cc546 Extracted parser of gadialog files to separate module.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
d592572cc546 Extracted parser of gadialog files to separate module.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
class Parser:
    """
    Parser of gadialog files of form:

        # num1
        - sentence1
        - sentence2
        # num2
        - sentence1
        ...

    converting them to map:

        obj.dom[num1] = [sentence1, sentence2, ...]

    Keys of the map are the article numbers as matched text (strings, not
    integers).  Lines starting with ``; `` are comments, blank lines are
    skipped, and a line that does not start a new phrase is appended to the
    current phrase buffer (the pieces of one phrase are joined by a single
    space).
    """

    # A comment is recognised on the raw (unstripped) line only.
    COMMENT_RE = re.compile("^; ")
    # Article header; matched against the stripped line.
    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
    # First line of a phrase; matched against the stripped line.
    PHRASE_START_RE = re.compile(u"^- (.*)")

    def __init__(self):
        # Initialise all parser state so that attribute access is safe even
        # before the first ``parse()`` call.
        self.stream = None
        self.lineno = 0
        self.line = ""
        self.eof = False
        self.num = None
        self.dom = dict()

    def readline(self):
        """Advance to the next meaningful line of ``self.stream``.

        Comment lines and lines that are empty after stripping spaces, tabs
        and newlines are skipped.  On return either ``self.eof`` is true, or
        ``self.line`` holds the stripped text of the line and ``self.lineno``
        its 1-based number (skipped lines are counted too).
        """
        while True:
            self.line = self.stream.readline()
            # An empty string from ``readline()`` signals end of input.
            self.eof = not self.line
            if self.eof:
                return
            self.lineno += 1
            if self.COMMENT_RE.search(self.line):
                continue
            self.line = self.line.strip(' \n\t')
            if self.line:
                return

    def parse(self, stream):
        """Parse ``stream`` and return the resulting map.

        ``stream`` is any object with a ``readline()`` method returning text
        lines and an empty string at end of input.

        Returns ``self.dom``: article number (string) -> list of phrases.

        Raises ``ParseException`` built from the original message plus the
        line number and line text the parser had reached when the problem
        was detected.
        """
        self.lineno = 0
        self.stream = stream
        self.line = ""
        self.dom = dict()
        self.eof = False
        try:
            self.parse_prelude()
            while not self.eof:
                self.parse_article()
        except ParseException as ex:
            # ``sys`` is not imported at module level; import it here so the
            # handler reports the parse error instead of a NameError.
            import sys
            if sys.version_info.major == 2:
                # Python 2 has no implicit exception chaining, so print the
                # original traceback before it is replaced below.
                import traceback
                traceback.print_exc()
            raise ParseException(ex.msg, self.lineno, self.line)
        return self.dom

    def parse_prelude(self):
        """Skip everything before the first ``# NUM`` line.

        On success ``self.num`` holds the number of the first article.  If
        the input ends first, the method returns with ``self.eof`` set.
        """
        while True:
            self.readline()
            if self.eof:
                return
            m = self.NUM_RE.match(self.line)
            if m:
                self.num = m.group(1)
                return

    def _flush_phrase(self, phrase_buf, phrases):
        """Append the buffered phrase pieces to ``phrases`` as one string."""
        if phrase_buf:
            phrases.append(" ".join(phrase_buf))

    def parse_article(self):
        """Assume we are at ``# NUM`` line.

        Collects the phrases up to the next ``# NUM`` line (whose number is
        stored in ``self.num`` for the following call) or the end of input,
        and stores them in ``self.dom`` under the current article number.

        Raises ``ParseException`` when the article has no phrases or its
        number is already present in ``self.dom``.
        """
        num = self.num
        phrase_buf = []
        phrases = []
        while True:
            self.readline()
            if self.eof:
                self._flush_phrase(phrase_buf, phrases)
                break
            m = self.NUM_RE.match(self.line)
            if m:
                # Header of the next article terminates the current one.
                self._flush_phrase(phrase_buf, phrases)
                self.num = m.group(1)
                break
            m = self.PHRASE_START_RE.match(self.line)
            if m:
                self._flush_phrase(phrase_buf, phrases)
                phrase_buf = [m.group(1)]
            else:
                # Any other line is buffered as part of the current phrase.
                phrase_buf.append(self.line)
        if not phrases:
            raise ParseException("""There are no any phrases...""")
        if num in self.dom:
            raise ParseException("""Conflicting key: {}...""".format(num))
        self.dom[num] = phrases