# Added new articles.
import re
from gadict_util import ParseException
class Parser:
    """Parser of gadialog files.

    The input has the form::

        # num1
        - sentence1
        - sentence2
        # num2
        - sentence1
        ...

    and is converted to a map::

        obj.dom[num1] = [sentence1, sentence2, ...]

    Details of the accepted syntax:

    * Keys of ``dom`` are the digit strings captured from the ``# NUM``
      header lines (they are not converted to ``int``).
    * A raw line starting with ``"; "`` is a comment and is skipped.  The
      check is done before whitespace stripping, so an indented ``; ``
      line is NOT treated as a comment.
    * Blank lines (after stripping spaces, tabs and newlines) are skipped.
    * Everything before the first ``# NUM`` header is ignored.
    * Inside an article a line that is neither a header nor a ``- ``
      phrase start is a continuation: it is joined to the current phrase
      with a single space.
    """

    COMMENT_RE = re.compile("^; ")
    NUM_RE = re.compile(u"^# ([1-9][0-9]*)$")
    PHRASE_START_RE = re.compile(u"^- (.*)")

    def __init__(self):
        """Create a parser; all state is set up by :meth:`parse`."""

    def readline(self):
        """Advance to the next non-comment, non-blank line.

        On return either ``self.eof`` is true (and ``self.line`` is the
        empty value returned by the stream), or ``self.line`` holds the
        stripped text of the line.  ``self.lineno`` counts every physical
        line read, including skipped comment and blank lines.
        """
        while True:
            self.line = self.stream.readline()
            self.eof = not self.line
            if self.eof:
                break
            self.lineno += 1
            # Comments are detected on the raw line, before stripping.
            if self.COMMENT_RE.search(self.line):
                continue
            self.line = self.line.strip(' \n\t')
            if self.line:
                break

    def parse(self, stream):
        """Parse ``stream`` and return the resulting map.

        ``stream`` must provide ``readline()`` returning an empty value at
        end of input.

        Returns the ``dom`` dictionary mapping each article number (as a
        string) to its list of phrases; it is also kept in ``self.dom``.

        Raises ``ParseException`` carrying the original message together
        with the line number and line text reached when the problem was
        detected.
        """
        self.lineno = 0
        self.stream = stream
        self.dom = dict()
        self.eof = False
        try:
            self.parse_prelude()
            while not self.eof:
                self.parse_article()
        except ParseException as ex:
            # ``sys`` is imported here because the module never imports it
            # at top level; without this the handler itself failed with
            # NameError and masked the real parse error.
            import sys
            if sys.version_info.major == 2:
                # Python 2 has no implicit exception chaining, so print
                # the original traceback before re-raising with context.
                import traceback
                traceback.print_exc()
            raise ParseException(ex.msg, self.lineno, self.line)
        return self.dom

    def parse_prelude(self):
        """Skip lines until the first ``# NUM`` header or end of input.

        When a header is found its number is stored in ``self.num`` for
        :meth:`parse_article`.
        """
        while True:
            self.readline()
            if self.eof:
                return
            m = self.NUM_RE.match(self.line)
            if m:
                self.num = m.group(1)
                break

    def parse_article(self):
        """Parse one article.  Assume we are at ``# NUM`` line.

        Reads phrases until the next ``# NUM`` header (whose number is
        stored in ``self.num`` for the following call) or end of input,
        then records them in ``self.dom`` under the current number.

        Raises ``ParseException`` when the article has no phrases or its
        number is already present in ``self.dom``.
        """
        num = self.num
        phrase_buf = []  # Pieces of the phrase currently being collected.
        phrases = []
        while True:
            self.readline()
            if self.eof:
                break
            m = self.NUM_RE.match(self.line)
            if m:
                # Header of the next article: remember it and stop.
                self.num = m.group(1)
                break
            m = self.PHRASE_START_RE.match(self.line)
            if m:
                if phrase_buf:
                    phrases.append(" ".join(phrase_buf))
                phrase_buf = [m.group(1)]
            else:
                # Continuation of the current phrase.
                phrase_buf.append(self.line)
        # Flush the last phrase, whichever way the loop ended.
        if phrase_buf:
            phrases.append(" ".join(phrase_buf))
        if not phrases:
            raise ParseException("""There are no any phrases...""")
        if num in self.dom:
            raise ParseException("""Conflicting key: {}...""".format(num))
        self.dom[num] = phrases