--- a/py/gadict.py Sun Mar 27 23:32:42 2016 +0300
+++ b/py/gadict.py Sun Mar 27 23:57:43 2016 +0300
@@ -2,6 +2,14 @@
import regex
+class Prelude:
+    """Dictionary prelude: name, description ("about"), URLs, authors and licences."""
+    def __init__(self):
+        # Build the lists per instance: class-level [] would be shared by every Prelude.
+        self.name, self.about = None, ""
+        self.urls, self.authors, self.licences = [], [], []
+
+
class ParseException(BaseException):
def __init__(self, msg, lineno = None, line = None):
@@ -28,8 +36,16 @@
TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
+ CONT_RE = regex.compile(r"^ +(.*)")  # continuation line: one or more leading spaces, rest of line in group 1
+
TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
+ PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")  # prelude "name: ..." line, value in group 1
+ PRELUDE_URL_RE = regex.compile(r"^url: (.*)")  # prelude "url: ..." line, value in group 1
+ PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")  # prelude "by: ..." (author) line, value in group 1
+ PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")  # prelude "term: ..." (licence) line, value in group 1
+ PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")  # prelude "about:" line; the space and the text after the colon are optional
+
def __init__(self):
pass
@@ -53,14 +69,51 @@
raise ParseException(ex.msg, self.lineno, self.line) from ex
return self.dom
+    def parse_continuation(self):
+        """Collect following lines that start with spaces (or have length 1); return them, each prefixed with a newline."""
+        string = ""
+        while True:
+            self.readline()
+            if self.eof:
+                return string
+            # CONT_RE is a class attribute, so it is only reachable through self (a bare name raises NameError).
+            m = self.CONT_RE.match(self.line)
+            # Stop at the first line that is neither indented nor of length 1; it stays in self.line for the caller.
+            if m is None and len(self.line) != 1:
+                return string
+            string += "\n" + (m.group(1) if m is not None else "")
+
def parse_prelude(self):
"""Read dictionary prelude until first "__" delimiter."""
+ pre = Prelude()  # collects the prelude fields matched below
while True:
self.readline()
if self.eof:
raise ParseException("There are no articles...")
+ m = self.PRELUDE_ABOUT_RE.match(self.line)
+ if m:
+ pre.about += m.group(1) + self.parse_continuation()  # text after "about:" plus any continuation lines
+ if self.eof:  # parse_continuation() may have reached end of input
+ raise ParseException("There are no articles...")
if self.SEPARATOR_RE.match(self.line):
break
+ m = self.PRELUDE_NAME_RE.match(self.line)  # NOTE(review): after an "about:" block self.line is the line that ended the continuation; a second "about:" there is not re-matched - confirm intended
+ if m:
+ pre.name = m.group(1)  # a later "name:" line overwrites an earlier one
+ continue
+ m = self.PRELUDE_URL_RE.match(self.line)
+ if m:
+ pre.urls.append(m.group(1))  # "url:" may repeat; every value is kept
+ continue
+ m = self.PRELUDE_AUTHOR_RE.match(self.line)
+ if m:
+ pre.authors.append(m.group(1))  # "by:" may repeat; every value is kept
+ continue
+ m = self.PRELUDE_LICENSE_RE.match(self.line)
+ if m:
+ pre.licences.append(m.group(1))  # "term:" may repeat; every value is kept
+ continue
+ self.dom.append(pre)  # presumably executed once after the loop breaks on the separator - indentation is not visible here, confirm
def parse_article(self):
"""Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""