py/gadict.py
changeset 402 b47698d5ccab
parent 400 aa03182d2e26
child 406 f0ac87e10d9a
--- a/py/gadict.py	Sun Mar 27 23:32:42 2016 +0300
+++ b/py/gadict.py	Sun Mar 27 23:57:43 2016 +0300
@@ -2,6 +2,14 @@
 import regex
 
 
+class Prelude:
+    """Metadata read from the dictionary prelude (the part before the first article).
+
+    Attributes:
+        name: value of the ``name:`` line, or None when the prelude has none.
+        about: text of the ``about:`` field including its continuation lines.
+        urls: values of the ``url:`` lines, in order of appearance.
+        authors: values of the ``by:`` lines, in order of appearance.
+        licences: values of the ``term:`` lines, in order of appearance.
+    """
+
+    def __init__(self):
+        # The lists are created per instance on purpose: as class-level
+        # attributes they would be one shared object, so appending through
+        # one Prelude would silently change every other Prelude as well.
+        self.name = None
+        self.about = ""
+        self.urls = []
+        self.authors = []
+        self.licences = []
+
+
 class ParseException(BaseException):
 
     def __init__(self, msg, lineno = None, line = None):
@@ -28,8 +36,16 @@
     TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
     TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
 
+    # Continuation line: one or more leading spaces, then the text (group 1).
+    CONT_RE = regex.compile(r"^ +(.*)")
+
     TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
 
+    # Prelude field patterns. Each one is anchored at the start of the line
+    # and captures the field value in group 1.
+    PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")
+    PRELUDE_URL_RE = regex.compile(r"^url: (.*)")
+    PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")
+    PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")
+    # Unlike the fields above, the space after "about:" is optional, so a
+    # bare "about:" line also matches (with an empty group 1).
+    PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")
+
     def __init__(self):
         pass
 
@@ -53,14 +69,51 @@
             raise ParseException(ex.msg, self.lineno, self.line) from ex
         return self.dom
 
+    def parse_continuation(self):
+        """Collect the continuation lines that follow the current line.
+
+        Keeps reading lines while they either match ``CONT_RE`` (start with
+        at least one space) or have length 1. A matching line contributes a
+        newline followed by its text without the leading spaces; a length-1
+        line contributes a bare newline.
+
+        Returns:
+            The collected text; an empty string when the very next line is
+            not a continuation.
+
+        Reading stops at end of input (``self.eof`` is then set) or at the
+        first line that does not qualify. In the latter case that line is
+        left in ``self.line`` and the caller still has to process it.
+        """
+        parts = []
+        while True:
+            self.readline()
+            if self.eof:
+                break
+            # CONT_RE is a class attribute, so it has to be looked up through
+            # self; the bare name is not in scope here and raises NameError.
+            m = self.CONT_RE.match(self.line)
+            if m is not None:
+                parts.append("\n" + m.group(1))
+            elif len(self.line) == 1:
+                # Presumably an empty line that holds only its newline
+                # character -- TODO confirm against readline().
+                parts.append("\n")
+            else:
+                break
+        return "".join(parts)
+
     def parse_prelude(self):
         """Read dictionary prelude until first "__" delimiter."""
+        # Fields recognised in the prelude are accumulated in `pre`, which is
+        # appended to the DOM once the separator has been reached.
+        # Raises ParseException when the input ends before any separator.
+        pre = Prelude()
         while True:
             self.readline()
             if self.eof:
                 raise ParseException("There are no articles...")
+            # "about:" may span several lines: parse_continuation() consumes
+            # them and leaves the first non-continuation line in self.line.
+            m = self.PRELUDE_ABOUT_RE.match(self.line)
+            if m:
+                pre.about += m.group(1) + self.parse_continuation()
+                if self.eof:
+                    raise ParseException("There are no articles...")
+            # Deliberately no "continue" after the about branch: the line now
+            # in self.line has not been examined yet and must still be tested
+            # against the separator and the single-line fields below.
             if self.SEPARATOR_RE.match(self.line):
                 break
+            # Single-line fields. A line that matches none of the patterns is
+            # skipped without an error.
+            m = self.PRELUDE_NAME_RE.match(self.line)
+            if m:
+                pre.name = m.group(1)
+                continue
+            m = self.PRELUDE_URL_RE.match(self.line)
+            if m:
+                pre.urls.append(m.group(1))
+                continue
+            m = self.PRELUDE_AUTHOR_RE.match(self.line)
+            if m:
+                pre.authors.append(m.group(1))
+                continue
+            m = self.PRELUDE_LICENSE_RE.match(self.line)
+            if m:
+                pre.licences.append(m.group(1))
+                continue
+        self.dom.append(pre)
 
     def parse_article(self):
         """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""