gadict: comparison py/gadict.py

equal deleted inserted replaced

-:791994f95561
+:b47698d5ccab
 import regex
class Prelude:
    """Dictionary-wide metadata gathered from the prelude section.

    Attributes:
        name: text of the ``name:`` line, or ``None`` if none was seen.
        about: text of the ``about:`` entry including continuation lines.
        urls: values of the ``url:`` lines, in file order.
        authors: values of the ``by:`` lines, in file order.
        licences: values of the ``term:`` lines, in file order.
    """

    def __init__(self):
        self.name = None
        self.about = ""
        # The lists are created per instance.  As class attributes they would
        # be shared, so appending to one prelude would change every prelude.
        self.urls = []
        self.authors = []
        self.licences = []
 class ParseException(BaseException):
 def __init__(self, msg, lineno = None, line = None):
 HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
 TRANSL_POS_RE = regex.compile(r"^n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr$")
 TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
 TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
+CONT_RE = regex.compile(r"^ +(.*)")
 TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
+PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")
+PRELUDE_URL_RE = regex.compile(r"^url: (.*)")
+PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")
+PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")
+PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")
 def __init__(self):
 pass
 def readline(self):
 self.parse_article()
 except ParseException as ex:
 raise ParseException(ex.msg, self.lineno, self.line) from ex
 return self.dom
def parse_continuation(self):
    """Collect the continuation lines that follow the current line.

    Calls ``self.readline()`` repeatedly.  A line matching ``CONT_RE``
    (one or more leading spaces) contributes a newline plus the text after
    the leading spaces.  A line of length one (presumably a bare newline,
    i.e. an empty line) contributes a newline only.  Reading stops at end of
    input or at the first other line, which is left in ``self.line`` for the
    caller to process.

    Returns:
        str: the accumulated text; empty if no continuation lines follow.
    """
    pieces = []
    while True:
        self.readline()
        if self.eof:
            break
        # The pattern is defined next to the other parser patterns, which are
        # accessed through ``self``; a bare ``CONT_RE`` is not resolvable here.
        m = self.CONT_RE.match(self.line)
        if m is not None:
            pieces.append("\n" + m.group(1))
        elif len(self.line) == 1:
            pieces.append("\n")
        else:
            break
    return "".join(pieces)
 def parse_prelude(self):
 """Read dictionary prelude until first "__" delimiter."""
+pre = Prelude()
 while True:
 self.readline()
 if self.eof:
 raise ParseException("There are no articles...")
+m = self.PRELUDE_ABOUT_RE.match(self.line)
+if m:
+pre.about += m.group(1) + self.parse_continuation()
+if self.eof:
+raise ParseException("There are no articles...")
 if self.SEPARATOR_RE.match(self.line):
 break
+m = self.PRELUDE_NAME_RE.match(self.line)
+if m:
+pre.name = m.group(1)
+continue
+m = self.PRELUDE_URL_RE.match(self.line)
+if m:
+pre.urls.append(m.group(1))
+continue
+m = self.PRELUDE_AUTHOR_RE.match(self.line)
+if m:
+pre.authors.append(m.group(1))
+continue
+m = self.PRELUDE_LICENSE_RE.match(self.line)
+if m:
+pre.licences.append(m.group(1))
+continue
+self.dom.append(pre)
 def parse_article(self):
 """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""
 self.words = None
 self.tran = None

changeset 402	b47698d5ccab
parent 400	aa03182d2e26
child 406	f0ac87e10d9a