Parse dictionary metainfo.
--- a/py/gadict.py Sun Mar 27 23:32:42 2016 +0300
+++ b/py/gadict.py Sun Mar 27 23:57:43 2016 +0300
@@ -2,6 +2,14 @@
import regex
+class Prelude:
+    """Dictionary metainfo collected from the prelude of a dictionary file.
+
+    Attributes:
+        name: dictionary name, or None when none was given.
+        about: free-form description text ("" when absent).
+        urls: project URLs, in the order they were listed.
+        authors: author entries, in the order they were listed.
+        licences: licence entries, in the order they were listed.
+    """
+
+    def __init__(self):
+        self.name = None
+        self.about = ""
+        # The lists are created per instance.  As class attributes they were
+        # shared, so values appended while parsing one dictionary showed up
+        # in every other Prelude as well.
+        self.urls = []
+        self.authors = []
+        self.licences = []
+
+
class ParseException(BaseException):
def __init__(self, msg, lineno = None, line = None):
@@ -28,8 +36,16 @@
TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
+ CONT_RE = regex.compile(r"^ +(.*)")  # continuation line: one or more leading spaces, the rest is captured
+
TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
+ PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")  # prelude key "name: "; the value is captured
+ PRELUDE_URL_RE = regex.compile(r"^url: (.*)")  # prelude key "url: "; the value is captured
+ PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")  # prelude key "by: "; the value is stored as an author
+ PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")  # prelude key "term: "; the value is stored as a licence
+ PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")  # prelude key "about:"; the space and the first-line text are optional
+
def __init__(self):
pass
@@ -53,14 +69,51 @@
raise ParseException(ex.msg, self.lineno, self.line) from ex
return self.dom
+    def parse_continuation(self):
+        """Collect the continuation lines that follow the current line.
+
+        Keeps reading while a line either starts with one or more spaces
+        (``CONT_RE``) or is exactly one character long (presumably an empty
+        line that holds only its line terminator -- confirm against
+        ``readline``).  Every consumed line contributes a newline followed
+        by its text without the leading spaces.
+
+        Returns:
+            The accumulated text; "" when the next line is not a
+            continuation.  Unless ``self.eof`` is set, ``self.line`` then
+            holds the first line that is not a continuation, so the caller
+            must examine it instead of reading another one.
+        """
+        string = ""
+        while True:
+            self.readline()
+            if self.eof:
+                return string
+            # CONT_RE is defined in the class body, so it is only reachable
+            # through self; the bare name raised NameError on the first call.
+            m = self.CONT_RE.match(self.line)
+            if m is not None:
+                string += "\n" + m.group(1)
+            elif len(self.line) == 1:
+                string += "\n"
+            else:
+                return string
+
def parse_prelude(self):
"""Read dictionary prelude until first "__" delimiter."""
+ pre = Prelude()  # collects the metainfo found before the first separator
while True:
self.readline()
if self.eof:
raise ParseException("There are no articles...")
+ m = self.PRELUDE_ABOUT_RE.match(self.line)  # "about:" is the only multi-line key, so it is tried first
+ if m:
+ pre.about += m.group(1) + self.parse_continuation()  # no `continue`: parse_continuation() stops on a line it has read but not consumed, which is examined below
+ if self.eof:  # input ended inside the "about" text
+ raise ParseException("There are no articles...")
if self.SEPARATOR_RE.match(self.line):
break
+ m = self.PRELUDE_NAME_RE.match(self.line)  # the remaining keys are single-line; the first matching pattern wins
+ if m:
+ pre.name = m.group(1)  # a later "name:" line overwrites an earlier one
+ continue
+ m = self.PRELUDE_URL_RE.match(self.line)
+ if m:
+ pre.urls.append(m.group(1))  # "url:" may repeat; every value is kept, in order
+ continue
+ m = self.PRELUDE_AUTHOR_RE.match(self.line)
+ if m:
+ pre.authors.append(m.group(1))  # "by:" may repeat; every value is kept, in order
+ continue
+ m = self.PRELUDE_LICENSE_RE.match(self.line)  # NOTE(review): a line matching none of the patterns appears to be skipped silently -- confirm
+ if m:
+ pre.licences.append(m.group(1))  # "term:" may repeat; every value is kept, in order
+ continue
+ self.dom.append(pre)  # NOTE(review): presumably runs once after the loop (indentation is not visible here), making the prelude the first DOM entry -- confirm
def parse_article(self):
"""Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""
--- a/py/gadict_c5.py Sun Mar 27 23:32:42 2016 +0300
+++ b/py/gadict_c5.py Sun Mar 27 23:57:43 2016 +0300
@@ -30,6 +30,28 @@
finally:
fin.close()
+# Write the prelude (first DOM entry) as 00-database-* entries ahead of the
+# articles.
+prelude = dom[0]
+if prelude.name is not None:
+    fout.write("_____\n\n00-database-short\n" + prelude.name + "\n")
+if prelude.urls:
+    fout.write("_____\n\n00-database-url\n")
+    for url in prelude.urls:
+        fout.write(url + "\n")
+# The info entry is always written; only its "Dictionary name" line is
+# optional.
+fout.write("_____\n\n00-database-info\n")
+if prelude.name is not None:
+    fout.write("Dictionary name: " + prelude.name + "\n\n")
+fout.write("Project URLs: " + " , ".join(prelude.urls) + "\n\n")
+fout.write("Project licenses: " + ", ".join(prelude.licences) + "\n")
+
for idx in range(1, len(dom)):
article = dom[idx]
fout.write("_____\n\n")