Move Python files to a separate directory.
--- a/Makefile Sun Mar 27 16:35:30 2016 +0300
+++ b/Makefile Sun Mar 27 16:44:14 2016 +0300
@@ -322,7 +322,7 @@
dictzip -c $< >$@
dist/dictd/%.c5: %.gadict | dist/dictd
- python3 gadict.py $< $@
+ python3 py/gadict.py $< $@
dist/dictd:
mkdir -p $@
--- a/gadict.py Sun Mar 27 16:35:30 2016 +0300
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,206 +0,0 @@
-
-import io
-import sys
-# import re
-import regex
-
-
-# fgadict = "gadict_en-ru+ua.gadict"
-fgadict = None
-fnout = None
-if len(sys.argv) >= 2:
- fgadict = sys.argv[1]
-if len(sys.argv) >= 3:
- fnout = sys.argv[2]
-
-fin = io.open(fgadict, mode='r', buffering=1, encoding="utf-8")
-if fnout is None:
- fout = sys.stdout
-else:
- fout = open(fnout, "w")
-
-
-class ParseException(Exception):
-
- def __init__(self, msg):
- self.msg = msg
-
- def __repr__(self):
- return self.msg
-
-
-class Parser:
-
- SEPARATOR_RE = regex.compile(r"^__$")
- HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
- HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$")
- HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
- TRANSL_POS_RE = regex.compile(r"^n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr$")
- TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
- TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
-
- TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
-
- def __init__(self):
- pass
-
- def readline(self):
- self.line = self.stream.readline()
- self.eof = len(self.line) == 0
- if not self.eof:
- self.lineno += 1
-
- def parse(self, stream):
- self.lineno = 0
- self.stream = stream
- self.dom = []
- try:
- self.parse_prelude()
- while not self.eof:
- self.parse_article()
- except ParseException as ex:
- if self.TRAILING_SPACES_RE.match(self.line):
- fout.write("{:s}:{:d}: {:s}".format(fgadict, self.lineno, "Traling spaces detected...\n"))
- fout.write("{:s}:{:d}: {:s}\nLINE: {:s}\n".format(fgadict, self.lineno, str(ex), self.line))
- raise Exception(ex)
- return self.dom
-
- def parse_prelude(self):
- """Read dictionary prelude until first "__" delimiter."""
- while True:
- self.readline()
- if self.eof:
- raise ParseException("There are no articles...")
- if self.SEPARATOR_RE.match(self.line):
- break
-
- def parse_article(self):
- """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""
- self.words = None
- self.tran = None
- self.parse_empty_line()
- self.parse_headlines()
- self.parse_translation()
- self.dom.append((self.words, self.tran))
-
- def parse_empty_line(self):
- self.readline()
- if self.eof or len(self.line) != 1:
- raise ParseException(""""__" delimiter should followed by empty line...""")
-
- def parse_headlines(self):
- """Try to match word variations with attributed. Assume that `self.line` on preceding empty line."""
- self.words = {}
- self.readline()
- if self.eof:
- raise ParseException("""There are no definition after "__" delimiter...""")
- m = self.HEADWORD_RE.match(self.line)
- if m is None:
- raise ParseException("""There are no headword after "__" delimiter...""")
- word = m.group(1)
- pron = None
- attrs = set()
- while True:
- self.readline()
- if self.eof or len(self.line) == 1:
- break
- m = self.HEADWORD_RE.match(self.line)
- if m is not None:
- if word is None:
- raise ParseException("""Didn't match previous headword...""")
- self.words[word] = (pron, attrs)
- word = m.group(1)
- pron = None
- attrs = set()
- continue
- m = self.HEADWORD_PRON_RE.match(self.line)
- if m is not None:
- if pron is not None:
- raise ParseException("""Pronunciation is redefined...""")
- pron = m.group(1)
- continue
- m = self.HEADWORD_VAR_RE.match(self.line)
- if m is not None:
- attrs.add(m.group(1))
- continue
- raise ParseException("""Line is not headword or translation or headword attribute...""")
- self.words[word] = (pron, attrs)
-
- def parse_translation(self):
- senses = []
- pos = None
- tr = []
- ex = []
- while True:
- self.readline()
- if self.eof:
- break
- m = self.SEPARATOR_RE.match(self.line)
- if m is not None:
- break
- if len(self.line) == 1:
- senses.append((pos, tr, ex))
- pos = None
- tr = []
- ex = []
- continue
- m = self.TRANSL_POS_RE.match(self.line)
- if m is not None:
- if pos is not None:
- raise ParseException("""Each translation should have only one part of speech marker...""")
- pos = m.group(0)
- continue
- m = self.TRANSL_RE.match(self.line)
- if m is not None:
- tr.append((m.group(1), m.group(2)))
- continue
- m = self.TRANSL_EX_RE.match(self.line)
- if m is not None:
- ex.append((m.group(1), m.group(2)))
- continue
- raise ParseException("""Uknown syntax...""")
- if len(tr) > 0:
- senses.append((pos, tr, ex))
- self.tran = senses
-
-parser = Parser()
-dom = parser.parse(fin)
-fin.close()
-
-for idx in range(1, len(dom)):
- article = dom[idx]
- fout.write("_____\n\n")
- title = "; ".join(article[0].keys())
- fout.write(title)
- fout.write("\n\n")
- for (word, (pron, attrs)) in article[0].items():
- if word == "approach":
- fout.write(str(article[0]))
- fout.write(" ")
- fout.write(word)
- fout.write("\n")
- if pron is not None:
- fout.write(" [")
- fout.write(pron)
- fout.write("]\n")
- if len(attrs) > 0:
- fout.write(" ")
- l = list(attrs)
- l.sort()
- fout.write(", ".join(l))
- fout.write("\n")
- fout.write("\n")
- for (pos, trs, exs) in article[1]:
- fout.write(" ")
- if pos is not None:
- fout.write("⟨")
- fout.write(pos)
- fout.write("⟩ ")
- for (lang, tr) in trs:
- if lang == "ru":
- fout.write(tr)
- break
- fout.write("\n")
-
- # fout.write(str(article[0])+"\n")
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/py/gadict.py Sun Mar 27 16:44:14 2016 +0300
@@ -0,0 +1,206 @@
+
+import io
+import sys
+# import re
+import regex
+
+
+# Command line: gadict.py INPUT.gadict [OUTPUT]; without OUTPUT the result is written to stdout.
+fgadict = sys.argv[1] if len(sys.argv) >= 2 else None
+fnout = sys.argv[2] if len(sys.argv) >= 3 else None
+if fgadict is None:
+    # Fail with a usage message instead of the TypeError that io.open(None) would raise.
+    sys.exit("usage: gadict.py INPUT.gadict [OUTPUT]")
+
+fin = io.open(fgadict, mode='r', buffering=1, encoding="utf-8")
+if fnout is None:
+    fout = sys.stdout
+else:
+    # The writer emits non-ASCII text (e.g. the "⟨"/"⟩" brackets), so do not depend on the locale encoding.
+    fout = io.open(fnout, mode='w', encoding="utf-8")
+
+
+class ParseException(Exception):  # syntax error in the gadict source, raised by Parser
+
+    def __init__(self, msg):
+        self.msg = msg  # human-readable description of the problem
+
+    def __repr__(self):
+        return self.msg  # repr() yields the bare message, without the class name
+
+
+class Parser:
+    """Line-oriented parser that turns gadict source text into a list of articles."""
+
+    SEPARATOR_RE = regex.compile(r"^__$")
+    HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
+    HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$")
+    HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
+    # Grouped so that "^" and "$" bind to every marker; ungrouped, "num" matched as "n" and "noun" was accepted.
+    TRANSL_POS_RE = regex.compile(r"^(?:n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr)$")
+    TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
+    TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
+
+    TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
+
+    def readline(self):
+        """Read the next line into `self.line`, set `self.eof`, and count the line if one was read."""
+        self.line = self.stream.readline()
+        self.eof = len(self.line) == 0
+        if not self.eof:
+            self.lineno += 1
+
+    def parse(self, stream):
+        """Parse `stream` and return a list of `(words, senses)`; on ParseException report to `fout` and raise Exception."""
+        self.lineno = 0
+        self.stream = stream
+        self.dom = []
+        try:
+            self.parse_prelude()
+            while not self.eof:
+                self.parse_article()
+        except ParseException as ex:
+            if self.TRAILING_SPACES_RE.search(self.line):  # search(): the run sits at the line end, not its start
+                fout.write("{:s}:{:d}: {:s}".format(fgadict, self.lineno, "Traling spaces detected...\n"))
+            fout.write("{:s}:{:d}: {:s}\nLINE: {:s}\n".format(fgadict, self.lineno, str(ex), self.line))
+            raise Exception(ex)
+        return self.dom
+
+    def parse_prelude(self):
+        """Skip the dictionary prelude up to the first "__" delimiter; raise ParseException if there is none."""
+        while True:
+            self.readline()
+            if self.eof:
+                raise ParseException("There are no articles...")
+            if self.SEPARATOR_RE.match(self.line):
+                break
+
+    def parse_article(self):
+        """Parse one article and append it to `self.dom`; `self.line` must hold the "__" delimiter on entry."""
+        self.words = None
+        self.tran = None
+        self.parse_empty_line()
+        self.parse_headlines()
+        self.parse_translation()
+        self.dom.append((self.words, self.tran))
+
+    def parse_empty_line(self):
+        """Consume the line that follows the "__" delimiter and require it to be empty."""
+        self.readline()
+        if self.eof or len(self.line) != 1:
+            raise ParseException(""""__" delimiter should followed by empty line...""")
+
+    def parse_headlines(self):
+        """Fill `self.words` with `word -> (pron, attrs)` until an empty line or EOF; starts on the preceding empty line."""
+        self.words = {}
+        self.readline()
+        if self.eof:
+            raise ParseException("""There are no definition after "__" delimiter...""")
+        m = self.HEADWORD_RE.match(self.line)
+        if m is None:
+            raise ParseException("""There are no headword after "__" delimiter...""")
+        word = m.group(1)
+        pron = None
+        attrs = set()
+        while True:
+            self.readline()
+            if self.eof or len(self.line) == 1:
+                break
+            m = self.HEADWORD_RE.match(self.line)
+            if m is not None:
+                self.words[word] = (pron, attrs)
+                word = m.group(1)
+                pron = None
+                attrs = set()
+                continue
+            m = self.HEADWORD_PRON_RE.match(self.line)
+            if m is not None:
+                if pron is not None:
+                    raise ParseException("""Pronunciation is redefined...""")
+                pron = m.group(1)
+                continue
+            m = self.HEADWORD_VAR_RE.match(self.line)
+            if m is not None:
+                attrs.add(m.group(1))
+                continue
+            raise ParseException("""Line is not headword or translation or headword attribute...""")
+        self.words[word] = (pron, attrs)
+
+    def parse_translation(self):
+        """Fill `self.tran` with `(pos, translations, examples)` senses until "__" or EOF; an empty line ends a sense."""
+        senses = []
+        pos = None
+        tr = []
+        ex = []
+        while True:
+            self.readline()
+            if self.eof:
+                break
+            if self.SEPARATOR_RE.match(self.line):
+                break
+            if len(self.line) == 1:
+                senses.append((pos, tr, ex))
+                pos = None
+                tr = []
+                ex = []
+                continue
+            m = self.TRANSL_POS_RE.match(self.line)
+            if m is not None:
+                if pos is not None:
+                    raise ParseException("""Each translation should have only one part of speech marker...""")
+                pos = m.group(0)
+                continue
+            m = self.TRANSL_RE.match(self.line)
+            if m is not None:
+                tr.append((m.group(1), m.group(2)))
+                continue
+            m = self.TRANSL_EX_RE.match(self.line)
+            if m is not None:
+                ex.append((m.group(1), m.group(2)))
+                continue
+            raise ParseException("""Uknown syntax...""")
+        if len(tr) > 0:
+            senses.append((pos, tr, ex))
+        self.tran = senses
+
+parser = Parser()
+dom = parser.parse(fin)
+fin.close()
+
+# NOTE(review): dom[0] is never written; presumably the first article carries dictionary metadata — confirm.
+for article in dom[1:]:
+    fout.write("_____\n\n")
+    title = "; ".join(article[0].keys())
+    fout.write(title)
+    fout.write("\n\n")
+    for (word, (pron, attrs)) in article[0].items():
+        fout.write(" ")
+        fout.write(word)
+        fout.write("\n")
+        if pron is not None:
+            fout.write(" [")
+            fout.write(pron)
+            fout.write("]\n")
+        if len(attrs) > 0:
+            fout.write(" ")
+            # Sorted so the attribute order does not depend on set iteration order.
+            fout.write(", ".join(sorted(attrs)))
+            fout.write("\n")
+    fout.write("\n")
+    for (pos, trs, exs) in article[1]:
+        fout.write(" ")
+        if pos is not None:
+            fout.write("⟨")
+            fout.write(pos)
+            fout.write("⟩ ")
+        for (lang, tr) in trs:
+            # Only the first "ru" translation of each sense is emitted.
+            if lang == "ru":
+                fout.write(tr)
+                break
+        fout.write("\n")
+
+# Close the output file so buffered data reaches disk; stdout is left open for the interpreter.
+if fout is not sys.stdout:
+    fout.close()
+