Store headword structure as a class. Store headwords in a list to preserve
their order from the source file.
--- a/py/gadict.py Thu Sep 15 15:42:52 2016 +0300
+++ b/py/gadict.py Thu Sep 15 17:48:20 2016 +0300
@@ -31,6 +31,18 @@
else:
return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8'))
+class Headword:
+    """One headword of an article: spelling, optional pronunciation and attribute set."""
+    def __init__(self, headword, pron = None, attrs = None):
+        self.headword = headword
+        self.pron = pron
+        self.attrs = attrs if attrs is not None else set()  # writers call len() on attrs, so never keep None
+
+    def __str__(self):
+        return self.headword
+    def __repr__(self):
+        return "<Headword {}>".format(self.headword)
+
class Sense:
def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None):
@@ -72,6 +84,14 @@
else:
self.topic_list.append(topic)
+    def __str__(self):  # short label: first (lang, text) translation, or a placeholder
+        if self.tr_list:
+            (lang, text) = self.tr_list[0]
+            return "{}: {}".format(lang, text)
+        return "<empty sense>"
+    def __repr__(self):  # debugging form; wraps the short label produced by __str__
+        return "<Sense {}>".format(str(self))
+
class Parser:
"""gadict dictionary format parser."""
@@ -190,7 +210,7 @@
def parse_headlines(self):
"""Try to match word variations with attributed. Assume that `self.line` on preceding empty line."""
- self.words = {}
+ self.words = []
self.readline()
if self.eof:
raise ParseException("""There are no definition after "__" delimiter...""")
@@ -208,7 +228,7 @@
if m is not None:
if word is None:
raise ParseException("""Didn't match previous headword...""")
- self.words[word] = (pron, attrs)
+ self.words.append(Headword(word, pron, attrs))
word = m.group(1)
pron = None
attrs = set()
@@ -224,7 +244,7 @@
attrs.add(m.group(1))
continue
raise ParseException("""Line is not a headword or translation or headword attribute...""")
- self.words[word] = (pron, attrs)
+ self.words.append(Headword(word, pron, attrs))
def parse_translation_continuation(self):
string = ""
--- a/py/gadict_c5.py Thu Sep 15 15:42:52 2016 +0300
+++ b/py/gadict_c5.py Thu Sep 15 17:48:20 2016 +0300
@@ -65,54 +65,28 @@
FOUT.write("\n")
-def attr_key(item):
- (word, (pron, attrs)) = item
- if not attrs:
- return "zzz"
- best_vattr = None
- for attr in attrs:
- if attr in ["v1", "v2", "v3"]:
- if not best_vattr or (best_vattr and best_vattr > attr):
- best_vattr = attr
- if best_vattr:
- return best_vattr
- for attr in attrs: # single/plural
- if attr in ["s"]:
- return attr
- for attr in attrs: # comparative/superlative
- if attr in ["comp"]:
- return attr
- for attr in attrs: # Am/Br/Au
- if attr in ["Am"]:
- return attr
- return "zzz"
-
-
-for idx in range(1, len(DOM)):
- article = DOM[idx]
+for (headwords, translations) in DOM[1:]:
FOUT.write("_____\n\n")
- title = "; ".join(article[0].keys())
+ title = "; ".join([h.headword for h in headwords])
FOUT.write(title)
FOUT.write("\n\n")
- defs = article[0].items()
- defs = sorted(defs, key = attr_key)
- for (word, (pron, attrs)) in defs:
+ for hw in headwords:
FOUT.write(" ")
- FOUT.write(word)
- if pron is not None:
+ FOUT.write(hw.headword)
+ if hw.pron is not None:
FOUT.write(" [")
- FOUT.write(pron)
+ FOUT.write(hw.pron)
FOUT.write("]")
- if len(attrs) > 0:
+ if len(hw.attrs) > 0:
FOUT.write(" ")
- l = ["«"+x+"»" for x in attrs]
+ l = ["«"+x+"»" for x in hw.attrs]
l.sort()
FOUT.write(", ".join(l))
FOUT.write("\n")
FOUT.write("\n")
-    for sense in article[1]:
+    for sense in translations:
         if not sense:
-            raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())
+            raise Exception("""Empty sense for article: """ + headwords[0].headword)
FOUT.write(" ")
if sense.pos:
FOUT.write("«")
--- a/py/gadict_srs_tab.py Thu Sep 15 15:42:52 2016 +0300
+++ b/py/gadict_srs_tab.py Thu Sep 15 17:48:20 2016 +0300
@@ -38,46 +38,21 @@
else:
FOUT = codecs.open(FONAME, "w", "utf-8")
-def attr_key(item):
- (word, (pron, attrs)) = item
- if not attrs:
- return "zzz"
- best_vattr = None
- for attr in attrs:
- if attr in ["v1", "v2", "v3"]:
- if not best_vattr or (best_vattr and best_vattr > attr):
- best_vattr = attr
- if best_vattr:
- return best_vattr
- for attr in attrs: # single/plural
- if attr in ["s"]:
- return attr
- for attr in attrs: # comparative/superlative
- if attr in ["comp"]:
- return attr
- for attr in attrs: # Am/Br/Au
- if attr in ["Am"]:
- return attr
- return "zzz"
-
-for idx in range(1, len(DOM)):
- article = DOM[idx]
- defs = article[0].items()
- defs = sorted(defs, key = attr_key)
+for (headwords, translations) in DOM[1:]:
lines = []
- for (word, (pron, attrs)) in defs:
- line = "<b>"+word+"</b>"
- if pron:
- line += " ["+pron+"]"
- if len(attrs) > 0:
- attrs = [" «"+x+"»" for x in attrs]
+ for hw in headwords:
+ line = "<b>"+hw.headword+"</b>"
+ if hw.pron:
+ line += " ["+hw.pron+"]"
+ if len(hw.attrs) > 0:
+ attrs = [" «"+x+"»" for x in hw.attrs]
attrs.sort()
line += ",".join(attrs)
lines.append(line)
question = "<br>".join(lines)
FOUT.write(question)
FOUT.write("\t")
-    for sense in article[1]:
+    for sense in translations:
         if not sense:
-            raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())
+            raise Exception("""Empty sense for article: """ + headwords[0].headword)
if sense.pos: