gadict: comparison py/gadict.py

equal deleted inserted replaced

-:ed54a93aa8d7
+:91771594bc8b
 elif self.line is None:
 return ":{:d}:{:s}".format(self.lineno, self.msg)
 else:
 return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line)
class Sense:
    """One sense of a dictionary entry: a part of speech plus attached lists.

    Attributes:
        pos: part-of-speech marker; guaranteed truthy after construction.
        tr_list: translations; always a list (an omitted or empty argument
            becomes a fresh empty list).
        ex_list: examples, stored as passed in (``None`` when omitted).
        syn_list: synonyms, stored as passed in (``None`` when omitted).
        ant_list: antonyms, stored as passed in (``None`` when omitted).
        topic_list: topics, stored as passed in (``None`` when omitted).

    The optional lists stay ``None`` until their first item is added, so
    consumers can tell "never provided" from "provided".
    """

    def __init__(self, pos, tr_list=None, ex_list=None, syn_list=None,
                 ant_list=None, topic_list=None):
        """Create a sense.

        Raises:
            ParseException: if ``pos`` is empty or ``None``.
        """
        if not pos:
            raise ParseException("Part of speech expected...\n")
        self.pos = pos
        self.tr_list = tr_list if tr_list else []
        self.ex_list = ex_list
        self.syn_list = syn_list
        self.ant_list = ant_list
        self.topic_list = topic_list

    def __repr__(self):
        return "Sense(pos={!r}, tr_list={!r}, ex_list={!r}, syn_list={!r}, ant_list={!r}, topic_list={!r})".format(
            self.pos, self.tr_list, self.ex_list,
            self.syn_list, self.ant_list, self.topic_list)

    def _append_to(self, attr, item):
        """Append ``item`` to the list stored in ``attr``, creating it if unset.

        A ``None`` or empty list is replaced by a new one-element list; a
        non-empty list is extended in place.
        """
        current = getattr(self, attr)
        if not current:
            setattr(self, attr, [item])
        else:
            current.append(item)

    def add_tr(self, tr):
        """Append a translation; ``tr_list`` always exists, so no lazy init."""
        self.tr_list.append(tr)

    def add_ex(self, ex):
        """Append an example."""
        self._append_to("ex_list", ex)

    def add_syn(self, syn):
        """Append a synonym."""
        self._append_to("syn_list", syn)

    def add_ant(self, ant):
        """Append an antonym."""
        self._append_to("ant_list", ant)

    def add_topic(self, topic):
        """Append a topic."""
        self._append_to("topic_list", topic)
 class Parser:
 """gadict dictionary format parser."""
 # Line-level patterns. parse_translation() applies them with ``match()``
 # to ``self.line``.
 COMMENT_RE = regex.compile(r"^# ")
 SEPARATOR_RE = regex.compile(r"^__$")
 HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
 HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
 HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
 # The alternation must be grouped: in "^n|det|...|prefix$" the anchors
 # bind only to the first and last alternatives, so with match() a line
 # "num" was taken as "n" and "phr.v" as "phr". With the group, the whole
 # line has to be exactly one marker.
 # NOTE(review): assumes the line carries no trailing spaces before the
 # newline, as the other "$"-anchored patterns here do -- confirm.
 TRANSL_POS_RE = regex.compile(r"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\.v|abbr|prefix)$")
 TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(].*)$")
 TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> ([-\p{L}].*)$")
 # Group 1 of each of the next three is the raw list text;
 # parse_translation() splits it on ";".
 TOPIC_RE = regex.compile(r"^topic: (\p{L}.*)$")
 SYN_RE = regex.compile(r"^syn: (\p{L}.*)$")
 ANT_RE = regex.compile(r"^ant: (\p{L}.*)$")
 CONT_RE = regex.compile(r"^ +(.*)")
 TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
 else:
 return string
 def parse_translation(self):
     """Parse the translation section into ``Sense`` objects.

     Lines are consumed until EOF or a separator line. A blank line
     closes the current sense. Every sense must open with a
     part-of-speech marker, followed by any mix of topic/syn/ant lists,
     translations and examples. The result is stored in ``self.tran``.

     Raises:
         ParseException: on a second part-of-speech marker within one
             sense, on content before any marker, on an empty item in a
             ";"-separated list, or on an unrecognized line.
     """
     senses = []
     sense = None
     # parse_translation_continuation() leaves an unprocessed line in
     # self.line (presumably by reading ahead -- its body is not shown),
     # so the iteration after a translation/example must not read again.
     read = True
     while True:
         if read:
             self.readline()
         read = True
         if self.eof:
             break
         if self.SEPARATOR_RE.match(self.line) is not None:
             break
         # A line of length 1 is treated as blank (presumably just the
         # newline) and ends the current sense.
         if len(self.line) == 1:
             if sense is not None:
                 senses.append(sense)
                 sense = None
             continue
         m = self.TRANSL_POS_RE.match(self.line)
         if m is not None:
             if sense is not None:
                 raise ParseException("""Each translation should have only one part of speech marker...""")
             sense = Sense(m.group(0))
             continue
         # Everything below attaches to an already opened sense.
         if sense is None:
             raise ParseException("""Missing part of speech marker...""")
         # ";"-separated list lines: (pattern, adder, error for an empty item).
         list_kinds = (
             (self.TOPIC_RE, sense.add_topic, """Empty topic..."""),
             (self.SYN_RE, sense.add_syn, """Empty synonym..."""),
             (self.ANT_RE, sense.add_ant, """Empty antonym..."""),
         )
         handled = False
         for list_re, add_item, empty_msg in list_kinds:
             m = list_re.match(self.line)
             if m is None:
                 continue
             for item in m.group(1).split(";"):
                 item = item.strip()
                 if len(item) == 0:
                     raise ParseException(empty_msg)
                 add_item(item)
             handled = True
             break
         if handled:
             continue
         m = self.TRANSL_RE.match(self.line)
         if m is not None:
             sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))
             read = False
             continue
         m = self.TRANSL_EX_RE.match(self.line)
         if m is not None:
             sense.add_ex((m.group(1), m.group(2) + self.parse_translation_continuation()))
             read = False
             continue
         raise ParseException("""Uknown syntax...""")
     # Flush the sense still open when the loop ends. Doing it here covers
     # both the separator and the EOF exit; previously a sense open at EOF
     # was dropped.
     if sense is not None:
         senses.append(sense)
     self.tran = senses

changeset 530	91771594bc8b
parent 527	0a31299fad70
child 542	b5197c70972c