gadict: comparison py/gadict.py

equal deleted inserted replaced

-:2fac252890a5
+:ece60575a96a
 SEPARATOR_RE = regex.compile(r"^__$")
 HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
 HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$")
 HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
-TRANSL_POS_RE = regex.compile(r"^n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr$")
+TRANSL_POS_RE = regex.compile(r"^n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr|prefix$")
-TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
+TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(].*)$")
 TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
+TOPIC_RE = regex.compile(r"^topic: (\p{L}.*)$")
 CONT_RE = regex.compile(r"^ +(.*)")
 TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
 self.parse_article()
 except ParseException as ex:
 raise ParseException(ex.msg, self.lineno, self.line)
 return self.dom
-def parse_continuation(self):
+def parse_prelude_continuation(self):
 string = ""
 while True:
 self.readline()
 if self.eof:
 return string
 self.readline()
 if self.eof:
 raise ParseException("There are no articles...")
 m = self.PRELUDE_ABOUT_RE.match(self.line)
 if m:
-pre.about += m.group(1) + self.parse_continuation()
+pre.about += m.group(1) + self.parse_prelude_continuation()
 if self.eof:
 raise ParseException("There are no articles...")
 if self.SEPARATOR_RE.match(self.line):
 break
 m = self.PRELUDE_NAME_RE.match(self.line)
 attrs.add(m.group(1))
 continue
 raise ParseException("""Line is not a headword or translation or headword attribute...""")
 self.words[word] = (pron, attrs)
+def parse_translation_continuation(self):
+string = ""
+while True:
+self.readline()
+if self.eof:
+return string
+m = self.CONT_RE.match(self.line)
+if m is not None:
+string += "\n" + m.group(1)
+else:
+return string
 def parse_translation(self):
 senses = []
 pos = None
 tr = []
 ex = []
-while True:
+read = True
-self.readline()
+while True:
+if read:
+self.readline()
+read = True
 if self.eof:
 break
 m = self.SEPARATOR_RE.match(self.line)
 if m is not None:
 break
 if m is not None:
 if pos is not None:
 raise ParseException("""Each translation should have only one part of speech marker...""")
 pos = m.group(0)
 continue
+m = self.TOPIC_RE.match(self.line)
+if m is not None:
+# TODO
+continue
 m = self.TRANSL_RE.match(self.line)
 if m is not None:
-tr.append((m.group(1), m.group(2)))
+tr.append((m.group(1), m.group(2) + self.parse_translation_continuation()))
+read = False
 continue
 m = self.TRANSL_EX_RE.match(self.line)
 if m is not None:
-ex.append((m.group(1), m.group(2)))
+ex.append((m.group(1), m.group(2) + self.parse_translation_continuation()))
+read = False
 continue
 raise ParseException("""Uknown syntax...""")
 if len(tr) > 0:
 senses.append((pos, tr, ex))
 self.tran = senses

changeset 412	ece60575a96a
parent 406	f0ac87e10d9a
child 422	c97e9c1febe8