--- a/py/gadict.py Sun Oct 23 19:57:02 2016 +0300
+++ b/py/gadict.py Sun Oct 23 20:01:22 2016 +0300
@@ -45,7 +45,7 @@
class Sense:
- def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None):
+ def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None):
if not pos:
raise ParseException("Part of speech expected...\n")
self.pos = pos
@@ -56,6 +56,8 @@
self.syn_list = syn_list
self.rel_list = rel_list
self.topic_list = topic_list
+ self.hyper_list = hyper_list
+ self.hypo_list = hypo_list
def add_tr(self, tr):
if self.tr_list:
@@ -99,6 +101,18 @@
else:
self.topic_list = [topic]
+    def add_hyper(self, hyper):  # record one more hypernym on this sense
+        if not self.hyper_list:  # None or empty: start a fresh one-item list
+            self.hyper_list = [hyper]
+        else:  # a non-empty list already exists: extend it in place
+            self.hyper_list.append(hyper)
+
+    def add_hypo(self, hypo):  # record one more hyponym on this sense
+        if not self.hypo_list:  # None or empty: start a fresh one-item list
+            self.hypo_list = [hypo]
+        else:  # a non-empty list already exists: extend it in place
+            self.hypo_list.append(hypo)
+
def __str__(self):
if tr_list:
(lang, text) = self.tr_list[0]
@@ -118,12 +132,14 @@
HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$")
TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
- TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
+ TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""")
TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$")
TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
+ HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$")
+ HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$")
CONT_RE = regex.compile(u"^ +(.*)")
@@ -340,6 +356,24 @@
raise ParseException("""Empty relation...""")
sense.add_rel(rel)
continue
+ m = self.HYPER_RE.match(self.line)
+ if m is not None:
+ hypers = m.group(1).split(";")
+ for hyper in hypers:
+ hyper = hyper.strip()
+ if len(hyper) == 0:
+ raise ParseException("""Empty hypernym...""")
+ sense.add_hyper(hyper)
+ continue
+ m = self.HYPO_RE.match(self.line)
+ if m is not None:
+ hypos = m.group(1).split(";")
+ for hypo in hypos:
+ hypo = hypo.strip()
+ if len(hypo) == 0:
+ raise ParseException("""Empty hyponym...""")
+ sense.add_hypo(hypo)
+ continue
m = self.TRANSL_RE.match(self.line)
if m is not None:
sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))