diff -r a7c7af336365 -r 73d6e2631338 py/gadict.py --- a/py/gadict.py Mon Nov 27 10:24:20 2017 +0200 +++ b/py/gadict.py Mon Nov 27 11:59:24 2017 +0200 @@ -47,7 +47,7 @@ class Sense: - def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None): + def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None, col_list = None): if not pos: raise ParseException("Part of speech expected...\n") self.pos = pos @@ -60,6 +60,7 @@ self.topic_list = topic_list self.hyper_list = hyper_list self.hypo_list = hypo_list + self.col_list = col_list def add_tr(self, tr): if self.tr_list: @@ -115,6 +116,12 @@ else: self.hypo_list = [hypo] + def add_col(self, col): + if self.col_list: + self.col_list.append(col) + else: + self.col_list = [col] + def __str__(self): if tr_list: (lang, text) = self.tr_list[0] @@ -143,6 +150,7 @@ REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE) HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE) HYPO_RE = re.compile(u"^hypo: (\\w.*)$", re.UNICODE) + COL_RE = re.compile(u"^col: (\\w.*)$", re.UNICODE) CONT_RE = re.compile(u"^ +(.*)", re.UNICODE) @@ -388,6 +396,15 @@ raise ParseException("""Empty hyponym...""") sense.add_hypo(hypo) continue + m = self.COL_RE.match(self.line) + if m is not None: + cols = m.group(1).split(";") + for col in cols: + col = col.strip() + if len(col) == 0: + raise ParseException("""Empty collocations...""") + sense.add_col(col) + continue m = self.TRANSL_RE.match(self.line) if m is not None: sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation()))