changeset 984 | 73d6e2631338 |
parent 937 | 981839c72b64 |
child 1006 | b1f11eff7c70 |
983:a7c7af336365 | 984:73d6e2631338 |
---|---|
45 def __repr__(self): |
45 def __repr__(self): |
46 return "<Headword {}>".format(self.headword) |
46 return "<Headword {}>".format(self.headword) |
47 |
47 |
48 class Sense: |
48 class Sense: |
49 |
49 |
50 def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None): |
50 def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None, col_list = None): |
51 if not pos: |
51 if not pos: |
52 raise ParseException("Part of speech expected...\n") |
52 raise ParseException("Part of speech expected...\n") |
53 self.pos = pos |
53 self.pos = pos |
54 self.tr_list = tr_list |
54 self.tr_list = tr_list |
55 self.ex_list = ex_list |
55 self.ex_list = ex_list |
58 self.syn_list = syn_list |
58 self.syn_list = syn_list |
59 self.rel_list = rel_list |
59 self.rel_list = rel_list |
60 self.topic_list = topic_list |
60 self.topic_list = topic_list |
61 self.hyper_list = hyper_list |
61 self.hyper_list = hyper_list |
62 self.hypo_list = hypo_list |
62 self.hypo_list = hypo_list |
63 self.col_list = col_list |
|
63 |
64 |
64 def add_tr(self, tr): |
65 def add_tr(self, tr): |
65 if self.tr_list: |
66 if self.tr_list: |
66 self.tr_list.append(tr) |
67 self.tr_list.append(tr) |
67 else: |
68 else: |
112 def add_hypo(self, hypo): |
113 def add_hypo(self, hypo): |
113 if self.hypo_list: |
114 if self.hypo_list: |
114 self.hypo_list.append(hypo) |
115 self.hypo_list.append(hypo) |
115 else: |
116 else: |
116 self.hypo_list = [hypo] |
117 self.hypo_list = [hypo] |
118 |
|
119 def add_col(self, col): |
|
120 if self.col_list: |
|
121 self.col_list.append(col) |
|
122 else: |
|
123 self.col_list = [col] |
|
117 |
124 |
118 def __str__(self): |
125 def __str__(self): |
119 if tr_list: |
126 if tr_list: |
120 (lang, text) = self.tr_list[0] |
127 (lang, text) = self.tr_list[0] |
121 return "{}: {}".format(lang, text) |
128 return "{}: {}".format(lang, text) |
141 SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE) |
148 SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE) |
142 ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE) |
149 ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE) |
143 REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE) |
150 REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE) |
144 HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE) |
151 HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE) |
145 HYPO_RE = re.compile(u"^hypo: (\\w.*)$", re.UNICODE) |
152 HYPO_RE = re.compile(u"^hypo: (\\w.*)$", re.UNICODE) |
153 COL_RE = re.compile(u"^col: (\\w.*)$", re.UNICODE) |
|
146 |
154 |
147 CONT_RE = re.compile(u"^ +(.*)", re.UNICODE) |
155 CONT_RE = re.compile(u"^ +(.*)", re.UNICODE) |
148 |
156 |
149 TRAILING_SPACES_RE = re.compile(u"\\s+$", re.UNICODE) |
157 TRAILING_SPACES_RE = re.compile(u"\\s+$", re.UNICODE) |
150 |
158 |
386 hypo = hypo.strip() |
394 hypo = hypo.strip() |
387 if len(hypo) == 0: |
395 if len(hypo) == 0: |
388 raise ParseException("""Empty hyponym...""") |
396 raise ParseException("""Empty hyponym...""") |
389 sense.add_hypo(hypo) |
397 sense.add_hypo(hypo) |
390 continue |
398 continue |
399 m = self.COL_RE.match(self.line) |
|
400 if m is not None: |
|
401 cols = m.group(1).split(";") |
|
402 for col in cols: |
|
403 col = col.strip() |
|
404 if len(col) == 0: |
|
405 raise ParseException("""Empty collocations...""") |
|
406 sense.add_col(col) |
|
407 continue |
|
391 m = self.TRANSL_RE.match(self.line) |
408 m = self.TRANSL_RE.match(self.line) |
392 if m is not None: |
409 if m is not None: |
393 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation())) |
410 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation())) |
394 read = False |
411 read = False |
395 continue |
412 continue |