changeset 618 | 6ad7203ac9dc |
parent 594 | 910efcf51ac0 |
child 629 | 6a862ea41c00 |
617:ec1c2838feae | 618:6ad7203ac9dc |
---|---|
43 def __repr__(self): |
43 def __repr__(self): |
44 return "<Headword {}>".format(self.headword) |
44 return "<Headword {}>".format(self.headword) |
45 |
45 |
46 class Sense: |
46 class Sense: |
47 |
47 |
48 def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None): |
48 def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None): |
49 if not pos: |
49 if not pos: |
50 raise ParseException("Part of speech expected...\n") |
50 raise ParseException("Part of speech expected...\n") |
51 self.pos = pos |
51 self.pos = pos |
52 self.tr_list = tr_list |
52 self.tr_list = tr_list |
53 self.ex_list = ex_list |
53 self.ex_list = ex_list |
54 self.glos_list = glos_list |
54 self.glos_list = glos_list |
55 self.ant_list = ant_list |
55 self.ant_list = ant_list |
56 self.syn_list = syn_list |
56 self.syn_list = syn_list |
57 self.rel_list = rel_list |
57 self.rel_list = rel_list |
58 self.topic_list = topic_list |
58 self.topic_list = topic_list |
59 self.hyper_list = hyper_list |
|
60 self.hypo_list = hypo_list |
|
59 |
61 |
60 def add_tr(self, tr): |
62 def add_tr(self, tr): |
61 if self.tr_list: |
63 if self.tr_list: |
62 self.tr_list.append(tr) |
64 self.tr_list.append(tr) |
63 else: |
65 else: |
96 def add_topic(self, topic): |
98 def add_topic(self, topic): |
97 if self.topic_list: |
99 if self.topic_list: |
98 self.topic_list.append(topic) |
100 self.topic_list.append(topic) |
99 else: |
101 else: |
100 self.topic_list = [topic] |
102 self.topic_list = [topic] |
103 |
|
104 def add_hyper(self, hyper): |
|
105 if self.hyper_list: |
|
106 self.hyper_list.append(hyper) |
|
107 else: |
|
108 self.hyper_list = [hyper] |
|
109 |
|
110 def add_hypo(self, hypo): |
|
111 if self.hypo_list: |
|
112 self.hypo_list.append(hypo) |
|
113 else: |
|
114 self.hypo_list = [hypo] |
|
101 |
115 |
102 def __str__(self): |
116 def __str__(self): |
103 if tr_list: |
117 if tr_list: |
104 (lang, text) = self.tr_list[0] |
118 (lang, text) = self.tr_list[0] |
105 return "{}: {}".format(lang, text) |
119 return "{}: {}".format(lang, text) |
116 HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" ) |
130 HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" ) |
117 HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") |
131 HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") |
118 HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$") |
132 HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$") |
119 TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$") |
133 TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$") |
120 TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") |
134 TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") |
121 TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$") |
135 TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""") |
122 TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$") |
136 TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$") |
123 TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") |
137 TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") |
124 SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") |
138 SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") |
125 ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") |
139 ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") |
126 REL_RE = regex.compile(u"^rel: (\\p{L}.*)$") |
140 REL_RE = regex.compile(u"^rel: (\\p{L}.*)$") |
141 HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$") |
|
142 HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$") |
|
127 |
143 |
128 CONT_RE = regex.compile(u"^ +(.*)") |
144 CONT_RE = regex.compile(u"^ +(.*)") |
129 |
145 |
130 TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$") |
146 TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$") |
131 |
147 |
338 rel = rel.strip() |
354 rel = rel.strip() |
339 if len(rel) == 0: |
355 if len(rel) == 0: |
340 raise ParseException("""Empty relation...""") |
356 raise ParseException("""Empty relation...""") |
341 sense.add_rel(rel) |
357 sense.add_rel(rel) |
342 continue |
358 continue |
359 m = self.HYPER_RE.match(self.line) |
|
360 if m is not None: |
|
361 hypers = m.group(1).split(";") |
|
362 for hyper in hypers: |
|
363 hyper = hyper.strip() |
|
364 if len(hyper) == 0: |
|
365 raise ParseException("""Empty hypernym...""") |
|
366 sense.add_hyper(hyper) |
|
367 continue |
|
368 m = self.HYPO_RE.match(self.line) |
|
369 if m is not None: |
|
370 hypos = m.group(1).split(";") |
|
371 for hypo in hypos: |
|
372 hypo = hypo.strip() |
|
373 if len(hypo) == 0: |
|
374 raise ParseException("""Empty hyponym...""") |
|
375 sense.add_hypo(hypo) |
|
376 continue |
|
343 m = self.TRANSL_RE.match(self.line) |
377 m = self.TRANSL_RE.match(self.line) |
344 if m is not None: |
378 if m is not None: |
345 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation())) |
379 sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation())) |
346 read = False |
380 read = False |
347 continue |
381 continue |