changeset 1006 | b1f11eff7c70 |
parent 984 | 73d6e2631338 |
child 1011 | fdf5640f221a |
1005:802f3f9c7ea6 | 1006:b1f11eff7c70 |
---|---|
45 def __repr__(self): |
45 def __repr__(self): |
46 return "<Headword {}>".format(self.headword) |
46 return "<Headword {}>".format(self.headword) |
47 |
47 |
48 class Sense: |
48 class Sense: |
49 |
49 |
50 def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None, col_list = None): |
50 def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None, col_list = None, countable = None): |
51 if not pos: |
51 if not pos: |
52 raise ParseException("Part of speech expected...\n") |
52 raise ParseException("Part of speech expected...\n") |
53 self.pos = pos |
53 self.pos = pos |
54 self.tr_list = tr_list |
54 self.tr_list = tr_list |
55 self.ex_list = ex_list |
55 self.ex_list = ex_list |
59 self.rel_list = rel_list |
59 self.rel_list = rel_list |
60 self.topic_list = topic_list |
60 self.topic_list = topic_list |
61 self.hyper_list = hyper_list |
61 self.hyper_list = hyper_list |
62 self.hypo_list = hypo_list |
62 self.hypo_list = hypo_list |
63 self.col_list = col_list |
63 self.col_list = col_list |
64 self.countable = countable |
|
64 |
65 |
65 def add_tr(self, tr): |
66 def add_tr(self, tr): |
66 if self.tr_list: |
67 if self.tr_list: |
67 self.tr_list.append(tr) |
68 self.tr_list.append(tr) |
68 else: |
69 else: |
def add_col(self, col):
    """Append ``col`` to ``self.col_list``, creating the list on first use.

    ``col_list`` defaults to ``None`` in ``__init__``, so the first call
    has to create the list instead of appending to it.

    Args:
        col: the entry to store; it is stored as given, without validation.
    """
    if self.col_list:
        self.col_list.append(col)
    else:
        # Truthiness test: both ``None`` and an empty list land here and are
        # replaced by a fresh one-element list.
        self.col_list = [col]
125 |
|
def set_countable(self, countable):
    """Set ``self.countable`` from a ``'yes'``/``'no'`` string or a bool.

    Args:
        countable: the string ``'yes'`` (stored as ``True``), the string
            ``'no'`` (stored as ``False``), or a ``bool`` (stored unchanged).

    Raises:
        ParseException: if ``countable`` is any other string, or is neither
            a ``str`` nor a ``bool``.
    """
    # NOTE(review): on Python 2 a regex group taken from a unicode line is
    # ``unicode``, not ``str``, and would fall through to the final branch;
    # confirm the target interpreter.
    if isinstance(countable, str):
        if countable == 'yes':
            self.countable = True
        elif countable == 'no':
            self.countable = False
        else:
            raise ParseException("Countable can only be yes/no.")
    elif isinstance(countable, bool):
        self.countable = countable
    else:
        raise ParseException("Countable can only be yes/no or bool.")
|
124 |
138 |
125 def __str__(self): |
139 def __str__(self): |
126 if tr_list: |
140 if tr_list: |
127 (lang, text) = self.tr_list[0] |
141 (lang, text) = self.tr_list[0] |
128 return "{}: {}".format(lang, text) |
142 return "{}: {}".format(lang, text) |
# Whole-line patterns (each is anchored with ^...$) matched against one input
# line at a time.

# One or more leading spaces, then "homo: " and a value that starts and ends
# with a word character.
HEADWORD_HOMO_RE = re.compile(u"^ +homo: (\\w|\\w[-'\\w ;]*\\w)$", re.UNICODE)
# A line consisting solely of one of the listed part-of-speech markers.
TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
# "<lang>: text", "<lang>> text" and "<lang>= text" lines, where <lang> is one
# of ru/uk/la/en; group 1 is the language code, group 2 the text.
TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE)
# "cnt: yes" / "cnt: no"; group 1 is the literal yes/no string.
CNT_RE = re.compile(u"^cnt: (yes|no)$", re.UNICODE)
# "<key>: value" lines; group 1 is the raw value, which must start with a word
# character.
TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE)
SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE)
ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE)
REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE)
HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE)
340 pos = m.group(0) |
355 pos = m.group(0) |
341 sense = Sense(pos) |
356 sense = Sense(pos) |
342 continue |
357 continue |
343 if not sense: |
358 if not sense: |
344 raise ParseException("""Missing part of speech marker...""") |
359 raise ParseException("""Missing part of speech marker...""") |
360 m = self.CNT_RE.match(self.line) |
|
361 if m is not None: |
|
362 sense.set_countable(m.group(1)) |
|
363 continue |
|
345 m = self.TOPIC_RE.match(self.line) |
364 m = self.TOPIC_RE.match(self.line) |
346 if m is not None: |
365 if m is not None: |
347 topics = m.group(1).split(";") |
366 topics = m.group(1).split(";") |
348 for topic in topics: |
367 for topic in topics: |
349 topic = topic.strip() |
368 topic = topic.strip() |