equal
deleted
inserted
replaced
|
1 """ |
|
2 gadict dictionary format parser. |
|
3 """ |
1 |
4 |
2 import regex |
5 import regex |
3 |
6 |
4 |
7 |
5 class Prelude: |
8 class Prelude: |
|
9 """Dictionary metainfo structure.""" |
6 name = None |
10 name = None |
7 about = "" |
11 about = "" |
8 urls = [] |
12 urls = [] |
9 authors = [] |
13 authors = [] |
10 licences = [] |
14 licences = [] |
11 |
15 |
12 |
16 |
13 class ParseException(BaseException): |
17 class ParseException(BaseException): |
14 |
18 |
15 def __init__(self, msg, lineno = None, line = None): |
19 def __init__(self, msg, lineno=None, line=None): |
|
20 super().__init__() |
16 self.msg = msg |
21 self.msg = msg |
17 self.lineno = lineno |
22 self.lineno = lineno |
18 self.line = line |
23 self.line = line |
19 |
24 |
20 def __repr__(self): |
25 def __repr__(self): |
25 else: |
30 else: |
26 return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line) |
31 return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line) |
27 |
32 |
28 |
33 |
29 class Parser: |
34 class Parser: |
|
35 """gadict dictionary format parser.""" |
30 |
36 |
31 SEPARATOR_RE = regex.compile(r"^__$") |
37 SEPARATOR_RE = regex.compile(r"^__$") |
32 HEADWORD_RE = regex.compile(r"^(\p{L}.*)$") |
38 HEADWORD_RE = regex.compile(r"^(\p{L}.*)$") |
33 HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$") |
39 HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$") |
34 HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$") |
40 HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$") |
64 try: |
70 try: |
65 self.parse_prelude() |
71 self.parse_prelude() |
66 while not self.eof: |
72 while not self.eof: |
67 self.parse_article() |
73 self.parse_article() |
68 except ParseException as ex: |
74 except ParseException as ex: |
69 raise ParseException(ex.msg, self.lineno, self.line) from ex |
75 raise ParseException(ex.msg, self.lineno, self.line) |
70 return self.dom |
76 return self.dom |
71 |
77 |
72 def parse_continuation(self): |
78 def parse_continuation(self): |
73 string = "" |
79 string = "" |
74 while True: |
80 while True: |
75 self.readline() |
81 self.readline() |
76 if self.eof: |
82 if self.eof: |
77 return string |
83 return string |
78 m = CONT_RE.match(self.line) |
84 m = self.CONT_RE.match(self.line) |
79 if m is not None: |
85 if m is not None: |
80 string += "\n" + m.group(1) |
86 string += "\n" + m.group(1) |
81 elif len(self.line) == 1: |
87 elif len(self.line) == 1: |
82 string += "\n" |
88 string += "\n" |
83 else: |
89 else: |
201 continue |
207 continue |
202 raise ParseException("""Uknown syntax...""") |
208 raise ParseException("""Uknown syntax...""") |
203 if len(tr) > 0: |
209 if len(tr) > 0: |
204 senses.append((pos, tr, ex)) |
210 senses.append((pos, tr, ex)) |
205 self.tran = senses |
211 self.tran = senses |
206 |
|