# Pronunciation line: leading spaces, then a bracketed run of letters,
# apostrophes and spaces, e.g. "  [pro nun'ci a tion]".
HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
# Part-of-speech tag. The alternation is grouped so that "^" and "$" anchor
# every alternative; ungrouped, "^" bound only to "n" and "$" only to "abbr",
# so strings such as "noun" or "vague" were accepted by match().
TRANSL_POS_RE = regex.compile(r"^(?:n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr)$")
# Translation line: language code, ": ", then text starting with a letter
# or "(".
TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
# Example line: language code, "> ", then text starting with a letter.
TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")

# Continuation line: starts with one or more spaces; group 1 is the rest of
# the line (without the line terminator).
CONT_RE = regex.compile(r"^ +(.*)")

# Run of Unicode separator characters at the end of a string.
TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")

# Prelude fields, one "key: value" per line; group 1 is the value.
PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")
PRELUDE_URL_RE = regex.compile(r"^url: (.*)")
PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")
PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")
# The space after "about:" is optional, so an empty first line is allowed.
PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")
32 |
48 |
def __init__(self):
    """Create the parser; no per-instance setup is performed here."""
35 |
51 |
36 def readline(self): |
52 def readline(self): |
51 self.parse_article() |
67 self.parse_article() |
52 except ParseException as ex: |
68 except ParseException as ex: |
53 raise ParseException(ex.msg, self.lineno, self.line) from ex |
69 raise ParseException(ex.msg, self.lineno, self.line) from ex |
54 return self.dom |
70 return self.dom |
55 |
71 |
|
def parse_continuation(self):
    """Collect the continuation lines that follow the current line.

    Reads lines with ``self.readline()`` for as long as they are either
    indented (matched by ``self.CONT_RE``) or blank. Each indented line
    contributes a newline followed by its text with the leading spaces
    removed; each blank line contributes a single newline.

    Returns:
        The collected text. It is empty when the very next line is not a
        continuation; otherwise it begins with a newline.

    On return ``self.line`` holds the first line that is not part of the
    continuation (it has been read but not consumed), unless ``self.eof``
    is set.
    """
    pieces = []
    while True:
        self.readline()
        if self.eof:
            break
        # The pattern is looked up through ``self``, like the other
        # patterns used by this parser; a bare ``CONT_RE`` is not visible
        # from inside a method body.
        m = self.CONT_RE.match(self.line)
        if m is not None:
            pieces.append("\n" + m.group(1))
        elif len(self.line) == 1:
            # A one-character line is just the line terminator: keep the
            # blank line as part of the text.
            pieces.append("\n")
        else:
            break
    return "".join(pieces)
|
85 |
def parse_prelude(self):
    """Read the dictionary prelude up to the first "__" delimiter.

    Recognised lines fill a new ``Prelude``: ``name:`` sets ``name``;
    ``url:``, ``by:`` and ``term:`` append to ``urls``, ``authors`` and
    ``licences``; ``about:`` (plus its continuation lines) is appended to
    ``about``. Lines matching none of the patterns are skipped. The
    finished prelude is appended to ``self.dom``.

    Raises:
        ParseException: if the input ends before a delimiter is found.
    """
    pre = Prelude()
    while True:
        self.readline()
        if self.eof:
            raise ParseException("There are no articles...")
        # parse_continuation() stops on a line it has read but not used,
        # so that line is re-tested here: it may open another "about:"
        # block, which would otherwise be silently dropped.
        m = self.PRELUDE_ABOUT_RE.match(self.line)
        while m:
            pre.about += m.group(1) + self.parse_continuation()
            if self.eof:
                raise ParseException("There are no articles...")
            m = self.PRELUDE_ABOUT_RE.match(self.line)
        # The current line (possibly the one left over by the about
        # block) is now checked against the remaining prelude patterns.
        if self.SEPARATOR_RE.match(self.line):
            break
        m = self.PRELUDE_NAME_RE.match(self.line)
        if m:
            pre.name = m.group(1)
            continue
        m = self.PRELUDE_URL_RE.match(self.line)
        if m:
            pre.urls.append(m.group(1))
            continue
        m = self.PRELUDE_AUTHOR_RE.match(self.line)
        if m:
            pre.authors.append(m.group(1))
            continue
        m = self.PRELUDE_LICENSE_RE.match(self.line)
        if m:
            pre.licences.append(m.group(1))
            continue
    self.dom.append(pre)
64 |
117 |
65 def parse_article(self): |
118 def parse_article(self): |
66 """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter.""" |
119 """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter.""" |
67 self.words = None |
120 self.words = None |
68 self.tran = None |
121 self.tran = None |