15 |
16 |
16 |
17 |
17 class ParseException(BaseException): |
18 class ParseException(BaseException): |
18 |
19 |
19 def __init__(self, msg, lineno=None, line=None): |
20 def __init__(self, msg, lineno=None, line=None): |
20 super().__init__() |
21 BaseException.__init__(self) |
21 self.msg = msg |
22 self.msg = msg |
22 self.lineno = lineno |
23 self.lineno = lineno |
23 self.line = line |
24 self.line = line |
24 |
25 |
25 def __repr__(self): |
26 def __repr__(self): |
26 if self.lineno is None: |
27 if self.lineno is None: |
27 return self.msg |
28 return self.msg |
28 elif self.line is None: |
29 elif self.line is None: |
29 return ":{:d}:{:s}".format(self.lineno, self.msg) |
30 return ":{:d}:{:s}".format(self.lineno, self.msg.encode('utf-8')) |
30 else: |
31 else: |
31 return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line) |
32 return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8')) |
32 |
33 |
33 class Sense: |
34 class Sense: |
34 |
35 |
35 def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None): |
36 def __init__(self, pos, tr_list = None, ex_list = None, syn_list = None, ant_list = None, topic_list = None): |
36 if not pos: |
37 if not pos: |
75 class Parser: |
76 class Parser: |
76 """gadict dictionary format parser.""" |
77 """gadict dictionary format parser.""" |
77 |
78 |
78 COMMENT_RE = regex.compile(r"^# ") |
79 COMMENT_RE = regex.compile(r"^# ") |
79 |
80 |
80 SEPARATOR_RE = regex.compile(r"^__$") |
81 SEPARATOR_RE = regex.compile(u"^__$") |
81 HEADWORD_RE = regex.compile(r"^(\p{L}.*)$") |
82 HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" ) |
82 HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") |
83 HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") |
83 HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$") |
84 HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$") |
84 TRANSL_POS_RE = regex.compile(r"^n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\.v|contr|abbr|prefix$") |
85 TRANSL_POS_RE = regex.compile(u"^n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix$") |
85 TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(].*)$") |
86 TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") |
86 TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> ([-\p{L}].*)$") |
87 TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$") |
87 TOPIC_RE = regex.compile(r"^topic: (\p{L}.*)$") |
88 TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") |
88 SYN_RE = regex.compile(r"^syn: (\p{L}.*)$") |
89 SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") |
89 ANT_RE = regex.compile(r"^ant: (\p{L}.*)$") |
90 ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") |
90 |
91 |
91 CONT_RE = regex.compile(r"^ +(.*)") |
92 CONT_RE = regex.compile(u"^ +(.*)") |
92 |
93 |
93 TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$") |
94 TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$") |
94 |
95 |
95 PRELUDE_NAME_RE = regex.compile(r"^name: (.*)") |
96 PRELUDE_NAME_RE = regex.compile(u"^name: (.*)") |
96 PRELUDE_URL_RE = regex.compile(r"^url: (.*)") |
97 PRELUDE_URL_RE = regex.compile(u"^url: (.*)") |
97 PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)") |
98 PRELUDE_AUTHOR_RE = regex.compile(u"^by: (.*)") |
98 PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)") |
99 PRELUDE_LICENSE_RE = regex.compile(u"^term: (.*)") |
99 PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)") |
100 PRELUDE_ABOUT_RE = regex.compile(u"^about: ?(.*)") |
100 |
101 |
101 def __init__(self): |
102 def __init__(self): |
102 pass |
103 pass |
103 |
104 |
104 def readline(self): |
105 def readline(self): |
120 try: |
121 try: |
121 self.parse_prelude() |
122 self.parse_prelude() |
122 while not self.eof: |
123 while not self.eof: |
123 self.parse_article() |
124 self.parse_article() |
124 except ParseException as ex: |
125 except ParseException as ex: |
|
126 if sys.version_info.major == 2: |
|
127 import traceback |
|
128 traceback.print_exc() |
125 raise ParseException(ex.msg, self.lineno, self.line) |
129 raise ParseException(ex.msg, self.lineno, self.line) |
126 return self.dom |
130 return self.dom |
127 |
131 |
128 def parse_prelude_continuation(self): |
132 def parse_prelude_continuation(self): |
129 string = "" |
133 string = "" |