diff -r 8ba39e31d608 -r 7398bc1829d6 py/gadict.py --- a/py/gadict.py Sun Sep 11 22:53:55 2016 +0300 +++ b/py/gadict.py Tue Sep 13 17:41:14 2016 +0300 @@ -2,6 +2,7 @@ gadict dictionary format parser. """ +import sys import regex @@ -17,7 +18,7 @@ class ParseException(BaseException): def __init__(self, msg, lineno=None, line=None): - super().__init__() + BaseException.__init__(self) self.msg = msg self.lineno = lineno self.line = line @@ -26,9 +27,9 @@ if self.lineno is None: return self.msg elif self.line is None: - return ":{:d}:{:s}".format(self.lineno, self.msg) + return ":{:d}:{:s}".format(self.lineno, self.msg.encode('utf-8')) else: - return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line) + return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8')) class Sense: @@ -77,26 +78,26 @@ COMMENT_RE = regex.compile(r"^# ") - SEPARATOR_RE = regex.compile(r"^__$") - HEADWORD_RE = regex.compile(r"^(\p{L}.*)$") - HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") - HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$") - TRANSL_POS_RE = regex.compile(r"^n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\.v|contr|abbr|prefix$") - TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(].*)$") - TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> ([-\p{L}].*)$") - TOPIC_RE = regex.compile(r"^topic: (\p{L}.*)$") - SYN_RE = regex.compile(r"^syn: (\p{L}.*)$") - ANT_RE = regex.compile(r"^ant: (\p{L}.*)$") + SEPARATOR_RE = regex.compile(u"^__$") + HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" ) + HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") + HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$") + TRANSL_POS_RE = regex.compile(u"^n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix$") + TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") + TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$") + TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") + SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") + ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") - CONT_RE = regex.compile(r"^ +(.*)") + CONT_RE = regex.compile(u"^ +(.*)") - TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$") + TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$") - PRELUDE_NAME_RE = regex.compile(r"^name: (.*)") - PRELUDE_URL_RE = regex.compile(r"^url: (.*)") - PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)") - PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)") - PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)") + PRELUDE_NAME_RE = regex.compile(u"^name: (.*)") + PRELUDE_URL_RE = regex.compile(u"^url: (.*)") + PRELUDE_AUTHOR_RE = regex.compile(u"^by: (.*)") + PRELUDE_LICENSE_RE = regex.compile(u"^term: (.*)") + PRELUDE_ABOUT_RE = regex.compile(u"^about: ?(.*)") def __init__(self): pass @@ -122,6 +123,9 @@ while not self.eof: self.parse_article() except ParseException as ex: + if sys.version_info.major == 2: + import traceback + traceback.print_exc() raise ParseException(ex.msg, self.lineno, self.line) return self.dom