py/gadict.py
changeset 552 7398bc1829d6
parent 542 b5197c70972c
child 553 45a3138c9b4d
--- a/py/gadict.py	Sun Sep 11 22:53:55 2016 +0300
+++ b/py/gadict.py	Tue Sep 13 17:41:14 2016 +0300
@@ -2,6 +2,7 @@
 gadict dictionary format parser.
 """
 
+import sys
 import regex
 
 
@@ -17,7 +18,7 @@
 class ParseException(BaseException):
 
     def __init__(self, msg, lineno=None, line=None):
-        super().__init__()
+        BaseException.__init__(self)
         self.msg = msg
         self.lineno = lineno
         self.line = line
@@ -26,9 +27,9 @@
         if self.lineno is None:
             return self.msg
         elif self.line is None:
-            return ":{:d}:{:s}".format(self.lineno, self.msg)
+            return ":{:d}:{:s}".format(self.lineno, self.msg.encode('utf-8'))
         else:
-            return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line)
+            return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg.encode('utf-8'), self.line.encode('utf-8'))
 
 class Sense:
 
@@ -77,26 +78,26 @@
 
     COMMENT_RE = regex.compile(r"^# ")
 
-    SEPARATOR_RE = regex.compile(r"^__$")
-    HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
-    HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
-    HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
-    TRANSL_POS_RE = regex.compile(r"^n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\.v|contr|abbr|prefix$")
-    TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(].*)$")
-    TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> ([-\p{L}].*)$")
-    TOPIC_RE = regex.compile(r"^topic: (\p{L}.*)$")
-    SYN_RE = regex.compile(r"^syn: (\p{L}.*)$")
-    ANT_RE = regex.compile(r"^ant: (\p{L}.*)$")
+    SEPARATOR_RE = regex.compile(u"^__$")
+    HEADWORD_RE = regex.compile(u"^(\\p{L}.*)$")
+    HEADWORD_VAR_RE = regex.compile(u"^ +(s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
+    HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\\p{L}' ]+)\\]$")  # "\\p", not "\p": avoid an invalid string escape
+    TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")  # group so ^/$ anchor every alternative
+    TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
+    TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$")
+    TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
+    SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
+    ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
 
-    CONT_RE = regex.compile(r"^ +(.*)")
+    CONT_RE = regex.compile(u"^ +(.*)")
 
-    TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
+    TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
 
-    PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")
-    PRELUDE_URL_RE = regex.compile(r"^url: (.*)")
-    PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")
-    PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")
-    PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")
+    PRELUDE_NAME_RE = regex.compile(u"^name: (.*)")
+    PRELUDE_URL_RE = regex.compile(u"^url: (.*)")
+    PRELUDE_AUTHOR_RE = regex.compile(u"^by: (.*)")
+    PRELUDE_LICENSE_RE = regex.compile(u"^term: (.*)")
+    PRELUDE_ABOUT_RE = regex.compile(u"^about: ?(.*)")
 
     def __init__(self):
         pass
@@ -122,6 +123,9 @@
             while not self.eof:
                 self.parse_article()
         except ParseException as ex:
+            if sys.version_info.major == 2:
+                import traceback
+                traceback.print_exc()
             raise ParseException(ex.msg, self.lineno, self.line)
         return self.dom