diff -r c1d3555458ad -r 5417f2102dc5 py/gadict.py --- a/py/gadict.py Tue Feb 21 10:03:54 2017 +0200 +++ b/py/gadict.py Tue Feb 21 10:10:03 2017 +0200 @@ -4,7 +4,7 @@ """ import sys -import regex +import re class Prelude: @@ -125,32 +125,32 @@ class Parser: """gadict dictionary format parser.""" - COMMENT_RE = regex.compile(r"^# ") + COMMENT_RE = re.compile("^# ") - SEPARATOR_RE = regex.compile(u"^__$") - HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" ) - HEADWORD_VAR_RE = regex.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$") - HEADWORD_PRON_RE = regex.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɹʃʧθðɡʒŋɾ ]+)\\]$") - TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$") - TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") - TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""") - TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}\\p{N}].*)$") - TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") - SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") - ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") - REL_RE = regex.compile(u"^rel: (\\p{L}.*)$") - HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$") - HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$") + SEPARATOR_RE = re.compile(u"^__$", re.UNICODE) + HEADWORD_RE = re.compile( u"^(\\w.*)$" ) + HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE) + HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɹʃʧθðɡʒŋɾ ]+)\\]$", re.UNICODE) + TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE) + TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE) + TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE) + TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE) + TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE) + SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE) + ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE) + REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE) + HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE) + HYPO_RE = re.compile(u"^hypo: (\\w.*)$", re.UNICODE) - CONT_RE = regex.compile(u"^ +(.*)") + CONT_RE = re.compile(u"^ +(.*)", re.UNICODE) - TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$") + TRAILING_SPACES_RE = re.compile(u"\\s+$", re.UNICODE) - PRELUDE_NAME_RE = regex.compile(u"^name: (.*)") - PRELUDE_URL_RE = regex.compile(u"^url: (.*)") - PRELUDE_AUTHOR_RE = regex.compile(u"^by: (.*)") - PRELUDE_LICENSE_RE = regex.compile(u"^term: (.*)") - PRELUDE_ABOUT_RE = regex.compile(u"^about: ?(.*)") + PRELUDE_NAME_RE = re.compile(u"^name: (.*)", re.UNICODE) + PRELUDE_URL_RE = re.compile(u"^url: (.*)", re.UNICODE) + PRELUDE_AUTHOR_RE = re.compile(u"^by: (.*)", re.UNICODE) + PRELUDE_LICENSE_RE = re.compile(u"^term: (.*)", re.UNICODE) + PRELUDE_ABOUT_RE = re.compile(u"^about: ?(.*)", re.UNICODE) def __init__(self): pass @@ -161,6 +161,7 @@ self.eof = len(self.line) == 0 if not self.eof: self.lineno += 1 + self.line = self.line.rstrip('\n') if self.TRAILING_SPACES_RE.search(self.line): raise ParseException("Traling spaces detected...\n") if self.COMMENT_RE.search(self.line): @@ -191,7 +192,7 @@ m = self.CONT_RE.match(self.line) if m is not None: string += "\n" + m.group(1) - elif len(self.line) == 1: + elif len(self.line) == 0: string += "\n" else: return string @@ -239,7 +240,7 @@ def parse_empty_line(self): self.readline() - if self.eof or len(self.line) != 1: + if self.eof or len(self.line) != 0: raise ParseException(""""__" delimiter should followed by empty line...""") def parse_headlines(self): @@ -256,7 +257,7 @@ attrs = set() while True: self.readline() - if self.eof or len(self.line) == 1: + if self.eof or len(self.line) == 0: break m = self.HEADWORD_RE.match(self.line) if m is not None: @@ -309,7 +310,7 @@ if sense: senses.append(sense) break - if len(self.line) == 1: + if len(self.line) == 0: if sense: senses.append(sense) sense = None