gadict: comparison py/gadict.py

equal deleted inserted replaced

-:c1d3555458ad
+:5417f2102dc5
 """
 gadict dictionary format parser.
 """
 import sys
-import regex
+import re
 class Prelude:
 """Dictionary metainfo structure."""
 name = None
 return "<Sense {}>".format(str(self))
 class Parser:
 """gadict dictionary format parser."""
-COMMENT_RE = regex.compile(r"^# ")
+COMMENT_RE = re.compile("^# ")
-SEPARATOR_RE = regex.compile(u"^__$")
+SEPARATOR_RE = re.compile(u"^__$", re.UNICODE)
-HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" )
+HEADWORD_RE = re.compile( u"^(\\w.*)$" )
-HEADWORD_VAR_RE = regex.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
+HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
-HEADWORD_PRON_RE = regex.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɹʃʧθðɡʒŋɾ ]+)\\]$")
+HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɹʃʧθðɡʒŋɾ ]+)\\]$", re.UNICODE)
-TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
+TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
-TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
+TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
-TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""")
+TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
-TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}\\p{N}].*)$")
+TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE)
-TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
+TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE)
-SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
+SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE)
-ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
+ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE)
-REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
+REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE)
-HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$")
+HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE)
-HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$")
+HYPO_RE = re.compile(u"^hypo: (\\w.*)$", re.UNICODE)
-CONT_RE = regex.compile(u"^ +(.*)")
+CONT_RE = re.compile(u"^ +(.*)", re.UNICODE)
-TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
+TRAILING_SPACES_RE = re.compile(u"\\s+$", re.UNICODE)
-PRELUDE_NAME_RE = regex.compile(u"^name: (.*)")
+PRELUDE_NAME_RE = re.compile(u"^name: (.*)", re.UNICODE)
-PRELUDE_URL_RE = regex.compile(u"^url: (.*)")
+PRELUDE_URL_RE = re.compile(u"^url: (.*)", re.UNICODE)
-PRELUDE_AUTHOR_RE = regex.compile(u"^by: (.*)")
+PRELUDE_AUTHOR_RE = re.compile(u"^by: (.*)", re.UNICODE)
-PRELUDE_LICENSE_RE = regex.compile(u"^term: (.*)")
+PRELUDE_LICENSE_RE = re.compile(u"^term: (.*)", re.UNICODE)
-PRELUDE_ABOUT_RE = regex.compile(u"^about: ?(.*)")
+PRELUDE_ABOUT_RE = re.compile(u"^about: ?(.*)", re.UNICODE)
 def __init__(self):
 pass
 def readline(self):
 while True:
 self.line = self.stream.readline()
 self.eof = len(self.line) == 0
 if not self.eof:
 self.lineno += 1
+self.line = self.line.rstrip('\n')
 if self.TRAILING_SPACES_RE.search(self.line):
 raise ParseException("Traling spaces detected...\n")
 if self.COMMENT_RE.search(self.line):
 continue
 break
 if self.eof:
 return string
 m = self.CONT_RE.match(self.line)
 if m is not None:
 string += "\n" + m.group(1)
-elif len(self.line) == 1:
+elif len(self.line) == 0:
 string += "\n"
 else:
 return string
 def parse_prelude(self):
 self.parse_translation()
 self.dom.append((self.words, self.tran))
 def parse_empty_line(self):
 self.readline()
-if self.eof or len(self.line) != 1:
+if self.eof or len(self.line) != 0:
 raise ParseException(""""__" delimiter should followed by empty line...""")
 def parse_headlines(self):
 """Try to match word variations with attributed. Assume that `self.line` on preceding empty line."""
 self.words = []
 word = m.group(1)
 pron = None
 attrs = set()
 while True:
 self.readline()
-if self.eof or len(self.line) == 1:
+if self.eof or len(self.line) == 0:
 break
 m = self.HEADWORD_RE.match(self.line)
 if m is not None:
 if word is None:
 raise ParseException("""Didn't match previous headword...""")
 m = self.SEPARATOR_RE.match(self.line)
 if m is not None:
 if sense:
 senses.append(sense)
 break
-if len(self.line) == 1:
+if len(self.line) == 0:
 if sense:
 senses.append(sense)
 sense = None
 continue
 m = self.TRANSL_POS_RE.match(self.line)

changeset 757	5417f2102dc5
parent 740	77668cb05069
child 871	1dfca1e1f42a