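Switch from the third-party `regex` module to Python's built-in `re` module.

`re` has no `\p{...}` Unicode property classes, so `\p{L}` is replaced with `\w`, `\p{N}` with `\d`, and `\p{Z}` with `\s` (compiled with `re.UNICODE`). Lines are now stripped of the trailing '\n' before matching, so empty-line checks compare the line length against 0 instead of 1.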
--- a/py/gadict.py Tue Feb 21 10:03:54 2017 +0200
+++ b/py/gadict.py Tue Feb 21 10:10:03 2017 +0200
@@ -4,7 +4,7 @@
"""
import sys
-import regex
+import re
class Prelude:
@@ -125,32 +125,32 @@
class Parser:
"""gadict dictionary format parser."""
- COMMENT_RE = regex.compile(r"^# ")
+ COMMENT_RE = re.compile("^# ")
- SEPARATOR_RE = regex.compile(u"^__$")
- HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" )
- HEADWORD_VAR_RE = regex.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$")
- HEADWORD_PRON_RE = regex.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɹʃʧθðɡʒŋɾ ]+)\\]$")
- TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$")
- TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$")
- TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""")
- TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}\\p{N}].*)$")
- TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$")
- SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$")
- ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$")
- REL_RE = regex.compile(u"^rel: (\\p{L}.*)$")
- HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$")
- HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$")
+ SEPARATOR_RE = re.compile(u"^__$", re.UNICODE)
+ HEADWORD_RE = re.compile( u"^(\\w.*)$" )
+ HEADWORD_VAR_RE = re.compile(u"^ +(rare|s|pl|v[123]|male|female|comp|super|abbr|Am|Br|Au)$", re.UNICODE)
+ HEADWORD_PRON_RE = re.compile(u"^ +\\[([a-zˌˈːəæɛɒʊɪɔɜɑʌɚɹʃʧθðɡʒŋɾ ]+)\\]$", re.UNICODE)
+ TRANSL_POS_RE = re.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$", re.UNICODE)
+ TRANSL_RE = re.compile(u"^(ru|uk|la|en): ([\\w(].*)$", re.UNICODE)
+ TRANSL_EX_RE = re.compile(u"""^(ru|uk|la|en)> ([-'"\\w].*)$""", re.UNICODE)
+ TRANSL_GLOS_RE = re.compile(u"^(ru|uk|la|en)= ([-\\w\\d].*)$", re.UNICODE)
+ TOPIC_RE = re.compile(u"^topic: (\\w.*)$", re.UNICODE)
+ SYN_RE = re.compile(u"^syn: (\\w.*)$", re.UNICODE)
+ ANT_RE = re.compile(u"^ant: (\\w.*)$", re.UNICODE)
+ REL_RE = re.compile(u"^rel: (\\w.*)$", re.UNICODE)
+ HYPER_RE = re.compile(u"^hyper: (\\w.*)$", re.UNICODE)
+ HYPO_RE = re.compile(u"^hypo: (\\w.*)$", re.UNICODE)
- CONT_RE = regex.compile(u"^ +(.*)")
+ CONT_RE = re.compile(u"^ +(.*)", re.UNICODE)
- TRAILING_SPACES_RE = regex.compile(u"\\p{Z}+$")
+ TRAILING_SPACES_RE = re.compile(u"\\s+$", re.UNICODE)
- PRELUDE_NAME_RE = regex.compile(u"^name: (.*)")
- PRELUDE_URL_RE = regex.compile(u"^url: (.*)")
- PRELUDE_AUTHOR_RE = regex.compile(u"^by: (.*)")
- PRELUDE_LICENSE_RE = regex.compile(u"^term: (.*)")
- PRELUDE_ABOUT_RE = regex.compile(u"^about: ?(.*)")
+ PRELUDE_NAME_RE = re.compile(u"^name: (.*)", re.UNICODE)
+ PRELUDE_URL_RE = re.compile(u"^url: (.*)", re.UNICODE)
+ PRELUDE_AUTHOR_RE = re.compile(u"^by: (.*)", re.UNICODE)
+ PRELUDE_LICENSE_RE = re.compile(u"^term: (.*)", re.UNICODE)
+ PRELUDE_ABOUT_RE = re.compile(u"^about: ?(.*)", re.UNICODE)
def __init__(self):
pass
@@ -161,6 +161,7 @@
self.eof = len(self.line) == 0
if not self.eof:
self.lineno += 1
+ self.line = self.line.rstrip('\n')
if self.TRAILING_SPACES_RE.search(self.line):
raise ParseException("Traling spaces detected...\n")
if self.COMMENT_RE.search(self.line):
@@ -191,7 +192,7 @@
m = self.CONT_RE.match(self.line)
if m is not None:
string += "\n" + m.group(1)
- elif len(self.line) == 1:
+ elif len(self.line) == 0:
string += "\n"
else:
return string
@@ -239,7 +240,7 @@
def parse_empty_line(self):
self.readline()
- if self.eof or len(self.line) != 1:
+ if self.eof or len(self.line) != 0:
raise ParseException(""""__" delimiter should followed by empty line...""")
def parse_headlines(self):
@@ -256,7 +257,7 @@
attrs = set()
while True:
self.readline()
- if self.eof or len(self.line) == 1:
+ if self.eof or len(self.line) == 0:
break
m = self.HEADWORD_RE.match(self.line)
if m is not None:
@@ -309,7 +310,7 @@
if sense:
senses.append(sense)
break
- if len(self.line) == 1:
+ if len(self.line) == 0:
if sense:
senses.append(sense)
sense = None
--- a/py/gadict_c5.py Tue Feb 21 10:03:54 2017 +0200
+++ b/py/gadict_c5.py Tue Feb 21 10:10:03 2017 +0200
@@ -4,7 +4,7 @@
import io
import sys
import codecs
-import regex
+import re
import gadict
import gadict_freq
@@ -16,9 +16,9 @@
FREQ_SOURCES = []
# -lang:ru,uk
-ARG_LANG_RE = regex.compile("-lang:(.+)")
+ARG_LANG_RE = re.compile("-lang:(.+)")
# -freq:var:TAG=FILE or -freq:freq:TAG=FILE
-ARG_FREQ_RE = regex.compile("-freq:(freq|var):([^=]+)=(.+)")
+ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)")
look_for_files = False
for idx in range(1, len(sys.argv)):
--- a/py/gadict_freq.py Tue Feb 21 10:03:54 2017 +0200
+++ b/py/gadict_freq.py Tue Feb 21 10:10:03 2017 +0200
@@ -2,7 +2,7 @@
import sys
import codecs
import io
-import regex
+import re
class WordlistParser:
@@ -21,7 +21,7 @@
class WordformParser:
- BASEVAR_RE = regex.compile(u"^(\t)?(.*)$")
+ BASEVAR_RE = re.compile(u"^(\t)?(.*)$", re.UNICODE)
def __init__(self, stream, limit = None):
self.stream = stream
@@ -50,7 +50,7 @@
class FreqlistParser:
- FREQ_RE = regex.compile(u"^([0-9]+) (.*)$")
+ FREQ_RE = re.compile(u"^([0-9]+) (.*)$", re.UNICODE)
def __init__(self, stream, limit = None):
self.stream = stream
@@ -80,7 +80,7 @@
raise Exception(USAGE)
FINAME = sys.argv[1]
- COMMAND_RE = regex.compile("([-+])([0-9]+)?([bf]):([^:]+)")
+ COMMAND_RE = re.compile("([-+])([0-9]+)?([bf]):([^:]+)")
IN_SET = set()
EX_SET = set()
--- a/py/gadict_headwords.py Tue Feb 21 10:03:54 2017 +0200
+++ b/py/gadict_headwords.py Tue Feb 21 10:10:03 2017 +0200
@@ -2,7 +2,7 @@
import sys
import codecs
import io
-import regex
+import re
FINAME = None
FONAME = None
@@ -20,10 +20,10 @@
class GadictParser:
- SEPARATOR_RE = regex.compile(u"^__$")
- EMPTY_RE = regex.compile( u"^$" )
- HEADWORD_ATTR_RE = regex.compile( u"^ " )
- HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" )
+ SEPARATOR_RE = re.compile(u"^__$", re.UNICODE)
+ EMPTY_RE = re.compile( u"^$" )
+ HEADWORD_ATTR_RE = re.compile( u"^ " )
+ HEADWORD_RE = re.compile(u"^(\\w.*)$", re.UNICODE)
def __init__(self, stream):
self.stream = stream