py/gadict_freq.py
changeset 757 5417f2102dc5
parent 723 53095b480a73
child 804 7aa283584ee6
equal deleted inserted replaced
756:c1d3555458ad 757:5417f2102dc5
     1 
     1 
     2 import sys
     2 import sys
     3 import codecs
     3 import codecs
     4 import io
     4 import io
     5 import regex
     5 import re
     6 
     6 
     7 class WordlistParser:
     7 class WordlistParser:
     8 
     8 
     9     def __init__(self, stream):
     9     def __init__(self, stream):
    10         self.stream = stream
    10         self.stream = stream
    19             wlist.append(line)
    19             wlist.append(line)
    20         return wlist
    20         return wlist
    21 
    21 
    22 class WordformParser:
    22 class WordformParser:
    23 
    23 
    24     BASEVAR_RE = regex.compile(u"^(\t)?(.*)$")
    24     BASEVAR_RE = re.compile(u"^(\t)?(.*)$", re.UNICODE)
    25 
    25 
    26     def __init__(self, stream, limit = None):
    26     def __init__(self, stream, limit = None):
    27         self.stream = stream
    27         self.stream = stream
    28         self.limit = limit
    28         self.limit = limit
    29         self.lineno = 0
    29         self.lineno = 0
    48             wlist.append(headword)
    48             wlist.append(headword)
    49         return wlist
    49         return wlist
    50 
    50 
    51 class FreqlistParser:
    51 class FreqlistParser:
    52 
    52 
    53     FREQ_RE = regex.compile(u"^([0-9]+) (.*)$")
    53     FREQ_RE = re.compile(u"^([0-9]+) (.*)$", re.UNICODE)
    54 
    54 
    55     def __init__(self, stream, limit = None):
    55     def __init__(self, stream, limit = None):
    56         self.stream = stream
    56         self.stream = stream
    57         self.limit = limit
    57         self.limit = limit
    58         self.lineno = 0
    58         self.lineno = 0
    78 
    78 
    79     if len(sys.argv) < 3:
    79     if len(sys.argv) < 3:
    80         raise Exception(USAGE)
    80         raise Exception(USAGE)
    81     FINAME = sys.argv[1]
    81     FINAME = sys.argv[1]
    82 
    82 
    83     COMMAND_RE = regex.compile("([-+])([0-9]+)?([bf]):([^:]+)")
    83     COMMAND_RE = re.compile("([-+])([0-9]+)?([bf]):([^:]+)")
    84 
    84 
    85     IN_SET = set()
    85     IN_SET = set()
    86     EX_SET = set()
    86     EX_SET = set()
    87 
    87 
    88     for idx in range(1, len(sys.argv)):
    88     for idx in range(1, len(sys.argv)):