equal
deleted
inserted
replaced
17 break |
17 break |
18 line = line.strip() |
18 line = line.strip() |
19 wlist.append(line) |
19 wlist.append(line) |
20 return wlist |
20 return wlist |
21 |
21 |
22 class BasewordParser: |
22 class HeadVarParser: |
23 |
23 |
24 BASEWORD_RE = regex.compile(u"^(\t)?(.*)$") |
24 BASEVAR_RE = regex.compile(u"^(\t)?(.*)$") |
25 |
25 |
26 def __init__(self, stream, limit): |
26 def __init__(self, stream, limit = None): |
27 self.stream = stream |
27 self.stream = stream |
28 self.limit = limit |
28 self.limit = limit |
29 self.lineno = 0 |
29 self.lineno = 0 |
30 self.cnt = 0 |
30 self.cnt = 0 |
31 |
31 |
34 while True: |
34 while True: |
35 line = self.stream.readline() |
35 line = self.stream.readline() |
36 if len(line) == 0: |
36 if len(line) == 0: |
37 break |
37 break |
38 self.lineno += 1 |
38 self.lineno += 1 |
39 m = self.BASEWORD_RE.match(line) |
39 m = self.BASEVAR_RE.match(line) |
40 if not m: |
40 if not m: |
41 raise Exception("Line {:d}: '{:s}' wrong format\n".format(self.lineno, line)) |
41 raise Exception("Line {:d}: '{:s}' wrong format\n".format(self.lineno, line)) |
42 tab = m.group(1) |
42 tab = m.group(1) |
43 if not tab: |
43 if not tab: |
44 self.cnt += 1 |
44 self.cnt += 1 |
50 |
50 |
51 class FreqlistParser: |
51 class FreqlistParser: |
52 |
52 |
53 FREQ_RE = regex.compile(u"^([0-9]+) (.*)$") |
53 FREQ_RE = regex.compile(u"^([0-9]+) (.*)$") |
54 |
54 |
55 def __init__(self, stream, limit): |
55 def __init__(self, stream, limit = None): |
56 self.stream = stream |
56 self.stream = stream |
57 self.limit = limit |
57 self.limit = limit |
58 self.lineno = 0 |
58 self.lineno = 0 |
59 |
59 |
60 def parse(self): |
60 def parse(self): |
100 mode = m.group(3) |
100 mode = m.group(3) |
101 if limit: |
101 if limit: |
102 limit = int(limit) |
102 limit = int(limit) |
103 with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream: |
103 with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream: |
104 if mode == "b": |
104 if mode == "b": |
105 parser = BasewordParser(stream, limit) |
105 parser = HeadVarParser(stream, limit) |
106 elif mode == "f": |
106 elif mode == "f": |
107 parser = FreqlistParser(stream, limit) |
107 parser = FreqlistParser(stream, limit) |
108 else: |
108 else: |
109 raise Expection("Unknown mode in specification...") |
109 raise Expection("Unknown mode in specification...") |
110 try: |
110 try: |