Include all headwords in the list, not just the first one.
--- a/py/gadict_headwords.py Fri Dec 23 21:56:42 2016 +0200
+++ b/py/gadict_headwords.py Wed Dec 28 19:47:51 2016 +0200
@@ -22,6 +22,7 @@
SEPARATOR_RE = regex.compile(u"^__$")
EMPTY_RE = regex.compile( u"^$" )
+ HEADWORD_ATTR_RE = regex.compile( u"^ " )
HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" )
def __init__(self, stream):
@@ -33,7 +34,7 @@
while True:
line = self.stream.readline()
if len(line) == 0:
- break
+ return wlist
self.lineno += 1
m = self.SEPARATOR_RE.match(line)
if not m:
@@ -41,22 +42,27 @@
line = self.stream.readline()
if len(line) == 0:
- break
+ return wlist
self.lineno += 1
m = self.EMPTY_RE.match(line)
if not m:
raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line))
- line = self.stream.readline()
- if len(line) == 0:
- break
- line = line.strip()
- self.lineno += 1
- m = self.HEADWORD_RE.match(line)
- if not m:
- raise Exception("Line {:d}: '{:s}' is not a headword\n".format(self.lineno, line))
-
- wlist.append(line)
+ while True:
+ line = self.stream.readline()
+ if len(line) == 0:
+ return wlist
+ self.lineno += 1
+ m = self.HEADWORD_ATTR_RE.match(line)
+ if m:
+ continue
+ line = line.strip()
+ if len(line) == 0:
+ break
+ m = self.HEADWORD_RE.match(line)
+ if not m:
+ raise Exception("{:d}: '{:s}' is not a headword\n".format(self.lineno, line))
+ wlist.append(line)
return wlist
try:
@@ -64,6 +70,9 @@
for headword in parser.parse():
FOUT.write(headword)
FOUT.write("\n")
+except Exception as ex:
+ print("{}:{}".format(FINAME, str(ex)))
+ raise ex
finally:
FIN.close()
FOUT.close()