diff -r ebc16c3a9129 -r b5a4b476eddf py/gadict_headwords.py --- a/py/gadict_headwords.py Fri Dec 23 21:56:42 2016 +0200 +++ b/py/gadict_headwords.py Wed Dec 28 19:47:51 2016 +0200 @@ -22,6 +22,7 @@ SEPARATOR_RE = regex.compile(u"^__$") EMPTY_RE = regex.compile( u"^$" ) + HEADWORD_ATTR_RE = regex.compile( u"^ " ) HEADWORD_RE = regex.compile( u"^(\\p{L}.*)$" ) def __init__(self, stream): @@ -33,7 +34,7 @@ while True: line = self.stream.readline() if len(line) == 0: - break + return wlist self.lineno += 1 m = self.SEPARATOR_RE.match(line) if not m: @@ -41,22 +42,27 @@ line = self.stream.readline() if len(line) == 0: - break + return wlist self.lineno += 1 m = self.EMPTY_RE.match(line) if not m: raise Exception("Line {:d}: '{:s}' is not empty line\n".format(self.lineno, line)) - line = self.stream.readline() - if len(line) == 0: - break - line = line.strip() - self.lineno += 1 - m = self.HEADWORD_RE.match(line) - if not m: - raise Exception("Line {:d}: '{:s}' is not a headword\n".format(self.lineno, line)) - - wlist.append(line) + while True: + line = self.stream.readline() + if len(line) == 0: + return wlist + self.lineno += 1 + m = self.HEADWORD_ATTR_RE.match(line) + if m: + continue + line = line.strip() + if len(line) == 0: + break + m = self.HEADWORD_RE.match(line) + if not m: + raise Exception("{:d}: '{:s}' is not a headword\n".format(self.lineno, line)) + wlist.append(line) return wlist try: @@ -64,6 +70,9 @@ for headword in parser.parse(): FOUT.write(headword) FOUT.write("\n") +except Exception as ex: + print("{}:{}".format(FINAME, str(ex))) + raise ex finally: FIN.close() FOUT.close()