Marked some words as Am/Br.
#!/usr/bin/python
# python voa-special-c5.py
import re
# Entry separator: a line consisting of five underscores.
re_delim = re.compile(r'^_____\n')
# A completely blank line (just the newline character).
re_empty = re.compile(r'^\n')

DICT_PATH = '../gadict-voa-special-english-word-book.dict-c5'


def parse_entries(lines):
    """Parse dictionary entries out of an iterable of text lines.

    Everything up to and including the first delimiter line (``_____``) is
    skipped.  After each delimiter the parser expects a blank line, then the
    headword, then (optionally after blank lines) the article text, which
    runs until the next delimiter line or the end of the input.

    Args:
        lines: iterable yielding lines with their trailing newline, e.g. an
            open text file.

    Returns:
        A list of ``(word, article)`` tuples in input order, both stripped
        of surrounding whitespace.  Empty if no delimiter line is found.
    """
    lines = iter(lines)

    # Skip the preamble.  Iterating (instead of an unconditional readline
    # loop) guarantees termination when the input has no delimiter at all.
    for line in lines:
        if re_delim.match(line):
            break
    else:
        return []

    state = 'delim'
    words = []
    word = None
    # Collected article lines; starts empty so that an article that follows
    # the very first headword without a blank line can be appended safely.
    article = []

    for line in lines:
        if state == 'delim':
            if re_empty.match(line):
                state = 'skip_to_word'
        elif state == 'skip_to_word':
            if not re_empty.match(line):
                state = 'word'
                word = line.strip()
        elif state == 'word':
            if re_empty.match(line):
                state = 'skip_to_article'
            else:
                # Article starts right after the headword, no blank line.
                state = 'article'
                article.append(line)
        elif state == 'skip_to_article':
            if not re_empty.match(line):
                state = 'article'
                article = [line]
        elif state == 'article':
            if re_delim.match(line):
                words.append((word, ''.join(article).strip()))
                state = 'delim'
                word = None
                article = []
            else:
                article.append(line)

    # The input may end without a closing delimiter; do not lose the entry
    # that was still being collected.
    if state == 'article':
        words.append((word, ''.join(article).strip()))

    return words


def main():
    """Read the dictionary file and print one ``\\worddef`` line per entry."""
    with open(DICT_PATH, 'r') as f:
        words = parse_entries(f)
    for word, article in words:
        print("\\worddef{%s}{%s}" % (word, article))


if __name__ == '__main__':
    main()