contrib/voa-special-c5.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sun, 07 Apr 2019 23:59:39 +0300
changeset 1132 c0c4bd562e38
parent 283 d53b7df7fc26
permissions -rw-r--r--
Added tag v0.16 for changeset e80a62985ca7

#!/usr/bin/python

# python voa-special-c5.py

import re

# Dictionary file read by this script; the path is resolved relative to the
# current working directory.
INPUT_PATH = '../gadict-voa-special-english-word-book.dict-c5'

# A delimiter line is exactly five underscores; an "empty" line is a bare
# newline (a line holding only spaces is NOT treated as empty).
re_delim = re.compile(r'^_____\n')
re_empty = re.compile(r'^\n')


def parse_entries(lines):
    """Parse delimiter-separated dictionary entries into (word, article) pairs.

    ``lines`` is any iterable of text lines that keep their trailing newline
    (an open file works).  Everything up to and including the first
    ``_____`` delimiter line is skipped as preamble.  After each delimiter
    the parser expects: a blank line, optional further blank lines, the
    headword line, optional blank lines, then the article lines running up
    to the next delimiter or the end of input.

    Returns a list of ``(word, article)`` tuples, both stripped of
    surrounding whitespace.  Returns an empty list when the input contains
    no delimiter at all.
    """
    lines = iter(lines)

    # Skip the preamble.  The for/else returns on exhausted input instead of
    # spinning forever when no delimiter is present.
    for line in lines:
        if re_delim.match(line):
            break
    else:
        return []

    words = []
    word = None
    body = []  # lines of the article currently being collected
    state = 'delim'

    for line in lines:
        if state == 'delim':
            # Wait for the blank line that follows a delimiter.
            if re_empty.match(line):
                state = 'skip_to_word'
        elif state == 'skip_to_word':
            if not re_empty.match(line):
                state = 'word'
                word = line.strip()
        elif state == 'word':
            if re_empty.match(line):
                state = 'skip_to_article'
            else:
                # The article starts right after the headword, with no blank
                # line in between.
                state = 'article'
                body = [line]
        elif state == 'skip_to_article':
            if not re_empty.match(line):
                state = 'article'
                body = [line]
        elif state == 'article':
            if re_delim.match(line):
                words.append((word, ''.join(body).strip()))
                word = None
                body = []
                state = 'delim'
            else:
                body.append(line)

    # The last entry has no delimiter after it when the input simply ends;
    # flush it so it is not silently dropped.
    if state == 'article':
        words.append((word, ''.join(body).strip()))

    return words


def format_worddef(word, article):
    """Return the LaTeX ``\\worddef{word}{article}`` line for one entry."""
    return "\\worddef{%s}{%s}" % (word, article)


def main():
    """Read the dictionary at INPUT_PATH and print one \\worddef per entry."""
    # "with" guarantees the file is closed even if parsing raises.
    with open(INPUT_PATH, 'r') as f:
        words = parse_entries(f)

    for word, article in words:
        print(format_worddef(word, article))


if __name__ == '__main__':
    main()