contrib/voa-special-c5.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 14 Oct 2017 16:25:12 +0300
changeset 938 d0bb9e2fd893
parent 283 d53b7df7fc26
permissions -rw-r--r--
Added some homophone relations and fixed pronunciation.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
283
d53b7df7fc26 VOA special dictionary in good to print form.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     1
#!/usr/bin/python

# python voa-special-c5.py
#
# Reads a dictionary source file laid out as "_____" delimited entries
# (delimiter line, blank line, headword, optional blank line, article text)
# and prints one \worddef{headword}{article} line per entry to stdout.

import re

# Path is relative to the current working directory, as in the original script.
DICT_PATH = '../gadict-voa-special-english-word-book.dict-c5'

# A delimiter is a line consisting of exactly five underscores.
re_delim = re.compile(r'^_____\n')
# An "empty" line is a bare newline (no whitespace allowed).
re_empty = re.compile(r'^\n')


def _skip_header(lines):
    """Consume ``lines`` up to and including the first delimiter line.

    Returns True when a delimiter was found, False when the input ran out
    first (so a file without any delimiter no longer loops forever).
    """
    for line in lines:
        if re_delim.match(line):
            return True
    return False


def parse_entries(lines):
    """Parse delimiter-separated dictionary entries.

    ``lines`` is any iterable of text lines that keep their trailing
    newline (an open text file works).  Everything before the first
    delimiter line is ignored.

    Returns a list of ``(headword, article)`` tuples in input order; both
    strings are stripped of surrounding whitespace.
    """
    lines = iter(lines)
    words = []
    if not _skip_header(lines):
        return words

    state = 'delim'
    word = None
    # Start from '' rather than None: the 'word' state appends to it when
    # the article text follows the headword without a blank line.
    article = ''

    for line in lines:
        if state == 'delim':
            # Waiting for the blank line that follows a delimiter.
            if re_empty.match(line):
                state = 'skip_to_word'
        elif state == 'skip_to_word':
            # First non-blank line after the delimiter is the headword.
            if not re_empty.match(line):
                state = 'word'
                word = line.strip()
        elif state == 'word':
            if re_empty.match(line):
                state = 'skip_to_article'
            else:
                # No blank separator: this line already belongs to the article.
                state = 'article'
                article += line
        elif state == 'skip_to_article':
            if not re_empty.match(line):
                state = 'article'
                article = line
        elif state == 'article':
            if re_delim.match(line):
                # Delimiter closes the current entry.
                words.append((word, article.strip()))
                state = 'delim'
                word = None
                article = ''
            else:
                article += line

    # The last entry is usually terminated by end of input rather than by
    # another delimiter; keep it instead of silently dropping it.
    if state == 'article':
        words.append((word, article.strip()))

    return words


def main():
    """Read DICT_PATH and print one worddef macro line per entry."""
    # 'with' guarantees the file is closed even if parsing raises.
    with open(DICT_PATH, 'r') as f:
        words = parse_entries(f)
    for word, article in words:
        print("\\worddef{%s}{%s}" % (word, article))


if __name__ == '__main__':
    main()