author | Oleksandr Gavenko <gavenkoa@gmail.com> |
Thu, 15 Sep 2016 20:13:18 +0300 | |
changeset 558 | 53fd793e345d |
parent 283 | d53b7df7fc26 |
permissions | -rw-r--r-- |
283
d53b7df7fc26
VOA special dictionary in good to print form.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff
changeset
|
1 |
#!/usr/bin/python

# python voa-special-c5.py

"""Convert the VOA Special English c5 dictionary into printable macro lines.

The input file consists of a preamble followed by entries.  Each entry is
introduced by a ``_____`` delimiter line, then blank line(s), the headword,
optional blank line(s) and the article text, which runs until the next
delimiter line.  One "worddef" macro line per entry is printed to stdout.
"""

import re

DICT_PATH = '../gadict-voa-special-english-word-book.dict-c5'

re_delim = re.compile(r'^_____\n')
re_empty = re.compile(r'^\n')


def parse_words(lines):
    """Return a list of (headword, article) tuples parsed from c5 lines.

    lines -- any iterable of text lines that keep their trailing newline
             (an open file object, or the result of str.splitlines(True)).

    Everything before the first delimiter line is ignored.  An input with
    no delimiter at all yields an empty list.  Headword and article are
    both stripped of surrounding whitespace.
    """
    lines = iter(lines)

    # Skip the preamble.  The for/else stops cleanly at end of input; the
    # previous readline() loop never terminated when no delimiter existed.
    for line in lines:
        if re_delim.match(line):
            break
    else:
        return []

    words = []
    word = None
    # Collected as a list and joined on flush.  Starting empty (rather than
    # None) lets an article begin directly on the line after the headword,
    # even for the very first entry.
    article = []
    state = 'delim'

    for line in lines:
        blank = re_empty.match(line)
        if state == 'delim':
            # Wait for the blank line that follows the delimiter.
            if blank:
                state = 'skip_to_word'
        elif state == 'skip_to_word':
            if not blank:
                word = line.strip()
                state = 'word'
        elif state == 'word':
            if blank:
                state = 'skip_to_article'
            else:
                # No blank separator: the article starts right here.
                article.append(line)
                state = 'article'
        elif state == 'skip_to_article':
            if not blank:
                article = [line]
                state = 'article'
        elif state == 'article':
            if re_delim.match(line):
                words.append((word, ''.join(article).strip()))
                word = None
                article = []
                state = 'delim'
            else:
                article.append(line)

    # Input ended without a closing delimiter: keep the entry that was
    # still being collected instead of silently dropping it.
    if state == 'article':
        words.append((word, ''.join(article).strip()))

    return words


def main():
    """Read the dictionary at DICT_PATH and print one macro line per entry."""
    # 'with' guarantees the file is closed even if parsing raises.
    with open(DICT_PATH, 'r') as f:
        words = parse_words(f)

    for word, article in words:
        print("\\worddef{%s}{%s}" % (word, article))


if __name__ == '__main__':
    main()