contrib/voa-special-c5.py
changeset 283 d53b7df7fc26
equal deleted inserted replaced
282:bc7cd8c32b4a 283:d53b7df7fc26
       
     1 #!/usr/bin/python
       
     2 
       
     3 # python voa-special-c5.py
       
     4 
       
     5 import re
       
     6 
       
     7 f = open('../gadict-voa-special-english-word-book.dict-c5', 'r')
       
     8 
       
     9 re_delim = re.compile(r'^_____\n')
       
    10 re_empty = re.compile(r'^\n')
       
    11 
       
    12 while True:
       
    13     s = f.readline()
       
    14     if re_delim.match(s):
       
    15         break
       
    16 state = 'delim'
       
    17 
       
    18 words = []
       
    19 word = None
       
    20 article = None
       
    21 
       
    22 while True:
       
    23     line = f.readline()
       
    24     if len(line) == 0:
       
    25         break
       
    26     if state == 'delim':
       
    27         if re_empty.match(line):
       
    28             state = 'skip_to_word'
       
    29     elif state == 'skip_to_word':
       
    30         if not re_empty.match(line):
       
    31             state = 'word'
       
    32             word = line.strip()
       
    33     elif state == 'word':
       
    34         if re_empty.match(line):
       
    35             state = 'skip_to_article'
       
    36         else:
       
    37             state = 'article'
       
    38             article += line
       
    39     elif state == 'skip_to_article':
       
    40         if not re_empty.match(line):
       
    41             state = 'article'
       
    42             article = line
       
    43     elif state == 'article':
       
    44         if re_delim.match(line):
       
    45             state = 'delim'
       
    46             words.append( (word, article.strip()) )
       
    47             word = None
       
    48             article = ''
       
    49         else:
       
    50             state = 'article'
       
    51             article += line
       
    52 
       
    53 f.close()
       
    54 
       
    55 for i in words:
       
    56     print("\\worddef{%s}{%s}" % (i[0], i[1]))