contrib/voa-special-c5.py
changeset 283 d53b7df7fc26
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/voa-special-c5.py	Wed May 28 23:42:20 2014 +0300
@@ -0,0 +1,56 @@
+#!/usr/bin/python
+
+# Usage: python voa-special-c5.py  (the input path is relative to the cwd)
+
+import re
+
+f = open('../gadict-voa-special-english-word-book.dict-c5', 'r')
+
+re_delim = re.compile(r'^_____\n')
+re_empty = re.compile(r'^\n')
+
+while True:
+    s = f.readline()
+    if re_delim.match(s):
+        break
+state = 'delim'
+
+words = []
+word = None
+article = None
+
+while True:
+    line = f.readline()
+    if len(line) == 0:
+        break
+    if state == 'delim':
+        if re_empty.match(line):
+            state = 'skip_to_word'
+    elif state == 'skip_to_word':
+        if not re_empty.match(line):
+            state = 'word'
+            word = line.strip()
+    elif state == 'word':
+        if re_empty.match(line):
+            state = 'skip_to_article'
+        else:
+            state = 'article'
+            article += line
+    elif state == 'skip_to_article':
+        if not re_empty.match(line):
+            state = 'article'
+            article = line
+    elif state == 'article':
+        if re_delim.match(line):
+            state = 'delim'
+            words.append( (word, article.strip()) )
+            word = None
+            article = ''
+        else:
+            state = 'article'
+            article += line
+
+f.close()
+
+for i in words:
+    print("\\worddef{%s}{%s}" % (i[0], i[1]))