py/gadict_srs_tab.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 27 Aug 2016 14:37:58 +0300
changeset 537 a70ab6a33bfa
parent 536 c9f0064d8661
child 554 59714b9033bc
permissions -rw-r--r--
Order irregular verbs, noun plural form and adverb/adjective comparison form in headword list.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
536
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     2
"""Space repetition TAB format writer"""
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     3
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
import io
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
import sys
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     6
import codecs
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     7
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     8
import gadict
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     9
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    10
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    11
# Command line: <script> INPUT [OUTPUT [LANG[,LANG...]]]
#   INPUT  - dictionary source file, read as UTF-8 (required).
#   OUTPUT - destination file, written as UTF-8; stdout when omitted.
#   LANGS  - comma separated set of translation languages to export;
#            every translation is exported when omitted.
FINAME = None
FONAME = None
LANGS = None
if len(sys.argv) >= 2:
    FINAME = sys.argv[1]
if len(sys.argv) >= 3:
    FONAME = sys.argv[2]
if len(sys.argv) >= 4:
    LANGS = set(sys.argv[3].split(","))

# Without an input name io.open(None, ...) would fail with an opaque
# TypeError, so report the expected usage instead.
if FINAME is None:
    sys.stderr.write(
        "usage: {:s} INPUT [OUTPUT [LANG[,LANG...]]]\n".format(
            sys.argv[0] if sys.argv else "gadict_srs_tab.py"))
    sys.exit(2)

# The ``with`` block closes the input on every exit path, including the
# SystemExit raised below.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    PARSER = gadict.Parser()
    try:
        DOM = PARSER.parse(FIN)
    except gadict.ParseException as ex:
        # NOTE(review): the file name and repr(ex) are joined without a
        # separator and sent to stdout -- presumably the exception's repr
        # supplies its own leading delimiter; confirm against gadict.
        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        # sys.exit() rather than the builtin exit(): the latter is injected
        # by the ``site`` module and is missing under ``python -S``.
        sys.exit(1)

# Destination stream: the named file (UTF-8) or stdout when no name is given.
if FONAME is None:
    FOUT = sys.stdout
else:
    FOUT = codecs.open(FONAME, "w", "utf-8")
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    40
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    41
# Attributes that mark the "base" form of a headword, in the order they are
# probed.  The first one found on a headword becomes its sort key:
# verb forms (lowest numbered wins), then "s", then "comp", then "Am".
_ATTR_KEY_PRECEDENCE = ("v1", "v2", "v3", "s", "comp", "Am")


def attr_key(item):
    """Return the sort key used to order the headwords of one article.

    ``item`` is a ``(word, (pron, attrs))`` pair; only ``attrs`` is
    inspected.  The key is the first attribute from the precedence list
    ``v1, v2, v3, s, comp, Am`` that occurs in ``attrs``.  Headwords with no
    attributes, or with none of the listed ones, get ``"zzz"``, which
    compares greater than every other key this function returns.

    Keys are compared as plain strings by ``sorted``.
    """
    (_word, (_pron, attrs)) = item
    if not attrs:
        return "zzz"
    # Materialise once so membership is tested by equality against each
    # element, whatever container type the parser produced.
    present = tuple(attrs)
    for candidate in _ATTR_KEY_PRECEDENCE:
        if candidate in present:
            return candidate
    return "zzz"
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    62
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    63
def _headword_html(word, pron, attrs):
    """Render one headword as ``<b>word</b> [pron] «attr», «attr»``.

    The pronunciation and the attribute list are omitted when falsy;
    attributes are emitted in sorted order, comma separated.
    """
    line = "<b>" + word + "</b>"
    if pron:
        line += " [" + pron + "]"
    # Truthiness test (not len()) so a missing attribute list is tolerated
    # the same way attr_key tolerates it.
    if attrs:
        line += ",".join(sorted(" «" + x + "»" for x in attrs))
    return line


# Write one record per article: "<question>\t<answer>\n".  The question is
# the list of headwords, the answer is the list of senses with their
# translations.
try:
    # NOTE(review): iteration starts at index 1, so DOM[0] is never
    # exported -- presumably it is not a regular article; confirm against
    # gadict.Parser.
    for idx in range(1, len(DOM)):
        article = DOM[idx]
        defs = sorted(article[0].items(), key=attr_key)
        question = "<br>".join(
            _headword_html(word, pron, attrs)
            for (word, (pron, attrs)) in defs)
        FOUT.write(question)
        FOUT.write("\t")
        for sense in article[1]:
            if not sense:
                raise Exception("""Empty sense for article: """ + next(iter(article[0])))
            if sense.pos:
                FOUT.write('<i style="color: green;">')
                FOUT.write(sense.pos)
                FOUT.write('</i>')
                if sense.ant_list and len(sense.ant_list) > 0:
                    FOUT.write(" <i>ant: ")
                    FOUT.write("; ".join(sense.ant_list))
                    FOUT.write("</i>")
                if sense.syn_list and len(sense.syn_list) > 0:
                    FOUT.write(" <i>syn: ")
                    FOUT.write("; ".join(sense.syn_list))
                    FOUT.write("</i>")
                # With several languages each translation gets its own line,
                # so the part-of-speech header is put on a line of its own.
                if LANGS and len(LANGS) > 1:
                    FOUT.write("<br>")
                else:
                    FOUT.write(" ")
            for (lang, tr) in sense.tr_list:
                # Skip translations in languages that were not requested.
                # Previously the trailing "<br>" was still written for them,
                # leaving empty lines in the answer field.
                if LANGS is not None and lang not in LANGS:
                    continue
                # A record occupies exactly one output line.
                tr = tr.replace('\n', ' ')
                # Label each translation only when more than one language
                # was requested.
                if LANGS is not None and len(LANGS) > 1:
                    FOUT.write('<i style="color: blue;">')
                    FOUT.write(lang)
                    FOUT.write("</i> ")
                FOUT.write(tr)
                FOUT.write("<br>")
        FOUT.write("\n")
finally:
    # Flush and release the output file; stdout is left open for the
    # interpreter to manage.
    if FOUT is not sys.stdout:
        FOUT.close()