py/gadict_srs_tab.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Thu, 15 Sep 2016 20:13:18 +0300
changeset 558 53fd793e345d
parent 554 59714b9033bc
child 586 1ddf73757bff
permissions -rw-r--r--
Add shortcut to deploy to HG repos.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
536
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     2
"""Space repetition TAB format writer"""
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     3
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
import io
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
import sys
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     6
import codecs
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     7
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     8
import gadict
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     9
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    10
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    11
# Positional command-line arguments; each one is optional and stays None
# when it is not supplied:
#   argv[1] -> FINAME  input file name
#   argv[2] -> FONAME  output file name
#   argv[3] -> LANGS   comma-separated language codes, stored as a set
FINAME = sys.argv[1] if len(sys.argv) >= 2 else None
FONAME = sys.argv[2] if len(sys.argv) >= 3 else None
LANGS = set(sys.argv[3].split(",")) if len(sys.argv) >= 4 else None
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    20
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    21
# Parse the input dictionary into DOM.
#
# Without an input file name there is nothing to parse: report usage and
# stop instead of letting io.open(None) fail with an opaque TypeError.
if FINAME is None:
    sys.stderr.write(
        "usage: {:s} INPUT [OUTPUT [LANG,...]]\n".format(sys.argv[0]))
    sys.exit(2)

PARSER = gadict.Parser()
# The "with" block closes the input file on every exit path, including the
# SystemExit raised below on a parse error.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    try:
        DOM = PARSER.parse(FIN)
    except gadict.ParseException as ex:
        # The file name and repr(ex) are joined with no separator on purpose:
        # presumably the exception's repr supplies its own leading delimiter
        # (e.g. ":line:") -- confirm in gadict before changing the format.
        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        # sys.exit() rather than the site-provided exit(), which is absent
        # under "python -S" and in frozen builds.
        sys.exit(1)
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    34
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    35
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    36
# Output goes to the named file (written as UTF-8) when an output name was
# given on the command line, otherwise to standard output.
FOUT = sys.stdout if FONAME is None else codecs.open(FONAME, "w", "utf-8")
c9f0064d8661 Generate import file for space repetition software in TAB format.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    40
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 536
diff changeset
    41
# Write one record per article: "<question>\t<answer>\n".
#
# The question is the list of headwords (with optional pronunciation and
# attributes); the answer is the list of senses with their translations.
# Both sides are HTML fragments that use <br> as the line separator.
# DOM[0] is skipped -- presumably a dictionary-level header rather than an
# article; TODO confirm against gadict.Parser.
try:
    for (headwords, translations) in DOM[1:]:
        # Question side: one line per headword.
        lines = []
        for hw in headwords:
            line = "<b>" + hw.headword + "</b>"
            if hw.pron:
                line += " [" + hw.pron + "]"
            if hw.attrs:
                # Attributes are sorted so the output is stable between runs.
                line += ",".join(sorted(" «" + x + "»" for x in hw.attrs))
            lines.append(line)
        FOUT.write("<br>".join(lines))
        FOUT.write("\t")

        # Answer side: every sense followed by its translations.
        for sense in translations:
            if not sense:
                # Name the article by its headwords so the offending entry
                # can be located in the input file.
                raise Exception(
                    "Empty sense for article: "
                    + ", ".join(hw.headword for hw in headwords))
            if sense.pos:
                FOUT.write('<i style="color: green;">')
                FOUT.write(sense.pos)
                FOUT.write('</i>')
                # Antonyms and synonyms are emitted only together with the
                # part-of-speech label, as in the original layout.
                if sense.ant_list:
                    FOUT.write(" <i>ant: ")
                    FOUT.write("; ".join(sense.ant_list))
                    FOUT.write("</i>")
                if sense.syn_list:
                    FOUT.write(" <i>syn: ")
                    FOUT.write("; ".join(sense.syn_list))
                    FOUT.write("</i>")
                # With several languages each translation gets its own line,
                # so the label is put on a line of its own; otherwise the
                # translation continues on the same line.
                if LANGS and len(LANGS) > 1:
                    FOUT.write("<br>")
                else:
                    FOUT.write(" ")
            for (lang, tr) in sense.tr_list:
                # Skip filtered-out languages entirely, so that they do not
                # leave an empty "<br>" line in the answer.
                if LANGS is not None and lang not in LANGS:
                    continue
                # A record must stay on a single physical line.
                tr = tr.replace('\n', ' ')
                # The language tag is only needed to tell several selected
                # languages apart.
                if LANGS is not None and len(LANGS) > 1:
                    FOUT.write('<i style="color: blue;">')
                    FOUT.write(lang)
                    FOUT.write("</i> ")
                FOUT.write(tr)
                FOUT.write("<br>")
        FOUT.write("\n")
finally:
    # Close (and thereby flush) a file opened by this script; standard
    # output is left to the interpreter.
    if FOUT is not sys.stdout:
        FOUT.close()