py/gadict_srs_tab.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Tue, 05 Dec 2023 13:24:46 +0200
changeset 1353 dcda231188dc
parent 586 1ddf73757bff
permissions -rw-r--r--
New articles.

# -*- coding: utf-8 -*-
"""Spaced repetition TAB format writer."""

import io
import sys
import codecs

import gadict


# Positional command line arguments; every one of them may be omitted, in
# which case the corresponding setting stays None:
#   argv[1] - input file name
#   argv[2] - output file name
#   argv[3] - comma separated list of language codes
FINAME = sys.argv[1] if len(sys.argv) > 1 else None
FONAME = sys.argv[2] if len(sys.argv) > 2 else None
LANGS = set(sys.argv[3].split(",")) if len(sys.argv) > 3 else None

# Parse the input dictionary into DOM.
#
# Without an input path io.open(None, ...) would fail with an unrelated
# TypeError traceback, so report the expected invocation instead.
if FINAME is None:
    sys.stderr.write(
        "Usage: {:s} INPUT [OUTPUT [LANG[,LANG...]]]\n".format(sys.argv[0]))
    sys.exit(2)

PARSER = gadict.Parser()
# The context manager closes the input on success, on a parse error and on
# any other exception raised while parsing.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    try:
        DOM = PARSER.parse(FIN)
    except gadict.ParseException as ex:
        # The message is the file name immediately followed by repr(ex).
        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        # sys.exit() rather than the bare exit(): the latter is injected by
        # the site module for interactive use and may be absent.
        sys.exit(1)


# Destination stream: standard output unless an output file name was given,
# in which case the file is (re)created as UTF-8 text.
FOUT = sys.stdout if FONAME is None else codecs.open(FONAME, "w", "utf-8")

def _format_question(headwords):
    """Return the question field: one HTML line per headword.

    Each line is the headword in bold, followed by the pronunciation in
    square brackets (when set) and the sorted, comma joined attributes in
    guillemets (when any).  Lines are joined with ``<br>``.
    """
    lines = []
    for hw in headwords:
        line = "<b>" + hw.headword + "</b>"
        if hw.pron:
            line += " [" + hw.pron + "]"
        if hw.attrs:
            # Decorate first and sort afterwards: the closing guillemet takes
            # part in the comparison, so sorting bare names could reorder.
            line += ",".join(sorted(" «" + x + "»" for x in hw.attrs))
        lines.append(line)
    return "<br>".join(lines)


def _format_answer(headwords, translations, langs):
    """Return the answer field built from all senses of one article.

    headwords -- headwords of the article, used only for the error message.
    translations -- iterable of senses; each sense exposes ``pos``,
        ``ant_list``, ``syn_list`` and ``tr_list`` (pairs of language code
        and translation text).
    langs -- set of language codes to keep, or None to keep every language.

    Raises ValueError when a sense is empty.
    """
    parts = []
    for sense in translations:
        if not sense:
            raise ValueError(
                "Empty sense for article: "
                + ", ".join(hw.headword for hw in headwords))
        if sense.pos:
            parts.append('<i style="color: green;">' + sense.pos + '</i>')
            if sense.ant_list:
                parts.append(" <i>ant: " + "; ".join(sense.ant_list) + "</i>")
            if sense.syn_list:
                parts.append(" <i>syn: " + "; ".join(sense.syn_list) + "</i>")
            # With several languages requested the translations start on
            # their own line; otherwise they follow on the same line.
            parts.append("<br>" if langs and len(langs) > 1 else " ")
        for (lang, tr) in sense.tr_list or []:
            # A record occupies exactly one output line.
            tr = tr.replace('\n', ' ')
            if langs is None:
                parts.append(tr)
            elif lang in langs:
                if len(langs) == 1:
                    parts.append(tr)
                else:
                    # Label the translation when languages are mixed.
                    parts.append(
                        '<i style="color: blue;">' + lang + "</i> " + tr)
            # NOTE(review): the line break is emitted for every translation,
            # including those filtered out by langs - confirm this is wanted.
            parts.append("<br>")
    return "".join(parts)


# Emit one "question<TAB>answer" line per article.  DOM[0] is skipped;
# presumably it is the dictionary preamble - confirm against gadict.Parser.
try:
    for (headwords, translations) in DOM[1:]:
        FOUT.write("".join((
            _format_question(headwords),
            "\t",
            _format_answer(headwords, translations, LANGS),
            "\n",
        )))
finally:
    # Close (and thereby flush) a real output file; stdout is left open.
    if FOUT is not sys.stdout:
        FOUT.close()