# py/gadict_srs_anki.py
# author Oleksandr Gavenko <gavenkoa@gmail.com>
# Wed, 21 Sep 2016 22:09:37 +0300
# changeset 565 ac68f2680ea0
# parent 562 d4dbeb503ef4
# child 566 0bba61492c37
# permissions -rw-r--r--
# Add syntax to add related words. Add separators between ant/syn/rel in generated output.

# -*- coding: utf-8 -*-

import os
import io
import sys
import codecs
import tempfile
import shutil
import signal

import gadict


# Positional command-line arguments: input file name, output file name and
# an optional comma-separated set of languages.
FINAME = None
FONAME = None
LANGS = None
if len(sys.argv) >= 2:
    FINAME = sys.argv[1]
if len(sys.argv) >= 3:
    FONAME = sys.argv[2]
if len(sys.argv) >= 4:
    LANGS = set(sys.argv[3].split(","))

# Fail with a clear message instead of letting io.open() choke on None.
if FINAME is None:
    raise Exception('Missing input file name')

PARSER = gadict.Parser()
# The context manager closes the input on every exit path, including the
# SystemExit raised below.
with io.open(FINAME, mode='r', buffering=1, encoding="utf-8") as FIN:
    try:
        DOM = PARSER.parse(FIN)
    except gadict.ParseException as ex:
        sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
        if __debug__:
            import traceback
            traceback.print_exc()
        # sys.exit() rather than the site-provided exit(), which is meant
        # for interactive use and is absent when the site module is skipped.
        sys.exit(1)

def cleanup(collection, tmpdir):
    """Close *collection* and delete *tmpdir*, skipping whichever is falsy.

    The directory is removed even when closing the collection raises, so a
    failed close does not leave the temporary directory behind. Errors while
    deleting the directory itself are ignored.
    """
    try:
        if collection:
            collection.close()
    finally:
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)

# signal.signal(signal.SIGINT, lambda signal, frame: cleanup())


# Nothing can be exported without a destination.
if FONAME is None:
    raise Exception('Missing output file name')
# The anki libs appear to switch the working directory to the media directory
# of the current deck, so the output path is made absolute before any
# temporary deck is created.
FONAME = os.path.abspath(FONAME)
# Output file name without directory and extension.
FBASENAME, _ = os.path.splitext(os.path.split(FONAME)[1])
# Scratch directory created next to the output file.
TMPDIR = tempfile.mkdtemp(dir=os.path.split(FONAME)[0])

import hashlib

import anki
from anki.exporting import AnkiPackageExporter


class AnkiDbBuilder:
    """Build an Anki collection with one deck and one two-field note model.

    The collection file ``collection.anki2`` is created inside *tmpdir*.
    The deck, model and tag names are derived from *name*, and the model ID
    and note GUIDs are hashes that depend only on their inputs, so they do
    not change from one run to the next.
    """

    @staticmethod
    def _utf8(text):
        """Return *text* as UTF-8 bytes; byte strings pass through unchanged.

        hashlib needs bytes. Encoding explicitly avoids both the implicit
        ASCII conversion and the outright rejection of text strings, and for
        pure-ASCII input the resulting bytes (and digests) are unchanged.
        """
        if isinstance(text, bytes):
            return text
        return text.encode("utf-8")

    def __init__(self, tmpdir, name):
        """Create the collection in *tmpdir* and register deck and model.

        :tmpdir directory that receives ``collection.anki2``
        :name   base name for the deck, model and tag
        """
        self.tmpdir = tmpdir
        self.name = name

        self.collection = collection = anki.Collection(os.path.join(self.tmpdir, 'collection.anki2'))

        deck_id = collection.decks.id(self.name + "_deck")
        # NOTE(review): 'deck' is not used below; the lookup is kept as is.
        deck = collection.decks.get(deck_id)

        model = collection.models.new(self.name + "_model")
        model['tags'].append(self.name + "_tag")
        model['did'] = deck_id
        model['css'] = """
.card {
  font-family: arial;
  font-size: 20px;
  text-align: center;
  color: black;
  background-color: white;
}
.ask {
  margin-bottom: 1.2em;
}
span.headword {
  font-weight: bold;
}
.pron {
  color: magenta;
}
.pos {
  color: green;
  font-style: italic;
}
.lang {
  color: red;
  font-style: italic;
}
.ant {
  color: #404080;
}
.syn {
  color: #804040;
}
.rel {
  color: #804080;
}
.attrs {
  color: blue;
}
span.ex {
  font-style: italic;
  font-size: .85em;
}
"""

        collection.models.addField(model, collection.models.newField('From'))
        collection.models.addField(model, collection.models.newField('To'))

        tmpl = collection.models.newTemplate('From -> To')
        tmpl['qfmt'] = '<div class="from">{{From}}</div>'
        tmpl['afmt'] = '{{FrontSide}}\n\n<hr id=answer>\n\n<div class="to">{{To}}</div>'
        collection.models.addTemplate(model, tmpl)

        # Equivalent of:
        #   collection.models.add(model)
        # without setting auto-generated ID. It is essential to keep model['id']
        # unchanged between upgrades or notes will be skipped!!
        name_digest = hashlib.sha1(self._utf8(self.name)).hexdigest()
        model['id'] = int(name_digest, 16) % (2**63)
        collection.models.update(model)
        collection.models.setCurrent(model)
        collection.models.save(model)

    def guid(self, type_, headword):
        """Return the hex MD5 of ``name:type_:headword`` as the note GUID.

        :type_    used to generate different notes from same headword
        :headword text identifying the dictionary article
        """
        parts = (self.name, type_, headword)
        digest = hashlib.md5(b":".join(self._utf8(part) for part in parts))
        return digest.hexdigest()

    def add_note(self, type_, id_, from_, to_):
        """Add a note with the given ``From``/``To`` field contents.

        :type_ note kind, part of the GUID
        :id_   identity of the source article, part of the GUID
        :from_ content of the ``From`` field
        :to_   content of the ``To`` field
        """
        note = self.collection.newNote()
        note['From'] = from_
        note['To'] = to_
        note.guid = self.guid(type_, id_)
        self.collection.addNote(note)

    def export(self, fname):
        """Export the collection into the file *fname* via AnkiPackageExporter."""
        export = AnkiPackageExporter(self.collection)
        export.exportInto(fname)

    def close(self):
        """Close the underlying collection."""
        self.collection.close()

# Headword attributes that feed the extra quiz cards: three verb forms and
# the singular/plural pair.
_FORM_ATTRS = ('v1', 'v2', 'v3', 's', 'pl')


def _headwords_html(headwords):
    """Render every headword variant and collect the quiz-relevant forms.

    Returns ``(html, forms)`` where ``forms`` maps each attribute from
    ``_FORM_ATTRS`` found on a headword to its ``(headword, pron)`` pair.
    When several headwords carry the same attribute the last one wins.
    """
    buf = []
    forms = {}
    for hw in headwords:
        buf.append("<div class='headword'>")
        buf.append("<span class='headword'>")
        buf.append(hw.headword)
        buf.append("</span>")
        if hw.pron is not None:
            buf.append(" <span class='pron'>[")
            buf.append(hw.pron)
            buf.append("]</span>")
        if hw.attrs:
            # Sorted so the generated HTML does not depend on attribute order.
            labels = sorted(u"«" + attr + u"»" for attr in hw.attrs)
            buf.append("<span class='attrs'>")
            buf.append(", ".join(labels))
            buf.append("</span>")
        for attr in _FORM_ATTRS:
            if attr in hw.attrs:
                forms[attr] = (hw.headword, hw.pron)
        buf.append("</div>")
    return "".join(buf), forms


def _senses_html(translations, with_examples):
    """Render all senses: part of speech, ant/syn/rel lists, translations.

    Examples are appended to each sense only when *with_examples* is true.
    The ant/syn/rel groups that are present are separated by `` |``.
    """
    buf = []
    for sense in translations:
        buf.append("<div class='sense'>")
        if sense.pos:
            buf.append("<span class='pos'>")
            buf.append(sense.pos)
            buf.append("</span>")
        groups = []
        for (kind, words) in (("ant", sense.ant_list),
                              ("syn", sense.syn_list),
                              ("rel", sense.rel_list)):
            if words:
                groups.append(" <span class='{0}'>{0}: ".format(kind)
                              + "; ".join(words)
                              + "</span>")
        buf.append(" |".join(groups))
        # Each translation gets its own line only when there are several.
        several = len(sense.tr_list) > 1
        for (lang, tr) in sense.tr_list:
            if several:
                buf.append("<div class='sense'>")
            buf.append(" <span class='lang'>")
            buf.append(lang)
            buf.append("</span>")
            buf.append(" <span class='tr'>")
            buf.append(tr)
            buf.append("</span>")
            if several:
                buf.append("</div>")
        if with_examples:
            for (lang, ex) in sense.ex_list or []:
                buf.append("<div class='ex'>")
                buf.append(" <span class='lang'>")
                buf.append(lang)
                buf.append("</span>")
                buf.append(" <span class='ex'>")
                buf.append(ex)
                buf.append("</span>")
                buf.append("</div>")
        # Close the per-sense container so senses are siblings, not nested.
        buf.append("</div>")
    return "".join(buf)


def _riddle_html(form, attr):
    """Render one ``(headword, pron)`` form labelled with *attr*.

    The pronunciation span is left out when the form has no pronunciation,
    instead of printing ``[None]``.
    """
    headword, pron = form
    buf = [u"<span class='headword'>{}</span>".format(headword)]
    if pron is not None:
        buf.append(u" <span class='pron'>[{}]</span>".format(pron))
    buf.append(u" <span class='attrs'>{}</span>".format(attr))
    return u"".join(buf)


# Bound before the try block so the cleanup below also works when the
# constructor itself fails.
builder = None
try:
    builder = AnkiDbBuilder(TMPDIR, FBASENAME)

    # The first DOM element is skipped; every remaining one is unpacked as
    # a (headwords, translations) pair.
    for (headwords, translations) in DOM[1:]:
        identity = headwords[0].headword
        direct_from, forms = _headwords_html(headwords)
        direct_to = _senses_html(translations, True)
        reverse_from = _senses_html(translations, False)    # without examples!!
        builder.add_note("en->tr", identity, direct_from, direct_to)
        builder.add_note("tr->en", identity, reverse_from, direct_from)
        if 'v1' in forms and 'v2' in forms and 'v3' in forms:
            question = u"<div class='ask'>Find irregular verb:</div>"
            riddle1 = _riddle_html(forms['v1'], 'v1')
            riddle2 = _riddle_html(forms['v2'], 'v2')
            riddle3 = _riddle_html(forms['v3'], 'v3')
            answer = u"{}<br>{}<br>{}<br>{}".format(riddle1, riddle2, riddle3, direct_to)
            builder.add_note("irregular1", identity, question + riddle1, answer)
            builder.add_note("irregular2", identity, question + riddle2, answer)
            builder.add_note("irregular3", identity, question + riddle3, answer)
        if 's' in forms and 'pl' in forms:
            question = u"<div class='ask'>Find plural:</div>"
            riddle_s = _riddle_html(forms['s'], 's')
            riddle_pl = _riddle_html(forms['pl'], 'pl')
            answer = u"{}<br>{}<br>{}".format(riddle_s, riddle_pl, direct_to)
            builder.add_note("singular", identity, question + riddle_s, answer)
            builder.add_note("plural", identity, question + riddle_pl, answer)

    builder.export(FONAME)
finally:
    # cleanup() skips the close when no builder was created and removes the
    # temporary directory.
    cleanup(builder, TMPDIR)