py/gadict_c5.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 27 Aug 2016 14:37:58 +0300
changeset 537 a70ab6a33bfa
parent 532 fc91cce0dff8
child 554 59714b9033bc
permissions -rw-r--r--
Order irregular verbs, noun plural forms and adverb/adjective comparison forms in the headword list.

# -*- coding: utf-8 -*-
"""dictd C5 format writer"""

import io
import sys
import codecs

import gadict


# Positional command-line arguments; each one stays None when it is omitted.
#   argv[1] -> FINAME: path of the file that is opened and parsed
#   argv[2] -> FONAME: path of the file the output is written to
#   argv[3] -> LANGS:  comma-separated values, kept as a set
FINAME = sys.argv[1] if len(sys.argv) >= 2 else None
FONAME = sys.argv[2] if len(sys.argv) >= 3 else None
LANGS = set(sys.argv[3].split(",")) if len(sys.argv) >= 4 else None

# Without an input path there is nothing to parse; fail with a usage line
# instead of the bare TypeError that io.open(None) would raise.
if FINAME is None:
    sys.stderr.write(
        "usage: {:s} INPUT [OUTPUT [LANG,...]]\n".format(sys.argv[0]))
    sys.exit(2)

# The input is always read as UTF-8 text, line buffered.
FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")

# Parse the whole input into DOM; the input file is closed whether or not
# parsing succeeds.
PARSER = gadict.Parser()
try:
    DOM = PARSER.parse(FIN)
except gadict.ParseException as ex:
    # The file name is immediately followed by the exception's repr.
    sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
    if __debug__:
        import traceback
        traceback.print_exc()
    # sys.exit() rather than the exit() helper: the latter is injected by the
    # `site` module for interactive use and is absent under `python -S`.
    sys.exit(1)
finally:
    FIN.close()


# The first DOM element carries the dictionary-level metadata.
PRELUDE = DOM[0]

# The sidecar files are named after the output file, so they can only be
# produced when an output name was given.  When FONAME is None the script
# falls back to writing the dictionary to stdout and the sidecars are skipped
# (previously FONAME+".name" raised TypeError before that fallback was reached).
# They are written as UTF-8 explicitly, matching the encoding used for the
# input and the main output, instead of relying on the locale default.
if FONAME is not None:
    with io.open(FONAME + ".name", "w", encoding="utf-8") as f:  # for dictfmt -s
        if PRELUDE.name is not None:
            f.write(PRELUDE.name)
        f.write("\n")

    with io.open(FONAME + ".url", "w", encoding="utf-8") as f:   # for dictfmt -u
        if PRELUDE.urls:
            f.write(PRELUDE.urls[0])    # only the first URL is recorded
        f.write("\n")


# Output goes to stdout unless an output file name was supplied, in which
# case the file is written as UTF-8.
FOUT = sys.stdout if FONAME is None else codecs.open(FONAME, "w", "utf-8")

# Preamble: optional dictionary name, then project URLs, licences and the
# "about" text taken from the prelude.
if PRELUDE.name is not None:
    FOUT.write("Dictionary name: " + PRELUDE.name + "\n\n")
FOUT.write("Project URLs: " + " , ".join(PRELUDE.urls) + "\n\n")
FOUT.write("Project licenses: " + ", ".join(PRELUDE.licences) + "\n\n")
FOUT.write(PRELUDE.about + "\n")


def attr_key(item):
    """Return the sort key of a headword entry.

    ``item`` is a ``(word, (pron, attrs))`` pair.  The key is derived from
    ``attrs`` only:

    * the smallest of the verb form markers "v1", "v2", "v3" if any is present;
    * otherwise the first of "s" (single/plural), "comp"
      (comparative/superlative) or "Am" (Am/Br/Au) that is present;
    * otherwise, or when ``attrs`` is empty, the fallback key "zzz".

    The returned string is compared as plain text by the caller's sort.
    """
    _word, (_pron, attrs) = item
    if not attrs:
        return "zzz"
    verb_forms = [attr for attr in attrs if attr in ("v1", "v2", "v3")]
    if verb_forms:
        return min(verb_forms)
    # Remaining markers, checked in priority order.
    for marker in ("s", "comp", "Am"):
        if any(attr == marker for attr in attrs):
            return marker
    return "zzz"


# Write one entry per article.  DOM[0] is the prelude, so articles start at
# index 1.  Each article is indexed as article[0] (a mapping of headword to
# (pronunciation, attributes)) and article[1] (an iterable of senses).
for idx in range(1, len(DOM)):
    article = DOM[idx]
    headwords = article[0]
    FOUT.write("_____\n\n")
    # Entry title: all headwords in their stored order.
    FOUT.write("; ".join(headwords.keys()))
    FOUT.write("\n\n")
    # Headword list, ordered by attr_key.
    for (word, (pron, attrs)) in sorted(headwords.items(), key=attr_key):
        FOUT.write("  ")
        FOUT.write(word)
        if pron is not None:
            FOUT.write(" [")
            FOUT.write(pron)
            FOUT.write("]")
        if attrs:       # same falsy check as attr_key, so None is tolerated
            FOUT.write(" ")
            FOUT.write(", ".join(sorted("«" + x + "»" for x in attrs)))
        FOUT.write("\n")
    FOUT.write("\n")
    for sense in article[1]:
        if not sense:
            raise Exception("""Empty sense for article: """ + next(iter(headwords)))
        FOUT.write("  ")
        # NOTE(review): when sense.pos is empty the antonym/synonym lists are
        # not written and no newline follows the indent above, so the first
        # translation continues on the same line -- confirm this is intended.
        if sense.pos:
            FOUT.write("«")
            FOUT.write(sense.pos)
            FOUT.write("» ")
            if sense.ant_list:
                FOUT.write(" ant: ")
                FOUT.write("; ".join(["{"+s+"}" for s in sense.ant_list]))
            if sense.syn_list:
                FOUT.write(" syn: ")
                FOUT.write("; ".join(["{"+s+"}" for s in sense.syn_list]))
            FOUT.write("\n")
        for (lang, tr) in sense.tr_list:
            # Skip translations in languages that were not requested.  The
            # indent and newline used to be written for them as well, which
            # left whitespace-only lines in the output.
            if LANGS is not None and lang not in LANGS:
                continue
            FOUT.write("  ")
            # The language prefix is omitted only when exactly one language
            # was requested.
            if LANGS is None or len(LANGS) != 1:
                FOUT.write(lang)
                FOUT.write("→ ")
            FOUT.write(tr)
            FOUT.write("\n")