py/gadict_c5.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Thu, 15 Sep 2016 20:13:18 +0300
changeset 558 53fd793e345d
parent 554 59714b9033bc
child 560 dad049a0f585
permissions -rw-r--r--
Add shortcut to deploy to HG repos.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
     1
# -*- coding: utf-8 -*-
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
     2
"""dictd C5 format writer"""
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     3
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
import io
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
import sys
463
6eb9d9e1067a Fix error under Cygwin Python:
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 443
diff changeset
     6
import codecs
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     7
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
     8
import gadict
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     9
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    10
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    11
FINAME = None
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    12
FONAME = None
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    13
if len(sys.argv) >= 2:
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    14
    FINAME = sys.argv[1]
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    15
if len(sys.argv) >= 3:
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    16
    FONAME = sys.argv[2]
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
    17
LANGS = None
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
    18
if len(sys.argv) >= 4:
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
    19
    LANGS = set(sys.argv[3].split(","))
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    20
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    21
FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    22
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    23
PARSER = gadict.Parser()
399
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    24
try:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    25
    DOM = PARSER.parse(FIN)
399
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    26
except gadict.ParseException as ex:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    27
    sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
399
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    28
    if __debug__:
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    29
        import traceback
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    30
        traceback.print_exc()
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    31
    # Use sys.exit: the bare exit() helper is injected by the site module for
    # interactive use and is not guaranteed to exist (e.g. under python -S).
    sys.exit(1)
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    32
finally:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    33
    FIN.close()
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    34
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    35
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    36
PRELUDE = DOM[0]
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    37
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    38
with open(FONAME+".name", "w") as f:        # for dictfmt -s
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    39
    if PRELUDE.name is not None:
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    40
        f.write(PRELUDE.name)
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    41
    f.write("\n")
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    42
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    43
with open(FONAME+".url", "w") as f:         # for dictfmt -u
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    44
    if len(PRELUDE.urls) > 0:
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    45
        f.write(PRELUDE.urls[0])
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    46
    f.write("\n")
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    47
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    48
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    49
if FONAME is None:
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    50
    FOUT = sys.stdout
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    51
else:
463
6eb9d9e1067a Fix error under Cygwin Python:
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 443
diff changeset
    52
    FOUT = codecs.open(FONAME, "w", "utf-8")
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    53
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    54
if PRELUDE.name is not None:
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    55
    FOUT.write("Dictionary name: ")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    56
    FOUT.write(PRELUDE.name)
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    57
    FOUT.write("\n\n")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    58
FOUT.write("Project URLs: ")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    59
FOUT.write(" , ".join(PRELUDE.urls))
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    60
FOUT.write("\n\n")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    61
FOUT.write("Project licenses: ")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    62
FOUT.write(", ".join(PRELUDE.licences))
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    63
FOUT.write("\n\n")
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    64
FOUT.write(PRELUDE.about)
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    65
FOUT.write("\n")
402
b47698d5ccab Parse dictionary metainfo.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 399
diff changeset
    66
537
a70ab6a33bfa Order irregular verbs, noun plural form and adverb/adjective comparison form
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 532
diff changeset
    67
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    68
for (headwords, translations) in DOM[1:]:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    69
    FOUT.write("_____\n\n")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    70
    title = "; ".join([h.headword for h in headwords])
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    71
    FOUT.write(title)
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    72
    FOUT.write("\n\n")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    73
    for hw in headwords:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    74
        FOUT.write("  ")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    75
        FOUT.write(hw.headword)
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    76
        if hw.pron is not None:
433
e8e9a3ddce4f Hold pronunciation on same line with headword to reduce article height.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 408
diff changeset
    77
            FOUT.write(" [")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    78
            FOUT.write(hw.pron)
433
e8e9a3ddce4f Hold pronunciation on same line with headword to reduce article height.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 408
diff changeset
    79
            FOUT.write("]")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    80
        if len(hw.attrs) > 0:
443
1753abbd1995 Write word variance attributed on same line with headword to preserve space.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 434
diff changeset
    81
            FOUT.write(" ")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    82
            l = ["«"+x+"»" for x in hw.attrs]
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    83
            l.sort()
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    84
            FOUT.write(", ".join(l))
443
1753abbd1995 Write word variance attributed on same line with headword to preserve space.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 434
diff changeset
    85
        FOUT.write("\n")
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    86
    FOUT.write("\n")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    87
    for sense in translations:
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
    88
        if not sense:
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
    89
            # Name the article by its first headword. Entries in `headwords`
            # are objects, so use their .headword text; concatenating the
            # object itself to a str would raise TypeError and hide this error.
            raise Exception("""Empty sense for article: """ + headwords[0].headword)
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    90
        FOUT.write("  ")
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
    91
        if sense.pos:
464
5775915d15dc \langle symbol doesn't displayed in GoldenDict on Windows. Replace with
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 463
diff changeset
    92
            FOUT.write("«")
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
    93
            FOUT.write(sense.pos)
464
5775915d15dc \langle symbol doesn't displayed in GoldenDict on Windows. Replace with
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 463
diff changeset
    94
            FOUT.write("» ")
532
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
    95
            if sense.ant_list and len(sense.ant_list) > 0:
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
    96
                FOUT.write(" ant: ")
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
    97
                FOUT.write("; ".join(["{"+s+"}" for s in sense.ant_list]))
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
    98
            if sense.syn_list and len(sense.syn_list) > 0:
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
    99
                FOUT.write(" syn: ")
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   100
                FOUT.write("; ".join(["{"+s+"}" for s in sense.syn_list]))
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   101
            FOUT.write("\n")
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
   102
        for (lang, tr) in sense.tr_list:
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   103
            FOUT.write("  ")
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   104
            if LANGS is None:
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   105
                FOUT.write(lang)
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   106
                FOUT.write("→ ")
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   107
                FOUT.write(tr)
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   108
            elif lang in LANGS:
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   109
                if len(LANGS) == 1:
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   110
                    FOUT.write(tr)
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   111
                else:
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   112
                    FOUT.write(lang)
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   113
                    FOUT.write("→ ")
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   114
                    FOUT.write(tr)
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   115
            FOUT.write("\n")