py/gadict_c5.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Tue, 05 Dec 2023 13:24:46 +0200
changeset 1353 dcda231188dc
parent 1347 272ec25b6f12
permissions -rwxr-xr-x
New articles.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
     1
# -*- coding: utf-8 -*-
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
     2
"""dictd C5 format writer"""
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     3
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
import io
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
import sys
463
6eb9d9e1067a Fix error under Cygwin Python:
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 443
diff changeset
     6
import codecs
757
5417f2102dc5 Switch to built-in `re` Python module over `regex`.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 727
diff changeset
     7
import re
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     8
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
     9
import gadict
646
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
    10
import gadict_freq
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    11
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    12
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    13
# Settings filled in from the command line by the loop below.
FINAME = None           # input file name: first positional argument
FONAME = None           # output file name: second positional argument
LANGS = None            # set of language codes from -lang:, None if not given
FREQ_SOURCES = []       # (tag, words) pairs collected from -freq: and -grp:

# -lang:ru,uk
ARG_LANG_RE = re.compile("-lang:(.+)")
# -freq:var:TAG=FILE or -freq:freq:TAG=FILE
ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)")
# -grp:GLOB
ARG_GRP_RE = re.compile("-grp:(.+)")

# After a literal "--" every remaining argument is taken as a file name,
# even when it starts with "-".
look_for_files = False
for arg in sys.argv[1:]:
    if arg == "--":
        look_for_files = True
        continue
    if not look_for_files:
        m = ARG_LANG_RE.match(arg)
        if m is not None:
            LANGS = set(m.group(1).split(","))
            # Every language code must be exactly two characters long.
            if any(len(code) != 2 for code in LANGS):
                raise Exception("Incorrect language specification: '{:s}'".format(arg))
            continue
        m = ARG_FREQ_RE.match(arg)
        if m is not None:
            mode, tag, fname = m.groups()
            with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
                if mode == "var":
                    parser = gadict_freq.WordformParser(stream)
                elif mode == "freq":
                    parser = gadict_freq.FreqlistParser(stream)
                else:
                    raise Exception("Unsupported mode: '{:s}'".format(mode))
                wlist = parser.parse()
            FREQ_SOURCES.append((tag, set(wlist)))
            continue
        m = ARG_GRP_RE.match(arg)
        if m is not None:
            parser = gadict_freq.WordformGroupParser(m.group(1))
            # parse() returns a mapping; each (tag, words) item becomes a source.
            FREQ_SOURCES.extend(parser.parse().items())
            continue
        if arg.startswith("-"):
            raise Exception("Unsupported option format: '{:s}'".format(arg))
    # Positional arguments: input name first, then output name, nothing more.
    if not FINAME:
        FINAME = arg
    elif not FONAME:
        FONAME = arg
    else:
        raise Exception("Unnecessary argument: '{:s}'".format(arg))
e38cd6112193 New command line format for passing language list.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 618
diff changeset
    70
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    71
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    72
# Read and parse the whole input file into DOM before any output is produced.
FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")

PARSER = gadict.Parser()
try:
    DOM = PARSER.parse(FIN)
except gadict.ParseException as ex:
    # The parser does not know the file name, so it is prepended here.
    sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
    if __debug__:
        import traceback
        traceback.print_exc()
    # sys.exit() rather than the bare exit(): the latter is only injected by
    # the site module and is missing under "python -S" or in frozen builds.
    sys.exit(1)
finally:
    FIN.close()
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    85
404
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    86
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
    87
# The first DOM element carries the dictionary metainfo (name, urls, ...).
PRELUDE = DOM[0]

# The sidecar files are named after the output file, so they can only be
# written when an output name was given.  Without this guard a missing
# FONAME raised TypeError on FONAME+".name" and the stdout fallback further
# down could never be reached.
if FONAME is not None:
    with open(FONAME+".name", "w") as f:        # for dictfmt -s
        if PRELUDE.name is not None:
            f.write(PRELUDE.name)
        f.write("\n")

    with open(FONAME+".url", "w") as f:         # for dictfmt -u
        if len(PRELUDE.urls) > 0:
            f.write(PRELUDE.urls[0])
        f.write("\n")
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    98
cdfa6dfa9072 Integrate dictionary metainfo line name and home page url into dictd build.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 403
diff changeset
    99
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   100
# Articles go to the named output file (UTF-8), or to stdout when no output
# name was given.
FOUT = sys.stdout if FONAME is None else codecs.open(FONAME, "w", "utf-8")

# Header article: dictionary name (only when set), project URLs, licences
# and the free-form "about" text.
if PRELUDE.name is not None:
    for chunk in ("Dictionary name: ", PRELUDE.name, "\n\n"):
        FOUT.write(chunk)

FOUT.write("Project URLs: ")
FOUT.write(" , ".join(PRELUDE.urls))
FOUT.write("\n\n")

FOUT.write("Project licenses: ")
FOUT.write(", ".join(PRELUDE.licences))
FOUT.write("\n\n")

FOUT.write(PRELUDE.about)
FOUT.write("\n")
402
b47698d5ccab Parse dictionary metainfo.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 399
diff changeset
   117
537
a70ab6a33bfa Order irregular verbs, noun plural form and adverb/adjective comparison form
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 532
diff changeset
   118
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   119
for (headwords, translations) in DOM[1:]:
646
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   120
    identity = headwords[0].headword
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   121
    FOUT.write("_____\n\n")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   122
    title = "; ".join([h.headword for h in headwords])
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   123
    FOUT.write(title)
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   124
    FOUT.write("\n\n")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   125
    for hw in headwords:
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   126
        FOUT.write(hw.headword)
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   127
        if hw.pron is not None:
433
e8e9a3ddce4f Hold pronunciation on same line with headword to reduce article height.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 408
diff changeset
   128
            FOUT.write(" [")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   129
            FOUT.write(hw.pron)
433
e8e9a3ddce4f Hold pronunciation on same line with headword to reduce article height.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 408
diff changeset
   130
            FOUT.write("]")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   131
        if len(hw.attrs) > 0:
443
1753abbd1995 Write word variance attributed on same line with headword to preserve space.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 434
diff changeset
   132
            FOUT.write(" ")
1315
6532512bbac4 Fixed compatibility with Python 2.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 984
diff changeset
   133
            l = [u"«"+x+u"»" for x in hw.attrs]
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
   134
            l.sort()
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   135
            FOUT.write(", ".join(l))
936
8d6eda4aa795 Fixed typo.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 931
diff changeset
   136
        if hw.homo is not None and len(hw.homo) > 0:
931
9a5f97027ee7 Added support for homophones.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 796
diff changeset
   137
            FOUT.write(" homo: ")
9a5f97027ee7 Added support for homophones.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 796
diff changeset
   138
            l = ["{"+x+"}" for x in hw.homo]
9a5f97027ee7 Added support for homophones.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 796
diff changeset
   139
            l.sort()
9a5f97027ee7 Added support for homophones.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 796
diff changeset
   140
            FOUT.write(", ".join(l))
443
1753abbd1995 Write word variance attributed on same line with headword to preserve space.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 434
diff changeset
   141
        FOUT.write("\n")
405
6208d07b30f0 Fix warnings from pyflakes.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 404
diff changeset
   142
    FOUT.write("\n")
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   143
    for sense in translations:
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
   144
        if not sense:
554
59714b9033bc Store headword structure as class. Store headwords in list to preserve order
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 537
diff changeset
   145
            # Name the article by its first headword string; concatenating the
            # headword object itself would raise TypeError instead of this error.
            raise Exception("""Empty sense for article: """ + headwords[0].headword)
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
   146
        if sense.pos:
1315
6532512bbac4 Fixed compatibility with Python 2.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 984
diff changeset
   147
            FOUT.write(u"● «")
530
91771594bc8b Make storage for topics, antonyms and synonyms. Require pos marker.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 464
diff changeset
   148
            FOUT.write(sense.pos)
1315
6532512bbac4 Fixed compatibility with Python 2.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 984
diff changeset
   149
            FOUT.write(u"»")
565
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   150
            need_sep = False
686
a9e3b6050544 Add topic to dictd and Anki.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 646
diff changeset
   151
            if sense.topic_list and len(sense.topic_list) > 0:
796
cf1f6e851933 LEFT/RIGHT FLOOR symbol are not available by default in Windows. Replace with
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 757
diff changeset
   152
                FOUT.write(" topic: ")
686
a9e3b6050544 Add topic to dictd and Anki.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 646
diff changeset
   153
                FOUT.write(", ".join(["{"+s+"}" for s in sense.topic_list]))
a9e3b6050544 Add topic to dictd and Anki.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 646
diff changeset
   154
                need_sep = True
532
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   155
            if sense.ant_list and len(sense.ant_list) > 0:
796
cf1f6e851933 LEFT/RIGHT FLOOR symbol are not available by default in Windows. Replace with
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 757
diff changeset
   156
                if need_sep:
cf1f6e851933 LEFT/RIGHT FLOOR symbol are not available by default in Windows. Replace with
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 757
diff changeset
   157
                    FOUT.write(" |")
532
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   158
                FOUT.write(" ant: ")
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   159
                FOUT.write("; ".join(["{"+s+"}" for s in sense.ant_list]))
565
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   160
                need_sep = True
532
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   161
            if sense.syn_list and len(sense.syn_list) > 0:
565
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   162
                if need_sep:
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   163
                    FOUT.write(" |")
532
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   164
                FOUT.write(" syn: ")
fc91cce0dff8 Add links to synonyms and antonyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 530
diff changeset
   165
                FOUT.write("; ".join(["{"+s+"}" for s in sense.syn_list]))
565
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   166
                need_sep = True
618
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   167
            if sense.hyper_list and len(sense.hyper_list) > 0:
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   168
                if need_sep:
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   169
                    FOUT.write(" |")
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   170
                FOUT.write(" hyper: ")
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   171
                FOUT.write("; ".join(["{"+s+"}" for s in sense.hyper_list]))
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   172
                need_sep = True
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   173
            if sense.hypo_list and len(sense.hypo_list) > 0:
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   174
                if need_sep:
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   175
                    FOUT.write(" |")
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   176
                FOUT.write(" hypo: ")
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   177
                FOUT.write("; ".join(["{"+s+"}" for s in sense.hypo_list]))
6ad7203ac9dc Add support for hypernyms and hyponyms.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 597
diff changeset
   178
                need_sep = True
984
73d6e2631338 Added support for collocations' reference.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 936
diff changeset
   179
            if sense.col_list and len(sense.col_list) > 0:
73d6e2631338 Added support for collocations' reference.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 936
diff changeset
   180
                if need_sep:
73d6e2631338 Added support for collocations' reference.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 936
diff changeset
   181
                    FOUT.write(" |")
73d6e2631338 Added support for collocations' reference.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 936
diff changeset
   182
                FOUT.write(" col: ")
73d6e2631338 Added support for collocations' reference.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 936
diff changeset
   183
                FOUT.write("; ".join(["{"+s+"}" for s in sense.col_list]))
73d6e2631338 Added support for collocations' reference.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 936
diff changeset
   184
                need_sep = True
565
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   185
            if sense.rel_list and len(sense.rel_list) > 0:
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   186
                if need_sep:
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   187
                    FOUT.write(" |")
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   188
                FOUT.write(" see: ")
ac68f2680ea0 Add syntax to add related words. Add separators between ant/syn/rel in
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 560
diff changeset
   189
                FOUT.write("; ".join(["{"+s+"}" for s in sense.rel_list]))
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   190
            if not LANGS or len(LANGS) != 1:
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   191
                FOUT.write("\n")
584
255e568d0149 Fix typo after copy/paste.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 569
diff changeset
   192
        for (lang, tr) in sense.tr_list or []:
696
537b8ce489af Fix warning from pylint: [C0113(unneeded-not), ] Consider changing "not lang in LANGS" to "lang not in LANGS".
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 690
diff changeset
   193
            if LANGS and lang not in LANGS:
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   194
                continue
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   195
            FOUT.write("  ")
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   196
            if not LANGS or len(LANGS) > 1:
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   197
                FOUT.write(lang)
1315
6532512bbac4 Fixed compatibility with Python 2.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 984
diff changeset
   198
                FOUT.write(u"→ ")
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   199
            FOUT.write(tr)
408
52cd8e5f465f Write translation for different language by default.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 406
diff changeset
   200
            FOUT.write("\n")
596
6c5ff92517a4 Show examples at the end.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 591
diff changeset
   201
        for (lang, tr) in sense.glos_list or []:
696
537b8ce489af Fix warning from pylint: [C0113(unneeded-not), ] Consider changing "not lang in LANGS" to "lang not in LANGS".
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 690
diff changeset
   202
            if LANGS and lang not in LANGS:
596
6c5ff92517a4 Show examples at the end.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 591
diff changeset
   203
                continue
597
7ad3ac702099 Use usual intent level for glossary.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 596
diff changeset
   204
            FOUT.write("  ")
596
6c5ff92517a4 Show examples at the end.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 591
diff changeset
   205
            if not LANGS or len(LANGS) > 1:
6c5ff92517a4 Show examples at the end.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 591
diff changeset
   206
                FOUT.write(lang)
1315
6532512bbac4 Fixed compatibility with Python 2.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 984
diff changeset
   207
            FOUT.write(u"↦ ")
596
6c5ff92517a4 Show examples at the end.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 591
diff changeset
   208
            FOUT.write(tr)
6c5ff92517a4 Show examples at the end.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 591
diff changeset
   209
            FOUT.write("\n")
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   210
        for (lang, tr) in sense.ex_list or []:
696
537b8ce489af Fix warning from pylint: [C0113(unneeded-not), ] Consider changing "not lang in LANGS" to "lang not in LANGS".
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 690
diff changeset
   211
            if LANGS and lang not in LANGS:
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   212
                continue
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   213
            FOUT.write("    ")
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   214
            if not LANGS or len(LANGS) > 1:
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   215
                FOUT.write(lang)
1315
6532512bbac4 Fixed compatibility with Python 2.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 984
diff changeset
   216
            FOUT.write(u"⇒ ")
560
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   217
            FOUT.write(tr)
dad049a0f585 Include examples into dictionary articles and Packaged Anki Desk files.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 554
diff changeset
   218
            FOUT.write("\n")
646
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   219
    freqtags = []
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   220
    for (freqtag, freqset) in FREQ_SOURCES:
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   221
        if identity in freqset:
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   222
            freqtags.append(freqtag)
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   223
    if len(freqtags) > 0:
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   224
        FOUT.write(",".join(["{{{:s}}}".format(tag) for tag in freqtags]))
2d488cfc4c0c Add frequency markers to dictd dictionary and Anki cards.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 644
diff changeset
   225
        FOUT.write("\n")