py/gadict_c5.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sun, 27 Mar 2016 22:43:45 +0300
changeset 399 a6a7036f3c6f
parent 394 4d45194c71b6
child 402 b47698d5ccab
permissions -rw-r--r--
File name is not available in parser. Move error printing to writer.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     1
394
4d45194c71b6 Split generator from parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 393
diff changeset
     2
import gadict
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     3
import io
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
import sys
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     6
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     7
fgadict = None
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     8
fnout = None
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     9
if len(sys.argv) >= 2:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    10
    fgadict = sys.argv[1]
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    11
if len(sys.argv) >= 3:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    12
    fnout = sys.argv[2]
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    13
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    14
fin = io.open(fgadict, mode='r', buffering=1, encoding="utf-8")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    15
if fnout is None:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    16
    fout = sys.stdout
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    17
else:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    18
    fout = open(fnout, "w")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    19
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    20
394
4d45194c71b6 Split generator from parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 393
diff changeset
    21
parser = gadict.Parser()
399
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    22
try:
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    23
    dom = parser.parse(fin)
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    24
except gadict.ParseException as ex:
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    25
    sys.stdout.write("{:s}{:s}\n".format(fgadict, repr(ex)))
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    26
    if __debug__:
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    27
        import traceback
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    28
        traceback.print_exc()
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    29
    exit(1)
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    30
finally:
a6a7036f3c6f File name is not available in parser. Move error printing to writer.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 394
diff changeset
    31
    fin.close()
385
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    32
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    33
for idx in range(1, len(dom)):
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    34
    article = dom[idx]
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    35
    fout.write("_____\n\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    36
    title = "; ".join(article[0].keys())
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    37
    fout.write(title)
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    38
    fout.write("\n\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    39
    for (word, (pron, attrs)) in article[0].items():
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    40
        if word == "approach":
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    41
            fout.write(str(article[0]))
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    42
        fout.write("  ")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    43
        fout.write(word)
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    44
        fout.write("\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    45
        if pron is not None:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    46
            fout.write("    [")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    47
            fout.write(pron)
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    48
            fout.write("]\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    49
        if len(attrs) > 0:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    50
            fout.write("    ")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    51
            l = list(attrs)
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    52
            l.sort()
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    53
            fout.write(", ".join(l))
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    54
            fout.write("\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    55
    fout.write("\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    56
    for (pos, trs, exs) in article[1]:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    57
        fout.write("  ")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    58
        if pos is not None:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    59
            fout.write("⟨")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    60
            fout.write(pos)
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    61
            fout.write("⟩ ")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    62
        for (lang, tr) in trs:
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    63
            if lang == "ru":
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    64
                fout.write(tr)
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    65
                break
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    66
        fout.write("\n")
18284ce77c7a gadict format parser.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    67