py/gadict_srs_tab.py
changeset 536 c9f0064d8661
child 554 59714b9033bc
equal deleted inserted replaced
535:e5dd169f4bdb 536:c9f0064d8661
       
     1 # -*- coding: utf-8 -*-
       
     2 """Spaced repetition TAB format writer"""
       
     3 
       
     4 import io
       
     5 import sys
       
     6 import codecs
       
     7 
       
     8 import gadict
       
     9 
       
    10 
       
    11 FINAME = None
       
    12 FONAME = None
       
    13 if len(sys.argv) >= 2:
       
    14     FINAME = sys.argv[1]
       
    15 if len(sys.argv) >= 3:
       
    16     FONAME = sys.argv[2]
       
    17 LANGS = None
       
    18 if len(sys.argv) >= 4:
       
    19     LANGS = set(sys.argv[3].split(","))
       
    20 
       
    21 FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
       
    22 
       
    23 PARSER = gadict.Parser()
       
    24 try:
       
    25     DOM = PARSER.parse(FIN)
       
    26 except gadict.ParseException as ex:
       
    27     sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
       
    28     if __debug__:
       
    29         import traceback
       
    30         traceback.print_exc()
       
    31     exit(1)
       
    32 finally:
       
    33     FIN.close()
       
    34 
       
    35 
       
    36 if FONAME is None:
       
    37     FOUT = sys.stdout
       
    38 else:
       
    39     FOUT = codecs.open(FONAME, "w", "utf-8")
       
    40 
       
    41 def attr_key(item):
       
    42     (word, (pron, attrs)) = item
       
    43     if not attrs:
       
    44         return "zzz"
       
    45     best_vattr = None
       
    46     for attr in attrs:
       
    47         if attr in ["v1", "v2", "v3"]:
       
    48             if not best_vattr or (best_vattr and best_vattr > attr):
       
    49                 best_vattr = attr
       
    50     if best_vattr:
       
    51         return best_vattr
       
    52     for attr in attrs:                      # single/plural
       
    53         if attr in ["s"]:
       
    54             return attr
       
    55     for attr in attrs:                      # comparative/superlative
       
    56         if attr in ["comp"]:
       
    57             return attr
       
    58     for attr in attrs:                      # Am/Br/Au
       
    59         if attr in ["Am"]:
       
    60             return attr
       
    61     return "zzz"
       
    62 
       
    63 for idx in range(1, len(DOM)):
       
    64     article = DOM[idx]
       
    65     defs = article[0].items()
       
    66     defs = sorted(defs, key = attr_key)
       
    67     lines = []
       
    68     for (word, (pron, attrs)) in defs:
       
    69         line = "<b>"+word+"</b>"
       
    70         if pron:
       
    71             line += " ["+pron+"]"
       
    72         if len(attrs) > 0:
       
    73             attrs = [" «"+x+"»" for x in attrs]
       
    74             attrs.sort()
       
    75             line += ",".join(attrs)
       
    76         lines.append(line)
       
    77     question = "<br>".join(lines)
       
    78     FOUT.write(question)
       
    79     FOUT.write("\t")
       
    80     for sense in article[1]:
       
    81         if not sense:
       
    82             raise Exception("""Empty sense for article: """ + article[0].__iter__().__next__())
       
    83         if sense.pos:
       
    84             FOUT.write('<i style="color: green;">')
       
    85             FOUT.write(sense.pos)
       
    86             FOUT.write('</i>')
       
    87             if sense.ant_list and len(sense.ant_list) > 0:
       
    88                 FOUT.write(" <i>ant: ")
       
    89                 FOUT.write("; ".join(sense.ant_list))
       
    90                 FOUT.write("</i>")
       
    91             if sense.syn_list and len(sense.syn_list) > 0:
       
    92                 FOUT.write(" <i>syn: ")
       
    93                 FOUT.write("; ".join(sense.syn_list))
       
    94                 FOUT.write("</i>")
       
    95             if LANGS and len(LANGS) > 1:
       
    96                 FOUT.write("<br>")
       
    97             else:
       
    98                 FOUT.write(" ")
       
    99         for (lang, tr) in sense.tr_list:
       
   100             tr = tr.replace('\n', ' ')
       
   101             if LANGS is None:
       
   102                 FOUT.write(tr)
       
   103             elif lang in LANGS:
       
   104                 if len(LANGS) == 1:
       
   105                     FOUT.write(tr)
       
   106                 else:
       
   107                     FOUT.write('<i style="color: blue;">')
       
   108                     FOUT.write(lang)
       
   109                     FOUT.write("</i> ")
       
   110                     FOUT.write(tr)
       
   111             FOUT.write("<br>")
       
   112     FOUT.write("\n")