py/gadict_html.py
changeset 776 d57c28843156
child 778 9823606d61bd
equal deleted inserted replaced
775:6aa817943828 776:d57c28843156
       
     1 # -*- coding: utf-8 -*-
       
     2 """HTML format writer"""
       
     3 
       
     4 import io
       
     5 import sys
       
     6 import codecs
       
     7 import re
       
     8 import html
       
     9 
       
    10 import gadict
       
    11 import gadict_freq
       
    12 
       
    13 
       
    14 FINAME = None
       
    15 FONAME = None
       
    16 LANGS = None
       
    17 FREQ_SOURCES = []
       
    18 
       
    19 # -lang:ru,uk
       
    20 ARG_LANG_RE = re.compile("-lang:(.+)")
       
    21 # -freq:var:TAG=FILE or -freq:freq:TAG=FILE
       
    22 ARG_FREQ_RE = re.compile("-freq:(freq|var):([^=]+)=(.+)")
       
    23 
       
    24 look_for_files = False
       
    25 for idx in range(1, len(sys.argv)):
       
    26     arg = sys.argv[idx]
       
    27     if arg == "--":
       
    28         look_for_files = True
       
    29         continue
       
    30     if not look_for_files:
       
    31         m = ARG_LANG_RE.match(arg)
       
    32         if m:
       
    33             LANGS = set(m.group(1).split(","))
       
    34             for lang in LANGS:
       
    35                 if len(lang) != 2:
       
    36                     raise Exception("Incorrect language specification: '{:s}'".format(arg))
       
    37             continue
       
    38         m = ARG_FREQ_RE.match(arg)
       
    39         if m:
       
    40             mode = m.group(1)
       
    41             tag = m.group(2)
       
    42             fname = m.group(3)
       
    43             with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
       
    44                 if mode == "var":
       
    45                     parser = gadict_freq.WordformParser(stream)
       
    46                 elif mode == "freq":
       
    47                     parser = gadict_freq.FreqlistParser(stream)
       
    48                 else:
       
    49                     raise Exception("Unsupported mode: '{:s}'".format(mode))
       
    50                 wlist = parser.parse()
       
    51             FREQ_SOURCES.append((tag, set(wlist)))
       
    52             continue
       
    53         if arg.startswith("-"):
       
    54             raise Exception("Unsupported option format: '{:s}'".format(arg))
       
    55     if not FINAME:
       
    56         FINAME = arg
       
    57         continue
       
    58     if not FONAME:
       
    59         FONAME = arg
       
    60         continue
       
    61     raise Exception("Unnecessary argument: '{:s}'".format(arg))
       
    62 
       
    63 
       
    64 FIN = io.open(FINAME, mode='r', buffering=1, encoding="utf-8")
       
    65 
       
    66 PARSER = gadict.Parser()
       
    67 try:
       
    68     DOM = PARSER.parse(FIN)
       
    69 except gadict.ParseException as ex:
       
    70     sys.stdout.write("{:s}{:s}\n".format(FINAME, repr(ex)))
       
    71     if __debug__:
       
    72         import traceback
       
    73         traceback.print_exc()
       
    74     exit(1)
       
    75 finally:
       
    76     FIN.close()
       
    77 
       
    78 PRELUDE = DOM[0]
       
    79 
       
    80 
       
    81 if FONAME is None:
       
    82     FOUT = sys.stdout
       
    83 else:
       
    84     FOUT = codecs.open(FONAME, "w", "utf-8")
       
    85 
       
    86 HTML_HEADER = """<!DOCTYPE html>
       
    87 <html>
       
    88 <head>
       
    89   <meta charset="utf-8">
       
    90   <title>{title}</title>
       
    91 <style>
       
    92 body {{ max-width: 30em; margin: 0 auto; }}
       
    93 .article {{
       
    94   margin: .5em 0;
       
    95   border-bottom: 1px solid yellow;
       
    96 }}
       
    97 .sense {{ margin-top: .5em; }}
       
    98 .head {{ color: black; font-weight: bold; }}
       
    99 .pron {{ color: green; }}
       
   100 .attr {{ color: magenta; }}
       
   101 .pos {{ color: green; font: weight: bold; }}
       
   102 .lang {{ color: brown; font: weight: bold; }}
       
   103 .freq {{ color: red; }}
       
   104 .ex {{ font-style: italic; }}
       
   105 </style>
       
   106 </head>
       
   107 <body>
       
   108 """
       
   109 HTML_FOOTER = """</body>
       
   110 </html>
       
   111 """
       
   112 
       
   113 FOUT.write(HTML_HEADER.format(title=PRELUDE.name))
       
   114 FOUT.write("<h1>{} dictionary</h1>\n".format(html.escape(PRELUDE.name)))
       
   115 # FOUT.write("<a href='{}'>Home page</a>")
       
   116 # FOUT.write(" , ".join(PRELUDE.urls))
       
   117 FOUT.write("<p>License: ")
       
   118 FOUT.write(html.escape(", ".join(PRELUDE.licences)))
       
   119 # FOUT.write("</p>\n<p>")
       
   120 # FOUT.write(html.escape(PRELUDE.about))
       
   121 FOUT.write("</p>\n")
       
   122 
       
   123 def link(lst):
       
   124     buf = []
       
   125     for el in lst:
       
   126         el = html.escape(el)
       
   127         lnk = "<a href='#{}'>{}</a>".format(el, el)
       
   128         buf.append(lnk)
       
   129     return "; ".join(buf)
       
   130 
       
   131 for (headwords, translations) in DOM[1:]:
       
   132     identity = headwords[0].headword
       
   133     FOUT.write("<div id='{}' class='article'>\n".format(html.escape(identity)))
       
   134     for hw in headwords:
       
   135         FOUT.write("<div>")
       
   136         FOUT.write("<span class='head'>{}</span>".format(html.escape(hw.headword)))
       
   137         if hw.pron is not None:
       
   138             FOUT.write(" <span class='pron'>[")
       
   139             FOUT.write(html.escape(hw.pron))
       
   140             FOUT.write("]</span>")
       
   141         if len(hw.attrs) > 0:
       
   142             FOUT.write(" <span class='attr'>")
       
   143             l = ["«"+x+"»" for x in hw.attrs]
       
   144             l.sort()
       
   145             FOUT.write(", ".join(l))
       
   146             FOUT.write("</span>")
       
   147         FOUT.write("</div>\n")
       
   148     for sense in translations:
       
   149         if not sense:
       
   150             raise Exception("""Empty sense for article: """ + headwords.__iter__().__next__())
       
   151         FOUT.write("<div class='sense'>")
       
   152         if sense.pos:
       
   153             FOUT.write("<span class='pos'>«")
       
   154             FOUT.write(html.escape(sense.pos))
       
   155             FOUT.write("»</span> ")
       
   156             need_sep = False
       
   157             if sense.topic_list and len(sense.topic_list) > 0:
       
   158                 FOUT.write("<span class='topic'>")
       
   159                 FOUT.write(html.escape(", ".join(sense.topic_list)))
       
   160                 FOUT.write("</span>")
       
   161                 need_sep = True
       
   162             if sense.ant_list and len(sense.ant_list) > 0:
       
   163                 FOUT.write(" ant: <span class='ant'>")
       
   164                 FOUT.write(link(sense.ant_list))
       
   165                 FOUT.write("</span>")
       
   166                 need_sep = True
       
   167             if sense.syn_list and len(sense.syn_list) > 0:
       
   168                 FOUT.write(" syn: <span class='syn'>")
       
   169                 FOUT.write(link(sense.syn_list))
       
   170                 FOUT.write("</span>")
       
   171                 need_sep = True
       
   172             if sense.hyper_list and len(sense.hyper_list) > 0:
       
   173                 FOUT.write(" hyper: <span class='hyper'>")
       
   174                 FOUT.write(link(sense.hyper_list))
       
   175                 FOUT.write("</span>")
       
   176                 need_sep = True
       
   177             if sense.hypo_list and len(sense.hypo_list) > 0:
       
   178                 FOUT.write(" hypo: <span class='hypo'>")
       
   179                 FOUT.write(link(sense.hypo_list))
       
   180                 FOUT.write("</span>")
       
   181                 need_sep = True
       
   182             if sense.rel_list and len(sense.rel_list) > 0:
       
   183                 FOUT.write(" see: <span class='see'>")
       
   184                 FOUT.write(link(sense.rel_list))
       
   185                 FOUT.write("</span>")
       
   186         for (lang, tr) in sense.tr_list or []:
       
   187             if LANGS and lang not in LANGS:
       
   188                 continue
       
   189             FOUT.write("<div>")
       
   190             if not LANGS or len(LANGS) > 1:
       
   191                 FOUT.write("<span class='lang tr'>{}</span> ".format(html.escape(lang)))
       
   192             FOUT.write("<span class='tr'>{}</span>".format(html.escape(tr)))
       
   193             FOUT.write("</div>")
       
   194         for (lang, tr) in sense.glos_list or []:
       
   195             if LANGS and lang not in LANGS:
       
   196                 continue
       
   197             FOUT.write("<div>")
       
   198             if not LANGS or len(LANGS) > 1:
       
   199                 FOUT.write("<span class='lang glos'>{}</span> ".format(html.escape(lang)))
       
   200             FOUT.write("<span class='glos'>{}</span>".format(html.escape(tr)))
       
   201             FOUT.write("</div>")
       
   202         for (lang, tr) in sense.ex_list or []:
       
   203             if LANGS and lang not in LANGS:
       
   204                 continue
       
   205             FOUT.write("<div>")
       
   206             if not LANGS or len(LANGS) > 1:
       
   207                 FOUT.write("<span class='lang ex'>{}</span> ".format(html.escape(lang)))
       
   208             FOUT.write("<span class='ex'>{}</span>".format(html.escape(tr)))
       
   209             FOUT.write("</div>")
       
   210         FOUT.write("</div>")
       
   211     freqtags = []
       
   212     for (freqtag, freqset) in FREQ_SOURCES:
       
   213         if identity in freqset:
       
   214             freqtags.append(freqtag)
       
   215     if len(freqtags) > 0:
       
   216         FOUT.write("<p class='freq'>")
       
   217         FOUT.write(html.escape(",".join(freqtags)))
       
   218         FOUT.write("</p>")
       
   219     FOUT.write("</div>")
       
   220 
       
   221 FOUT.write(HTML_FOOTER)
       
   222 FOUT.close()