py/gadict_c5.py
changeset 646 2d488cfc4c0c
parent 644 e38cd6112193
child 686 a9e3b6050544
equal deleted inserted replaced
645:6d4a074cea27 646:2d488cfc4c0c
     5 import sys
     5 import sys
     6 import codecs
     6 import codecs
     7 import regex
     7 import regex
     8 
     8 
     9 import gadict
     9 import gadict
       
    10 import gadict_freq
    10 
    11 
    11 
    12 
    12 FINAME = None
    13 FINAME = None
    13 FONAME = None
    14 FONAME = None
    14 LANGS = None
    15 LANGS = None
       
    16 FREQ_SOURCES = []
    15 
    17 
    16 # -lang:ru,uk
    18 # -lang:ru,uk
    17 ARG_LANG_RE = regex.compile("-lang:(.+)")
    19 ARG_LANG_RE = regex.compile("-lang:(.+)")
    18 # -freq:var:TAG=FILE or -freq:freq:TAG=FILE
    20 # -freq:var:TAG=FILE or -freq:freq:TAG=FILE
    19 ARG_FREQ_RE = regex.compile("-freq:(freq|var):([^=]+)=(.+)")
    21 ARG_FREQ_RE = regex.compile("-freq:(freq|var):([^=]+)=(.+)")
    32                 if len(lang) != 2:
    34                 if len(lang) != 2:
    33                     raise Exception("Incorrect language specification: '{:s}'".format(arg))
    35                     raise Exception("Incorrect language specification: '{:s}'".format(arg))
    34             continue
    36             continue
    35         m = ARG_FREQ_RE.match(arg)
    37         m = ARG_FREQ_RE.match(arg)
    36         if m:
    38         if m:
    37             LANGS = set(arg.split(","))
    39             mode = m.group(1)
    38             for lang in LANGS:
    40             tag = m.group(2)
    39                 if len(lang) != 2:
    41             fname = m.group(3)
    40                     raise Exception("Incorrect language specification: '{:s}'".format(arg))
    42             with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
       
    43                 if mode == "var":
       
    44                     parser = gadict_freq.HeadVarParser(stream)
       
    45                 elif mode == "freq":
       
    46                     parser = gadict_freq.FreqlistParser(stream)
       
    47                 else:
       
    48                     raise Exception("Unsupported mode: '{:s}'".format(mode))
       
    49                 wlist = parser.parse()
       
    50             FREQ_SOURCES.append((tag, set(wlist)))
    41             continue
    51             continue
    42         if arg.startswith("-"):
    52         if arg.startswith("-"):
    43             raise Exception("Unsupported option format: '{:s}'".format(arg))
    53             raise Exception("Unsupported option format: '{:s}'".format(arg))
    44     if not FINAME:
    54     if not FINAME:
    45         FINAME = arg
    55         FINAME = arg
    96 FOUT.write(PRELUDE.about)
   106 FOUT.write(PRELUDE.about)
    97 FOUT.write("\n")
   107 FOUT.write("\n")
    98 
   108 
    99 
   109 
   100 for (headwords, translations) in DOM[1:]:
   110 for (headwords, translations) in DOM[1:]:
       
   111     identity = headwords[0].headword
   101     FOUT.write("_____\n\n")
   112     FOUT.write("_____\n\n")
   102     title = "; ".join([h.headword for h in headwords])
   113     title = "; ".join([h.headword for h in headwords])
   103     FOUT.write(title)
   114     FOUT.write(title)
   104     FOUT.write("\n\n")
   115     FOUT.write("\n\n")
   105     for hw in headwords:
   116     for hw in headwords:
   178             if not LANGS or len(LANGS) > 1:
   189             if not LANGS or len(LANGS) > 1:
   179                 FOUT.write(lang)
   190                 FOUT.write(lang)
   180             FOUT.write("⇒ ")
   191             FOUT.write("⇒ ")
   181             FOUT.write(tr)
   192             FOUT.write(tr)
   182             FOUT.write("\n")
   193             FOUT.write("\n")
       
   194     freqtags = []
       
   195     for (freqtag, freqset) in FREQ_SOURCES:
       
   196         if identity in freqset:
       
   197             freqtags.append(freqtag)
       
   198     if len(freqtags) > 0:
       
   199         FOUT.write(",".join(["{{{:s}}}".format(tag) for tag in freqtags]))
       
   200         FOUT.write("\n")