py/gadict_freq.py
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 22 Jul 2023 00:13:47 +0300
changeset 1351 ae963ee79b49
parent 1347 272ec25b6f12
permissions -rw-r--r--
Added support for 25k freq wordlist for dictionary as an HTML page.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
643
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     1
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     2
import sys
1347
272ec25b6f12 Mark word frequency based on Paul Nation BNC+COCA 25k wordfamily list.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 1004
diff changeset
     3
import glob
643
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     4
import codecs
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     5
import io
757
5417f2102dc5 Switch to built-in `re` Python module over `regex`.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 723
diff changeset
     6
import re
643
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     7
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
     8
class WordlistParser:
    """Reader for a plain word list: one entry per line of a text stream."""

    def __init__(self, stream):
        """Keep a reference to *stream*; only its ``readline()`` is used."""
        self.stream = stream

    def parse(self):
        """Return the remaining lines of the stream as a list of strings.

        Each line is stripped of leading and trailing whitespace.  Blank
        lines are kept (as empty strings).  Reading stops when
        ``readline()`` returns an empty value, i.e. at end of stream.
        """
        entries = []
        # A blank line still carries its newline, so an empty result from
        # readline() can only mean end of stream.
        while raw := self.stream.readline():
            entries.append(raw.strip())
        return entries
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    22
723
53095b480a73 Treat first argument in format like the rest arguments. Targets to print
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 646
diff changeset
    23
class WordformParser:
    """Reader for a word-form list with optional tab-indented lines.

    Every line is split into an optional single leading tab and the rest
    of the line.  Tab-indented lines are presumably derived forms listed
    under a base word -- TODO confirm against the data files.
    """

    # Group 1: a single leading tab (or None); group 2: the rest of the line.
    BASEVAR_RE = re.compile(u"^(\t)?(.*)$", re.UNICODE)

    def __init__(self, stream, limit = None, ignore_tab = True):
        """Prepare a parser over *stream*.

        stream      -- object providing ``readline()``.
        limit       -- maximum number of headwords to return; a falsy value
                       (``None`` or ``0``) means no limit.
        ignore_tab  -- when true, lines starting with a tab are skipped and
                       do not count towards *limit*.
        """
        self.stream = stream
        self.limit = limit
        self.ignore_tab = ignore_tab
        # Number of lines read so far.
        self.lineno = 0
        # Number of lines that were not skipped (includes the line on which
        # the limit was exceeded, if any).
        self.cnt = 0

    def parse(self) -> list[str]:
        """Return the headwords from the stream, stripped and lower-cased.

        Reading stops at end of stream or as soon as more than ``limit``
        non-skipped lines have been seen.  Raises ``Exception`` for a line
        that does not match ``BASEVAR_RE``.
        """
        headwords = []
        while raw := self.stream.readline():
            self.lineno += 1
            matched = self.BASEVAR_RE.match(raw)
            if matched is None:
                raise Exception("Line {:d}: '{:s}' wrong format\n".format(self.lineno, raw))
            indent, body = matched.groups()
            if indent and self.ignore_tab:
                # Skipped lines are not counted against the limit.
                continue
            self.cnt += 1
            if self.limit and self.cnt > self.limit:
                break
            headwords.append(body.strip().lower())
        return headwords
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    53
1347
272ec25b6f12 Mark word frequency based on Paul Nation BNC+COCA 25k wordfamily list.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 1004
diff changeset
    54
class WordformGroupParser:
    """Load several word-form files selected by a one-asterisk glob pattern.

    The part of each file name matched by the asterisk becomes the key
    ("tag") under which that file's words are stored.
    """

    def __init__(self, globpatt):
        """Validate and store *globpatt*.

        Raises ``Exception`` unless the pattern contains exactly one ``*``.
        """
        asterisks = globpatt.count('*')
        if asterisks != 1:
            raise Exception("Glob pattern should have exactly one asterisk: {:s}".format(globpatt))
        self.globpatt = globpatt
        # Offset of the asterisk inside the pattern.
        self.astOff = globpatt.index('*')
        self.cnt = 0

    def parse(self) -> dict[str, set[str]]:
        """Return a mapping of tag -> set of headwords for every matching file.

        Each file is read as UTF-8 by ``WordformParser`` with
        ``ignore_tab=False``, so tab-indented lines are included.
        """
        groups = {}
        pattern_len = len(self.globpatt)
        for path in glob.glob(self.globpatt):
            # The text matched by "*" starts at the asterisk's offset; its
            # length is the file name length minus the pattern length
            # without the asterisk itself.  This assumes the rest of the
            # pattern holds no other wildcards -- TODO confirm.
            start = self.astOff
            stop = start + 1 + len(path) - pattern_len
            with io.open(path, mode='r', buffering=1, encoding="utf-8") as stream:
                words = WordformParser(stream, ignore_tab=False).parse()
                groups[path[start:stop]] = set(words)
        return groups
272ec25b6f12 Mark word frequency based on Paul Nation BNC+COCA 25k wordfamily list.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents: 1004
diff changeset
    72
643
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    73
class FreqlistParser:
    """Reader for a frequency list whose lines look like ``NUM WORD``."""

    # Group 1: the leading number; group 2: everything after the single space.
    FREQ_RE = re.compile(u"^([0-9]+) (.*)$", re.UNICODE)

    def __init__(self, stream, limit = None):
        """Prepare a parser over *stream*.

        stream -- object providing ``readline()``.
        limit  -- maximum number of lines to read; a falsy value
                  (``None`` or ``0``) means no limit.
        """
        self.stream = stream
        self.limit = limit
        # Number of lines read so far.
        self.lineno = 0

    def parse(self):
        """Return the words of the list, stripped and lower-cased.

        The number in front of each word is checked for presence but
        otherwise discarded.  Reading stops at end of stream or after
        ``limit`` lines.  Raises ``Exception`` on the first line that is
        not in ``NUM WORD`` format (blank lines included).
        """
        headwords = []
        while not (self.limit and self.lineno >= self.limit):
            raw = self.stream.readline()
            if not raw:
                break
            self.lineno += 1
            matched = self.FREQ_RE.match(raw)
            if matched is None:
                raise Exception("Line '{:s}' #{:d} is not in NUM WORD format\n".format(raw, self.lineno))
            headwords.append(matched.group(2).strip().lower())
        return headwords
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    97
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
    98
if __name__ == '__main__':
    # Command-line driver: build a set of words to include and a set of
    # words to exclude from the lists named on the command line, then print
    # every included word that is not excluded, one per line.
    #
    # Each argument is a spec of the form  SIGN[NUM]MODE:FILE  where
    #   SIGN  "+" adds the list to the include set, "-" to the exclude set;
    #   NUM   optional limit passed to the parser (0 means no limit);
    #   MODE  "b" reads FILE with WordformParser, "f" with FreqlistParser;
    #   FILE  path of a UTF-8 text file (must not contain ":").
    #
    # NOTE(review): the usage text below still shows a separate $WORDLIST
    # argument, but every argument -- including the first -- is parsed as a
    # spec by the loop below.
    USAGE = "Usage: PROG  $WORDLIST  [+/-][NUM][b/f]:FREQLIST..."

    if len(sys.argv) < 3:
        raise Exception(USAGE)

    COMMAND_RE = re.compile("([-+])([0-9]+)?([bf]):([^:]+)")

    IN_SET = set()
    EX_SET = set()

    for spec in sys.argv[1:]:
        # fullmatch: a spec with trailing text after the file name (for
        # example a second ":") is rejected instead of being silently
        # truncated to its matching prefix.
        m = COMMAND_RE.fullmatch(spec)
        if not m:
            raise Exception("Wrong FREQLIST spec: '{:s}'\n".format(spec) + USAGE)
        sign, limit, mode, fname = m.groups()
        if limit:
            limit = int(limit)
        with io.open(fname, mode='r', buffering=1, encoding="utf-8") as stream:
            if mode == "b":
                parser = WordformParser(stream, limit)
            elif mode == "f":
                parser = FreqlistParser(stream, limit)
            else:
                # Defensive only: COMMAND_RE admits no other mode letter.
                raise Exception("Unknown mode in specification...")
            try:
                wlist = parser.parse()
            except Exception:
                # Name the offending file, then let the original error
                # propagate.  Diagnostics go to stderr so they never mix
                # with the word list written to stdout.
                print("Error during processing: {:s}".format(fname), file=sys.stderr)
                raise
        wlist = {w.lower() for w in wlist}
        if sign == "+":
            IN_SET |= wlist
        else:
            EX_SET |= wlist

    # Sorted so that the output does not depend on set iteration order,
    # which can differ between runs.
    for headword in sorted(IN_SET - EX_SET):
        # if any(c in headword for c in " '."):
        #     continue
        print(headword)
643
c2c32f45dde6 Search rare words in gadict.
Oleksandr Gavenko <gavenkoa@gmail.com>
parents:
diff changeset
   141