py/gadict.py
changeset 402 b47698d5ccab
parent 400 aa03182d2e26
child 406 f0ac87e10d9a
equal deleted inserted replaced
401:791994f95561 402:b47698d5ccab
     1 
     1 
     2 import regex
     2 import regex
       
     3 
       
     4 
       
     5 class Prelude:
       
     6     name = None
       
     7     about = ""
       
     8     urls = []
       
     9     authors = []
       
    10     licences = []
     3 
    11 
     4 
    12 
     5 class ParseException(BaseException):
    13 class ParseException(BaseException):
     6 
    14 
     7     def __init__(self, msg, lineno = None, line = None):
    15     def __init__(self, msg, lineno = None, line = None):
    26     HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
    34     HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
    27     TRANSL_POS_RE = regex.compile(r"^n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr$")
    35     TRANSL_POS_RE = regex.compile(r"^n|pron|adj|v|adv|prep|conj|num|int|phr\.v|abbr$")
    28     TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
    36     TRANSL_RE = regex.compile(r"^(ru|uk|la|en): ([\p{L}(][\p{L}\p{P}~ ]*)$")
    29     TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
    37     TRANSL_EX_RE = regex.compile(r"^(ru|uk|la|en)> (\p{L}.*)$")
    30 
    38 
       
    39     CONT_RE = regex.compile(r"^ +(.*)")
       
    40 
    31     TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
    41     TRAILING_SPACES_RE = regex.compile(r"\p{Z}+$")
       
    42 
       
    43     PRELUDE_NAME_RE = regex.compile(r"^name: (.*)")
       
    44     PRELUDE_URL_RE = regex.compile(r"^url: (.*)")
       
    45     PRELUDE_AUTHOR_RE = regex.compile(r"^by: (.*)")
       
    46     PRELUDE_LICENSE_RE = regex.compile(r"^term: (.*)")
       
    47     PRELUDE_ABOUT_RE = regex.compile(r"^about: ?(.*)")
    32 
    48 
    33     def __init__(self):
    49     def __init__(self):
    34         pass
    50         pass
    35 
    51 
    36     def readline(self):
    52     def readline(self):
    51                 self.parse_article()
    67                 self.parse_article()
    52         except ParseException as ex:
    68         except ParseException as ex:
    53             raise ParseException(ex.msg, self.lineno, self.line) from ex
    69             raise ParseException(ex.msg, self.lineno, self.line) from ex
    54         return self.dom
    70         return self.dom
    55 
    71 
       
    72     def parse_continuation(self):
       
    73         string = ""
       
    74         while True:
       
    75             self.readline()
       
    76             if self.eof:
       
    77                 return string
       
    78             m = CONT_RE.match(self.line)
       
    79             if m is not None:
       
    80                 string += "\n" + m.group(1)
       
    81             elif len(self.line) == 1:
       
    82                 string += "\n"
       
    83             else:
       
    84                 return string
       
    85 
    56     def parse_prelude(self):
    86     def parse_prelude(self):
    57         """Read dictionary prelude until first "__" delimiter."""
    87         """Read dictionary prelude until first "__" delimiter."""
       
    88         pre = Prelude()
    58         while True:
    89         while True:
    59             self.readline()
    90             self.readline()
    60             if self.eof:
    91             if self.eof:
    61                 raise ParseException("There are no articles...")
    92                 raise ParseException("There are no articles...")
       
    93             m = self.PRELUDE_ABOUT_RE.match(self.line)
       
    94             if m:
       
    95                 pre.about += m.group(1) + self.parse_continuation()
       
    96                 if self.eof:
       
    97                     raise ParseException("There are no articles...")
    62             if self.SEPARATOR_RE.match(self.line):
    98             if self.SEPARATOR_RE.match(self.line):
    63                 break
    99                 break
       
   100             m = self.PRELUDE_NAME_RE.match(self.line)
       
   101             if m:
       
   102                 pre.name = m.group(1)
       
   103                 continue
       
   104             m = self.PRELUDE_URL_RE.match(self.line)
       
   105             if m:
       
   106                 pre.urls.append(m.group(1))
       
   107                 continue
       
   108             m = self.PRELUDE_AUTHOR_RE.match(self.line)
       
   109             if m:
       
   110                 pre.authors.append(m.group(1))
       
   111                 continue
       
   112             m = self.PRELUDE_LICENSE_RE.match(self.line)
       
   113             if m:
       
   114                 pre.licences.append(m.group(1))
       
   115                 continue
       
   116         self.dom.append(pre)
    64 
   117 
    65     def parse_article(self):
   118     def parse_article(self):
    66         """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""
   119         """Try to match article until next "__" delimiter. Assume that `self.line` point to "__" delimiter."""
    67         self.words = None
   120         self.words = None
    68         self.tran = None
   121         self.tran = None