py/gadict.py
changeset 406 f0ac87e10d9a
parent 402 b47698d5ccab
child 412 ece60575a96a
equal deleted inserted replaced
405:6208d07b30f0 406:f0ac87e10d9a
       
     1 """
       
     2 gadict dictionary format parser.
       
     3 """
     1 
     4 
     2 import regex
     5 import regex
     3 
     6 
     4 
     7 
     5 class Prelude:
     8 class Prelude:
       
     9     """Dictionary metainfo structure."""
     6     name = None
    10     name = None
     7     about = ""
    11     about = ""
     8     urls = []
    12     urls = []
     9     authors = []
    13     authors = []
    10     licences = []
    14     licences = []
    11 
    15 
    12 
    16 
    13 class ParseException(BaseException):
    17 class ParseException(BaseException):
    14 
    18 
    15     def __init__(self, msg, lineno = None, line = None):
    19     def __init__(self, msg, lineno=None, line=None):
       
    20         super().__init__()
    16         self.msg = msg
    21         self.msg = msg
    17         self.lineno = lineno
    22         self.lineno = lineno
    18         self.line = line
    23         self.line = line
    19 
    24 
    20     def __repr__(self):
    25     def __repr__(self):
    25         else:
    30         else:
    26             return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line)
    31             return ":{:d}: {:s}\nLINE: {:s}".format(self.lineno, self.msg, self.line)
    27 
    32 
    28 
    33 
    29 class Parser:
    34 class Parser:
       
    35     """gadict dictionary format parser."""
    30 
    36 
    31     SEPARATOR_RE = regex.compile(r"^__$")
    37     SEPARATOR_RE = regex.compile(r"^__$")
    32     HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
    38     HEADWORD_RE = regex.compile(r"^(\p{L}.*)$")
    33     HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$")
    39     HEADWORD_VAR_RE = regex.compile(r"^ +(s|pl|v[123]|male|female|comp|super)$")
    34     HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
    40     HEADWORD_PRON_RE = regex.compile(r"^ +\[([\p{L}' ]+)\]$")
    64         try:
    70         try:
    65             self.parse_prelude()
    71             self.parse_prelude()
    66             while not self.eof:
    72             while not self.eof:
    67                 self.parse_article()
    73                 self.parse_article()
    68         except ParseException as ex:
    74         except ParseException as ex:
    69             raise ParseException(ex.msg, self.lineno, self.line) from ex
    75             raise ParseException(ex.msg, self.lineno, self.line)
    70         return self.dom
    76         return self.dom
    71 
    77 
    72     def parse_continuation(self):
    78     def parse_continuation(self):
    73         string = ""
    79         string = ""
    74         while True:
    80         while True:
    75             self.readline()
    81             self.readline()
    76             if self.eof:
    82             if self.eof:
    77                 return string
    83                 return string
    78             m = CONT_RE.match(self.line)
    84             m = self.CONT_RE.match(self.line)
    79             if m is not None:
    85             if m is not None:
    80                 string += "\n" + m.group(1)
    86                 string += "\n" + m.group(1)
    81             elif len(self.line) == 1:
    87             elif len(self.line) == 1:
    82                 string += "\n"
    88                 string += "\n"
    83             else:
    89             else:
   201                 continue
   207                 continue
   202             raise ParseException("""Uknown syntax...""")
   208             raise ParseException("""Uknown syntax...""")
   203         if len(tr) > 0:
   209         if len(tr) > 0:
   204             senses.append((pos, tr, ex))
   210             senses.append((pos, tr, ex))
   205         self.tran = senses
   211         self.tran = senses
   206