# HG changeset patch # User Oleksandr Gavenko # Date 1477242082 -10800 # Node ID 6ad7203ac9dcaee91d168878678245d36dfe7be9 # Parent ec1c2838feae42582d5cdf967d21e8095bfd2808 Add support for hypernyms and hyponyms. diff -r ec1c2838feae -r 6ad7203ac9dc contrib/gadict.el --- a/contrib/gadict.el Sun Oct 23 19:57:02 2016 +0300 +++ b/contrib/gadict.el Sun Oct 23 20:01:22 2016 +0300 @@ -23,7 +23,7 @@ ;;; Code: (defconst gadict--art-lang-regex (regexp-opt '("en" "ru" "uk" "la"))) -(defconst gadict--art-rel-regex (regexp-opt '("ant" "syn" "rel" "topic"))) +(defconst gadict--art-rel-regex (regexp-opt '("ant" "syn" "rel" "topic" "hyper" "hypo"))) (defconst gadict--art-var-regex (regexp-opt '("v1" "v2" "v3" "s" "pl" "male" "female" "abbr" "comp" "super" "Am" "Br" "Au"))) (defconst gadict--art-pos-regex (regexp-opt '("n" "v" "adj" "adv" "pron" "prep" "num" "conj" "int" "phr" "phr.v" "contr" "abbr" "prefix"))) diff -r ec1c2838feae -r 6ad7203ac9dc py/gadict.py --- a/py/gadict.py Sun Oct 23 19:57:02 2016 +0300 +++ b/py/gadict.py Sun Oct 23 20:01:22 2016 +0300 @@ -45,7 +45,7 @@ class Sense: - def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None): + def __init__(self, pos, tr_list = None, ex_list = None, glos_list = None, ant_list = None, syn_list = None, rel_list = None, topic_list = None, hyper_list = None, hypo_list = None): if not pos: raise ParseException("Part of speech expected...\n") self.pos = pos @@ -56,6 +56,8 @@ self.syn_list = syn_list self.rel_list = rel_list self.topic_list = topic_list + self.hyper_list = hyper_list + self.hypo_list = hypo_list def add_tr(self, tr): if self.tr_list: @@ -99,6 +101,18 @@ else: self.topic_list = [topic] + def add_hyper(self, hyper): + if self.hyper_list: + self.hyper_list.append(hyper) + else: + self.hyper_list = [hyper] + + def add_hypo(self, hypo): + if self.hypo_list: + self.hypo_list.append(hypo) + else: + self.hypo_list = [hypo] + def __str__(self): if tr_list: (lang, text) = self.tr_list[0] @@ -118,12 +132,14 @@ HEADWORD_PRON_RE = regex.compile(u"^ +\\[([\p{L}' ]+)\\]$") TRANSL_POS_RE = regex.compile(u"^(?:n|det|pron|adj|v|adv|prep|conj|num|int|phr|phr\\.v|contr|abbr|prefix)$") TRANSL_RE = regex.compile(u"^(ru|uk|la|en): ([\\p{L}(].*)$") - TRANSL_EX_RE = regex.compile(u"^(ru|uk|la|en)> ([-\\p{L}].*)$") + TRANSL_EX_RE = regex.compile(u"""^(ru|uk|la|en)> ([-'"\\p{L}].*)$""") TRANSL_GLOS_RE = regex.compile(u"^(ru|uk|la|en)= ([-\\p{L}].*)$") TOPIC_RE = regex.compile(u"^topic: (\\p{L}.*)$") SYN_RE = regex.compile(u"^syn: (\\p{L}.*)$") ANT_RE = regex.compile(u"^ant: (\\p{L}.*)$") REL_RE = regex.compile(u"^rel: (\\p{L}.*)$") + HYPER_RE = regex.compile(u"^hyper: (\\p{L}.*)$") + HYPO_RE = regex.compile(u"^hypo: (\\p{L}.*)$") CONT_RE = regex.compile(u"^ +(.*)") @@ -340,6 +356,24 @@ raise ParseException("""Empty relation...""") sense.add_rel(rel) continue + m = self.HYPER_RE.match(self.line) + if m is not None: + hypers = m.group(1).split(";") + for hyper in hypers: + hyper = hyper.strip() + if len(hyper) == 0: + raise ParseException("""Empty hypernym...""") + sense.add_hyper(hyper) + continue + m = self.HYPO_RE.match(self.line) + if m is not None: + hypos = m.group(1).split(";") + for hypo in hypos: + hypo = hypo.strip() + if len(hypo) == 0: + raise ParseException("""Empty hyponym...""") + sense.add_hypo(hypo) + continue m = self.TRANSL_RE.match(self.line) if m is not None: sense.add_tr((m.group(1), m.group(2) + self.parse_translation_continuation())) diff -r ec1c2838feae -r 6ad7203ac9dc py/gadict_c5.py --- a/py/gadict_c5.py Sun Oct 23 19:57:02 2016 +0300 +++ b/py/gadict_c5.py Sun Oct 23 20:01:22 2016 +0300 @@ -102,6 +102,18 @@ FOUT.write(" syn: ") FOUT.write("; ".join(["{"+s+"}" for s in sense.syn_list])) need_sep = True + if sense.hyper_list and len(sense.hyper_list) > 0: + if need_sep: + FOUT.write(" |") + FOUT.write(" hyper: ") + FOUT.write("; ".join(["{"+s+"}" for s in sense.hyper_list])) + need_sep = True + if sense.hypo_list and len(sense.hypo_list) > 0: + if need_sep: + FOUT.write(" |") + FOUT.write(" hypo: ") + FOUT.write("; ".join(["{"+s+"}" for s in sense.hypo_list])) + need_sep = True if sense.rel_list and len(sense.rel_list) > 0: if need_sep: FOUT.write(" |") diff -r ec1c2838feae -r 6ad7203ac9dc py/gadict_srs_anki.py --- a/py/gadict_srs_anki.py Sun Oct 23 19:57:02 2016 +0300 +++ b/py/gadict_srs_anki.py Sun Oct 23 20:01:22 2016 +0300 @@ -103,6 +103,9 @@ color: red; font-style: italic; } +.topic { + color: #B04080; +} .ant { color: #404080; } @@ -110,7 +113,13 @@ color: #804040; } .rel { - color: #804080; + color: #604080; +} +.hyper { + color: #600080; +} +.hypo { + color: #606080; } .attrs { color: blue; @@ -192,6 +201,16 @@ buf.append(" syn: ") buf.append("; ".join(sense.syn_list)) buf.append("") + if sense.hyper_list and len(sense.hyper_list) > 0: + have_ref = True + buf.append(" hyper: ") + buf.append("; ".join(sense.hyper_list)) + buf.append("") + if sense.hypo_list and len(sense.hypo_list) > 0: + have_ref = True + buf.append(" hypo: ") + buf.append("; ".join(sense.hypo_list)) + buf.append("") if sense.rel_list and len(sense.rel_list) > 0: have_ref = True buf.append(" rel: ") diff -r ec1c2838feae -r 6ad7203ac9dc www/HACKING.rst --- a/www/HACKING.rst Sun Oct 23 19:57:02 2016 +0300 +++ b/www/HACKING.rst Sun Oct 23 20:01:22 2016 +0300 @@ -118,9 +118,16 @@ * ``music`` * ``meal``, ``office``, etc * ``size``, ``shape``, ``age``, ``color`` +* ``archaic`` - old fashioned, no longer used -Synonyms marked by ``syn:``, antonyms marked by ``ant:``, related (see also) -terms marked by ``rel:``, topics/tags marked by ``topic:``. +Word relations: + +* ``syn:`` - synonyms +* ``ant:`` - antonyms +* ``hyper:`` - hypernyms +* ``hypo:`` - hyponyms +* ``rel:`` - related (see also) terms +* ``topic:`` - topics/tags Translation marked by lowercase ISO 639-1 code with ``:`` (colon) character, like: