# HG changeset patch # User Oleksandr Gavenko # Date 1483744202 -7200 # Node ID 61455bf4fb87cc9199fac716e468c4175c81cfe2 # Parent 6e3c4ea65439c1698294b1f9229e90ed357c987b Add handling of hex charrefs. diff -r 6e3c4ea65439 -r 61455bf4fb87 .emacs-my --- a/.emacs-my Sat Nov 19 18:06:38 2016 +0200 +++ b/.emacs-my Sat Jan 07 01:10:02 2017 +0200 @@ -3166,21 +3166,31 @@ )) (defun my-html-charref-to-string (html) + "Return HTML with decimal (&#N;), hex (&#xN;) and named (lt/gt/quot/amp) +charrefs replaced by the corresponding characters (zero-valued numeric ones are kept)." (let (str) (with-temp-buffer (insert html) (goto-char (point-min)) - (while (search-forward-regexp "&\\(?:#\\([[:digit:]]+\\)\\|\\(lt\\|gt\\|amp\\)\\);" nil t) - (setq str (or (match-string 1) (match-string 2))) + (while (search-forward-regexp "&\\(?:#\\([[:digit:]]+\\)\\|#x\\([[:xdigit:]]+\\)\\|\\(lt\\|gt\\|amp\\|quot\\)\\);" nil t) + (setq str (or (match-string 1) (match-string 2) (match-string 3))) (cond - ((equal str "lt") - (replace-match "<" t t)) - ((equal str "gt") - (replace-match ">" t t)) - ((equal str "amp") - (replace-match "&" t t)) - ((> (string-to-number str) 0) - (replace-match (string (string-to-number str 10)) t t)))) + ((match-string 1) + (when (> (string-to-number str 10) 0) + (replace-match (string (string-to-number str 10)) t t))) + ((match-string 2) + (when (> (string-to-number str 16) 0) + (replace-match (string (string-to-number str 16)) t t))) + (t + (cond + ((equal str "lt") + (replace-match "<" t t)) + ((equal str "gt") + (replace-match ">" t t)) + ((equal str "quot") + (replace-match "\"" t t)) + ((equal str "amp") + (replace-match "&" t t)))) )) (buffer-string)))) (defun my-html-charref-unescape-region (begin end &optional prefix)