Add handling of hex charrefs and the quot entity.
--- a/.emacs-my Sat Nov 19 18:06:38 2016 +0200
+++ b/.emacs-my Sat Jan 07 01:10:02 2017 +0200
@@ -3166,21 +3166,31 @@
))
(defun my-html-charref-to-string (html)
+ "Return HTML with decimal, hex and named (lt, gt, amp, quot) charrefs
+replaced by the corresponding characters."
(let (str)
(with-temp-buffer
(insert html)
(goto-char (point-min))
- (while (search-forward-regexp "&\\(?:#\\([[:digit:]]+\\)\\|\\(lt\\|gt\\|amp\\)\\);" nil t)
- (setq str (or (match-string 1) (match-string 2)))
+ (while (search-forward-regexp "&\\(?:#\\([[:digit:]]+\\)\\|#x\\([[:xdigit:]]+\\)\\|\\(lt\\|gt\\|amp\\|quot\\)\\);" nil t)
+ (setq str (or (match-string 1) (match-string 2) (match-string 3)))
(cond
- ((equal str "lt")
- (replace-match "<" t t))
- ((equal str "gt")
- (replace-match ">" t t))
- ((equal str "amp")
- (replace-match "&" t t))
- ((> (string-to-number str) 0)
- (replace-match (string (string-to-number str 10)) t t))))
+ ((match-string 1)
+ (when (> (string-to-number str 10) 0)
+ (replace-match (string (string-to-number str 10)) t t)))
+ ((match-string 2)
+ (when (> (string-to-number str 16) 0)
+ (replace-match (string (string-to-number str 16)) t t)))
+ (t
+ (cond
+ ((equal str "lt")
+ (replace-match "<" t t))
+ ((equal str "gt")
+ (replace-match ">" t t))
+ ((equal str "quot")
+ (replace-match "\"" t t))
+ ((equal str "amp")
+ (replace-match "&" t t)))) ))
(buffer-string))))
(defun my-html-charref-unescape-region (begin end &optional prefix)