Add handling of hex charrefs.
author Oleksandr Gavenko <gavenkoa@gmail.com>
Sat, 07 Jan 2017 01:10:02 +0200
changeset 1449 61455bf4fb87
parent 1448 6e3c4ea65439
child 1450 44a6e8153899
Add handling of hex charrefs.
.emacs-my
--- a/.emacs-my	Sat Nov 19 18:06:38 2016 +0200
+++ b/.emacs-my	Sat Jan 07 01:10:02 2017 +0200
@@ -3166,21 +3166,31 @@
     ))
 
 (defun my-html-charref-to-string (html)
+  "Return string with replaced decimal/hex and string charrefs by
+corresponding UTF-8 symbol."
   (let (str)
     (with-temp-buffer
       (insert html)
       (goto-char (point-min))
-      (while (search-forward-regexp "&\\(?:#\\([[:digit:]]+\\)\\|\\(lt\\|gt\\|amp\\)\\);" nil t)
-        (setq str (or (match-string 1) (match-string 2)))
+      (while (search-forward-regexp "&\\(?:#\\([[:digit:]]+\\)\\|#x\\([[:xdigit:]]+\\)\\|\\(lt\\|gt\\|amp\\|quot\\)\\);" nil t)
+        (setq str (or (match-string 1) (match-string 2) (match-string 3)))
         (cond
-         ((equal str "lt")
-          (replace-match "<" t t))
-         ((equal str "gt")
-          (replace-match ">" t t))
-         ((equal str "amp")
-          (replace-match "&" t t))
-         ((> (string-to-number str) 0)
-          (replace-match (string (string-to-number str 10)) t t))))
+         ((match-string 1)
+          (when (> (string-to-number str 10) 0)
+            (replace-match (string (string-to-number str 10)) t t)))
+         ((match-string 2)
+          (when (> (string-to-number str 16) 0)
+            (replace-match (string (string-to-number str 16)) t t)))
+         (t
+          (cond
+           ((equal str "lt")
+            (replace-match "<" t t))
+           ((equal str "gt")
+            (replace-match ">" t t))
+           ((equal str "quot")
+            (replace-match "\"" t t))
+           ((equal str "amp")
+            (replace-match "&" t t)))) ))
       (buffer-string))))
 
 (defun my-html-charref-unescape-region (begin end &optional prefix)