emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] /srv/bzr/emacs/trunk r102600: Decode extra numeric entities.


From: Katsumi Yamaoka
Subject: [Emacs-diffs] /srv/bzr/emacs/trunk r102600: Decode extra numeric entities.
Date: Tue, 07 Dec 2010 05:06:56 +0000
User-agent: Bazaar (2.0.3)

------------------------------------------------------------
revno: 102600
committer: Katsumi Yamaoka <address@hidden>
branch nick: trunk
timestamp: Tue 2010-12-07 05:06:56 +0000
message:
  Decode extra numeric entities.
  
  mm-util.el (mm-extra-numeric-entities): New variable.
  mm-url.el (mm-url-decode-entities):
  mm-decode.el (mm-shr): Use it to decode extra numeric entities.
modified:
  lisp/gnus/ChangeLog
  lisp/gnus/mm-decode.el
  lisp/gnus/mm-url.el
  lisp/gnus/mm-util.el
=== modified file 'lisp/gnus/ChangeLog'
--- a/lisp/gnus/ChangeLog       2010-12-07 02:01:00 +0000
+++ b/lisp/gnus/ChangeLog       2010-12-07 05:06:56 +0000
@@ -1,3 +1,10 @@
+2010-12-07  Katsumi Yamaoka  <address@hidden>
+
+       * mm-util.el (mm-extra-numeric-entities): New variable.
+
+       * mm-url.el (mm-url-decode-entities):
+       * mm-decode.el (mm-shr): Use it to decode extra numeric entities.
+
 2010-12-07  Stefan Monnier  <address@hidden>
 
        * message.el: Use completion-at-point.

=== modified file 'lisp/gnus/mm-decode.el'
--- a/lisp/gnus/mm-decode.el    2010-11-19 04:55:16 +0000
+++ b/lisp/gnus/mm-decode.el    2010-12-07 05:06:56 +0000
@@ -1699,7 +1699,7 @@
                                  (when handle
                                    (mm-with-part handle
                                      (buffer-string))))))
-       shr-inhibit-images shr-blocked-images charset)
+       shr-inhibit-images shr-blocked-images charset char)
     (if (and (boundp 'gnus-summary-buffer)
             (buffer-name gnus-summary-buffer))
        (with-current-buffer gnus-summary-buffer
@@ -1714,13 +1714,25 @@
       (narrow-to-region (point) (point))
       (shr-insert-document
        (mm-with-part handle
-        (when (and charset
-                   (setq charset (mm-charset-to-coding-system charset))
-                   (not (eq charset 'ascii)))
-          (insert (prog1
-                      (mm-decode-coding-string (buffer-string) charset)
-                    (erase-buffer)
-                    (mm-enable-multibyte))))
+        (insert (prog1
+                    (if (and charset
+                             (setq charset
+                                   (mm-charset-to-coding-system charset))
+                             (not (eq charset 'ascii)))
+                        (mm-decode-coding-string (buffer-string) charset)
+                      (mm-string-as-multibyte (buffer-string)))
+                  (erase-buffer)
+                  (mm-enable-multibyte)))
+        (goto-char (point-min))
+        (setq case-fold-search t)
+        (while (re-search-forward
+                "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
+          (when (setq char
+                      (cdr (assq (if (match-beginning 1)
+                                     (string-to-number (match-string 1) 16)
+                                   (string-to-number (match-string 2)))
+                                 mm-extra-numeric-entities)))
+            (replace-match (char-to-string char))))
         (libxml-parse-html-region (point-min) (point-max))))
       (mm-handle-set-undisplayer
        handle

=== modified file 'lisp/gnus/mm-url.el'
--- a/lisp/gnus/mm-url.el       2010-09-02 00:55:51 +0000
+++ b/lisp/gnus/mm-url.el       2010-12-07 05:06:56 +0000
@@ -365,16 +365,19 @@
 (defun mm-url-decode-entities ()
   "Decode all HTML entities."
   (goto-char (point-min))
-  (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" nil t)
+  (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);"
+                           nil t)
     (let* ((entity (match-string 1))
           (elem (if (eq (aref entity 0) ?\#)
-                    (let ((c (mm-ucs-to-char
-                              ;; Hex number: &#x3212
-                              (if (eq (aref entity 1) ?x)
-                                  (string-to-number (substring entity 2)
-                                                    16)
-                                ;; Decimal number: &#23
-                                (string-to-number (substring entity 1))))))
+                    (let ((c
+                           ;; Hex number: &#x3212
+                           (if (eq (aref entity 1) ?x)
+                               (string-to-number (substring entity 2)
+                                                 16)
+                             ;; Decimal number: &#23
+                             (string-to-number (substring entity 1)))))
+                      (setq c (or (cdr (assq c mm-extra-numeric-entities))
+                                  (mm-ucs-to-char c)))
                       (if (mm-char-or-char-int-p c) c ?#))
                   (or (cdr (assq (intern entity)
                                  mm-url-html-entities))

=== modified file 'lisp/gnus/mm-util.el'
--- a/lisp/gnus/mm-util.el      2010-12-02 22:21:31 +0000
+++ b/lisp/gnus/mm-util.el      2010-12-07 05:06:56 +0000
@@ -866,6 +866,21 @@
 Setting it to nil is useful on Emacsen supporting Unicode if sending
 mail with multiple parts is preferred to sending a Unicode one.")
 
+(defvar mm-extra-numeric-entities
+  (mapcar
+   (lambda (item)
+     (cons (car item) (mm-ucs-to-char (cdr item))))
+   '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E)
+     (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6)
+     (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152)
+     (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C)
+     (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014)
+     (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A)
+     (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178)))
+  "*Alist of extra numeric entities and characters other than ISO 10646.
+This table is used for decoding extra numeric entities to characters,
+like \"&#128;\" to the euro sign, mainly in html messages.")
+
 ;;; Internal variables:
 
 ;;; Functions:


reply via email to

[Prev in Thread] Current Thread [Next in Thread]