emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/international/utf-8.el


From: Kenichi Handa
Subject: [Emacs-diffs] Changes to emacs/lisp/international/utf-8.el
Date: Sat, 12 Jun 2004 20:24:32 -0400

Index: emacs/lisp/international/utf-8.el
diff -c emacs/lisp/international/utf-8.el:1.38 emacs/lisp/international/utf-8.el:1.39
*** emacs/lisp/international/utf-8.el:1.38      Sat Jun 12 02:10:37 2004
--- emacs/lisp/international/utf-8.el   Sun Jun 13 00:21:39 2004
***************
*** 395,444 ****
    ;; Thus magnification factor is two.
    ;;
    `(2
!     ((r0 = -1)
       (loop
-       (if (r0 < 0)
-         (read r0))
        (if (r0 < #x80)
          ;; 1-byte encoding, i.e., ascii
!         ((write r0)
!          (r0 = -1)
!          (repeat)))
!       (if (r0 < #xc0)             ; continuation byte (invalid here)
          ((call ccl-mule-utf-untrans)
!          (r0 = -1)
           (repeat)))
  
        ;; Read the 2nd byte.
-       (r1 = -1)
        (read r1)
        (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte
          ((call ccl-mule-utf-untrans)
           ;; Handle it in the next loop.
           (r0 = r1)
           (repeat)))
  
        (if (r0 < #xe0)
          ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
!         ((r2 = ((r0 & #x1F) << 6))
!          (r2 |= (r1 & #x3F))
!          ;; Now r2 holds scalar value
! 
!          (if (r2 < 128)       ; `overlong sequence'
!              ((call ccl-mule-utf-untrans)
!               (r0 = r1)
!               (call ccl-mule-utf-untrans)
!               (r0 = -1)
!               (repeat)))
  
!          (r1 = r2)
!          (if (r1 < 160)
!              ;; eight-bit-control
!              (r0 = ,(charset-id 'eight-bit-control))
!            (if (r1 < 256)
!                ;; latin-iso8859-1
!                ((r0 = ,(charset-id 'latin-iso8859-1))
!                 (r1 -= 128))
               ;; mule-unicode-0100-24ff (< 0800)
               ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
                (r1 -= #x0100)
--- 395,431 ----
    ;; Thus magnification factor is two.
    ;;
    `(2
!     ((r6 = ,(charset-id 'latin-iso8859-1))
!      (read r0)
       (loop
        (if (r0 < #x80)
          ;; 1-byte encoding, i.e., ascii
!         (write-read-repeat r0))
!       (if (r0 < #xc2)
!         ;; continuation byte (invalid here) or 1st byte of overlong
!         ;; 2-byte sequence.
          ((call ccl-mule-utf-untrans)
!          (r6 = ,(charset-id 'latin-iso8859-1))
!          (read r0)
           (repeat)))
  
        ;; Read the 2nd byte.
        (read r1)
        (if ((r1 & #b11000000) != #b10000000) ; Invalid 2nd byte
          ((call ccl-mule-utf-untrans)
+          (r6 = ,(charset-id 'latin-iso8859-1))
           ;; Handle it in the next loop.
           (r0 = r1)
           (repeat)))
  
        (if (r0 < #xe0)
          ;; 2-byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
!         ((r1 &= #x3F)
!          (r1 |= ((r0 & #x1F) << 6))
!          ;; Now r1 holds scalar value.  We don't have to check
!          ;; `overlong sequence' because r0 >= 0xC2.
  
!          (if (r1 >= 256)
               ;; mule-unicode-0100-24ff (< 0800)
               ((r0 = ,(charset-id 'mule-unicode-0100-24ff))
                (r1 -= #x0100)
***************
*** 446,463 ****
                (r1 %= 96)
                (r1 += (r2 + 32))
                (translate-character
!                utf-translation-table-for-decode r0 r1))))
!          (write-multibyte-character r0 r1)
!          (r0 = -1)
!          (repeat)))
  
        ;; Read the 3rd bytes.
-       (r2 = -1)
        (read r2)
        (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte
          ((call ccl-mule-utf-untrans)
           (r0 = r1)
           (call ccl-mule-utf-untrans)
           ;; Handle it in the next loop.
           (r0 = r2)
           (repeat)))
--- 433,461 ----
                (r1 %= 96)
                (r1 += (r2 + 32))
                (translate-character
!                utf-translation-table-for-decode r0 r1)
!               (write-multibyte-character r0 r1)
!               (read r0)
!               (repeat))
!            (if (r1 >= 160)
!                ;; latin-iso8859-1
!                ((r1 -= 128)
!                 (write-multibyte-character r6 r1)
!                 (read r0)
!                 (repeat))
!              ;; eight-bit-control
!              ((r0 = ,(charset-id 'eight-bit-control))
!               (write-multibyte-character r0 r1)
!               (read r0)
!               (repeat))))))
  
        ;; Read the 3rd bytes.
        (read r2)
        (if ((r2 & #b11000000) != #b10000000) ; Invalid 3rd byte
          ((call ccl-mule-utf-untrans)
           (r0 = r1)
           (call ccl-mule-utf-untrans)
+          (r6 = ,(charset-id 'latin-iso8859-1))
           ;; Handle it in the next loop.
           (r0 = r2)
           (repeat)))
***************
*** 475,481 ****
                (call ccl-mule-utf-untrans)
                (r0 = r2)
                (call ccl-mule-utf-untrans)
!               (r0 = -1)
                (repeat)))
  
           (if (r3 < #x2500)
--- 473,480 ----
                (call ccl-mule-utf-untrans)
                (r0 = r2)
                (call ccl-mule-utf-untrans)
!               (r6 = ,(charset-id 'latin-iso8859-1))
!               (read r0)
                (repeat)))
  
           (if (r3 < #x2500)
***************
*** 488,494 ****
                (translate-character
                 utf-translation-table-for-decode r0 r1)
                (write-multibyte-character r0 r1)
!               (r0 = -1)
                (repeat)))
  
           (if (r3 < #x3400)
--- 487,493 ----
                (translate-character
                 utf-translation-table-for-decode r0 r1)
                (write-multibyte-character r0 r1)
!               (read r0)
                (repeat)))
  
           (if (r3 < #x3400)
***************
*** 502,508 ****
                     (r1 = (r7 + 32))
                     (r1 += ((r3 + 32) << 7))))
                (write-multibyte-character r0 r1)
!               (r0 = -1)
                (repeat)))
  
           (if (r3 < #xE000)
--- 501,507 ----
                     (r1 = (r7 + 32))
                     (r1 += ((r3 + 32) << 7))))
                (write-multibyte-character r0 r1)
!               (read r0)
                (repeat)))
  
           (if (r3 < #xE000)
***************
*** 512,521 ****
                (lookup-integer utf-subst-table-for-decode r3 r1)
                (if r7
                    ;; got a translation
!                   (write-multibyte-character r3 r1)
!                 (call ccl-mule-utf-untrans))
!               (r0 = -1)
!               (repeat)))
  
           ;; mule-unicode-e000-ffff
           ;; Fixme: fffe and ffff are invalid.
--- 511,523 ----
                (lookup-integer utf-subst-table-for-decode r3 r1)
                (if r7
                    ;; got a translation
!                   ((write-multibyte-character r3 r1)
!                    (read r0)
!                    (repeat))
!                 ((call ccl-mule-utf-untrans)
!                  (r6 = ,(charset-id 'latin-iso8859-1))
!                  (read r0)
!                  (repeat)))))
  
           ;; mule-unicode-e000-ffff
           ;; Fixme: fffe and ffff are invalid.
***************
*** 528,548 ****
                (r1 = (r7 + 32))
                (r1 += ((r3 + 32) << 7))))
           (write-multibyte-character r0 r1)
!          (r0 = -1)
           (repeat)))
  
        ;; Read the 4th bytes.
-       (r3 = -1)
        (read r3)
        (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte
          ((call ccl-mule-utf-untrans)
           (r0 = r1)
           (call ccl-mule-utf-untrans)
           ;; Handle it in the next loop.
           (r0 = r3)
           (repeat)))
  
!       (if (r3 < #xF8)
          ;; 4-byte encoding:
          ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx
          ;; keep those bytes as eight-bit-{control|graphic}
--- 530,552 ----
                (r1 = (r7 + 32))
                (r1 += ((r3 + 32) << 7))))
           (write-multibyte-character r0 r1)
!          (read r0)
           (repeat)))
  
        ;; Read the 4th bytes.
        (read r3)
        (if ((r3 & #b11000000) != #b10000000) ; Invalid 4th byte
          ((call ccl-mule-utf-untrans)
           (r0 = r1)
           (call ccl-mule-utf-untrans)
+          (r0 = r2)
+          (call ccl-mule-utf-untrans)
+          (r6 = ,(charset-id 'latin-iso8859-1))
           ;; Handle it in the next loop.
           (r0 = r3)
           (repeat)))
  
!       (if (r0 < #xF8)
          ;; 4-byte encoding:
          ;; wwwzzzzzzyyyyyyxxxxxx = 11110www 10zzzzzz 10yyyyyy 10xxxxxx
          ;; keep those bytes as eight-bit-{control|graphic}
***************
*** 561,581 ****
                (r0 = r3)
                (call ccl-mule-utf-untrans))
             ((r0 = r4)
!             (call ccl-mule-utf-untrans)))
!          (r0 = -1)
!          (repeat)))
  
!       ;; Unsupported sequence.
!       (call ccl-mule-utf-untrans)
!       (r0 = r1)
!       (call ccl-mule-utf-untrans)
!       (r0 = r2)
!       (call ccl-mule-utf-untrans)
!       (r0 = r3)
!       (call ccl-mule-utf-untrans)
!       (r0 = -1)
        (repeat)))
  
      ;; At EOF...
      (if (r0 >= 0)
        ;; r0 >= #x80
--- 565,585 ----
                (r0 = r3)
                (call ccl-mule-utf-untrans))
             ((r0 = r4)
!             (call ccl-mule-utf-untrans))))
  
!       ;; Unsupported sequence.
!       ((call ccl-mule-utf-untrans)
!        (r0 = r1)
!        (call ccl-mule-utf-untrans)
!        (r0 = r2)
!        (call ccl-mule-utf-untrans)
!        (r0 = r3)
!        (call ccl-mule-utf-untrans)))
!       (r6 = ,(charset-id 'latin-iso8859-1))
!       (read r0)
        (repeat)))
  
+ 
      ;; At EOF...
      (if (r0 >= 0)
        ;; r0 >= #x80
***************
*** 786,792 ****
          (if (r0 < #xF0)               ; 3-byte encoding, as above
              ((r0 = ((r0 & #xF) << 12))
               (r0 |= ((r1 & #x3F) << 6))
!              (r0 |= (r1 & #x3F))
               (r1 = 3))
            (if (r3 == 0)
                (r1 = 0)
--- 790,796 ----
          (if (r0 < #xF0)               ; 3-byte encoding, as above
              ((r0 = ((r0 & #xF) << 12))
               (r0 |= ((r1 & #x3F) << 6))
!              (r0 |= (r2 & #x3F))
               (r1 = 3))
            (if (r3 == 0)
                (r1 = 0)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]