LCOV - code coverage report
Current view: top level - lisp/international - mule.el (source / functions) Hit Total Coverage
Test: tramp-tests-after.info Lines: 369 1073 34.4 %
Date: 2017-08-30 10:12:24 Functions: 22 70 31.4 %

          Line data    Source code
       1             : ;;; mule.el --- basic commands for multilingual environment
       2             : 
       3             : ;; Copyright (C) 1997-2017 Free Software Foundation, Inc.
       4             : ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
       5             : ;;   2005, 2006, 2007, 2008, 2009, 2010, 2011
       6             : ;;   National Institute of Advanced Industrial Science and Technology (AIST)
       7             : ;;   Registration Number H14PRO021
       8             : ;; Copyright (C) 2003
       9             : ;;   National Institute of Advanced Industrial Science and Technology (AIST)
      10             : ;;   Registration Number H13PRO009
      11             : 
      12             : ;; Keywords: mule, multilingual, character set, coding system
      13             : 
      14             : ;; This file is part of GNU Emacs.
      15             : 
      16             : ;; GNU Emacs is free software: you can redistribute it and/or modify
      17             : ;; it under the terms of the GNU General Public License as published by
      18             : ;; the Free Software Foundation, either version 3 of the License, or
      19             : ;; (at your option) any later version.
      20             : 
      21             : ;; GNU Emacs is distributed in the hope that it will be useful,
      22             : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
      23             : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      24             : ;; GNU General Public License for more details.
      25             : 
      26             : ;; You should have received a copy of the GNU General Public License
      27             : ;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
      28             : 
      29             : ;;; Commentary:
      30             : 
      31             : ;;; Code:
      32             : 
      33             : ;; FIXME?  Are these still relevant?  Nothing uses them AFAICS.
      34             : (defconst mule-version "6.0 (HANACHIRUSATO)" "\
      35             : Version number and name of this version of MULE (multilingual environment).")
      36             : 
      37             : (defconst mule-version-date "2003.9.1" "\
      38             : Distribution date of this version of MULE (multilingual environment).")
      39             : 
      40             : 
      41             : ;;; CHARSET
      42             : 
      43             : ;; Backward compatibility code for handling emacs-mule charsets.
      44             : (defvar private-char-area-1-min #xF0000)
      45             : (defvar private-char-area-1-max #xFFFFE)
      46             : (defvar private-char-area-2-min #x100000)
      47             : (defvar private-char-area-2-max #x10FFFE)
      48             : 
      49             : ;; Table of emacs-mule charsets indexed by their emacs-mule ID.
      50             : (defvar emacs-mule-charset-table (make-vector 256 nil))
      51             : (aset emacs-mule-charset-table 0 'ascii)
      52             : 
      53             : ;; Convert the argument of old-style call of define-charset to a
      54             : ;; property list used by the new-style.
      55             : ;; INFO-VECTOR is a vector of the format:
      56             : ;;   [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
      57             : ;;    SHORT-NAME LONG-NAME DESCRIPTION]
      58             : 
      59             : (defun convert-define-charset-argument (emacs-mule-id info-vector)
      60           0 :   (let* ((dim (aref info-vector 0))
      61           0 :          (chars (aref info-vector 1))
      62           0 :          (total (if (= dim 1) chars (* chars chars)))
      63           0 :          (code-space (if (= dim 1) (if (= chars 96) [32 127] [33 126])
      64           0 :                        (if (= chars 96) [32 127 32 127] [33 126 33 126])))
      65             :          code-offset)
      66           0 :     (if (integerp emacs-mule-id)
      67           0 :         (or (= emacs-mule-id 0)
      68           0 :             (and (>= emacs-mule-id 129) (< emacs-mule-id 256))
      69           0 :             (error "Invalid CHARSET-ID: %d" emacs-mule-id))
      70           0 :       (let (from-id to-id)
      71           0 :         (if (= dim 1) (setq from-id 160 to-id 224)
      72           0 :           (setq from-id 224 to-id 255))
      73           0 :         (while (and (< from-id to-id)
      74           0 :                     (not (aref emacs-mule-charset-table from-id)))
      75           0 :           (setq from-id (1+ from-id)))
      76           0 :         (if (= from-id to-id)
      77           0 :             (error "No more room for the new Emacs-mule charset"))
      78           0 :         (setq emacs-mule-id from-id)))
      79           0 :     (if (> (- private-char-area-1-max private-char-area-1-min) total)
      80           0 :         (setq code-offset private-char-area-1-min
      81           0 :               private-char-area-1-min (+ private-char-area-1-min total))
      82           0 :       (if (> (- private-char-area-2-max private-char-area-2-min) total)
      83           0 :           (setq code-offset private-char-area-2-min
      84           0 :                 private-char-area-2-min (+ private-char-area-2-min total))
      85           0 :         (error "No more space for a new charset")))
      86           0 :     (list :dimension dim
      87           0 :           :code-space code-space
      88           0 :           :iso-final-char (aref info-vector 4)
      89           0 :           :code-offset code-offset
      90           0 :           :emacs-mule-id emacs-mule-id)))
      91             : 
      92             : (defun define-charset (name docstring &rest props)
      93             :   "Define NAME (symbol) as a charset with DOCSTRING.
      94             : The remaining arguments must come in pairs ATTRIBUTE VALUE.  ATTRIBUTE
      95             : may be any symbol.  The following have special meanings, and one of
      96             : `:code-offset', `:map', `:subset', `:superset' must be specified.
      97             : 
      98             : `:short-name'
      99             : 
     100             : VALUE must be a short string to identify the charset.  If omitted,
     101             : NAME is used.
     102             : 
     103             : `:long-name'
     104             : 
     105             : VALUE must be a string longer than `:short-name' to identify the
     106             : charset.  If omitted, the value of the `:short-name' attribute is used.
     107             : 
     108             : `:dimension'
     109             : 
     110             : VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of
     111             : code-points of the charsets.  If omitted, it is calculated from the
     112             : value of the `:code-space' attribute.
     113             : 
     114             : `:code-space'
     115             : 
     116             : VALUE must be a vector of length at most 8 specifying the byte code
     117             : range of each dimension in this format:
     118             :         [ MIN-1 MAX-1 MIN-2 MAX-2 ... ]
     119             : where MIN-N is the minimum byte value of Nth dimension of code-point,
     120             : MAX-N is the maximum byte value of that.
     121             : 
     122             : `:min-code'
     123             : 
     124             : VALUE must be an integer specifying the minimum code point of the
     125             : charset.  If omitted, it is calculated from `:code-space'.  VALUE may
     126             : be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
     127             : the code point and LOW is the least significant 16 bits.
     128             : 
     129             : `:max-code'
     130             : 
     131             : VALUE must be an integer specifying the maximum code point of the
     132             : charset.  If omitted, it is calculated from `:code-space'.  VALUE may
     133             : be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of
     134             : the code point and LOW is the least significant 16 bits.
     135             : 
     136             : `:iso-final-char'
     137             : 
     138             : VALUE must be a character in the range 32 to 127 (inclusive)
     139             : specifying the final char of the charset for ISO-2022 encoding.  If
     140             : omitted, the charset can't be encoded by ISO-2022 based
     141             : coding-systems.
     142             : 
     143             : `:iso-revision-number'
     144             : 
     145             : VALUE must be an integer in the range 0..63, specifying the revision
     146             : number of the charset for ISO-2022 encoding.
     147             : 
     148             : `:emacs-mule-id'
     149             : 
     150             : VALUE must be an integer of 0, 129..255.  If omitted, the charset
     151             : can't be encoded by coding-systems of type `emacs-mule'.
     152             : 
     153             : `:ascii-compatible-p'
     154             : 
     155             : VALUE must be nil or t (default nil).  If VALUE is t, the charset is
     156             : compatible with ASCII, i.e. the first 128 code points map to ASCII.
     157             : 
     158             : `:supplementary-p'
     159             : 
     160             : VALUE must be nil or t.  If the VALUE is t, the charset is
     161             : supplementary, which means it is used only as a parent or a
     162             : subset of some other charset, or it is provided just for backward
     163             : compatibility.
     164             : 
     165             : `:invalid-code'
     166             : 
     167             : VALUE must be a nonnegative integer that can be used as an invalid
     168             : code point of the charset.  If the minimum code is 0 and the maximum
     169             : code is greater than Emacs's maximum integer value, `:invalid-code'
     170             : should not be omitted.
     171             : 
     172             : `:code-offset'
     173             : 
     174             : VALUE must be an integer added to the index number of a character to
     175             : get the corresponding character code.
     176             : 
     177             : `:map'
     178             : 
     179             : VALUE must be vector or string.
     180             : 
     181             : If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
     182             : where CODE-n is a code-point of the charset, and CHAR-n is the
     183             : corresponding character code.
     184             : 
     185             : If it is a string, it is a name of file that contains the above
     186             : information.   Each line of the file must be this format:
     187             :         0xXXX 0xYYY
     188             : where XXX is a hexadecimal representation of CODE-n and YYY is a
     189             : hexadecimal representation of CHAR-n.  A line starting with `#' is a
     190             : comment line.
     191             : 
     192             : `:subset'
     193             : 
     194             : VALUE must be a list:
     195             :         ( PARENT MIN-CODE MAX-CODE OFFSET )
     196             : PARENT is a parent charset.  MIN-CODE and MAX-CODE specify the range
     197             : of characters inherited from the parent.  OFFSET is an integer value
     198             : to add to a code point of the parent charset to get the corresponding
     199             : code point of this charset.
     200             : 
     201             : `:superset'
     202             : 
     203             : VALUE must be a list of parent charsets.  The charset inherits
     204             : characters from them.  Each element of the list may be a cons (PARENT
     205             : . OFFSET), where PARENT is a parent charset, and OFFSET is an offset
     206             : value to add to a code point of PARENT to get the corresponding code
     207             : point of this charset.
     208             : 
     209             : `:unify-map'
     210             : 
     211             : VALUE must be vector or string.
     212             : 
     213             : If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ],
     214             : where CODE-n is a code-point of the charset, and CHAR-n is the
     215             : corresponding Unicode character code.
     216             : 
     217             : If it is a string, it is a name of file that contains the above
     218             : information.  The file format is the same as what described for `:map'
     219             : attribute."
     220         161 :   (when (vectorp (car props))
     221             :     ;; Old style code:
     222             :     ;;   (define-charset CHARSET-ID CHARSET-SYMBOL INFO-VECTOR)
     223             :     ;; Convert the argument to make it fit with the current style.
     224           0 :     (let ((vec (car props)))
     225           0 :       (setq props (convert-define-charset-argument name vec)
     226           0 :             name docstring
     227         161 :             docstring (aref vec 8))))
     228         161 :   (let ((attrs (mapcar 'list '(:dimension
     229             :                                :code-space
     230             :                                :min-code
     231             :                                :max-code
     232             :                                :iso-final-char
     233             :                                :iso-revision-number
     234             :                                :emacs-mule-id
     235             :                                :ascii-compatible-p
     236             :                                :supplementary-p
     237             :                                :invalid-code
     238             :                                :code-offset
     239             :                                :map
     240             :                                :subset
     241             :                                :superset
     242             :                                :unify-map
     243         161 :                                :plist))))
     244             : 
     245             :     ;; If :dimension is omitted, get the dimension from :code-space.
     246         161 :     (let ((dimension (plist-get props :dimension)))
     247         161 :       (or dimension
     248         156 :           (let ((code-space (plist-get props :code-space)))
     249         156 :             (setq dimension (if code-space (/ (length code-space) 2) 4))
     250         161 :             (setq props (plist-put props :dimension dimension)))))
     251             : 
     252         161 :     (let ((code-space (plist-get props :code-space)))
     253         161 :       (or code-space
     254           0 :           (let ((dimension (plist-get props :dimension)))
     255           0 :             (setq code-space (make-vector 8 0))
     256           0 :             (dotimes (i dimension)
     257           0 :               (aset code-space (1+ (* i 2)) #xFF))
     258         161 :             (setq props (plist-put props :code-space code-space)))))
     259             : 
     260             :     ;; If :emacs-mule-id is specified, update emacs-mule-charset-table.
     261         161 :     (let ((emacs-mule-id (plist-get props :emacs-mule-id)))
     262         161 :       (if (integerp emacs-mule-id)
     263         161 :           (aset emacs-mule-charset-table emacs-mule-id name)))
     264             : 
     265         161 :     (dolist (slot attrs)
     266        2576 :       (setcdr slot (purecopy (plist-get props (car slot)))))
     267             : 
     268             :     ;; Make sure that the value of :code-space is a vector of 8
     269             :     ;; elements.
     270         161 :     (let* ((slot (assq :code-space attrs))
     271         161 :            (val (cdr slot))
     272         161 :            (len (length val)))
     273         161 :       (if (< len 8)
     274         156 :           (setcdr slot
     275         161 :                   (vconcat val (make-vector (- 8 len) 0)))))
     276             : 
     277             :     ;; Add :name and :docstring properties to PROPS.
     278         161 :     (setq props
     279         161 :           (cons :name (cons name (cons :docstring (cons (purecopy docstring) props)))))
     280         161 :     (or (plist-get props :short-name)
     281         161 :         (plist-put props :short-name (symbol-name name)))
     282         161 :     (or (plist-get props :long-name)
     283         161 :         (plist-put props :long-name (plist-get props :short-name)))
     284         161 :     (plist-put props :base name)
     285             :     ;; We can probably get a worthwhile amount in purespace.
     286         161 :     (setq props
     287         161 :           (mapcar (lambda (elt)
     288        3086 :                     (if (stringp elt)
     289         578 :                         (purecopy elt)
     290        3086 :                       elt))
     291         161 :                   props))
     292         161 :     (setcdr (assq :plist attrs) props)
     293             : 
     294         161 :     (apply 'define-charset-internal name (mapcar 'cdr attrs))))
     295             : 
     296             : 
     297             : (defun load-with-code-conversion (fullname file &optional noerror nomessage)
     298             :   "Execute a file of Lisp code named FILE whose absolute name is FULLNAME.
     299             : The file contents are decoded before evaluation if necessary.
     300             : If optional third arg NOERROR is non-nil,
     301             :  report no error if FILE doesn't exist.
     302             : Print messages at start and end of loading unless
     303             :  optional fourth arg NOMESSAGE is non-nil.
     304             : Return t if file exists."
     305          11 :   (if (null (file-readable-p fullname))
     306           0 :       (and (null noerror)
     307           0 :            (signal 'file-error (list "Cannot open load file" file)))
     308             :     ;; Read file with code conversion, and then eval.
     309          11 :     (let* ((buffer
     310             :             ;; We can't use `generate-new-buffer' because files.el
     311             :             ;; is not yet loaded.
     312          11 :             (get-buffer-create (generate-new-buffer-name " *load*")))
     313             :            (load-in-progress t)
     314          11 :            (source (save-match-data (string-match "\\.el\\'" fullname))))
     315          11 :       (unless nomessage
     316           0 :         (if source
     317           0 :             (message "Loading %s (source)..." file)
     318          11 :           (message "Loading %s..." file)))
     319          11 :       (when purify-flag
     320          11 :         (push (purecopy file) preloaded-file-list))
     321          11 :       (unwind-protect
     322          11 :           (let ((load-file-name fullname)
     323             :                 (set-auto-coding-for-load t)
     324             :                 (inhibit-file-name-operation nil))
     325          11 :             (with-current-buffer buffer
     326             :               ;; So that we don't get completely screwed if the
     327             :               ;; file is encoded in some complicated character set,
     328             :               ;; read it with real decoding, as a multibyte buffer.
     329          11 :               (set-buffer-multibyte t)
     330             :               ;; Don't let deactivate-mark remain set.
     331          11 :               (let (deactivate-mark)
     332          11 :                 (insert-file-contents fullname))
     333             :               ;; If the loaded file was inserted with no-conversion or
     334             :               ;; raw-text coding system, make the buffer unibyte.
     335             :               ;; Otherwise, eval-buffer might try to interpret random
     336             :               ;; binary junk as multibyte characters.
     337          11 :               (if (and enable-multibyte-characters
     338          11 :                        (or (eq (coding-system-type last-coding-system-used)
     339          11 :                                'raw-text)))
     340          11 :                   (set-buffer-multibyte nil))
     341             :               ;; Make `kill-buffer' quiet.
     342          11 :               (set-buffer-modified-p nil))
     343             :             ;; Have the original buffer current while we eval.
     344          11 :             (eval-buffer buffer nil
     345             :                          ;; This is compatible with what `load' does.
     346          11 :                          (if purify-flag file fullname)
     347          11 :                          nil t))
     348          11 :         (let (kill-buffer-hook kill-buffer-query-functions)
     349          11 :           (kill-buffer buffer)))
     350          11 :       (do-after-load-evaluation fullname)
     351             : 
     352          11 :       (unless (or nomessage noninteractive)
     353           0 :         (if source
     354           0 :             (message "Loading %s (source)...done" file)
     355          11 :           (message "Loading %s...done" file)))
     356          11 :       t)))
     357             : 
     358             : (defun charset-info (charset)
     359             :   "Return a vector of information of CHARSET.
     360             : This function is provided for backward compatibility.
     361             : 
     362             : The elements of the vector are:
     363             :         CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
     364             :         LEADING-CODE-BASE, LEADING-CODE-EXT,
     365             :         ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
     366             :         REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
     367             :         PLIST.
     368             : where
     369             : CHARSET-ID is always 0.
     370             : BYTES is always 0.
     371             : DIMENSION is the number of bytes of a code-point of the charset:
     372             :   1, 2, 3, or 4.
     373             : CHARS is the number of characters in a dimension:
     374             :   94, 96, 128, or 256.
     375             : WIDTH is always 0.
     376             : DIRECTION is always 0.
     377             : LEADING-CODE-BASE is always 0.
     378             : LEADING-CODE-EXT is always 0.
     379             : ISO-FINAL-CHAR (character) is the final character of the
     380             :   corresponding ISO 2022 charset.  If the charset is not assigned
     381             :   any final character, the value is -1.
     382             : ISO-GRAPHIC-PLANE is always 0.
     383             : REVERSE-CHARSET is always -1.
     384             : SHORT-NAME (string) is the short name to refer to the charset.
     385             : LONG-NAME (string) is the long name to refer to the charset
     386             : DESCRIPTION (string) is the description string of the charset.
     387             : PLIST (property list) may contain any type of information a user
     388             :   want to put and get by functions `put-charset-property' and
     389             :   `get-charset-property' respectively."
     390           0 :   (vector 0
     391             :           0
     392           0 :           (charset-dimension charset)
     393           0 :           (charset-chars charset)
     394             :           0
     395             :           0
     396             :           0
     397             :           0
     398           0 :           (charset-iso-final-char charset)
     399             :           0
     400             :           -1
     401           0 :           (get-charset-property charset :short-name)
     402           0 :           (get-charset-property charset :short-name)
     403           0 :           (charset-description charset)
     404           0 :           (charset-plist charset)))
     405             : 
     406             : ;; It is better not to use backquote in this file,
     407             : ;; because that makes a bootstrapping problem
     408             : ;; if you need to recompile all the Lisp files using interpreted code.
     409             : 
     410             : (defun charset-id (_charset)
     411             :   "Always return 0.  This is provided for backward compatibility."
     412             :   (declare (obsolete nil "23.1"))
     413             :   0)
     414             : 
     415             : (defmacro charset-bytes (_charset)
     416             :   "Always return 0.  This is provided for backward compatibility."
     417             :   (declare (obsolete nil "23.1"))
     418             :   0)
     419             : 
     420             : (defun get-charset-property (charset propname)
     421             :   "Return the value of CHARSET's PROPNAME property.
     422             : This is the last value stored with
     423             :  (put-charset-property CHARSET PROPNAME VALUE)."
     424           0 :   (plist-get (charset-plist charset) propname))
     425             : 
     426             : (defun put-charset-property (charset propname value)
     427             :   "Set CHARSETS's PROPNAME property to value VALUE.
     428             : It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
     429          14 :   (set-charset-plist charset
     430          14 :                      (plist-put (charset-plist charset) propname
     431          14 :                                 (if (stringp value)
     432          14 :                                     (purecopy value)
     433          14 :                                   value))))
     434             : 
     435             : (defun charset-description (charset)
     436             :   "Return description string of CHARSET."
     437           0 :   (plist-get (charset-plist charset) :docstring))
     438             : 
     439             : (defun charset-dimension (charset)
     440             :   "Return dimension of CHARSET."
     441           0 :   (plist-get (charset-plist charset) :dimension))
     442             : 
     443             : (defun charset-chars (charset &optional dimension)
     444             :   "Return number of characters contained in DIMENSION of CHARSET.
     445             : DIMENSION defaults to the first dimension."
     446           0 :   (unless dimension (setq dimension 1))
     447           0 :   (let ((code-space (plist-get (charset-plist charset) :code-space)))
     448           0 :     (1+ (- (aref code-space (1- (* 2 dimension)))
     449           0 :            (aref code-space (- (* 2 dimension) 2))))))
     450             : 
     451             : (defun charset-iso-final-char (charset)
     452             :   "Return ISO-2022 final character of CHARSET.
     453             : Return -1 if charset isn't an ISO 2022 one."
     454           0 :   (or (plist-get (charset-plist charset) :iso-final-char)
     455           0 :       -1))
     456             : 
     457             : (defmacro charset-short-name (charset)
     458             :   "Return short name of CHARSET."
     459           0 :   (plist-get (charset-plist charset) :short-name))
     460             : 
     461             : (defmacro charset-long-name (charset)
     462             :   "Return long name of CHARSET."
     463           0 :   (plist-get (charset-plist charset) :long-name))
     464             : 
     465             : (defun charset-list ()
     466             :   "Return list of all charsets ever defined."
     467             :   (declare (obsolete charset-list "23.1"))
     468           0 :   charset-list)
     469             : 
     470             : 
     471             : ;;; CHARACTER
     472             : (define-obsolete-function-alias 'char-valid-p 'characterp "23.1")
     473             : 
     474             : (defun generic-char-p (_char)
     475             :   "Always return nil.  This is provided for backward compatibility."
     476             :   (declare (obsolete nil "23.1"))
     477             :   nil)
     478             : 
     479             : (defun make-char-internal (charset-id &optional code1 code2)
     480           0 :   (let ((charset (aref emacs-mule-charset-table charset-id)))
     481           0 :     (or charset
     482           0 :         (error "Invalid Emacs-mule charset ID: %d" charset-id))
     483           0 :     (make-char charset code1 code2)))
     484             : 
     485             : ;; Save the ASCII case table in case we need it later.  Some locales
     486             : ;; (such as Turkish) modify the case behavior of ASCII characters,
     487             : ;; which can interfere with networking code that uses ASCII strings.
     488             : 
     489             : (defvar ascii-case-table
     490             :   ;; Code copied from copy-case-table to avoid requiring case-table.el
     491             :   (let ((tbl (copy-sequence (standard-case-table)))
     492             :         (up  (char-table-extra-slot (standard-case-table) 0)))
     493             :     (if up (set-char-table-extra-slot tbl 0 (copy-sequence up)))
     494             :     (set-char-table-extra-slot tbl 1 nil)
     495             :     (set-char-table-extra-slot tbl 2 nil)
     496             :     tbl)
     497             :   "Case table for the ASCII character set.")
     498             : 
     499             : ;; Coding system stuff
     500             : 
     501             : ;; Coding system is a symbol that has been defined by the function
     502             : ;; `define-coding-system'.
     503             : 
     504             : (defconst coding-system-iso-2022-flags
     505             :   '(long-form
     506             :     ascii-at-eol
     507             :     ascii-at-cntl
     508             :     7-bit
     509             :     locking-shift
     510             :     single-shift
     511             :     designation
     512             :     revision
     513             :     direction
     514             :     init-at-bol
     515             :     designate-at-bol
     516             :     safe
     517             :     latin-extra
     518             :     composition
     519             :     euc-tw-shift
     520             :     use-roman
     521             :     use-oldjis
     522             :     8-bit-level-4)
     523             :   "List of symbols that control ISO-2022 encoder/decoder.
     524             : 
     525             : The value of the `:flags' attribute in the argument of the function
     526             : `define-coding-system' must be one of them.
     527             : 
     528             : If `long-form' is specified, use a long designation sequence on
     529             : encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312',
     530             : and `japanese-jisx0208'.  The long designation sequence doesn't
     531             : conform to ISO 2022, but is used by such coding systems as
     532             : `compound-text'.
     533             : 
     534             : If `ascii-at-eol' is specified, designate ASCII to g0 at end of line
     535             : on encoding.
     536             : 
     537             : If `ascii-at-cntl' is specified, designate ASCII to g0 before control
     538             : codes and SPC on encoding.
     539             : 
     540             : If `7-bit' is specified, use 7-bit code only on encoding.
     541             : 
     542             : If `locking-shift' is specified, decode locking-shift code correctly
     543             : on decoding, and use locking-shift to invoke a graphic element on
     544             : encoding.
     545             : 
     546             : If `single-shift' is specified, decode single-shift code
     547             : correctly on decoding, and use single-shift to invoke a graphic
     548             : element on encoding.  See also `8-bit-level-4' specification.
     549             : 
     550             : If `designation' is specified, decode designation code correctly on
     551             : decoding, and use designation to designate a charset to a graphic
     552             : element on encoding.
     553             : 
     554             : If `revision' is specified, produce an escape sequence to specify
     555             : revision number of a charset on encoding.  Such an escape sequence is
     556             : always correctly decoded on decoding.
     557             : 
     558             : If `direction' is specified, decode ISO6429's code for specifying
     559             : direction correctly, and produce the code on encoding.
     560             : 
     561             : If `init-at-bol' is specified, on encoding, it is assumed that
     562             : invocation and designation statuses are reset at each beginning of
     563             : line even if `ascii-at-eol' is not specified; thus no codes for
     564             : resetting them are produced.
     565             : 
     566             : If `safe' is specified, on encoding, characters not supported by a
     567             : coding are replaced with `?'.
     568             : 
     569             : If `latin-extra' is specified, the code-detection routine assumes that a
     570             : code specified in `latin-extra-code-table' (which see) is valid.
     571             : 
     572             : If `composition' is specified, an escape sequence to specify
     573             : composition sequence is correctly decoded on decoding, and is produced
     574             : on encoding.
     575             : 
     576             : If `euc-tw-shift' is specified, the EUC-TW specific shifting code is
     577             : correctly decoded on decoding, and is produced on encoding.
     578             : 
     579             : If `use-roman' is specified, JIS0201-1976-Roman is designated instead
     580             : of ASCII.
     581             : 
     582             : If `use-oldjis' is specified, JIS0208-1976 is designated instead of
     583             : JIS0208-1983.
     584             : 
     585             : If `8-bit-level-4' is specified, the decoder assumes the
     586             : implementation level \"4\" for 8-bit codes which means that GL is
     587             : identified as the single-shift area.  The default implementation
     588             : level for 8-bit code is \"4A\" which means that GR is identified
     589             : as the single-shift area.")
     590             : 
     591             : (defun define-coding-system (name docstring &rest props)
     592             :   "Define NAME (a symbol) as a coding system with DOCSTRING and attributes.
     593             : The remaining arguments must come in pairs ATTRIBUTE VALUE.  ATTRIBUTE
     594             : may be any symbol.
     595             : 
     596             : A coding system specifies a rule to decode (i.e. to convert a
     597             : byte sequence to a character sequence) and a rule to encode (the
     598             : opposite of decoding).
     599             : 
     600             : The decoding is done by at most 3 steps; the first is to convert
     601             : a byte sequence to a character sequence by one of Emacs'
     602             : internal routines specified by `:coding-type' attribute.  The
     603             : optional second step is to convert the character sequence (the
     604             : result of the first step) by a translation table specified
     605             : by `:decode-translation-table' attribute.  The optional third step
     606             : is to convert the above result by a Lisp function specified
     607             : by `:post-read-conversion' attribute.
     608             : 
     609             : The encoding is done by at most 3 steps, which are the reverse
     610             : of the decoding steps.  The optional first step converts a
     611             : character sequence to another character sequence by a Lisp
     612             : function specified by `:pre-write-conversion' attribute.  The
     613             : optional second step converts the above result by a translation
     614             : table specified by `:encode-translation-table' attribute.  The
     615             : third step converts the above result to a byte sequence by one
     616             : of the Emacs's internal routines specified by the `:coding-type'
     617             : attribute.
     618             : 
     619             : The following attributes have special meanings.  Those labeled as
     620             : \"(required)\" should not be omitted.
     621             : 
     622             : `:mnemonic' (required)
     623             : 
     624             : VALUE is a character to display on mode line for the coding system.
     625             : 
     626             : `:coding-type' (required)
     627             : 
     628             : VALUE specifies the format of byte sequence the coding system
     629             : decodes and encodes to.  It must be one of `charset', `utf-8',
     630             : `utf-16', `iso-2022', `emacs-mule', `shift-jis', `ccl',
     631             : `raw-text', `undecided'.
     632             : 
     633             : If VALUE is `charset', the coding system is for handling a
     634             : byte sequence in which each byte or every two- to four-byte
     635             : sequence represents a character code of a charset specified
     636             : by the `:charset-list' attribute.
     637             : 
     638             : If VALUE is `utf-8', the coding system is for handling Unicode
     639             : UTF-8 byte sequences.  See also the documentation of the
     640             : attribute `:bom'.
     641             : 
     642             : If VALUE is `utf-16', the coding system is for handling Unicode
     643             : UTF-16 byte sequences.  See also the documentation of the
     644             : attributes :bom and `:endian'.
     645             : 
     646             : If VALUE is `iso-2022', the coding system is for handling byte
     647             : sequences conforming to ISO/IEC 2022.  See also the documentation
     648             : of the attributes `:charset-list', `:flags', and `:designation'.
     649             : 
     650             : If VALUE is `emacs-mule', the coding system is for handling
     651             : byte sequences which Emacs 20 and 21 used for their internal
     652             : representation of characters.
     653             : 
     654             : If VALUE is `shift-jis', the coding system is for handling byte
     655             : sequences of Shift_JIS format.  See also the attribute `:charset-list'.
     656             : 
     657             : If VALUE is `ccl', the coding system uses CCL programs to decode
     658             : and encode byte sequences.  The CCL programs must be
     659             : specified by the attributes `:ccl-decoder' and `:ccl-encoder'.
     660             : 
     661             : If VALUE is `raw-text', the coding system decodes byte sequences
     662             : without any conversions.
     663             : 
     664             : `:eol-type'
     665             : 
     666             : VALUE is the EOL (end-of-line) format of the coding system.  It must be
     667             : one of `unix', `dos', `mac'.  The symbol `unix' means Unix-like EOL
     668             : \(i.e., a single LF character), `dos' means DOS-like EOL \(i.e., a sequence
     669             : of CR followed by LF), and `mac' means Mac-like EOL \(i.e., a single CR).
     670             : If omitted, Emacs detects the EOL format automatically when decoding.
     671             : 
     672             : `:charset-list' (required if `:coding-type' is `charset' or `shift-jis')
     673             : 
     674             : VALUE must be a list of charsets supported by the coding system.
     675             : 
     676             : If `coding-type:' is `charset', then on decoding and encoding by the
     677             : coding system, if a character belongs to multiple charsets in the
     678             : list, a charset that comes first in the list is selected.
     679             : 
     680             : If `:coding-type' is `iso-2022', VALUE may be `iso-2022', which
     681             : indicates that the coding system supports all ISO-2022 based
     682             : charsets.
     683             : 
     684             : If `:coding-type' is `shift-jis', VALUE must be a list of three
     685             : to four charsets supported by Shift_JIS encoding scheme.  The
     686             : first charset (one dimension) is for code space 0x00..0x7F, the
     687             : second (one dimension) for 0xA1..0xDF, the third (two dimension)
     688             : for 0x8140..0xEFFC, the optional fourth (three dimension) for
     689             : 0xF040..0xFCFC.
     690             : 
     691             : If `:coding-type' is `emacs-mule', VALUE may be `emacs-mule',
     692             : which indicates that the coding system supports all charsets that
     693             : have the `:emacs-mule-id' property.
     694             : 
     695             : `:ascii-compatible-p'
     696             : 
     697             : If VALUE is non-nil, the coding system decodes all 7-bit bytes into
     698             : the corresponding ASCII characters, and encodes all ASCII characters
     699             : back to the corresponding 7-bit bytes.  VALUE defaults to nil.
     700             : 
     701             : `:decode-translation-table'
     702             : 
     703             : VALUE must be a translation table to use on decoding.
     704             : 
     705             : `:encode-translation-table'
     706             : 
     707             : VALUE must be a translation table to use on encoding.
     708             : 
     709             : `:post-read-conversion'
     710             : 
     711             : VALUE must be a function to call after some text is inserted and
     712             : decoded by the coding system itself and before any functions in
     713             : `after-insert-functions' are called.  This function is passed one
     714             : argument: the number of characters in the text to convert, with
     715             : point at the start of the text.  The function should leave point
     716             : unchanged, and should return the new character count.  Note that
     717             : this function should avoid reading from files or receiving text
     718             : from subprocesses -- anything that could invoke decoding; if it
     719             : must do so, it should bind `coding-system-for-read' to a value
     720             : other than the current coding-system, to avoid infinite recursion.
     721             : 
     722             : `:pre-write-conversion'
     723             : 
     724             : VALUE must be a function to call after all functions in
     725             : `write-region-annotate-functions' and `buffer-file-format' are
     726             : called, and before the text is encoded by the coding system
     727             : itself.  This function should convert the whole text in the
     728             : current buffer.  For backward compatibility, this function is
     729             : passed two arguments which can be ignored.  Note that this
     730             : function should avoid writing to files or sending text to
     731             : subprocesses -- anything that could invoke encoding; if it
     732             : must do so, it should bind `coding-system-for-write' to a
     733             : value other than the current coding-system, to avoid infinite
     734             : recursion.
     735             : 
     736             : `:default-char'
     737             : 
     738             : VALUE must be a character.  On encoding, a character not supported by
     739             : the coding system is replaced with VALUE.
     740             : 
     741             : `:for-unibyte'
     742             : 
     743             : VALUE non-nil means that visiting a file with the coding system
     744             : results in a unibyte buffer.
     745             : 
     746             : `:mime-charset'
     747             : 
     748             : VALUE must be a symbol whose name is that of a MIME charset converted
     749             : to lower case.
     750             : 
     751             : `:mime-text-unsuitable'
     752             : 
     753             : VALUE non-nil means the `:mime-charset' property names a charset which
     754             : is unsuitable for the top-level media of type \"text\".
     755             : 
     756             : `:flags'
     757             : 
     758             : VALUE must be a list of symbols that control the ISO-2022 converter.
     759             : Each must be a member of the list `coding-system-iso-2022-flags'
     760             : \(which see).  This attribute is meaningful only when `:coding-type'
     761             : is `iso-2022'.
     762             : 
     763             : `:designation'
     764             : 
     765             : VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE].
     766             : GN-USAGE specifies the usage of graphic register GN as follows.
     767             : 
     768             : If it is nil, no charset can be designated to GN.
     769             : 
     770             : If it is a charset, the charset is initially designated to GN, and
     771             : never used by the other charsets.
     772             : 
     773             : If it is a list, the elements must be charsets, nil, 94, or 96.  GN
     774             : can be used by all the listed charsets.  If the list contains 94, any
     775             : iso-2022 charset whose code-space ranges are 94 long can be designated
     776             : to GN.  If the list contains 96, any charsets whose whose ranges are
     777             : 96 long can be designated to GN.  If the first element is a charset,
     778             : that charset is initially designated to GN.
     779             : 
     780             : This attribute is meaningful only when `:coding-type' is `iso-2022'.
     781             : 
     782             : `:bom'
     783             : 
     784             : This attributes specifies whether the coding system uses a \"byte order
     785             : mark\".  VALUE must be nil, t, or a cons cell of coding systems whose
     786             : `:coding-type' is `utf-16' or `utf-8'.
     787             : 
     788             : If the value is nil, on decoding, don't treat the first two-byte as
     789             : BOM, and on encoding, don't produce BOM bytes.
     790             : 
     791             : If the value is t, on decoding, skip the first two-byte as BOM, and on
     792             : encoding, produce BOM bytes according to the value of `:endian'.
     793             : 
     794             : If the value is a cons cell, on decoding, check the first two bytes.
     795             : If they are 0xFE 0xFF, use the car part coding system of the value.
     796             : If they are 0xFF 0xFE, use the cdr part coding system of the value.
     797             : Otherwise, treat them as bytes for a normal character.  On encoding,
     798             : produce BOM bytes according to the value of `:endian'.
     799             : 
     800             : This attribute is meaningful only when `:coding-type' is `utf-16' or
     801             : `utf-8'.
     802             : 
     803             : `:endian'
     804             : 
     805             : VALUE must be `big' or `little' specifying big-endian and
     806             : little-endian respectively.  The default value is `big'.
     807             : 
     808             : This attribute is meaningful only when `:coding-type' is `utf-16'.
     809             : 
     810             : `:ccl-decoder' (required if :coding-type is `ccl')
     811             : 
     812             : VALUE is a CCL program name defined by `define-ccl-program'.  The
     813             : CCL program reads a byte sequence and writes a character sequence
     814             : as a decoding result.
     815             : 
     816             : `:ccl-encoder' (required if :coding-type is `ccl')
     817             : 
     818             : VALUE is a CCL program name defined by `define-ccl-program'.  The
     819             : CCL program reads a character sequence and writes a byte sequence
     820             : as an encoding result.
     821             : 
     822             : `:inhibit-null-byte-detection'
     823             : 
     824             : VALUE non-nil means Emacs ignore null bytes on code detection.
     825             : See the variable `inhibit-null-byte-detection'.  This attribute
     826             : is meaningful only when `:coding-type' is `undecided'.
     827             : 
     828             : `:inhibit-iso-escape-detection'
     829             : 
     830             : VALUE non-nil means Emacs ignores ISO-2022 escape sequences on
     831             : code detection.  See the variable `inhibit-iso-escape-detection'.
     832             : This attribute is meaningful only when `:coding-type' is
     833             : `undecided'.
     834             : 
     835             : `:prefer-utf-8'
     836             : 
     837             : VALUE non-nil means Emacs prefers UTF-8 on code detection for
     838             : non-ASCII files.  This attribute is meaningful only when
     839             : `:coding-type' is `undecided'."
     840         114 :   (let* ((common-attrs (mapcar 'list
     841             :                                '(:mnemonic
     842             :                                  :coding-type
     843             :                                  :charset-list
     844             :                                  :ascii-compatible-p
     845             :                                  :decode-translation-table
     846             :                                  :encode-translation-table
     847             :                                  :post-read-conversion
     848             :                                  :pre-write-conversion
     849             :                                  :default-char
     850             :                                  :for-unibyte
     851             :                                  :plist
     852         114 :                                  :eol-type)))
     853         114 :          (coding-type (plist-get props :coding-type))
     854         114 :          (spec-attrs (mapcar 'list
     855         114 :                              (cond ((eq coding-type 'iso-2022)
     856             :                                     '(:initial
     857             :                                       :reg-usage
     858             :                                       :request
     859             :                                       :flags))
     860          91 :                                    ((eq coding-type 'utf-8)
     861             :                                     '(:bom))
     862          81 :                                    ((eq coding-type 'utf-16)
     863             :                                     '(:bom
     864             :                                       :endian))
     865          76 :                                    ((eq coding-type 'ccl)
     866             :                                     '(:ccl-decoder
     867             :                                       :ccl-encoder
     868             :                                       :valids))
     869          76 :                                    ((eq coding-type 'undecided)
     870             :                                     '(:inhibit-null-byte-detection
     871             :                                       :inhibit-iso-escape-detection
     872         114 :                                       :prefer-utf-8))))))
     873             : 
     874         114 :     (dolist (slot common-attrs)
     875        1368 :       (setcdr slot (plist-get props (car slot))))
     876             : 
     877         114 :     (dolist (slot spec-attrs)
     878         115 :       (setcdr slot (plist-get props (car slot))))
     879             : 
     880         114 :     (if (eq coding-type 'iso-2022)
     881          23 :         (let ((designation (plist-get props :designation))
     882          23 :               (flags (plist-get props :flags))
     883          23 :               (initial (make-vector 4 nil))
     884          23 :               (reg-usage (cons 4 4))
     885             :               request elt)
     886          23 :           (dotimes (i 4)
     887          92 :             (setq elt (aref designation i))
     888          92 :             (cond ((charsetp elt)
     889          25 :                    (aset initial i elt)
     890          25 :                    (setq request (cons (cons elt i) request)))
     891          67 :                   ((consp elt)
     892          29 :                    (aset initial i (car elt))
     893          29 :                    (if (charsetp (car elt))
     894          29 :                        (setq request (cons (cons (car elt) i) request)))
     895          29 :                    (dolist (e (cdr elt))
     896          64 :                      (cond ((charsetp e)
     897          49 :                             (setq request (cons (cons e i) request)))
     898          15 :                            ((eq e 94)
     899           7 :                             (setcar reg-usage i))
     900           8 :                            ((eq e 96)
     901           7 :                             (setcdr reg-usage i))
     902           1 :                            ((eq e t)
     903           1 :                             (setcar reg-usage i)
     904          92 :                             (setcdr reg-usage i)))))))
     905          23 :           (setcdr (assq :initial spec-attrs) initial)
     906          23 :           (setcdr (assq :reg-usage spec-attrs) reg-usage)
     907          23 :           (setcdr (assq :request spec-attrs) request)
     908             : 
     909             :           ;; Change :flags value from a list to a bit-mask.
     910          23 :           (let ((bits 0)
     911             :                 (i 0))
     912          23 :             (dolist (elt coding-system-iso-2022-flags)
     913         414 :               (if (memq elt flags)
     914         414 :                   (setq bits (logior bits (lsh 1 i))))
     915         414 :               (setq i (1+ i)))
     916         114 :             (setcdr (assq :flags spec-attrs) bits))))
     917             : 
     918             :     ;; Add :name and :docstring properties to PROPS.
     919         114 :     (setq props
     920         114 :           (cons :name (cons name (cons :docstring (cons (purecopy docstring)
     921         114 :                                                         props)))))
     922         114 :     (setcdr (assq :plist common-attrs) props)
     923         114 :     (apply 'define-coding-system-internal
     924         114 :            name (mapcar 'cdr (append common-attrs spec-attrs)))))
     925             : 
     926             : (defun coding-system-doc-string (coding-system)
     927             :   "Return the documentation string for CODING-SYSTEM."
     928           0 :   (plist-get (coding-system-plist coding-system) :docstring))
     929             : 
     930             : (defun coding-system-mnemonic (coding-system)
     931             :   "Return the mnemonic character of CODING-SYSTEM.
     932             : The mnemonic character of a coding system is used in mode line to
     933             : indicate the coding system.  If CODING-SYSTEM is nil, return ?=."
     934           0 :   (plist-get (coding-system-plist coding-system) :mnemonic))
     935             : 
     936             : (defun coding-system-type (coding-system)
     937             :   "Return the coding type of CODING-SYSTEM.
     938             : A coding type is a symbol indicating the encoding method of CODING-SYSTEM.
     939             : See the function `define-coding-system' for more detail."
     940         480 :   (plist-get (coding-system-plist coding-system) :coding-type))
     941             : 
     942             : (defun coding-system-charset-list (coding-system)
     943             :   "Return list of charsets supported by CODING-SYSTEM.
     944             : If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'.
     945             : If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'."
     946           0 :   (plist-get (coding-system-plist coding-system) :charset-list))
     947             : 
     948             : (defun coding-system-category (coding-system)
     949             :   "Return a category symbol of CODING-SYSTEM."
     950           0 :   (plist-get (coding-system-plist coding-system) :category))
     951             : 
     952             : (defun coding-system-get (coding-system prop)
     953             :   "Extract a value from CODING-SYSTEM's property list for property PROP.
     954             : For compatibility with Emacs 20/21, this accepts old-style symbols
     955             : like `mime-charset' as well as the current style like `:mime-charset'."
     956         339 :   (or (plist-get (coding-system-plist coding-system) prop)
     957           0 :       (if (not (keywordp prop))
     958             :           ;; For backward compatibility.
     959           0 :           (if (eq prop 'ascii-incompatible)
     960           0 :               (not (plist-get (coding-system-plist coding-system)
     961           0 :                               :ascii-compatible-p))
     962           0 :             (plist-get (coding-system-plist coding-system)
     963         339 :                        (intern (concat ":" (symbol-name prop))))))))
     964             : 
     965             : (defun coding-system-eol-type-mnemonic (coding-system)
     966             :   "Return the string indicating end-of-line format of CODING-SYSTEM."
     967           0 :   (let* ((eol-type (coding-system-eol-type coding-system))
     968           0 :          (val (cond ((eq eol-type 0) eol-mnemonic-unix)
     969           0 :                     ((eq eol-type 1) eol-mnemonic-dos)
     970           0 :                     ((eq eol-type 2) eol-mnemonic-mac)
     971           0 :                     (t eol-mnemonic-undecided))))
     972           0 :     (if (stringp val)
     973           0 :         val
     974           0 :       (char-to-string val))))
     975             : 
     976             : (defun coding-system-lessp (x y)
     977           0 :   (cond ((eq x 'no-conversion) t)
     978           0 :         ((eq y 'no-conversion) nil)
     979           0 :         ((eq x 'emacs-mule) t)
     980           0 :         ((eq y 'emacs-mule) nil)
     981           0 :         ((eq x 'undecided) t)
     982           0 :         ((eq y 'undecided) nil)
     983           0 :         (t (let ((c1 (coding-system-mnemonic x))
     984           0 :                  (c2 (coding-system-mnemonic y)))
     985           0 :              (or (< (downcase c1) (downcase c2))
     986           0 :                  (and (not (> (downcase c1) (downcase c2)))
     987           0 :                       (< c1 c2)))))))
     988             : 
     989             : (defun coding-system-equal (coding-system-1 coding-system-2)
     990             :   "Return t if and only if CODING-SYSTEM-1 and CODING-SYSTEM-2 are identical.
     991             : Two coding systems are identical if both symbols are equal
     992             : or one is an alias of the other."
     993           0 :   (or (eq coding-system-1 coding-system-2)
     994           0 :       (and (equal (coding-system-plist coding-system-1)
     995           0 :                   (coding-system-plist coding-system-2))
     996           0 :            (let ((eol-type-1 (coding-system-eol-type coding-system-1))
     997           0 :                  (eol-type-2 (coding-system-eol-type coding-system-2)))
     998           0 :              (or (eq eol-type-1 eol-type-2)
     999           0 :                  (and (vectorp eol-type-1) (vectorp eol-type-2)))))))
    1000             : 
    1001             : (defun add-to-coding-system-list (coding-system)
    1002             :   "Add CODING-SYSTEM to variable `coding-system-list' while keeping it sorted."
    1003           0 :   (if (or (null coding-system-list)
    1004           0 :           (coding-system-lessp coding-system (car coding-system-list)))
    1005           0 :       (setq coding-system-list (cons coding-system coding-system-list))
    1006           0 :     (let ((len (length coding-system-list))
    1007           0 :           mid (tem coding-system-list))
    1008           0 :       (while (> len 1)
    1009           0 :         (setq mid (nthcdr (/ len 2) tem))
    1010           0 :         (if (coding-system-lessp (car mid) coding-system)
    1011           0 :             (setq tem mid
    1012           0 :                   len (- len (/ len 2)))
    1013           0 :           (setq len (/ len 2))))
    1014           0 :       (setcdr tem (cons coding-system (cdr tem))))))
    1015             : 
    1016             : (defun coding-system-list (&optional base-only)
    1017             :   "Return a list of all existing non-subsidiary coding systems.
    1018             : If optional arg BASE-ONLY is non-nil, only base coding systems are
    1019             : listed.  The value doesn't include subsidiary coding systems which are
    1020             : made from bases and aliases automatically for various end-of-line
    1021             : formats (e.g. iso-latin-1-unix, koi8-r-dos)."
    1022          71 :   (let ((codings nil))
    1023          71 :     (dolist (coding coding-system-list)
    1024      128439 :       (if (eq (coding-system-base coding) coding)
    1025       16188 :           (if base-only
    1026           0 :               (setq codings (cons coding codings))
    1027       16188 :             (dolist (alias (coding-system-aliases coding))
    1028      128439 :               (setq codings (cons alias codings))))))
    1029          71 :     codings))
    1030             : 
    1031             : (defconst char-coding-system-table nil
    1032             :   "It exists just for backward compatibility, and the value is always nil.")
    1033             : (make-obsolete-variable 'char-coding-system-table nil "23.1")
    1034             : 
    1035             : (defun transform-make-coding-system-args (name type &optional doc-string props)
    1036             :   "For internal use only.
    1037             : Transform XEmacs style args for `make-coding-system' to Emacs style.
    1038             : Value is a list of transformed arguments."
    1039           0 :   (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?")))
    1040           0 :         (eol-type (plist-get props 'eol-type))
    1041             :         properties tmp)
    1042           0 :     (cond
    1043           0 :      ((eq eol-type 'lf) (setq eol-type 'unix))
    1044           0 :      ((eq eol-type 'crlf) (setq eol-type 'dos))
    1045           0 :      ((eq eol-type 'cr) (setq eol-type 'mac)))
    1046           0 :     (if (setq tmp (plist-get props 'post-read-conversion))
    1047           0 :         (setq properties (plist-put properties 'post-read-conversion tmp)))
    1048           0 :     (if (setq tmp (plist-get props 'pre-write-conversion))
    1049           0 :         (setq properties (plist-put properties 'pre-write-conversion tmp)))
    1050           0 :     (cond
    1051           0 :      ((eq type 'shift-jis)
    1052           0 :       `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type))
    1053           0 :      ((eq type 'iso2022) ; This is not perfect.
    1054           0 :       (if (plist-get props 'escape-quoted)
    1055           0 :           (error "escape-quoted is not supported: %S"
    1056           0 :                  `(,name ,type ,doc-string ,props)))
    1057           0 :       (let ((g0 (plist-get props 'charset-g0))
    1058           0 :             (g1 (plist-get props 'charset-g1))
    1059           0 :             (g2 (plist-get props 'charset-g2))
    1060           0 :             (g3 (plist-get props 'charset-g3))
    1061             :             (use-roman
    1062           0 :              (and
    1063           0 :               (eq (cadr (assoc 'latin-jisx0201
    1064           0 :                                (plist-get props 'input-charset-conversion)))
    1065           0 :                   'ascii)
    1066           0 :               (eq (cadr (assoc 'ascii
    1067           0 :                                (plist-get props 'output-charset-conversion)))
    1068           0 :                   'latin-jisx0201)))
    1069             :             (use-oldjis
    1070           0 :              (and
    1071           0 :               (eq (cadr (assoc 'japanese-jisx0208-1978
    1072           0 :                                (plist-get props 'input-charset-conversion)))
    1073           0 :                   'japanese-jisx0208)
    1074           0 :               (eq (cadr (assoc 'japanese-jisx0208
    1075           0 :                                (plist-get props 'output-charset-conversion)))
    1076           0 :                   'japanese-jisx0208-1978))))
    1077           0 :         (if (charsetp g0)
    1078           0 :             (if (plist-get props 'force-g0-on-output)
    1079           0 :                 (setq g0 `(nil ,g0))
    1080           0 :               (setq g0 `(,g0 t))))
    1081           0 :         (if (charsetp g1)
    1082           0 :             (if (plist-get props 'force-g1-on-output)
    1083           0 :                 (setq g1 `(nil ,g1))
    1084           0 :               (setq g1 `(,g1 t))))
    1085           0 :         (if (charsetp g2)
    1086           0 :             (if (plist-get props 'force-g2-on-output)
    1087           0 :                 (setq g2 `(nil ,g2))
    1088           0 :               (setq g2 `(,g2 t))))
    1089           0 :         (if (charsetp g3)
    1090           0 :             (if (plist-get props 'force-g3-on-output)
    1091           0 :                 (setq g3 `(nil ,g3))
    1092           0 :               (setq g3 `(,g3 t))))
    1093           0 :         `(,name 2 ,mnemonic ,doc-string
    1094           0 :           (,g0 ,g1 ,g2 ,g3
    1095           0 :            ,(plist-get props 'short)
    1096           0 :            ,(not (plist-get props 'no-ascii-eol))
    1097           0 :            ,(not (plist-get props 'no-ascii-cntl))
    1098           0 :            ,(plist-get props 'seven)
    1099             :            t
    1100           0 :            ,(not (plist-get props 'lock-shift))
    1101           0 :            ,use-roman
    1102           0 :            ,use-oldjis
    1103           0 :            ,(plist-get props 'no-iso6429)
    1104             :            nil nil nil nil)
    1105           0 :         ,properties ,eol-type)))
    1106           0 :      ((eq type 'big5)
    1107           0 :       `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type))
    1108           0 :      ((eq type 'ccl)
    1109           0 :       `(,name 4 ,mnemonic ,doc-string
    1110           0 :               (,(plist-get props 'decode) . ,(plist-get props 'encode))
    1111           0 :               ,properties ,eol-type))
    1112             :      (t
    1113           0 :       (error "unsupported XEmacs style make-coding-style arguments: %S"
    1114           0 :              `(,name ,type ,doc-string ,props))))))
    1115             : 
    1116             : (defun make-coding-system (coding-system type mnemonic doc-string
    1117             :                                          &optional
    1118             :                                          flags
    1119             :                                          properties
    1120             :                                          eol-type)
    1121             :   "Define a new coding system CODING-SYSTEM (symbol).
    1122             : This function is provided for backward compatibility."
    1123             :   (declare (obsolete define-coding-system "23.1"))
    1124             :   ;; For compatibility with XEmacs, we check the type of TYPE.  If it
    1125             :   ;; is a symbol, perhaps, this function is called with XEmacs-style
    1126             :   ;; arguments.  Here, try to transform that kind of arguments to
    1127             :   ;; Emacs style.
    1128           0 :   (if (symbolp type)
    1129           0 :       (let ((args (transform-make-coding-system-args coding-system type
    1130           0 :                                                      mnemonic doc-string)))
    1131           0 :         (setq coding-system (car args)
    1132           0 :               type (nth 1 args)
    1133           0 :               mnemonic (nth 2 args)
    1134           0 :               doc-string (nth 3 args)
    1135           0 :               flags (nth 4 args)
    1136           0 :               properties (nth 5 args)
    1137           0 :               eol-type (nth 6 args))))
    1138             : 
    1139           0 :   (setq type
    1140           0 :         (cond ((eq type 0) 'emacs-mule)
    1141           0 :               ((eq type 1) 'shift-jis)
    1142           0 :               ((eq type 2) 'iso2022)
    1143           0 :               ((eq type 3) 'big5)
    1144           0 :               ((eq type 4) 'ccl)
    1145           0 :               ((eq type 5) 'raw-text)
    1146             :               (t
    1147           0 :                (error "Invalid coding system type: %s" type))))
    1148             : 
    1149           0 :   (setq properties
    1150           0 :         (let ((plist nil) key)
    1151           0 :           (dolist (elt properties)
    1152           0 :             (setq key (car elt))
    1153           0 :             (cond ((eq key 'post-read-conversion)
    1154           0 :                    (setq key :post-read-conversion))
    1155           0 :                   ((eq key 'pre-write-conversion)
    1156           0 :                    (setq key :pre-write-conversion))
    1157           0 :                   ((eq key 'translation-table-for-decode)
    1158           0 :                    (setq key :decode-translation-table))
    1159           0 :                   ((eq key 'translation-table-for-encode)
    1160           0 :                    (setq key :encode-translation-table))
    1161           0 :                   ((eq key 'safe-charsets)
    1162           0 :                    (setq key :charset-list))
    1163           0 :                   ((eq key 'mime-charset)
    1164           0 :                    (setq key :mime-charset))
    1165           0 :                   ((eq key 'valid-codes)
    1166           0 :                    (setq key :valids)))
    1167           0 :             (setq plist (plist-put plist key (cdr elt))))
    1168           0 :           plist))
    1169           0 :   (setq properties (plist-put properties :mnemonic mnemonic))
    1170           0 :   (plist-put properties :coding-type type)
    1171           0 :   (cond ((eq eol-type 0) (setq eol-type 'unix))
    1172           0 :         ((eq eol-type 1) (setq eol-type 'dos))
    1173           0 :         ((eq eol-type 2) (setq eol-type 'mac))
    1174           0 :         ((vectorp eol-type) (setq eol-type nil)))
    1175           0 :   (plist-put properties :eol-type eol-type)
    1176             : 
    1177           0 :   (cond
    1178           0 :    ((eq type 'iso2022)
    1179           0 :     (plist-put properties :flags
    1180           0 :                (list (and (or (consp (nth 0 flags))
    1181           0 :                               (consp (nth 1 flags))
    1182           0 :                               (consp (nth 2 flags))
    1183           0 :                               (consp (nth 3 flags))) 'designation)
    1184           0 :                      (or (nth 4 flags) 'long-form)
    1185           0 :                      (and (nth 5 flags) 'ascii-at-eol)
    1186           0 :                      (and (nth 6 flags) 'ascii-at-cntl)
    1187           0 :                      (and (nth 7 flags) '7-bit)
    1188           0 :                      (and (nth 8 flags) 'locking-shift)
    1189           0 :                      (and (nth 9 flags) 'single-shift)
    1190           0 :                      (and (nth 10 flags) 'use-roman)
    1191           0 :                      (and (nth 11 flags) 'use-oldjis)
    1192           0 :                      (or (nth 12 flags) 'direction)
    1193           0 :                      (and (nth 13 flags) 'init-at-bol)
    1194           0 :                      (and (nth 14 flags) 'designate-at-bol)
    1195           0 :                      (and (nth 15 flags) 'safe)
    1196           0 :                      (and (nth 16 flags) 'latin-extra)))
    1197           0 :     (plist-put properties :designation
    1198           0 :                (let ((vec (make-vector 4 nil)))
    1199           0 :                  (dotimes (i 4)
    1200           0 :                    (let ((spec (nth i flags)))
    1201           0 :                      (if (eq spec t)
    1202           0 :                          (aset vec i '(94 96))
    1203           0 :                      (if (consp spec)
    1204           0 :                          (progn
    1205           0 :                            (if (memq t spec)
    1206           0 :                                (setq spec (append (delq t spec) '(94 96))))
    1207           0 :                            (aset vec i spec))))))
    1208           0 :                  vec)))
    1209             : 
    1210           0 :    ((eq type 'ccl)
    1211           0 :     (plist-put properties :ccl-decoder (car flags))
    1212           0 :     (plist-put properties :ccl-encoder (cdr flags))))
    1213             : 
    1214           0 :   (apply 'define-coding-system coding-system doc-string properties))
    1215             : 
    1216             : (defun merge-coding-systems (first second)
    1217             :   "Fill in any unspecified aspects of coding system FIRST from SECOND.
    1218             : Return the resulting coding system."
    1219           0 :   (let ((base (coding-system-base second))
    1220           0 :         (eol (coding-system-eol-type second)))
    1221             :     ;; If FIRST doesn't specify text conversion, merge with that of SECOND.
    1222           0 :     (if (eq (coding-system-base first) 'undecided)
    1223           0 :         (setq first (coding-system-change-text-conversion first base)))
    1224             :     ;; If FIRST doesn't specify eol conversion, merge with that of SECOND.
    1225           0 :     (if (and (vectorp (coding-system-eol-type first))
    1226           0 :              (numberp eol) (>= eol 0) (<= eol 2))
    1227           0 :         (setq first (coding-system-change-eol-conversion
    1228           0 :                      first eol)))
    1229           0 :     first))
    1230             : 
    1231             : (defun autoload-coding-system (symbol form)
    1232             :   "Define SYMBOL as a coding-system that is defined on demand.
    1233             : 
    1234             : FORM is a form to evaluate to define the coding-system."
    1235           0 :   (put symbol 'coding-system-define-form form)
    1236           0 :   (setq coding-system-alist (cons (list (symbol-name symbol))
    1237           0 :                                   coding-system-alist))
    1238           0 :   (dolist (elt '("-unix" "-dos" "-mac"))
    1239           0 :     (let ((name (concat (symbol-name symbol) elt)))
    1240           0 :       (put (intern name) 'coding-system-define-form form)
    1241           0 :       (setq coding-system-alist (cons (list name) coding-system-alist)))))
    1242             : 
    1243             : ;; This variable is set in these two cases:
    1244             : ;;   (1) A file is read by a coding system specified explicitly.
    1245             : ;;       `after-insert-file-set-coding' sets the car of this value to
    1246             : ;;       `coding-system-for-read', and sets the cdr to nil.
    1247             : ;;   (2) `set-buffer-file-coding-system' is called.
    1248             : ;;       The cdr of this value is set to the specified coding system.
    1249             : ;; This variable is used for decoding in `revert-buffer' and encoding
    1250             : ;; in `select-safe-coding-system'.
    1251             : ;;
    1252             : ;; When saving a buffer, if `buffer-file-coding-system-explicit' is
    1253             : ;; already non-nil, `basic-save-buffer-1' sets its CAR to the value of
    1254             : ;; `last-coding-system-used'.  (It used to set it unconditionally, but
    1255             : ;; that seems unnecessary; see Bug#4533.)
    1256             : 
    1257             : (defvar buffer-file-coding-system-explicit nil
    1258             :   "The file coding system explicitly specified for the current buffer.
    1259             : The value is a cons of coding systems for reading (decoding) and
    1260             : writing (encoding).
    1261             : Internal use only.")
    1262             : (make-variable-buffer-local 'buffer-file-coding-system-explicit)
    1263             : (put 'buffer-file-coding-system-explicit 'permanent-local t)
    1264             : 
    1265             : (defun read-buffer-file-coding-system ()
    1266           0 :   (let* ((bcss (find-coding-systems-region (point-min) (point-max)))
    1267             :          (css-table
    1268           0 :           (unless (equal bcss '(undecided))
    1269           0 :             (append '("dos" "unix" "mac")
    1270           0 :                     (delq nil (mapcar (lambda (cs)
    1271           0 :                                         (if (memq (coding-system-base cs) bcss)
    1272           0 :                                             (symbol-name cs)))
    1273           0 :                                       coding-system-list)))))
    1274             :          (combined-table
    1275           0 :           (if css-table
    1276           0 :               (completion-table-in-turn css-table coding-system-alist)
    1277           0 :             coding-system-alist))
    1278             :          (auto-cs
    1279           0 :           (unless find-file-literally
    1280           0 :             (save-excursion
    1281           0 :               (save-restriction
    1282           0 :                 (widen)
    1283           0 :                 (goto-char (point-min))
    1284           0 :                 (funcall set-auto-coding-function
    1285           0 :                          (or buffer-file-name "") (buffer-size))))))
    1286             :          (preferred
    1287           0 :           (let ((bfcs (default-value 'buffer-file-coding-system)))
    1288           0 :             (cons (and (or (equal bcss '(undecided))
    1289           0 :                            (memq (coding-system-base bfcs) bcss))
    1290           0 :                        bfcs)
    1291           0 :                   (mapcar (lambda (cs)
    1292           0 :                             (and (coding-system-p cs)
    1293           0 :                                  (coding-system-get cs :mime-charset)
    1294           0 :                                  (or (equal bcss '(undecided))
    1295           0 :                                      (memq (coding-system-base cs) bcss))
    1296           0 :                                  cs))
    1297           0 :                           (coding-system-priority-list)))))
    1298             :          (default
    1299           0 :            (let ((current (coding-system-base buffer-file-coding-system)))
    1300             :              ;; Generally use as a default the first preferred coding-system
    1301             :              ;; different from the current coding-system, except for
    1302             :              ;; the case of auto-cs since choosing anything else is asking
    1303             :              ;; for trouble (would lead to using a different coding
    1304             :              ;; system than specified in the coding tag).
    1305           0 :              (or auto-cs
    1306           0 :                  (car (delq nil
    1307           0 :                             (mapcar (lambda (cs)
    1308           0 :                                       (if (eq current (coding-system-base cs))
    1309             :                                           nil
    1310           0 :                                         cs))
    1311           0 :                                     preferred))))))
    1312             :          (completion-ignore-case t)
    1313             :          (completion-pcm--delim-wild-regex ; Let "u8" complete to "utf-8".
    1314           0 :           (concat "\\(?:" completion-pcm--delim-wild-regex
    1315           0 :                   "\\|\\([[:alpha:]]\\)[[:digit:]]\\)"))
    1316           0 :          (cs (completing-read
    1317           0 :               (format "Coding system for saving file (default %s): " default)
    1318           0 :               combined-table
    1319             :               nil t nil 'coding-system-history
    1320           0 :               (if default (symbol-name default)))))
    1321           0 :     (unless (zerop (length cs)) (intern cs))))
    1322             : 
    1323             : (defun set-buffer-file-coding-system (coding-system &optional force nomodify)
    1324             :   "Set the file coding-system of the current buffer to CODING-SYSTEM.
    1325             : This means that when you save the buffer, it will be converted
    1326             : according to CODING-SYSTEM.  For a list of possible values of
    1327             : CODING-SYSTEM, use \\[list-coding-systems].
    1328             : 
    1329             : If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
    1330             : the end-of-line conversion unspecified, FORCE controls what to do.
    1331             : If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
    1332             : previous `buffer-file-coding-system' value (if it is specified there).
    1333             : Otherwise, leave it unspecified.
    1334             : 
    1335             : This marks the buffer modified so that the succeeding \\[save-buffer]
    1336             : surely saves the buffer with CODING-SYSTEM.  From a program, if you
    1337             : don't want to mark the buffer modified, specify t for NOMODIFY.
    1338             : If you know exactly what coding system you want to use,
    1339             : just set the variable `buffer-file-coding-system' directly."
    1340             :   (interactive
    1341           0 :    (list (read-buffer-file-coding-system)
    1342           0 :          current-prefix-arg))
    1343           0 :   (check-coding-system coding-system)
    1344           0 :   (if (and coding-system buffer-file-coding-system (null force))
    1345           0 :       (setq coding-system
    1346           0 :             (merge-coding-systems coding-system buffer-file-coding-system)))
    1347           0 :   (when (and (called-interactively-p 'interactive)
    1348           0 :              (not (memq 'emacs (coding-system-get coding-system
    1349           0 :                                                   :charset-list))))
    1350             :     ;; Check whether save would succeed, and jump to the offending char(s)
    1351             :     ;; if not.
    1352           0 :     (let ((css (find-coding-systems-region (point-min) (point-max))))
    1353           0 :       (unless (or (eq (car css) 'undecided)
    1354           0 :                   (memq (coding-system-base coding-system) css))
    1355           0 :         (setq coding-system (select-safe-coding-system-interactively
    1356           0 :                              (point-min) (point-max) css
    1357           0 :                              (list coding-system))))))
    1358           0 :   (setq buffer-file-coding-system coding-system)
    1359           0 :   (if buffer-file-coding-system-explicit
    1360           0 :       (setcdr buffer-file-coding-system-explicit coding-system)
    1361           0 :     (setq buffer-file-coding-system-explicit (cons nil coding-system)))
    1362           0 :   (unless nomodify
    1363           0 :     (set-buffer-modified-p t))
    1364           0 :   (force-mode-line-update))
    1365             : 
    1366             : (defun revert-buffer-with-coding-system (coding-system &optional force)
    1367             :   "Visit the current buffer's file again using coding system CODING-SYSTEM.
    1368             : For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
    1369             : 
    1370             : If CODING-SYSTEM leaves the text conversion unspecified, or if it leaves
    1371             : the end-of-line conversion unspecified, FORCE controls what to do.
    1372             : If FORCE is nil, get the unspecified aspect (or aspects) from the buffer's
    1373             : previous `buffer-file-coding-system' value (if it is specified there).
    1374             : Otherwise, determine it from the file contents as usual for visiting a file."
    1375             :   (interactive "zCoding system for visited file (default nil): \nP")
    1376           0 :   (check-coding-system coding-system)
    1377           0 :   (if (and coding-system buffer-file-coding-system (null force))
    1378           0 :       (setq coding-system
    1379           0 :             (merge-coding-systems coding-system buffer-file-coding-system)))
    1380           0 :   (let ((coding-system-for-read coding-system))
    1381           0 :     (revert-buffer)))
    1382             : 
    1383             : (defun set-file-name-coding-system (coding-system)
    1384             :   "Set coding system for decoding and encoding file names to CODING-SYSTEM.
    1385             : It actually just set the variable `file-name-coding-system' (which see)
    1386             : to CODING-SYSTEM."
    1387             :   (interactive "zCoding system for file names (default nil): ")
    1388           0 :   (check-coding-system coding-system)
    1389           0 :   (if (and coding-system
    1390           0 :            (not (coding-system-get coding-system :ascii-compatible-p))
    1391           0 :            (not (coding-system-get coding-system :suitable-for-file-name)))
    1392           0 :       (error "%s is not suitable for file names" coding-system))
    1393           0 :   (setq file-name-coding-system coding-system))
    1394             : 
    1395             : (defvar default-terminal-coding-system nil
    1396             :   "Default value for the terminal coding system.
    1397             : This is normally set according to the selected language environment.
    1398             : See also the command `set-terminal-coding-system'.")
    1399             : 
    1400             : (defun set-terminal-coding-system (coding-system &optional terminal)
    1401             :   "Set coding system of terminal output to CODING-SYSTEM.
    1402             : All text output to TERMINAL will be encoded
    1403             : with the specified coding system.
    1404             : 
    1405             : For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
    1406             : The default is determined by the selected language environment
    1407             : or by the previous use of this command.
    1408             : 
    1409             : TERMINAL may be a terminal object, a frame, or nil for the
    1410             : selected frame's terminal.  The setting has no effect on
    1411             : graphical terminals."
    1412             :   (interactive
    1413           0 :    (list (let ((default (if (and (not (terminal-coding-system))
    1414           0 :                                  default-terminal-coding-system)
    1415           0 :                             default-terminal-coding-system)))
    1416           0 :            (read-coding-system
    1417           0 :             (format "Coding system for terminal display (default %s): "
    1418           0 :                     default)
    1419           0 :             default))))
    1420           0 :   (if (and (not coding-system)
    1421           0 :            (not (terminal-coding-system)))
    1422           0 :       (setq coding-system default-terminal-coding-system))
    1423           0 :   (if coding-system
    1424           0 :       (setq default-terminal-coding-system coding-system))
    1425           0 :   (set-terminal-coding-system-internal coding-system terminal)
    1426           0 :   (redraw-frame))
    1427             : 
    1428             : (defvar default-keyboard-coding-system nil
    1429             :   "Default value of the keyboard coding system.
    1430             : This is normally set according to the selected language environment.
    1431             : See also the command `set-keyboard-coding-system'.")
    1432             : 
    1433             : (defun set-keyboard-coding-system (coding-system &optional terminal)
    1434             :   "Set coding system for keyboard input on TERMINAL to CODING-SYSTEM.
    1435             : 
    1436             : For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
    1437             : The default is determined by the selected language environment
    1438             : or by the previous use of this command.
    1439             : 
    1440             : If CODING-SYSTEM is nil or the coding-type of CODING-SYSTEM is
    1441             : `raw-text', the decoding of keyboard input is disabled.
    1442             : 
    1443             : TERMINAL may be a terminal object, a frame, or nil for the
    1444             : selected frame's terminal.  The setting has no effect on
    1445             : graphical terminals."
    1446             :   (interactive
    1447           0 :    (list (let* ((coding (keyboard-coding-system nil))
    1448           0 :                 (default (if (eq (coding-system-type coding) 'raw-text)
    1449           0 :                              default-keyboard-coding-system)))
    1450           0 :            (read-coding-system
    1451           0 :             (format "Coding system for keyboard input (default %s): "
    1452           0 :                     default)
    1453           0 :             default))))
    1454           0 :   (let ((coding-type (coding-system-type coding-system))
    1455             :         (saved-meta-mode
    1456           0 :          (terminal-parameter terminal 'keyboard-coding-saved-meta-mode)))
    1457           0 :     (let (accept-8-bit)
    1458           0 :       (if (not (or (coding-system-get coding-system :suitable-for-keyboard)
    1459           0 :                    (coding-system-get coding-system :ascii-compatible-p)))
    1460           0 :           (error "Unsuitable coding system for keyboard: %s" coding-system))
    1461           0 :       (cond ((memq coding-type '(raw-text charset utf-8 shift-jis big5 ccl))
    1462           0 :              (setq accept-8-bit t))
    1463           0 :             ((eq coding-type 'iso-2022)
    1464           0 :              (let ((flags (coding-system-get coding-system :flags)))
    1465           0 :                (or (memq '7-bit flags)
    1466           0 :                    (setq accept-8-bit t))))
    1467             :             (t
    1468           0 :              (error "Unsupported coding system for keyboard: %s"
    1469           0 :                     coding-system)))
    1470           0 :       (if accept-8-bit
    1471           0 :           (progn
    1472           0 :             (or saved-meta-mode
    1473           0 :                 (set-terminal-parameter terminal
    1474             :                                         'keyboard-coding-saved-meta-mode
    1475           0 :                                         (cons (nth 2 (current-input-mode))
    1476           0 :                                               nil)))
    1477           0 :             (set-input-meta-mode 8 terminal))
    1478           0 :         (when saved-meta-mode
    1479           0 :           (set-input-meta-mode (car saved-meta-mode) terminal)
    1480           0 :           (set-terminal-parameter terminal
    1481             :                                   'keyboard-coding-saved-meta-mode
    1482           0 :                                   nil)))
    1483             :       ;; Avoid end-of-line conversion.
    1484           0 :       (setq coding-system
    1485           0 :             (coding-system-change-eol-conversion coding-system 'unix))))
    1486           0 :   (set-keyboard-coding-system-internal coding-system terminal)
    1487           0 :   (setq keyboard-coding-system coding-system))
    1488             : 
    1489             : (defcustom keyboard-coding-system nil
    1490             :   "Specify coding system for keyboard input.
    1491             : If you set this on a terminal which can't distinguish Meta keys from
    1492             : 8-bit characters, you will have to use ESC to type Meta characters.
    1493             : See Info node `Terminal Coding' and Info node `Unibyte Mode'.
    1494             : 
    1495             : On non-windowing terminals, this is set from the locale by default.
    1496             : 
    1497             : Setting this variable directly does not take effect;
    1498             : use either \\[customize] or \\[set-keyboard-coding-system]."
    1499             :   :type '(coding-system :tag "Coding system")
    1500             :   :link '(info-link "(emacs)Terminal Coding")
    1501             :   :link '(info-link "(emacs)Unibyte Mode")
    1502             :   :set (lambda (_symbol value)
    1503             :          ;; Don't load encoded-kb unnecessarily.
    1504             :          (if (or value (boundp 'encoded-kbd-setup-display))
    1505             :              (set-keyboard-coding-system value)
    1506             :            (set-default 'keyboard-coding-system nil))) ; must initialize
    1507             :   :version "22.1"
    1508             :   :group 'keyboard
    1509             :   :group 'mule)
    1510             : 
    1511             : (defun set-buffer-process-coding-system (decoding encoding)
    1512             :   "Set coding systems for the process associated with the current buffer.
    1513             : DECODING is the coding system to be used to decode input from the process,
    1514             : ENCODING is the coding system to be used to encode output to the process.
    1515             : 
    1516             : For a list of possible coding systems, use \\[list-coding-systems]."
    1517             :   (interactive
    1518             :    "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
    1519          71 :   (let ((proc (get-buffer-process (current-buffer))))
    1520          71 :     (if (null proc)
    1521           0 :         (error "No process")
    1522          71 :       (check-coding-system decoding)
    1523          71 :       (check-coding-system encoding)
    1524          71 :       (set-process-coding-system proc decoding encoding)))
    1525          71 :   (force-mode-line-update))
    1526             : 
    1527             : (defalias 'set-clipboard-coding-system 'set-selection-coding-system)
    1528             : 
    1529             : (defun set-selection-coding-system (coding-system)
    1530             :   "Make CODING-SYSTEM used for communicating with other X clients.
    1531             : When sending or receiving text via cut_buffer, selection, and clipboard,
    1532             : the text is encoded or decoded by CODING-SYSTEM."
    1533             :   (interactive "zCoding system for X selection: ")
    1534           1 :   (check-coding-system coding-system)
    1535           1 :   (setq selection-coding-system coding-system))
    1536             : 
    1537             : ;; Coding system lastly specified by the command
    1538             : ;; set-next-selection-coding-system.
    1539             : (defvar last-next-selection-coding-system nil)
    1540             : 
    1541             : (defun set-next-selection-coding-system (coding-system)
    1542             :   "Use CODING-SYSTEM for next communication with other window system clients.
    1543             : This setting is effective for the next communication only."
    1544             :   (interactive
    1545           0 :    (list (read-coding-system
    1546           0 :           (if last-next-selection-coding-system
    1547           0 :               (format "Coding system for the next selection (default %S): "
    1548           0 :                       last-next-selection-coding-system)
    1549           0 :             "Coding system for the next selection: ")
    1550           0 :           last-next-selection-coding-system)))
    1551           0 :   (if coding-system
    1552           0 :       (setq last-next-selection-coding-system coding-system)
    1553           0 :     (setq coding-system last-next-selection-coding-system))
    1554           0 :   (check-coding-system coding-system)
    1555             : 
    1556           0 :   (setq next-selection-coding-system coding-system))
    1557             : 
    1558             : (defun set-coding-priority (arg)
    1559             :   "Set priority of coding categories according to ARG.
    1560             : ARG is a list of coding categories ordered by priority.
    1561             : 
    1562             : This function is provided for backward compatibility."
    1563             :   (declare (obsolete set-coding-system-priority "23.1"))
    1564           0 :   (apply 'set-coding-system-priority
    1565           0 :          (mapcar #'(lambda (x) (symbol-value x)) arg)))
    1566             : 
    1567             : ;;; X selections
    1568             : 
    1569             : (defvar ctext-non-standard-encodings-alist
    1570             :   (mapcar 'purecopy
    1571             :   '(("big5-0" big5 2 big5)
    1572             :     ("ISO8859-14" iso-8859-14 1 latin-iso8859-14)
    1573             :     ("ISO8859-15" iso-8859-15 1 latin-iso8859-15)
    1574             :     ("gbk-0" gbk 2 chinese-gbk)
    1575             :     ("koi8-r" koi8-r 1 koi8-r)
    1576             :     ("microsoft-cp1251" windows-1251 1 windows-1251)))
    1577             :   "Alist of non-standard encoding names vs the corresponding usages in CTEXT.
    1578             : 
    1579             : It controls how extended segments of a compound text are handled
    1580             : by the coding system `compound-text-with-extensions'.
    1581             : 
    1582             : Each element has the form (ENCODING-NAME CODING-SYSTEM N-OCTET CHARSET).
    1583             : 
    1584             : ENCODING-NAME is an encoding name of an \"extended segment\".
    1585             : 
    1586             : CODING-SYSTEM is the coding-system to encode (or decode) the
    1587             : characters into (or from) the extended segment.
    1588             : 
    1589             : N-OCTET is the number of octets (bytes) that encodes a character
    1590             : in the segment.  It can be 0 (meaning the number of octets per
    1591             : character is variable), 1, 2, 3, or 4.
    1592             : 
    1593             : CHARSET is a character set containing characters that are encoded
    1594             : in the segment.  It can be a list of character sets.
    1595             : 
    1596             : On decoding CTEXT, all encoding names listed here are recognized.
    1597             : 
    1598             : On encoding CTEXT, encoding names in the variable
    1599             : `ctext-non-standard-encodings' (which see) and in the information
    1600             : listed for the current language environment under the key
    1601             : `ctext-non-standard-encodings' are used.")
    1602             : 
    1603             : (defvar ctext-non-standard-encodings nil
    1604             :   "List of non-standard encoding names used in extended segments of CTEXT.
    1605             : Each element must be one of the names listed in the variable
    1606             : `ctext-non-standard-encodings-alist' (which see).")
    1607             : 
    1608             : (defvar ctext-non-standard-encodings-regexp
    1609             :   (purecopy
    1610             :   (string-to-multibyte
    1611             :    (concat
    1612             :     ;; For non-standard encodings.
    1613             :     "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)"
    1614             :     "\\|"
    1615             :     ;; For UTF-8 encoding.
    1616             :     "\\(\e%G[^\e]*\e%@\\)"))))
    1617             : 
    1618             : ;; Functions to support "Non-Standard Character Set Encodings" defined
    1619             : ;; by the COMPOUND-TEXT spec.  They also support "The UTF-8 encoding"
    1620             : ;; described in the section 7 of the documentation of COMPOUND-TEXT
    1621             : ;; distributed with XFree86.
    1622             : 
    1623             : (defun ctext-post-read-conversion (len)
    1624             :   "Decode LEN characters encoded as Compound Text with Extended Segments."
    1625             :   ;; We don't need the following because it is expected that this
    1626             :   ;; function is mainly used for decoding X selection which is not
    1627             :   ;; that big data.
    1628             :   ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions
    1629           0 :   (save-match-data
    1630           0 :     (save-restriction
    1631           0 :       (narrow-to-region (point) (+ (point) len))
    1632           0 :       (let ((case-fold-search nil)
    1633             :             last-coding-system-used
    1634             :             pos bytes)
    1635           0 :         (decode-coding-region (point-min) (point-max) 'ctext)
    1636           0 :         (while (re-search-forward ctext-non-standard-encodings-regexp
    1637           0 :                                   nil 'move)
    1638           0 :           (setq pos (match-beginning 0))
    1639           0 :           (if (match-beginning 1)
    1640             :               ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
    1641           0 :               (let* ((M (multibyte-char-to-unibyte (char-after (+ pos 4))))
    1642           0 :                      (L (multibyte-char-to-unibyte (char-after (+ pos 5))))
    1643           0 :                      (encoding (match-string 2))
    1644           0 :                      (encoding-info (assoc-string
    1645           0 :                                      encoding
    1646           0 :                                      ctext-non-standard-encodings-alist t))
    1647           0 :                      (coding (if encoding-info
    1648           0 :                                  (nth 1 encoding-info)
    1649           0 :                                (setq encoding (intern (downcase encoding)))
    1650           0 :                                (and (coding-system-p encoding)
    1651           0 :                                     encoding))))
    1652           0 :                 (setq bytes (- (+ (* (- M 128) 128) (- L 128))
    1653           0 :                                (- (point) (+ pos 6))))
    1654           0 :                 (when coding
    1655           0 :                   (delete-region pos (point))
    1656           0 :                   (forward-char bytes)
    1657           0 :                   (decode-coding-region (- (point) bytes) (point) coding)))
    1658             :             ;; ESC % G --UTF-8-BYTES-- ESC % @
    1659           0 :             (delete-char -3)
    1660           0 :             (delete-region pos (+ pos 3))
    1661           0 :             (decode-coding-region pos (point) 'utf-8))))
    1662           0 :       (goto-char (point-min))
    1663           0 :       (- (point-max) (point)))))
    1664             : 
    1665             : (defvar ctext-standard-encodings
    1666             :   '(ascii latin-jisx0201 katakana-jisx0201
    1667             :           latin-iso8859-1 latin-iso8859-2 latin-iso8859-3 latin-iso8859-4
    1668             :           greek-iso8859-7 arabic-iso8859-6 hebrew-iso8859-8 cyrillic-iso8859-5
    1669             :           latin-iso8859-9
    1670             :           chinese-gb2312 japanese-jisx0208 korean-ksc5601)
    1671             :   "List of approved standard encodings (i.e. charsets) of X's Compound Text.
    1672             : Coding-system `compound-text-with-extensions' encodes a character
    1673             : belonging to any of those charsets using the normal ISO2022
    1674             : designation sequence unless the current language environment or
    1675             : the variable `ctext-non-standard-encodings' decide to use an extended
    1676             : segment of CTEXT for that character.  See also the documentation
    1677             : of `ctext-non-standard-encodings-alist'.")
    1678             : 
    1679             : ;; Return an alist of CHARSET vs CTEXT-USAGE-INFO generated from
    1680             : ;; `ctext-non-standard-encodings' and a list specified by the key
    1681             : ;; `ctext-non-standard-encodings' for the current language
    1682             : ;; environment.  CTEXT-USAGE-INFO is one of the element of
    1683             : ;; `ctext-non-standard-encodings-alist' or nil.  In the former case, a
    1684             : ;; character in CHARSET is encoded using extended segment.  In the
    1685             : ;; latter case, a character in CHARSET is encoded using normal ISO2022
    1686             : ;; designation sequence.  If a character is not in any of CHARSETs, it
    1687             : ;; is encoded using UTF-8 encoding extension.
    1688             : 
    1689             : (defun ctext-non-standard-encodings-table ()
    1690           0 :   (let* ((table (append ctext-non-standard-encodings
    1691           0 :                         (copy-sequence
    1692           0 :                          (get-language-info current-language-environment
    1693           0 :                                             'ctext-non-standard-encodings))))
    1694           0 :          (tail table)
    1695             :          elt)
    1696           0 :     (while tail
    1697           0 :       (setq elt (car tail))
    1698           0 :       (let* ((slot (assoc elt ctext-non-standard-encodings-alist))
    1699           0 :              (charset (nth 3 slot)))
    1700           0 :         (if (charsetp charset)
    1701           0 :             (setcar tail
    1702           0 :                     (cons (plist-get (charset-plist charset) :base) slot))
    1703           0 :           (setcar tail (cons (car charset) slot))
    1704           0 :           (dolist (cs (cdr charset))
    1705           0 :             (setcdr tail
    1706           0 :                     (cons (cons (plist-get (charset-plist (car cs)) :base) slot)
    1707           0 :                           (cdr tail)))
    1708           0 :             (setq tail (cdr tail))))
    1709           0 :         (setq tail (cdr tail))))
    1710           0 :     table))
    1711             : 
    1712             : (defun ctext-pre-write-conversion (from to)
    1713             :   "Encode characters between FROM and TO as Compound Text w/Extended Segments.
    1714             : 
    1715             : If FROM is a string, generate a new temp buffer, insert the text,
    1716             : and convert it in the temporary buffer.  Otherwise, convert
    1717             : in-place."
    1718           0 :   (save-match-data
    1719             :     ;; Setup a working buffer if necessary.
    1720           0 :     (when (stringp from)
    1721           0 :       (set-buffer (generate-new-buffer " *temp"))
    1722           0 :       (set-buffer-multibyte (multibyte-string-p from))
    1723           0 :       (insert from)
    1724           0 :       (setq from (point-min) to (point-max)))
    1725           0 :     (save-restriction
    1726           0 :       (narrow-to-region from to)
    1727           0 :       (goto-char from)
    1728           0 :       (let ((encoding-table (ctext-non-standard-encodings-table))
    1729           0 :             (charset-list (sort-charsets
    1730           0 :                            (copy-sequence ctext-standard-encodings)))
    1731           0 :             (end-pos (make-marker))
    1732             :             last-coding-system-used
    1733             :             last-pos charset encoding-info)
    1734           0 :         (dolist (elt encoding-table)
    1735           0 :           (push (car elt) charset-list))
    1736           0 :         (setq end-pos (point-marker))
    1737           0 :         (while (re-search-forward "[^\0-\177]+" nil t)
    1738             :           ;; Found a sequence of non-ASCII characters.
    1739           0 :           (set-marker end-pos (match-end 0))
    1740           0 :           (goto-char (match-beginning 0))
    1741           0 :           (setq last-pos (point)
    1742           0 :                 charset (char-charset (following-char) charset-list))
    1743           0 :           (forward-char 1)
    1744           0 :           (while (and (< (point) end-pos)
    1745           0 :                       (eq charset (char-charset (following-char) charset-list)))
    1746           0 :             (forward-char 1))
    1747           0 :           (if charset
    1748           0 :               (if (setq encoding-info (cdr (assq charset encoding-table)))
    1749             :                   ;; Encode this range using an extended segment.
    1750           0 :                   (let ((encoding-name (car encoding-info))
    1751           0 :                         (coding-system (nth 1 encoding-info))
    1752           0 :                         (noctets (nth 2 encoding-info))
    1753             :                         len)
    1754           0 :                     (encode-coding-region last-pos (point) coding-system)
    1755           0 :                     (setq len (+ (length encoding-name) 1
    1756           0 :                                  (- (point) last-pos)))
    1757             :                     ;; According to the spec of CTEXT, it is not
    1758             :                     ;; necessary to produce this extra designation
    1759             :                     ;; sequence, but some buggy application
    1760             :                     ;; (e.g. crxvt-gb) requires it.
    1761           0 :                     (insert "\e(B")
    1762           0 :                     (save-excursion
    1763           0 :                       (goto-char last-pos)
    1764           0 :                       (insert (format "\e%%/%d" noctets))
    1765           0 :                       (insert-byte (+ (/ len 128) 128) 1)
    1766           0 :                       (insert-byte (+ (% len 128) 128) 1)
    1767           0 :                       (insert encoding-name)
    1768           0 :                       (insert 2)))
    1769             :                 ;; Encode this range as characters in CHARSET.
    1770           0 :                 (put-text-property last-pos (point) 'charset charset))
    1771             :             ;; Encode this range using UTF-8 encoding extension.
    1772           0 :             (encode-coding-region last-pos (point) 'mule-utf-8)
    1773           0 :             (save-excursion
    1774           0 :               (goto-char last-pos)
    1775           0 :               (insert "\e%G"))
    1776           0 :             (insert "\e%@")))
    1777           0 :         (goto-char (point-min)))))
    1778             :   ;; Must return nil, as build_annotations_2 expects that.
    1779             :   nil)
    1780             : 
    1781             : ;;; FILE I/O
    1782             : 
    1783             : ;; TODO many elements of this list are also in inhibit-local-variables-regexps.
    1784             : (defcustom auto-coding-alist
    1785             :   ;; .exe and .EXE are added to support archive-mode looking at DOS
    1786             :   ;; self-extracting exe archives.
    1787             :   (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
    1788             :           '(("\\.\\(\
    1789             : arc\\|zip\\|lzh\\|lha\\|zoo\\|[jew]ar\\|xpi\\|rar\\|7z\\|\
    1790             : ARC\\|ZIP\\|LZH\\|LHA\\|ZOO\\|[JEW]AR\\|XPI\\|RAR\\|7Z\\)\\'"
    1791             :      . no-conversion-multibyte)
    1792             :     ("\\.\\(exe\\|EXE\\)\\'" . no-conversion)
    1793             :     ("\\.\\(sx[dmicw]\\|odt\\|tar\\|t[bg]z\\)\\'" . no-conversion)
    1794             :     ("\\.\\(gz\\|Z\\|bz\\|bz2\\|xz\\|gpg\\)\\'" . no-conversion)
    1795             :     ("\\.\\(jpe?g\\|png\\|gif\\|tiff?\\|p[bpgn]m\\)\\'" . no-conversion)
    1796             :     ("\\.pdf\\'" . no-conversion)
    1797             :     ("/#[^/]+#\\'" . utf-8-emacs-unix)))
    1798             :   "Alist of filename patterns vs corresponding coding systems.
    1799             : Each element looks like (REGEXP . CODING-SYSTEM).
    1800             : A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.
    1801             : 
    1802             : The settings in this alist take priority over `coding:' tags
    1803             : in the file (see the function `set-auto-coding')
    1804             : and the contents of `file-coding-system-alist'."
    1805             :   :version "24.1"                       ; added xz
    1806             :   :group 'files
    1807             :   :group 'mule
    1808             :   :type '(repeat (cons (regexp :tag "File name regexp")
    1809             :                        (symbol :tag "Coding system"))))
    1810             : 
    1811             : (defcustom auto-coding-regexp-alist
    1812             :   (mapcar (lambda (arg) (cons (purecopy (car arg)) (cdr arg)))
    1813             :   '(("\\`BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)
    1814             :     ("\\`\xFE\xFF" . utf-16be-with-signature)
    1815             :     ("\\`\xFF\xFE" . utf-16le-with-signature)
    1816             :     ("\\`\xEF\xBB\xBF" . utf-8-with-signature)
    1817             :     ("\\`;ELC\024\0\0\0" . emacs-mule)))      ; Emacs 20-compiled
    1818             :   "Alist of patterns vs corresponding coding systems.
    1819             : Each element looks like (REGEXP . CODING-SYSTEM).
    1820             : A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
    1821             : 
    1822             : The settings in this alist take priority over `coding:' tags
    1823             : in the file (see the function `set-auto-coding')
    1824             : and the contents of `file-coding-system-alist'."
    1825             :   :group 'files
    1826             :   :group 'mule
    1827             :   :type '(repeat (cons (regexp :tag "Regexp")
    1828             :                        (symbol :tag "Coding system"))))
    1829             : 
    1830             : (defun auto-coding-regexp-alist-lookup (from to)
    1831             :   "Lookup `auto-coding-regexp-alist' for the contents of the current buffer.
    1832             : The value is a coding system is specified for the region FROM and TO,
    1833             : or nil."
    1834         326 :   (save-excursion
    1835         326 :     (goto-char from)
    1836         326 :     (let ((alist auto-coding-regexp-alist)
    1837             :           coding-system)
    1838        1956 :       (while (and alist (not coding-system))
    1839        1630 :         (let ((regexp (car (car alist))))
    1840        1630 :           (if enable-multibyte-characters
    1841        1630 :               (setq regexp (string-to-multibyte regexp)))
    1842        1630 :           (if (re-search-forward regexp to t)
    1843           0 :               (setq coding-system (cdr (car alist)))
    1844        1630 :             (setq alist (cdr alist)))))
    1845         326 :       coding-system)))
    1846             : 
    1847             : ;; See the bottom of this file for built-in auto coding functions.
    1848             : (defcustom auto-coding-functions '(sgml-xml-auto-coding-function
    1849             :                                    sgml-html-meta-auto-coding-function)
    1850             :   "A list of functions which attempt to determine a coding system.
    1851             : 
    1852             : Each function in this list should be written to operate on the
    1853             : current buffer, but should not modify it in any way.  The buffer
    1854             : will contain undecoded text of parts of the file.  Each function
    1855             : should take one argument, SIZE, which says how many characters
    1856             : \(starting from point) it should look at.
    1857             : 
    1858             : If one of these functions succeeds in determining a coding
    1859             : system, it should return that coding system.  Otherwise, it
    1860             : should return nil.
    1861             : 
    1862             : If a file has a `coding:' tag, that takes precedence over these
    1863             : functions, so they won't be called at all."
    1864             :   :group 'files
    1865             :   :group 'mule
    1866             :   :type '(repeat function))
    1867             : 
    1868             : (defvar set-auto-coding-for-load nil
    1869             :   "Non-nil means respect a \"unibyte: t\" entry in file local variables.
    1870             : Emacs binds this variable to t when loading or byte-compiling Emacs Lisp
    1871             : files.")
    1872             : 
    1873             : (defun auto-coding-alist-lookup (filename)
    1874             :   "Return the coding system specified by `auto-coding-alist' for FILENAME."
    1875         326 :   (let ((alist auto-coding-alist)
    1876         326 :         (case-fold-search (file-name-case-insensitive-p filename))
    1877             :         coding-system)
    1878        2608 :     (while (and alist (not coding-system))
    1879        2282 :       (if (string-match (car (car alist)) filename)
    1880           0 :           (setq coding-system (cdr (car alist)))
    1881        2282 :         (setq alist (cdr alist))))
    1882         326 :     coding-system))
    1883             : 
    1884             : (put 'enable-character-translation 'permanent-local t)
    1885             : (put 'enable-character-translation 'safe-local-variable 'booleanp)
    1886             : 
    1887             : (defun find-auto-coding (filename size)
    1888             :   "Find a coding system for a file FILENAME of which SIZE bytes follow point.
    1889             : These bytes should include at least the first 1k of the file
    1890             : and the last 3k of the file, but the middle may be omitted.
    1891             : 
    1892             : The function checks FILENAME against the variable `auto-coding-alist'.
    1893             : If FILENAME doesn't match any entries in the variable, it checks the
    1894             : contents of the current buffer following point against
    1895             : `auto-coding-regexp-alist'.  If no match is found, it checks for a
    1896             : `coding:' tag in the first one or two lines following point.  If no
    1897             : `coding:' tag is found, it checks any local variables list in the last
    1898             : 3K bytes out of the SIZE bytes.  Finally, if none of these methods
    1899             : succeed, it checks to see if any function in `auto-coding-functions'
    1900             : gives a match.
    1901             : 
    1902             : If a coding system is specified, the return value is a cons
    1903             : \(CODING . SOURCE), where CODING is the specified coding system and
    1904             : SOURCE is a symbol `auto-coding-alist', `auto-coding-regexp-alist',
    1905             : `:coding', or `auto-coding-functions' indicating by what CODING is
    1906             : specified.  Note that the validity of CODING is not checked;
    1907             : it's the caller's responsibility to check it.
    1908             : 
    1909             : If nothing is specified, the return value is nil."
    1910         326 :   (or (let ((coding-system (auto-coding-alist-lookup filename)))
    1911         326 :         (if coding-system
    1912         326 :             (cons coding-system 'auto-coding-alist)))
    1913             :       ;; Try using `auto-coding-regexp-alist'.
    1914         326 :       (let ((coding-system (auto-coding-regexp-alist-lookup (point)
    1915         326 :                                                             (+ (point) size))))
    1916         326 :         (if coding-system
    1917         326 :             (cons coding-system 'auto-coding-regexp-alist)))
    1918         326 :       (let* ((case-fold-search t)
    1919         326 :              (head-start (point))
    1920         326 :              (head-end (+ head-start (min size 1024)))
    1921         326 :              (tail-start (+ head-start (max (- size 3072) 0)))
    1922         326 :              (tail-end (+ head-start size))
    1923             :              coding-system head-found tail-found char-trans)
    1924             :         ;; Try a short cut by searching for the string "coding:"
    1925             :         ;; and for "unibyte:" at the head and tail of SIZE bytes.
    1926         326 :         (setq head-found (or (search-forward "coding:" head-end t)
    1927         305 :                              (search-forward "unibyte:" head-end t)
    1928         305 :                              (search-forward "enable-character-translation:"
    1929         326 :                                              head-end t)))
    1930         326 :         (if (and head-found (> head-found tail-start))
    1931             :             ;; Head and tail are overlapped.
    1932          10 :             (setq tail-found head-found)
    1933         316 :           (goto-char tail-start)
    1934         316 :           (setq tail-found (or (search-forward "coding:" tail-end t)
    1935         287 :                                (search-forward "unibyte:" tail-end t)
    1936         287 :                                (search-forward "enable-character-translation:"
    1937         326 :                                                tail-end t))))
    1938             : 
    1939             :         ;; At first check the head.
    1940         326 :         (when head-found
    1941          21 :           (goto-char head-start)
    1942          21 :           (setq head-end (set-auto-mode-1))
    1943          21 :           (setq head-start (point))
    1944          21 :           (when (and head-end (< head-found head-end))
    1945          21 :             (goto-char head-start)
    1946          21 :             (when (and set-auto-coding-for-load
    1947           0 :                        (re-search-forward
    1948             :                         "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
    1949          21 :                         head-end t))
    1950           0 :               (display-warning 'mule
    1951           0 :                                (format "\"unibyte: t\" (in %s) is obsolete; \
    1952             : use \"coding: 'raw-text\" instead."
    1953           0 :                                        (file-relative-name filename))
    1954           0 :                                :warning)
    1955          21 :               (setq coding-system 'raw-text))
    1956          21 :             (when (and (not coding-system)
    1957          21 :                        (re-search-forward
    1958             :                         "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
    1959          21 :                         head-end t))
    1960          21 :               (setq coding-system (intern (match-string 2))))
    1961          21 :             (when (re-search-forward
    1962             :                    "\\(.*;\\)?[ \t]*enable-character-translation:[ \t]*\\([^ ;]+\\)"
    1963          21 :                    head-end t)
    1964         326 :               (setq char-trans (match-string 2)))))
    1965             : 
    1966             :         ;; If no coding: tag in the head, check the tail.
    1967             :         ;; Here we must pay attention to the case that the end-of-line
    1968             :         ;; is just "\r" and we can't use "^" nor "$" in regexp.
    1969         326 :         (when (and tail-found (or (not coding-system) (not char-trans)))
    1970          39 :           (goto-char tail-start)
    1971          39 :           (re-search-forward "[\r\n]\^L" tail-end t)
    1972          39 :           (if (re-search-forward
    1973             :                "[\r\n]\\([^\r\n]*\\)[ \t]*Local Variables:[ \t]*\\([^\r\n]*\\)[\r\n]"
    1974          39 :                tail-end t)
    1975             :               ;; The prefix is what comes before "local variables:" in its
    1976             :               ;; line.  The suffix is what comes after "local variables:"
    1977             :               ;; in its line.
    1978          29 :               (let* ((prefix (regexp-quote (match-string 1)))
    1979          29 :                      (suffix (regexp-quote (match-string 2)))
    1980             :                      (re-coding
    1981          29 :                       (concat
    1982          29 :                        "[\r\n]" prefix
    1983             :                        ;; N.B. without the \n below, the regexp can
    1984             :                        ;; eat newlines.
    1985             :                        "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
    1986          29 :                        suffix "[\r\n]"))
    1987             :                      (re-unibyte
    1988          29 :                       (concat
    1989          29 :                        "[\r\n]" prefix
    1990             :                        "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
    1991          29 :                        suffix "[\r\n]"))
    1992             :                      (re-char-trans
    1993          29 :                       (concat
    1994          29 :                        "[\r\n]" prefix
    1995             :                        "[ \t]*enable-character-translation[ \t]*:[ \t]*\\([^ \t\r\n]+\\)[ \t]*"
    1996          29 :                        suffix "[\r\n]"))
    1997             :                      (re-end
    1998          29 :                       (concat "[\r\n]" prefix "[ \t]*End *:[ \t]*" suffix
    1999          29 :                               "[\r\n]?"))
    2000          29 :                      (pos (1- (point))))
    2001          29 :                 (forward-char -1)       ; skip back \r or \n.
    2002          29 :                 (re-search-forward re-end tail-end 'move)
    2003          29 :                 (setq tail-end (point))
    2004          29 :                 (goto-char pos)
    2005          29 :                 (when (and set-auto-coding-for-load
    2006          29 :                            (re-search-forward re-unibyte tail-end t))
    2007           0 :                   (display-warning 'mule "\"unibyte: t\" is obsolete; \
    2008           0 : use \"coding: 'raw-text\" instead." :warning)
    2009          29 :                   (setq coding-system 'raw-text))
    2010          29 :                 (when (and (not coding-system)
    2011          29 :                            (re-search-forward re-coding tail-end t))
    2012          29 :                   (setq coding-system (intern (match-string 1))))
    2013          29 :                 (when (and (not char-trans)
    2014          29 :                            (re-search-forward re-char-trans tail-end t))
    2015         326 :                   (setq char-trans (match-string 1))))))
    2016         326 :         (if coding-system
    2017             :             ;; If the coding-system name ends with "!", remove it and
    2018             :             ;; set char-trans to "nil".
    2019          50 :             (let ((name (symbol-name coding-system)))
    2020          50 :               (if (= (aref name (1- (length name))) ?!)
    2021           0 :                   (setq coding-system (intern (substring name 0 -1))
    2022         326 :                         char-trans "nil"))))
    2023         326 :         (when (and char-trans
    2024         326 :                    (not (setq char-trans (intern char-trans))))
    2025           0 :           (make-local-variable 'enable-character-translation)
    2026         326 :           (setq enable-character-translation nil))
    2027         326 :         (if coding-system
    2028         326 :             (cons coding-system :coding)))
    2029             :       ;; Finally, try all the `auto-coding-functions'.
    2030         276 :       (let ((funcs auto-coding-functions)
    2031             :             (coding-system nil))
    2032         828 :         (while (and funcs (not coding-system))
    2033         552 :           (setq coding-system (ignore-errors
    2034         552 :                                 (save-excursion
    2035         552 :                                   (goto-char (point-min))
    2036        1104 :                                   (funcall (pop funcs) size)))))
    2037         276 :         (if coding-system
    2038         326 :             (cons coding-system 'auto-coding-functions)))))
    2039             : 
    2040             : (defun set-auto-coding (filename size)
    2041             :   "Return coding system for a file FILENAME of which SIZE bytes follow point.
    2042             : See `find-auto-coding' for how the coding system is found.
    2043             : Return nil if an invalid coding system is found.
    2044             : 
    2045             : The variable `set-auto-coding-function' (which see) is set to this
    2046             : function by default."
    2047         316 :   (let ((found (find-auto-coding filename size)))
    2048         316 :     (if (and found (coding-system-p (car found)))
    2049         316 :         (car found))))
    2050             : 
    2051             : (setq set-auto-coding-function 'set-auto-coding)
    2052             : 
    2053             : (defun after-insert-file-set-coding (inserted &optional visit)
    2054             :   "Set `buffer-file-coding-system' of current buffer after text is inserted.
    2055             : INSERTED is the number of characters that were inserted, as figured
    2056             : in the situation before this function.  Return the number of characters
    2057             : inserted, as figured in the situation after.  The two numbers can be
    2058             : different if the buffer has become unibyte.
    2059             : The optional second arg VISIT non-nil means that we are visiting a file."
    2060        1185 :   (if (and visit
    2061         155 :            coding-system-for-read
    2062        1185 :            (not (eq coding-system-for-read 'auto-save-coding)))
    2063           0 :       (setq buffer-file-coding-system-explicit
    2064        1185 :             (cons coding-system-for-read nil)))
    2065        1185 :   (if last-coding-system-used
    2066        1185 :       (let ((coding-system
    2067        1185 :              (find-new-buffer-file-coding-system last-coding-system-used)))
    2068        1185 :         (if coding-system
    2069        1185 :             (setq buffer-file-coding-system coding-system))))
    2070        1185 :   inserted)
    2071             : 
    2072             : ;; The coding-spec and eol-type of coding-system returned is decided
    2073             : ;; independently in the following order.
    2074             : ;;      1. That of buffer-file-coding-system locally bound.
    2075             : ;;      2. That of CODING.
    2076             : 
    2077             : (defun find-new-buffer-file-coding-system (coding)
    2078             :   "Return a coding system for a buffer when a file of CODING is inserted.
    2079             : The local variable `buffer-file-coding-system' of the current buffer
    2080             : is set to the returned value.
    2081             : Return nil if there's no need to set `buffer-file-coding-system'."
    2082        1185 :   (let (local-coding local-eol
    2083             :         found-coding found-eol
    2084             :         new-coding new-eol)
    2085        1185 :     (if (null coding)
    2086             :         ;; Nothing found about coding.
    2087             :         nil
    2088             : 
    2089             :       ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
    2090             :       ;; and LOCAL-CODING.
    2091        1185 :       (setq local-eol (coding-system-eol-type buffer-file-coding-system))
    2092        1185 :       (if (null (numberp local-eol))
    2093             :           ;; But eol-type is not yet set.
    2094        1185 :           (setq local-eol nil))
    2095        1185 :       (if (and buffer-file-coding-system
    2096         130 :                (not (eq (coding-system-type buffer-file-coding-system)
    2097        1185 :                         'undecided)))
    2098        1185 :           (setq local-coding (coding-system-base buffer-file-coding-system)))
    2099             : 
    2100        1185 :       (if (and (local-variable-p 'buffer-file-coding-system)
    2101        1185 :                local-eol local-coding)
    2102             :           ;; The current buffer has already set full coding-system, we
    2103             :           ;; had better not change it.
    2104             :           nil
    2105             : 
    2106        1055 :         (setq found-eol (coding-system-eol-type coding))
    2107        1055 :         (if (null (numberp found-eol))
    2108             :             ;; But eol-type is not found.
    2109             :             ;; If EOL conversions are inhibited, force unix eol-type.
    2110        1055 :             (setq found-eol (if inhibit-eol-conversion 0)))
    2111        1055 :         (setq found-coding (coding-system-base coding))
    2112             : 
    2113        1055 :         (if (and (not found-eol) (eq found-coding 'undecided))
    2114             :             ;; No valid coding information found.
    2115             :             nil
    2116             : 
    2117             :           ;; Some coding information (eol or text) found.
    2118             : 
    2119             :           ;; The local setting takes precedence over the found one.
    2120         909 :           (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
    2121           0 :                                (or local-coding found-coding)
    2122         909 :                              (or found-coding local-coding)))
    2123         909 :           (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
    2124           0 :                             (or local-eol found-eol)
    2125         909 :                           (or found-eol local-eol)))
    2126             : 
    2127         909 :           (let ((eol-type (coding-system-eol-type new-coding)))
    2128         909 :             (if (and (numberp new-eol) (vectorp eol-type))
    2129         196 :                 (aref eol-type new-eol)
    2130        1185 :               new-coding)))))))
    2131             : 
    2132             : (defun modify-coding-system-alist (target-type regexp coding-system)
    2133             :   "Modify one of look up tables for finding a coding system on I/O operation.
    2134             : There are three of such tables, `file-coding-system-alist',
    2135             : `process-coding-system-alist', and `network-coding-system-alist'.
    2136             : 
    2137             : TARGET-TYPE specifies which of them to modify.
    2138             : If it is `file', it affects `file-coding-system-alist' (which see).
    2139             : If it is `process', it affects `process-coding-system-alist' (which see).
    2140             : If it is `network', it affects `network-coding-system-alist' (which see).
    2141             : 
    2142             : REGEXP is a regular expression matching a target of I/O operation.
    2143             : The target is a file name if TARGET-TYPE is `file', a program name if
    2144             : TARGET-TYPE is `process', or a network service name or a port number
    2145             : to connect to if TARGET-TYPE is `network'.
    2146             : 
    2147             : CODING-SYSTEM is a coding system to perform code conversion on the I/O
    2148             : operation, or a cons cell (DECODING . ENCODING) specifying the coding
    2149             : systems for decoding and encoding respectively, or a function symbol
    2150             : which, when called, returns such a cons cell."
    2151           0 :   (or (memq target-type '(file process network))
    2152           0 :       (error "Invalid target type: %s" target-type))
    2153           0 :   (or (stringp regexp)
    2154           0 :       (and (eq target-type 'network) (integerp regexp))
    2155           0 :       (error "Invalid regular expression: %s" regexp))
    2156           0 :   (if (symbolp coding-system)
    2157           0 :       (if (not (fboundp coding-system))
    2158           0 :           (progn
    2159           0 :             (check-coding-system coding-system)
    2160           0 :             (setq coding-system (cons coding-system coding-system))))
    2161           0 :     (check-coding-system (car coding-system))
    2162           0 :     (check-coding-system (cdr coding-system)))
    2163           0 :   (cond ((eq target-type 'file)
    2164           0 :          (let ((slot (assoc regexp file-coding-system-alist)))
    2165           0 :            (if slot
    2166           0 :                (setcdr slot coding-system)
    2167           0 :              (setq file-coding-system-alist
    2168           0 :                    (cons (cons regexp coding-system)
    2169           0 :                          file-coding-system-alist)))))
    2170           0 :         ((eq target-type 'process)
    2171           0 :          (let ((slot (assoc regexp process-coding-system-alist)))
    2172           0 :            (if slot
    2173           0 :                (setcdr slot coding-system)
    2174           0 :              (setq process-coding-system-alist
    2175           0 :                    (cons (cons regexp coding-system)
    2176           0 :                          process-coding-system-alist)))))
    2177             :         (t
    2178           0 :          (let ((slot (assoc regexp network-coding-system-alist)))
    2179           0 :            (if slot
    2180           0 :                (setcdr slot coding-system)
    2181           0 :              (setq network-coding-system-alist
    2182           0 :                    (cons (cons regexp coding-system)
    2183           0 :                          network-coding-system-alist)))))))
    2184             : 
    2185             : (defun decode-coding-inserted-region (from to filename
    2186             :                                            &optional visit beg end replace)
    2187             :   "Decode the region between FROM and TO as if it is read from file FILENAME.
    2188             : The idea is that the text between FROM and TO was just inserted somehow.
    2189             : Optional arguments VISIT, BEG, END, and REPLACE are the same as those
    2190             : of the function `insert-file-contents'.
    2191             : Part of the job of this function is setting `buffer-undo-list' appropriately."
    2192           0 :   (save-excursion
    2193           0 :     (save-restriction
    2194           0 :       (let ((coding coding-system-for-read)
    2195             :             undo-list-saved)
    2196           0 :         (if visit
    2197             :             ;; Temporarily turn off undo recording, if we're decoding the
    2198             :             ;; text of a visited file.
    2199           0 :             (setq buffer-undo-list t)
    2200             :           ;; Otherwise, if we can recognize the undo elt for the insertion,
    2201             :           ;; remove it and get ready to replace it later.
    2202             :           ;; In the mean time, turn off undo recording.
    2203           0 :           (let ((last (car-safe buffer-undo-list)))
    2204           0 :             (if (and (consp last) (eql (car last) from) (eql (cdr last) to))
    2205           0 :                 (setq undo-list-saved (cdr buffer-undo-list)
    2206           0 :                       buffer-undo-list t))))
    2207           0 :         (narrow-to-region from to)
    2208           0 :         (goto-char (point-min))
    2209           0 :         (or coding
    2210           0 :             (setq coding (funcall set-auto-coding-function
    2211           0 :                                   filename (- (point-max) (point-min)))))
    2212           0 :         (or coding
    2213           0 :             (setq coding (car (find-operation-coding-system
    2214             :                                'insert-file-contents
    2215           0 :                                (cons filename (current-buffer))
    2216           0 :                                visit beg end replace))))
    2217           0 :         (if (coding-system-p coding)
    2218           0 :             (or enable-multibyte-characters
    2219           0 :                 (setq coding
    2220           0 :                       (coding-system-change-text-conversion coding 'raw-text)))
    2221           0 :           (setq coding nil))
    2222           0 :         (if coding
    2223           0 :             (decode-coding-region (point-min) (point-max) coding)
    2224           0 :           (setq last-coding-system-used coding))
    2225             :         ;; If we're decoding the text of a visited file,
    2226             :         ;; the undo list should start out empty.
    2227           0 :         (if visit
    2228           0 :             (setq buffer-undo-list nil)
    2229             :           ;; If we decided to replace the undo entry for the insertion,
    2230             :           ;; do so now.
    2231           0 :           (if undo-list-saved
    2232           0 :               (setq buffer-undo-list
    2233           0 :                     (cons (cons from (point-max)) undo-list-saved))))))))
    2234             : 
    2235             : (defun recode-region (start end new-coding coding)
    2236             :   "Re-decode the region (previously decoded by CODING) by NEW-CODING."
    2237             :   (interactive
    2238           0 :    (list (region-beginning) (region-end)
    2239           0 :          (read-coding-system "Text was really in: ")
    2240           0 :          (let ((coding (or buffer-file-coding-system last-coding-system-used)))
    2241           0 :            (read-coding-system
    2242           0 :             (concat "But was interpreted as"
    2243           0 :                     (if coding (format " (default %S): " coding) ": "))
    2244           0 :             coding))))
    2245           0 :   (or (and new-coding coding)
    2246           0 :       (error "Coding system not specified"))
    2247             :   ;; Check it before we encode the region.
    2248           0 :   (check-coding-system new-coding)
    2249           0 :   (save-restriction
    2250           0 :     (narrow-to-region start end)
    2251           0 :     (encode-coding-region (point-min) (point-max) coding)
    2252           0 :     (decode-coding-region (point-min) (point-max) new-coding))
    2253           0 :   (if (region-active-p)
    2254           0 :       (deactivate-mark)))
    2255             : 
    2256             : (defun make-translation-table (&rest args)
    2257             :   "Make a translation table from arguments.
    2258             : A translation table is a char table intended for character
    2259             : translation in CCL programs.
    2260             : 
    2261             : Each argument is a list of elements of the form (FROM . TO), where FROM
    2262             : is a character to be translated to TO.
    2263             : 
    2264             : The arguments and forms in each argument are processed in the given
    2265             : order, and if a previous form already translates TO to some other
    2266             : character, say TO-ALT, FROM is also translated to TO-ALT."
    2267           8 :   (let ((table (make-char-table 'translation-table))
    2268             :         revlist)
    2269           8 :     (dolist (elts args)
    2270           8 :       (dolist (elt elts)
    2271        5070 :         (let ((from (car elt))
    2272        5070 :               (to (cdr elt))
    2273             :               to-alt rev-from rev-to)
    2274             :           ;; If we have already translated TO to TO-ALT, FROM should
    2275             :           ;; also be translated to TO-ALT.
    2276        5070 :           (if (setq to-alt (aref table to))
    2277        5070 :               (setq to to-alt))
    2278        5070 :           (aset table from to)
    2279             :           ;; If we have already translated some chars to FROM, they
    2280             :           ;; should also be translated to TO.
    2281        5070 :           (when (setq rev-from (assq from revlist))
    2282           0 :             (dolist (elt (cdr rev-from))
    2283           0 :               (aset table elt to))
    2284           0 :             (setq revlist (delq rev-from revlist)
    2285        5070 :                   rev-from (cdr rev-from)))
    2286             :           ;; Now update REVLIST.
    2287        5070 :           (setq rev-to (assq to revlist))
    2288        5070 :           (if rev-to
    2289          13 :               (setcdr rev-to (cons from (cdr rev-to)))
    2290        5057 :             (setq rev-to (list to from)
    2291        5070 :                   revlist (cons rev-to revlist)))
    2292        5070 :           (if rev-from
    2293        5070 :               (setcdr rev-to (append rev-from (cdr rev-to)))))))
    2294             :     ;; Return TABLE just created.
    2295           8 :     (set-char-table-extra-slot table 1 1)
    2296           8 :     table))
    2297             : 
    2298             : (defun make-translation-table-from-vector (vec)
    2299             :   "Make translation table from decoding vector VEC.
    2300             : VEC is an array of 256 elements to map unibyte codes to multibyte
    2301             : characters.  Elements may be nil for undefined code points."
    2302           0 :   (let ((table (make-char-table 'translation-table))
    2303           0 :         (rev-table (make-char-table 'translation-table))
    2304             :         ch)
    2305           0 :     (dotimes (i 256)
    2306           0 :       (setq ch (aref vec i))
    2307           0 :       (when ch
    2308           0 :         (aset table i ch)
    2309           0 :         (if (>= ch 256)
    2310           0 :             (aset rev-table ch i))))
    2311           0 :     (set-char-table-extra-slot table 0 rev-table)
    2312           0 :     (set-char-table-extra-slot table 1 1)
    2313           0 :     (set-char-table-extra-slot rev-table 1 1)
    2314           0 :     table))
    2315             : 
    2316             : (defun make-translation-table-from-alist (alist)
    2317             :   "Make translation table from N<->M mapping in ALIST.
    2318             : ALIST is an alist, each element has the form (FROM . TO).
    2319             : FROM and TO are a character or a vector of characters.
    2320             : If FROM is a character, that character is translated to TO.
    2321             : If FROM is a vector of characters, that sequence is translated to TO.
    2322             : The first extra-slot of the value is a translation table for reverse mapping.
    2323             : 
    2324             : FROM and TO may be nil.  If TO is nil, the translation from FROM
    2325             : to nothing is defined in the translation table and that element
    2326             : is ignored in the reverse map.  If FROM is nil, the translation
    2327             : from TO to nothing is defined in the reverse map only.  A vector
    2328             : of length zero has the same meaning as specifying nil."
    2329           7 :   (let ((tables (vector (make-char-table 'translation-table)
    2330           7 :                         (make-char-table 'translation-table)))
    2331             :         table max-lookup from to idx val)
    2332           7 :     (dotimes (i 2)
    2333          14 :       (setq table (aref tables i))
    2334          14 :       (setq max-lookup 1)
    2335          14 :       (dolist (elt alist)
    2336      168594 :         (if (= i 0)
    2337       84297 :             (setq from (car elt) to (cdr elt))
    2338      168594 :           (setq from (cdr elt) to (car elt)))
    2339      168594 :         (if (characterp from)
    2340       84297 :             (setq idx from)
    2341       84297 :           (if (= (length from) 0)
    2342           0 :               (setq idx nil)
    2343       84297 :             (setq idx (aref from 0)
    2344      168594 :                   max-lookup (max max-lookup (length from)))))
    2345      168594 :         (when idx
    2346      168594 :           (setq val (aref table idx))
    2347      168594 :           (if val
    2348       77528 :               (progn
    2349       77528 :                 (or (consp val)
    2350       77528 :                     (setq val (list (cons (vector idx) val))))
    2351       77528 :                 (if (characterp from)
    2352       77528 :                     (setq from (vector from)))
    2353       77528 :                 (setq val (nconc val (list (cons from to)))))
    2354       91066 :             (if (characterp from)
    2355       84297 :                 (setq val to)
    2356      168594 :               (setq val (list (cons from to)))))
    2357      168594 :           (aset table idx val)))
    2358          14 :       (set-char-table-extra-slot table 1 max-lookup))
    2359           7 :     (set-char-table-extra-slot (aref tables 0) 0 (aref tables 1))
    2360           7 :     (aref tables 0)))
    2361             : 
    2362             : (defun define-translation-table (symbol &rest args)
    2363             :   "Define SYMBOL as the name of translation table made by ARGS.
    2364             : This sets up information so that the table can be used for
    2365             : translations in a CCL program.
    2366             : 
    2367             : If the first element of ARGS is a char-table whose purpose is
    2368             : `translation-table', just define SYMBOL to name it.  (Note that this
    2369             : function does not bind SYMBOL.)
    2370             : 
    2371             : Any other ARGS should be suitable as arguments of the function
    2372             : `make-translation-table' (which see).
    2373             : 
    2374             : This function sets properties `translation-table' and
    2375             : `translation-table-id' of SYMBOL to the created table itself and the
    2376             : identification number of the table respectively.  It also registers
    2377             : the table in `translation-table-vector'."
    2378          16 :   (let ((table (if (and (char-table-p (car args))
    2379           8 :                         (eq (char-table-subtype (car args))
    2380          16 :                             'translation-table))
    2381           8 :                    (car args)
    2382          16 :                  (apply 'make-translation-table args)))
    2383          16 :         (len (length translation-table-vector))
    2384             :         (id 0)
    2385             :         (done nil))
    2386          16 :     (put symbol 'translation-table table)
    2387         143 :     (while (not done)
    2388         127 :       (if (>= id len)
    2389           0 :           (setq translation-table-vector
    2390         127 :                 (vconcat translation-table-vector (make-vector len nil))))
    2391         127 :       (let ((slot (aref translation-table-vector id)))
    2392         127 :         (if (or (not slot)
    2393         127 :                 (eq (car slot) symbol))
    2394          16 :             (progn
    2395          16 :               (aset translation-table-vector id (cons symbol table))
    2396          16 :               (setq done t))
    2397         127 :           (setq id (1+ id)))))
    2398          16 :     (put symbol 'translation-table-id id)
    2399          16 :     id))
    2400             : 
    2401             : (defun translate-region (start end table)
    2402             :   "From START to END, translate characters according to TABLE.
    2403             : TABLE is a string or a char-table.
    2404             : If TABLE is a string, the Nth character in it is the mapping
    2405             : for the character with code N.
    2406             : If TABLE is a char-table, the element for character N is the mapping
    2407             : for the character with code N.
    2408             : It returns the number of characters changed."
    2409             :   (interactive
    2410           0 :    (list (region-beginning)
    2411           0 :          (region-end)
    2412           0 :          (let (table l)
    2413           0 :            (dotimes (i (length translation-table-vector))
    2414           0 :              (if (consp (aref translation-table-vector i))
    2415           0 :                  (push (list (symbol-name
    2416           0 :                               (car (aref translation-table-vector i)))) l)))
    2417           0 :            (if (not l)
    2418           0 :                (error "No translation table defined"))
    2419           0 :            (while (not table)
    2420           0 :              (setq table (completing-read "Translation table: " l nil t)))
    2421           0 :            (intern table))))
    2422      157260 :   (if (symbolp table)
    2423      157260 :       (let ((val (get table 'translation-table)))
    2424      157260 :         (or (char-table-p val)
    2425      157260 :             (error "Invalid translation table name: %s" table))
    2426      157260 :         (setq table val)))
    2427      157260 :   (translate-region-internal start end table))
    2428             : 
    2429             : (defmacro with-category-table (table &rest body)
    2430             :   "Execute BODY like `progn' with TABLE the current category table.
    2431             : The category table of the current buffer is saved, BODY is evaluated,
    2432             : then the saved table is restored, even in case of an abnormal exit.
    2433             : Value is what BODY returns."
    2434             :   (declare (indent 1) (debug t))
    2435           0 :   (let ((old-table (make-symbol "old-table"))
    2436           0 :         (old-buffer (make-symbol "old-buffer")))
    2437           0 :     `(let ((,old-table (category-table))
    2438           0 :            (,old-buffer (current-buffer)))
    2439             :        (unwind-protect
    2440             :            (progn
    2441           0 :              (set-category-table ,table)
    2442           0 :              ,@body)
    2443           0 :          (with-current-buffer ,old-buffer
    2444           0 :            (set-category-table ,old-table))))))
    2445             : 
    2446             : (defun define-translation-hash-table (symbol table)
    2447             :   "Define SYMBOL as the name of the hash translation TABLE for use in CCL.
    2448             : 
    2449             : Analogous to `define-translation-table', but updates
    2450             : `translation-hash-table-vector' and the table is for use in the CCL
    2451             : `lookup-integer' and `lookup-character' functions."
    2452           0 :   (unless (and (symbolp symbol)
    2453           0 :                (hash-table-p table))
    2454           0 :     (error "Bad args to define-translation-hash-table"))
    2455           0 :   (let ((len (length translation-hash-table-vector))
    2456             :         (id 0)
    2457             :         done)
    2458           0 :     (put symbol 'translation-hash-table table)
    2459           0 :     (while (not done)
    2460           0 :       (if (>= id len)
    2461           0 :           (setq translation-hash-table-vector
    2462           0 :                 (vconcat translation-hash-table-vector [nil])))
    2463           0 :       (let ((slot (aref translation-hash-table-vector id)))
    2464           0 :         (if (or (not slot)
    2465           0 :                 (eq (car slot) symbol))
    2466           0 :             (progn
    2467           0 :               (aset translation-hash-table-vector id (cons symbol table))
    2468           0 :               (setq done t))
    2469           0 :           (setq id (1+ id)))))
    2470           0 :     (put symbol 'translation-hash-table-id id)
    2471           0 :     id))
    2472             : 
    2473             : ;;; Initialize some variables.
    2474             : 
    2475             : (put 'use-default-ascent 'char-table-extra-slots 0)
    2476             : (setq use-default-ascent (make-char-table 'use-default-ascent))
    2477             : (put 'ignore-relative-composition 'char-table-extra-slots 0)
    2478             : (setq ignore-relative-composition
    2479             :       (make-char-table 'ignore-relative-composition))
    2480             : 
    2481             : ;;; Built-in auto-coding-functions:
    2482             : 
    2483             : (defun sgml-xml-auto-coding-function (size)
    2484             :   "Determine whether the buffer is XML, and if so, its encoding.
    2485             : This function is intended to be added to `auto-coding-functions'."
    2486         276 :   (setq size (+ (point) size))
    2487         276 :   (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t)
    2488           0 :     (let ((end (save-excursion
    2489             :                  ;; This is a hack.
    2490           0 :                  (re-search-forward "[\"']\\s-*\\?>" size t))))
    2491           0 :       (when end
    2492           0 :         (if (re-search-forward "encoding=[\"']\\(.+?\\)[\"']" end t)
    2493           0 :             (let* ((match (match-string 1))
    2494           0 :                    (sym (intern (downcase match))))
    2495           0 :               (if (coding-system-p sym)
    2496           0 :                   sym
    2497           0 :                 (message "Warning: unknown coding system \"%s\"" match)
    2498           0 :                 nil))
    2499             :           ;; Files without an encoding tag should be UTF-8. But users
    2500             :           ;; may be naive about encodings, and have saved the file from
    2501             :           ;; another editor that does not help them get the encoding right.
    2502             :           ;; Detect the encoding and warn the user if it is detected as
    2503             :           ;; something other than UTF-8.
    2504           0 :           (let ((detected
    2505           0 :                  (with-coding-priority '(utf-8)
    2506           0 :                    (coding-system-base
    2507           0 :                     (detect-coding-region (point-min) size t)))))
    2508             :             ;; Pure ASCII always comes back as undecided.
    2509           0 :             (if (memq detected '(utf-8 undecided))
    2510             :                 'utf-8
    2511           0 :               (warn "File contents detected as %s.
    2512             :   Consider adding an encoding attribute to the xml declaration,
    2513           0 :   or saving as utf-8, as mandated by the xml specification." detected)
    2514         276 :               detected)))))))
    2515             : 
    2516             : (defun sgml-html-meta-auto-coding-function (size)
    2517             :   "If the buffer has an HTML meta tag, use it to determine encoding.
    2518             : This function is intended to be added to `auto-coding-functions'."
    2519         276 :   (let ((case-fold-search t))
    2520         276 :     (setq size (min (+ (point) size)
    2521         276 :                     (save-excursion
    2522             :                       ;; Limit the search by the end of the HTML header.
    2523         276 :                       (or (search-forward "</head>" (+ (point) size) t)
    2524             :                           ;; In case of no header, search only 10 lines.
    2525         276 :                           (forward-line 10))
    2526         276 :                       (point))))
    2527             :     ;; Make sure that the buffer really contains an HTML document, by
    2528             :     ;; checking that it starts with a doctype or a <HTML> start tag
    2529             :     ;; (allowing for whitespace at bob).  Note: 'DOCTYPE NETSCAPE' is
    2530             :     ;; useful for Mozilla bookmark files.
    2531         276 :     (when (and (re-search-forward "\\`[[:space:]\n]*\\(<!doctype[[:space:]\n]+\\(html\\|netscape\\)\\|<html\\)" size t)
    2532         276 :                (re-search-forward "<meta\\s-+\\(http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']text/\\sw+;\\s-*\\)?charset=[\"']?\\(.+?\\)[\"'\\s-/>]" size t))
    2533           0 :       (let* ((match (match-string 2))
    2534           0 :              (sym (intern (downcase match))))
    2535           0 :         (if (coding-system-p sym)
    2536           0 :             sym
    2537           0 :           (message "Warning: unknown coding system \"%s\"" match)
    2538         276 :           nil)))))
    2539             : 
    2540             : (defun xml-find-file-coding-system (args)
    2541             :   "Determine the coding system of an XML file without a declaration.
    2542             : Strictly speaking, the file should be utf-8, but mistakes are
    2543             : made, and there are genuine cases where XML fragments are saved,
    2544             : with the encoding properly specified in a master document, or
    2545             : added by processing software."
    2546           0 :   (if (eq (car args) 'insert-file-contents)
    2547           0 :       (let ((detected
    2548           0 :              (with-coding-priority '(utf-8)
    2549           0 :                (coding-system-base
    2550           0 :                 (detect-coding-region (point-min) (point-max) t)))))
    2551             :         ;; Pure ASCII always comes back as undecided.
    2552           0 :         (cond
    2553           0 :          ((memq detected '(utf-8 undecided))
    2554             :           'utf-8)
    2555           0 :          ((eq detected 'utf-16le-with-signature) 'utf-16le-with-signature)
    2556           0 :          ((eq detected 'utf-16be-with-signature) 'utf-16be-with-signature)
    2557             :          (t
    2558           0 :           (warn "File contents detected as %s.
    2559             :   Consider adding an xml declaration with the encoding specified,
    2560           0 :   or saving as utf-8, as mandated by the xml specification." detected)
    2561           0 :           detected)))
    2562             :     ;; Don't interfere with the user's wishes for saving the buffer.
    2563             :     ;; We did what we could when the buffer was created to ensure the
    2564             :     ;; correct encoding was used, or the user was warned, so any
    2565             :     ;; non-conformity here is deliberate on the part of the user.
    2566           0 :     'undecided))
    2567             : 
    2568             : ;;;
    2569             : (provide 'mule)
    2570             : 
    2571             : ;;; mule.el ends here

Generated by: LCOV version 1.12