Line data Source code
1 : ;;; mule-cmds.el --- commands for multilingual environment -*- lexical-binding:t -*-
2 :
3 : ;; Copyright (C) 1997-2017 Free Software Foundation, Inc.
4 : ;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 : ;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
7 : ;; Registration Number H14PRO021
8 : ;; Copyright (C) 2003
9 : ;; National Institute of Advanced Industrial Science and Technology (AIST)
10 : ;; Registration Number H13PRO009
11 :
12 : ;; Keywords: mule, i18n
13 :
14 : ;; This file is part of GNU Emacs.
15 :
16 : ;; GNU Emacs is free software: you can redistribute it and/or modify
17 : ;; it under the terms of the GNU General Public License as published by
18 : ;; the Free Software Foundation, either version 3 of the License, or
19 : ;; (at your option) any later version.
20 :
21 : ;; GNU Emacs is distributed in the hope that it will be useful,
22 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
23 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 : ;; GNU General Public License for more details.
25 :
26 : ;; You should have received a copy of the GNU General Public License
27 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
28 :
29 : ;;; Commentary:
30 :
31 : ;;; Code:
32 :
33 : (eval-when-compile (require 'cl-lib))
34 :
35 : (defvar dos-codepage)
36 : (autoload 'widget-value "wid-edit")
37 :
38 : ;;; MULE related key bindings and menus.
39 :
40 : (defvar mule-keymap
41 : (let ((map (make-sparse-keymap)))
42 : (define-key map "f" 'set-buffer-file-coding-system)
43 : (define-key map "r" 'revert-buffer-with-coding-system)
44 : (define-key map "F" 'set-file-name-coding-system)
45 : (define-key map "t" 'set-terminal-coding-system)
46 : (define-key map "k" 'set-keyboard-coding-system)
47 : (define-key map "p" 'set-buffer-process-coding-system)
48 : (define-key map "x" 'set-selection-coding-system)
49 : (define-key map "X" 'set-next-selection-coding-system)
50 : (define-key map "\C-\\" 'set-input-method)
51 : (define-key map "c" 'universal-coding-system-argument)
52 : (define-key map "l" 'set-language-environment)
53 : map)
54 : "Keymap for Mule (Multilingual environment) specific commands.")
55 :
56 : ;; Keep "C-x C-m ..." for mule specific commands.
57 : (define-key ctl-x-map "\C-m" mule-keymap)
58 :
59 : (defvar describe-language-environment-map
60 : (let ((map (make-sparse-keymap "Describe Language Environment")))
61 : (bindings--define-key map
62 : [Default] '(menu-item "Default" describe-specified-language-support))
63 : map))
64 :
65 : (defvar setup-language-environment-map
66 : (let ((map (make-sparse-keymap "Set Language Environment")))
67 : (bindings--define-key map
68 : [Default] '(menu-item "Default" setup-specified-language-environment))
69 : map))
70 :
71 : (defvar set-coding-system-map
72 : (let ((map (make-sparse-keymap "Set Coding System")))
73 : (bindings--define-key map [set-buffer-process-coding-system]
74 : '(menu-item "For I/O with Subprocess" set-buffer-process-coding-system
75 : :visible (fboundp 'make-process)
76 : :enable (get-buffer-process (current-buffer))
77 : :help "How to en/decode I/O from/to subprocess connected to this buffer"))
78 : (bindings--define-key map [set-next-selection-coding-system]
79 : '(menu-item "For Next X Selection" set-next-selection-coding-system
80 : :visible (display-selections-p)
81 : :help "How to en/decode next selection/clipboard operation"))
82 : (bindings--define-key map [set-selection-coding-system]
83 : '(menu-item "For X Selections/Clipboard" set-selection-coding-system
84 : :visible (display-selections-p)
85 : :help "How to en/decode data to/from selection/clipboard"))
86 :
87 : (bindings--define-key map [separator-3] menu-bar-separator)
88 : (bindings--define-key map [set-terminal-coding-system]
89 : '(menu-item "For Terminal" set-terminal-coding-system
90 : :enable (null (memq initial-window-system '(x w32 ns)))
91 : :help "How to encode terminal output"))
92 : (bindings--define-key map [set-keyboard-coding-system]
93 : '(menu-item "For Keyboard" set-keyboard-coding-system
94 : :help "How to decode keyboard input"))
95 :
96 : (bindings--define-key map [separator-2] menu-bar-separator)
97 : (bindings--define-key map [set-file-name-coding-system]
98 : '(menu-item "For File Name" set-file-name-coding-system
99 : :help "How to decode/encode file names"))
100 : (bindings--define-key map [revert-buffer-with-coding-system]
101 : '(menu-item "For Reverting This File Now"
102 : revert-buffer-with-coding-system
103 : :enable buffer-file-name
104 : :help "Revisit this file immediately using specified coding system"))
105 : (bindings--define-key map [set-buffer-file-coding-system]
106 : '(menu-item "For Saving This Buffer" set-buffer-file-coding-system
107 : :help "How to encode this buffer when saved"))
108 : (bindings--define-key map [separator-1] menu-bar-separator)
109 : (bindings--define-key map [universal-coding-system-argument]
110 : '(menu-item "For Next Command" universal-coding-system-argument
111 : :help "Coding system to be used by next command"))
112 : map))
113 :
114 : (defvar mule-menu-keymap
115 : (let ((map (make-sparse-keymap "Mule (Multilingual Environment)")))
116 : (bindings--define-key map [mule-diag]
117 : '(menu-item "Show All Multilingual Settings" mule-diag
118 : :help "Display multilingual environment settings"))
119 : (bindings--define-key map [list-character-sets]
120 : '(menu-item "List Character Sets" list-character-sets
121 : :help "Show table of available character sets"))
122 : (bindings--define-key map [describe-coding-system]
123 : '(menu-item "Describe Coding System..." describe-coding-system))
124 : (bindings--define-key map [describe-input-method]
125 : '(menu-item "Describe Input Method..." describe-input-method
126 : :help "Keyboard layout for a specific input method"))
127 : (bindings--define-key map [describe-language-environment]
128 : `(menu-item "Describe Language Environment"
129 : ,describe-language-environment-map
130 : :help "Show multilingual settings for a specific language"))
131 :
132 : (bindings--define-key map [separator-coding-system] menu-bar-separator)
133 : (bindings--define-key map [view-hello-file]
134 : '(menu-item "Show Multilingual Sample Text" view-hello-file
135 : :enable (file-readable-p
136 : (expand-file-name "HELLO" data-directory))
137 : :help "Demonstrate various character sets"))
138 : (bindings--define-key map [set-various-coding-system]
139 : `(menu-item "Set Coding Systems" ,set-coding-system-map
140 : :enable (default-value 'enable-multibyte-characters)))
141 :
142 : (bindings--define-key map [separator-input-method] menu-bar-separator)
143 : (bindings--define-key map [describe-input-method]
144 : '(menu-item "Describe Input Method" describe-input-method))
145 : (bindings--define-key map [set-input-method]
146 : '(menu-item "Select Input Method..." set-input-method))
147 : (bindings--define-key map [toggle-input-method]
148 : '(menu-item "Toggle Input Method" toggle-input-method))
149 :
150 : (bindings--define-key map [separator-mule] menu-bar-separator)
151 : (bindings--define-key map [set-language-environment]
152 : `(menu-item "Set Language Environment" ,setup-language-environment-map))
153 : map)
154 : "Keymap for Mule (Multilingual environment) menu specific commands.")
155 :
156 : ;; This should be a single character key binding because users use it
157 : ;; very frequently while editing multilingual text. Now we can use
158 : ;; only two such keys: "\C-\\" and "\C-^", but the latter is not
159 : ;; convenient because it requires shifting on most keyboards. An
160 : ;; alternative is "\C-]" which is now bound to `abort-recursive-edit'
161 : ;; but it won't be used that frequently.
162 : (define-key global-map "\C-\\" 'toggle-input-method)
163 :
164 : ;; This is no good because people often type Shift-SPC
165 : ;; meaning to type SPC. -- rms.
166 : ;; ;; Here's an alternative key binding for X users (Shift-SPACE).
167 : ;; (define-key global-map [?\S- ] 'toggle-input-method)
168 :
169 : ;;; Mule related hyperlinks.
170 : (defconst help-xref-mule-regexp-template
171 : (purecopy (concat "\\(\\<\\("
172 : "\\(coding system\\)\\|"
173 : "\\(input method\\)\\|"
174 : "\\(character set\\)\\|"
175 : "\\(charset\\)"
176 : "\\)\\s-+\\)?"
177 : ;; Note starting with word-syntax character:
178 : "['`‘]\\(\\sw\\(\\sw\\|\\s_\\)+\\)['’]")))
179 :
180 : (defun coding-system-change-eol-conversion (coding-system eol-type)
181 : "Return a coding system which differs from CODING-SYSTEM in EOL conversion.
182 : The returned coding system converts end-of-line by EOL-TYPE
183 : but text as the same way as CODING-SYSTEM.
184 : EOL-TYPE should be `unix', `dos', `mac', or nil.
185 : If EOL-TYPE is nil, the returned coding system detects
186 : how end-of-line is formatted automatically while decoding.
187 :
188 : EOL-TYPE can be specified by an integer 0, 1, or 2.
189 : They means `unix', `dos', and `mac' respectively."
190 87 : (if (symbolp eol-type)
191 87 : (setq eol-type (cond ((eq eol-type 'unix) 0)
192 0 : ((eq eol-type 'dos) 1)
193 0 : ((eq eol-type 'mac) 2)
194 87 : (t eol-type))))
195 : ;; We call `coding-system-base' before `coding-system-eol-type',
196 : ;; because the coding-system may not be initialized until then.
197 87 : (let* ((base (coding-system-base coding-system))
198 87 : (orig-eol-type (coding-system-eol-type coding-system)))
199 87 : (cond ((vectorp orig-eol-type)
200 83 : (if (not eol-type)
201 0 : coding-system
202 83 : (aref orig-eol-type eol-type)))
203 4 : ((not eol-type)
204 0 : base)
205 4 : ((= eol-type orig-eol-type)
206 4 : coding-system)
207 0 : ((progn (setq orig-eol-type (coding-system-eol-type base))
208 0 : (vectorp orig-eol-type))
209 87 : (aref orig-eol-type eol-type)))))
210 :
211 : (defun coding-system-change-text-conversion (coding-system coding)
212 : "Return a coding system which differs from CODING-SYSTEM in text conversion.
213 : The returned coding system converts text by CODING
214 : but end-of-line as the same way as CODING-SYSTEM.
215 : If CODING is nil, the returned coding system detects
216 : how text is formatted automatically while decoding."
217 8 : (let ((eol-type (coding-system-eol-type coding-system)))
218 8 : (coding-system-change-eol-conversion
219 8 : (if coding coding 'undecided)
220 8 : (if (numberp eol-type) (aref [unix dos mac] eol-type)))))
221 :
222 : ;; Canonicalize the coding system name NAME by removing some prefixes
223 : ;; and delimiter characters. Support function of
224 : ;; coding-system-from-name.
225 : (defun canonicalize-coding-system-name (name)
226 0 : (if (string-match "^\\(ms\\|ibm\\|windows-\\)\\([0-9]+\\)$" name)
227 : ;; "ms950", "ibm950", "windows-950" -> "cp950"
228 0 : (concat "cp" (match-string 2 name))
229 0 : (if (string-match "^iso[-_ ]?[0-9]" name)
230 : ;; "iso-8859-1" -> "8859-1", "iso-2022-jp" ->"2022-jp"
231 0 : (setq name (substring name (1- (match-end 0)))))
232 0 : (let ((idx (string-match "[-_ /]" name)))
233 : ;; Delete "-", "_", " ", "/" but do distinguish "16-be" and "16be".
234 0 : (while idx
235 0 : (if (and (>= idx 2)
236 0 : (eq (string-match "16-[lb]e$" name (- idx 2))
237 0 : (- idx 2)))
238 0 : (setq idx (string-match "[-_ /]" name (match-end 0)))
239 0 : (setq name (concat (substring name 0 idx) (substring name (1+ idx)))
240 0 : idx (string-match "[-_ /]" name idx))))
241 0 : name)))
242 :
243 : (defun coding-system-from-name (name)
244 : "Return a coding system whose name matches with NAME (string or symbol)."
245 0 : (let (sym)
246 0 : (if (stringp name) (setq sym (intern name))
247 0 : (setq sym name name (symbol-name name)))
248 0 : (if (coding-system-p sym)
249 0 : sym
250 0 : (let ((eol-type
251 0 : (if (string-match "-\\(unix\\|dos\\|mac\\)$" name)
252 0 : (prog1 (intern (match-string 1 name))
253 0 : (setq name (substring name 0 (match-beginning 0)))))))
254 0 : (setq name (canonicalize-coding-system-name (downcase name)))
255 0 : (catch 'tag
256 0 : (dolist (elt (coding-system-list))
257 0 : (if (string= (canonicalize-coding-system-name (symbol-name elt))
258 0 : name)
259 0 : (throw 'tag (if eol-type (coding-system-change-eol-conversion
260 0 : elt eol-type)
261 0 : elt)))))))))
262 :
263 : (defun toggle-enable-multibyte-characters (&optional arg)
264 : "Change whether this buffer uses multibyte characters.
265 : With ARG, use multibyte characters if the ARG is positive.
266 :
267 : Note that this command does not convert the byte contents of
268 : the buffer; it only changes the way those bytes are interpreted.
269 : In general, therefore, this command *changes* the sequence of
270 : characters that the current buffer contains.
271 :
272 : We suggest you avoid using this command unless you know what you are
273 : doing. If you use it by mistake, and the buffer is now displayed
274 : wrong, use this command again to toggle back to the right mode."
275 : (interactive "P")
276 0 : (let ((new-flag
277 0 : (if (null arg) (null enable-multibyte-characters)
278 0 : (> (prefix-numeric-value arg) 0))))
279 0 : (set-buffer-multibyte new-flag))
280 0 : (force-mode-line-update))
281 :
282 : (defun view-hello-file ()
283 : "Display the HELLO file, which lists many languages and characters."
284 : (interactive)
285 : ;; We have to decode the file in any environment.
286 0 : (let ((coding-system-for-read 'iso-2022-7bit))
287 0 : (view-file (expand-file-name "HELLO" data-directory))))
288 :
289 : (defun universal-coding-system-argument (coding-system)
290 : "Execute an I/O command using the specified coding system."
291 : (interactive
292 0 : (let ((default (and buffer-file-coding-system
293 0 : (not (eq (coding-system-type buffer-file-coding-system)
294 0 : 'undecided))
295 0 : buffer-file-coding-system)))
296 0 : (list (read-coding-system
297 0 : (if default
298 0 : (format "Coding system for following command (default %s): " default)
299 0 : "Coding system for following command: ")
300 0 : default))))
301 0 : (let* ((keyseq (read-key-sequence
302 0 : (format "Command to execute with %s:" coding-system)))
303 0 : (cmd (key-binding keyseq))
304 : prefix)
305 : ;; read-key-sequence ignores quit, so make an explicit check.
306 : ;; Like many places, this assumes quit == C-g, but it need not be.
307 0 : (if (equal last-input-event ?\C-g)
308 0 : (keyboard-quit))
309 0 : (when (memq cmd '(universal-argument digit-argument))
310 0 : (call-interactively cmd)
311 :
312 : ;; Process keys bound in `universal-argument-map'.
313 0 : (while (progn
314 0 : (setq keyseq (read-key-sequence nil t)
315 0 : cmd (key-binding keyseq t))
316 0 : (not (eq cmd 'universal-argument-other-key)))
317 0 : (let ((current-prefix-arg prefix-arg)
318 : ;; Have to bind `last-command-event' here so that
319 : ;; `digit-argument', for instance, can compute the
320 : ;; prefix arg.
321 0 : (last-command-event (aref keyseq 0)))
322 0 : (call-interactively cmd)))
323 :
324 : ;; This is the final call to `universal-argument-other-key', which
325 : ;; set's the final `prefix-arg.
326 0 : (let ((current-prefix-arg prefix-arg))
327 0 : (call-interactively cmd))
328 :
329 : ;; Read the command to execute with the given prefix arg.
330 0 : (setq prefix prefix-arg
331 0 : keyseq (read-key-sequence nil t)
332 0 : cmd (key-binding keyseq)))
333 :
334 0 : (let ((coding-system-for-read coding-system)
335 0 : (coding-system-for-write coding-system)
336 : (coding-system-require-warning t)
337 0 : (current-prefix-arg prefix))
338 0 : (message "")
339 0 : (call-interactively cmd))))
340 :
341 : (defun set-default-coding-systems (coding-system)
342 : "Set default value of various coding systems to CODING-SYSTEM.
343 : This sets the following coding systems:
344 : o coding system of a newly created buffer
345 : o default coding system for subprocess I/O
346 : This also sets the following values:
347 : o default value used as `file-name-coding-system' for converting file names
348 : if CODING-SYSTEM is ASCII-compatible
349 : o default value for the command `set-terminal-coding-system'
350 : o default value for the command `set-keyboard-coding-system'
351 : if CODING-SYSTEM is ASCII-compatible"
352 2 : (check-coding-system coding-system)
353 2 : (setq-default buffer-file-coding-system coding-system)
354 :
355 2 : (if (eq system-type 'darwin)
356 : ;; The file-name coding system on Darwin systems is always utf-8.
357 0 : (setq default-file-name-coding-system 'utf-8-unix)
358 2 : (if (and (default-value 'enable-multibyte-characters)
359 2 : (or (not coding-system)
360 2 : (coding-system-get coding-system 'ascii-compatible-p)))
361 2 : (setq default-file-name-coding-system
362 2 : (coding-system-change-eol-conversion coding-system 'unix))))
363 2 : (setq default-terminal-coding-system coding-system)
364 : ;; Prevent default-terminal-coding-system from converting ^M to ^J.
365 2 : (setq default-keyboard-coding-system
366 2 : (coding-system-change-eol-conversion coding-system 'unix))
367 : ;; Preserve eol-type from existing default-process-coding-systems.
368 : ;; On non-unix-like systems in particular, these may have been set
369 : ;; carefully by the user, or by the startup code, to deal with the
370 : ;; users shell appropriately, so should not be altered by changing
371 : ;; language environment.
372 2 : (let ((output-coding
373 2 : (coding-system-change-text-conversion
374 2 : (car default-process-coding-system) coding-system))
375 : (input-coding
376 2 : (coding-system-change-text-conversion
377 2 : (cdr default-process-coding-system) coding-system)))
378 2 : (setq default-process-coding-system
379 2 : (cons output-coding input-coding))))
380 :
381 : (defun prefer-coding-system (coding-system)
382 : "Add CODING-SYSTEM at the front of the priority list for automatic detection.
383 : This also sets the following coding systems:
384 : o coding system of a newly created buffer
385 : o default coding system for subprocess I/O
386 : This also sets the following values:
387 : o default value used as `file-name-coding-system' for converting file names
388 : o default value for the command `set-terminal-coding-system'
389 : o default value for the command `set-keyboard-coding-system'
390 :
391 : If CODING-SYSTEM specifies a certain type of EOL conversion, the coding
392 : systems set by this function will use that type of EOL conversion.
393 :
394 : A coding system that requires automatic detection of text+encoding
395 : \(e.g. undecided, unix) can't be preferred.
396 :
397 : To prefer, for instance, utf-8, say the following:
398 :
399 : (prefer-coding-system \\='utf-8)"
400 : (interactive "zPrefer coding system: ")
401 0 : (if (not (and coding-system (coding-system-p coding-system)))
402 0 : (error "Invalid coding system `%s'" coding-system))
403 0 : (if (memq (coding-system-type coding-system) '(raw-text undecided))
404 0 : (error "Can't prefer the coding system `%s'" coding-system))
405 0 : (let ((base (coding-system-base coding-system))
406 0 : (eol-type (coding-system-eol-type coding-system)))
407 0 : (set-coding-system-priority base)
408 0 : (and (called-interactively-p 'interactive)
409 0 : (or (eq base coding-system)
410 0 : (message "Highest priority is set to %s (base of %s)"
411 0 : base coding-system)))
412 : ;; If they asked for specific EOL conversion, honor that.
413 0 : (if (memq eol-type '(0 1 2))
414 0 : (setq base
415 0 : (coding-system-change-eol-conversion base eol-type)))
416 0 : (set-default-coding-systems base)
417 0 : (if (called-interactively-p 'interactive)
418 0 : (or (eq base (coding-system-type default-file-name-coding-system))
419 0 : (message "The default value of `file-name-coding-system' was not changed because the specified coding system is not suitable for file names.")))))
420 :
421 : (defvar sort-coding-systems-predicate nil
422 : "If non-nil, a predicate function to sort coding systems.
423 :
424 : It is called with two coding systems, and should return t if the first
425 : one is \"less\" than the second.
426 :
427 : The function `sort-coding-systems' use it.")
428 :
429 : (defun sort-coding-systems (codings)
430 : "Sort coding system list CODINGS by a priority of each coding system.
431 : Return the sorted list. CODINGS is modified by side effects.
432 :
433 : If a coding system is most preferred, it has the highest priority.
434 : Otherwise, coding systems that correspond to MIME charsets have
435 : higher priorities. Among them, a coding system included in the
436 : `coding-system' key of the current language environment has higher
437 : priority. See also the documentation of `language-info-alist'.
438 :
439 : If the variable `sort-coding-systems-predicate' (which see) is
440 : non-nil, it is used to sort CODINGS instead."
441 0 : (if sort-coding-systems-predicate
442 0 : (sort codings sort-coding-systems-predicate)
443 0 : (let* ((from-priority (coding-system-priority-list))
444 0 : (most-preferred (car from-priority))
445 0 : (lang-preferred (get-language-info current-language-environment
446 0 : 'coding-system))
447 0 : (func (function
448 : (lambda (x)
449 0 : (let ((base (coding-system-base x)))
450 : ;; We calculate the priority number 0..255 by
451 : ;; using the 8 bits PMMLCEII as this:
452 : ;; P: 1 if most preferred.
453 : ;; MM: greater than 0 if mime-charset.
454 : ;; L: 1 if one of the current lang. env.'s codings.
455 : ;; C: 1 if one of codings listed in the category list.
456 : ;; E: 1 if not XXX-with-esc
457 : ;; II: if iso-2022 based, 0..3, else 1.
458 0 : (logior
459 0 : (lsh (if (eq base most-preferred) 1 0) 7)
460 0 : (lsh
461 0 : (let ((mime (coding-system-get base :mime-charset)))
462 : ;; Prefer coding systems corresponding to a
463 : ;; MIME charset.
464 0 : (if mime
465 : ;; Lower utf-16 priority so that we
466 : ;; normally prefer utf-8 to it, and put
467 : ;; x-ctext below that.
468 0 : (cond ((string-match-p "utf-16"
469 0 : (symbol-name mime))
470 : 2)
471 0 : ((string-match-p "^x-" (symbol-name mime))
472 : 1)
473 0 : (t 3))
474 0 : 0))
475 0 : 5)
476 0 : (lsh (if (memq base lang-preferred) 1 0) 4)
477 0 : (lsh (if (memq base from-priority) 1 0) 3)
478 0 : (lsh (if (string-match-p "-with-esc\\'"
479 0 : (symbol-name base))
480 0 : 0 1) 2)
481 0 : (if (eq (coding-system-type base) 'iso-2022)
482 0 : (let ((category (coding-system-category base)))
483 : ;; For ISO based coding systems, prefer
484 : ;; one that doesn't use designation nor
485 : ;; locking/single shifting.
486 0 : (cond
487 0 : ((or (eq category 'coding-category-iso-8-1)
488 0 : (eq category 'coding-category-iso-8-2))
489 : 2)
490 0 : ((or (eq category 'coding-category-iso-7-tight)
491 0 : (eq category 'coding-category-iso-7))
492 : 1)
493 : (t
494 0 : 0)))
495 0 : 1)
496 0 : ))))))
497 0 : (sort codings (function (lambda (x y)
498 0 : (> (funcall func x) (funcall func y))))))))
499 :
500 : (defun find-coding-systems-region (from to)
501 : "Return a list of proper coding systems to encode a text between FROM and TO.
502 :
503 : If FROM is a string, find coding systems in that instead of the buffer.
504 : All coding systems in the list can safely encode any multibyte characters
505 : in the text.
506 :
507 : If the text contains no multibyte characters, return a list of a single
508 : element `undecided'."
509 339 : (let ((codings (find-coding-systems-region-internal from to)))
510 339 : (if (eq codings t)
511 : ;; The text contains only ASCII characters. Any coding
512 : ;; systems are safe.
513 : '(undecided)
514 : ;; We need copy-sequence because sorting will alter the argument.
515 339 : (sort-coding-systems (copy-sequence codings)))))
516 :
517 : (defun find-coding-systems-string (string)
518 : "Return a list of proper coding systems to encode STRING.
519 : All coding systems in the list can safely encode any multibyte characters
520 : in STRING.
521 :
522 : If STRING contains no multibyte characters, return a list of a single
523 : element `undecided'."
524 0 : (find-coding-systems-region string nil))
525 :
526 : (defun find-coding-systems-for-charsets (charsets)
527 : "Return a list of proper coding systems to encode characters of CHARSETS.
528 : CHARSETS is a list of character sets.
529 :
530 : This only finds coding systems of type `charset', whose
531 : `:charset-list' property includes all of CHARSETS (plus `ascii' for
532 : ASCII-compatible coding systems). It was used in older versions of
533 : Emacs, but is unlikely to be what you really want now."
534 : ;; Deal with aliases.
535 0 : (setq charsets (mapcar (lambda (c)
536 0 : (get-charset-property c :name))
537 0 : charsets))
538 0 : (cond ((or (null charsets)
539 0 : (and (= (length charsets) 1)
540 0 : (eq 'ascii (car charsets))))
541 : '(undecided))
542 0 : ((or (memq 'eight-bit-control charsets)
543 0 : (memq 'eight-bit-graphic charsets))
544 : '(raw-text utf-8-emacs))
545 : (t
546 0 : (let (codings)
547 0 : (dolist (cs (coding-system-list t))
548 0 : (let ((cs-charsets (and (eq (coding-system-type cs) 'charset)
549 0 : (coding-system-charset-list cs)))
550 0 : (charsets charsets))
551 0 : (if (coding-system-get cs :ascii-compatible-p)
552 0 : (cl-pushnew 'ascii cs-charsets))
553 0 : (if (catch 'ok
554 0 : (when cs-charsets
555 0 : (while charsets
556 0 : (unless (memq (pop charsets) cs-charsets)
557 0 : (throw 'ok nil)))
558 0 : t))
559 0 : (push cs codings))))
560 0 : (nreverse codings)))))
561 :
562 : (defun find-multibyte-characters (from to &optional maxcount excludes)
563 : "Find multibyte characters in the region specified by FROM and TO.
564 : If FROM is a string, find multibyte characters in the string.
565 : The return value is an alist of the following format:
566 : ((CHARSET COUNT CHAR ...) ...)
567 : where
568 : CHARSET is a character set,
569 : COUNT is a number of characters,
570 : CHARs are the characters found from the character set.
571 : Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
572 : Optional 4th arg EXCLUDES is a list of character sets to be ignored."
573 0 : (let ((chars nil)
574 : charset char)
575 0 : (if (stringp from)
576 0 : (if (multibyte-string-p from)
577 0 : (let ((idx 0))
578 0 : (while (setq idx (string-match-p "[^\000-\177]" from idx))
579 0 : (setq char (aref from idx)
580 0 : charset (char-charset char))
581 0 : (unless (memq charset excludes)
582 0 : (let ((slot (assq charset chars)))
583 0 : (if slot
584 0 : (if (not (memq char (nthcdr 2 slot)))
585 0 : (let ((count (nth 1 slot)))
586 0 : (setcar (cdr slot) (1+ count))
587 0 : (if (or (not maxcount) (< count maxcount))
588 0 : (nconc slot (list char)))))
589 0 : (setq chars (cons (list charset 1 char) chars)))))
590 0 : (setq idx (1+ idx)))))
591 0 : (if enable-multibyte-characters
592 0 : (save-excursion
593 0 : (goto-char from)
594 0 : (while (re-search-forward "[^\000-\177]" to t)
595 0 : (setq char (preceding-char)
596 0 : charset (char-charset char))
597 0 : (unless (memq charset excludes)
598 0 : (let ((slot (assq charset chars)))
599 0 : (if slot
600 0 : (if (not (member char (nthcdr 2 slot)))
601 0 : (let ((count (nth 1 slot)))
602 0 : (setcar (cdr slot) (1+ count))
603 0 : (if (or (not maxcount) (< count maxcount))
604 0 : (nconc slot (list char)))))
605 0 : (setq chars (cons (list charset 1 char) chars)))))))))
606 0 : (nreverse chars)))
607 :
608 : (defun search-unencodable-char (coding-system)
609 : "Search forward from point for a character that is not encodable.
610 : It asks which coding system to check.
611 : If such a character is found, set point after that character.
612 : Otherwise, don't move point.
613 :
614 : When called from a program, the value is the position of the unencodable
615 : character found, or nil if all characters are encodable."
616 : (interactive
617 0 : (list (let ((default (or buffer-file-coding-system 'us-ascii)))
618 0 : (read-coding-system
619 0 : (format "Coding-system (default %s): " default)
620 0 : default))))
621 0 : (let ((pos (unencodable-char-position (point) (point-max) coding-system)))
622 0 : (if pos
623 0 : (goto-char (1+ pos))
624 0 : (message "All following characters are encodable by %s" coding-system))
625 0 : pos))
626 :
627 : (defvar last-coding-system-specified nil
628 : "Most recent coding system explicitly specified by the user when asked.
629 : This variable is set whenever Emacs asks the user which coding system
630 : to use in order to write a file. If you set it to nil explicitly,
631 : then call `write-region', then afterward this variable will be non-nil
632 : only if the user was explicitly asked and specified a coding system.")
633 :
634 : (defvar select-safe-coding-system-accept-default-p nil
635 : "If non-nil, a function to control the behavior of coding system selection.
636 : The meaning is the same as the argument ACCEPT-DEFAULT-P of the
637 : function `select-safe-coding-system' (which see). This variable
638 : overrides that argument.")
639 :
640 : (defun sanitize-coding-system-list (codings)
641 : "Return a list of coding systems presumably more user-friendly than CODINGS."
642 : ;; Change each safe coding system to the corresponding
643 : ;; mime-charset name if it is also a coding system. Such a name
644 : ;; is more friendly to users.
645 0 : (setq codings
646 0 : (mapcar (lambda (cs)
647 0 : (let ((mime-charset (coding-system-get cs 'mime-charset)))
648 0 : (if (and mime-charset (coding-system-p mime-charset)
649 0 : (coding-system-equal cs mime-charset))
650 0 : mime-charset cs)))
651 0 : codings))
652 :
653 : ;; Don't offer variations with locking shift, which you
654 : ;; basically never want.
655 0 : (let (l)
656 0 : (dolist (elt codings (setq codings (nreverse l)))
657 0 : (unless (or (eq 'coding-category-iso-7-else
658 0 : (coding-system-category elt))
659 0 : (eq 'coding-category-iso-8-else
660 0 : (coding-system-category elt)))
661 0 : (push elt l))))
662 :
663 : ;; Remove raw-text, emacs-mule and no-conversion unless nothing
664 : ;; else is available.
665 0 : (or (delq 'raw-text
666 0 : (delq 'emacs-mule
667 0 : (delq 'no-conversion (copy-sequence codings))))
668 0 : codings))
669 :
670 : (defun select-safe-coding-system-interactively (from to codings unsafe
671 : &optional rejected default)
672 : "Select interactively a coding system for the region FROM ... TO.
673 : FROM can be a string, as in `write-region'.
674 : CODINGS is the list of base coding systems known to be safe for this region,
675 : typically obtained with `find-coding-systems-region'.
676 : UNSAFE is a list of coding systems known to be unsafe for this region.
677 : REJECTED is a list of coding systems which were safe but for some reason
678 : were not recommended in the particular context.
679 : DEFAULT is the coding system to use by default in the query."
680 : ;; At first, if some defaults are unsafe, record at most 11
681 : ;; problematic characters and their positions for them by turning
682 : ;; (CODING ...)
683 : ;; into
684 : ;; ((CODING (POS . CHAR) (POS . CHAR) ...) ...)
685 0 : (if unsafe
686 0 : (setq unsafe
687 0 : (mapcar #'(lambda (coding)
688 0 : (cons coding
689 0 : (if (stringp from)
690 0 : (mapcar #'(lambda (pos)
691 0 : (cons pos (aref from pos)))
692 0 : (unencodable-char-position
693 0 : 0 (length from) coding
694 0 : 11 from))
695 0 : (mapcar #'(lambda (pos)
696 0 : (cons pos (char-after pos)))
697 0 : (unencodable-char-position
698 0 : from to coding 11)))))
699 0 : unsafe)))
700 :
701 0 : (setq codings (sanitize-coding-system-list codings))
702 :
703 0 : (let ((window-configuration (current-window-configuration))
704 0 : (bufname (buffer-name))
705 : coding-system)
706 0 : (save-excursion
707 : ;; If some defaults are unsafe, make sure the offending
708 : ;; buffer is displayed.
709 0 : (when (and unsafe (not (stringp from)))
710 0 : (pop-to-buffer bufname)
711 0 : (goto-char (apply 'min (mapcar #'(lambda (x) (car (cadr x)))
712 0 : unsafe))))
713 : ;; Then ask users to select one from CODINGS while showing
714 : ;; the reason why none of the defaults are not used.
715 0 : (with-output-to-temp-buffer "*Warning*"
716 0 : (with-current-buffer standard-output
717 0 : (if (and (null rejected) (null unsafe))
718 0 : (insert "No default coding systems to try for "
719 0 : (if (stringp from)
720 0 : (format "string \"%s\"." from)
721 0 : (format-message "buffer `%s'." bufname)))
722 0 : (insert
723 : "These default coding systems were tried to encode"
724 0 : (if (stringp from)
725 0 : (concat " \"" (if (> (length from) 10)
726 0 : (concat (substring from 0 10) "...\"")
727 0 : (concat from "\"")))
728 0 : (format-message " text\nin the buffer `%s'" bufname))
729 0 : ":\n")
730 0 : (let ((pos (point))
731 : (fill-prefix " "))
732 0 : (dolist (x (append rejected unsafe))
733 0 : (princ " ") (princ x))
734 0 : (insert "\n")
735 0 : (fill-region-as-paragraph pos (point)))
736 0 : (when rejected
737 0 : (insert "These safely encode the text in the buffer,
738 : but are not recommended for encoding text in this context,
739 0 : e.g., for sending an email message.\n ")
740 0 : (dolist (x rejected)
741 0 : (princ " ") (princ x))
742 0 : (insert "\n"))
743 0 : (when unsafe
744 0 : (insert (if rejected "The other coding systems"
745 0 : "However, each of them")
746 0 : (substitute-command-keys
747 0 : " encountered characters it couldn't encode:\n"))
748 0 : (dolist (coding unsafe)
749 0 : (insert (format " %s cannot encode these:" (car coding)))
750 0 : (let ((i 0)
751 : (func1
752 0 : #'(lambda (bufname pos)
753 0 : (when (buffer-live-p (get-buffer bufname))
754 0 : (pop-to-buffer bufname)
755 0 : (goto-char pos))))
756 : (func2
757 0 : #'(lambda (bufname pos coding)
758 0 : (when (buffer-live-p (get-buffer bufname))
759 0 : (pop-to-buffer bufname)
760 0 : (if (< (point) pos)
761 0 : (goto-char pos)
762 0 : (forward-char 1)
763 0 : (search-unencodable-char coding)
764 0 : (forward-char -1))))))
765 0 : (dolist (elt (cdr coding))
766 0 : (insert " ")
767 0 : (if (stringp from)
768 0 : (insert (if (< i 10) (cdr elt) "..."))
769 0 : (if (< i 10)
770 0 : (insert-text-button
771 0 : (cdr elt)
772 : :type 'help-xref
773 : 'face 'link
774 : 'help-echo
775 : "mouse-2, RET: jump to this character"
776 0 : 'help-function func1
777 0 : 'help-args (list bufname (car elt)))
778 0 : (insert-text-button
779 : "..."
780 : :type 'help-xref
781 : 'face 'link
782 : 'help-echo
783 : "mouse-2, RET: next unencodable character"
784 0 : 'help-function func2
785 0 : 'help-args (list bufname (car elt)
786 0 : (car coding)))))
787 0 : (setq i (1+ i))))
788 0 : (insert "\n"))
789 0 : (insert (substitute-command-keys "\
790 :
791 : Click on a character (or switch to this window by `\\[other-window]'\n\
792 : and select the characters by RET) to jump to the place it appears,\n\
793 0 : where `\\[universal-argument] \\[what-cursor-position]' will give information about it.\n"))))
794 0 : (insert (substitute-command-keys "\nSelect \
795 : one of the safe coding systems listed below,\n\
796 : or cancel the writing with \\[keyboard-quit] and edit the buffer\n\
797 : to remove or modify the problematic characters,\n\
798 : or specify any other coding system (and risk losing\n\
799 0 : the problematic characters).\n\n"))
800 0 : (let ((pos (point))
801 : (fill-prefix " "))
802 0 : (dolist (x codings)
803 0 : (princ " ") (princ x))
804 0 : (insert "\n")
805 0 : (fill-region-as-paragraph pos (point)))))
806 :
807 : ;; Read a coding system.
808 0 : (setq coding-system
809 0 : (read-coding-system
810 0 : (format "Select coding system (default %s): " default)
811 0 : default))
812 0 : (setq last-coding-system-specified coding-system))
813 :
814 0 : (kill-buffer "*Warning*")
815 0 : (set-window-configuration window-configuration)
816 0 : coding-system))
817 :
818 : (defun select-safe-coding-system (from to &optional default-coding-system
819 : accept-default-p file)
820 : "Ask a user to select a safe coding system from candidates.
821 : The candidates of coding systems which can safely encode a text
822 : between FROM and TO are shown in a popup window. Among them, the most
823 : proper one is suggested as the default.
824 :
825 : The list of `buffer-file-coding-system' of the current buffer, the
826 : default `buffer-file-coding-system', and the most preferred coding
827 : system (if it corresponds to a MIME charset) is treated as the
828 : default coding system list. Among them, the first one that safely
829 : encodes the text is normally selected silently and returned without
830 : any user interaction. See also the command `prefer-coding-system'.
831 :
832 : However, the user is queried if the chosen coding system is
833 : inconsistent with what would be selected by `find-auto-coding' from
834 : coding cookies &c. if the contents of the region were read from a
835 : file. (That could lead to data corruption in a file subsequently
836 : re-visited and edited.)
837 :
838 : Optional 3rd arg DEFAULT-CODING-SYSTEM specifies a coding system or a
839 : list of coding systems to be prepended to the default coding system
840 : list. However, if DEFAULT-CODING-SYSTEM is a list and the first
841 : element is t, the cdr part is used as the default coding system list,
842 : i.e. current `buffer-file-coding-system', default `buffer-file-coding-system',
843 : and the most preferred coding system are not used.
844 :
845 : Optional 4th arg ACCEPT-DEFAULT-P, if non-nil, is a function to
846 : determine the acceptability of the silently selected coding system.
847 : It is called with that coding system, and should return nil if it
848 : should not be silently selected and thus user interaction is required.
849 :
850 : Optional 5th arg FILE is the file name to use for this purpose.
851 : That is different from `buffer-file-name' when handling `write-region'
852 : \(for example).
853 :
854 : The variable `select-safe-coding-system-accept-default-p', if non-nil,
855 : overrides ACCEPT-DEFAULT-P.
856 :
857 : Kludgy feature: if FROM is a string, the string is the target text,
858 : and TO is ignored."
859 339 : (if (not (listp default-coding-system))
860 339 : (setq default-coding-system (list default-coding-system)))
861 :
862 339 : (let ((no-other-defaults nil)
863 : auto-cs)
864 339 : (unless (or (stringp from) find-file-literally)
865 : ;; Find an auto-coding that is specified for the current
866 : ;; buffer and file from the region FROM and TO.
867 10 : (save-excursion
868 10 : (save-restriction
869 10 : (widen)
870 10 : (goto-char from)
871 10 : (setq auto-cs (find-auto-coding (or file buffer-file-name "")
872 10 : (- to from)))
873 10 : (if auto-cs
874 0 : (if (coding-system-p (car auto-cs))
875 0 : (setq auto-cs (car auto-cs))
876 0 : (display-warning
877 : 'mule
878 0 : (format-message "\
879 : Invalid coding system `%s' is specified
880 : for the current buffer/file by the %s.
881 : It is highly recommended to fix it before writing to a file."
882 0 : (car auto-cs)
883 0 : (if (eq (cdr auto-cs) :coding) ":coding tag"
884 0 : (format-message "variable `%s'" (cdr auto-cs))))
885 0 : :warning)
886 0 : (or (yes-or-no-p "Really proceed with writing? ")
887 0 : (error "Save aborted"))
888 339 : (setq auto-cs nil))))))
889 :
890 339 : (if (eq (car default-coding-system) t)
891 0 : (setq no-other-defaults t
892 339 : default-coding-system (cdr default-coding-system)))
893 :
894 : ;; Change elements of the list to (coding . base-coding).
895 339 : (setq default-coding-system
896 339 : (mapcar (function (lambda (x) (cons x (coding-system-base x))))
897 339 : default-coding-system))
898 :
899 339 : (if (and auto-cs (not no-other-defaults))
900 : ;; If the file has a coding cookie, use it regardless of any
901 : ;; other setting.
902 0 : (let ((base (coding-system-base auto-cs)))
903 0 : (unless (memq base '(nil undecided))
904 0 : (setq default-coding-system (list (cons auto-cs base)))
905 339 : (setq no-other-defaults t))))
906 :
907 339 : (unless no-other-defaults
908 : ;; If buffer-file-coding-system is not nil nor undecided, append it
909 : ;; to the defaults.
910 339 : (if buffer-file-coding-system
911 0 : (let ((base (coding-system-base buffer-file-coding-system)))
912 0 : (or (eq base 'undecided)
913 0 : (rassq base default-coding-system)
914 0 : (setq default-coding-system
915 0 : (append default-coding-system
916 339 : (list (cons buffer-file-coding-system base)))))))
917 :
918 339 : (unless (and buffer-file-coding-system-explicit
919 339 : (cdr buffer-file-coding-system-explicit))
920 : ;; If default buffer-file-coding-system is not nil nor undecided,
921 : ;; append it to the defaults.
922 339 : (when (default-value 'buffer-file-coding-system)
923 0 : (let ((base (coding-system-base
924 0 : (default-value 'buffer-file-coding-system))))
925 0 : (or (eq base 'undecided)
926 0 : (rassq base default-coding-system)
927 0 : (setq default-coding-system
928 0 : (append default-coding-system
929 0 : (list (cons (default-value
930 0 : 'buffer-file-coding-system)
931 339 : base)))))))
932 :
933 : ;; If the most preferred coding system has the property mime-charset,
934 : ;; append it to the defaults.
935 339 : (let ((preferred (coding-system-priority-list t))
936 : base)
937 339 : (and (coding-system-p preferred)
938 339 : (setq base (coding-system-base preferred))
939 339 : (coding-system-get preferred :mime-charset)
940 339 : (not (rassq base default-coding-system))
941 339 : (setq default-coding-system
942 339 : (append default-coding-system
943 339 : (list (cons preferred base))))))))
944 :
945 339 : (if select-safe-coding-system-accept-default-p
946 339 : (setq accept-default-p select-safe-coding-system-accept-default-p))
947 :
948 : ;; Decide the eol-type from the top of the default codings,
949 : ;; current buffer-file-coding-system, or default buffer-file-coding-system.
950 339 : (if default-coding-system
951 339 : (let ((default-eol-type (coding-system-eol-type
952 339 : (caar default-coding-system))))
953 339 : (if (and (vectorp default-eol-type) buffer-file-coding-system)
954 0 : (setq default-eol-type (coding-system-eol-type
955 339 : buffer-file-coding-system)))
956 339 : (if (and (vectorp default-eol-type)
957 339 : (default-value 'buffer-file-coding-system))
958 0 : (setq default-eol-type
959 0 : (coding-system-eol-type
960 339 : (default-value 'buffer-file-coding-system))))
961 339 : (if (and default-eol-type (not (vectorp default-eol-type)))
962 0 : (dolist (elt default-coding-system)
963 0 : (setcar elt (coding-system-change-eol-conversion
964 339 : (car elt) default-eol-type))))))
965 :
966 339 : (let ((codings (find-coding-systems-region from to))
967 : (coding-system nil)
968 339 : (tick (if (not (stringp from)) (buffer-chars-modified-tick)))
969 : safe rejected unsafe)
970 339 : (if (eq (car codings) 'undecided)
971 : ;; Any coding system is ok.
972 339 : (setq coding-system (caar default-coding-system))
973 : ;; Reverse the list so that elements are accumulated in safe,
974 : ;; rejected, and unsafe in the correct order.
975 0 : (setq default-coding-system (nreverse default-coding-system))
976 :
977 : ;; Classify the defaults into safe, rejected, and unsafe.
978 0 : (dolist (elt default-coding-system)
979 0 : (if (memq (cdr elt) codings)
980 : ;; This is safe. Is it acceptable?
981 0 : (if (and (functionp accept-default-p)
982 0 : (not (funcall accept-default-p (cdr elt))))
983 : ;; No, not acceptable.
984 0 : (push (car elt) rejected)
985 : ;; Yes, acceptable.
986 0 : (push (car elt) safe))
987 : ;; This is not safe.
988 0 : (push (car elt) unsafe)))
989 : ;; If there are safe ones, the first one is what we want.
990 0 : (if safe
991 339 : (setq coding-system (car safe))))
992 :
993 : ;; If all the defaults failed, ask a user.
994 339 : (when (not coding-system)
995 0 : (setq coding-system (select-safe-coding-system-interactively
996 339 : from to codings unsafe rejected (car codings))))
997 :
998 : ;; Check we're not inconsistent with what `coding:' spec &c would
999 : ;; give when file is re-read.
1000 : ;; But don't do this if we explicitly ignored the cookie
1001 : ;; by using `find-file-literally'.
1002 339 : (when (and auto-cs
1003 0 : (not (and
1004 0 : coding-system
1005 339 : (memq (coding-system-type coding-system) '(0 5)))))
1006 : ;; Merge coding-system and auto-cs as far as possible.
1007 0 : (if (not coding-system)
1008 0 : (setq coding-system auto-cs)
1009 0 : (if (not auto-cs)
1010 0 : (setq auto-cs coding-system)
1011 0 : (let ((eol-type-1 (coding-system-eol-type coding-system))
1012 0 : (eol-type-2 (coding-system-eol-type auto-cs)))
1013 0 : (if (eq (coding-system-base coding-system) 'undecided)
1014 0 : (setq coding-system (coding-system-change-text-conversion
1015 0 : coding-system auto-cs))
1016 0 : (if (eq (coding-system-base auto-cs) 'undecided)
1017 0 : (setq auto-cs (coding-system-change-text-conversion
1018 0 : auto-cs coding-system))))
1019 0 : (if (vectorp eol-type-1)
1020 0 : (or (vectorp eol-type-2)
1021 0 : (setq coding-system (coding-system-change-eol-conversion
1022 0 : coding-system eol-type-2)))
1023 0 : (if (vectorp eol-type-2)
1024 0 : (setq auto-cs (coding-system-change-eol-conversion
1025 0 : auto-cs eol-type-1)))))))
1026 :
1027 0 : (if (and auto-cs
1028 : ;; Don't barf if writing a compressed file, say.
1029 : ;; This check perhaps isn't ideal, but is probably
1030 : ;; the best thing to do.
1031 0 : (not (auto-coding-alist-lookup (or file buffer-file-name "")))
1032 0 : (not (coding-system-equal coding-system auto-cs)))
1033 0 : (unless (yes-or-no-p
1034 0 : (format "Selected encoding %s disagrees with \
1035 : %s specified by file contents. Really save (else edit coding cookies \
1036 0 : and try again)? " coding-system auto-cs))
1037 339 : (error "Save aborted"))))
1038 339 : (when (and tick (/= tick (buffer-chars-modified-tick)))
1039 339 : (error "Canceled because the buffer was modified"))
1040 339 : (if (and (eq (coding-system-type coding-system) 'undecided)
1041 0 : (coding-system-get coding-system :prefer-utf-8)
1042 0 : (or (multibyte-string-p from)
1043 0 : (and (number-or-marker-p from)
1044 0 : (< (- to from)
1045 339 : (- (position-bytes to) (position-bytes from))))))
1046 0 : (setq coding-system
1047 339 : (coding-system-change-text-conversion coding-system 'utf-8)))
1048 339 : coding-system)))
1049 :
1050 : (setq select-safe-coding-system-function 'select-safe-coding-system)
1051 :
1052 : (defun select-message-coding-system ()
1053 : "Return a coding system to encode the outgoing message of the current buffer.
1054 : It at first tries the first coding system found in these variables
1055 : in this order:
1056 : (1) local value of `buffer-file-coding-system'
1057 : (2) value of `sendmail-coding-system'
1058 : (3) value of `default-sendmail-coding-system'
1059 : (4) default value of `buffer-file-coding-system'
1060 : If the found coding system can't encode the current buffer,
1061 : or none of them are bound to a coding system,
1062 : it asks the user to select a proper coding system."
1063 0 : (let ((coding (or (and (local-variable-p 'buffer-file-coding-system)
1064 0 : buffer-file-coding-system)
1065 0 : sendmail-coding-system
1066 0 : default-sendmail-coding-system
1067 0 : (default-value 'buffer-file-coding-system))))
1068 0 : (if (eq coding 'no-conversion)
1069 : ;; We should never use no-conversion for outgoing mail.
1070 0 : (setq coding nil))
1071 0 : (if (fboundp select-safe-coding-system-function)
1072 0 : (funcall select-safe-coding-system-function
1073 0 : (point-min) (point-max) coding
1074 0 : (function (lambda (x) (coding-system-get x :mime-charset))))
1075 0 : coding)))
1076 :
1077 : ;;; Language support stuff.
1078 :
1079 : (defvar language-info-alist nil
1080 : "Alist of language environment definitions.
1081 : Each element looks like:
1082 : (LANGUAGE-NAME . ((KEY . INFO) ...))
1083 : where LANGUAGE-NAME is a string, the name of the language environment,
1084 : KEY is a symbol denoting the kind of information, and
1085 : INFO is the data associated with KEY.
1086 : Meaningful values for KEY include
1087 :
1088 : documentation value is documentation of what this language environment
1089 : is meant for, and how to use it.
1090 : charset value is a list of the character sets mainly used
1091 : by this language environment.
1092 : sample-text value is an expression which is evalled to generate
1093 : a line of text written using characters appropriate
1094 : for this language environment.
1095 : setup-function value is a function to call to switch to this
1096 : language environment.
1097 : exit-function value is a function to call to leave this
1098 : language environment.
1099 : coding-system value is a list of coding systems that are good for
1100 : saving text written in this language environment.
1101 : This list serves as suggestions to the user;
1102 : in effect, as a kind of documentation.
1103 : coding-priority value is a list of coding systems for this language
1104 : environment, in order of decreasing priority.
1105 : This is used to set up the coding system priority
1106 : list when you switch to this language environment.
1107 : nonascii-translation
1108 : value is a charset of dimension one to use for
1109 : converting a unibyte character to multibyte
1110 : and vice versa.
1111 : input-method value is a default input method for this language
1112 : environment.
1113 : features value is a list of features requested in this
1114 : language environment.
1115 : ctext-non-standard-encodings
1116 : value is a list of non-standard encoding names used
1117 : in extended segments of CTEXT. See the variable
1118 : `ctext-non-standard-encodings' for more detail.
1119 :
1120 : The following key takes effect only when multibyte characters are
1121 : globally disabled, i.e. the default value of `enable-multibyte-characters'
1122 : is nil (which is an obsolete and deprecated use):
1123 :
1124 : unibyte-display value is a coding system to encode characters for
1125 : the terminal. Characters in the range of 160 to
1126 : 255 display not as octal escapes, but as non-ASCII
1127 : characters in this language environment.")
1128 :
1129 : (defun get-language-info (lang-env key)
1130 : "Return information listed under KEY for language environment LANG-ENV.
1131 : KEY is a symbol denoting the kind of information.
1132 : For a list of useful values for KEY and their meanings,
1133 : see `language-info-alist'."
1134 9 : (if (symbolp lang-env)
1135 9 : (setq lang-env (symbol-name lang-env)))
1136 9 : (let ((lang-slot (assoc-string lang-env language-info-alist t)))
1137 9 : (if lang-slot
1138 9 : (cdr (assq key (cdr lang-slot))))))
1139 :
1140 : (defun set-language-info (lang-env key info)
1141 : "Modify part of the definition of language environment LANG-ENV.
1142 : Specifically, this stores the information INFO under KEY
1143 : in the definition of this language environment.
1144 : KEY is a symbol denoting the kind of information.
1145 : INFO is the value for that information.
1146 :
1147 : For a list of useful values for KEY and their meanings,
1148 : see `language-info-alist'."
1149 0 : (if (symbolp lang-env)
1150 0 : (setq lang-env (symbol-name lang-env)))
1151 0 : (set-language-info-internal lang-env key info)
1152 0 : (if (equal lang-env current-language-environment)
1153 0 : (cond ((eq key 'coding-priority)
1154 0 : (set-language-environment-coding-systems lang-env)
1155 0 : (set-language-environment-charset lang-env))
1156 0 : ((eq key 'input-method)
1157 0 : (set-language-environment-input-method lang-env))
1158 0 : ((eq key 'nonascii-translation)
1159 0 : (set-language-environment-nonascii-translation lang-env))
1160 0 : ((eq key 'charset)
1161 0 : (set-language-environment-charset lang-env))
1162 0 : ((and (not (default-value 'enable-multibyte-characters))
1163 0 : (or (eq key 'unibyte-syntax) (eq key 'unibyte-display)))
1164 0 : (set-language-environment-unibyte lang-env)))))
1165 :
1166 : (defun set-language-info-internal (lang-env key info)
1167 : "Internal use only.
1168 : Arguments are the same as `set-language-info'."
1169 554 : (let (lang-slot key-slot)
1170 554 : (setq lang-slot (assoc lang-env language-info-alist))
1171 554 : (if (null lang-slot) ; If no slot for the language, add it.
1172 0 : (setq lang-slot (list lang-env)
1173 554 : language-info-alist (cons lang-slot language-info-alist)))
1174 554 : (setq key-slot (assq key lang-slot))
1175 554 : (if (null key-slot) ; If no slot for the key, add it.
1176 0 : (progn
1177 0 : (setq key-slot (list key))
1178 554 : (setcdr lang-slot (cons key-slot (cdr lang-slot)))))
1179 554 : (setcdr key-slot (purecopy info))
1180 : ;; Update the custom-type of `current-language-environment'.
1181 554 : (put 'current-language-environment 'custom-type
1182 554 : (cons 'choice (mapcar
1183 : (lambda (lang)
1184 40442 : (list 'const lang))
1185 554 : (sort (mapcar 'car language-info-alist) 'string<))))))
1186 :
1187 : (defun set-language-info-alist (lang-env alist &optional parents)
1188 : "Store ALIST as the definition of language environment LANG-ENV.
1189 : ALIST is an alist of KEY and INFO values. See the documentation of
1190 : `language-info-alist' for the meanings of KEY and INFO.
1191 :
1192 : Optional arg PARENTS is a list of parent menu names; it specifies
1193 : where to put this language environment in the
1194 : Describe Language Environment and Set Language Environment menus.
1195 : For example, (\"European\") means to put this language environment
1196 : in the European submenu in each of those two menus."
1197 76 : (cond ((symbolp lang-env)
1198 0 : (setq lang-env (symbol-name lang-env)))
1199 76 : ((stringp lang-env)
1200 76 : (setq lang-env (purecopy lang-env))))
1201 76 : (let ((describe-map describe-language-environment-map)
1202 76 : (setup-map setup-language-environment-map))
1203 76 : (if parents
1204 54 : (let ((l parents)
1205 : map parent-symbol parent prompt)
1206 108 : (while l
1207 54 : (if (symbolp (setq parent-symbol (car l)))
1208 0 : (setq parent (symbol-name parent))
1209 54 : (setq parent parent-symbol parent-symbol (intern parent)))
1210 54 : (setq map (lookup-key describe-map (vector parent-symbol)))
1211 : ;; This prompt string is for define-prefix-command, so
1212 : ;; that the map it creates will be suitable for a menu.
1213 54 : (or map (setq prompt (format "%s Environment" parent)))
1214 54 : (if (not map)
1215 0 : (progn
1216 0 : (setq map (intern (format "describe-%s-environment-map"
1217 0 : (downcase parent))))
1218 0 : (define-prefix-command map nil prompt)
1219 0 : (define-key-after describe-map (vector parent-symbol)
1220 54 : (cons parent map))))
1221 54 : (setq describe-map (symbol-value map))
1222 54 : (setq map (lookup-key setup-map (vector parent-symbol)))
1223 54 : (if (not map)
1224 0 : (progn
1225 0 : (setq map (intern (format "setup-%s-environment-map"
1226 0 : (downcase parent))))
1227 0 : (define-prefix-command map nil prompt)
1228 0 : (define-key-after setup-map (vector parent-symbol)
1229 54 : (cons parent map))))
1230 54 : (setq setup-map (symbol-value map))
1231 76 : (setq l (cdr l)))))
1232 :
1233 : ;; Set up menu items for this language env.
1234 76 : (let ((doc (assq 'documentation alist)))
1235 76 : (when doc
1236 75 : (define-key-after describe-map (vector (intern lang-env))
1237 76 : (cons lang-env 'describe-specified-language-support))))
1238 76 : (define-key-after setup-map (vector (intern lang-env))
1239 76 : (cons lang-env 'setup-specified-language-environment))
1240 :
1241 76 : (dolist (elt alist)
1242 554 : (set-language-info-internal lang-env (car elt) (cdr elt)))
1243 :
1244 76 : (if (equal lang-env current-language-environment)
1245 76 : (set-language-environment lang-env))))
1246 :
1247 : (defun read-language-name (key prompt &optional default)
1248 : "Read a language environment name which has information for KEY.
1249 : If KEY is nil, read any language environment.
1250 : Prompt with PROMPT. DEFAULT is the default choice of language environment.
1251 : This returns a language environment name as a string."
1252 0 : (let* ((completion-ignore-case t)
1253 0 : (name (completing-read prompt
1254 0 : language-info-alist
1255 0 : (and key
1256 0 : (function (lambda (elm) (and (listp elm) (assq key elm)))))
1257 0 : t nil nil default)))
1258 0 : (if (and (> (length name) 0)
1259 0 : (or (not key)
1260 0 : (get-language-info name key)))
1261 0 : name)))
1262 :
1263 : ;;; Multilingual input methods.
1264 : (defgroup leim nil
1265 : "LEIM: Libraries of Emacs Input Methods."
1266 : :group 'mule)
1267 :
1268 : (defconst leim-list-file-name "leim-list.el"
1269 : "Name of LEIM list file.
1270 : This file contains a list of libraries of Emacs input methods (LEIM)
1271 : in the format of Lisp expression for registering each input method.
1272 : Emacs loads this file at startup time.")
1273 :
1274 : (defconst leim-list-header (format-message
1275 : ";;; %s -- list of LEIM (Library of Emacs Input Method) -*-coding: utf-8;-*-
1276 : ;;
1277 : ;; This file is automatically generated.
1278 : ;;
1279 : ;; This file contains a list of LEIM (Library of Emacs Input Method)
1280 : ;; methods in the same directory as this file. Loading this file
1281 : ;; registers all the input methods in Emacs.
1282 : ;;
1283 : ;; Each entry has the form:
1284 : ;; (register-input-method
1285 : ;; INPUT-METHOD LANGUAGE-NAME ACTIVATE-FUNC
1286 : ;; TITLE DESCRIPTION
1287 : ;; ARG ...)
1288 : ;; See the function `register-input-method' for the meanings of the arguments.
1289 : ;;
1290 : ;; If this directory is included in `load-path', Emacs automatically
1291 : ;; loads this file at startup time.
1292 :
1293 : "
1294 : leim-list-file-name)
1295 : "Header to be inserted in LEIM list file.")
1296 :
1297 : (defconst leim-list-entry-regexp "^(register-input-method"
1298 : "Regexp matching head of each entry in LEIM list file.
1299 : See also the variable `leim-list-header'.")
1300 :
1301 : (defvar update-leim-list-functions
1302 : '(quail-update-leim-list-file)
1303 : "List of functions to call to update LEIM list file.
1304 : Each function is called with one arg, LEIM directory name.")
1305 :
1306 : (defun update-leim-list-file (&rest dirs)
1307 : "Update LEIM list file in directories DIRS."
1308 0 : (dolist (function update-leim-list-functions)
1309 0 : (apply function dirs)))
1310 :
1311 : (defvar current-input-method nil
1312 : "The current input method for multilingual text.
1313 : If nil, that means no input method is activated now.")
1314 : (make-variable-buffer-local 'current-input-method)
1315 : (put 'current-input-method 'permanent-local t)
1316 :
1317 : (defvar current-input-method-title nil
1318 : "Title string of the current input method shown in mode line.")
1319 : (make-variable-buffer-local 'current-input-method-title)
1320 : (put 'current-input-method-title 'permanent-local t)
1321 :
1322 : (define-widget 'mule-input-method-string 'string
1323 : "String widget with completion for input method."
1324 : :completions
1325 : (lambda (string pred action)
1326 : (let ((completion-ignore-case t))
1327 : (complete-with-action action input-method-alist string pred)))
1328 : :prompt-history 'input-method-history)
1329 :
1330 : (defcustom default-input-method nil
1331 : "Default input method for multilingual text (a string).
1332 : This is the input method activated automatically by the command
1333 : `toggle-input-method' (\\[toggle-input-method])."
1334 : :link '(custom-manual "(emacs)Input Methods")
1335 : :group 'mule
1336 : :type `(choice (const nil)
1337 : mule-input-method-string)
1338 : :set-after '(current-language-environment))
1339 :
1340 : (put 'input-method-function 'permanent-local t)
1341 :
1342 : (defvar input-method-history nil
1343 : "History list of input methods read from the minibuffer.
1344 :
1345 : Maximum length of the history list is determined by the value
1346 : of `history-length', which see.")
1347 : (make-variable-buffer-local 'input-method-history)
1348 : (put 'input-method-history 'permanent-local t)
1349 :
1350 : (define-obsolete-variable-alias
1351 : 'inactivate-current-input-method-function
1352 : 'deactivate-current-input-method-function "24.3")
1353 : (defvar deactivate-current-input-method-function nil
1354 : "Function to call for deactivating the current input method.
1355 : Every input method should set this to an appropriate value when activated.
1356 : This function is called with no argument.
1357 :
1358 : This function should never change the value of `current-input-method'.
1359 : It is set to nil by the function `deactivate-input-method'.")
1360 : (make-variable-buffer-local 'deactivate-current-input-method-function)
1361 : (put 'deactivate-current-input-method-function 'permanent-local t)
1362 :
1363 : (defvar describe-current-input-method-function nil
1364 : "Function to call for describing the current input method.
1365 : This function is called with no argument.")
1366 : (make-variable-buffer-local 'describe-current-input-method-function)
1367 : (put 'describe-current-input-method-function 'permanent-local t)
1368 :
1369 : (defvar input-method-alist nil
1370 : "Alist of input method names vs how to use them.
1371 : Each element has the form:
1372 : (INPUT-METHOD LANGUAGE-ENV ACTIVATE-FUNC TITLE DESCRIPTION ARGS...)
1373 : See the function `register-input-method' for the meanings of the elements.")
1374 : ;;;###autoload
1375 : (put 'input-method-alist 'risky-local-variable t)
1376 :
1377 : (defun register-input-method (input-method lang-env &rest args)
1378 : "Register INPUT-METHOD as an input method for language environment LANG-ENV.
1379 :
1380 : INPUT-METHOD and LANG-ENV are symbols or strings.
1381 : ACTIVATE-FUNC is a function to call to activate this method.
1382 : TITLE is a string to show in the mode line when this method is active.
1383 : DESCRIPTION is a string describing this method and what it is good for.
1384 : The ARGS, if any, are passed as arguments to ACTIVATE-FUNC.
1385 : All told, the arguments to ACTIVATE-FUNC are INPUT-METHOD and the ARGS.
1386 :
1387 : This function is mainly used in the file \"leim-list.el\" which is
1388 : created at Emacs build time, registering all Quail input methods
1389 : contained in the Emacs distribution.
1390 :
1391 : In case you want to register a new Quail input method by yourself, be
1392 : careful to use the same input method title as given in the third
1393 : parameter of `quail-define-package'. (If the values are different, the
1394 : string specified in this function takes precedence.)
1395 :
1396 : The commands `describe-input-method' and `list-input-methods' need
1397 : these duplicated values to show some information about input methods
1398 : without loading the relevant Quail packages.
1399 : \n(fn INPUT-METHOD LANG-ENV ACTIVATE-FUNC TITLE DESCRIPTION &rest ARGS)"
1400 192 : (if (symbolp lang-env)
1401 0 : (setq lang-env (symbol-name lang-env))
1402 192 : (setq lang-env (purecopy lang-env)))
1403 192 : (if (symbolp input-method)
1404 0 : (setq input-method (symbol-name input-method))
1405 192 : (setq input-method (purecopy input-method)))
1406 192 : (setq args (mapcar 'purecopy args))
1407 192 : (let ((info (cons lang-env args))
1408 192 : (slot (assoc input-method input-method-alist)))
1409 192 : (if slot
1410 192 : (setcdr slot info)
1411 0 : (setq slot (cons input-method info))
1412 192 : (setq input-method-alist (cons slot input-method-alist)))))
1413 :
1414 : (defun read-input-method-name (prompt &optional default inhibit-null)
1415 : "Read a name of input method from a minibuffer prompting with PROMPT.
1416 : If DEFAULT is non-nil, use that as the default,
1417 : and substitute it into PROMPT at the first `%s'.
1418 : If INHIBIT-NULL is non-nil, null input signals an error.
1419 :
1420 : The return value is a string."
1421 0 : (if default
1422 0 : (setq prompt (format prompt default)))
1423 0 : (let* ((completion-ignore-case t)
1424 : ;; As it is quite normal to change input method in the
1425 : ;; minibuffer, we must enable it even if
1426 : ;; enable-recursive-minibuffers is currently nil.
1427 : (enable-recursive-minibuffers t)
1428 : ;; This binding is necessary because input-method-history is
1429 : ;; buffer local.
1430 0 : (input-method (completing-read prompt input-method-alist
1431 : nil t nil 'input-method-history
1432 0 : (if (and default (symbolp default))
1433 0 : (symbol-name default)
1434 0 : default))))
1435 0 : (if (and input-method (symbolp input-method))
1436 0 : (setq input-method (symbol-name input-method)))
1437 0 : (if (> (length input-method) 0)
1438 0 : input-method
1439 0 : (if inhibit-null
1440 0 : (error "No valid input method is specified")))))
1441 :
1442 : (defun activate-input-method (input-method)
1443 : "Switch to input method INPUT-METHOD for the current buffer.
1444 : If some other input method is already active, turn it off first.
1445 : If INPUT-METHOD is nil, deactivate any current input method."
1446 0 : (if (and input-method (symbolp input-method))
1447 0 : (setq input-method (symbol-name input-method)))
1448 0 : (if (and current-input-method
1449 0 : (not (string= current-input-method input-method)))
1450 0 : (deactivate-input-method))
1451 0 : (unless (or current-input-method (null input-method))
1452 0 : (let ((slot (assoc input-method input-method-alist)))
1453 0 : (if (null slot)
1454 0 : (error "Can't activate input method `%s'" input-method))
1455 0 : (setq current-input-method-title nil)
1456 0 : (let ((func (nth 2 slot)))
1457 0 : (if (functionp func)
1458 0 : (apply (nth 2 slot) input-method (nthcdr 5 slot))
1459 0 : (if (and (consp func) (symbolp (car func)) (symbolp (cdr func)))
1460 0 : (progn
1461 0 : (require (cdr func))
1462 0 : (apply (car func) input-method (nthcdr 5 slot)))
1463 0 : (error "Can't activate input method `%s'" input-method))))
1464 0 : (setq current-input-method input-method)
1465 0 : (or (stringp current-input-method-title)
1466 0 : (setq current-input-method-title (nth 3 slot)))
1467 0 : (unwind-protect
1468 0 : (run-hooks 'input-method-activate-hook)
1469 0 : (force-mode-line-update)))))
1470 :
1471 : (defun deactivate-input-method ()
1472 : "Turn off the current input method."
1473 0 : (when current-input-method
1474 0 : (if input-method-history
1475 0 : (unless (string= current-input-method (car input-method-history))
1476 0 : (setq input-method-history
1477 0 : (cons current-input-method
1478 0 : (delete current-input-method input-method-history))))
1479 0 : (setq input-method-history (list current-input-method)))
1480 0 : (unwind-protect
1481 0 : (progn
1482 0 : (setq input-method-function nil
1483 0 : current-input-method-title nil)
1484 0 : (funcall deactivate-current-input-method-function))
1485 0 : (unwind-protect
1486 0 : (run-hooks 'input-method-deactivate-hook)
1487 0 : (setq current-input-method nil)
1488 0 : (force-mode-line-update)))))
1489 :
1490 : (define-obsolete-function-alias
1491 : 'inactivate-input-method
1492 : 'deactivate-input-method "24.3")
1493 :
1494 : (defun set-input-method (input-method &optional interactive)
1495 : "Select and activate input method INPUT-METHOD for the current buffer.
1496 : This also sets the default input method to the one you specify.
1497 : If INPUT-METHOD is nil, this function turns off the input method, and
1498 : also causes you to be prompted for a name of an input method the next
1499 : time you invoke \\[toggle-input-method].
1500 : When called interactively, the optional arg INTERACTIVE is non-nil,
1501 : which marks the variable `default-input-method' as set for Custom buffers.
1502 :
1503 : To deactivate the input method interactively, use \\[toggle-input-method].
1504 : To deactivate it programmatically, use `deactivate-input-method'."
1505 : (interactive
1506 0 : (let* ((default (or (car input-method-history) default-input-method)))
1507 0 : (list (read-input-method-name
1508 0 : (if default "Select input method (default %s): " "Select input method: ")
1509 0 : default t)
1510 0 : t)))
1511 0 : (activate-input-method input-method)
1512 0 : (setq default-input-method input-method)
1513 0 : (when interactive
1514 0 : (customize-mark-as-set 'default-input-method))
1515 0 : default-input-method)
1516 :
1517 : (defvar toggle-input-method-active nil
1518 : "Non-nil inside `toggle-input-method'.")
1519 :
1520 : (defun toggle-input-method (&optional arg interactive)
1521 : "Enable or disable multilingual text input method for the current buffer.
1522 : Only one input method can be enabled at any time in a given buffer.
1523 :
1524 : The normal action is to enable an input method if none was enabled,
1525 : and disable the current one otherwise. Which input method to enable
1526 : can be determined in various ways--either the one most recently used,
1527 : or the one specified by `default-input-method', or as a last resort
1528 : by reading the name of an input method in the minibuffer.
1529 :
1530 : With a prefix argument ARG, read an input method name with the minibuffer
1531 : and enable that one. The default is the most recent input method specified
1532 : \(not including the currently active input method, if any).
1533 :
1534 : When called interactively, the optional argument INTERACTIVE is non-nil,
1535 : which marks the variable `default-input-method' as set for Custom buffers."
1536 :
1537 : (interactive "P\np")
1538 0 : (if toggle-input-method-active
1539 0 : (error "Recursive use of `toggle-input-method'"))
1540 0 : (if (and current-input-method (not arg))
1541 0 : (deactivate-input-method)
1542 0 : (let ((toggle-input-method-active t)
1543 0 : (default (or (car input-method-history) default-input-method)))
1544 0 : (if (and arg default (equal current-input-method default)
1545 0 : (> (length input-method-history) 1))
1546 0 : (setq default (nth 1 input-method-history)))
1547 0 : (activate-input-method
1548 0 : (if (or arg (not default))
1549 0 : (progn
1550 0 : (read-input-method-name
1551 0 : (if default "Input method (default %s): " "Input method: " )
1552 0 : default t))
1553 0 : default))
1554 0 : (unless default-input-method
1555 0 : (prog1
1556 0 : (setq default-input-method current-input-method)
1557 0 : (when interactive
1558 0 : (customize-mark-as-set 'default-input-method)))))))
1559 :
1560 : (autoload 'help-buffer "help-mode")
1561 :
1562 : (defun describe-input-method (input-method)
1563 : "Describe input method INPUT-METHOD."
1564 : (interactive
1565 0 : (list (read-input-method-name
1566 0 : "Describe input method (default current choice): ")))
1567 0 : (if (and input-method (symbolp input-method))
1568 0 : (setq input-method (symbol-name input-method)))
1569 0 : (help-setup-xref (list #'describe-input-method
1570 0 : (or input-method current-input-method))
1571 0 : (called-interactively-p 'interactive))
1572 :
1573 0 : (if (null input-method)
1574 0 : (describe-current-input-method)
1575 0 : (let ((current current-input-method))
1576 0 : (condition-case nil
1577 0 : (progn
1578 0 : (save-excursion
1579 0 : (activate-input-method input-method)
1580 0 : (describe-current-input-method))
1581 0 : (activate-input-method current))
1582 : (error
1583 0 : (activate-input-method current)
1584 0 : (help-setup-xref (list #'describe-input-method input-method)
1585 0 : (called-interactively-p 'interactive))
1586 0 : (with-output-to-temp-buffer (help-buffer)
1587 0 : (let ((elt (assoc input-method input-method-alist)))
1588 0 : (princ (format-message
1589 : "Input method: %s (`%s' in mode line) for %s\n %s\n"
1590 0 : input-method (nth 3 elt) (nth 1 elt) (nth 4 elt))))))))))
1591 :
1592 : (defun describe-current-input-method ()
1593 : "Describe the input method currently in use.
1594 : This is a subroutine for `describe-input-method'."
1595 0 : (if current-input-method
1596 0 : (if (and (symbolp describe-current-input-method-function)
1597 0 : (fboundp describe-current-input-method-function))
1598 0 : (funcall describe-current-input-method-function)
1599 0 : (message "No way to describe the current input method `%s'"
1600 0 : current-input-method)
1601 0 : (ding))
1602 0 : (error "No input method is activated now")))
1603 :
1604 : (defun read-multilingual-string (prompt &optional initial-input input-method)
1605 : "Read a multilingual string from minibuffer, prompting with string PROMPT.
1606 : The input method selected last time is activated in minibuffer.
1607 : If optional second argument INITIAL-INPUT is non-nil, insert it in the
1608 : minibuffer initially.
1609 : Optional 3rd argument INPUT-METHOD specifies the input method to be activated
1610 : instead of the one selected last time. It is a symbol or a string."
1611 0 : (setq input-method
1612 0 : (or input-method
1613 0 : current-input-method
1614 0 : default-input-method
1615 0 : (read-input-method-name "Input method: " nil t)))
1616 0 : (if (and input-method (symbolp input-method))
1617 0 : (setq input-method (symbol-name input-method)))
1618 0 : (let ((prev-input-method current-input-method))
1619 0 : (unwind-protect
1620 0 : (progn
1621 0 : (activate-input-method input-method)
1622 0 : (read-string prompt initial-input nil nil t))
1623 0 : (activate-input-method prev-input-method))))
1624 :
1625 : ;; Variables to control behavior of input methods. All input methods
1626 : ;; should react to these variables.
1627 :
1628 : (defcustom input-method-verbose-flag 'default
1629 : "A flag to control extra guidance given by input methods.
1630 : The value should be nil, t, `complex-only', or `default'.
1631 :
1632 : The extra guidance is done by showing list of available keys in echo
1633 : area. When you use the input method in the minibuffer, the guidance
1634 : is shown at the bottom short window (split from the existing window).
1635 :
1636 : If the value is t, extra guidance is always given, if the value is
1637 : nil, extra guidance is always suppressed.
1638 :
1639 : If the value is `complex-only', only complex input methods such as
1640 : `chinese-py' and `japanese' give extra guidance.
1641 :
1642 : If the value is `default', complex input methods always give extra
1643 : guidance, but simple input methods give it only when you are not in
1644 : the minibuffer.
1645 :
1646 : See also the variable `input-method-highlight-flag'."
1647 : :type '(choice (const :tag "Always" t) (const :tag "Never" nil)
1648 : (const complex-only) (const default))
1649 : :group 'mule)
1650 :
1651 : (defcustom input-method-highlight-flag t
1652 : "If this flag is non-nil, input methods highlight partially-entered text.
1653 : For instance, while you are in the middle of a Quail input method sequence,
1654 : the text inserted so far is temporarily underlined.
1655 : The underlining goes away when you finish or abort the input method sequence.
1656 : See also the variable `input-method-verbose-flag'."
1657 : :type 'boolean
1658 : :group 'mule)
1659 :
1660 : (defcustom input-method-activate-hook nil
1661 : "Normal hook run just after an input method is activated.
1662 :
1663 : The variable `current-input-method' keeps the input method name
1664 : just activated."
1665 : :type 'hook
1666 : :group 'mule)
1667 :
1668 : (define-obsolete-variable-alias
1669 : 'input-method-inactivate-hook
1670 : 'input-method-deactivate-hook "24.3")
1671 :
1672 : (defcustom input-method-deactivate-hook nil
1673 : "Normal hook run just after an input method is deactivated.
1674 :
1675 : The variable `current-input-method' still keeps the input method name
1676 : just deactivated."
1677 : :type 'hook
1678 : :group 'mule
1679 : :version "24.3")
1680 :
1681 : (defcustom input-method-after-insert-chunk-hook nil
1682 : "Normal hook run just after an input method insert some chunk of text."
1683 : :type 'hook
1684 : :group 'mule)
1685 :
1686 : (defvar input-method-exit-on-first-char nil
1687 : "This flag controls when an input method returns.
1688 : Usually, the input method does not return while there's a possibility
1689 : that it may find a different translation if a user types another key.
1690 : But, if this flag is non-nil, the input method returns as soon as the
1691 : current key sequence gets long enough to have some valid translation.")
1692 :
1693 : (defcustom input-method-use-echo-area nil
1694 : "This flag controls how an input method shows an intermediate key sequence.
1695 : Usually, the input method inserts the intermediate key sequence,
1696 : or candidate translations corresponding to the sequence,
1697 : at point in the current buffer.
1698 : But, if this flag is non-nil, it displays them in echo area instead."
1699 : :type 'boolean
1700 : :group 'mule)
1701 :
1702 : (defvar input-method-exit-on-invalid-key nil
1703 : "This flag controls the behavior of an input method on invalid key input.
1704 : Usually, when a user types a key which doesn't start any character
1705 : handled by the input method, the key is handled by turning off the
1706 : input method temporarily. After that key, the input method is re-enabled.
1707 : But, if this flag is non-nil, the input method is never back on.")
1708 :
1709 :
1710 : (defcustom set-language-environment-hook nil
1711 : "Normal hook run after some language environment is set.
1712 :
1713 : When you set some hook function here, that effect usually should not
1714 : be inherited to another language environment. So, you had better set
1715 : another function in `exit-language-environment-hook' (which see) to
1716 : cancel the effect."
1717 : :type 'hook
1718 : :group 'mule)
1719 :
1720 : (defcustom exit-language-environment-hook nil
1721 : "Normal hook run after exiting from some language environment.
1722 : When this hook is run, the variable `current-language-environment'
1723 : is still bound to the language environment being exited.
1724 :
1725 : This hook is mainly used for canceling the effect of
1726 : `set-language-environment-hook' (which see)."
1727 : :type 'hook
1728 : :group 'mule)
1729 :
1730 : (put 'setup-specified-language-environment 'apropos-inhibit t)
1731 :
1732 : (defun setup-specified-language-environment ()
1733 : "Switch to a specified language environment."
1734 : (interactive)
1735 0 : (let (language-name)
1736 0 : (if (and (symbolp last-command-event)
1737 0 : (or (not (eq last-command-event 'Default))
1738 0 : (setq last-command-event 'English))
1739 0 : (setq language-name (symbol-name last-command-event)))
1740 0 : (prog1
1741 0 : (set-language-environment language-name)
1742 0 : (customize-mark-as-set 'current-language-environment))
1743 0 : (error "Bogus calling sequence"))))
1744 :
1745 : (defcustom current-language-environment "English"
1746 : "The last language environment specified with `set-language-environment'.
1747 : This variable should be set only with \\[customize], which is equivalent
1748 : to using the function `set-language-environment'."
1749 : :link '(custom-manual "(emacs)Language Environments")
1750 : :set (lambda (_symbol value) (set-language-environment value))
1751 : :get (lambda (_x)
1752 : (or (car-safe (assoc-string
1753 : (if (symbolp current-language-environment)
1754 : (symbol-name current-language-environment)
1755 : current-language-environment)
1756 : language-info-alist t))
1757 : "English"))
1758 : ;; custom type will be updated with `set-language-info'.
1759 : :type (if language-info-alist
1760 : (cons 'choice (mapcar
1761 : (lambda (lang)
1762 : (list 'const lang))
1763 : (sort (mapcar 'car language-info-alist) 'string<)))
1764 : 'string)
1765 : :initialize 'custom-initialize-default
1766 : :group 'mule)
1767 :
1768 : (defun reset-language-environment ()
1769 : "Reset multilingual environment of Emacs to the default status.
1770 :
1771 : The default status is as follows:
1772 :
1773 : The default value of `buffer-file-coding-system' is nil.
1774 : The default coding system for process I/O is nil.
1775 : The default value for the command `set-terminal-coding-system' is nil.
1776 : The default value for the command `set-keyboard-coding-system' is nil.
1777 :
1778 : The order of priorities of coding systems are as follows:
1779 : utf-8
1780 : iso-2022-7bit
1781 : iso-latin-1
1782 : iso-2022-7bit-lock
1783 : iso-2022-8bit-ss2
1784 : emacs-mule
1785 : raw-text"
1786 : (interactive)
1787 : ;; This function formerly set default-enable-multibyte-characters to t,
1788 : ;; but that is incorrect. It should not alter the unibyte/multibyte choice.
1789 :
1790 2 : (set-coding-system-priority
1791 : 'utf-8
1792 : 'iso-2022-7bit
1793 : 'iso-latin-1
1794 : 'iso-2022-7bit-lock
1795 : 'iso-2022-8bit-ss2
1796 : 'emacs-mule
1797 2 : 'raw-text)
1798 :
1799 2 : (set-default-coding-systems nil)
1800 2 : (setq default-sendmail-coding-system 'iso-latin-1)
1801 : ;; On Darwin systems, this should be utf-8-unix, but when this file is loaded
1802 : ;; that is not yet defined, so we set it in set-locale-environment instead.
1803 2 : (setq default-file-name-coding-system 'iso-latin-1-unix)
1804 : ;; Preserve eol-type from existing default-process-coding-systems.
1805 : ;; On non-unix-like systems in particular, these may have been set
1806 : ;; carefully by the user, or by the startup code, to deal with the
1807 : ;; users shell appropriately, so should not be altered by changing
1808 : ;; language environment.
1809 2 : (let ((output-coding
1810 : ;; When bootstrapping, coding-systems are not defined yet, so
1811 : ;; we need to catch the error from check-coding-system.
1812 2 : (condition-case nil
1813 2 : (coding-system-change-text-conversion
1814 2 : (car default-process-coding-system) 'undecided)
1815 2 : (coding-system-error 'undecided)))
1816 : (input-coding
1817 2 : (condition-case nil
1818 2 : (coding-system-change-text-conversion
1819 2 : (cdr default-process-coding-system) 'iso-latin-1)
1820 2 : (coding-system-error 'iso-latin-1))))
1821 2 : (setq default-process-coding-system
1822 2 : (cons output-coding input-coding)))
1823 :
1824 : ;; Put the highest priority to the charset iso-8859-1 to prefer the
1825 : ;; registry iso8859-1 over iso8859-2 in font selection. It also
1826 : ;; makes unibyte-display-via-language-environment to use iso-8859-1
1827 : ;; as the unibyte charset.
1828 2 : (set-charset-priority 'iso-8859-1)
1829 :
1830 : ;; Don't alter the terminal and keyboard coding systems here.
1831 : ;; The terminal still supports the same coding system
1832 : ;; that it supported a minute ago.
1833 : ;; (set-terminal-coding-system-internal nil)
1834 : ;; (set-keyboard-coding-system-internal nil)
1835 :
1836 : ;; Back in Emacs-20, it was necessary to provide some fallback implicit
1837 : ;; conversion, because almost no packages handled coding-system issues.
1838 : ;; Nowadays it'd just paper over bugs.
1839 : ;; (set-unibyte-charset 'iso-8859-1)
1840 : )
1841 :
1842 : (reset-language-environment)
1843 :
1844 : (defun set-display-table-and-terminal-coding-system (language-name &optional coding-system display)
1845 : "Set up the display table and terminal coding system for LANGUAGE-NAME."
1846 0 : (let ((coding (get-language-info language-name 'unibyte-display)))
1847 0 : (if (and coding
1848 0 : (or (not coding-system)
1849 0 : (coding-system-equal coding coding-system)))
1850 0 : (standard-display-european-internal)
1851 : ;; The following 2 lines undo the 8-bit display that we set up
1852 : ;; in standard-display-european-internal, which see. This is in
1853 : ;; case the user has used standard-display-european earlier in
1854 : ;; this session.
1855 0 : (when standard-display-table
1856 0 : (dotimes (i 128)
1857 0 : (aset standard-display-table (+ i 128) nil))))
1858 0 : (set-terminal-coding-system (or coding-system coding) display)))
1859 :
1860 : (defun set-language-environment (language-name)
1861 : "Set up multilingual environment for using LANGUAGE-NAME.
1862 : This sets the coding system priority and the default input method
1863 : and sometimes other things. LANGUAGE-NAME should be a string
1864 : which is the name of a language environment. For example, \"Latin-1\"
1865 : specifies the character set for the major languages of Western Europe.
1866 :
1867 : If there is a prior value for `current-language-environment', this
1868 : runs the hook `exit-language-environment-hook'. After setting up
1869 : the new language environment, it runs `set-language-environment-hook'."
1870 0 : (interactive (list (read-language-name
1871 : nil
1872 0 : "Set language environment (default English): ")))
1873 1 : (if language-name
1874 1 : (if (symbolp language-name)
1875 1 : (setq language-name (symbol-name language-name)))
1876 1 : (setq language-name "English"))
1877 1 : (let ((slot (assoc-string language-name language-info-alist t)))
1878 1 : (unless slot
1879 1 : (error "Language environment not defined: %S" language-name))
1880 1 : (setq language-name (car slot)))
1881 1 : (if current-language-environment
1882 1 : (let ((func (get-language-info current-language-environment
1883 1 : 'exit-function)))
1884 1 : (run-hooks 'exit-language-environment-hook)
1885 1 : (if (functionp func) (funcall func))))
1886 :
1887 1 : (reset-language-environment)
1888 : ;; The features might set up coding systems.
1889 1 : (let ((required-features (get-language-info language-name 'features)))
1890 1 : (while required-features
1891 0 : (require (car required-features))
1892 1 : (setq required-features (cdr required-features))))
1893 :
1894 1 : (setq current-language-environment language-name)
1895 :
1896 1 : (set-language-environment-coding-systems language-name)
1897 1 : (set-language-environment-input-method language-name)
1898 1 : (set-language-environment-nonascii-translation language-name)
1899 1 : (set-language-environment-charset language-name)
1900 : ;; Unibyte setups if necessary.
1901 1 : (unless (default-value 'enable-multibyte-characters)
1902 1 : (set-language-environment-unibyte language-name))
1903 :
1904 1 : (let ((func (get-language-info language-name 'setup-function)))
1905 1 : (if (functionp func)
1906 1 : (funcall func)))
1907 :
1908 1 : (setq current-iso639-language
1909 1 : (or (get-language-info language-name 'iso639-language)
1910 1 : current-iso639-language))
1911 :
1912 1 : (run-hooks 'set-language-environment-hook)
1913 1 : (force-mode-line-update t))
1914 :
1915 : (define-widget 'charset 'symbol
1916 : "An Emacs charset."
1917 : :tag "Charset"
1918 : :completions
1919 : (lambda (string pred action)
1920 : (let ((completion-ignore-case t))
1921 : (completion-table-with-predicate
1922 : obarray #'charsetp 'strict string pred action)))
1923 : :value 'ascii
1924 : :validate (lambda (widget)
1925 : (unless (charsetp (widget-value widget))
1926 : (widget-put widget :error (format "Invalid charset: %S"
1927 : (widget-value widget)))
1928 : widget))
1929 : :prompt-history 'charset-history)
1930 :
1931 : (defcustom language-info-custom-alist nil
1932 : "Customizations of language environment parameters.
1933 : Value is an alist with elements like those of `language-info-alist'.
1934 : These are used to set values in `language-info-alist' which replace
1935 : the defaults. A typical use is replacing the default input method for
1936 : the environment. Use \\[describe-language-environment] to find the environment's settings.
1937 :
1938 : This option is intended for use at startup. Removing items doesn't
1939 : remove them from the language info until you next restart Emacs.
1940 :
1941 : Setting this variable directly does not take effect.
1942 : See `set-language-info-alist' for use in programs."
1943 : :group 'mule
1944 : :version "23.1"
1945 : :set (lambda (s v)
1946 : (custom-set-default s v)
1947 : ;; Can't do this before language environments are set up.
1948 : (when v
1949 : ;; modify language-info-alist
1950 : (dolist (elt v)
1951 : (set-language-info-alist (car elt) (cdr elt)))
1952 : ;; re-set the environment in case its parameters changed
1953 : (set-language-environment current-language-environment)))
1954 : :type `(alist
1955 : :key-type (string :tag "Language environment"
1956 : :completions
1957 : (lambda (string pred action)
1958 : (let ((completion-ignore-case t))
1959 : (complete-with-action
1960 : action language-info-alist string pred))))
1961 : :value-type
1962 : (alist :key-type symbol
1963 : :options ((documentation string)
1964 : (charset (repeat charset))
1965 : (sample-text string)
1966 : (setup-function function)
1967 : (exit-function function)
1968 : (coding-system (repeat coding-system))
1969 : (coding-priority (repeat coding-system))
1970 : (nonascii-translation charset)
1971 : (input-method mule-input-method-string)
1972 : (features (repeat symbol))
1973 : (unibyte-display coding-system)))))
1974 :
1975 : (declare-function x-server-vendor "xfns.c" (&optional terminal))
1976 : (declare-function x-server-version "xfns.c" (&optional terminal))
1977 :
1978 : (defun standard-display-european-internal ()
1979 : ;; Actually set up direct output of non-ASCII characters.
1980 0 : (standard-display-8bit (if (eq window-system 'pc) 128 160) 255)
1981 : ;; Unibyte Emacs on MS-DOS wants to display all 8-bit characters with
1982 : ;; the native font, and codes 160 and 146 stand for something very
1983 : ;; different there.
1984 0 : (or (and (eq window-system 'pc) (not (default-value
1985 0 : 'enable-multibyte-characters)))
1986 0 : (progn
1987 : ;; Most X fonts used to do the wrong thing for latin-1 code 160.
1988 0 : (unless (and (eq window-system 'x)
1989 : ;; XFree86 4 has fixed the fonts.
1990 0 : (string= "The XFree86 Project, Inc" (x-server-vendor))
1991 0 : (> (aref (number-to-string (nth 2 (x-server-version))) 0)
1992 0 : ?3))
1993 : ;; Make non-line-break space display as a plain space.
1994 0 : (aset standard-display-table (unibyte-char-to-multibyte 160) [32]))
1995 : ;; Most Windows programs send out apostrophes as \222. Most X fonts
1996 : ;; don't contain a character at that position. Map it to the ASCII
1997 : ;; apostrophe. [This is actually RIGHT SINGLE QUOTATION MARK,
1998 : ;; U+2019, normally from the windows-1252 character set. XFree 4
1999 : ;; fonts probably have the appropriate glyph at this position,
2000 : ;; so they could use standard-display-8bit. It's better to use a
2001 : ;; proper windows-1252 coding system. --fx]
2002 0 : (aset standard-display-table (unibyte-char-to-multibyte 146) [39]))))
2003 :
2004 : (defun set-language-environment-coding-systems (language-name)
2005 : "Do various coding system setups for language environment LANGUAGE-NAME."
2006 1 : (let* ((priority (get-language-info language-name 'coding-priority))
2007 1 : (default-coding (car priority))
2008 : ;; If the default buffer-file-coding-system is nil, don't use
2009 : ;; coding-system-eol-type, because it treats nil as
2010 : ;; `no-conversion'. The default buffer-file-coding-system is set
2011 : ;; to nil by reset-language-environment, and in that case we
2012 : ;; want to have here the native EOL type for each platform.
2013 : ;; FIXME: there should be a common code that runs both on
2014 : ;; startup and here to set the default EOL type correctly.
2015 : ;; Right now, DOS/Windows platforms set this on dos-w32.el,
2016 : ;; which works only as long as the order of loading files at
2017 : ;; dump time and calling functions at startup is not modified
2018 : ;; significantly, i.e. as long as this function is called
2019 : ;; _after_ the default buffer-file-coding-system was set by
2020 : ;; dos-w32.el.
2021 : (eol-type
2022 1 : (coding-system-eol-type
2023 1 : (or (default-value 'buffer-file-coding-system)
2024 1 : (if (memq system-type '(windows-nt ms-dos)) 'dos 'unix)))))
2025 1 : (when priority
2026 0 : (set-default-coding-systems
2027 0 : (if (memq eol-type '(0 1 2 unix dos mac))
2028 0 : (coding-system-change-eol-conversion default-coding eol-type)
2029 0 : default-coding))
2030 0 : (setq default-sendmail-coding-system default-coding)
2031 1 : (apply 'set-coding-system-priority priority))))
2032 :
2033 : (defun set-language-environment-input-method (language-name)
2034 : "Do various input method setups for language environment LANGUAGE-NAME."
2035 1 : (let ((input-method (get-language-info language-name 'input-method)))
2036 1 : (when input-method
2037 0 : (setq default-input-method input-method)
2038 0 : (if input-method-history
2039 0 : (setq input-method-history
2040 0 : (cons input-method
2041 1 : (delete input-method input-method-history)))))))
2042 :
2043 : (defun set-language-environment-nonascii-translation (language-name)
2044 : "Do unibyte/multibyte translation setup for language environment LANGUAGE-NAME."
2045 : ;; Note: For DOS, we assumed that the charset cpXXX is already
2046 : ;; defined.
2047 1 : (let ((nonascii (get-language-info language-name 'nonascii-translation)))
2048 1 : (if (eq window-system 'pc)
2049 1 : (setq nonascii (intern (format "cp%d" dos-codepage))))
2050 1 : (or (and (charsetp nonascii)
2051 1 : (get-charset-property nonascii :ascii-compatible-p))
2052 1 : (setq nonascii 'iso-8859-1))
2053 : ;; Back in Emacs-20, it was necessary to provide some fallback implicit
2054 : ;; conversion, because almost no packages handled coding-system issues.
2055 : ;; Nowadays it'd just paper over bugs.
2056 : ;; (set-unibyte-charset nonascii)
2057 1 : ))
2058 :
2059 : (defun set-language-environment-charset (language-name)
2060 : "Do various charset setups for language environment LANGUAGE-NAME."
2061 : ;; Put higher priorities to such charsets that are supported by the
2062 : ;; coding systems of higher priorities in this environment.
2063 1 : (let ((charsets (get-language-info language-name 'charset)))
2064 1 : (dolist (coding (get-language-info language-name 'coding-priority))
2065 0 : (let ((list (coding-system-charset-list coding)))
2066 0 : (if (consp list)
2067 1 : (setq charsets (append charsets list)))))
2068 1 : (if charsets
2069 1 : (apply 'set-charset-priority charsets))))
2070 :
2071 : (defun set-language-environment-unibyte (language-name)
2072 : "Do various unibyte-mode setups for language environment LANGUAGE-NAME."
2073 0 : (set-display-table-and-terminal-coding-system language-name))
2074 :
2075 : (defun princ-list (&rest args)
2076 : "Print all arguments with `princ', then print \"\\n\"."
2077 : (declare (obsolete "use mapc and princ instead." "23.3"))
2078 0 : (mapc #'princ args)
2079 0 : (princ "\n"))
2080 :
2081 : (put 'describe-specified-language-support 'apropos-inhibit t)
2082 :
2083 : ;; Print language-specific information such as input methods,
2084 : ;; charsets, and coding systems. This function is intended to be
2085 : ;; called from the menu:
2086 : ;; [menu-bar mule describe-language-environment LANGUAGE]
2087 : ;; and should not run it by `M-x describe-current-input-method-function'.
2088 : (defun describe-specified-language-support ()
2089 : "Describe how Emacs supports the specified language environment."
2090 : (interactive)
2091 0 : (let (language-name)
2092 0 : (if (not (and (symbolp last-command-event)
2093 0 : (or (not (eq last-command-event 'Default))
2094 0 : (setq last-command-event 'English))
2095 0 : (setq language-name (symbol-name last-command-event))))
2096 0 : (error "This command should only be called from the menu bar"))
2097 0 : (describe-language-environment language-name)))
2098 :
2099 : (defun describe-language-environment (language-name)
2100 : "Describe how Emacs supports language environment LANGUAGE-NAME."
2101 : (interactive
2102 0 : (list (read-language-name
2103 : 'documentation
2104 0 : "Describe language environment (default current choice): ")))
2105 0 : (if (null language-name)
2106 0 : (setq language-name current-language-environment))
2107 0 : (if (or (null language-name)
2108 0 : (null (get-language-info language-name 'documentation)))
2109 0 : (error "No documentation for the specified language"))
2110 0 : (if (symbolp language-name)
2111 0 : (setq language-name (symbol-name language-name)))
2112 0 : (dolist (feature (get-language-info language-name 'features))
2113 0 : (require feature))
2114 0 : (let ((doc (get-language-info language-name 'documentation)))
2115 0 : (help-setup-xref (list #'describe-language-environment language-name)
2116 0 : (called-interactively-p 'interactive))
2117 0 : (with-output-to-temp-buffer (help-buffer)
2118 0 : (with-current-buffer standard-output
2119 0 : (insert language-name " language environment\n\n")
2120 0 : (if (stringp doc)
2121 0 : (insert (substitute-command-keys doc) "\n\n"))
2122 0 : (condition-case nil
2123 0 : (let ((str (eval (get-language-info language-name 'sample-text))))
2124 0 : (if (stringp str)
2125 0 : (insert "Sample text:\n "
2126 0 : (replace-regexp-in-string "\n" "\n " str)
2127 0 : "\n\n")))
2128 0 : (error nil))
2129 0 : (let ((input-method (get-language-info language-name 'input-method))
2130 0 : (l (copy-sequence input-method-alist))
2131 : (first t))
2132 0 : (when (and input-method
2133 0 : (setq input-method (assoc input-method l)))
2134 0 : (insert "Input methods (default " (car input-method) ")\n")
2135 0 : (setq l (cons input-method (delete input-method l))
2136 0 : first nil))
2137 0 : (dolist (elt l)
2138 0 : (when (or (eq input-method elt)
2139 0 : (eq t (compare-strings language-name nil nil
2140 0 : (nth 1 elt) nil nil t)))
2141 0 : (when first
2142 0 : (insert "Input methods:\n")
2143 0 : (setq first nil))
2144 0 : (insert " " (car elt))
2145 0 : (search-backward (car elt))
2146 0 : (help-xref-button 0 'help-input-method (car elt))
2147 0 : (goto-char (point-max))
2148 0 : (insert " (\""
2149 0 : (if (stringp (nth 3 elt)) (nth 3 elt) (car (nth 3 elt)))
2150 0 : "\" in mode line)\n")))
2151 0 : (or first
2152 0 : (insert "\n")))
2153 0 : (insert "Character sets:\n")
2154 0 : (let ((l (get-language-info language-name 'charset)))
2155 0 : (if (null l)
2156 0 : (insert " nothing specific to " language-name "\n")
2157 0 : (while l
2158 0 : (insert " " (symbol-name (car l)))
2159 0 : (search-backward (symbol-name (car l)))
2160 0 : (help-xref-button 0 'help-character-set (car l))
2161 0 : (goto-char (point-max))
2162 0 : (insert ": " (charset-description (car l)) "\n")
2163 0 : (setq l (cdr l)))))
2164 0 : (insert "\n")
2165 0 : (insert "Coding systems:\n")
2166 0 : (let ((l (get-language-info language-name 'coding-system)))
2167 0 : (if (null l)
2168 0 : (insert " nothing specific to " language-name "\n")
2169 0 : (while l
2170 0 : (insert " " (symbol-name (car l)))
2171 0 : (search-backward (symbol-name (car l)))
2172 0 : (help-xref-button 0 'help-coding-system (car l))
2173 0 : (goto-char (point-max))
2174 0 : (insert (substitute-command-keys " (`")
2175 0 : (coding-system-mnemonic (car l))
2176 0 : (substitute-command-keys "' in mode line):\n\t")
2177 0 : (substitute-command-keys
2178 0 : (coding-system-doc-string (car l)))
2179 0 : "\n")
2180 0 : (let ((aliases (coding-system-aliases (car l))))
2181 0 : (when aliases
2182 0 : (insert "\t(alias:")
2183 0 : (while aliases
2184 0 : (insert " " (symbol-name (car aliases)))
2185 0 : (setq aliases (cdr aliases)))
2186 0 : (insert ")\n")))
2187 0 : (setq l (cdr l)))))))))
2188 :
2189 : ;;; Locales.
2190 :
2191 : (defvar locale-translation-file-name nil
2192 : "File name for the system's file of locale-name aliases, or nil if none.")
2193 :
2194 : ;; The following definitions might as well be marked as constants and
2195 : ;; purecopied, since they're normally used on startup, and probably
2196 : ;; should reflect the facilities of the base Emacs.
2197 : (defconst locale-language-names
2198 : (purecopy
2199 : '(
2200 : ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
2201 : ;; as specified in the Single Unix Spec, Version 2.
2202 : ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
2203 : ;; with additions from ISO 639/RA Newsletter No.1/1989;
2204 : ;; see Internet RFC 2165 (1997-06) and
2205 : ;; http://www.evertype.com/standards/iso639/iso639-en.html
2206 : ;; TERRITORY is a country code taken from ISO 3166
2207 : ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
2208 : ;; CODESET and MODIFIER are implementation-dependent.
2209 :
2210 : ;; jasonr comments: MS Windows uses three letter codes for
2211 : ;; languages instead of the two letter ISO codes that POSIX
2212 : ;; uses. In most cases the first two letters are the same, so
2213 : ;; most of the regexps in locale-language-names work. Japanese
2214 : ;; and Chinese are exceptions, which are listed in the
2215 : ;; non-standard section at the bottom of locale-language-names.
2216 :
2217 : ("aa_DJ" . "Latin-1") ; Afar
2218 : ("aa" . "UTF-8")
2219 : ;; ab Abkhazian
2220 : ("af" . "Latin-1") ; Afrikaans
2221 : ("am" "Ethiopic" utf-8) ; Amharic
2222 : ("an" . "Latin-9") ; Aragonese
2223 : ("ar" . "Arabic")
2224 : ; as Assamese
2225 : ; ay Aymara
2226 : ("az" . "UTF-8") ; Azerbaijani
2227 : ; ba Bashkir
2228 : ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
2229 : ("bg" "Bulgarian" cp1251) ; Bulgarian
2230 : ; bh Bihari
2231 : ; bi Bislama
2232 : ("bn" . "UTF-8") ; Bengali, Bangla
2233 : ("bo" . "Tibetan")
2234 : ("br" . "Latin-1") ; Breton
2235 : ("bs" . "Latin-2") ; Bosnian
2236 : ("byn" . "UTF-8") ; Bilin; Blin
2237 : ("ca" "Catalan" iso-8859-1) ; Catalan
2238 : ; co Corsican
2239 : ("cs" "Czech" iso-8859-2)
2240 : ("cy" "Welsh" iso-8859-14)
2241 : ("da" . "Latin-1") ; Danish
2242 : ("de" "German" iso-8859-1)
2243 : ; dv Divehi
2244 : ; dz Bhutani
2245 : ("el" "Greek" iso-8859-7)
2246 : ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
2247 : ;; That's actually what the GNU locales define, modulo things like
2248 : ;; en_IN -- fx.
2249 : ("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
2250 : ("en" "English" iso-8859-1) ; English
2251 : ("eo" . "Esperanto") ; Esperanto
2252 : ("es" "Spanish" iso-8859-1)
2253 : ("et" . "Latin-1") ; Estonian
2254 : ("eu" . "Latin-1") ; Basque
2255 : ("fa" . "UTF-8") ; Persian
2256 : ("fi" . "Latin-1") ; Finnish
2257 : ("fj" . "Latin-1") ; Fiji
2258 : ("fo" . "Latin-1") ; Faroese
2259 : ("fr" "French" iso-8859-1) ; French
2260 : ("fy" . "Latin-1") ; Frisian
2261 : ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
2262 : ("gd" . "Latin-9") ; Scots Gaelic
2263 : ("gez" "Ethiopic" utf-8) ; Geez
2264 : ("gl" . "Latin-1") ; Gallegan; Galician
2265 : ; gn Guarani
2266 : ("gu" . "UTF-8") ; Gujarati
2267 : ("gv" . "Latin-1") ; Manx Gaelic
2268 : ; ha Hausa
2269 : ("he" "Hebrew" iso-8859-8)
2270 : ("hi" "Devanagari" utf-8) ; Hindi
2271 : ("hr" "Croatian" iso-8859-2) ; Croatian
2272 : ("hu" . "Latin-2") ; Hungarian
2273 : ; hy Armenian
2274 : ; ia Interlingua
2275 : ("id" . "Latin-1") ; Indonesian
2276 : ; ie Interlingue
2277 : ; ik Inupiak
2278 : ("is" . "Latin-1") ; Icelandic
2279 : ("it" "Italian" iso-8859-1) ; Italian
2280 : ; iu Inuktitut
2281 : ("iw" "Hebrew" iso-8859-8)
2282 : ("ja" "Japanese" euc-jp)
2283 : ; jw Javanese
2284 : ("ka" "Georgian" georgian-ps) ; Georgian
2285 : ; kk Kazakh
2286 : ("kl" . "Latin-1") ; Greenlandic
2287 : ; km Cambodian
2288 : ("kn" "Kannada" utf-8)
2289 : ("ko" "Korean" euc-kr)
2290 : ; ks Kashmiri
2291 : ; ku Kurdish
2292 : ("kw" . "Latin-1") ; Cornish
2293 : ; ky Kirghiz
2294 : ("la" . "Latin-1") ; Latin
2295 : ("lb" . "Latin-1") ; Luxemburgish
2296 : ("lg" . "Laint-6") ; Ganda
2297 : ; ln Lingala
2298 : ("lo" "Lao" utf-8) ; Laothian
2299 : ("lt" "Lithuanian" iso-8859-13)
2300 : ("lv" . "Latvian") ; Latvian, Lettish
2301 : ; mg Malagasy
2302 : ("mi" . "Latin-7") ; Maori
2303 : ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
2304 : ("ml" "Malayalam" utf-8)
2305 : ("mn" . "UTF-8") ; Mongolian
2306 : ; mo Moldavian
2307 : ("mr" "Devanagari" utf-8) ; Marathi
2308 : ("ms" . "Latin-1") ; Malay
2309 : ("mt" . "Latin-3") ; Maltese
2310 : ; my Burmese
2311 : ; na Nauru
2312 : ("nb" . "Latin-1") ; Norwegian
2313 : ("ne" "Devanagari" utf-8) ; Nepali
2314 : ("nl" "Dutch" iso-8859-1)
2315 : ("no" . "Latin-1") ; Norwegian
2316 : ("oc" . "Latin-1") ; Occitan
2317 : ("om_ET" . "UTF-8") ; (Afan) Oromo
2318 : ("om" . "Latin-1") ; (Afan) Oromo
2319 : ; or Oriya
2320 : ("pa" . "UTF-8") ; Punjabi
2321 : ("pl" . "Latin-2") ; Polish
2322 : ; ps Pashto, Pushto
2323 : ("pt" . "Latin-1") ; Portuguese
2324 : ; qu Quechua
2325 : ("rm" . "Latin-1") ; Rhaeto-Romanic
2326 : ; rn Kirundi
2327 : ("ro" "Romanian" iso-8859-2)
2328 : ("ru_RU" "Russian" iso-8859-5)
2329 : ("ru_UA" "Russian" koi8-u)
2330 : ; rw Kinyarwanda
2331 : ("sa" . "Devanagari") ; Sanskrit
2332 : ; sd Sindhi
2333 : ("se" . "UTF-8") ; Northern Sami
2334 : ; sg Sangho
2335 : ("sh" . "Latin-2") ; Serbo-Croatian
2336 : ; si Sinhalese
2337 : ("sid" . "UTF-8") ; Sidamo
2338 : ("sk" "Slovak" iso-8859-2)
2339 : ("sl" "Slovenian" iso-8859-2)
2340 : ; sm Samoan
2341 : ; sn Shona
2342 : ("so_ET" "UTF-8") ; Somali
2343 : ("so" "Latin-1") ; Somali
2344 : ("sq" . "Latin-1") ; Albanian
2345 : ("sr" . "Latin-2") ; Serbian (Latin alphabet)
2346 : ; ss Siswati
2347 : ("st" . "Latin-1") ; Sesotho
2348 : ; su Sundanese
2349 : ("sv" "Swedish" iso-8859-1) ; Swedish
2350 : ("sw" . "Latin-1") ; Swahili
2351 : ("ta" "Tamil" utf-8)
2352 : ("te" . "UTF-8") ; Telugu
2353 : ("tg" "Tajik" koi8-t)
2354 : ("th" "Thai" tis-620)
2355 : ("ti" "Ethiopic" utf-8) ; Tigrinya
2356 : ("tig_ER" . "UTF-8") ; Tigre
2357 : ; tk Turkmen
2358 : ("tl" . "Latin-1") ; Tagalog
2359 : ; tn Setswana
2360 : ; to Tonga
2361 : ("tr" "Turkish" iso-8859-9)
2362 : ; ts Tsonga
2363 : ("tt" . "UTF-8") ; Tatar
2364 : ; tw Twi
2365 : ; ug Uighur
2366 : ("uk" "Ukrainian" koi8-u)
2367 : ("ur" . "UTF-8") ; Urdu
2368 : ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
2369 : ("uz" . "Latin-1") ; Uzbek
2370 : ("vi" "Vietnamese" utf-8)
2371 : ; vo Volapuk
2372 : ("wa" . "Latin-1") ; Walloon
2373 : ; wo Wolof
2374 : ("xh" . "Latin-1") ; Xhosa
2375 : ("yi" . "Windows-1255") ; Yiddish
2376 : ; yo Yoruba
2377 : ; za Zhuang
2378 : ("zh_HK" . "Chinese-Big5")
2379 : ; zh_HK/BIG5-HKSCS \
2380 : ("zh_TW" . "Chinese-Big5")
2381 : ("zh_CN.GB2312" "Chinese-GB")
2382 : ("zh_CN.GBK" "Chinese-GBK")
2383 : ("zh_CN.GB18030" "Chinese-GB18030")
2384 : ("zh_CN.UTF-8" . "Chinese-GBK")
2385 : ("zh_CN" . "Chinese-GB")
2386 : ("zh" . "Chinese-GB")
2387 : ("zu" . "Latin-1") ; Zulu
2388 :
2389 : ;; ISO standard locales
2390 : ("c$" . "ASCII")
2391 : ("posix$" . "ASCII")
2392 :
2393 : ;; The "IPA" Emacs language environment does not correspond
2394 : ;; to any ISO 639 code, so let it stand for itself.
2395 : ("ipa$" . "IPA")
2396 :
2397 : ;; Nonstandard or obsolete language codes
2398 : ("cz" . "Czech") ; e.g. Solaris 2.6
2399 : ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4
2400 : ("iw" . "Hebrew") ; e.g. X11R6.4
2401 : ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
2402 : ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
2403 : ("jp" . "Japanese") ; e.g. MS Windows
2404 : ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
2405 : ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
2406 : ("gbz" . "UTF-8") ; MS Windows Dari Persian
2407 : ("div" . "UTF-8") ; MS Windows Divehi (Maldives)
2408 : ("wee" . "Latin-2") ; MS Windows Lower Sorbian
2409 : ("wen" . "Latin-2") ; MS Windows Upper Sorbian
2410 : ))
2411 : "Alist of locale regexps vs the corresponding languages and coding systems.
2412 : Each element has this form:
2413 : (LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
2414 : The first element whose LOCALE-REGEXP matches the start of a
2415 : downcased locale specifies the LANG-ENV \(language environment)
2416 : and CODING-SYSTEM corresponding to that locale. If there is no
2417 : appropriate language environment, the element may have this form:
2418 : (LOCALE-REGEXP . LANG-ENV)
2419 : In this case, LANG-ENV is one of generic language environments for an
2420 : specific encoding such as \"Latin-1\" and \"UTF-8\".")
2421 :
2422 : (defconst locale-charset-language-names
2423 : (purecopy
2424 : '((".*8859[-_]?1\\>" . "Latin-1")
2425 : (".*8859[-_]?2\\>" . "Latin-2")
2426 : (".*8859[-_]?3\\>" . "Latin-3")
2427 : (".*8859[-_]?4\\>" . "Latin-4")
2428 : (".*8859[-_]?9\\>" . "Latin-5")
2429 : (".*8859[-_]?14\\>" . "Latin-8")
2430 : (".*8859[-_]?15\\>" . "Latin-9")
2431 : (".*utf\\(?:-?8\\)?\\>" . "UTF-8")
2432 : ;; utf-8@euro exists, so put this last. (@euro really specifies
2433 : ;; the currency, rather than the charset.)
2434 : (".*@euro\\>" . "Latin-9")))
2435 : "List of pairs of locale regexps and charset language names.
2436 : The first element whose locale regexp matches the start of a downcased locale
2437 : specifies the language name whose charset corresponds to that locale.
2438 : This language name is used if the locale is not listed in
2439 : `locale-language-names'.")
2440 :
2441 : (defconst locale-preferred-coding-systems
2442 : (purecopy
2443 : '((".*8859[-_]?1\\>" . iso-8859-1)
2444 : (".*8859[-_]?2\\>" . iso-8859-2)
2445 : (".*8859[-_]?3\\>" . iso-8859-3)
2446 : (".*8859[-_]?4\\>" . iso-8859-4)
2447 : (".*8859[-_]?9\\>" . iso-8859-9)
2448 : (".*8859[-_]?14\\>" . iso-8859-14)
2449 : (".*8859[-_]?15\\>" . iso-8859-15)
2450 : (".*utf\\(?:-?8\\)?" . utf-8)
2451 : ;; utf-8@euro exists, so put this after utf-8. (@euro really
2452 : ;; specifies the currency, rather than the charset.)
2453 : (".*@euro" . iso-8859-15)
2454 : ("koi8-?r" . koi8-r)
2455 : ("koi8-?u" . koi8-u)
2456 : ("tcvn" . tcvn)
2457 : ("big5[-_]?hkscs" . big5-hkscs)
2458 : ("big5" . big5)
2459 : ("euc-?tw" . euc-tw)
2460 : ("euc-?cn" . euc-cn)
2461 : ("gb2312" . gb2312)
2462 : ("gbk" . gbk)
2463 : ("gb18030" . gb18030)
2464 : ("ja.*[._]euc" . japanese-iso-8bit)
2465 : ("ja.*[._]jis7" . iso-2022-jp)
2466 : ("ja.*[._]pck" . japanese-shift-jis)
2467 : ("ja.*[._]sjis" . japanese-shift-jis)
2468 : ("jpn" . japanese-shift-jis) ; MS-Windows uses this.
2469 : ))
2470 : "List of pairs of locale regexps and preferred coding systems.
2471 : The first element whose locale regexp matches the start of a downcased locale
2472 : specifies the coding system to prefer when using that locale.
2473 : This coding system is used if the locale specifies a specific charset.")
2474 :
2475 : (defun locale-name-match (key alist)
2476 : "Search for KEY in ALIST, which should be a list of regexp-value pairs.
2477 : Return the value corresponding to the first regexp that matches the
2478 : start of KEY, or nil if there is no match."
2479 0 : (let (element)
2480 0 : (while (and alist (not element))
2481 0 : (if (string-match-p (concat "\\`\\(?:" (car (car alist)) "\\)") key)
2482 0 : (setq element (car alist)))
2483 0 : (setq alist (cdr alist)))
2484 0 : (cdr element)))
2485 :
2486 : (defun locale-charset-match-p (charset1 charset2)
2487 : "Whether charset names (strings) CHARSET1 and CHARSET2 are equivalent.
2488 : Matching is done ignoring case and any hyphens and underscores in the
2489 : names. E.g. `ISO_8859-1' and `iso88591' both match `iso-8859-1'."
2490 0 : (setq charset1 (replace-regexp-in-string "[-_]" "" charset1))
2491 0 : (setq charset2 (replace-regexp-in-string "[-_]" "" charset2))
2492 0 : (eq t (compare-strings charset1 nil nil charset2 nil nil t)))
2493 :
2494 : (defvar locale-charset-alist nil
2495 : "Coding system alist keyed on locale-style charset name.
2496 : Used by `locale-charset-to-coding-system'.")
2497 :
2498 : (defun locale-charset-to-coding-system (charset)
2499 : "Find coding system corresponding to CHARSET.
2500 : CHARSET is any sort of non-Emacs charset name, such as might be used
2501 : in a locale codeset, or elsewhere. It is matched to a coding system
2502 : first by case-insensitive lookup in `locale-charset-alist'. Then
2503 : matches are looked for in the coding system list, treating case and
2504 : the characters `-' and `_' as insignificant. The coding system base
2505 : is returned. Thus, for instance, if charset \"ISO8859-2\",
2506 : `iso-latin-2' is returned."
2507 0 : (or (car (assoc-string charset locale-charset-alist t))
2508 0 : (let ((cs coding-system-alist)
2509 : c)
2510 0 : (while (and (not c) cs)
2511 0 : (if (locale-charset-match-p charset (caar cs))
2512 0 : (setq c (intern (caar cs)))
2513 0 : (pop cs)))
2514 0 : (if c (coding-system-base c)))))
2515 :
2516 : ;; Fixme: This ought to deal with the territory part of the locale
2517 : ;; too, for setting things such as calendar holidays, ps-print paper
2518 : ;; size, spelling dictionary.
2519 :
2520 : (declare-function w32-get-console-codepage "w32proc.c" ())
2521 : (declare-function w32-get-console-output-codepage "w32proc.c" ())
2522 :
2523 : (defun locale-translate (locale)
2524 : "Expand LOCALE according to `locale-translation-file-name', if possible.
2525 : For example, translate \"swedish\" into \"sv_SE.ISO8859-1\"."
2526 0 : (if locale-translation-file-name
2527 0 : (with-temp-buffer
2528 0 : (set-buffer-multibyte nil)
2529 0 : (insert-file-contents locale-translation-file-name)
2530 0 : (if (re-search-forward
2531 0 : (concat "^" (regexp-quote locale) ":?[ \t]+") nil t)
2532 0 : (buffer-substring (point) (line-end-position))
2533 0 : locale))
2534 0 : locale))
2535 :
2536 : (defun set-locale-environment (&optional locale-name frame)
2537 : "Set up multilingual environment for using LOCALE-NAME.
2538 : This sets the language environment, the coding system priority,
2539 : the default input method and sometimes other things.
2540 :
2541 : LOCALE-NAME should be a string which is the name of a locale supported
2542 : by the system. Often it is of the form xx_XX.CODE, where xx is a
2543 : language, XX is a country, and CODE specifies a character set and
2544 : coding system. For example, the locale name \"ja_JP.EUC\" might name
2545 : a locale for Japanese in Japan using the `japanese-iso-8bit'
2546 : coding-system. The name may also have a modifier suffix, e.g. `@euro'
2547 : or `@cyrillic'.
2548 :
2549 : If LOCALE-NAME is nil, its value is taken from the environment
2550 : variables LC_ALL, LC_CTYPE and LANG (the first one that is set).
2551 :
2552 : The locale names supported by your system can typically be found in a
2553 : directory named `/usr/share/locale' or `/usr/lib/locale'. LOCALE-NAME
2554 : will be translated according to the table specified by
2555 : `locale-translation-file-name'.
2556 :
2557 : If FRAME is non-nil, only set the keyboard coding system and the
2558 : terminal coding system for the terminal of that frame, and don't
2559 : touch session-global parameters like the language environment.
2560 :
2561 : See also `locale-charset-language-names', `locale-language-names',
2562 : `locale-preferred-coding-systems' and `locale-coding-system'."
2563 : (interactive "sSet environment for locale: ")
2564 :
2565 : ;; Do this at runtime for the sake of binaries possibly transported
2566 : ;; to a system without X.
2567 0 : (setq locale-translation-file-name
2568 0 : (let ((files
2569 : '("/usr/share/X11/locale/locale.alias" ; e.g. X11R7
2570 : "/usr/lib/X11/locale/locale.alias" ; e.g. X11R6.4
2571 : "/usr/X11R6/lib/X11/locale/locale.alias" ; XFree86, e.g. RedHat 4.2
2572 : "/usr/openwin/lib/locale/locale.alias" ; e.g. Solaris 2.6
2573 : ;;
2574 : ;; The following name appears after the X-related names above,
2575 : ;; since the X-related names are what X actually uses.
2576 : "/usr/share/locale/locale.alias" ; GNU/Linux sans X
2577 : )))
2578 0 : (while (and files (not (file-exists-p (car files))))
2579 0 : (setq files (cdr files)))
2580 0 : (car files)))
2581 :
2582 0 : (let ((locale locale-name))
2583 :
2584 0 : (unless locale
2585 : ;; Use the first of these three environment variables
2586 : ;; that has a nonempty value.
2587 0 : (let ((vars '("LC_ALL" "LC_CTYPE" "LANG")))
2588 0 : (while (and vars
2589 0 : (= 0 (length locale))) ; nil or empty string
2590 0 : (setq locale (getenv (pop vars) frame)))))
2591 :
2592 0 : (when locale
2593 0 : (setq locale (locale-translate locale))
2594 :
2595 : ;; Leave the system locales alone if the caller did not specify
2596 : ;; an explicit locale name, as their defaults are set from
2597 : ;; LC_MESSAGES and LC_TIME, not LC_CTYPE, and the user might not
2598 : ;; want to set them to the same value as LC_CTYPE.
2599 0 : (when locale-name
2600 0 : (setq system-messages-locale locale)
2601 0 : (setq system-time-locale locale))
2602 :
2603 0 : (if (string-match "^[a-z][a-z]" locale)
2604 : ;; The value of 'current-iso639-language' is matched against
2605 : ;; the ':lang' property of font-spec objects when selecting
2606 : ;; and prioritizing available fonts for displaying
2607 : ;; characters; see fontset.c.
2608 0 : (setq current-iso639-language
2609 : ;; The call to 'downcase' is for w32, where the
2610 : ;; MS-Windows locale names are in caps, as in "ENU",
2611 : ;; the equivalent of the Posix "en_US". Since the
2612 : ;; match mentioned above uses memq, and ':lang'
2613 : ;; properties have lower-case values, the letter-case
2614 : ;; must match exactly.
2615 0 : (intern (downcase (match-string 0 locale))))))
2616 :
2617 0 : (setq woman-locale
2618 0 : (or system-messages-locale
2619 0 : (let ((msglocale (getenv "LC_MESSAGES" frame)))
2620 0 : (if (zerop (length msglocale))
2621 0 : locale
2622 0 : (locale-translate msglocale)))))
2623 :
2624 0 : (when locale
2625 0 : (setq locale (downcase locale))
2626 :
2627 0 : (let ((language-name
2628 0 : (locale-name-match locale locale-language-names))
2629 : (charset-language-name
2630 0 : (locale-name-match locale locale-charset-language-names))
2631 0 : (default-eol-type (coding-system-eol-type
2632 0 : (default-value 'buffer-file-coding-system)))
2633 : (coding-system
2634 0 : (or (locale-name-match locale locale-preferred-coding-systems)
2635 0 : (when locale
2636 0 : (if (string-match "\\.\\([^@]+\\)" locale)
2637 0 : (locale-charset-to-coding-system
2638 0 : (match-string 1 locale)))))))
2639 :
2640 0 : (if (consp language-name)
2641 : ;; locale-language-names specify both lang-env and coding.
2642 : ;; But, what specified in locale-preferred-coding-systems
2643 : ;; has higher priority.
2644 0 : (setq coding-system (or coding-system
2645 0 : (nth 1 language-name))
2646 0 : language-name (car language-name))
2647 : ;; Otherwise, if locale is not listed in locale-language-names,
2648 : ;; use what listed in locale-charset-language-names.
2649 0 : (if (not language-name)
2650 0 : (setq language-name charset-language-name)))
2651 :
2652 : ;; If a specific EOL conversion was specified in the default
2653 : ;; buffer-file-coding-system, preserve it in the coding system
2654 : ;; we will be using from now on.
2655 0 : (if (and (memq default-eol-type '(0 1 2 unix dos mac))
2656 0 : coding-system
2657 0 : (coding-system-p coding-system))
2658 0 : (setq coding-system (coding-system-change-eol-conversion
2659 0 : coding-system default-eol-type)))
2660 :
2661 0 : (when language-name
2662 :
2663 : ;; Set up for this character set. This is now the right way
2664 : ;; to do it for both unibyte and multibyte modes.
2665 0 : (unless frame
2666 0 : (set-language-environment language-name))
2667 :
2668 : ;; If the default enable-multibyte-characters is nil,
2669 : ;; we are using single-byte characters,
2670 : ;; so the display table and terminal coding system are irrelevant.
2671 0 : (when (default-value 'enable-multibyte-characters)
2672 0 : (set-display-table-and-terminal-coding-system
2673 0 : language-name coding-system frame))
2674 :
2675 : ;; Set the `keyboard-coding-system' if appropriate (tty
2676 : ;; only). At least X and MS Windows can generate
2677 : ;; multilingual input.
2678 : ;; XXX This was disabled unless `window-system', but that
2679 : ;; leads to buggy behavior when a tty frame is opened
2680 : ;; later. Setting the keyboard coding system has no adverse
2681 : ;; effect on X, so let's do it anyway. -- Lorentey
2682 0 : (let ((kcs (or coding-system
2683 0 : (car (get-language-info language-name
2684 0 : 'coding-system)))))
2685 0 : (if kcs (set-keyboard-coding-system kcs frame)))
2686 :
2687 0 : (unless frame
2688 0 : (setq locale-coding-system
2689 0 : (car (get-language-info language-name 'coding-priority)))))
2690 :
2691 0 : (when (and (not frame)
2692 0 : coding-system
2693 0 : (not (coding-system-equal coding-system
2694 0 : locale-coding-system)))
2695 0 : (prefer-coding-system coding-system)
2696 : ;; Fixme: perhaps prefer-coding-system should set this too.
2697 : ;; But it's not the time to do such a fundamental change.
2698 0 : (setq default-sendmail-coding-system coding-system)
2699 0 : (setq locale-coding-system coding-system))))
2700 :
2701 : ;; On Windows, override locale-coding-system,
2702 : ;; default-file-name-coding-system, keyboard-coding-system,
2703 : ;; terminal-coding-system with the ANSI or console codepage.
2704 0 : (when (and (eq system-type 'windows-nt)
2705 0 : (boundp 'w32-ansi-code-page))
2706 0 : (let* ((ansi-code-page-coding
2707 0 : (intern (format "cp%d" w32-ansi-code-page)))
2708 : (code-page-coding
2709 0 : (if noninteractive
2710 0 : (intern (format "cp%d" (w32-get-console-codepage)))
2711 0 : ansi-code-page-coding))
2712 : (output-coding
2713 0 : (if noninteractive
2714 0 : (intern (format "cp%d" (w32-get-console-output-codepage)))
2715 0 : code-page-coding)))
2716 0 : (when (coding-system-p code-page-coding)
2717 0 : (or output-coding (setq output-coding code-page-coding))
2718 0 : (unless frame (setq locale-coding-system code-page-coding))
2719 0 : (set-keyboard-coding-system code-page-coding frame)
2720 0 : (set-terminal-coding-system output-coding frame)
2721 0 : (setq default-file-name-coding-system ansi-code-page-coding))))
2722 :
2723 0 : (when (eq system-type 'darwin)
2724 : ;; On Darwin, file names are always encoded in utf-8, no matter
2725 : ;; the locale.
2726 0 : (setq default-file-name-coding-system 'utf-8-unix)
2727 : ;; macOS's Terminal.app by default uses utf-8 regardless of
2728 : ;; the locale.
2729 0 : (when (and (null window-system)
2730 0 : (equal (getenv "TERM_PROGRAM" frame) "Apple_Terminal"))
2731 0 : (set-terminal-coding-system 'utf-8)
2732 0 : (set-keyboard-coding-system 'utf-8)))
2733 :
2734 : ;; Default to A4 paper if we're not in a C, POSIX or US locale.
2735 : ;; (See comments in Flocale_info.)
2736 0 : (unless frame
2737 0 : (let ((paper (locale-info 'paper))
2738 : locale)
2739 0 : (if paper
2740 : ;; This will always be null at the time of writing.
2741 0 : (cond
2742 0 : ((equal paper '(216 279))
2743 0 : (setq ps-paper-type 'letter))
2744 0 : ((equal paper '(210 297))
2745 0 : (setq ps-paper-type 'a4)))
2746 0 : (let ((vars '("LC_ALL" "LC_PAPER" "LANG")))
2747 0 : (while (and vars (= 0 (length locale)))
2748 0 : (setq locale (getenv (pop vars) frame))))
2749 0 : (when locale
2750 : ;; As of glibc 2.2.5, these are the only US Letter locales,
2751 : ;; and the rest are A4.
2752 0 : (setq ps-paper-type
2753 0 : (or (locale-name-match locale '(("c$" . letter)
2754 : ("posix$" . letter)
2755 : (".._us" . letter)
2756 : (".._pr" . letter)
2757 : (".._ca" . letter)
2758 : ("enu$" . letter) ; Windows
2759 : ("esu$" . letter)
2760 : ("enc$" . letter)
2761 0 : ("frc$" . letter)))
2762 0 : 'a4)))))))
2763 : nil)
2764 :
2765 : ;;; Character property
2766 :
2767 : (put 'char-code-property-table 'char-table-extra-slots 5)
2768 :
2769 : (defun define-char-code-property (name table &optional docstring)
2770 : "Define NAME as a character code property given by TABLE.
2771 : TABLE is a char-table of purpose `char-code-property-table' with
2772 : these extra slots:
2773 : 1st: NAME.
2774 : 2nd: Function to call to get a property value of a character.
2775 : It is called with three arguments CHAR, VAL, and TABLE, where
2776 : CHAR is a character, VAL is the value of (aref TABLE CHAR).
2777 : 3rd: Function to call to put a property value of a character.
2778 : It is called with the same arguments as above.
2779 : 4th: Function to call to get a description string of a property value.
2780 : It is called with one argument VALUE, a property value.
2781 : 5th: Data used by the above functions.
2782 :
2783 : TABLE may be a name of file to load to build a char-table. The
2784 : file should contain a call of `define-char-code-property' with a
2785 : char-table of the above format as the argument TABLE.
2786 :
2787 : TABLE may also be nil, in which case no property value is pre-assigned.
2788 :
2789 : Optional 3rd argument DOCSTRING is a documentation string of the property.
2790 :
2791 : See also the documentation of `get-char-code-property' and
2792 : `put-char-code-property'."
2793 42 : (or (symbolp name)
2794 42 : (error "Not a symbol: %s" name))
2795 42 : (if (char-table-p table)
2796 20 : (or (and (eq (char-table-subtype table) 'char-code-property-table)
2797 20 : (eq (char-table-extra-slot table 0) name))
2798 20 : (error "Invalid char-table: %s" table))
2799 22 : (or (stringp table)
2800 42 : (error "Not a char-table nor a file name: %s" table)))
2801 42 : (if (stringp table) (setq table (purecopy table)))
2802 42 : (setf (alist-get name char-code-property-alist) table)
2803 42 : (put name 'char-code-property-documentation (purecopy docstring)))
2804 :
2805 : (defvar char-code-property-table
2806 : (make-char-table 'char-code-property-table)
2807 : "Char-table containing a property list of each character code.
2808 : This table is used for properties not listed in `char-code-property-alist'.
2809 : See also the documentation of `get-char-code-property' and
2810 : `put-char-code-property'.")
2811 :
2812 : (defun get-char-code-property (char propname)
2813 : "Return the value of CHAR's PROPNAME property."
2814 175190 : (let ((table (unicode-property-table-internal propname)))
2815 175190 : (if table
2816 175190 : (let ((func (char-table-extra-slot table 1)))
2817 175190 : (if (functionp func)
2818 115145 : (funcall func char (aref table char) table)
2819 175190 : (get-unicode-property-internal table char)))
2820 175190 : (plist-get (aref char-code-property-table char) propname))))
2821 :
2822 : (defun put-char-code-property (char propname value)
2823 : "Store CHAR's PROPNAME property with VALUE.
2824 : It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
2825 0 : (let ((table (unicode-property-table-internal propname)))
2826 0 : (if table
2827 0 : (let ((func (char-table-extra-slot table 2)))
2828 0 : (if (functionp func)
2829 0 : (funcall func char value table)
2830 0 : (put-unicode-property-internal table char value)))
2831 0 : (let* ((plist (aref char-code-property-table char))
2832 0 : (x (plist-put plist propname value)))
2833 0 : (or (eq x plist)
2834 0 : (aset char-code-property-table char x))))
2835 0 : value))
2836 :
2837 : (defun char-code-property-description (prop value)
2838 : "Return a description string of character property PROP's value VALUE.
2839 : If there's no description string for VALUE, return nil."
2840 0 : (let ((table (unicode-property-table-internal prop)))
2841 0 : (if table
2842 0 : (let ((func (char-table-extra-slot table 3)))
2843 0 : (if (functionp func)
2844 0 : (funcall func value))))))
2845 :
2846 :
2847 : ;; Pretty description of encoded string
2848 :
2849 : ;; Alist of ISO 2022 control code vs the corresponding mnemonic string.
2850 : (defconst iso-2022-control-alist
2851 : '((?\x1b . "ESC")
2852 : (?\x0e . "SO")
2853 : (?\x0f . "SI")
2854 : (?\x8e . "SS2")
2855 : (?\x8f . "SS3")
2856 : (?\x9b . "CSI")))
2857 :
2858 : (defun encoded-string-description (str coding-system)
2859 : "Return a pretty description of STR that is encoded by CODING-SYSTEM."
2860 0 : (setq str (string-as-unibyte str))
2861 0 : (mapconcat
2862 0 : (if (and coding-system (eq (coding-system-type coding-system) 'iso-2022))
2863 : ;; Try to get a pretty description for ISO 2022 escape sequences.
2864 0 : (function (lambda (x) (or (cdr (assq x iso-2022-control-alist))
2865 0 : (format "#x%02X" x))))
2866 0 : (function (lambda (x) (format "#x%02X" x))))
2867 0 : str " "))
2868 :
2869 : (defun encode-coding-char (char coding-system &optional charset)
2870 : "Encode CHAR by CODING-SYSTEM and return the resulting string.
2871 : If CODING-SYSTEM can't safely encode CHAR, return nil.
2872 : The 3rd optional argument CHARSET, if non-nil, is a charset preferred
2873 : on encoding."
2874 0 : (let* ((str1 (string-as-multibyte (string char)))
2875 0 : (str2 (string-as-multibyte (string char char)))
2876 0 : (found (find-coding-systems-string str1))
2877 : enc1 enc2 i1 i2)
2878 0 : (if (and (consp found)
2879 0 : (eq (car found) 'undecided))
2880 0 : str1
2881 0 : (when (memq (coding-system-base coding-system) found)
2882 : ;; We must find the encoded string of CHAR. But, just encoding
2883 : ;; CHAR will put extra control sequences (usually to designate
2884 : ;; ASCII charset) at the tail if type of CODING is ISO 2022.
2885 : ;; To exclude such tailing bytes, we at first encode one-char
2886 : ;; string and two-char string, then check how many bytes at the
2887 : ;; tail of both encoded strings are the same.
2888 :
2889 0 : (when charset
2890 0 : (put-text-property 0 1 'charset charset str1)
2891 0 : (put-text-property 0 2 'charset charset str2))
2892 0 : (setq enc1 (encode-coding-string str1 coding-system)
2893 0 : i1 (length enc1)
2894 0 : enc2 (encode-coding-string str2 coding-system)
2895 0 : i2 (length enc2))
2896 0 : (while (and (> i1 0) (= (aref enc1 (1- i1)) (aref enc2 (1- i2))))
2897 0 : (setq i1 (1- i1) i2 (1- i2)))
2898 :
2899 : ;; Now (substring enc1 i1) and (substring enc2 i2) are the same,
2900 : ;; and they are the extra control sequences at the tail to
2901 : ;; exclude.
2902 0 : (substring enc2 0 i2)))))
2903 :
2904 : ;; Backwards compatibility. These might be better with :init-value t,
2905 : ;; but that breaks loadup.
2906 : (define-minor-mode unify-8859-on-encoding-mode
2907 : "Exists only for backwards compatibility."
2908 : :group 'mule
2909 : :global t)
2910 : ;; Doc said "obsolete" in 23.1, this statement only added in 24.1.
2911 : (make-obsolete 'unify-8859-on-encoding-mode "don't use it." "23.1")
2912 :
2913 : (define-minor-mode unify-8859-on-decoding-mode
2914 : "Exists only for backwards compatibility."
2915 : :group 'mule
2916 : :global t)
2917 : ;; Doc said "obsolete" in 23.1, this statement only added in 24.1.
2918 : (make-obsolete 'unify-8859-on-decoding-mode "don't use it." "23.1")
2919 :
2920 : (defvar nonascii-insert-offset 0)
2921 : (make-obsolete-variable 'nonascii-insert-offset "do not use it." "23.1")
2922 : (defvar nonascii-translation-table nil)
2923 : (make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1")
2924 :
2925 : (defvar ucs-names nil
2926 : "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.")
2927 :
2928 : (defun ucs-names ()
2929 : "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'."
2930 0 : (or ucs-names
2931 0 : (let ((ranges
2932 : '((#x0000 . #x33FF)
2933 : ;; (#x3400 . #x4DBF) CJK Ideographs Extension A
2934 : (#x4DC0 . #x4DFF)
2935 : ;; (#x4E00 . #x9FFF) CJK Unified Ideographs
2936 : (#xA000 . #xD7FF)
2937 : ;; (#xD800 . #xFAFF) Surrogate/Private
2938 : (#xFB00 . #x134FF)
2939 : ;; (#x13500 . #x143FF) unused
2940 : (#x14400 . #x14646)
2941 : ;; (#x14647 . #x167FF) unused
2942 : (#x16800 . #x16F9F)
2943 : (#x16FE0 . #x16FE0)
2944 : ;; (#x17000 . #x187FF) Tangut Ideographs
2945 : ;; (#x18800 . #x18AFF) Tangut Components
2946 : ;; (#x18B00 . #x1AFFF) unused
2947 : (#x1B000 . #x1B12F)
2948 : ;; (#x1B130 . #x1B16F) unused
2949 : (#x1B170 . #x1B2FF)
2950 : ;; (#x1B300 . #x1BBFF) unused
2951 : (#x1BC00 . #x1BCAF)
2952 : ;; (#x1BCB0 . #x1CFFF) unused
2953 : (#x1D000 . #x1FFFF)
2954 : ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
2955 : (#xE0000 . #xE01FF)))
2956 : (gc-cons-threshold 10000000)
2957 : names)
2958 0 : (dolist (range ranges)
2959 0 : (let ((c (car range))
2960 0 : (end (cdr range)))
2961 0 : (while (<= c end)
2962 0 : (let ((new-name (get-char-code-property c 'name))
2963 0 : (old-name (get-char-code-property c 'old-name)))
2964 : ;; In theory this code could end up pushing an "old-name" that
2965 : ;; shadows a "new-name" but in practice every time an
2966 : ;; `old-name' conflicts with a `new-name', the newer one has a
2967 : ;; higher code, so it gets pushed later!
2968 0 : (if new-name (push (cons new-name c) names))
2969 0 : (if old-name (push (cons old-name c) names))
2970 0 : (setq c (1+ c))))))
2971 : ;; Special case for "BELL" which is apparently the only char which
2972 : ;; doesn't have a new name and whose old-name is shadowed by a newer
2973 : ;; char with that name.
2974 0 : (setq ucs-names `(("BELL (BEL)" . 7) ,@names)))))
2975 :
2976 : (defun mule--ucs-names-annotation (name)
2977 : ;; FIXME: It would be much better to add this annotation before rather than
2978 : ;; after the char name, so the annotations are aligned.
2979 : ;; FIXME: The default behavior of displaying annotations in italics
2980 : ;; doesn't work well here.
2981 0 : (let ((char (assoc name ucs-names)))
2982 0 : (when char (format " (%c)" (cdr char)))))
2983 :
2984 : (defun char-from-name (string &optional ignore-case)
2985 : "Return a character as a number from its Unicode name STRING.
2986 : If optional IGNORE-CASE is non-nil, ignore case in STRING.
2987 : Return nil if STRING does not name a character."
2988 0 : (or (cdr (assoc-string string (ucs-names) ignore-case))
2989 0 : (let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
2990 0 : (when minus
2991 : ;; Parse names like "VARIATION SELECTOR-17" and "CJK
2992 : ;; COMPATIBILITY IDEOGRAPH-F900" that are not in ucs-names.
2993 0 : (ignore-errors
2994 0 : (let* ((case-fold-search ignore-case)
2995 0 : (vs (string-match-p "\\`VARIATION SELECTOR-" string))
2996 0 : (minus-num (string-to-number (substring string minus)
2997 0 : (if vs 10 16)))
2998 0 : (vs-offset (if vs (if (< minus-num -16) #xE00EF #xFDFF) 0))
2999 0 : (code (- vs-offset minus-num))
3000 0 : (name (get-char-code-property code 'name)))
3001 0 : (when (eq t (compare-strings string nil nil name nil nil
3002 0 : ignore-case))
3003 0 : code)))))))
3004 :
3005 : (defun read-char-by-name (prompt)
3006 : "Read a character by its Unicode name or hex number string.
3007 : Display PROMPT and read a string that represents a character by its
3008 : Unicode property `name' or `old-name'.
3009 :
3010 : This function returns the character as a number.
3011 :
3012 : You can type a few of the first letters of the Unicode name and
3013 : use completion. If you type a substring of the Unicode name
3014 : preceded by an asterisk `*' and use completion, it will show all
3015 : the characters whose names include that substring, not necessarily
3016 : at the beginning of the name.
3017 :
3018 : Accept a name like \"CIRCULATION FUNCTION\", a hexadecimal
3019 : number like \"2A10\", or a number in hash notation (e.g.,
3020 : \"#x2a10\" for hex, \"10r10768\" for decimal, or \"#o25020\" for
3021 : octal). Treat otherwise-ambiguous strings like \"BED\" (U+1F6CF)
3022 : as names, not numbers."
3023 0 : (let* ((enable-recursive-minibuffers t)
3024 : (completion-ignore-case t)
3025 : (input
3026 0 : (completing-read
3027 0 : prompt
3028 : (lambda (string pred action)
3029 0 : (if (eq action 'metadata)
3030 : '(metadata
3031 : (annotation-function . mule--ucs-names-annotation)
3032 : (category . unicode-name))
3033 0 : (complete-with-action action (ucs-names) string pred)))))
3034 : (char
3035 0 : (cond
3036 0 : ((char-from-name input t))
3037 0 : ((string-match-p "\\`[0-9a-fA-F]+\\'" input)
3038 0 : (ignore-errors (string-to-number input 16)))
3039 0 : ((string-match-p "\\`#\\([bBoOxX]\\|[0-9]+[rR]\\)[0-9a-zA-Z]+\\'"
3040 0 : input)
3041 0 : (ignore-errors (read input))))))
3042 0 : (unless (characterp char)
3043 0 : (error "Invalid character"))
3044 0 : char))
3045 :
3046 : (define-obsolete-function-alias 'ucs-insert 'insert-char "24.3")
3047 : (define-key ctl-x-map "8\r" 'insert-char)
3048 :
3049 : ;;; mule-cmds.el ends here
|