emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: MML charset tag regression


From: Kenichi Handa
Subject: Re: MML charset tag regression
Date: Tue, 20 May 2003 21:47:42 +0900 (JST)

I'm sorry for this late response.

In article <address@hidden>, Richard Stallman <address@hidden> writes:

>     Recently, many gtk clients have started supporting UTF8_STRING
>     without making their COMPOUND_TEXT support any better.  It may
>     cause no problem between gtk clients because they will request
>     only the type UTF8_STRING.  But it's too shortsighted an
>     approach.  :-(

> Is this an issue I should raise with the GTK developers?  Could they,
> should they, do something to encourage app developers to handle
> COMPOUND_TEXT properly?

Perhaps app developers are just using some GTK function for
X selection handling (I don't know whether such a function
actually exists).  In that case, improving that function would
solve the problem.

>     Perhaps we should make Emacs request UTF8_STRING first
>     if the locale is UTF8, and if that request fails,
>     request COMPOUND_TEXT.

> If we do this, it should be controlled by a Lisp variable, not by the
> locale.  Perhaps the Lisp variable could default based on the locale.

> Is there a reason not to do this unconditionally, always?

Perhaps, no.

I've just written these changes (not yet installed).  What
do you think?  When I finish writing ChangeLog and do some
more tests, I'll install it.

(1) Modify selection_data_to_lisp_data (in xselect.c) to
    simply return a unibyte string generated from the selection
    data, with the text property `foreign-selection' put on it
    (the value is a symbol naming the data type).  This property
    distinguishes it from the string returned by
    x_get_local_selection, and is also used to decode it
    properly.  With this change, we can completely get rid of
    the code-conversion routines in xselect.c.

(2) Modify x-get-selection (in select.el) so that it decodes
    the received data if it has the `foreign-selection'
    property.

(3) New variable x-select-request-type (in x-win.el) which
    is nil (default), a data-type, or a list of data-types.
    As I don't know how to write customization code for
    this kind of data type, it's currently just `defvar'ed.

(5) Modify x-cut-buffer-or-selection-value (in x-win.el) to
    call the following x-selection-value.

(6) New function x-selection-value (in x-win.el), a support
    function for x-cut-buffer-or-selection-value that calls
    x-get-selection according to x-select-request-type.  If
    x-select-request-type is nil, it tries both
    COMPOUND_TEXT and UTF8_STRING, and chooses the better one
    by the following heuristics.

;;   (1) If their lengths are different, select the longer one.  This
;;   is because an X client may just cut off unsupported characters.
;;
;;   (2) Otherwise, if the Nth character of CTEXT is an ASCII
;;   character that is different from the Nth character of UTF8,
;;   select UTF8.  This is because an X client may replace unsupported
;;   characters with some ASCII character (typically ` ' or `?') in
;;   CTEXT.
;;
;;   (3) Otherwise, select CTEXT.  This is because legacy charsets are
;;   better for the current Emacs, especially when the selection owner
;;   is also Emacs.

---
Ken'ichi HANDA
address@hidden

*** x-win.el.~1.163.~   Thu Mar 13 15:21:29 2003
--- x-win.el    Tue May 20 21:25:34 2003
***************
*** 2145,2150 ****
--- 2145,2249 ----
      (setq x-last-selected-text-clipboard text))
    )
  
+ (defvar x-select-request-type nil
+   "*Data type request for X selection.
+ The value is nil, one of the following data types, or a list of them:
+   `COMPOUND_TEXT', `UTF8_STRING', `STRING', `TEXT'
+ 
+ If the value is nil, try `COMPOUND_TEXT' and `UTF8_STRING', and
+ use the more appropriate result.  If both fail, try `STRING'.
+ 
+ If the value is one of the above symbols, try only the specified
+ type.
+ 
+ If the value is a list of them, try each of them in the specified
+ order until one succeeds.")
+ 
+ ;; Helper function for x-selection-value.  Given the same selection
+ ;; fetched as UTF8_STRING (UTF8) and as COMPOUND_TEXT (CTEXT), return
+ ;; whichever looks more trustworthy, using this heuristic:
+ ;;
+ ;;   (1) If the lengths differ, return the longer one, because an X
+ ;;   client may simply drop characters it cannot represent.
+ ;;
+ ;;   (2) Otherwise, if some position holds an ASCII character in
+ ;;   CTEXT that differs from the character at the same position in
+ ;;   UTF8, return UTF8, because an X client may replace unsupported
+ ;;   characters with an ASCII one (typically ` ' or `?') in CTEXT.
+ ;;
+ ;;   (3) Otherwise, return CTEXT, because legacy charsets are
+ ;;   better for the current Emacs, especially when the selection
+ ;;   owner is also Emacs.
+ 
+ (defun x-select-utf8-or-ctext (utf8 ctext)
+   (let ((utf8-length (length utf8))
+         (ctext-length (length ctext)))
+     (cond ((> utf8-length ctext-length) utf8)
+           ((< utf8-length ctext-length) ctext)
+           (t
+            ;; Same length: scan for an ASCII substitute in CTEXT.
+            (let ((choice ctext)
+                  (pos 0)
+                  ch)
+              (while (< pos utf8-length)
+                (setq ch (aref ctext pos))
+                (if (or (>= ch 128) (= ch (aref utf8 pos)))
+                    (setq pos (1+ pos))
+                  ;; Mismatch found; pick UTF8 and end the scan.
+                  (setq choice utf8
+                        pos utf8-length)))
+              choice)))))
+ 
+ (defun x-selection-value (type)
+   "Return the text of the X selection TYPE, or nil if none is available.
+ TYPE is a selection name such as `PRIMARY' or `CLIPBOARD'.  The data
+ types to request are decided by `x-select-request-type'.  An error
+ signaled by `x-get-selection' is treated as a failed request, so the
+ next candidate data type is tried instead of aborting."
+   (let (text)
+     (cond ((null x-select-request-type)
+            (let (utf8 ctext utf8-coding)
+              ;; We try both UTF8_STRING and COMPOUND_TEXT, and choose
+              ;; the more appropriate one.  If both fail, try STRING.
+ 
+              ;; At first try UTF8_STRING.  Don't die if
+              ;; x-get-selection signals an error.
+              (setq utf8 (condition-case nil
+                             (x-get-selection type 'UTF8_STRING)
+                           (error nil))
+                    utf8-coding last-coding-system-used)
+              (if utf8
+                  ;; If it is a local selection (i.e. it carries no
+                  ;; `foreign-selection' property), choose it.
+                  (or (condition-case nil
+                          (get-text-property 0 'foreign-selection utf8)
+                        (error nil))
+                      (setq text utf8)))
+              ;; If not yet decided, try COMPOUND_TEXT.
+              (if (not text)
+                  (if (setq ctext (condition-case nil
+                                      (x-get-selection type 'COMPOUND_TEXT)
+                                    (error nil)))
+                      ;; If UTF8_STRING was also successful, choose the
+                      ;; more appropriate one from UTF8 and CTEXT.
+                      (if utf8
+                          (setq text (x-select-utf8-or-ctext utf8 ctext))
+                        ;; Otherwise, choose CTEXT.
+                        (setq text ctext))))
+              ;; If not yet decided, try STRING.
+              (or text
+                  (setq text (condition-case nil
+                                 (x-get-selection type 'STRING)
+                               (error nil))))
+              ;; The later requests may have changed
+              ;; last-coding-system-used; restore the value recorded
+              ;; right after the UTF8_STRING request if that is the
+              ;; result we ended up with.
+              (if (eq text utf8)
+                  (setq last-coding-system-used utf8-coding))))
+ 
+           ((consp x-select-request-type)
+            (let ((tail x-select-request-type))
+              (while (and tail (not text))
+                (condition-case nil
+                    (setq text (x-get-selection type (car tail)))
+                  (error nil))
+                ;; Advance inside the loop; otherwise a failed first
+                ;; request would be retried forever.
+                (setq tail (cdr tail)))))
+ 
+           (t
+            (condition-case nil
+                (setq text (x-get-selection type x-select-request-type))
+              (error nil))))
+ 
+     ;; Callers get plain text; reset the marker x-get-selection put
+     ;; on the string.
+     (if text
+         (put-text-property 0 (length text) 'foreign-selection nil text))
+     text))
+       
  ;;; Return the value of the current X selection.
  ;;; Consult the selection, and the cut buffer.  Treat empty strings
  ;;; as if they were unset.
***************
*** 2154,2168 ****
  (defun x-cut-buffer-or-selection-value ()
    (let (clip-text primary-text cut-text)
      (when x-select-enable-clipboard
!       ;; Don't die if x-get-selection signals an error.
!       (if (null clip-text)
!         (condition-case c
!             (setq clip-text (x-get-selection 'CLIPBOARD 'COMPOUND_TEXT))
!           (error nil)))
!       (if (null clip-text)
!         (condition-case c
!             (setq clip-text (x-get-selection 'CLIPBOARD 'STRING))
!           (error nil)))
        (if (string= clip-text "") (setq clip-text nil))
  
        ;; Check the CLIPBOARD selection for 'newness', is it different
--- 2253,2259 ----
  (defun x-cut-buffer-or-selection-value ()
    (let (clip-text primary-text cut-text)
      (when x-select-enable-clipboard
!       (setq clip-text (x-selection-value 'CLIPBOARD))
        (if (string= clip-text "") (setq clip-text nil))
  
        ;; Check the CLIPBOARD selection for 'newness', is it different
***************
*** 2182,2196 ****
              (setq x-last-selected-text-clipboard clip-text))))
        )
  
!     ;; Don't die if x-get-selection signals an error.
!     (if (null primary-text)
!       (condition-case c
!           (setq primary-text (x-get-selection 'PRIMARY 'COMPOUND_TEXT))
!         (error nil)))
!     (if (null primary-text)
!       (condition-case c
!           (setq primary-text (x-get-selection 'PRIMARY 'STRING))
!         (error nil)))
      ;; Check the PRIMARY selection for 'newness', is it different
      ;; from what we remebered them to be last time we did a
      ;; cut/paste operation.
--- 2273,2279 ----
              (setq x-last-selected-text-clipboard clip-text))))
        )
  
!     (setq primary-text (x-selection-value 'PRIMARY))
      ;; Check the PRIMARY selection for 'newness', is it different
      ;; from what we remebered them to be last time we did a
      ;; cut/paste operation.
***************
*** 2224,2229 ****
--- 2307,2315 ----
        nil)
       (t
            (setq x-last-selected-text-cut cut-text))))
+ 
+     ;; As we have done one selection, clear this now.
+     (setq next-selection-coding-system nil)
  
      ;; At this point we have recorded the current values for the
      ;; selection from clipboard (if we are supposed to) primary,
*** select.el.~1.19.~   Wed Jan 29 22:04:38 2003
--- select.el   Tue May 20 21:37:41 2003
***************
*** 38,44 ****
  TYPE may be `SECONDARY' or `CLIPBOARD', in addition to `PRIMARY'.
  DATA-TYPE is usually `STRING', but can also be one of the symbols
  in `selection-converter-alist', which see."
!   (x-get-selection-internal (or type 'PRIMARY) (or data-type 'STRING)))
  
  (defun x-get-clipboard ()
    "Return text pasted to the clipboard."
--- 38,55 ----
  TYPE may be `SECONDARY' or `CLIPBOARD', in addition to `PRIMARY'.
  DATA-TYPE is usually `STRING', but can also be one of the symbols
  in `selection-converter-alist', which see."
!   (let ((data (x-get-selection-internal (or type 'PRIMARY)
!                                       (or data-type 'STRING)))
!       coding)
!     (when (and data
!              (setq data-type (get-text-property 0 'foreign-selection data)))
!       (setq coding (if (eq data-type 'UTF8_STRING)
!                      'utf-8
!                    (or next-selection-coding-system
!                        selection-coding-system))
!           data (decode-coding-string data coding))
!       (put-text-property 0 (length data) 'foreign-selection data-type data))
!     data))
  
  (defun x-get-clipboard ()
    "Return text pasted to the clipboard."
*** xselect.c.~1.128.~  Mon Apr  7 11:03:27 2003
--- xselect.c   Tue May 20 11:55:40 2003
***************
*** 29,38 ****
  #include "frame.h"    /* Need this to get the X window of selected_frame */
  #include "blockinput.h"
  #include "buffer.h"
- #include "charset.h"
- #include "coding.h"
  #include "process.h"
- #include "composite.h"
  
  struct prop_location;
  
--- 29,35 ----
***************
*** 114,119 ****
--- 111,118 ----
  /* Coding system for the next communicating with other X clients.  */
  static Lisp_Object Vnext_selection_coding_system;
  
+ static Lisp_Object Qforeign_selection;
+ 
  /* If this is a smaller number than the max-request-size of the display,
     emacs will use INCR selection transfer when the selection is larger
     than this.  The max-request-size is usually around 64k, so if you want
***************
*** 1605,1678 ****
    /* Convert any 8-bit data to a string, for compactness.  */
    else if (format == 8)
      {
!       Lisp_Object str;
!       int require_encoding = 0;
  
!       if (
! #if 1
!         1
! #else
!         ! NILP (buffer_defaults.enable_multibyte_characters)
! #endif
!         )
!       {
!         /* If TYPE is `TEXT' or `COMPOUND_TEXT', we should decode
!            DATA to Emacs internal format because DATA may be encoded
!            in compound text format.  In addtion, if TYPE is `STRING'
!            and DATA contains any 8-bit Latin-1 code, we should also
!            decode it.  */
!         if (type == dpyinfo->Xatom_TEXT
!             || type == dpyinfo->Xatom_COMPOUND_TEXT)
!           require_encoding = 1;
!         else if (type == XA_STRING)
!           {
!             int i;
!             for (i = 0; i < size; i++)
!               {
!                 if (data[i] >= 0x80)
!                   {
!                     require_encoding = 1;
!                     break;
!                   }
!               }
!           }
!       }
!       if (!require_encoding)
!       {
!         str = make_unibyte_string ((char *) data, size);
!         Vlast_coding_system_used = Qraw_text;
!       }
        else
!       {
!         int bufsize;
!         unsigned char *buf;
!         struct coding_system coding;
! 
!         if (NILP (Vnext_selection_coding_system))
!           Vnext_selection_coding_system = Vselection_coding_system;
!         setup_coding_system
!           (Fcheck_coding_system(Vnext_selection_coding_system), &coding);
!         coding.src_multibyte = 0;
!         coding.dst_multibyte = 1;
!         Vnext_selection_coding_system = Qnil;
!           coding.mode |= CODING_MODE_LAST_BLOCK;
!         /* We explicitely disable composition handling because
!            selection data should not contain any composition
!            sequence.  */
!         coding.composing = COMPOSITION_DISABLED;
!         bufsize = decoding_buffer_size (&coding, size);
!         buf = (unsigned char *) xmalloc (bufsize);
!         decode_coding (&coding, data, buf, size, bufsize);
!         str = make_string_from_bytes ((char *) buf,
!                                       coding.produced_char, coding.produced);
!         xfree (buf);
! 
!         if (SYMBOLP (coding.post_read_conversion)
!             && !NILP (Ffboundp (coding.post_read_conversion)))
!           str = run_pre_post_conversion_on_str (str, &coding, 0);
!         Vlast_coding_system_used = coding.symbol;
!       }
!       compose_chars_in_text (0, SCHARS (str), str);
        return str;
      }
    /* Convert a single atom to a Lisp_Symbol.  Convert a set of atoms to
--- 1604,1622 ----
    /* Convert any 8-bit data to a string, for compactness.  */
    else if (format == 8)
      {
!       Lisp_Object str, lispy_type;
  
!       str = make_unibyte_string ((char *) data, size);
!       /* Indicate that this string is from foreign selection thus the
!        caller of x-get-selection-internal has to decode it.  */
!       if (type == dpyinfo->Xatom_COMPOUND_TEXT)
!       lispy_type = QCOMPOUND_TEXT;
!       else if (type == dpyinfo->Xatom_UTF8_STRING)
!       lispy_type = QUTF8_STRING;
        else
!       lispy_type = QSTRING;
!       Fput_text_property (make_number (0), make_number (size),
!                         Qforeign_selection, lispy_type, str);
        return str;
      }
    /* Convert a single atom to a Lisp_Symbol.  Convert a set of atoms to
***************
*** 2451,2454 ****
--- 2395,2400 ----
    QCUT_BUFFER7 = intern ("CUT_BUFFER7"); staticpro (&QCUT_BUFFER7);
  #endif
  
+   Qforeign_selection = intern ("foreign-selection");
+   staticpro (&Qforeign_selection);
  }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]