[Guile-commits] GNU Guile branch, master, updated. release_1-9-4-55-gb158c2c

guile-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] GNU Guile branch, master, updated. release_1-9-4-55-gb158c2c

From:	Michael Gran
Subject:	[Guile-commits] GNU Guile branch, master, updated. release_1-9-4-55-gb158c2c
Date:	Wed, 28 Oct 2009 13:34:47 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=b158c2c3b592e13ae2694cb1d974bb0de60d19c6

The branch, master has been updated
       via  b158c2c3b592e13ae2694cb1d974bb0de60d19c6 (commit)
       via  3a5bc4fadac5b1574f081e7ecd3a043cbd395794 (commit)
      from  b02b05332f45fc6ac4f99556cda9fb7ee894e673 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit b158c2c3b592e13ae2694cb1d974bb0de60d19c6
Author: Michael Gran <address@hidden>
Date:   Wed Oct 28 06:27:47 2009 -0700

    Fix incorrect display of wide strings in decompilation
    
    A byte ordering error caused incorrect display of wide strings
    when using the ",c" decompilation from the REPL.
    
    * module/language/assembly/decompile-bytecode.scm (decode-bytecode):
      wide strings are encoded in native endianness

commit 3a5bc4fadac5b1574f081e7ecd3a043cbd395794
Author: Michael Gran <address@hidden>
Date:   Wed Oct 28 06:24:23 2009 -0700

    Modify bytevectors/string conversions to allow wide strings
    
    The bytevector to string conversion functions were accomplished
    by converting via locale strings.  This did not allow conversions
    of wide strings in an 8-bit locale.  This is avoided by using knowledge
    of the storage format of the string.
    
    * libguile/bytevectors.c (STRING_TO_UTF, scm_string_to_utf8): modify
      string to bytevector conversion to use internal string information
      (UTF_TO_STRING, scm_utf8_to_string): modify bytevector to string
      conversion

-----------------------------------------------------------------------

Summary of changes:
 libguile/bytevectors.c                          |  165 +++++++++-------------
 module/language/assembly/decompile-bytecode.scm |    2 +-
 2 files changed, 69 insertions(+), 98 deletions(-)

diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c
index b9d2d89..ce5fa96 100644
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -40,6 +40,7 @@
 #include <byteswap.h>
 #include <striconveh.h>
 #include <uniconv.h>
+#include <unistr.h>
 
 #ifdef HAVE_LIMITS_H
 # include <limits.h>
@@ -1871,58 +1872,50 @@ utf_encoding_name (char *name, size_t utf_width, SCM endianness)
 #define MAX_UTF_ENCODING_NAME_LEN  16
 
 /* Produce the body of a `string->utf' function.  */
-#define STRING_TO_UTF(_utf_width)                                      \
-  SCM utf;                                                             \
-  int err;                                                             \
-  char *c_str;                                                         \
-  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                          \
-  char *c_utf = NULL, *c_locale;                                       \
-  size_t c_strlen, c_raw_strlen, c_utf_len = 0;                                \
-                                                                       \
-  SCM_VALIDATE_STRING (1, str);                                                \
-  if (endianness == SCM_UNDEFINED)                                     \
-    endianness = scm_sym_big;                                          \
-  else                                                                 \
-    SCM_VALIDATE_SYMBOL (2, endianness);                               \
-                                                                       \
-  c_strlen = scm_c_string_length (str);                                        \
-  c_raw_strlen = c_strlen * ((_utf_width) / 8);                                \
-  do                                                                   \
-    {                                                                  \
-      c_str = (char *) alloca (c_raw_strlen + 1);                      \
-      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);   \
-    }                                                                  \
-  while (c_raw_strlen > c_strlen);                                     \
-  c_str[c_raw_strlen] = '\0';                                          \
-                                                                       \
-  utf_encoding_name (c_utf_name, (_utf_width), endianness);            \
-                                                                       \
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);         \
-  strcpy (c_locale, locale_charset ());                                        \
-                                                                       \
-  err = mem_iconveh (c_str, c_raw_strlen,                              \
-                    c_locale, c_utf_name,                              \
-                    iconveh_question_mark, NULL,                       \
-                    &c_utf, &c_utf_len);                               \
-  if (SCM_UNLIKELY (err))                                              \
-    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",       \
-                     scm_list_1 (str), err);                           \
-  else                                                                 \
-    {                                                                  \
-      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot \
-        use `scm_c_take_bytevector ()'.  */                            \
-      scm_dynwind_begin (0);                                           \
-      scm_dynwind_free (c_utf);                                                \
-                                                                       \
-      utf = make_bytevector (c_utf_len,                                        \
-                             SCM_ARRAY_ELEMENT_TYPE_VU8);              \
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,                    \
-             c_utf_len);                                               \
-                                                                       \
-      scm_dynwind_end ();                                              \
-    }                                                                  \
-                                                                       \
-  return (utf);
+#define STRING_TO_UTF(_utf_width)                                       \
+  SCM utf;                                                              \
+  int err;                                                              \
+  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
+  char *c_utf = NULL;                                                   \
+  size_t c_strlen, c_utf_len = 0;                                       \
+                                                                        \
+  SCM_VALIDATE_STRING (1, str);                                         \
+  if (endianness == SCM_UNDEFINED)                                      \
+    endianness = scm_sym_big;                                           \
+  else                                                                  \
+    SCM_VALIDATE_SYMBOL (2, endianness);                                \
+                                                                        \
+  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
+                                                                        \
+  c_strlen = scm_i_string_length (str);                                 \
+  if (scm_i_is_narrow_string (str))                                     \
+    {                                                                   \
+      err = mem_iconveh (scm_i_string_chars (str), c_strlen,            \
+                         "ISO-8859-1", c_utf_name,                      \
+                         iconveh_question_mark, NULL,                   \
+                         &c_utf, &c_utf_len);                           \
+      if (SCM_UNLIKELY (err))                                           \
+        scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",    \
+                          scm_list_1 (str), err);                       \
+    }                                                                   \
+  else                                                                  \
+    {                                                                   \
+      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);          \
+      c_utf = u32_conv_to_encoding (c_utf_name,                         \
+                                    iconveh_question_mark,              \
+                                    (scm_t_uint32 *) wbuf,              \
+                                    c_strlen, NULL, NULL, &c_utf_len);  \
+      if (SCM_UNLIKELY (c_utf == NULL))                                 \
+        scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",    \
+                          scm_list_1 (str), errno);                     \
+    }                                                                   \
+  scm_dynwind_begin (0);                                                \
+  scm_dynwind_free (c_utf);                                             \
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);        \
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);             \
+  scm_dynwind_end ();                                                   \
+                                                                        \
+  return (utf); 
 
 
 
@@ -1934,36 +1927,30 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
 #define FUNC_NAME s_scm_string_to_utf8
 {
   SCM utf;
-  char *c_str;
   uint8_t *c_utf;
-  size_t c_strlen, c_raw_strlen;
+  size_t c_strlen, c_utf_len = 0;
 
   SCM_VALIDATE_STRING (1, str);
 
-  c_strlen = scm_c_string_length (str);
-  c_raw_strlen = c_strlen;
-  do
+  c_strlen = scm_i_string_length (str);
+  if (scm_i_is_narrow_string (str))
+    c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark,
+                                   scm_i_string_chars (str), c_strlen,
+                                   NULL, NULL, &c_utf_len);
+  else
     {
-      c_str = (char *) alloca (c_raw_strlen + 1);
-      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
+      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);
+      c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len);
     }
-  while (c_raw_strlen > c_strlen);
-  c_str[c_raw_strlen] = '\0';
-
-  c_utf = u8_strconv_from_locale (c_str);
   if (SCM_UNLIKELY (c_utf == NULL))
     scm_syserror (FUNC_NAME);
   else
     {
-      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot
-        use `scm_c_take_bytevector ()'.  */
       scm_dynwind_begin (0);
       scm_dynwind_free (c_utf);
 
-      utf = make_bytevector (UTF_STRLEN (8, c_utf),
-                            SCM_ARRAY_ELEMENT_TYPE_VU8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,
-             UTF_STRLEN (8, c_utf));
+      utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
 
       scm_dynwind_end ();
     }
@@ -2000,10 +1987,10 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
 #define UTF_TO_STRING(_utf_width)                                      \
  SCM str = SCM_BOOL_F;                                                        \
   int err;                                                             \
-  char *c_str = NULL, *c_locale;                                       \
+  char *c_str = NULL;                                                   \
   char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                          \
-  const char *c_utf;                                                   \
-  size_t c_strlen = 0, c_utf_len;                                      \
+  char *c_utf;                                                          \
+  size_t c_strlen = 0, c_utf_len = 0;                                  \
                                                                        \
   SCM_VALIDATE_BYTEVECTOR (1, utf);                                    \
   if (endianness == SCM_UNDEFINED)                                     \
@@ -2015,20 +2002,19 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
   c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                      \
   utf_encoding_name (c_utf_name, (_utf_width), endianness);            \
                                                                        \
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);         \
-  strcpy (c_locale, locale_charset ());                                        \
-                                                                       \
   err = mem_iconveh (c_utf, c_utf_len,                                 \
-                    c_utf_name, c_locale,                              \
+                    c_utf_name, "UTF-8",                               \
                     iconveh_question_mark, NULL,                       \
                     &c_str, &c_strlen);                                \
   if (SCM_UNLIKELY (err))                                              \
     scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",    \
                      scm_list_1 (utf), err);                           \
   else                                                                 \
-    /* C_STR is null-terminated.  */                                   \
-    str = scm_take_locale_stringn (c_str, c_strlen);                   \
-                                                                       \
+    {                                                                   \
+      str = scm_from_stringn (c_str, c_strlen, "UTF-8",                 \
+                              SCM_FAILED_CONVERSION_ERROR);             \
+      free (c_str);                                                     \
+    }                                                                   \
   return (str);
 
 
@@ -2040,29 +2026,15 @@ SCM_DEFINE (scm_utf8_to_string, "utf8->string",
 #define FUNC_NAME s_scm_utf8_to_string
 {
   SCM str;
-  int err;
-  char *c_str = NULL, *c_locale;
   const char *c_utf;
-  size_t c_utf_len, c_strlen = 0;
+  size_t c_utf_len = 0;
 
   SCM_VALIDATE_BYTEVECTOR (1, utf);
 
   c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
-
-  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
-  strcpy (c_locale, locale_charset ());
-
   c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
-  err = mem_iconveh (c_utf, c_utf_len,
-                    "UTF-8", c_locale,
-                    iconveh_question_mark, NULL,
-                    &c_str, &c_strlen);
-  if (SCM_UNLIKELY (err))
-    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
-                     scm_list_1 (utf), err);
-  else
-    /* C_STR is null-terminated.  */
-    str = scm_take_locale_stringn (c_str, c_strlen);
+  str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
+                          SCM_FAILED_CONVERSION_ERROR);
 
   return (str);
 }
@@ -2090,7 +2062,6 @@ SCM_DEFINE (scm_utf32_to_string, "utf32->string",
 }
 #undef FUNC_NAME
 
-
 
 /* Bytevectors as generalized vectors & arrays.  */
 
diff --git a/module/language/assembly/decompile-bytecode.scm b/module/language/assembly/decompile-bytecode.scm
index 555ee12..6d41da2 100644
--- a/module/language/assembly/decompile-bytecode.scm
+++ b/module/language/assembly/decompile-bytecode.scm
@@ -119,7 +119,7 @@
                  (let lp ((i 0))
                    (if (= i len)
                        `(,inst ,(if (eq? inst 'load-wide-string)
-                                    (utf32->string seq)
+                                    (utf32->string seq (native-endianness))
                                     seq))
                        (begin
                          (sequence-set! seq i (pop))


hooks/post-receive
-- 
GNU Guile
[Prev in Thread]
Current Thread
[Next in Thread]
[Guile-commits] GNU Guile branch, master, updated. release_1-9-4-55-gb158c2c, Michael Gran <=
Prev by Date: [Guile-commits] GNU Guile branch, wip-wingo-elisp-badness, created. release_1-9-4-112-g87dd448
Next by Date: [Guile-commits] GNU Guile branch, wip-eval-cleanup, deleted. release_1-9-2-98-g2f2b9df
Previous by thread: [Guile-commits] GNU Guile branch, wip-wingo-elisp-badness, created. release_1-9-4-112-g87dd448
Next by thread: [Guile-commits] GNU Guile branch, wip-eval-cleanup, deleted. release_1-9-2-98-g2f2b9df
Index(es):
- Date
- Thread