guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-1-18-g904


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-1-18-g904a78f
Date: Wed, 29 Jul 2009 13:53:09 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=904a78f11d2d11a58d5df365a44c4fbbd4c96df3

The branch, master has been updated
       via  904a78f11d2d11a58d5df365a44c4fbbd4c96df3 (commit)
      from  77332b21a01fac906ae4707426e00f01e62c0415 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 904a78f11d2d11a58d5df365a44c4fbbd4c96df3
Author: Michael Gran <address@hidden>
Date:   Wed Jul 29 06:38:32 2009 -0700

    Add 32-bit characters
    
    This adds the 32-bit standalone characters.  Strings are still
    8-bit.  Characters larger than 8-bit can only be entered or
    displayed in octal format at this point.  At this point, the
    terminal's display encoding is expected to be Latin-1.
    
            * module/language/assembly/compile-bytecode.scm (write-bytecode):
            add 32-bit char
    
            * module/language/assembly.scm (object->assembly): add 32-bit char
            (assembly->object): add 32-bit char
    
            * libguile/vm-i-system.c (make-char32): new op
    
            * libguile/print.c (iprin1): print 32-bit char
    
            * libguile/numbers.h: add type scm_t_wchar
    
            * libguile/numbers.c: add type scm_t_wchar
    
            * libguile/chars.h: new type scm_t_wchar
            (SCM_CODEPOINT_MAX): new
            (SCM_IS_UNICODE_CHAR): new
            (SCM_MAKE_CHAR): operate on 32-bit char
    
            * libguile/chars.c: comparison operators now use Unicode
            codepoints
            (scm_c_upcase): now receives and returns scm_t_wchar
            (scm_c_downcase): now receives and returns scm_t_wchar

-----------------------------------------------------------------------

Summary of changes:
 libguile/chars.c                              |   68 ++++++++++++++-----------
 libguile/chars.h                              |   27 ++++++++--
 libguile/numbers.c                            |    8 +++
 libguile/numbers.h                            |   10 +++-
 libguile/print.c                              |   31 +++++++++--
 libguile/vm-i-system.c                        |   13 +++++
 module/language/assembly.scm                  |   11 ++++-
 module/language/assembly/compile-bytecode.scm |    1 +
 8 files changed, 126 insertions(+), 43 deletions(-)

diff --git a/libguile/chars.c b/libguile/chars.c
index 511ffc7..5a53c45 100644
--- a/libguile/chars.c
+++ b/libguile/chars.c
@@ -1,4 +1,4 @@
-/*     Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008 Free 
Software Foundation, Inc.
+/*     Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free 
Software Foundation, Inc.
  * 
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -24,6 +24,8 @@
 
 #include <ctype.h>
 #include <limits.h>
+#include <unicase.h>
+
 #include "libguile/_scm.h"
 #include "libguile/validate.h"
 
@@ -55,7 +57,7 @@ SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr, 
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than @var{y} in the ASCII 
sequence,\n"
+            "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode 
sequence,\n"
             "else @code{#f}.")
 #define FUNC_NAME s_scm_char_less_p
 {
@@ -68,7 +70,7 @@ SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
 SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
             "Return @code{#t} iff @var{x} is less than or equal to @var{y} in 
the\n"
-            "ASCII sequence, else @code{#f}.")
+            "Unicode sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_leq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -79,7 +81,7 @@ SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
ASCII\n"
+            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
Unicode\n"
             "sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_gr_p
 {
@@ -92,7 +94,7 @@ SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
 SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
             "Return @code{#t} iff @var{x} is greater than or equal to @var{y} 
in the\n"
-            "ASCII sequence, else @code{#f}.")
+            "Unicode sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_geq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -104,7 +106,7 @@ SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
 SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
             "Return @code{#t} iff @var{x} is the same character as @var{y} 
ignoring\n"
-            "case, else @code{#f}.")
+            "case, else @code{#f}.  Case is locale free and not context 
sensitive.")
 #define FUNC_NAME s_scm_char_ci_eq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -115,8 +117,9 @@ SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than @var{y} in the ASCII 
sequence\n"
-            "ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
less\n"
+            "than the Unicode uppercase form @var{y} in the Unicode 
sequence,\n"
+            "else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_less_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -127,8 +130,9 @@ SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", 
scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than or equal to @var{y} in 
the\n"
-            "ASCII sequence ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
less\n"
+            "than or equal to the Unicode uppercase form of @var{y} in the\n"
+            "Unicode  sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_leq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -139,8 +143,9 @@ SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", 
scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
ASCII\n"
-            "sequence ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
greater\n"
+            "than the Unicode uppercase form of @var{y} in the Unicode\n"
+            "sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_gr_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -151,8 +156,9 @@ SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than or equal to @var{y} 
in the\n"
-            "ASCII sequence ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
greater\n"
+            "than or equal to the Unicode uppercase form of @var{y} in the\n"
+            "Unicode sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_geq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -233,7 +239,7 @@ SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
 #define FUNC_NAME s_scm_char_to_integer
 {
   SCM_VALIDATE_CHAR (1, chr);
-  return scm_from_ulong (SCM_CHAR(chr));
+  return scm_from_uint32 (SCM_CHAR(chr));
 }
 #undef FUNC_NAME
 
@@ -244,7 +250,15 @@ SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
            "Return the character at position @var{n} in the ASCII sequence.")
 #define FUNC_NAME s_scm_integer_to_char
 {
-  return SCM_MAKE_CHAR (scm_to_uchar (n));
+  scm_t_wchar cn;
+
+  cn = scm_to_wchar (n);
+
+  /* Avoid the surrogates.  */
+  if (!SCM_IS_UNICODE_CHAR (cn))
+    scm_out_of_range (FUNC_NAME, n);
+
+  return SCM_MAKE_CHAR (cn);
 }
 #undef FUNC_NAME
 
@@ -255,7 +269,7 @@ SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
 #define FUNC_NAME s_scm_char_upcase
 {
   SCM_VALIDATE_CHAR (1, chr);
-  return SCM_MAKE_CHAR (toupper (SCM_CHAR (chr)));
+  return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
 }
 #undef FUNC_NAME
 
@@ -266,7 +280,7 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
 #define FUNC_NAME s_scm_char_downcase
 {
   SCM_VALIDATE_CHAR (1, chr);
-  return SCM_MAKE_CHAR (tolower (SCM_CHAR(chr)));
+  return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -279,23 +293,17 @@ TODO: change name  to scm_i_.. ? --hwn
 */
 
 
-int
-scm_c_upcase (unsigned int c)
+scm_t_wchar
+scm_c_upcase (scm_t_wchar c)
 {
-  if (c <= UCHAR_MAX)
-    return toupper (c);
-  else
-    return c;
+  return uc_toupper (c);
 }
 
 
-int
-scm_c_downcase (unsigned int c)
+scm_t_wchar
+scm_c_downcase (scm_t_wchar c)
 {
-  if (c <= UCHAR_MAX)
-    return tolower (c);
-  else
-    return c;
+  return uc_tolower (c);
 }
 
 
diff --git a/libguile/chars.h b/libguile/chars.h
index 5bceea5..e68f06d 100644
--- a/libguile/chars.h
+++ b/libguile/chars.h
@@ -3,7 +3,7 @@
 #ifndef SCM_CHARS_H
 #define SCM_CHARS_H
 
-/* Copyright (C) 1995,1996,2000,2001,2004, 2006, 2008 Free Software 
Foundation, Inc.
+/* Copyright (C) 1995,1996,2000,2001,2004, 2006, 2008, 2009 Free Software 
Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -28,9 +28,24 @@
 
 /* Immediate Characters
  */
+
+#ifndef SCM_WCHAR_DEFINED
+typedef scm_t_int32 scm_t_wchar;
+#define SCM_WCHAR_DEFINED
+#endif
+
 #define SCM_CHARP(x) (SCM_ITAG8(x) == scm_tc8_char)
-#define SCM_CHAR(x) ((unsigned int)SCM_ITAG8_DATA(x))
-#define SCM_MAKE_CHAR(x) SCM_MAKE_ITAG8((scm_t_bits) (unsigned char) (x), 
scm_tc8_char)
+#define SCM_CHAR(x) ((scm_t_wchar)SCM_ITAG8_DATA(x))
+
+#define SCM_MAKE_CHAR(x) ({scm_t_int32 _x = (x);                        \
+      _x < 0                                                            \
+        ? SCM_MAKE_ITAG8((scm_t_bits)(unsigned char)_x, scm_tc8_char)   \
+        : SCM_MAKE_ITAG8((scm_t_bits)_x, scm_tc8_char);})
+
+#define SCM_CODEPOINT_MAX (0x10ffff)
+#define SCM_IS_UNICODE_CHAR(c)                                          \
+  ((scm_t_wchar)(c)<=0xd7ff ||                                          \
+   ((scm_t_wchar)(c)>=0xe000 && (scm_t_wchar)(c)<=SCM_CODEPOINT_MAX))
 
 
 
@@ -55,9 +70,9 @@ SCM_API SCM scm_char_to_integer (SCM chr);
 SCM_API SCM scm_integer_to_char (SCM n);
 SCM_API SCM scm_char_upcase (SCM chr);
 SCM_API SCM scm_char_downcase (SCM chr);
-SCM_API int scm_c_upcase (unsigned int c);
-SCM_API int scm_c_downcase (unsigned int c);
-SCM_INTERNAL const char * scm_i_charname (SCM chr);
+SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c);
+SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c);
+SCM_INTERNAL const char *scm_i_charname (SCM chr);
 SCM_INTERNAL SCM scm_i_charname_to_char (const char *charname, 
                                          size_t charname_len);
 SCM_INTERNAL void scm_init_chars (void);
diff --git a/libguile/numbers.c b/libguile/numbers.c
index c7e0981..5f56b7a 100644
--- a/libguile/numbers.c
+++ b/libguile/numbers.c
@@ -5863,6 +5863,14 @@ scm_i_range_error (SCM bad_val, SCM min, SCM max)
 #define SCM_FROM_TYPE_PROTO(arg) scm_from_uint32 (arg)
 #include "libguile/conv-uinteger.i.c"
 
+#define TYPE                     scm_t_wchar
+#define TYPE_MIN                 (scm_t_int32)-1
+#define TYPE_MAX                 (scm_t_int32)0x10ffff
+#define SIZEOF_TYPE              4
+#define SCM_TO_TYPE_PROTO(arg)   scm_to_wchar (arg)
+#define SCM_FROM_TYPE_PROTO(arg) scm_from_wchar (arg)
+#include "libguile/conv-integer.i.c"
+
 #if SCM_HAVE_T_INT64
 
 #define TYPE                     scm_t_int64
diff --git a/libguile/numbers.h b/libguile/numbers.h
index 5bad447..f30f7d0 100644
--- a/libguile/numbers.h
+++ b/libguile/numbers.h
@@ -3,7 +3,7 @@
 #ifndef SCM_NUMBERS_H
 #define SCM_NUMBERS_H
 
-/* Copyright (C) 1995,1996,1998,2000,2001,2002,2003,2004,2005, 2006, 2008 Free 
Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001,2002,2003,2004,2005, 2006, 2008, 
2009 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -174,6 +174,11 @@ typedef struct scm_t_complex
   double imag;
 } scm_t_complex;
 
+#ifndef SCM_WCHAR_DEFINED
+typedef scm_t_int32 scm_t_wchar;
+#define SCM_WCHAR_DEFINED
+#endif
+
 
 
 SCM_API SCM scm_exact_p (SCM x);
@@ -322,6 +327,9 @@ SCM_API SCM          scm_from_int32  (scm_t_int32 x);
 SCM_API scm_t_uint32 scm_to_uint32   (SCM x);
 SCM_API SCM          scm_from_uint32 (scm_t_uint32 x);
 
+SCM_API scm_t_wchar  scm_to_wchar    (SCM x);
+SCM_API SCM          scm_from_wchar  (scm_t_wchar x);
+
 #if SCM_HAVE_T_INT64
 
 SCM_API scm_t_int64  scm_to_int64    (SCM x);
diff --git a/libguile/print.c b/libguile/print.c
index 6045718..1a5aebe 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -23,6 +23,7 @@
 #endif
 
 #include <errno.h>
+#include <unictype.h>
 
 #include "libguile/_scm.h"
 #include "libguile/chars.h"
@@ -436,7 +437,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
     case scm_tc3_imm24:
       if (SCM_CHARP (exp))
        {
-         long i = SCM_CHAR (exp);
+         scm_t_wchar i = SCM_CHAR (exp);
           const char *name;
 
          if (SCM_WRITINGP (pstate))
@@ -445,10 +446,30 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
              name = scm_i_charname (exp);
              if (name != NULL)
                scm_puts (name, port);
-             else if (i < 0 || i > '\177')
-               scm_intprint (i, 8, port);
-             else
-               scm_putc (i, port);
+             else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L
+                                                         | UC_CATEGORY_MASK_M 
+                                                         | UC_CATEGORY_MASK_N 
+                                                         | UC_CATEGORY_MASK_P 
+                                                         | UC_CATEGORY_MASK_S))
+                /* Print the character if is graphic character.  */
+                {
+                  if (i<256)
+                    {
+                      /* Character is graphic.  Print it.  */
+                      scm_putc (i, port);
+                    }
+                  else
+                    {
+                      /* Character is graphic but unrepresentable in
+                         this port's encoding.  */
+                      scm_intprint (i, 8, port);
+                    }
+                }
+              else
+                {
+                  /* Character is a non-graphical character.  */
+                  scm_intprint (i, 8, port);
+                }
            }
          else
            scm_putc (i, port);
diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c
index 726112c..ecafbeb 100644
--- a/libguile/vm-i-system.c
+++ b/libguile/vm-i-system.c
@@ -175,6 +175,19 @@ VM_DEFINE_INSTRUCTION (16, make_char8, "make-char8", 1, 0, 
1)
   NEXT;
 }
 
+VM_DEFINE_INSTRUCTION (42, make_char32, "make-char32", 4, 0, 1)
+{
+  scm_t_wchar v = 0;
+  v += FETCH ();
+  v <<= 8; v += FETCH ();
+  v <<= 8; v += FETCH ();
+  v <<= 8; v += FETCH ();
+  PUSH (SCM_MAKE_CHAR (v));
+  NEXT;
+}
+
+
+
 VM_DEFINE_INSTRUCTION (17, list, "list", 2, -1, 1)
 {
   unsigned h = FETCH ();
diff --git a/module/language/assembly.scm b/module/language/assembly.scm
index e7308ac..3a1da4f 100644
--- a/module/language/assembly.scm
+++ b/module/language/assembly.scm
@@ -131,7 +131,11 @@
                                   (bytevector-s64-set! bv 0 x (endianness big))
                                   bv))))
               (else #f)))
-       ((char? x) `(make-char8 ,(char->integer x)))
+       ((char? x)
+         (cond ((<= (char->integer x) #xff)
+                `(make-char8 ,(char->integer x)))
+               (else
+                `(make-char32 ,(char->integer x)))))
        (else #f)))
 
 (define (assembly->object code)
@@ -156,6 +160,11 @@
       (endianness big)))
     ((make-char8 ,n)
      (integer->char n))
+    ((make-char32 ,n1 ,n2 ,n3 ,n4)
+     (integer->char (+ (* n1 #x1000000)
+                       (* n2 #x10000)
+                       (* n3 #x100)
+                       n4)))
     ((load-string ,s) s)
     ((load-symbol ,s) (string->symbol s))
     ((load-keyword ,s) (symbol->keyword (string->symbol s)))
diff --git a/module/language/assembly/compile-bytecode.scm 
b/module/language/assembly/compile-bytecode.scm
index bf6c5f7..bed0fb2 100644
--- a/module/language/assembly/compile-bytecode.scm
+++ b/module/language/assembly/compile-bytecode.scm
@@ -122,6 +122,7 @@
                ;; meets the alignment requirements of `scm_objcode'.  See
                ;; `scm_c_make_objcode_slice ()'.
                (write-bytecode meta write get-addr '()))))
+        ((make-char32 ,x) (write-uint32-be x))
         ((load-unsigned-integer ,str) (write-loader str))
         ((load-integer ,str) (write-loader str))
         ((load-number ,str) (write-loader str))


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]