guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-2-39-ge23


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-2-39-ge23106d
Date: Fri, 21 Aug 2009 15:58:36 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=e23106d53eb03d7cb4962282396269176ea7482e

The branch, master has been updated
       via  e23106d53eb03d7cb4962282396269176ea7482e (commit)
      from  90305ce9e429f0381ff79427e71287fdafd4d201 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit e23106d53eb03d7cb4962282396269176ea7482e
Author: Michael Gran <address@hidden>
Date:   Fri Aug 21 08:57:35 2009 -0700

    Add initial support for wide symbols
    
    * libguile/hash.c (scm_i_string_hash): new function
      (scm_hasher): don't unpack string: use scm_i_string_hash
    
    * libguile/hash.h: new declaration for scm_i_string_hash
    
    * libguile/print.c (quote_keywordish_symbol): use symbol accessors
      (scm_i_print_symbol_name): new function
      (scm_print_symbol_name): call scm_i_print_symbol_name
      (iprin1): use scm_i_print_symbol_name to print symbols
    
    * libguile/print.h: new declaration for scm_i_print_symbol_name
    
    * libguile/symbols.c (lookup_interned_symbol): now takes scheme string
      instead of c string; callers changed
      (lookup_interned_symbol): add wide symbol support
      (scm_i_c_mem2symbol): removed
      (scm_i_mem2symbol): removed and replaced with scm_i_str2symbol
      (scm_i_str2symbol): new function
      (scm_i_mem2uninterned_symbol): removed and replaced with
      scm_i_str2uninterned_symbol
      (scm_i_str2uninterned_symbol): new function
      (scm_make_symbol, scm_string_to_symbol, scm_from_locale_symbol)
      (scm_from_locale_symboln): use scm_i_str2symbol
    
    * test-suite/tests/symbols.test: new tests

-----------------------------------------------------------------------

Summary of changes:
 libguile/hash.c               |   17 ++++++-
 libguile/hash.h               |    1 +
 libguile/print.c              |   59 ++++++++++++----------
 libguile/print.h              |    1 +
 libguile/symbols.c            |  108 +++++++++++++++-------------------------
 test-suite/tests/symbols.test |   39 ++++++++------
 6 files changed, 113 insertions(+), 112 deletions(-)

diff --git a/libguile/hash.c b/libguile/hash.c
index d2fe177..e6e38ba 100644
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -50,6 +50,20 @@ scm_string_hash (const unsigned char *str, size_t len)
   return h;
 }
 
+unsigned long 
+scm_i_string_hash (SCM str)
+{
+  size_t len = scm_i_string_length (str);
+  size_t i = 0;
+
+  unsigned long h = 0;
+  while (len-- > 0)
+    h = (unsigned long) scm_i_string_ref (str, i++) + h * 37;
+
+  scm_remember_upto_here_1 (str);
+  return h;
+}
+
 
 /* Dirk:FIXME:: why downcase for characters? (2x: scm_hasher, scm_ihashv) */
 /* Dirk:FIXME:: scm_hasher could be made static. */
@@ -115,8 +129,7 @@ scm_hasher(SCM obj, unsigned long n, size_t d)
     case scm_tc7_string:
       {
        unsigned long hash =
-         scm_string_hash ((const unsigned char *) scm_i_string_chars (obj),
-                          scm_i_string_length (obj)) % n;
+         scm_i_string_hash (obj) % n;
        scm_remember_upto_here_1 (obj);
        return hash;
       }
diff --git a/libguile/hash.h b/libguile/hash.h
index 789595b..2ebc053 100644
--- a/libguile/hash.h
+++ b/libguile/hash.h
@@ -28,6 +28,7 @@
 
 
 SCM_API unsigned long scm_string_hash (const unsigned char *str, size_t len);
+SCM_INTERNAL unsigned long scm_i_string_hash (SCM str);
 SCM_API unsigned long scm_hasher (SCM obj, unsigned long n, size_t d);
 SCM_API unsigned long scm_ihashq (SCM obj, unsigned long n);
 SCM_API SCM scm_hashq (SCM obj, SCM n);
diff --git a/libguile/print.c b/libguile/print.c
index 74f7d8d..07bff47 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -295,13 +295,12 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref)
 /* Print the name of a symbol. */
 
 static int
-quote_keywordish_symbol (const char *str, size_t len)
+quote_keywordish_symbol (SCM symbol)
 {
   SCM option;
 
-  /* LEN is guaranteed to be > 0.
-   */
-  if (str[0] != ':' && str[len-1] != ':')
+  if (scm_i_symbol_ref (symbol, 0) != ':'
+      && scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) !=  ':')
     return 0;
 
   option = SCM_PRINT_KEYWORD_STYLE;
@@ -313,7 +312,7 @@ quote_keywordish_symbol (const char *str, size_t len)
 }
 
 void
-scm_print_symbol_name (const char *str, size_t len, SCM port)
+scm_i_print_symbol_name (SCM str, SCM port)
 {
   /* This points to the first character that has not yet been written to the
    * port. */
@@ -334,18 +333,20 @@ scm_print_symbol_name (const char *str, size_t len, SCM 
port)
    * simpler and faster. */
   int maybe_weird = 0;
   size_t mw_pos = 0;
+  size_t len = scm_i_symbol_length (str);
+  scm_t_wchar str0 = scm_i_symbol_ref (str, 0);
 
-  if (len == 0 || str[0] == '\'' || str[0] == '`' || str[0] == ','
-      || quote_keywordish_symbol (str, len)
-      || (str[0] == '.' && len == 1)
-      || scm_is_true (scm_c_locale_stringn_to_number (str, len, 10)))
+  if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ','
+      || quote_keywordish_symbol (str) 
+      || (str0 == '.' && len == 1)
+      || scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10)))
     {
       scm_lfwrite ("#{", 2, port);
       weird = 1;
     }
 
   for (end = pos; end < len; ++end)
-    switch (str[end])
+    switch (scm_i_symbol_ref (str, end))
       {
 #ifdef BRACKETS_AS_PARENS
       case '[':
@@ -370,11 +371,11 @@ scm_print_symbol_name (const char *str, size_t len, SCM 
port)
            weird = 1;
          }
        if (pos < end)
-         scm_lfwrite (str + pos, end - pos, port);
+         scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
        {
          char buf[2];
          buf[0] = '\\';
-         buf[1] = str[end];
+         buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end);
          scm_lfwrite (buf, 2, port);
        }
        pos = end + 1;
@@ -392,11 +393,18 @@ scm_print_symbol_name (const char *str, size_t len, SCM 
port)
        break;
       }
   if (pos < end)
-    scm_lfwrite (str + pos, end - pos, port);
+    scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
   if (weird)
     scm_lfwrite ("}#", 2, port);
 }
 
+void
+scm_print_symbol_name (const char *str, size_t len, SCM port)
+{
+  SCM symbol = scm_from_locale_symboln (str, len);
+  return scm_i_print_symbol_name (symbol, port);
+}
+
 /* Print generally.  Handles both write and display according to PSTATE.
  */
 SCM_GPROC(s_write, "write", 1, 1, 0, scm_write, g_write);
@@ -665,16 +673,13 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
        case scm_tc7_symbol:
          if (scm_i_symbol_is_interned (exp))
            {
-             scm_print_symbol_name (scm_i_symbol_chars (exp),
-                                    scm_i_symbol_length (exp), port);
+             scm_i_print_symbol_name (exp, port);
              scm_remember_upto_here_1 (exp);
            }
          else
            {
              scm_puts ("#<uninterned-symbol ", port);
-             scm_print_symbol_name (scm_i_symbol_chars (exp),
-                                    scm_i_symbol_length (exp),
-                                    port);
+             scm_i_print_symbol_name (exp, port);
              scm_putc (' ', port);
              scm_uintprint (SCM_UNPACK (exp), 16, port);
              scm_putc ('>', port);
@@ -726,14 +731,16 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
          EXIT_NESTED_DATA (pstate);
          break;
        case scm_tcs_subrs:
-         scm_puts (SCM_SUBR_GENERIC (exp)
-                   ? "#<primitive-generic "
-                   : "#<primitive-procedure ",
-                   port);
-         scm_puts (scm_i_symbol_chars (SCM_SUBR_NAME (exp)), port);
-         scm_putc ('>', port);
-         break;
-
+         {
+           SCM name = scm_symbol_to_string (SCM_SUBR_NAME (exp));
+           scm_puts (SCM_SUBR_GENERIC (exp)
+                     ? "#<primitive-generic "
+                     : "#<primitive-procedure ",
+                     port);
+           scm_lfwrite_str (name, port);
+           scm_putc ('>', port);
+           break;
+         }
        case scm_tc7_pws:
          scm_puts ("#<procedure-with-setter", port);
          {
diff --git a/libguile/print.h b/libguile/print.h
index 00648ef..3e2333d 100644
--- a/libguile/print.h
+++ b/libguile/print.h
@@ -82,6 +82,7 @@ SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM 
port);
 SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
 SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);
 SCM_API void scm_iprlist (char *hdr, SCM exp, int tlr, SCM port, 
scm_print_state *pstate);
+SCM_INTERNAL void scm_i_print_symbol_name (SCM sym, SCM port);
 SCM_API void scm_print_symbol_name (const char *str, size_t len, SCM port);
 SCM_API void scm_prin1 (SCM exp, SCM port, int writingp);
 SCM_API void scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate);
diff --git a/libguile/symbols.c b/libguile/symbols.c
index c0ba2a8..a932016 100644
--- a/libguile/symbols.c
+++ b/libguile/symbols.c
@@ -89,11 +89,11 @@ scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
 }
 
 static SCM
-lookup_interned_symbol (const char *name, size_t len,
-                       unsigned long raw_hash)
+lookup_interned_symbol (SCM name, unsigned long raw_hash)
 {
   /* Try to find the symbol in the symbols table */
   SCM l;
+  size_t len = scm_i_string_length (name);
   unsigned long hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
 
   for (l = SCM_HASHTABLE_BUCKET (symbols, hash);
@@ -104,15 +104,32 @@ lookup_interned_symbol (const char *name, size_t len,
       if (scm_i_symbol_hash (sym) == raw_hash
          && scm_i_symbol_length (sym) == len)
        {
-         const char *chrs = scm_i_symbol_chars (sym);
-         size_t i = len;
-
-         while (i != 0)
-           {
-             --i;
-             if (name[i] != chrs[i])
-               goto next_symbol;
-           }
+          size_t i = len;
+
+          /* Slightly faster path for comparing narrow to narrow.  */
+          if (scm_i_is_narrow_string (name) && scm_i_is_narrow_symbol (sym))
+            {
+              const char *chrs = scm_i_symbol_chars (sym);
+              const char *str = scm_i_string_chars (name);
+
+              while (i != 0)
+                {
+                  --i;
+                  if (str[i] != chrs[i])
+                    goto next_symbol;
+                }
+            }
+          else
+            {
+              /* Somewhat slower path for comparing narrow to wide or
+                 wide to wide.  */
+              while (i != 0)
+                {
+                  --i;
+                  if (scm_i_string_ref (name, i) != scm_i_symbol_ref (sym, i))
+                    goto next_symbol;
+                }
+            }
 
          return sym;
        }
@@ -142,32 +159,12 @@ intern_symbol (SCM symbol)
 }
 
 static SCM
-scm_i_c_mem2symbol (const char *name, size_t len)
-{
-  SCM symbol;
-  size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
-
-  symbol = lookup_interned_symbol (name, len, raw_hash);
-  if (scm_is_false (symbol))
-    {
-      /* The symbol was not found, create it.  */
-      symbol = scm_i_c_make_symbol (name, len, 0, raw_hash,
-                                   scm_cons (SCM_BOOL_F, SCM_EOL));
-      intern_symbol (symbol);
-    }
-
-  return symbol;
-}
-
-static SCM
-scm_i_mem2symbol (SCM str)
+scm_i_str2symbol (SCM str)
 {
   SCM symbol;
-  const char *name = scm_i_string_chars (str);
-  size_t len = scm_i_string_length (str);
-  size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
+  size_t raw_hash = scm_i_string_hash (str);
 
-  symbol = lookup_interned_symbol (name, len, raw_hash);
+  symbol = lookup_interned_symbol (str, raw_hash);
   if (scm_is_false (symbol))
     {
       /* The symbol was not found, create it.  */
@@ -181,11 +178,9 @@ scm_i_mem2symbol (SCM str)
 
 
 static SCM
-scm_i_mem2uninterned_symbol (SCM str)
+scm_i_str2uninterned_symbol (SCM str)
 {
-  const char *name = scm_i_string_chars (str);
-  size_t len = scm_i_string_length (str);
-  size_t raw_hash = scm_string_hash ((const unsigned char *) name, len);
+  size_t raw_hash = scm_i_string_hash (str);
 
   return scm_i_make_symbol (str, SCM_I_F_SYMBOL_UNINTERNED, 
                            raw_hash, scm_cons (SCM_BOOL_F, SCM_EOL));
@@ -220,7 +215,7 @@ SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
 #define FUNC_NAME s_scm_make_symbol
 {
   SCM_VALIDATE_STRING (1, name);
-  return scm_i_mem2uninterned_symbol (name);
+  return scm_i_str2uninterned_symbol (name);
 }
 #undef FUNC_NAME
 
@@ -282,7 +277,7 @@ SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
 #define FUNC_NAME s_scm_string_to_symbol
 {
   SCM_VALIDATE_STRING (1, string);
-  return scm_i_mem2symbol (string);
+  return scm_i_str2symbol (string);
 }
 #undef FUNC_NAME
 
@@ -389,44 +384,23 @@ SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
 SCM
 scm_from_locale_symbol (const char *sym)
 {
-  return scm_i_c_mem2symbol (sym, strlen (sym));
+  return scm_from_locale_symboln (sym, -1);
 }
 
 SCM
 scm_from_locale_symboln (const char *sym, size_t len)
 {
-  return scm_i_c_mem2symbol (sym, len);
+  SCM str = scm_from_locale_stringn (sym, len);
+  return scm_i_str2symbol (str);
 }
 
 SCM
 scm_take_locale_symboln (char *sym, size_t len)
 {
-  SCM res;
-  unsigned long raw_hash;
-
-  if (len == (size_t)-1)
-    len = strlen (sym);
-  else
-    {
-      /* Ensure STR is null terminated.  A realloc for 1 extra byte should
-         often be satisfied from the alignment padding after the block, with
-         no actual data movement.  */
-      sym = scm_realloc (sym, len+1);
-      sym[len] = '\0';
-    }
-
-  raw_hash = scm_string_hash ((unsigned char *)sym, len);
-  res = lookup_interned_symbol (sym, len, raw_hash);
-  if (scm_is_false (res))
-    {
-      res = scm_i_c_take_symbol (sym, len, 0, raw_hash,
-                                scm_cons (SCM_BOOL_F, SCM_EOL));
-      intern_symbol (res);
-    }
-  else
-    free (sym);
+  SCM str;
 
-  return res;
+  str = scm_take_locale_stringn (sym, len);
+  return scm_i_str2symbol (str);
 }
 
 SCM
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 3b1abe1..b6dbb9d 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -61,15 +61,13 @@
     (let ((s 'x0123456789012345678901234567890123456789))
       (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
 
-  ;; symbol->string isn't ready for UCS-4 yet
-
-  ;;(pass-if "short UCS-4-encoded symbols are not inlined"
-  ;;  (let ((s (string->symbol "\u0100")))
-  ;;    (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+  (pass-if "short UCS-4-encoded symbols are not inlined"
+    (let ((s (string->symbol "\u0100")))
+      (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
 
-  ;;(pass-if "long UCS-4-encoded symbols are not inlined"
-  ;;  (let ((s (string->symbol "\u010012345678901234567890123456789")))
-  ;;    (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+  (pass-if "long UCS-4-encoded symbols are not inlined"
+    (let ((s (string->symbol "\u010012345678901234567890123456789")))
+      (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
 
   (with-test-prefix "hashes"
   
@@ -99,16 +97,13 @@
       (let ((s (string->symbol "\xC0\xC1\xC2")))
         (not (assq-ref (%symbol-dump s) 'stringbuf-wide))))
 
-    ;; symbol->string isn't ready for UCS-4 yet
-
-    ;;(pass-if "BMP symbols are UCS-4 encoded"
-    ;;  (let ((s (string->symbol "\u0100\u0101\x0102")))
-    ;;    (assq-ref (%symbol-dump s) 'stringbuf-wide)))
+    (pass-if "BMP symbols are UCS-4 encoded"
+      (let ((s (string->symbol "\u0100\u0101\x0102")))
+        (assq-ref (%symbol-dump s) 'stringbuf-wide)))
 
-    ;;(pass-if "SMP symbols are UCS-4 encoded"
-    ;;  (let ((s (string->symbol "\U010300\u010301\x010302")))
-    ;;    (assq-ref (%symbol-dump s) 'stringbuf-wide)))
-    ))
+    (pass-if "SMP symbols are UCS-4 encoded"
+      (let ((s (string->symbol "\U010300\u010301\x010302")))
+        (assq-ref (%symbol-dump s) 'stringbuf-wide)))))
 
 ;;;
 ;;; symbol?
@@ -125,6 +120,16 @@
   (pass-if "symbol"
     (symbol? 'foo)))
 
+;;;
+;;; wide symbols
+;;;
+
+(with-test-prefix "BMP symbols"
+
+  (pass-if "BMP symbol's string"
+    (and (= 4 (string-length "abc\u0100"))
+         (string=? "abc\u0100" 
+                   (symbol->string (string->symbol "abc\u0100"))))))
 
 ;;;
 ;;; symbol->string


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]