guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, string_abstraction2, updated. release_


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, string_abstraction2, updated. release_1-9-1-137-g9b0c25f
Date: Tue, 11 Aug 2009 05:23:42 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=9b0c25f6d18d5be318ea3a47fd87cf7e63b689e1

The branch, string_abstraction2 has been updated
       via  9b0c25f6d18d5be318ea3a47fd87cf7e63b689e1 (commit)
       via  be6f5cfa23aeb33569959c18584d437a57971978 (commit)
      from  26e9742b9a89878e736d9759bb005ed3986a3bae (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 9b0c25f6d18d5be318ea3a47fd87cf7e63b689e1
Author: Michael Gran <address@hidden>
Date:   Mon Aug 10 22:21:21 2009 -0700

    More string and symbol tests
    
            * test-suite/tests/strings.test: more tests
    
            * test-suite/tests/symbols.test: more tests

commit be6f5cfa23aeb33569959c18584d437a57971978
Author: Michael Gran <address@hidden>
Date:   Mon Aug 10 22:18:47 2009 -0700

    Fix %string-dump and %symbol-dump fields
    
            * libguile/strings.c (scm_sys_string_dump): don't print
            stringbuf. Print read-only status.
            (scm_sys_symbol_dump): don't print stringbuf.  Print interned
            status.

-----------------------------------------------------------------------

Summary of changes:
 libguile/strings.c            |   24 ++--
 test-suite/tests/strings.test |  244 ++++++++++++++++++++++++++++++++++++++++-
 test-suite/tests/symbols.test |   75 +++++++++++++-
 3 files changed, 325 insertions(+), 18 deletions(-)

diff --git a/libguile/strings.c b/libguile/strings.c
index c3ebc50..040d754 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -868,8 +868,8 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, 
(SCM str),
             "@item shared\n"
             "If this string is a substring, it returns its parent string.\n"
             "Otherwise, it returns @code{#f}\n"
-            "@item stringbuf\n"
-            "The string buffer that contains this string's characters\n"
+            "@item read-only\n"
+            "@code{#t} if the string is read-only\n"
             "@item stringbuf-chars\n"
             "A new string containing this string's stringbuf's characters\n"
             "@item stringbuf-length\n"
@@ -911,10 +911,14 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, 
(SCM str),
       buf = STRING_STRINGBUF (str);
     }
 
+  if (IS_RO_STRING (str))
+    e5 = scm_cons (scm_from_locale_symbol ("read-only"),
+                   SCM_BOOL_T);
+  else
+    e5 = scm_cons (scm_from_locale_symbol ("read-only"),
+                   SCM_BOOL_F);
+      
   /* Stringbuf info */
-  e5 = scm_cons (scm_from_locale_symbol ("stringbuf"),
-                 buf);
-  
   if (!STRINGBUF_WIDE (buf))
     {
       size_t len = STRINGBUF_LENGTH (buf);
@@ -967,8 +971,8 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, 
(SCM sym),
             "The symbol itself\n"
             "@item hash\n"
             "Its hash value\n"
-            "@item stringbuf\n"
-            "The string buffer that contains this symbol's characters\n"
+            "@item interned\n"
+            "@code{#t} if it is an interned symbol\n"
             "@item stringbuf-chars\n"
             "A new string containing this symbols's stringbuf's characters\n"
             "@item stringbuf-length\n"
@@ -992,13 +996,11 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, 
(SCM sym),
                  sym);
   e2 = scm_cons (scm_from_locale_symbol ("hash"),
                  scm_from_ulong (scm_i_symbol_hash (sym)));
-
+  e3 = scm_cons (scm_from_locale_symbol ("interned"),
+                 scm_symbol_interned_p (sym));
   buf = SYMBOL_STRINGBUF (sym);
 
   /* Stringbuf info */
-  e3 = scm_cons (scm_from_locale_symbol ("stringbuf"),
-                 buf);
-  
   if (!STRINGBUF_WIDE (buf))
     {
       size_t len = STRINGBUF_LENGTH (buf);
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index ffc6955..d82a472 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -1,7 +1,7 @@
 ;;;; strings.test --- test suite for Guile's string functions    -*- scheme -*-
 ;;;; Jim Blandy <address@hidden> --- August 1999
 ;;;;
-;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008 Free Software 
Foundation, Inc.
+;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software 
Foundation, Inc.
 ;;;; 
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -20,14 +20,219 @@
 (define-module (test-strings)
   #:use-module (test-suite lib))
 
-
 (define exception:read-only-string
   (cons 'misc-error "^string is read-only"))
+(define exception:illegal-escape
+  (cons 'read-error "illegal character in escape sequence"))
 
 ;; Create a string from integer char values, eg. (string-ints 65) => "A"
 (define (string-ints . args)
   (apply string (map integer->char args)))
 
+;;
+;; string internals
+;;
+
+;; Some abbreviations
+;; BMP - Basic Multilingual Plane (codepoints below U+FFFF)
+;; SMP - Suplementary Multilingual Plane (codebpoints from U+10000 to U+1FFFF)
+
+(with-test-prefix "string internals"
+
+  (pass-if "new string starts at 1st char in stringbuf"
+    (let ((s "abc"))
+      (= 0 (assq-ref (%string-dump s) 'start))))
+
+  (pass-if "length of new string same as stringbuf"
+    (let ((s "def"))
+      (= (string-length s) (assq-ref (%string-dump s) 'stringbuf-length))))
+
+  (pass-if "contents of new string same as stringbuf"
+    (let ((s "ghi"))
+      (string=? s (assq-ref (%string-dump s) 'stringbuf-chars))))
+
+  (pass-if "writable strings are not read-only"
+    (let ((s "zyx"))
+      (not (assq-ref (%string-dump s) 'read-only))))
+
+  (pass-if "read-only strings are read-only"
+    (let ((s (substring/read-only "zyx" 0)))
+      (assq-ref (%string-dump s) 'read-only)))
+
+  (pass-if "null strings are inlined"
+    (let ((s ""))
+      (assq-ref (%string-dump s) 'stringbuf-inline)))
+
+  (pass-if "short Latin-1 encoded strings are inlined"
+    (let ((s "m"))
+      (assq-ref (%string-dump s) 'stringbuf-inline)))
+
+  (pass-if "long Latin-1 encoded strings are not inlined"
+    (let ((s "0123456789012345678901234567890123456789"))
+      (not (assq-ref (%string-dump s) 'stringbuf-inline))))
+
+  (pass-if "short UCS-4 encoded strings are not inlined"
+    (let ((s "\u0100"))
+      (not (assq-ref (%string-dump s) 'stringbuf-inline))))
+
+  (pass-if "long UCS-4 encoded strings are not inlined"
+    (let ((s "\u010012345678901234567890123456789"))
+      (not (assq-ref (%string-dump s) 'stringbuf-inline))))
+
+  (pass-if "new Latin-1 encoded strings are not shared"
+    (let ((s "abc"))
+      (not (assq-ref (%string-dump s) 'stringbuf-shared))))
+
+  (pass-if "new UCS-4 encoded strings are not shared"
+    (let ((s "\u0100bc"))
+      (not (assq-ref (%string-dump s) 'stringbuf-shared))))
+
+  ;; Should this be true? It isn't currently true.
+  (pass-if "null shared substrings are shared"
+    (let* ((s1 "")
+           (s2 (substring/shared s1 0 0)))
+      (throw 'untested)
+      (eq? (assq-ref (%string-dump s2) 'shared)
+           s1)))
+
+  (pass-if "ASCII shared substrings are shared"
+    (let* ((s1 "foobar")
+           (s2 (substring/shared s1 0 3)))
+      (eq? (assq-ref (%string-dump s2) 'shared)
+           s1)))
+
+  (pass-if "BMP shared substrings are shared"
+    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
+           (s2 (substring/shared s1 0 3)))
+      (eq? (assq-ref (%string-dump s2) 'shared)
+           s1)))
+
+  (pass-if "null substrings are not shared"
+    (let* ((s1 "")
+           (s2 (substring s1 0 0)))
+      (not (eq? (assq-ref (%string-dump s2) 'shared)
+                s1))))
+
+  (pass-if "ASCII substrings are not shared"
+    (let* ((s1 "foobar")
+           (s2 (substring s1 0 3)))
+      (not (eq? (assq-ref (%string-dump s2) 'shared)
+                s1))))
+
+  (pass-if "BMP substrings are not shared"
+    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
+           (s2 (substring s1 0 3)))
+      (not (eq? (assq-ref (%string-dump s2) 'shared)
+                s1))))
+
+  (pass-if "ASCII substrings share stringbufs before copy-on-write"
+    (let* ((s1 "foobar")
+           (s2 (substring s1 0 3)))
+      (assq-ref (%string-dump s1) 'stringbuf-shared)))
+
+  (pass-if "BMP substrings share stringbufs before copy-on-write"
+    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
+           (s2 (substring s1 0 3)))
+      (assq-ref (%string-dump s1) 'stringbuf-shared)))
+
+  (pass-if "ASCII substrings don't share stringbufs after copy-on-write"
+    (let* ((s1 "foobar")
+           (s2 (substring s1 0 3)))
+      (string-set! s2 0 #\F)
+      (not (assq-ref (%string-dump s2) 'stringbuf-shared))))
+
+  (pass-if "BMP substrings don't share stringbufs after copy-on-write"
+    (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
+           (s2 (substring s1 0 3)))
+      (string-set! s2 0 #\F)
+      (not (assq-ref (%string-dump s2) 'stringbuf-shared))))
+
+  (with-test-prefix "encodings"
+
+    (pass-if "null strings are Latin-1 encoded"
+      (let ((s ""))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))
+
+    (pass-if "ASCII strings are Latin-1 encoded"
+      (let ((s "jkl"))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))
+
+    (pass-if "Latin-1 strings are Latin-1 encoded"
+      (let ((s "\xC0\xC1\xC2"))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))
+
+    (pass-if "BMP strings are UCS-4 encoded"
+      (let ((s "\u0100\u0101\x0102"))
+        (assq-ref (%string-dump s) 'stringbuf-wide)))
+
+    (pass-if "SMP strings are UCS-4 encoded"
+      (let ((s "\U010300\u010301\x010302"))
+        (assq-ref (%string-dump s) 'stringbuf-wide)))
+
+    (pass-if "null list->string is Latin-1 encoded"
+      (let ((s (string-ints)))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))
+
+    (pass-if "ASCII list->string is Latin-1 encoded"
+      (let ((s (string-ints 65 66 67)))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))
+
+    (pass-if "Latin-1 list->string is Latin-1 encoded"
+      (let ((s (string-ints #xc0 #xc1 #xc2)))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))
+
+    (pass-if "BMP list->string is UCS-4 encoded"
+      (let ((s (string-ints #x0100 #x0101 #x0102)))
+        (assq-ref (%string-dump s) 'stringbuf-wide)))
+
+    (pass-if "SMP list->string is UCS-4 encoded"
+      (let ((s (string-ints #x010300 #x010301 #x010302)))
+        (assq-ref (%string-dump s) 'stringbuf-wide)))
+
+    (pass-if "encoding of string not based on escape style"
+      (let ((s "\U000040"))
+        (not (assq-ref (%string-dump s) 'stringbuf-wide))))))
+
+(with-test-prefix "hex escapes"
+
+  (pass-if-exception "non-hex char in two-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x0g\"" read))
+
+  (pass-if-exception "non-hex char in four-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\u000g\"" read))
+
+  (pass-if-exception "non-hex char in six-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\U00000g\"" read))
+
+  (pass-if-exception "premature termination of two-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\x0\"" read))
+
+  (pass-if-exception "premature termination of four-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\u000\"" read))
+
+  (pass-if-exception "premature termination of six-digit hex-escape"
+    exception:illegal-escape                     
+    (with-input-from-string "\"\\U00000\"" read))
+
+  (pass-if "extra hex digits ignored for two-digit hex escape"
+    (eqv? (string-ref "--\xfff--" 2)
+          (integer->char #xff)))
+
+  (pass-if "extra hex digits ignored for four-digit hex escape"
+    (eqv? (string-ref "--\u0100f--" 2)
+          (integer->char #x0100)))
+
+  (pass-if "extra hex digits ignored for six-digit hex escape"
+    (eqv? (string-ref "--\U010300f--" 2)
+          (integer->char #x010300)))
+
+  (pass-if "escaped characters match non-escaped ASCII characters"
+    (string=? "ABC" "\x41\u0042\U000043")))
 
 ;;
 ;; string=?
@@ -181,8 +386,20 @@
     exception:out-of-range
     (string-ref "hello" -1))
 
-  (pass-if "regular string"
-    (char=? (string-ref "GNU Guile" 4) #\G)))
+  (pass-if "regular string, ASCII char"
+    (char=? (string-ref "GNU Guile" 4) #\G))
+
+  (pass-if "regular string, hex escaped Latin-1 char"
+    (char=? (string-ref "--\xff--" 2) 
+            (integer->char #xff)))
+
+  (pass-if "regular string, hex escaped BMP char"
+    (char=? (string-ref "--\u0100--" 2) 
+            (integer->char #x0100)))
+
+  (pass-if "regular string, hex escaped SMP char"
+    (char=? (string-ref "--\U010300--" 2) 
+            (integer->char #x010300))))
 
 ;;
 ;; string-set!
@@ -210,10 +427,25 @@
     exception:read-only-string
     (string-set! (substring/read-only "abc" 0) 1 #\space))
 
-  (pass-if "regular string"
+  (pass-if "regular string, ASCII char"
     (let ((s (string-copy "GNU guile")))
       (string-set! s 4 #\G)
-      (char=? (string-ref s 4) #\G))))
+      (char=? (string-ref s 4) #\G)))
+
+  (pass-if "regular string, Latin-1 char"
+    (let ((s (string-copy "GNU guile")))
+      (string-set! s 4 (integer->char #xfe))
+      (char=? (string-ref s 4) (integer->char #xfe))))
+
+  (pass-if "regular string, BMP char"
+    (let ((s (string-copy "GNU guile")))
+      (string-set! s 4 (integer->char #x0100))
+      (char=? (string-ref s 4) (integer->char #x0100))))
+
+  (pass-if "regular string, SMP char"
+    (let ((s (string-copy "GNU guile")))
+      (string-set! s 4 (integer->char #x010300))
+      (char=? (string-ref s 4) (integer->char #x010300)))))
 
 
 (with-test-prefix "string-split"
diff --git a/test-suite/tests/symbols.test b/test-suite/tests/symbols.test
index 5be2743..bdd08a0 100644
--- a/test-suite/tests/symbols.test
+++ b/test-suite/tests/symbols.test
@@ -1,6 +1,6 @@
 ;;;; symbols.test --- test suite for Guile's symbols    -*- scheme -*-
 ;;;;
-;;;; Copyright (C) 2001, 2006, 2008 Free Software Foundation, Inc.
+;;;; Copyright (C) 2001, 2006, 2008, 2009 Free Software Foundation, Inc.
 ;;;; 
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -31,6 +31,79 @@
 (define (documented? object)
   (not (not (object-documentation object))))
 
+(define (symbol-length s)
+  (string-length (symbol->string s)))
+
+;;
+;; symbol internals
+;;
+
+(with-test-prefix "symbol internals"
+
+  (pass-if "length of new symbol same as stringbuf"
+    (let ((s 'def))
+      (= (symbol-length s) (assq-ref (%symbol-dump s) 'stringbuf-length))))
+
+  (pass-if "contents of new symbol same as stringbuf"
+    (let ((s 'ghi))
+      (string=? (symbol->string s) 
+                (assq-ref (%symbol-dump s) 'stringbuf-chars))))
+
+  (pass-if "the null symbol is inlined"
+    (let ((s '#{}#))
+      (assq-ref (%symbol-dump s) 'stringbuf-inline)))
+
+  (pass-if "short Latin-1-encoded symbols are inlined"
+    (let ((s 'm))
+      (assq-ref (%symbol-dump s) 'stringbuf-inline)))
+
+  (pass-if "long Latin-1-encoded symbols are not inlined"
+    (let ((s 'x0123456789012345678901234567890123456789))
+      (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+
+  (pass-if "short UCS-4-encoded symbols are not inlined"
+    (let ((s (string->symbol "\u0100")))
+      (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+
+  (pass-if "long UCS-4-encoded symbols are not inlined"
+    (let ((s (string->symbol "\u010012345678901234567890123456789")))
+      (not (assq-ref (%symbol-dump s) 'stringbuf-inline))))
+
+  (with-test-prefix "hashes"
+  
+    (pass-if "equal symbols have equal hashes"
+      (let ((s1 'mux)
+            (s2 'mux))
+        (= (assq-ref (%symbol-dump s1) 'hash)
+           (assq-ref (%symbol-dump s2) 'hash))))
+
+    (pass-if "different symbols have different hashes"
+      (let ((s1 'mux)
+            (s2 'muy))
+        (not (= (assq-ref (%symbol-dump s1) 'hash)
+                (assq-ref (%symbol-dump s2) 'hash))))))
+
+  (with-test-prefix "encodings"
+
+    (pass-if "the null symbol is Latin-1 encoded"
+      (let ((s '#{}#))
+        (not (assq-ref (%symbol-dump s) 'stringbuf-wide))))
+
+    (pass-if "ASCII symbols are Latin-1 encoded"
+      (let ((s 'jkl))
+        (not (assq-ref (%symbol-dump s) 'stringbuf-wide))))
+
+    (pass-if "Latin-1 symbols are Latin-1 encoded"
+      (let ((s (string->symbol "\xC0\xC1\xC2")))
+        (not (assq-ref (%symbol-dump s) 'stringbuf-wide))))
+
+    (pass-if "BMP symbols are UCS-4 encoded"
+      (let ((s (string->symbol "\u0100\u0101\x0102")))
+        (assq-ref (%symbol-dump s) 'stringbuf-wide)))
+
+    (pass-if "SMP symbols are UCS-4 encoded"
+      (let ((s (string->symbol "\U010300\u010301\x010302")))
+        (assq-ref (%symbol-dump s) 'stringbuf-wide)))))
 
 ;;;
 ;;; symbol?


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]