guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-12-26-gf1


From: Ludovic Courtès
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-12-26-gf1ee6d5
Date: Wed, 15 Sep 2010 21:33:09 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=f1ee6d54d219056c62d87a8e4a6b199162c946e8

The branch, master has been updated
       via  f1ee6d54d219056c62d87a8e4a6b199162c946e8 (commit)
       via  fd5eec2b6e113f6d13028215a738417607432a2d (commit)
       via  e9c3018cec8ed236a375c59aed55e66e47671022 (commit)
      from  33d92fe6ca726a51c079a6524c18217bbe371cee (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit f1ee6d54d219056c62d87a8e4a6b199162c946e8
Author: Ludovic Courtès <address@hidden>
Date:   Wed Sep 15 23:32:28 2010 +0200

    Fix write-beyond-end-of-string error in the conversion to R6RS string 
escapes.
    
    Reported by Mike Gran <address@hidden>.
    
    * libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes,
      scm_i_unistring_escapes_to_r6rs_escapes): Augment comments.
      (scm_to_stringn): When `handler ==
      SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', realloc
      BUF so that it's large enough for the worst case.
    
    * libguile/print.c (display_character): When `result != NULL && strategy
      == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', make
      LOCALE_ENCODED large enough to hold an R6RS escape.

commit fd5eec2b6e113f6d13028215a738417607432a2d
Author: Ludovic Courtès <address@hidden>
Date:   Wed Sep 15 18:38:57 2010 +0200

    Optimize `peek-char'.
    
    This makes `peek-char' 40x faster on a port whose encoding is
    faster on a UTF-8 port containing multi-byte codepoints.
    
    The `xml->sxml' procedure is 4x faster on a 2.7 MiB XML file.
    
    * libguile/ports.c (get_codepoint): New procedure, moved here from
      `scm_getc', with the additional BUF and LEN parameters.
      (scm_getc): Use it.
      (scm_peek_char): Use it instead of the `scm_getc'/`scm_ungetc'
      sequence.
    
    * test-suite/tests/ports.test ("string ports")["peek-char [latin-1]",
      "peek-char [utf-8]"]: New tests.
    
    * benchmark-suite/Makefile.am (SCM_BENCHMARKS): Add
      `benchmarks/ports.bm'.
    
    * benchmark-suite/benchmarks/ports.bm: New file.

commit e9c3018cec8ed236a375c59aed55e66e47671022
Author: Ludovic Courtès <address@hidden>
Date:   Wed Sep 15 18:27:47 2010 +0200

    Fix copyright year and module name of `write.bm'.
    
    * benchmark-suite/benchmarks/write.bm: Fix copyright year and module
      name.

-----------------------------------------------------------------------

Summary of changes:
 benchmark-suite/Makefile.am         |    1 +
 benchmark-suite/benchmarks/ports.bm |   67 +++++++++++++++++++++++++++++++++++
 benchmark-suite/benchmarks/write.bm |    4 +-
 libguile/ports.c                    |   58 +++++++++++++++++++++++-------
 libguile/print.c                    |   13 ++++++-
 libguile/strings.c                  |   20 ++++++++--
 test-suite/tests/ports.test         |   20 ++++++++++-
 7 files changed, 161 insertions(+), 22 deletions(-)
 create mode 100644 benchmark-suite/benchmarks/ports.bm

diff --git a/benchmark-suite/Makefile.am b/benchmark-suite/Makefile.am
index 9f49f2a..e2aad91 100644
--- a/benchmark-suite/Makefile.am
+++ b/benchmark-suite/Makefile.am
@@ -4,6 +4,7 @@ SCM_BENCHMARKS = benchmarks/0-reference.bm              \
                 benchmarks/continuations.bm            \
                  benchmarks/if.bm                      \
                  benchmarks/logand.bm                  \
+                benchmarks/ports.bm                    \
                 benchmarks/read.bm                     \
                 benchmarks/srfi-1.bm                   \
                 benchmarks/srfi-13.bm                  \
diff --git a/benchmark-suite/benchmarks/ports.bm 
b/benchmark-suite/benchmarks/ports.bm
new file mode 100644
index 0000000..917a7dd
--- /dev/null
+++ b/benchmark-suite/benchmarks/ports.bm
@@ -0,0 +1,67 @@
+;;; ports.bm --- Port I/O.         -*- mode: scheme; coding: utf-8; -*-
+;;;
+;;; Copyright (C) 2010 Free Software Foundation, Inc.
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Lesser General Public License
+;;; as published by the Free Software Foundation; either version 3, or
+;;; (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU Lesser General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Lesser General Public
+;;; License along with this software; see the file COPYING.LESSER.  If
+;;; not, write to the Free Software Foundation, Inc., 51 Franklin
+;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+(define-module (benchmarks ports)
+  #:use-module (benchmark-suite lib))
+
+(define %latin1-port
+  (with-fluids ((%default-port-encoding #f))
+    (open-input-string "hello, world")))
+
+(define %utf8/ascii-port
+  (with-fluids ((%default-port-encoding "UTF-8"))
+    (open-input-string "hello, world")))
+
+(define %utf8/wide-port
+  (with-fluids ((%default-port-encoding "UTF-8"))
+    (open-input-string "안녕하세요")))
+
+
+(with-benchmark-prefix "peek-char"
+
+  (benchmark "latin-1 port" 700000
+    (peek-char %latin1-port))
+
+  (benchmark "utf-8 port, ascii character" 700000
+    (peek-char %utf8/ascii-port))
+
+  (benchmark "utf-8 port, Korean character" 700000
+    (peek-char %utf8/wide-port)))
+
+(with-benchmark-prefix "read-char"
+
+  (benchmark "latin-1 port" 10000000
+    (read-char %latin1-port))
+
+  (benchmark "utf-8 port, ascii character" 10000000
+    (read-char %utf8/ascii-port))
+
+  (benchmark "utf-8 port, Korean character" 10000000
+    (read-char %utf8/wide-port)))
+
+(with-benchmark-prefix "char-ready?"
+
+  (benchmark "latin-1 port" 10000000
+    (char-ready? %latin1-port))
+
+  (benchmark "utf-8 port, ascii character" 10000000
+    (char-ready? %utf8/ascii-port))
+
+  (benchmark "utf-8 port, Korean character" 10000000
+    (char-ready? %utf8/wide-port)))
diff --git a/benchmark-suite/benchmarks/write.bm 
b/benchmark-suite/benchmarks/write.bm
index e96f2ef..46f5c8d 100644
--- a/benchmark-suite/benchmarks/write.bm
+++ b/benchmark-suite/benchmarks/write.bm
@@ -1,6 +1,6 @@
 ;;; write.bm --- Exercise the printer.               -*- Scheme -*-
 ;;;
-;;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
+;;; Copyright (C) 2010 Free Software Foundation, Inc.
 ;;;
 ;;; This program is free software; you can redistribute it and/or
 ;;; modify it under the terms of the GNU Lesser General Public License
@@ -17,7 +17,7 @@
 ;;; not, write to the Free Software Foundation, Inc., 51 Franklin
 ;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
 
-(define-module (benchmarks read)
+(define-module (benchmarks write)
   #:use-module (benchmark-suite lib))
 
 (define %len 50000)
diff --git a/libguile/ports.c b/libguile/ports.c
index 7c3791d..6cf0de2 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1023,13 +1023,15 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
 
 #define SCM_MBCHAR_BUF_SIZE (4)
 
-/* Get one codepoint from a file, using the port's encoding.  */
-scm_t_wchar
-scm_getc (SCM port)
+/* Read a codepoint from PORT and return it.  Fill BUF with the byte
+   representation of the codepoint in PORT's encoding, and set *LEN to
+   the length in bytes of that representation.  Raise an error on
+   failure.  */
+static scm_t_wchar
+get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
 {
   int c;
-  unsigned int bufcount = 0;
-  char buf[SCM_MBCHAR_BUF_SIZE];
+  size_t bufcount = 0;
   scm_t_uint32 result_buf;
   scm_t_wchar codepoint = 0;
   scm_t_uint32 *u32;
@@ -1133,6 +1135,8 @@ scm_getc (SCM port)
       break;
     }
 
+  *len = bufcount;
+
   return codepoint;
 
  failure:
@@ -1155,6 +1159,15 @@ scm_getc (SCM port)
   return 0;
 }
 
+/* Read a codepoint from PORT and return it.  */
+scm_t_wchar
+scm_getc (SCM port)
+{
+  size_t len;
+  char buf[SCM_MBCHAR_BUF_SIZE];
+
+  return get_codepoint (port, buf, &len);
+}
 
 /* this should only be called when the read buffer is empty.  it
    tries to refill the read buffer.  it returns the first char from
@@ -1635,18 +1648,37 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
            "to @code{read-char} would have hung.")
 #define FUNC_NAME s_scm_peek_char
 {
-  scm_t_wchar c, column;
+  SCM result;
+  scm_t_wchar c;
+  char bytes[SCM_MBCHAR_BUF_SIZE];
+  long column, line;
+  size_t len;
+
   if (SCM_UNBNDP (port))
     port = scm_current_input_port ();
   else
     SCM_VALIDATE_OPINPORT (1, port);
-  column = SCM_COL(port);
-  c = scm_getc (port);
-  if (EOF == c)
-    return SCM_EOF_VAL;
-  scm_ungetc (c, port);
-  SCM_COL(port) = column;
-  return SCM_MAKE_CHAR (c);
+
+  column = SCM_COL (port);
+  line = SCM_LINUM (port);
+
+  c = get_codepoint (port, bytes, &len);
+  if (c == EOF)
+    result = SCM_EOF_VAL;
+  else
+    {
+      long i;
+
+      result = SCM_MAKE_CHAR (c);
+
+      for (i = len - 1; i >= 0; i--)
+       scm_unget_byte (bytes[i], port);
+
+      SCM_COL (port) = column;
+      SCM_LINUM (port) = line;
+    }
+
+  return result;
 }
 #undef FUNC_NAME
 
diff --git a/libguile/print.c b/libguile/print.c
index 2ffe70e..bdc6c9f 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -768,7 +768,7 @@ display_character (scm_t_wchar ch, SCM port,
   else
     {
       size_t len;
-      char locale_encoded[sizeof (ch)], *result;
+      char locale_encoded[8 * sizeof (ch)], *result;
 
       len = sizeof (locale_encoded);
       result = u32_conv_to_encoding (encoding, strategy,
@@ -782,7 +782,16 @@ display_character (scm_t_wchar ch, SCM port,
            {
              /* Apply the same escaping syntax as in `write_character'.  */
              if (SCM_R6RS_ESCAPES_P)
-               scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+               {
+                 /* LOCALE_ENCODED is large enough to store an R6RS
+                    `\xNNNN;' escape sequence.  However, libunistring
+                    up to 0.9.3 (included) always returns a
+                    heap-allocated RESULT.  */
+                 if (SCM_UNLIKELY (result != locale_encoded))
+                   result = scm_realloc (result, len * 7);
+
+                 scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+               }
              else
                scm_i_unistring_escapes_to_guile_escapes (result, &len);
            }
diff --git a/libguile/strings.c b/libguile/strings.c
index dbff066..e64c37b 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1575,8 +1575,9 @@ scm_take_locale_string (char *str)
   return scm_take_locale_stringn (str, -1);
 }
 
-/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
-   and \UXXXXXX.  */
+/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
+   *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
+   Set *LENP to the size of the resulting string.  */
 void
 scm_i_unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
 {
@@ -1629,7 +1630,11 @@ scm_i_unistring_escapes_to_guile_escapes (char *buf, 
size_t *lenp)
   *lenp = j;
 }
 
-/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXXXX; */
+/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
+   *LENP-byte locale-encoded string, to `\xXXXX;'.  Set *LEN to the size
+   of the resulting string.  BUF must be large enough to handle the
+   worst case when `\uXXXX' escapes (6 characters) are replaced by
+   `\xXXXX;' (7 characters).  */
 void
 scm_i_unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
 {
@@ -1815,7 +1820,14 @@ scm_to_stringn (SCM str, size_t *lenp, const char 
*encoding,
   if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
     {
       if (SCM_R6RS_ESCAPES_P)
-        scm_i_unistring_escapes_to_r6rs_escapes (buf, &len);
+       {
+         /* The worst case is if the input string contains all 4-digit
+            hex escapes.  "\uXXXX" (six characters) becomes "\xXXXX;"
+            (seven characters).  Make BUF large enough to hold
+            that.  */
+         buf = scm_realloc (buf, (len * 7) / 6 + 1);
+         scm_i_unistring_escapes_to_r6rs_escapes (buf, &len);
+       }
       else
         scm_i_unistring_escapes_to_guile_escapes (buf, &len);
 
diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test
index bb5c173..4edd531 100644
--- a/test-suite/tests/ports.test
+++ b/test-suite/tests/ports.test
@@ -422,7 +422,25 @@
           (and (eq? faulty-str str)
                (string=? from "UTF-32")
                (string=? to "ISO-8859-1")
-               (string? (strerror errno))))))))
+               (string? (strerror errno)))))))
+
+  (pass-if "peek-char [latin-1]"
+    (let ((p (with-fluids ((%default-port-encoding #f))
+               (open-input-string "hello, world"))))
+      (and (char=? (peek-char p) #\h)
+           (char=? (peek-char p) #\h)
+           (char=? (peek-char p) #\h)
+           (= (port-line p) 0)
+           (= (port-column p) 0))))
+
+  (pass-if "peek-char [utf-8]"
+    (let ((p (with-fluids ((%default-port-encoding "UTF-8"))
+               (open-input-string "안녕하세요"))))
+      (and (char=? (peek-char p) #\안)
+           (char=? (peek-char p) #\안)
+           (char=? (peek-char p) #\안)
+           (= (port-line p) 0)
+           (= (port-column p) 0)))))
 
 (with-test-prefix "call-with-output-string"
 


hooks/post-receive
-- 
GNU Guile



reply via email to

[Prev in Thread] Current Thread [Next in Thread]