[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] GNU Guile branch, master, updated. release_1-9-6-150-gde
From: |
Michael Gran |
Subject: |
[Guile-commits] GNU Guile branch, master, updated. release_1-9-6-150-gdea901d |
Date: |
Wed, 13 Jan 2010 05:13:05 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".
http://git.savannah.gnu.org/cgit/guile.git/commit/?id=dea901d66e46041f96d3d3a0f95bf0ab209387c9
The branch, master has been updated
via dea901d66e46041f96d3d3a0f95bf0ab209387c9 (commit)
from 8470b3f45b48bf627642e8f41938492be4eacf2c (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit dea901d66e46041f96d3d3a0f95bf0ab209387c9
Author: Michael Gran <address@hidden>
Date: Tue Jan 12 21:02:41 2010 -0800
Reader option for R6RS hex escapes
This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings. When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters. Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).
* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
(iprin1): use new macro PRINT_CHAR_ESCAPE
* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define
* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
(SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
(scm_read_string): parse R6RS hex string escapes
(scm_read_character): parse R6RS hex character escapes
* test-suite/tests/chars.test (with-read-options): new procedure
(R6RS hex escapes): new tests
* test-suite/tests/strings.test (with-read-options): new procedure
(R6RS hex escapes): new tests
-----------------------------------------------------------------------
Summary of changes:
libguile/print.c | 106 ++++++++++++++++++++++++++++-------------
libguile/private-options.h | 8 ++-
libguile/read.c | 88 +++++++++++++++++++++++-----------
test-suite/tests/chars.test | 44 +++++++++++++++++
test-suite/tests/strings.test | 96 +++++++++++++++++++++++++++++++++++--
5 files changed, 274 insertions(+), 68 deletions(-)
diff --git a/libguile/print.c b/libguile/print.c
index aef575d..dcf28c7 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -409,6 +409,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display,
g_display);
static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
+
+/* Print a character as an octal or hex escape. */
+#define PRINT_CHAR_ESCAPE(i, port) \
+ do \
+ { \
+ if (!SCM_R6RS_ESCAPES_P) \
+ scm_intprint (i, 8, port); \
+ else \
+ { \
+ scm_puts ("x", port); \
+ scm_intprint (i, 16, port); \
+ } \
+ } \
+ while (0)
+
+
void
scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
{
@@ -488,7 +504,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
else
/* Character is graphic but unrepresentable in
this port's encoding. */
- scm_intprint (i, 8, port);
+ PRINT_CHAR_ESCAPE (i, port);
}
else
{
@@ -507,12 +523,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
else
/* Character is graphic but unrepresentable in
this port's encoding. */
- scm_intprint (i, 8, port);
+ PRINT_CHAR_ESCAPE (i, port);
}
}
else
/* Character is a non-graphical character. */
- scm_intprint (i, 8, port);
+ PRINT_CHAR_ESCAPE (i, port);
}
else
scm_i_charprint (i, port);
@@ -579,9 +595,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
- size_t i, j, len;
+ size_t i, len;
static char const hex[] = "0123456789abcdef";
- char buf[8];
+ char buf[9];
scm_putc ('"', port);
@@ -647,37 +663,61 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
{
/* Character is graphic but unrepresentable in
this port's encoding or is not graphic. */
- if (ch <= 0xFF)
+ if (!SCM_R6RS_ESCAPES_P)
{
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = hex[ch / 16];
- buf[3] = hex[ch % 16];
- scm_lfwrite (buf, 4, port);
- }
- else if (ch <= 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'u';
- buf[2] = hex[(ch & 0xF000) >> 12];
- buf[3] = hex[(ch & 0xF00) >> 8];
- buf[4] = hex[(ch & 0xF0) >> 4];
- buf[5] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 6, port);
- j = i + 1;
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 8, port);
+ }
}
- else if (ch > 0xFFFF)
+ else
{
- buf[0] = '\\';
- buf[1] = 'U';
- buf[2] = hex[(ch & 0xF00000) >> 20];
- buf[3] = hex[(ch & 0xF0000) >> 16];
- buf[4] = hex[(ch & 0xF000) >> 12];
- buf[5] = hex[(ch & 0xF00) >> 8];
- buf[6] = hex[(ch & 0xF0) >> 4];
- buf[7] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 8, port);
- j = i + 1;
+ scm_t_wchar ch2 = ch;
+
+ /* Print an R6RS variable-length hex escape:
"\xNNNN;"
+ */
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite (buf + i, 9 - i, port);
}
}
}
diff --git a/libguile/private-options.h b/libguile/private-options.h
index 703ca8a..40d40fb 100644
--- a/libguile/private-options.h
+++ b/libguile/private-options.h
@@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
#if SCM_ENABLE_ELISP
#define SCM_ELISP_VECTORS_P scm_read_opts[4].val
#define SCM_ESCAPED_PARENS_P scm_read_opts[5].val
-#define SCM_N_READ_OPTIONS 6
+#endif
+#define SCM_R6RS_ESCAPES_P scm_read_opts[6].val
+
+#if SCM_ENABLE_ELISP
+#define SCM_N_READ_OPTIONS 7
#else
-#define SCM_N_READ_OPTIONS 4
+#define SCM_N_READ_OPTIONS 5
#endif
#endif /* PRIVATE_OPTIONS */
diff --git a/libguile/read.c b/libguile/read.c
index 011684b..9e66cce 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -76,6 +76,8 @@ scm_t_option scm_read_opts[] = {
{ SCM_OPTION_BOOLEAN, "elisp-strings", 0,
"Support `\\(' and `\\)' in strings."},
#endif
+ { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
+ "Use R6RS variable-length character and string hex escapes."},
{ 0, },
};
@@ -412,32 +414,37 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
/* Read a hexadecimal number NDIGITS in length. Put its value into the
variable
- C. */
-#define SCM_READ_HEX_ESCAPE(ndigits) \
- do \
- { \
- scm_t_wchar a; \
- size_t i = 0; \
- c = 0; \
- while (i < ndigits) \
- { \
- a = scm_getc (port); \
- if (a == EOF) \
- goto str_eof; \
- if ('0' <= a && a <= '9') \
- a -= '0'; \
- else if ('A' <= a && a <= 'F') \
- a = a - 'A' + 10; \
- else if ('a' <= a && a <= 'f') \
- a = a - 'a' + 10; \
- else \
- { \
- c = a; \
- goto bad_escaped; \
- } \
- c = c * 16 + a; \
- i ++; \
- } \
+ C. If TERMINATOR is non-null, terminate early if the TERMINATOR character
is
+ found. */
+#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
+ do \
+ { \
+ scm_t_wchar a; \
+ size_t i = 0; \
+ c = 0; \
+ while (i < ndigits) \
+ { \
+ a = scm_getc (port); \
+ if (a == EOF) \
+ goto str_eof; \
+ if (terminator \
+ && (a == (scm_t_wchar) terminator) \
+ && (i > 0)) \
+ break; \
+ if ('0' <= a && a <= '9') \
+ a -= '0'; \
+ else if ('A' <= a && a <= 'F') \
+ a = a - 'A' + 10; \
+ else if ('a' <= a && a <= 'f') \
+ a = a - 'a' + 10; \
+ else \
+ { \
+ c = a; \
+ goto bad_escaped; \
+ } \
+ c = c * 16 + a; \
+ i ++; \
+ } \
} while (0)
static SCM
@@ -511,13 +518,16 @@ scm_read_string (int chr, SCM port)
c = '\010';
break;
case 'x':
- SCM_READ_HEX_ESCAPE (2);
+ if (SCM_R6RS_ESCAPES_P)
+ SCM_READ_HEX_ESCAPE (10, ';');
+ else
+ SCM_READ_HEX_ESCAPE (2, '\0');
break;
case 'u':
- SCM_READ_HEX_ESCAPE (4);
+ SCM_READ_HEX_ESCAPE (4, '\0');
break;
case 'U':
- SCM_READ_HEX_ESCAPE (6);
+ SCM_READ_HEX_ESCAPE (6, '\0');
break;
default:
bad_escaped:
@@ -828,6 +838,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
}
}
+ if (cp == 'x' && (charname_len > 1) && SCM_R6RS_ESCAPES_P)
+ {
+ SCM p;
+ scm_t_wchar chr;
+
+ /* Convert from hex, skipping the initial 'x' character in CHARNAME */
+ p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
+ scm_from_uint (16));
+ if (SCM_I_INUMP (p))
+ {
+ scm_t_wchar c = SCM_I_INUM (p);
+ if (SCM_IS_UNICODE_CHAR (c))
+ return SCM_MAKE_CHAR (c);
+ else
+ scm_i_input_error (FUNC_NAME, port,
+ "out-of-range hex character escape: ~a",
+ scm_list_1 (charname));
+ }
+ }
+
/* The names of characters should never have non-Latin1
characters. */
if (scm_i_is_narrow_string (charname)
diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test
index 509f070..25c82e8 100644
--- a/test-suite/tests/chars.test
+++ b/test-suite/tests/chars.test
@@ -29,6 +29,16 @@
(cons #t "out-of-range"))
+;; Run THUNK in the context of the reader options OPTS
+(define (with-read-options opts thunk)
+ (let ((saved-options (read-options)))
+ (dynamic-wind
+ (lambda ()
+ (read-options opts))
+ thunk
+ (lambda ()
+ (read-options saved-options)))))
+
(with-test-prefix "basic char handling"
(with-test-prefix "evaluator"
@@ -313,3 +323,37 @@
(with-output-to-string (lambda () (write #\soh)))
"#\\soh"))))
+(with-test-prefix "R6RS hex escapes"
+
+ (pass-if "one-digit hex escape"
+ (eqv? (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "#\\xA" read)))
+ (integer->char #x0A)))
+
+ (pass-if "two-digit hex escape"
+ (eqv? (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "#\\xFF" read)))
+ (integer->char #xFF)))
+
+ (pass-if "four-digit hex escape"
+ (eqv? (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "#\\x00FF" read)))
+ (integer->char #xFF)))
+
+ (pass-if "eight-digit hex escape"
+ (eqv? (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "#\\x00006587" read)))
+ (integer->char #x6587)))
+ (pass-if "write R6RS escapes"
+ (string=?
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-output-to-string
+ (lambda ()
+ (write (integer->char #x80))))))
+ "#\\x80")))
+
diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test
index e04c026..47ae93a 100644
--- a/test-suite/tests/strings.test
+++ b/test-suite/tests/strings.test
@@ -2,23 +2,24 @@
;;;; Jim Blandy <address@hidden> --- August 1999
;;;;
;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software
Foundation, Inc.
-;;;;
+;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
-;;;;
+;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
-;;;;
+;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
USA
(define-module (test-strings)
- #:use-module (test-suite lib))
+ #:use-module (test-suite lib)
+ #:use-module (srfi srfi-1))
(define exception:read-only-string
(cons 'misc-error "^string is read-only"))
@@ -29,6 +30,16 @@
(define exception:wrong-type-arg
(cons #t "Wrong type"))
+;; Run THUNK in the context of the reader options OPTS
+(define (with-read-options opts thunk)
+ (let ((saved-options (read-options)))
+ (dynamic-wind
+ (lambda ()
+ (read-options opts))
+ thunk
+ (lambda ()
+ (read-options saved-options)))))
+
;; Create a string from integer char values, eg. (string-ints 65) => "A"
(define (string-ints . args)
(apply string (map integer->char args)))
@@ -229,6 +240,83 @@
(pass-if "Guile extensions backslash escapes"
(string=? "\0" (string #\nul))))
+
+(with-test-prefix "R6RS hex escapes"
+
+ (pass-if-exception "non-hex char in two-digit hex-escape"
+ exception:illegal-escape
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "\"\\x0g;\"" read))))
+
+ (pass-if-exception "non-hex char in four-digit hex-escape"
+ exception:illegal-escape
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "\"\\x000g;\"" read))))
+
+ (pass-if-exception "non-hex char in six-digit hex-escape"
+ exception:illegal-escape
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "\"\\x00000g;\"" read))))
+
+ (pass-if-exception "no semicolon at termination of one-digit hex-escape"
+ exception:illegal-escape
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "\"\\x0\"" read))))
+
+ (pass-if-exception "no semicolon at termination of three-digit hex-escape"
+ exception:illegal-escape
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "\"\\x000\"" read))))
+
+ (pass-if "two-digit hex escape"
+ (eqv?
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (string-ref (with-input-from-string "\"--\\xff;--\"" read) 2)))
+ (integer->char #xff)))
+
+ (pass-if "four-digit hex escape"
+ (eqv?
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2)))
+ (integer->char #x0100)))
+
+ (pass-if "six-digit hex escape"
+ (eqv?
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2)))
+ (integer->char #x010300)))
+
+ (pass-if "escaped characters match non-escaped ASCII characters"
+ (string=?
+ (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read)))
+ "ABC"))
+
+ (pass-if "write R6RS escapes"
+
+ (let* ((s1 (apply string
+ (map integer->char '(#x8 ; backspace
+ #x20 ; space
+ #x30 ; zero
+ #x40 ; at sign
+ ))))
+ (s2 (with-read-options '(r6rs-hex-escapes)
+ (lambda ()
+ (with-output-to-string
+ (lambda () (write s1)))))))
+ (lset= eqv?
+ (string->list s2)
+ (list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\")))))
+
;;
;; string?
;;
hooks/post-receive
--
GNU Guile
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Guile-commits] GNU Guile branch, master, updated. release_1-9-6-150-gdea901d,
Michael Gran <=