[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 05/08: Port refactors to help Scheme peek-char
From: |
Andy Wingo |
Subject: |
[Guile-commits] 05/08: Port refactors to help Scheme peek-char |
Date: |
Wed, 04 May 2016 10:43:53 +0000 |
wingo pushed a commit to branch wip-port-refactor
in repository guile.
commit 36e32138f8559ac4e35ce97ba747b4dc58ba70d3
Author: Andy Wingo <address@hidden>
Date: Wed May 4 11:40:22 2016 +0200
Port refactors to help Scheme peek-char
* libguile/ports.h (scm_sys_port_encoding, scm_sys_set_port_encoding):
New functions, to expose port encodings as symbols directly to (ice-9
ports).
(scm_port_maybe_consume_initial_byte_order_mark): New function.
* libguile/ports.c (scm_port_encoding): Dispatch to %port-encoding.
(scm_set_port_encoding_x): Dispatch to %set-port-encoding!.
(port_maybe_consume_initial_byte_order_mark): New helper, factored out
of peek_codepoint.
(scm_port_maybe_consume_initial_byte_order_mark, peek_codepoint): Call
port_maybe_consume_initial_byte_order_mark.
* module/ice-9/ports.scm (port-encoding): Implement in Scheme.
---
libguile/ports.c | 100 ++++++++++++++++++++++++++++++++----------------
libguile/ports.h | 3 ++
module/ice-9/ports.scm | 7 ++++
3 files changed, 77 insertions(+), 33 deletions(-)
diff --git a/libguile/ports.c b/libguile/ports.c
index a35a3a1..da1af2f 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1222,40 +1222,48 @@ scm_i_set_port_encoding_x (SCM port, const char
*encoding)
close_iconv_descriptors (prev);
}
-SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
+SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
(SCM port),
- "Returns, as a string, the character encoding that @var{port}\n"
+ "Returns, as a symbol, the character encoding that @var{port}\n"
"uses to interpret its input and output.\n")
-#define FUNC_NAME s_scm_port_encoding
+#define FUNC_NAME s_scm_sys_port_encoding
{
SCM_VALIDATE_PORT (1, port);
- return scm_symbol_to_string (SCM_PTAB_ENTRY (port)->encoding);
+ return SCM_PTAB_ENTRY (port)->encoding;
}
#undef FUNC_NAME
-SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
+SCM
+scm_port_encoding (SCM port)
+{
+ return scm_symbol_to_string (scm_sys_port_encoding (port));
+}
+
+SCM_DEFINE (scm_sys_set_port_encoding_x, "%set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n"
"port I/O. New ports are created with the encoding\n"
"appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n")
-#define FUNC_NAME s_scm_set_port_encoding_x
+#define FUNC_NAME s_scm_sys_set_port_encoding_x
{
- char *enc_str;
-
SCM_VALIDATE_PORT (1, port);
- SCM_VALIDATE_STRING (2, enc);
+ SCM_VALIDATE_SYMBOL (2, enc);
- enc_str = scm_to_latin1_string (enc);
- scm_i_set_port_encoding_x (port, enc_str);
- free (enc_str);
+ scm_i_set_port_encoding_x (port, scm_i_symbol_chars (enc));
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
+SCM
+scm_set_port_encoding_x (SCM port, SCM enc)
+{
+ return scm_sys_set_port_encoding_x (port, scm_string_to_symbol (enc));
+}
+
scm_t_string_failed_conversion_handler
scm_i_string_failed_conversion_handler (SCM conversion_strategy)
{
@@ -1545,6 +1553,50 @@ scm_c_read (SCM port, void *buffer, size_t size)
}
#undef FUNC_NAME
+static int
+port_maybe_consume_initial_byte_order_mark (SCM port, scm_t_wchar codepoint,
+ size_t len)
+{
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+ scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+
+ if (!pti->at_stream_start_for_bom_read) return 0;
+
+ /* Record that we're no longer at stream start. */
+ pti->at_stream_start_for_bom_read = 0;
+ if (pt->rw_random)
+ pti->at_stream_start_for_bom_write = 0;
+
+ if (codepoint != SCM_UNICODE_BOM) return 0;
+
+ /* If we just read a BOM in an encoding that recognizes them, then
+ silently consume it. */
+ if (scm_is_eq (pt->encoding, sym_UTF_8)
+ || scm_is_eq (pt->encoding, sym_UTF_16)
+ || scm_is_eq (pt->encoding, sym_UTF_32))
+ {
+ scm_port_buffer_did_take (pt->read_buf, len);
+ return 1;
+ }
+
+ return 0;
+}
+
+SCM_DEFINE (scm_port_maybe_consume_initial_byte_order_mark,
+ "port-maybe-consume-initial-byte-order-mark", 3, 0, 0,
+ (SCM port, SCM codepoint, SCM len),
+ "")
+#define FUNC_NAME s_scm_port_maybe_consume_initial_byte_order_mark
+{
+ SCM_VALIDATE_PORT (1, port);
+ return scm_from_bool
+ (port_maybe_consume_initial_byte_order_mark
+ (port,
+ SCM_CHARP (codepoint) ? SCM_CHAR (codepoint) : EOF,
+ scm_to_size_t (len)));
+}
+#undef FUNC_NAME
+
/* Update the line and column number of PORT after consumption of C. */
static inline void
update_port_lf (scm_t_wchar c, SCM port)
@@ -1807,7 +1859,6 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t
*len)
{
int err;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
- scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
if (scm_is_eq (pt->encoding, sym_UTF_8))
err = peek_utf8_codepoint (port, codepoint, len);
@@ -1818,25 +1869,8 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t
*len)
if (SCM_LIKELY (err == 0))
{
- if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
- {
- /* Record that we're no longer at stream start. */
- pti->at_stream_start_for_bom_read = 0;
- if (pt->rw_random)
- pti->at_stream_start_for_bom_write = 0;
-
- /* If we just read a BOM in an encoding that recognizes them,
- then silently consume it and read another code point. */
- if (SCM_UNLIKELY
- (*codepoint == SCM_UNICODE_BOM
- && (scm_is_eq (pt->encoding, sym_UTF_8)
- || scm_is_eq (pt->encoding, sym_UTF_16)
- || scm_is_eq (pt->encoding, sym_UTF_32))))
- {
- scm_port_buffer_did_take (pt->read_buf, *len);
- return peek_codepoint (port, codepoint, len);
- }
- }
+ if (port_maybe_consume_initial_byte_order_mark (port, *codepoint, *len))
+ return peek_codepoint (port, codepoint, len);
}
else if (scm_is_eq (pt->conversion_strategy, sym_substitute))
{
@@ -3162,7 +3196,7 @@ scm_init_ports (void)
/* The following bindings are used early in boot-9.scm. */
/* Used by `include'. */
- scm_c_define_gsubr (s_scm_set_port_encoding_x, 2, 0, 0,
+ scm_c_define_gsubr ("set-port-encoding!", 2, 0, 0,
(scm_t_subr) scm_set_port_encoding_x);
scm_c_define_gsubr (s_scm_eof_object_p, 1, 0, 0,
(scm_t_subr) scm_eof_object_p);
diff --git a/libguile/ports.h b/libguile/ports.h
index 1572e40..cec6021 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -285,12 +285,15 @@ SCM_INTERNAL void scm_i_set_default_port_encoding (const
char *encoding);
SCM_INTERNAL SCM scm_i_default_port_conversion_strategy (void);
SCM_INTERNAL void scm_i_set_default_port_conversion_strategy (SCM strategy);
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
+SCM_INTERNAL SCM scm_sys_port_encoding (SCM port);
+SCM_INTERNAL SCM scm_sys_set_port_encoding_x (SCM port, SCM encoding);
SCM_API SCM scm_port_encoding (SCM port);
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
SCM_API SCM scm_port_conversion_strategy (SCM port);
SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
/* Input. */
+SCM_INTERNAL SCM scm_port_maybe_consume_initial_byte_order_mark (SCM, SCM,
SCM);
SCM_API int scm_get_byte_or_eof (SCM port);
SCM_API int scm_peek_byte_or_eof (SCM port);
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index db1c6f7..1bf13be 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -154,6 +154,13 @@
+(define (port-encoding port)
+ "Return, as a string, the character encoding that @var{port} uses to
+interpret its input and output."
+ (symbol->string (%port-encoding port)))
+
+
+
(define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0))
(define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1))
(define-syntax-rule (port-buffer-end buf) (vector-ref buf 2))
- [Guile-commits] branch wip-port-refactor updated (1309ab8 -> d7a111b), Andy Wingo, 2016/05/04
- [Guile-commits] 06/08: Initial peek-char implementation in Scheme, Andy Wingo, 2016/05/04
- [Guile-commits] 01/08: Changes to Scheme fill-input corresponding to C, Andy Wingo, 2016/05/04
- [Guile-commits] 02/08: Minor tweak to Scheme peek-byte., Andy Wingo, 2016/05/04
- [Guile-commits] 07/08: Add integer->char and char->integer opcodes, Andy Wingo, 2016/05/04
- [Guile-commits] 03/08: Port encoding internally represented as symbol, Andy Wingo, 2016/05/04
- [Guile-commits] 05/08: Port refactors to help Scheme peek-char,
Andy Wingo <=
- [Guile-commits] 08/08: Spead tweaks to Scheme peek-char, Andy Wingo, 2016/05/04
- [Guile-commits] 04/08: Port conversion strategies internally are symbols, Andy Wingo, 2016/05/04