guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 05/08: Port refactors to help Scheme peek-char


From: Andy Wingo
Subject: [Guile-commits] 05/08: Port refactors to help Scheme peek-char
Date: Wed, 04 May 2016 10:43:53 +0000

wingo pushed a commit to branch wip-port-refactor
in repository guile.

commit 36e32138f8559ac4e35ce97ba747b4dc58ba70d3
Author: Andy Wingo <address@hidden>
Date:   Wed May 4 11:40:22 2016 +0200

    Port refactors to help Scheme peek-char
    
    * libguile/ports.h (scm_sys_port_encoding, scm_sys_set_port_encoding):
      New functions, to expose port encodings as symbols directly to (ice-9
      ports).
      (scm_port_maybe_consume_initial_byte_order_mark): New function.
    * libguile/ports.c (scm_port_encoding): Dispatch to %port-encoding.
      (scm_set_port_encoding_x): Dispatch to %set-port-encoding!.
      (port_maybe_consume_initial_byte_order_mark): New helper, factored out
      of peek_codepoint.
      (scm_port_maybe_consume_initial_byte_order_mark, peek_codepoint): Call
      port_maybe_consume_initial_byte_order_mark.
    * module/ice-9/ports.scm (port-encoding): Implement in Scheme.
---
 libguile/ports.c       |  100 ++++++++++++++++++++++++++++++++----------------
 libguile/ports.h       |    3 ++
 module/ice-9/ports.scm |    7 ++++
 3 files changed, 77 insertions(+), 33 deletions(-)

diff --git a/libguile/ports.c b/libguile/ports.c
index a35a3a1..da1af2f 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1222,40 +1222,48 @@ scm_i_set_port_encoding_x (SCM port, const char 
*encoding)
     close_iconv_descriptors (prev);
 }
 
-SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
+SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
            (SCM port),
-           "Returns, as a string, the character encoding that @var{port}\n"
+           "Returns, as a symbol, the character encoding that @var{port}\n"
            "uses to interpret its input and output.\n")
-#define FUNC_NAME s_scm_port_encoding
+#define FUNC_NAME s_scm_sys_port_encoding
 {
   SCM_VALIDATE_PORT (1, port);
 
-  return scm_symbol_to_string (SCM_PTAB_ENTRY (port)->encoding);
+  return SCM_PTAB_ENTRY (port)->encoding;
 }
 #undef FUNC_NAME
 
-SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
+SCM
+scm_port_encoding (SCM port)
+{
+  return scm_symbol_to_string (scm_sys_port_encoding (port));
+}
+
+SCM_DEFINE (scm_sys_set_port_encoding_x, "%set-port-encoding!", 2, 0, 0,
            (SCM port, SCM enc),
            "Sets the character encoding that will be used to interpret all\n"
            "port I/O.  New ports are created with the encoding\n"
            "appropriate for the current locale if @code{setlocale} has \n"
            "been called or ISO-8859-1 otherwise\n"
            "and this procedure can be used to modify that encoding.\n")
-#define FUNC_NAME s_scm_set_port_encoding_x
+#define FUNC_NAME s_scm_sys_set_port_encoding_x
 {
-  char *enc_str;
-
   SCM_VALIDATE_PORT (1, port);
-  SCM_VALIDATE_STRING (2, enc);
+  SCM_VALIDATE_SYMBOL (2, enc);
 
-  enc_str = scm_to_latin1_string (enc);
-  scm_i_set_port_encoding_x (port, enc_str);
-  free (enc_str);
+  scm_i_set_port_encoding_x (port, scm_i_symbol_chars (enc));
 
   return SCM_UNSPECIFIED;
 }
 #undef FUNC_NAME
 
+SCM
+scm_set_port_encoding_x (SCM port, SCM enc)
+{
+  return scm_sys_set_port_encoding_x (port, scm_string_to_symbol (enc));
+}
+
 scm_t_string_failed_conversion_handler
 scm_i_string_failed_conversion_handler (SCM conversion_strategy)
 {
@@ -1545,6 +1553,50 @@ scm_c_read (SCM port, void *buffer, size_t size)
 }
 #undef FUNC_NAME
 
+static int
+port_maybe_consume_initial_byte_order_mark (SCM port, scm_t_wchar codepoint,
+                                            size_t len)
+{
+  scm_t_port *pt = SCM_PTAB_ENTRY (port);
+  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
+
+  if (!pti->at_stream_start_for_bom_read) return 0;
+
+  /* Record that we're no longer at stream start. */
+  pti->at_stream_start_for_bom_read = 0;
+  if (pt->rw_random)
+    pti->at_stream_start_for_bom_write = 0;
+
+  if (codepoint != SCM_UNICODE_BOM) return 0;
+
+  /* If we just read a BOM in an encoding that recognizes them, then
+     silently consume it. */
+  if (scm_is_eq (pt->encoding, sym_UTF_8)
+      || scm_is_eq (pt->encoding, sym_UTF_16)
+      || scm_is_eq (pt->encoding, sym_UTF_32))
+    {
+      scm_port_buffer_did_take (pt->read_buf, len);
+      return 1;
+    }
+
+  return 0;
+}
+
+SCM_DEFINE (scm_port_maybe_consume_initial_byte_order_mark,
+            "port-maybe-consume-initial-byte-order-mark", 3, 0, 0,
+            (SCM port, SCM codepoint, SCM len),
+            "")
+#define FUNC_NAME s_scm_port_maybe_consume_initial_byte_order_mark
+{
+  SCM_VALIDATE_PORT (1, port);
+  return scm_from_bool
+    (port_maybe_consume_initial_byte_order_mark
+     (port,
+      SCM_CHARP (codepoint) ? SCM_CHAR (codepoint) : EOF,
+      scm_to_size_t (len)));
+}
+#undef FUNC_NAME
+
 /* Update the line and column number of PORT after consumption of C.  */
 static inline void
 update_port_lf (scm_t_wchar c, SCM port)
@@ -1807,7 +1859,6 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t 
*len)
 {
   int err;
   scm_t_port *pt = SCM_PTAB_ENTRY (port);
-  scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
 
   if (scm_is_eq (pt->encoding, sym_UTF_8))
     err = peek_utf8_codepoint (port, codepoint, len);
@@ -1818,25 +1869,8 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t 
*len)
 
   if (SCM_LIKELY (err == 0))
     {
-      if (SCM_UNLIKELY (pti->at_stream_start_for_bom_read))
-        {
-          /* Record that we're no longer at stream start. */
-          pti->at_stream_start_for_bom_read = 0;
-          if (pt->rw_random)
-            pti->at_stream_start_for_bom_write = 0;
-
-          /* If we just read a BOM in an encoding that recognizes them,
-             then silently consume it and read another code point. */
-          if (SCM_UNLIKELY
-              (*codepoint == SCM_UNICODE_BOM
-               && (scm_is_eq (pt->encoding, sym_UTF_8)
-                   || scm_is_eq (pt->encoding, sym_UTF_16)
-                   || scm_is_eq (pt->encoding, sym_UTF_32))))
-            {
-              scm_port_buffer_did_take (pt->read_buf, *len);
-              return peek_codepoint (port, codepoint, len);
-            }
-        }
+      if (port_maybe_consume_initial_byte_order_mark (port, *codepoint, *len))
+        return peek_codepoint (port, codepoint, len);
     }
   else if (scm_is_eq (pt->conversion_strategy, sym_substitute))
     {
@@ -3162,7 +3196,7 @@ scm_init_ports (void)
   /* The following bindings are used early in boot-9.scm.  */
 
   /* Used by `include'.  */
-  scm_c_define_gsubr (s_scm_set_port_encoding_x, 2, 0, 0,
+  scm_c_define_gsubr ("set-port-encoding!", 2, 0, 0,
                       (scm_t_subr) scm_set_port_encoding_x);
   scm_c_define_gsubr (s_scm_eof_object_p, 1, 0, 0,
                       (scm_t_subr) scm_eof_object_p);
diff --git a/libguile/ports.h b/libguile/ports.h
index 1572e40..cec6021 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -285,12 +285,15 @@ SCM_INTERNAL void scm_i_set_default_port_encoding (const 
char *encoding);
 SCM_INTERNAL SCM scm_i_default_port_conversion_strategy (void);
 SCM_INTERNAL void scm_i_set_default_port_conversion_strategy (SCM strategy);
 SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
+SCM_INTERNAL SCM scm_sys_port_encoding (SCM port);
+SCM_INTERNAL SCM scm_sys_set_port_encoding_x (SCM port, SCM encoding);
 SCM_API SCM scm_port_encoding (SCM port);
 SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
 SCM_API SCM scm_port_conversion_strategy (SCM port);
 SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
 
 /* Input.  */
+SCM_INTERNAL SCM scm_port_maybe_consume_initial_byte_order_mark (SCM, SCM, 
SCM);
 SCM_API int scm_get_byte_or_eof (SCM port);
 SCM_API int scm_peek_byte_or_eof (SCM port);
 SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index db1c6f7..1bf13be 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -154,6 +154,13 @@
 
 
 
+(define (port-encoding port)
+  "Return, as a string, the character encoding that @var{port} uses to
+interpret its input and output."
+  (symbol->string (%port-encoding port)))
+
+
+
 (define-syntax-rule (port-buffer-bytevector buf) (vector-ref buf 0))
 (define-syntax-rule (port-buffer-cur buf) (vector-ref buf 1))
 (define-syntax-rule (port-buffer-end buf) (vector-ref buf 2))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]