guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 02/03: Port to Scheme of new BOM handling


From: Andy Wingo
Subject: [Guile-commits] 02/03: Port to Scheme of new BOM handling
Date: Thu, 05 May 2016 20:59:10 +0000

wingo pushed a commit to branch wip-port-refactor
in repository guile.

commit 6d15a71e8f7c61d67b8e6a3f9ac751ac918382b6
Author: Andy Wingo <address@hidden>
Date:   Thu May 5 22:54:58 2016 +0200

    Port to Scheme of new BOM handling
    
    * libguile/ports.c (scm_specialize_port_encoding_x)
      (scm_port_clear_stream_start_for_bom_read): New functions exported
      to (ice-9 ports).
    * module/ice-9/ports.scm (clear-stream-start-for-bom-read):
      (fill-input, peek-char-and-len): Rework to handle BOM in fill-input
      instead of once per peek-char.
---
 libguile/ports.c       |   49 +++++++++++++++++++++++++++++++++++++
 module/ice-9/ports.scm |   63 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/libguile/ports.c b/libguile/ports.c
index e8c79bc..e823436 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1137,6 +1137,22 @@ prepare_iconv_descriptors (SCM port, SCM encoding)
     (encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port));
 }
 
+SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
+SCM_DEFINE (scm_specialize_port_encoding_x,
+            "specialize-port-encoding!", 2, 0, 0,
+            (SCM port, SCM encoding),
+            "")
+#define FUNC_NAME s_scm_specialize_port_encoding_x
+{
+  SCM_VALIDATE_PORT (1, port);
+  SCM_VALIDATE_SYMBOL (2, encoding);
+
+  prepare_iconv_descriptors (port, encoding);
+
+  return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
 scm_t_iconv_descriptors *
 scm_i_port_iconv_descriptors (SCM port)
 {
@@ -2351,6 +2367,39 @@ port_clear_stream_start_for_bom_read (SCM port, enum 
bom_io_mode io_mode)
   return 0;
 }
 
+SCM_INTERNAL SCM scm_port_clear_stream_start_for_bom_read (SCM port);
+SCM_DEFINE (scm_port_clear_stream_start_for_bom_read,
+            "port-clear-stream-start-for-bom-read", 1, 0, 0,
+            (SCM port),
+            "")
+#define FUNC_NAME s_scm_port_clear_stream_start_for_bom_read
+{
+  scm_t_port_internal *pti;
+  scm_t_port *pt;
+
+  SCM_VALIDATE_PORT (1, port);
+
+  pti = SCM_PORT_GET_INTERNAL (port);
+  if (!pti->at_stream_start_for_bom_read)
+    return 0;
+
+  /* Maybe slurp off a byte-order marker.  */
+  pt = SCM_PTAB_ENTRY (port);
+  pti->at_stream_start_for_bom_read = 0;
+
+  if (!pti->at_stream_start_for_bom_read)
+    return SCM_BOOL_F;
+
+  /* Maybe slurp off a byte-order marker.  */
+  pt = SCM_PTAB_ENTRY (port);
+  pti->at_stream_start_for_bom_read = 0;
+  if (pt->rw_random)
+    pti->at_stream_start_for_bom_write = 0;
+
+  return SCM_BOOL_T;
+}
+#undef FUNC_NAME
+
 static void
 port_clear_stream_start_for_bom_write (SCM port, enum bom_io_mode io_mode)
 {
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index 0c42331..41eb866 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -203,7 +203,50 @@ interpret its input and output."
       (error "bad return from port read function" read))
     read))
 
+(define utf8-bom #vu8(#xEF #xBB #xBF))
+(define utf16be-bom #vu8(#xFE #xFF))
+(define utf16le-bom #vu8(#xFF #xFE))
+(define utf32be-bom #vu8(#x00 #x00 #xFE #xFF))
+(define utf32le-bom #vu8(#xFF #xFE #x00 #x00))
+
+(define (clear-stream-start-for-bom-read port io-mode)
+  (define (maybe-consume-bom bom)
+    (and (eq? (peek-byte port) (bytevector-u8-ref bom 0))
+         (let* ((buf (fill-input port (bytevector-length bom)))
+                (bv (port-buffer-bytevector buf))
+                (cur (port-buffer-cur bv)))
+           (and (<= (bytevector-length bv)
+                    (- (port-buffer-end buf) cur))
+                (let lp ((i 1))
+                  (if (= i (bytevector-length bom))
+                      (begin
+                        (set-port-buffer-cur! buf (+ cur i))
+                        #t)
+                      (and (eq? (bytevector-u8-ref bv (+ cur i))
+                                (bytevector-u8-ref bom i))
+                           (lp (1+ i)))))))))
+  (when (and (port-clear-stream-start-for-bom-read port)
+             (eq? io-mode 'text))
+    (case (port-encoding port)
+      ((UTF-8)
+       (maybe-consume-bom utf8-bom))
+      ((UTF-16)
+       (cond
+        ((maybe-consume-bom utf16le-bom)
+         (specialize-port-encoding! port 'UTF-16LE))
+        (else
+         (maybe-consume-bom utf16be-bom)
+         (specialize-port-encoding! port 'UTF-16BE))))
+      ((UTF-32)
+       (cond
+        ((maybe-consume-bom utf32le-bom)
+         (specialize-port-encoding! port 'UTF-32LE))
+        (else
+         (maybe-consume-bom utf32be-bom)
+         (specialize-port-encoding! port 'UTF-32BE)))))))
+
 (define* (fill-input port #:optional (minimum-buffering 1))
+  (clear-stream-start-for-bom-read port 'text)
   (let* ((buf (port-read-buffer port))
          (cur (port-buffer-cur buf))
          (buffered (- (port-buffer-end buf) cur)))
@@ -360,19 +403,13 @@ interpret its input and output."
     (if (eq? first-byte the-eof-object)
         (values first-byte 0)
         (let ((first-byte (logand first-byte #xff)))
-          (call-with-values
-              (lambda ()
-                (case (%port-encoding port)
-                  ((UTF-8)
-                   (peek-char-and-len/utf8 port first-byte))
-                  ((ISO-8859-1)
-                   (peek-char-and-len/iso-8859-1 port first-byte))
-                  (else
-                   (peek-char-and-len/iconv port first-byte))))
-            (lambda (char len)
-              (if (port-maybe-consume-initial-byte-order-mark port char len)
-                  (peek-char-and-len port)
-                  (values char len))))))))
+          (case (%port-encoding port)
+            ((UTF-8)
+             (peek-char-and-len/utf8 port first-byte))
+            ((ISO-8859-1)
+             (peek-char-and-len/iso-8859-1 port first-byte))
+            (else
+             (peek-char-and-len/iconv port first-byte)))))))
 
 (define (%peek-char port)
   (call-with-values (lambda () (peek-char-and-len port))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]