[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 02/03: Port to Scheme of new BOM handling
From: |
Andy Wingo |
Subject: |
[Guile-commits] 02/03: Port to Scheme of new BOM handling |
Date: |
Thu, 05 May 2016 20:59:10 +0000 |
wingo pushed a commit to branch wip-port-refactor
in repository guile.
commit 6d15a71e8f7c61d67b8e6a3f9ac751ac918382b6
Author: Andy Wingo <address@hidden>
Date: Thu May 5 22:54:58 2016 +0200
Port to Scheme of new BOM handling
* libguile/ports.c (scm_specialize_port_encoding_x)
(scm_port_clear_stream_start_for_bom_read): New functions exported
to (ice-9 ports).
* module/ice-9/ports.scm (clear-stream-start-for-bom-read):
(fill-input, peek-char-and-len): Rework to handle BOM in fill-input
instead of once per peek-char.
---
libguile/ports.c | 49 +++++++++++++++++++++++++++++++++++++
module/ice-9/ports.scm | 63 ++++++++++++++++++++++++++++++++++++++----------
2 files changed, 99 insertions(+), 13 deletions(-)
diff --git a/libguile/ports.c b/libguile/ports.c
index e8c79bc..e823436 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1137,6 +1137,22 @@ prepare_iconv_descriptors (SCM port, SCM encoding)
(encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port));
}
+SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
+SCM_DEFINE (scm_specialize_port_encoding_x,
+ "specialize-port-encoding!", 2, 0, 0,
+ (SCM port, SCM encoding),
+ "")
+#define FUNC_NAME s_scm_specialize_port_encoding_x
+{
+ SCM_VALIDATE_PORT (1, port);
+ SCM_VALIDATE_SYMBOL (2, encoding);
+
+ prepare_iconv_descriptors (port, encoding);
+
+ return SCM_UNSPECIFIED;
+}
+#undef FUNC_NAME
+
scm_t_iconv_descriptors *
scm_i_port_iconv_descriptors (SCM port)
{
@@ -2351,6 +2367,39 @@ port_clear_stream_start_for_bom_read (SCM port, enum
bom_io_mode io_mode)
return 0;
}
+SCM_INTERNAL SCM scm_port_clear_stream_start_for_bom_read (SCM port);
+SCM_DEFINE (scm_port_clear_stream_start_for_bom_read,
+ "port-clear-stream-start-for-bom-read", 1, 0, 0,
+ (SCM port),
+ "")
+#define FUNC_NAME s_scm_port_clear_stream_start_for_bom_read
+{
+ scm_t_port_internal *pti;
+ scm_t_port *pt;
+
+ SCM_VALIDATE_PORT (1, port);
+
+ pti = SCM_PORT_GET_INTERNAL (port);
+ if (!pti->at_stream_start_for_bom_read)
+ return 0;
+
+ /* Maybe slurp off a byte-order marker. */
+ pt = SCM_PTAB_ENTRY (port);
+ pti->at_stream_start_for_bom_read = 0;
+
+ if (!pti->at_stream_start_for_bom_read)
+ return SCM_BOOL_F;
+
+ /* Maybe slurp off a byte-order marker. */
+ pt = SCM_PTAB_ENTRY (port);
+ pti->at_stream_start_for_bom_read = 0;
+ if (pt->rw_random)
+ pti->at_stream_start_for_bom_write = 0;
+
+ return SCM_BOOL_T;
+}
+#undef FUNC_NAME
+
static void
port_clear_stream_start_for_bom_write (SCM port, enum bom_io_mode io_mode)
{
diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index 0c42331..41eb866 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -203,7 +203,50 @@ interpret its input and output."
(error "bad return from port read function" read))
read))
+(define utf8-bom #vu8(#xEF #xBB #xBF))
+(define utf16be-bom #vu8(#xFE #xFF))
+(define utf16le-bom #vu8(#xFF #xFE))
+(define utf32be-bom #vu8(#x00 #x00 #xFE #xFF))
+(define utf32le-bom #vu8(#xFF #xFE #x00 #x00))
+
+(define (clear-stream-start-for-bom-read port io-mode)
+ (define (maybe-consume-bom bom)
+ (and (eq? (peek-byte port) (bytevector-u8-ref bom 0))
+ (let* ((buf (fill-input port (bytevector-length bom)))
+ (bv (port-buffer-bytevector buf))
+ (cur (port-buffer-cur bv)))
+ (and (<= (bytevector-length bv)
+ (- (port-buffer-end buf) cur))
+ (let lp ((i 1))
+ (if (= i (bytevector-length bom))
+ (begin
+ (set-port-buffer-cur! buf (+ cur i))
+ #t)
+ (and (eq? (bytevector-u8-ref bv (+ cur i))
+ (bytevector-u8-ref bom i))
+ (lp (1+ i)))))))))
+ (when (and (port-clear-stream-start-for-bom-read port)
+ (eq? io-mode 'text))
+ (case (port-encoding port)
+ ((UTF-8)
+ (maybe-consume-bom utf8-bom))
+ ((UTF-16)
+ (cond
+ ((maybe-consume-bom utf16le-bom)
+ (specialize-port-encoding! port 'UTF-16LE))
+ (else
+ (maybe-consume-bom utf16be-bom)
+ (specialize-port-encoding! port 'UTF-16BE))))
+ ((UTF-32)
+ (cond
+ ((maybe-consume-bom utf32le-bom)
+ (specialize-port-encoding! port 'UTF-32LE))
+ (else
+ (maybe-consume-bom utf32be-bom)
+ (specialize-port-encoding! port 'UTF-32BE)))))))
+
(define* (fill-input port #:optional (minimum-buffering 1))
+ (clear-stream-start-for-bom-read port 'text)
(let* ((buf (port-read-buffer port))
(cur (port-buffer-cur buf))
(buffered (- (port-buffer-end buf) cur)))
@@ -360,19 +403,13 @@ interpret its input and output."
(if (eq? first-byte the-eof-object)
(values first-byte 0)
(let ((first-byte (logand first-byte #xff)))
- (call-with-values
- (lambda ()
- (case (%port-encoding port)
- ((UTF-8)
- (peek-char-and-len/utf8 port first-byte))
- ((ISO-8859-1)
- (peek-char-and-len/iso-8859-1 port first-byte))
- (else
- (peek-char-and-len/iconv port first-byte))))
- (lambda (char len)
- (if (port-maybe-consume-initial-byte-order-mark port char len)
- (peek-char-and-len port)
- (values char len))))))))
+ (case (%port-encoding port)
+ ((UTF-8)
+ (peek-char-and-len/utf8 port first-byte))
+ ((ISO-8859-1)
+ (peek-char-and-len/iso-8859-1 port first-byte))
+ (else
+ (peek-char-and-len/iconv port first-byte)))))))
(define (%peek-char port)
(call-with-values (lambda () (peek-char-and-len port))