guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 06/08: Initial peek-char implementation in Scheme


From: Andy Wingo
Subject: [Guile-commits] 06/08: Initial peek-char implementation in Scheme
Date: Wed, 04 May 2016 10:43:53 +0000

wingo pushed a commit to branch wip-port-refactor
in repository guile.

commit 2ba638092fc890cd33416c6adcbc107e5f5cd0d5
Author: Andy Wingo <address@hidden>
Date:   Wed May 4 11:48:05 2016 +0200

    Initial peek-char implementation in Scheme
    
    * module/ice-9/ports.scm (EILSEQ, decoding-error, peek-char-and-len/utf8):
      (peek-char-and-len/iso-8859-1, peek-char-and-len/iconv):
      (peek-char-and-len, %peek-char): New definitions.  Missing iconv1 for
      peek-char, but enough to benchmark.
---
 module/ice-9/ports.scm |  123 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index 1bf13be..9774e46 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -255,6 +255,129 @@ interpret its input and output."
                 (bytevector-u8-ref (port-buffer-bytevector buf)
                                    (port-buffer-cur buf))))))))
 
+;; GNU/Linux definition; fixme?
+(define-syntax EILSEQ (identifier-syntax 84))
+
+(define-syntax-rule (decoding-error subr port)
+  (throw 'decoding-error subr "input decoding error" EILSEQ port))
+
+(define-inlinable (peek-char-and-len/utf8 port)
+  (define (bad-utf8 len)
+    (if (eq? (port-conversion-strategy port) 'substitute)
+        (values #\? len)
+        (decoding-error "peek-char" port)))
+  (let ((first-byte (peek-byte port)))
+    (cond
+     ((eq? first-byte the-eof-object)
+      (values first-byte 0))
+     ((< first-byte #x80)
+      (values (integer->char first-byte) 1))
+     ((<= #xc2 first-byte #xdf)
+      (call-with-values (lambda () (fill-input port 2))
+        (lambda (buf buffering)
+          (let ((bv (port-buffer-bytevector buf))
+                (cur (port-buffer-cur buf)))
+            (define (ref n)
+              (bytevector-u8-ref bv (+ cur 1)))
+            (when (or (< buffering 2)
+                      (not (= (logand (ref 1) #xc0) #x80)))
+              (bad-utf8 1))
+            (values (integer->char
+                     (logior (ash (logand first-byte #x1f) 6)
+                             (logand (ref 1) #x3f)))
+                    2)))))
+     ((= (logand first-byte #xf0) #xe0)
+      (call-with-values (lambda () (fill-input port 3))
+        (lambda (buf buffering)
+          (let ((bv (port-buffer-bytevector buf))
+                (cur (port-buffer-cur buf)))
+            (define (ref n)
+              (bytevector-u8-ref bv (+ cur 1)))
+            (when (or (< buffering 2)
+                      (not (= (logand (ref 1) #xc0) #x80))
+                      (and (eq? first-byte #xe0) (< (ref 1) #xa0))
+                      (and (eq? first-byte #xed) (< (ref 1) #x9f)))
+              (bad-utf8 1))
+            (when (or (< buffering 3)
+                      (not (= (logand (ref 2) #xc0) #x80)))
+              (bad-utf8 2))
+            (values (integer->char
+                     (logior (ash (logand first-byte #x0f) 12)
+                             (ash (logand (ref 1) #x3f) 6)
+                             (logand (ref 2) #x3f)))
+                    3)))))
+     ((<= #xf0 first-byte #xf4)
+      (call-with-values (lambda () (fill-input port 4))
+        (lambda (buf buffering)
+          (let ((bv (port-buffer-bytevector buf))
+                (cur (port-buffer-cur buf)))
+            (define (ref n)
+              (bytevector-u8-ref bv (+ cur 1)))
+            (when (or (< buffering 2)
+                      (not (= (logand (ref 1) #xc0) #x80))
+                      (and (eq? first-byte #xf0) (< (ref 1) #x90))
+                      (and (eq? first-byte #xf4) (< (ref 1) #x8f)))
+              (bad-utf8 1))
+            (when (or (< buffering 3)
+                      (not (= (logand (ref 2) #xc0) #x80)))
+              (bad-utf8 2))
+            (when (or (< buffering 4)
+                      (not (= (logand (ref 3) #xc0) #x80)))
+              (bad-utf8 3))
+            (values (integer->char
+                     (logior (ash (logand first-byte #x07) 18)
+                             (ash (logand (ref 1) #x3f) 12)
+                             (ash (logand (ref 2) #x3f) 6)
+                             (logand (ref 3) #x3f)))
+                    4)))))
+     (else
+      (bad-utf8 1)))))
+
+(define-inlinable (peek-char-and-len/iso-8859-1 port)
+  (let ((byte-or-eof (peek-byte port)))
+    (if (eof-object? byte-or-eof)
+        (values byte-or-eof 0)
+        (values (integer->char byte-or-eof) 1))))
+
+(define (peek-char-and-len/iconv port)
+  (define (bad-input len)
+    (if (eq? (port-conversion-strategy port) 'substitute)
+        (values #\? len)
+        (decoding-error "peek-char" port)))
+  (let lp ((prev-input-size 0))
+    (let* ((input-size (1+ prev-input-size))
+           (buf (fill-input port input-size))
+           (cur (port-buffer-cur buf)))
+      (cond
+       ((<= (- (port-buffer-end buf) cur) prev-input-size)
+        (if (zero? prev-input-size)
+            (values the-eof-object 0)
+            (bad-input prev-input-size)))
+       ;; fixme: takes port arg???
+       ((iconv1 port (port-buffer-bytevector buf) cur input-size
+                (port-conversion-strategy port))
+        => (lambda (char)
+             (values char input-size)))
+       (else
+        (lp input-size))))))
+
+(define-inlinable (peek-char-and-len port)
+  (let ((enc (%port-encoding port)))
+    (call-with-values
+        (lambda ()
+          (case enc
+            ((UTF-8) (peek-char-and-len/utf8 port))
+            ((ISO-8859-1) (peek-char-and-len/iso-8859-1 port))
+            (else (peek-char-and-len/iconv port))))
+      (lambda (char len)
+        (if (port-maybe-consume-initial-byte-order-mark port char len)
+            (peek-char-and-len port)
+            (values char len))))))
+
+(define (%peek-char port)
+  (call-with-values (lambda () (peek-char-and-len port))
+    (lambda (char len)
+      char)))
 
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]