guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 03/03: Speed refactors to Scheme %peek-char


From: Andy Wingo
Subject: [Guile-commits] 03/03: Speed refactors to Scheme %peek-char
Date: Sat, 7 May 2016 10:43:48 +0000 (UTC)

wingo pushed a commit to branch wip-port-refactor
in repository guile.

commit d77b50476a340338615aa9f9952a2001f5f139f1
Author: Andy Wingo <address@hidden>
Date:   Sat May 7 12:41:07 2016 +0200

    Speed refactors to Scheme %peek-char
    
    * module/ice-9/ports.scm (peek-bytes): New helper.
      (peek-byte): Use peek-bytes helper.
      (decoding-error): Don't inline; no need.
      (decode-utf8, bad-utf8-len): New helpers.
      (peek-char-and-len/utf8): Use new helpers.
      (peek-char-and-len): No fast paths, and not inline.  Peek-char has its
      own fast path.
      (%peek-char): Use helpers to make fast path.
---
 module/ice-9/ports.scm |  238 +++++++++++++++++++++++++++++-------------------
 1 file changed, 143 insertions(+), 95 deletions(-)

diff --git a/module/ice-9/ports.scm b/module/ice-9/ports.scm
index 27bcdd8..a222e83 100644
--- a/module/ice-9/ports.scm
+++ b/module/ice-9/ports.scm
@@ -286,94 +286,134 @@ interpret its input and output."
                         (lp buffered)
                         (values buf buffered)))))))))))))))
 
-(define-inlinable (peek-byte port)
+(define-inlinable (peek-bytes port count kfast kslow)
   (let* ((buf (port-read-buffer port))
-         (cur (port-buffer-cur buf)))
-    (if (< cur (port-buffer-end buf))
-        (bytevector-u8-ref (port-buffer-bytevector buf) cur)
-        (call-with-values (lambda () (fill-input port))
+         (cur (port-buffer-cur buf))
+         (buffered (- (port-buffer-end buf) cur)))
+    (if (<= count buffered)
+        (kfast buf (port-buffer-bytevector buf) cur buffered)
+        (call-with-values (lambda () (fill-input port count))
           (lambda (buf buffered)
-            (if (zero? buffered)
-                the-eof-object
-                (bytevector-u8-ref (port-buffer-bytevector buf)
-                                   (port-buffer-cur buf))))))))
-
-;; GNU/Linux definition; fixme?
-(define-syntax EILSEQ (identifier-syntax 84))
-
-(define-syntax-rule (decoding-error subr port)
+            (kslow buf (port-buffer-bytevector buf) (port-buffer-cur buf)
+                   buffered))))))
+
+(define (peek-byte port)
+  (peek-bytes port 1
+              (lambda (buf bv cur buffered)
+                (bytevector-u8-ref bv cur))
+              (lambda (buf bv cur buffered)
+                (and (> buffered 0)
+                     (bytevector-u8-ref bv cur)))))
+
+(define (decoding-error subr port)
+  ;; GNU/Linux definition; fixme?
+  (define EILSEQ 84)
   (throw 'decoding-error subr "input decoding error" EILSEQ port))
 
+(define-inlinable (decode-utf8 bv start avail u8_0 kt kf)
+  (cond
+   ((< u8_0 #x80)
+    (kt (integer->char u8_0) 1))
+   ((and (<= #xc2 u8_0 #xdf) (<= 2 avail))
+    (let ((u8_1 (bytevector-u8-ref bv (1+ start))))
+      (if (= (logand u8_1 #xc0) #x80)
+          (kt (integer->char
+               (logior (ash (logand u8_0 #x1f) 6)
+                       (logand u8_1 #x3f)))
+              2)
+          (kf))))
+   ((and (= (logand u8_0 #xf0) #xe0) (<= 3 avail))
+    (let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
+          (u8_2 (bytevector-u8-ref bv (+ start 2))))
+      (if (and (= (logand u8_1 #xc0) #x80)
+               (= (logand u8_2 #xc0) #x80)
+               (case u8_0
+                 ((#xe0) (>= u8_1 #xa0))
+                 ((#xed) (>= u8_1 #x9f))
+                 (else #t)))
+          (kt (integer->char
+               (logior (ash (logand u8_0 #x0f) 12)
+                       (ash (logand u8_1 #x3f) 6)
+                       (logand u8_2 #x3f)))
+              3)
+          (kf))))
+   ((and (<= #xf0 u8_0 #xf4) (<= 4 avail))
+    (let ((u8_1 (bytevector-u8-ref bv (+ start 1)))
+          (u8_2 (bytevector-u8-ref bv (+ start 2)))
+          (u8_3 (bytevector-u8-ref bv (+ start 3))))
+      (if (and (= (logand u8_1 #xc0) #x80)
+               (= (logand u8_2 #xc0) #x80)
+               (= (logand u8_3 #xc0) #x80)
+               (case u8_0
+                 ((#xf0) (>= u8_1 #x90))
+                 ((#xf4) (>= u8_1 #x8f))
+                 (else #t)))
+          (kt (integer->char
+               (logior (ash (logand u8_0 #x07) 18)
+                       (ash (logand u8_1 #x3f) 12)
+                       (ash (logand u8_2 #x3f) 6)
+                       (logand u8_3 #x3f)))
+              4)
+          (kf))))
+   (else (kf))))
+
+(define (bad-utf8-len bv cur buffering first-byte)
+  (define (ref n)
+    (bytevector-u8-ref bv (+ cur 1)))
+  (cond
+   ((< first-byte #x80) 0)
+   ((<= #xc2 first-byte #xdf)
+    (cond
+     ((< buffering 2) 1)
+     ((not (= (logand (ref 1) #xc0) #x80)) 1)
+     (else 0)))
+   ((= (logand first-byte #xf0) #xe0)
+    (cond
+     ((< buffering 2) 1)
+     ((not (= (logand (ref 1) #xc0) #x80)) 1)
+     ((and (eq? first-byte #xe0) (< (ref 1) #xa0)) 1)
+     ((and (eq? first-byte #xed) (< (ref 1) #x9f)) 1)
+     ((< buffering 3) 2)
+     ((not (= (logand (ref 2) #xc0) #x80)) 2)
+     (else 0)))
+   ((<= #xf0 first-byte #xf4)
+    (cond
+     ((< buffering 2) 1)
+     ((not (= (logand (ref 1) #xc0) #x80)) 1)
+     ((and (eq? first-byte #xf0) (< (ref 1) #x90)) 1)
+     ((and (eq? first-byte #xf4) (< (ref 1) #x8f)) 1)
+     ((< buffering 3) 2)
+     ((not (= (logand (ref 2) #xc0) #x80)) 2)
+     ((< buffering 4) 3)
+     ((not (= (logand (ref 3) #xc0) #x80)) 3)
+     (else 0)))
+   (else 1)))
+
 (define (peek-char-and-len/utf8 port first-byte)
   (define (bad-utf8 len)
     (if (eq? (port-conversion-strategy port) 'substitute)
         (values #\? len)
         (decoding-error "peek-char" port)))
-  (cond
-   ((< first-byte #x80)
-    (values (integer->char first-byte) 1))
-   ((<= #xc2 first-byte #xdf)
-    (call-with-values (lambda () (fill-input port 2))
-      (lambda (buf buffering)
-        (let ((bv (port-buffer-bytevector buf))
-              (cur (port-buffer-cur buf)))
-          (define (ref n)
-            (bytevector-u8-ref bv (+ cur 1)))
-          (when (or (< buffering 2)
-                    (not (= (logand (ref 1) #xc0) #x80)))
-            (bad-utf8 1))
-          (values (integer->char
-                   (logior (ash (logand first-byte #x1f) 6)
-                           (logand (ref 1) #x3f)))
-                  2)))))
-   ((= (logand first-byte #xf0) #xe0)
-    (call-with-values (lambda () (fill-input port 3))
-      (lambda (buf buffering)
-        (let ((bv (port-buffer-bytevector buf))
-              (cur (port-buffer-cur buf)))
-          (define (ref n)
-            (bytevector-u8-ref bv (+ cur 1)))
-          (when (or (< buffering 2)
-                    (not (= (logand (ref 1) #xc0) #x80))
-                    (and (eq? first-byte #xe0) (< (ref 1) #xa0))
-                    (and (eq? first-byte #xed) (< (ref 1) #x9f)))
-            (bad-utf8 1))
-          (when (or (< buffering 3)
-                    (not (= (logand (ref 2) #xc0) #x80)))
-            (bad-utf8 2))
-          (values (integer->char
-                   (logior (ash (logand first-byte #x0f) 12)
-                           (ash (logand (ref 1) #x3f) 6)
-                           (logand (ref 2) #x3f)))
-                  3)))))
-   ((<= #xf0 first-byte #xf4)
-    (call-with-values (lambda () (fill-input port 4))
-      (lambda (buf buffering)
-        (let ((bv (port-buffer-bytevector buf))
-              (cur (port-buffer-cur buf)))
-          (define (ref n)
-            (bytevector-u8-ref bv (+ cur 1)))
-          (when (or (< buffering 2)
-                    (not (= (logand (ref 1) #xc0) #x80))
-                    (and (eq? first-byte #xf0) (< (ref 1) #x90))
-                    (and (eq? first-byte #xf4) (< (ref 1) #x8f)))
-            (bad-utf8 1))
-          (when (or (< buffering 3)
-                    (not (= (logand (ref 2) #xc0) #x80)))
-            (bad-utf8 2))
-          (when (or (< buffering 4)
-                    (not (= (logand (ref 3) #xc0) #x80)))
-            (bad-utf8 3))
-          (values (integer->char
-                   (logior (ash (logand first-byte #x07) 18)
-                           (ash (logand (ref 1) #x3f) 12)
-                           (ash (logand (ref 2) #x3f) 6)
-                           (logand (ref 3) #x3f)))
-                  4)))))
-   (else
-    (bad-utf8 1))))
-
-(define-inlinable (peek-char-and-len/iso-8859-1 port first-byte)
+  (if (< first-byte #x80)
+      (values (integer->char first-byte) 1)
+      (call-with-values (lambda ()
+                          (fill-input port
+                                      (cond
+                                       ((<= #xc2 first-byte #xdf) 2)
+                                       ((= (logand first-byte #xf0) #xe0) 3)
+                                       (else 4))))
+        (lambda (buf buffering)
+          (let* ((bv (port-buffer-bytevector buf))
+                 (cur (port-buffer-cur buf)))
+            (define (bad-utf8)
+              (let ((len (bad-utf8-len bv cur buffering first-byte)))
+                (when (zero? len) (error "internal error"))
+                (if (eq? (port-conversion-strategy port) 'substitute)
+                    (values #\? len)
+                    (decoding-error "peek-char" port))))
+            (decode-utf8 bv cur buffering first-byte values bad-utf8))))))
+
+(define (peek-char-and-len/iso-8859-1 port first-byte)
   (values (integer->char first-byte) 1))
 
 (define (peek-char-and-len/iconv port first-byte)
@@ -398,25 +438,33 @@ interpret its input and output."
        (else
         (lp input-size))))))
 
-(define-inlinable (peek-char-and-len port)
+(define (peek-char-and-len port)
   (let ((first-byte (peek-byte port)))
-    (if (eq? first-byte the-eof-object)
-        (values first-byte 0)
-        (let ((first-byte (logand first-byte #xff)))
-          (case (%port-encoding port)
-            ((UTF-8)
-             (if (< first-byte #x80)
-                 (values (integer->char first-byte) 1)
-                 (peek-char-and-len/utf8 port first-byte)))
-            ((ISO-8859-1)
-             (peek-char-and-len/iso-8859-1 port first-byte))
-            (else
-             (peek-char-and-len/iconv port first-byte)))))))
+    (if (not first-byte)
+        (values the-eof-object 0)
+        (case (%port-encoding port)
+          ((UTF-8)
+           (peek-char-and-len/utf8 port first-byte))
+          ((ISO-8859-1)
+           (peek-char-and-len/iso-8859-1 port first-byte))
+          (else
+           (peek-char-and-len/iconv port first-byte))))))
 
 (define (%peek-char port)
-  (call-with-values (lambda () (peek-char-and-len port))
-    (lambda (char len)
-      char)))
+  (define (slow-path)
+    (call-with-values (lambda () (peek-char-and-len port))
+      (lambda (char len)
+        char)))
+  (define (fast-path buf bv cur buffered)
+    (let ((u8 (bytevector-u8-ref bv cur))
+          (enc (%port-encoding port)))
+      (case enc
+        ((UTF-8) (decode-utf8 bv cur buffered u8 (lambda (char len) char)
+                              slow-path))
+        ((ISO-8859-1) (integer->char u8))
+        (else (slow-path)))))
+  (peek-bytes port 1 fast-path
+              (lambda (buf bv cur buffered) (slow-path))))
 
 
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]