[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] 13/23: add latin1 chars and strings to eports
From: |
Andy Wingo |
Subject: |
[Guile-commits] 13/23: add latin1 chars and strings to eports |
Date: |
Thu, 24 Mar 2016 14:26:04 +0000 |
wingo pushed a commit to branch wip-ethreads
in repository guile.
commit d11341d1c420b0d8ccda36c2140261d2ef1cbfbc
Author: Andy Wingo <address@hidden>
Date: Tue Mar 27 00:14:08 2012 +0200
add latin1 chars and strings to eports
* module/ice-9/eports.scm: Add functions that deal in latin1.
---
module/ice-9/eports.scm | 156 +++++++++++++++++++++++++++++++++++++++-------
1 files changed, 132 insertions(+), 24 deletions(-)
diff --git a/module/ice-9/eports.scm b/module/ice-9/eports.scm
index 1622986..2ca8656 100644
--- a/module/ice-9/eports.scm
+++ b/module/ice-9/eports.scm
@@ -43,9 +43,19 @@
get-bytevector-n
get-bytevector-n!
get-bytevector-delimited
- get-u8
put-u8
- put-bytevector))
+ put-bytevector
+
+ get-latin1-char
+ putback-latin1-char
+ lookahead-latin1-char
+ get-latin1-string-some
+ putback-latin1-string
+ get-latin1-string-n
+ get-latin1-string-n!
+ get-latin1-string-delimited
+ put-latin1-char
+ put-latin1-string))
(define-record-type <eport>
(make-eport fd readbuf writebuf file-port)
@@ -312,12 +322,13 @@
(flush-buffer buf len)
ret)))))
-;; Read bytes from EPORT until the byte DELIMITER is seen. Return
-;; two values: a bytevector of the bytes read, not including the
-;; delimiter, and the delimiter, or the EOF object if EOF was
+;; Read bytes from EPORT, continuing to read until calling PREDICATE on
+;; the byte returns a true value. Return two values: a bytevector of
+;; the bytes read, not including the delimiter, and the delimiter, or #f
+;; if the byte limit was reached, or the EOF object if EOF was
;; encountered first.
;;
-(define* (get-bytevector-delimited eport predicate #:key limit)
+(define* (get-bytevector-delimited eport predicate #:key max-bytes)
(define (collect-result prev prev-len bv)
(if (null? prev-len)
bv
@@ -331,12 +342,10 @@
(bytevector-copy! (car prev) 0 out (- prev-len len) len)
(lp (cdr prev) (- prev-len len)))))))))
(define (found-delimiter buf start len delimiter prev prev-len)
- (when (and limit (> (+ len prev-len) limit))
- (error "Input too long" limit (+ len prev-len)))
(let ((ret (make-bytevector len)))
(bytevector-copy! (buf-bv buf) start ret 0 len)
;; Plus one for the delimiter, if present
- (flush-buffer buf (if (eof-object? delimiter) len (1+ len)))
+ (flush-buffer buf (if (integer? delimiter) (1+ len) len))
(values (collect-result prev prev-len ret)
delimiter)))
(let ((buf (eport-readbuf eport)))
@@ -346,11 +355,11 @@
(size (bytevector-length bv)))
(let lp ((prev '()) (prev-len 0))
(when (= (buf-cur buf) (buf-end buf))
- (when (and limit (> prev-len limit))
- (error "Input too long" limit prev-len))
(fill-input eport))
- (let ((cur (buf-cur buf))
- (end (buf-end buf)))
+ (let* ((cur (buf-cur buf))
+ (end (if max-bytes
+ (min (+ cur (- max-bytes prev-len)) (buf-end buf))
+ (buf-end buf))))
(let search ((i cur))
(if (< i end)
(if (predicate (bytevector-u8-ref bv i))
@@ -359,17 +368,21 @@
prev prev-len)
(search (1+ i)))
(let ((len (- end cur)))
- (if (zero? len)
- ;; EOF
- (if (zero? prev-len)
- (values the-eof-object
- the-eof-object)
- (found-delimiter buf cur len the-eof-object
- prev prev-len))
- (let ((ret (make-bytevector len)))
- (bytevector-copy! bv cur ret 0 len)
- (flush-buffer buf len)
- (lp (cons ret prev) (+ len prev-len))))))))))))
+ (cond
+ ((and max-bytes (= (+ len prev-len) max-bytes))
+ ;; Limit reached
+ (found-delimiter buf cur len #f
+ prev prev-len))
+ ((zero? len)
+ ;; EOF
+ (found-delimiter buf cur len the-eof-object
+ prev prev-len))
+ (else
+ ;; End of buffered input
+ (let ((ret (make-bytevector len)))
+ (bytevector-copy! bv cur ret 0 len)
+ (flush-buffer buf len)
+ (lp (cons ret prev) (+ len prev-len)))))))))))))
;; Read COUNT bytes into bytevector DST, starting at offset START.
;; Return the actual number of bytes read, which may be less if EOF was
@@ -445,3 +458,98 @@
(when (< written count)
(wait-for-writable eport)
(lp (+ start written) (- count written))))))))))
+
+;; Get the next latin1 (ISO-8859-1) character from EPORT, or EOF.
+;;
+(define (get-latin1-char eport)
+ (let ((x (get-u8 eport)))
+ (if (integer? x)
+ (integer->char x)
+ x)))
+
+;; Put a latin1 character back into the buf of the port. Note that you
+;; are only guaranteed to be able to put back as many bytes as your last
+;; fill-input was able to read.
+;;
+(define (putback-latin1-char eport c)
+ (putback-u8 eport (char->integer c)))
+
+;; Peek at the next latin1 character from EPORT, blocking if necessary.
+;;
+(define (lookahead-latin1-char eport)
+ (let ((x (lookahead-u8 eport)))
+ (if (integer? x)
+ (integer->char x)
+ x)))
+
+(define (get-latin1-string-n eport count)
+ (let* ((bv (get-bytevector-n eport count))
+ (len (bytevector-length bv))
+ (str (make-string len)))
+ (let lp ((n 0))
+ (when (< n len)
+ (string-set! str n (integer->char (bytevector-u8-ref bv n)))
+ (lp (1+ n))))
+ str))
+
+(define (get-latin1-string-n! eport dst start count)
+ (let lp ((start start) (count count) (total 0))
+ (let* ((buf (eport-readbuf eport))
+ (bv (buf-bv buf))
+ (size (bytevector-length bv))
+ (cur (buf-cur buf))
+ (len (- (buf-end buf) cur)))
+ (unless buf
+ (error "not a readable port" eport))
+ (if (<= count len)
+ (begin
+ (let lp ((n 0))
+ (when (< n count)
+ (string-set! dst (+ start n)
+ (integer->char (bytevector-u8-ref bv (+ n cur))))
+ (lp (1+ n))))
+ (flush-buffer buf count)
+ (+ total count))
+ (begin
+ (let lp ((n 0))
+ (when (< n len)
+ (string-set! dst (+ start n)
+ (integer->char (bytevector-u8-ref bv (+ n cur))))
+ (lp (1+ n))))
+ (flush-buffer buf len)
+ (if (zero? (fill-input eport))
+ (+ total len)
+ (lp (+ start len) (- count len) (+ total len))))))))
+
+;; Read latin1 (ISO-8859-1) characters from EPORT, continuing to read
+;; until calling PREDICATE on the character returns a true value, or EOF
+;; is reached, or MAX-CHARS is reached.
+;;
+;; Return two values: a string of the characters read, not including the
+;; delimiter, and the delimiter as a character, or #f if MAX-CHARS was
+;; reached, or the EOF object if no more bytes were available.
+;;
+(define* (get-latin1-string-delimited eport predicate #:key max-chars)
+ (call-with-values (lambda ()
+ (get-bytevector-delimited
+ eport
+ (lambda (u8) (predicate (integer->char u8)))
+ #:max-bytes max-chars))
+ (lambda (bv delimiter)
+ (values (utf8->string bv)
+ (if (integer? delimiter)
+ (integer->char delimiter)
+ delimiter)))))
+
+(define (put-latin1-char eport c)
+ (put-u8 eport (char->integer c)))
+
+(define (put-latin1-string eport str)
+ (if (string-every (lambda (c) (< (char->integer c) 128)) str)
+ (put-bytevector eport (string->utf8 eport))
+ ;; Need a string->latin1.
+ (let ((len (string-length str)))
+ (let lp ((n 0))
+ (when (< n len)
+ (put-u8 eport (char->integer (string-ref str n)))
+ (lp (1+ n)))))))
- [Guile-commits] branch wip-ethreads created (now 4dc952f), Andy Wingo, 2016/03/24
- [Guile-commits] 04/23: add (ice-9 ethreads), Andy Wingo, 2016/03/24
- [Guile-commits] 09/23: eports: some more exports, Andy Wingo, 2016/03/24
- [Guile-commits] 12/23: (web server ethreads): Use a large backlog., Andy Wingo, 2016/03/24
- [Guile-commits] 13/23: add latin1 chars and strings to eports,
Andy Wingo <=
- [Guile-commits] 18/23: (web server ethreads) TCP_NODELAY tweak, Andy Wingo, 2016/03/24
- [Guile-commits] 05/23: http: allow custom read-line / continuation-line? functions, Andy Wingo, 2016/03/24
- [Guile-commits] 06/23: setsockopt can take an fd, Andy Wingo, 2016/03/24
- [Guile-commits] 10/23: EOF fix for continuation-line?, Andy Wingo, 2016/03/24
- [Guile-commits] 19/23: nio: add non-blocking connect, Andy Wingo, 2016/03/24
- [Guile-commits] 08/23: add #:limit to get-bytevector-delimited, Andy Wingo, 2016/03/24
- [Guile-commits] 11/23: socket: TCP_CORK, TCP_NODELAY, Andy Wingo, 2016/03/24
- [Guile-commits] 23/23: virtualize read/write/close operations in <eport>, Andy Wingo, 2016/03/24
- [Guile-commits] 15/23: (web server ethreads): more use of latin1 accessors, Andy Wingo, 2016/03/24
- [Guile-commits] 01/23: add (ice-9 nio), Andy Wingo, 2016/03/24