guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 02/02: VM support for string-set!; slimmer read-string


From: Andy Wingo
Subject: [Guile-commits] 02/02: VM support for string-set!; slimmer read-string
Date: Thu, 9 Mar 2017 11:27:54 -0500 (EST)

wingo pushed a commit to branch master
in repository guile.

commit c525aa6d95a9e19b260d6b99dbf6d73939d76585
Author: Andy Wingo <address@hidden>
Date:   Thu Mar 9 17:22:08 2017 +0100

    VM support for string-set!; slimmer read-string
    
    * doc/ref/vm.texi (Inlined Scheme Instructions): Add string-set!.
    * libguile/vm-engine.c (string-set!): New opcode.
    * module/ice-9/rdelim.scm (read-string): Reimplement in terms of a
      geometrically growing list of strings, to reduce total heap usage when
      reading big files.
    * module/language/cps/compile-bytecode.scm (compile-function): Add
      string-set! support.
    * module/language/cps/types.scm (string-set!): Update for &u64 index.
    * module/language/tree-il/compile-cps.scm (convert): Unbox index to
      string-set!.
    * module/system/vm/assembler.scm (system): Export string-set!.
---
 doc/ref/vm.texi                          |  6 ++++++
 libguile/vm-engine.c                     | 33 ++++++++++++++++++++++++++++++--
 module/ice-9/rdelim.scm                  | 19 ++++++++++++------
 module/language/cps/compile-bytecode.scm |  3 +++
 module/language/cps/types.scm            |  4 ++--
 module/language/tree-il/compile-cps.scm  |  2 +-
 module/system/vm/assembler.scm           |  1 +
 7 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/doc/ref/vm.texi b/doc/ref/vm.texi
index 4e42bb9..ac3889f 100644
--- a/doc/ref/vm.texi
+++ b/doc/ref/vm.texi
@@ -1355,6 +1355,12 @@ and store it in @var{dst}.  The @var{idx} value should 
be an unboxed
 unsigned 64-bit integer.
 @end deftypefn
 
address@hidden Instruction {} string-set! s8:@var{dst} s8:@var{idx} s8:@var{src}
+Store the character @var{src} into the string @var{dst} at index
address@hidden  The @var{idx} value should be an unboxed unsigned 64-bit
+integer.
address@hidden deftypefn
+
 @deftypefn Instruction {} cons s8:@var{dst} s8:@var{car} s8:@var{cdr}
 Cons @var{car} and @var{cdr}, and store the result in @var{dst}.
 @end deftypefn
diff --git a/libguile/vm-engine.c b/libguile/vm-engine.c
index 9ddda8f..89c6bc5 100644
--- a/libguile/vm-engine.c
+++ b/libguile/vm-engine.c
@@ -2263,7 +2263,8 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       RETURN (SCM_MAKE_CHAR (scm_i_string_ref (str, c_idx)));
     }
 
-  /* No string-set! instruction, as there is no good fast path there.  */
+  /* string-set! instruction is currently number 192.  Probably need to
+     reorder before releasing.  */
 
   /* string->number dst:12 src:12
    *
@@ -4006,7 +4007,35 @@ VM_NAME (scm_i_thread *thread, struct scm_vm *vp,
       BR_F64_ARITHMETIC (>=);
     }
 
-  VM_DEFINE_OP (192, unused_192, NULL, NOP)
+  /* string-set! dst:8 idx:8 src:8
+   *
+   * Store the character SRC into the string DST at index IDX.
+   */
+  VM_DEFINE_OP (192, string_set, "string-set!", OP1 (X8_S8_S8_S8))
+    {
+      scm_t_uint8 dst, idx, src;
+      SCM str, chr;
+      scm_t_uint64 c_idx;
+
+      UNPACK_8_8_8 (op, dst, idx, src);
+      str = SP_REF (dst);
+      c_idx = SP_REF_U64 (idx);
+      chr = SP_REF (src);
+
+      VM_VALIDATE_STRING (str, "string-ref");
+      VM_VALIDATE_INDEX (c_idx, scm_i_string_length (str), "string-ref");
+
+      /* If needed we can speed this up and only SYNC_IP +
+         scm_i_string_writing if the string isn't already a non-shared
+         stringbuf.  */
+      SYNC_IP ();
+      scm_i_string_start_writing (str);
+      scm_i_string_set_x (str, c_idx, SCM_CHAR (chr));
+      scm_i_string_stop_writing ();
+
+      NEXT (1);
+    }
+
   VM_DEFINE_OP (193, unused_193, NULL, NOP)
   VM_DEFINE_OP (194, unused_194, NULL, NOP)
   VM_DEFINE_OP (195, unused_195, NULL, NOP)
diff --git a/module/ice-9/rdelim.scm b/module/ice-9/rdelim.scm
index a406f4e..d2cd081 100644
--- a/module/ice-9/rdelim.scm
+++ b/module/ice-9/rdelim.scm
@@ -156,13 +156,20 @@ If the COUNT argument is present, treat it as a limit to 
the number of
 characters to read.  By default, there is no limit."
    ((#:optional (port (current-input-port)))
     ;; Fast path.
-    ;; This creates more garbage than using 'string-set!' as in
-    ;; 'read-string!', but currently that is faster nonetheless.
-    (let loop ((chars '()))
+    (let loop ((head (make-string 30)) (pos 0) (tail '()))
       (let ((char (read-char port)))
-        (if (eof-object? char)
-            (list->string (reverse! chars))
-            (loop (cons char chars))))))
+        (cond
+         ((eof-object? char)
+          (let ((head (substring head 0 pos)))
+            (if (null? tail)
+                (substring head 0 pos)
+                (string-concatenate-reverse tail head pos))))
+         (else
+          (string-set! head pos char)
+          (if (< (1+ pos) (string-length head))
+              (loop head (1+ pos) tail)
+              (loop (make-string (* (string-length head) 2)) 0
+                    (cons head tail))))))))
    ((port count)
     ;; Slower path.
     (let loop ((chars '())
diff --git a/module/language/cps/compile-bytecode.scm 
b/module/language/cps/compile-bytecode.scm
index 98d6354..c283eb6 100644
--- a/module/language/cps/compile-bytecode.scm
+++ b/module/language/cps/compile-bytecode.scm
@@ -322,6 +322,9 @@
         (($ $primcall 'vector-set!/immediate (vector index value))
          (emit-vector-set!/immediate asm (from-sp (slot vector))
                                      (constant index) (from-sp (slot value))))
+        (($ $primcall 'string-set! (string index char))
+         (emit-string-set! asm (from-sp (slot string)) (from-sp (slot index))
+                           (from-sp (slot char))))
         (($ $primcall 'set-car! (pair value))
          (emit-set-car! asm (from-sp (slot pair)) (from-sp (slot value))))
         (($ $primcall 'set-cdr! (pair value))
diff --git a/module/language/cps/types.scm b/module/language/cps/types.scm
index a66e4b8..fd592ea 100644
--- a/module/language/cps/types.scm
+++ b/module/language/cps/types.scm
@@ -707,12 +707,12 @@ minimum, and maximum."
 
 (define-type-checker (string-set! s idx val)
   (and (check-type s &string 0 *max-size-t*)
-       (check-type idx &exact-integer 0 *max-size-t*)
+       (check-type idx &u64 0 *max-size-t*)
        (check-type val &char 0 *max-codepoint*)
        (< (&max idx) (&min s))))
 (define-type-inferrer (string-set! s idx val)
   (restrict! s &string (1+ (&min/0 idx)) *max-size-t*)
-  (restrict! idx &exact-integer 0 (1- (&max/size s)))
+  (restrict! idx &u64 0 (1- (&max/size s)))
   (restrict! val &char 0 *max-codepoint*))
 
 (define-simple-type-checker (string-length &string))
diff --git a/module/language/tree-il/compile-cps.scm 
b/module/language/tree-il/compile-cps.scm
index 9e7dc72..3e1c1d4 100644
--- a/module/language/tree-il/compile-cps.scm
+++ b/module/language/tree-il/compile-cps.scm
@@ -652,7 +652,7 @@
                      cps idx 'scm->u64
                      (lambda (cps idx)
                        (have-args cps (list obj idx)))))))
-                ((vector-set! struct-set!)
+                ((vector-set! struct-set! string-set!)
                  (match args
                    ((obj idx val)
                     (unbox-arg
diff --git a/module/system/vm/assembler.scm b/module/system/vm/assembler.scm
index aa803ac..9ac3fa6 100644
--- a/module/system/vm/assembler.scm
+++ b/module/system/vm/assembler.scm
@@ -134,6 +134,7 @@
             emit-fluid-set!
             emit-string-length
             emit-string-ref
+            emit-string-set!
             emit-string->number
             emit-string->symbol
             emit-symbol->keyword



reply via email to

[Prev in Thread] Current Thread [Next in Thread]