From 75263eb13ee10deac5b81eed0de927402200924e Mon Sep 17 00:00:00 2001 From: Peter Bex
Date: Tue, 8 Dec 2015 21:50:26 +0100 Subject: [PATCH] Improve irregex matching performance We add type declarations to "cset-contains?" to ensure unsafe versions of char comparison functions, vector access and car/cdr are used. This hacks up irregex core even further, but it's worthwhile: in some cases a regex match can run twice as fast. Conflicts: NEWS --- NEWS | 3 +++ irregex-core.scm | 32 ++++++++++++++++++-------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index 3d06be8..86d1248 100644 --- a/NEWS +++ b/NEWS @@ -52,6 +52,9 @@ last to resolve ambiguities (#1214). - Compiler rewrites for char{<,>,<=,>=,=}? are now safe (#1122). +- Core libraries + - Irregex matching performance has been improved. + - Unit "posix": The following posix procedures now work on port objects: file-stat, file-size, file-owner, file-permissions, file-modification-time, file-access-time, file-change-time, diff --git a/irregex-core.scm b/irregex-core.scm index c4dbea2..c58300f 100644 --- a/irregex-core.scm +++ b/irregex-core.scm @@ -3676,20 +3676,24 @@ (vector->list cset)))) (define (cset-contains? cset ch) - (let ((len (vector-length cset))) - (case len - ((0) #f) - ((1) (let ((range (vector-ref cset 0))) - (and (char<=? ch (cdr range)) (char<=? (car range) ch)))) - (else (let lp ((lower 0) (upper len)) - (let* ((middle (quotient (+ upper lower) 2)) - (range (vector-ref cset middle))) - (cond ((char (cdr range) ch) - (let ((next (+ middle 1))) - (and (< next upper) (lp next upper)))) - ((char ch (car range)) - (and (< lower middle) (lp lower middle))) - (else #t)))))))) + ;; CHICKEN: Type assumption added for performance. This is a very + ;; hot code path, so every type improvement matters. + (assume ((cset (vector-of (pair char char))) + (ch char)) + (let ((len (vector-length cset))) + (case len + ((0) #f) + ((1) (let ((range (vector-ref cset 0))) + (and (char<=? ch (cdr range)) (char<=? (car range) ch)))) + (else (let lp ((lower 0) (upper len)) + (let* ((middle (quotient (+ upper lower) 2)) + (range (vector-ref cset middle))) + (cond ((char (cdr range) ch) + (let ((next (+ middle 1))) + (and (< next upper) (lp next upper)))) + ((char ch (car range)) + (and (< lower middle) (lp lower middle))) + (else #t))))))))) (define (char-ranges-union a b) (cons (if (char<=? (car a) (car b)) (car a) (car b)) -- 2.1.4