guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-13-2-g731


From: Andy Wingo
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-13-2-g73124c6
Date: Sun, 17 Oct 2010 19:18:10 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=73124c6c630e2cc1877adc118691b96c4aec2c5b

The branch, master has been updated
       via  73124c6c630e2cc1877adc118691b96c4aec2c5b (commit)
       via  15c9af8c71f54be0e28f852bb66b33c85660ab69 (commit)
      from  9546dc8cbf74ebab708a47a5080901213f02f3be (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 73124c6c630e2cc1877adc118691b96c4aec2c5b
Author: Andy Wingo <address@hidden>
Date:   Fri Oct 15 20:56:42 2010 +0200

    web/uri: reimplement for rfc 3986, add tests
    
    * module/web/uri.scm: Reimplement for RFC 3986.
    
    * module/Makefile.am: Add to build.
    
    * test-suite/Makefile.am:
    * test-suite/tests/web-uri.test: Add tests.

commit 15c9af8c71f54be0e28f852bb66b33c85660ab69
Author: Andy Wingo <address@hidden>
Date:   Fri Oct 15 20:51:29 2010 +0200

    import uri.scm from tekuti, from guile-www

-----------------------------------------------------------------------

Summary of changes:
 module/Makefile.am            |    6 +-
 module/web/uri.scm            |  300 +++++++++++++++++++++++++++++++++++++++++
 test-suite/Makefile.am        |    3 +-
 test-suite/tests/web-uri.test |  174 ++++++++++++++++++++++++
 4 files changed, 481 insertions(+), 2 deletions(-)
 create mode 100644 module/web/uri.scm
 create mode 100644 test-suite/tests/web-uri.test

diff --git a/module/Makefile.am b/module/Makefile.am
index a11a1d5..8086d82 100644
--- a/module/Makefile.am
+++ b/module/Makefile.am
@@ -60,7 +60,8 @@ SOURCES =                                     \
   $(ECMASCRIPT_LANG_SOURCES)                   \
   $(ELISP_LANG_SOURCES)                                \
   $(BRAINFUCK_LANG_SOURCES)                    \
-  $(LIB_SOURCES)
+  $(LIB_SOURCES)                               \
+  $(WEB_SOURCES)
 
 ## test.scm is not currently installed.
 EXTRA_DIST +=                                  \
@@ -346,6 +347,9 @@ LIB_SOURCES =                                       \
   texinfo/reflection.scm                       \
   texinfo/serialize.scm
 
+WEB_SOURCES =                                  \
+  web/uri.scm
+
 EXTRA_DIST += oop/ChangeLog-2008
 
 NOCOMP_SOURCES =                               \
diff --git a/module/web/uri.scm b/module/web/uri.scm
new file mode 100644
index 0000000..600e19a
--- /dev/null
+++ b/module/web/uri.scm
@@ -0,0 +1,300 @@
+;;;; (web uri) --- URI manipulation tools
+;;;;
+;;;; Copyright (C) 1997,2001,2002,2010 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;; 
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;; 
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+;;;;
+
+;;; Commentary:
+
+;; Based on (www url). To be documented.
+
+;;; Code:
+
+(define-module (web uri)
+  #:export (uri?
+            uri-scheme uri-userinfo uri-host uri-port
+            uri-path uri-query uri-fragment
+
+            build-uri
+            parse-uri unparse-uri
+            uri-decode uri-encode
+            split-and-decode-uri-path
+            encode-and-join-uri-path)
+  #:use-module (srfi srfi-9)
+  #:use-module (ice-9 regex)
+  #:use-module (ice-9 control)
+  #:use-module (rnrs bytevectors)
+  #:use-module (rnrs io ports))
+
+(define-record-type <uri>
+  (make-uri scheme userinfo host port path query fragment)
+  uri?
+  (scheme uri-scheme)
+  (userinfo uri-userinfo)
+  (host uri-host)
+  (port uri-port)
+  (path uri-path)
+  (query uri-query)
+  (fragment uri-fragment))
+
+(define (positive-exact-integer? port)
+  (and (number? port) (exact? port) (integer? port) (positive? port)))
+
+(define (validate-uri scheme userinfo host port path query fragment)
+  (cond
+   ((not (symbol? scheme))
+    (error "expected a symbol for the URI scheme" scheme))
+   ((and (or userinfo port) (not host))
+    (error "expected host, given userinfo or port"))
+   ((and port (not (positive-exact-integer? port)))
+    (error "expected integer port" port))
+   ((and host (or (not (string? host)) (not (valid-host? host))))
+    (error "expected valid host" host))
+   ((and userinfo (not (string? userinfo)))
+    (error "expected string for userinfo" userinfo))
+   ((not (string? path))
+    (error "expected string for path" path))
+   ((and host (not (string-null? path))
+         (not (eqv? (string-ref path 0) #\/)))
+    (error "expected path of absolute URI to start with a /" path))))
+
+(define* (build-uri scheme #:key userinfo host port (path "") query fragment
+                    (validate? #t))
+  (if validate?
+      (validate-uri scheme userinfo host port path query fragment))
+  (make-uri scheme userinfo host port path query fragment))
+
+;; See RFC 3986 #3.2.2 for comments on percent-encodings, IDNA (RFC
+;; 3490), and non-ASCII host names.
+;;
+(define ipv4-regexp
+  (make-regexp "^([0-9.]+)"))
+(define ipv6-regexp
+  (make-regexp "^\\[([0-9a-fA-F:]+)\\]+"))
+(define domain-label-regexp
+  (make-regexp "^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?$"))
+(define top-label-regexp
+  (make-regexp "^[a-zA-Z]([a-zA-Z0-9-]*[a-zA-Z0-9])?$"))
+
+(define (valid-host? host)
+  (cond
+   ((regexp-exec ipv4-regexp host)
+    => (lambda (m)
+         (false-if-exception (inet-pton AF_INET (match:substring m 1)))))
+   ((regexp-exec ipv6-regexp host)
+    => (lambda (m)
+         (false-if-exception (inet-pton AF_INET6 (match:substring m 1)))))
+   (else
+    (let ((labels (reverse (string-split host #\.))))
+      (and (pair? labels)
+           (regexp-exec top-label-regexp (car labels))
+           (and-map (lambda (label)
+                      (regexp-exec domain-label-regexp label))
+                    (cdr labels)))))))
+
+(define userinfo-pat
+  "[a-zA-Z0-9_.!~*'();:&=+$,-]+")
+(define host-pat
+  "[a-zA-Z0-9.-]+")
+(define port-pat
+  "[0-9]*")
+(define authority-regexp
+  (make-regexp
+   (format #f "^//((~a)@)?(~a)(:(~a))?$"
+           userinfo-pat host-pat port-pat)))
+
+(define (parse-authority authority fail)
+  (let ((m (regexp-exec authority-regexp authority)))
+    (if (and m (valid-host? (match:substring m 3)))
+        (values (match:substring m 2)
+                (match:substring m 3)
+                (let ((port (match:substring m 5)))
+                  (and port (not (string-null? port))
+                       (string->number port))))
+        (fail))))
+
+
+;;; RFC 3986, #3.
+;;;
+;;;   URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+;;;
+;;;   hier-part   = "//" authority path-abempty
+;;;               / path-absolute
+;;;               / path-rootless
+;;;               / path-empty
+
+(define scheme-pat
+  "[a-zA-Z][a-zA-Z0-9+.-]*")
+(define authority-pat
+  "[^/?#]*")
+(define path-pat
+  "[^?#]*")
+(define query-pat
+  "[^#]*")
+(define fragment-pat
+  ".*")
+(define uri-pat
+  (format #f "^(~a):(//~a)?(~a)(\\?(~a))?(#(~a))?$"
+          scheme-pat authority-pat path-pat query-pat fragment-pat))
+(define uri-regexp
+  (make-regexp uri-pat))
+
+(define (parse-uri string)
+  (% (let ((m (regexp-exec uri-regexp string)))
+       (if (not m) (abort))
+       (let ((scheme (string->symbol
+                      (string-downcase (match:substring m 1))))
+             (authority (match:substring m 2))
+             (path (match:substring m 3))
+             (query (match:substring m 5))
+             (fragment (match:substring m 7)))
+         (call-with-values
+             (lambda ()
+               (if authority
+                   (parse-authority authority abort)
+                   (values #f #f #f)))
+           (lambda (userinfo host port)
+             (make-uri scheme userinfo host port path query fragment)))))
+     (lambda (k)
+       #f)))
+
+(define (unparse-uri uri)
+  (let* ((scheme-str (string-append
+                      (symbol->string (uri-scheme uri)) ":"))
+         (userinfo (uri-userinfo uri))
+         (host (uri-host uri))
+         (port (uri-port uri))
+         (path (uri-path uri))
+         (query (uri-query uri))
+         (fragment (uri-fragment uri)))
+    (string-append
+     scheme-str
+     (if host
+         (string-append "//"
+                        (if userinfo (string-append userinfo "@")
+                            "")
+                        host
+                        (if port
+                            (string-append ":" (number->string port))
+                            ""))
+         "")
+     path
+     (if query
+         (string-append "?" query)
+         "")
+     (if fragment
+         (string-append "#" fragment)
+         ""))))
+
+
+;; A note on characters and bytes: URIs are defined to be sequences of
+;; characters in a subset of ASCII. Those characters may encode a
+;; sequence of bytes (octets), which in turn may encode sequences of
+;; characters in other character sets.
+;;
+
+;; Return a new string made from uri-decoding @var{str}.  Specifically,
+;; turn @code{+} into space, and hex-encoded @code{%XX} strings into
+;; their eight-bit characters.
+;;
+(define hex-chars
+  (string->char-set "0123456789abcdefABCDEF"))
+
+(define* (uri-decode str #:key (charset 'utf-8))
+  (let ((len (string-length str)))
+    (call-with-values open-bytevector-output-port
+      (lambda (port get-bytevector)
+        (let lp ((i 0))
+          (if (= i len)
+              ((case charset
+                 ((utf-8) utf8->string)
+                 ((#f) (lambda (x) x)) ; raw bytevector
+                 (else (error "unknown charset" charset)))
+               (get-bytevector))
+              (let ((ch (string-ref str i)))
+                (cond
+                 ((eqv? ch #\+)
+                  (put-u8 port (char->integer #\space))
+                  (lp (1+ i)))
+                 ((and (< (+ i 2) len) (eqv? ch #\%)
+                       (let ((a (string-ref str (+ i 1)))
+                             (b (string-ref str (+ i 2))))
+                         (and (char-set-contains? hex-chars a)
+                              (char-set-contains? hex-chars b)
+                              (string->number (string a b) 16))))
+                  => (lambda (u8)
+                       (put-u8 port u8)
+                       (lp (+ i 3))))
+                 ((< (char->integer ch) 128)
+                  (put-u8 port (char->integer ch))
+                  (lp (1+ i)))
+                 (else
+                  (error "invalid character in encoded URI" str ch))))))))))
+  
+(define ascii-alnum-chars
+  (string->char-set
+   "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"))
+
+;; RFC 3986, #2.2.
+(define gen-delims
+  (string->char-set ":/?#[]@"))
+(define sub-delims
+  (string->char-set "!$&'()*+,l="))
+(define reserved-chars
+  (char-set-union gen-delims sub-delims))
+
+;; RFC 3986, #2.3
+(define unreserved-chars
+  (char-set-union ascii-alnum-chars
+                  (string->char-set "-._~")))
+
+;; Return a new string made from uri-encoding @var{str}, unconditionally
+;; transforming any characters not in @var{unescaped-chars}.
+;;
+(define* (uri-encode str #:key (charset 'utf-8)
+                     (unescaped-chars unreserved-chars))
+  (define (put-utf8 binary-port str)
+    (put-bytevector binary-port (string->utf8 str)))
+
+  ((case charset
+     ((utf-8) utf8->string)
+     ((#f) (lambda (x) x)) ; raw bytevector
+     (else (error "unknown charset" charset)))
+   (call-with-values open-bytevector-output-port
+     (lambda (port get-bytevector)
+       (string-for-each
+        (lambda (ch)
+          (if (char-set-contains? unescaped-chars ch)
+              (put-utf8 port (string ch))
+              (let* ((utf8 (string->utf8 (string ch)))
+                     (len (bytevector-length utf8)))
+                ;; Encode each byte.
+                (let lp ((i 0))
+                  (if (< i len)
+                      (begin
+                        (put-utf8 port (string #\%))
+                        (put-utf8 port
+                                  (number->string (bytevector-u8-ref utf8 i) 
16))
+                        (lp (1+ i))))))))
+        str)
+       (get-bytevector)))))
+
+(define (split-and-decode-uri-path path)
+  (filter (lambda (x) (not (string-null? x)))
+          (map uri-decode (string-split path #\/))))
+
+(define (encode-and-join-uri-path parts)
+  (string-join (map uri-encode parts) "/"))
diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am
index 70e49b2..a76553b 100644
--- a/test-suite/Makefile.am
+++ b/test-suite/Makefile.am
@@ -148,7 +148,8 @@ SCM_TESTS = tests/00-initial-env.test               \
            tests/tree-il.test                  \
            tests/version.test                  \
            tests/vlist.test                    \
-           tests/weaks.test
+           tests/weaks.test                    \
+           tests/web-uri.test
 
 EXTRA_DIST = \
        guile-test \
diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test
new file mode 100644
index 0000000..c410a7c
--- /dev/null
+++ b/test-suite/tests/web-uri.test
@@ -0,0 +1,174 @@
+;;;; web-uri.test --- URI library          -*- mode: scheme; coding: utf-8; -*-
+;;;;
+;;;;   Copyright (C) 2010 Free Software Foundation, Inc.
+;;;;
+;;;; This library is free software; you can redistribute it and/or
+;;;; modify it under the terms of the GNU Lesser General Public
+;;;; License as published by the Free Software Foundation; either
+;;;; version 3 of the License, or (at your option) any later version.
+;;;;
+;;;; This library is distributed in the hope that it will be useful,
+;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;;;; Lesser General Public License for more details.
+;;;;
+;;;; You should have received a copy of the GNU Lesser General Public
+;;;; License along with this library; if not, write to the Free Software
+;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
USA
+
+
+(define-module (test-web-uri)
+  #:use-module (web uri)
+  #:use-module (test-suite lib))
+
+
+;; FIXME: need more decode / encode tests
+
+
+(define* (uri=? uri #:key scheme userinfo host port path query fragment)
+  (and (uri? uri)
+       (equal? (uri-scheme uri) scheme)
+       (equal? (uri-userinfo uri) userinfo)
+       (equal? (uri-host uri) host)
+       (equal? (uri-port uri) port)
+       (equal? (uri-path uri) path)
+       (equal? (uri-query uri) query)
+       (equal? (uri-fragment uri) fragment)))
+
+(define ex:expected '(misc-error . "expected"))
+
+(with-test-prefix "build-uri"
+  (pass-if "ftp:"
+    (uri=? (build-uri 'ftp)
+           #:scheme 'ftp
+           #:path ""))
+  
+  (pass-if "ftp:foo"
+    (uri=? (build-uri 'ftp #:path "foo")
+           #:scheme 'ftp
+           #:path "foo"))
+  
+  (pass-if "ftp://foo";
+    (uri=? (build-uri 'ftp #:host "foo")
+           #:scheme 'ftp
+           #:host "foo"
+           #:path ""))
+  
+  (pass-if "ftp://foo/bar";
+    (uri=? (build-uri 'ftp #:host "foo" #:path "/bar")
+           #:scheme 'ftp
+           #:host "foo"
+           #:path "/bar"))
+  
+  (pass-if "ftp://address@hidden:22/baz";
+    (uri=? (build-uri 'ftp #:userinfo "foo" #:host "bar" #:port 22 #:path 
"/baz")
+           #:scheme 'ftp
+           #:userinfo "foo"
+           #:host "bar"
+           #:port 22
+           #:path "/baz"))
+
+  (pass-if-exception "non-symbol scheme"
+                     ex:expected
+                     (build-uri "nonsym"))
+
+  (pass-if-exception "http://bad.host.1";
+                     ex:expected
+                     (build-uri 'http #:host "bad.host.1"))
+
+  (pass-if "http://bad.host.1 (no validation)"
+    (uri=? (build-uri 'http #:host "bad.host.1" #:validate? #f)
+           #:scheme 'http #:host "bad.host.1" #:path ""))
+
+  (pass-if-exception "http://foo:not-a-port";
+                     ex:expected
+                     (build-uri 'http #:host "foo" #:port "not-a-port"))
+
+  (pass-if-exception "http://foo:10 but port as string"
+                     ex:expected
+                     (build-uri 'http #:host "foo" #:port "10"))
+
+  (pass-if-exception "http://:10";
+                     ex:expected
+                     (build-uri 'http #:port 10))
+
+  (pass-if-exception "http://foo@";
+                     ex:expected
+                     (build-uri 'http #:userinfo "foo")))
+
+
+(with-test-prefix "parse-uri"
+  (pass-if "ftp:"
+    (uri=? (parse-uri "ftp:")
+           #:scheme 'ftp
+           #:path ""))
+  
+  (pass-if "ftp:foo"
+    (uri=? (parse-uri "ftp:foo")
+           #:scheme 'ftp
+           #:path "foo"))
+  
+  (pass-if "ftp://foo/bar";
+    (uri=? (parse-uri "ftp://foo/bar";)
+           #:scheme 'ftp
+           #:host "foo"
+           #:path "/bar"))
+  
+  (pass-if "ftp://address@hidden:22/baz";
+    (uri=? (parse-uri "ftp://address@hidden:22/baz";)
+           #:scheme 'ftp
+           #:userinfo "foo"
+           #:host "bar"
+           #:port 22
+           #:path "/baz"))
+
+  (pass-if "http://bad.host.1";
+    (not (parse-uri "http://bad.host.1";)))
+
+  (pass-if "http://foo:";
+    (uri=? (parse-uri "http://foo:";)
+           #:scheme 'http #:host "foo" #:path ""))
+
+  (pass-if "http://foo:/";
+    (uri=? (parse-uri "http://foo:/";)
+           #:scheme 'http #:host "foo" #:path "/"))
+
+  (pass-if "http://foo:not-a-port";
+    (not (parse-uri "http://foo:not-a-port";)))
+  
+  (pass-if "http://:10";
+    (not (parse-uri "http://:10";)))
+
+  (pass-if "http://foo@";
+    (not (parse-uri "http://foo@";))))
+
+(with-test-prefix "unparse-uri"
+  (pass-if "ftp:"
+    (equal? "ftp:"
+            (unparse-uri (parse-uri "ftp:"))))
+  
+  (pass-if "ftp:foo"
+    (equal? "ftp:foo"
+            (unparse-uri (parse-uri "ftp:foo"))))
+  
+  (pass-if "ftp://foo/bar";
+    (equal? "ftp://foo/bar";
+            (unparse-uri (parse-uri "ftp://foo/bar";))))
+  
+  (pass-if "ftp://address@hidden:22/baz";
+    (equal? "ftp://address@hidden:22/baz";
+            (unparse-uri (parse-uri "ftp://address@hidden:22/baz";))))
+  
+  (pass-if "http://foo:";
+    (equal? "http://foo";
+            (unparse-uri (parse-uri "http://foo:";))))
+  
+  (pass-if "http://foo:/";
+    (equal? "http://foo/";
+            (unparse-uri (parse-uri "http://foo:/";)))))
+
+(with-test-prefix "decode"
+  (pass-if (equal? "foo bar" (uri-decode "foo%20bar"))))
+
+(with-test-prefix "encode"
+  (pass-if (equal? "foo%20bar" (uri-encode "foo bar"))))


hooks/post-receive
-- 
GNU Guile



reply via email to

[Prev in Thread] Current Thread [Next in Thread]