guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-13-98-g644c516


From: Andy Wingo
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-13-98-g644c516
Date: Tue, 30 Nov 2010 17:57:07 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=644c5165ee449a3beccadeb969e02746954703ee

The branch, master has been updated
       via  644c5165ee449a3beccadeb969e02746954703ee (commit)
       via  612aa5bee87bf85b908ed26e73d496af6f0d8520 (commit)
      from  a4342ba82655deecc8c001d7f457ebe8db01354e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 644c5165ee449a3beccadeb969e02746954703ee
Author: Andy Wingo <address@hidden>
Date:   Tue Nov 23 22:39:06 2010 +0100

    fix regexp matches to refer to chars, not bytes
    
    * libguile/regex-posix.c (fixup_multibyte_match): Fixup the match
      structure to refer to character offsets, not byte offsets. Fixes bug
      31650.
    
    * test-suite/tests/regexp.test: Add a test.

commit 612aa5bee87bf85b908ed26e73d496af6f0d8520
Author: Andy Wingo <address@hidden>
Date:   Tue Nov 23 16:02:21 2010 +0100

    fix web server bugs
    
    * module/web/http.scm (valid-quality?):
    * module/web/server.scm (sanitize-response): Fix a couple bugs.

-----------------------------------------------------------------------

Summary of changes:
 libguile/regex-posix.c       |   52 ++++++++++++++++++++++++++++++++++++++++-
 module/web/http.scm          |    2 +-
 module/web/server.scm        |    6 ++--
 test-suite/tests/regexp.test |    9 ++++++-
 4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/libguile/regex-posix.c b/libguile/regex-posix.c
index 6259f28..4c03577 100644
--- a/libguile/regex-posix.c
+++ b/libguile/regex-posix.c
@@ -1,4 +1,4 @@
-/*     Copyright (C) 1997, 1998, 1999, 2000, 2001, 2004, 2006, 2007 Free Software Foundation, Inc.
+/*     Copyright (C) 1997, 1998, 1999, 2000, 2001, 2004, 2006, 2007, 2010 Free Software Foundation, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public License
@@ -53,6 +53,10 @@
 #endif
 #endif
 
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
 #include "libguile/async.h"
 #include "libguile/smob.h"
 #include "libguile/symbols.h"
@@ -196,6 +200,43 @@ SCM_DEFINE (scm_make_regexp, "make-regexp", 1, 0, 1,
 }
 #undef FUNC_NAME
 
+#ifdef HAVE_WCHAR_H
+/*
+ * While regexec does respect the current locale, it returns byte
+ * offsets instead of character offsets. This routine fixes up the
+ * regmatch_t structures to refer to characters instead. See "Converting
+ * a Character" in the libc manual, for more details.
+ */
+static void
+fixup_multibyte_match (regmatch_t *matches, int nmatches, char *str)
+{
+  mbstate_t state;
+  int i;
+  size_t char_idx, byte_idx;
+  size_t nbytes = 1; /* just to kick off the for loop */
+
+  memset (&state, '\0', sizeof (state));
+
+  for (char_idx = byte_idx = 0; nbytes > 0; char_idx++, byte_idx += nbytes)
+    {
+      for (i = 0; i < nmatches; ++i)
+        {
+          if (matches[i].rm_so == byte_idx)
+            matches[i].rm_so = char_idx;
+          if (matches[i].rm_eo == byte_idx)
+            matches[i].rm_eo = char_idx;
+        }
+
+      nbytes = mbrlen (str + byte_idx, MB_LEN_MAX, &state);
+    }
+
+  if (nbytes >= (size_t) -2)
+    /* Something is wrong. Shouldn't be possible, as the regex match
+       succeeded.  */
+    abort ();
+}
+#endif
+
 SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0,
             (SCM rx, SCM str, SCM start, SCM flags),
            "Match the compiled regular expression @var{rx} against\n"
@@ -256,11 +297,18 @@ SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0,
   /* re_nsub doesn't account for the `subexpression' representing the
      whole regexp, so add 1 to nmatches. */
 
+  c_str = scm_to_locale_string (substr);
+
   nmatches = SCM_RGX(rx)->re_nsub + 1;
   matches = scm_malloc (sizeof (regmatch_t) * nmatches);
-  c_str = scm_to_locale_string (substr);
   status = regexec (SCM_RGX (rx), c_str, nmatches, matches,
                    scm_to_int (flags));
+
+#ifdef HAVE_WCHAR_H
+  if (!status)
+    fixup_multibyte_match (matches, nmatches, c_str);
+#endif
+
   free (c_str);
 
   if (!status)
diff --git a/module/web/http.scm b/module/web/http.scm
index 5063aa9..2ebdb3b 100644
--- a/module/web/http.scm
+++ b/module/web/http.scm
@@ -320,7 +320,7 @@
     (bad-header-component 'quality str))))
 
 (define (valid-quality? q)
-  (and (non-negative-integer? q) (<= 1000 q)))
+  (and (non-negative-integer? q) (<= q 1000)))
 
 (define (write-quality q port)
   (define (digit->char d)
diff --git a/module/web/server.scm b/module/web/server.scm
index 0d5bd40..f8ebf18 100644
--- a/module/web/server.scm
+++ b/module/web/server.scm
@@ -190,9 +190,9 @@
     (values (let ((rlen (response-content-length response))
                   (blen (bytevector-length body)))
               (cond
-               ((rlen) (if (= rlen blen)
-                           response
-                           (error "bad content-length" rlen blen)))
+               (rlen (if (= rlen blen)
+                         response
+                         (error "bad content-length" rlen blen)))
                ((zero? blen) response)
                (else (extend-response response 'content-length blen))))
             body))
diff --git a/test-suite/tests/regexp.test b/test-suite/tests/regexp.test
index efa0e7e..1b58789 100644
--- a/test-suite/tests/regexp.test
+++ b/test-suite/tests/regexp.test
@@ -1,4 +1,4 @@
-;;;; regexp.test --- test Guile's regular expression functions -*- scheme -*-
+;;;; regexp.test ---  test Guile's regexps   -*- coding: utf-8; mode: scheme -*-
 ;;;; Jim Blandy <address@hidden> --- September 1999
 ;;;;
 ;;;;   Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
@@ -265,3 +265,10 @@
   ;; Jan Nieuwenhuizen's bug, 2 Sep 1999
   (try "" "_" (make-string 500 #\_)
        'post))
+
+(with-test-prefix "nonascii locales"
+  (with-locale "en_US.utf8"
+    ;; bug 31650
+    (pass-if "match structures refer to char offsets"
+      (equal? (match:substring (string-match ".*" "calçot") 0)
+              "calçot"))))


hooks/post-receive
-- 
GNU Guile



reply via email to

[Prev in Thread] Current Thread [Next in Thread]