bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/2] fstrcmp: don't assume strlen < INT_MAX


From: Paul Eggert
Subject: [PATCH 2/2] fstrcmp: don't assume strlen < INT_MAX
Date: Sat, 7 Feb 2015 18:10:14 -0800

* lib/fstrcmp.c: Include stddef.h and stdint.h.
(uintptr_t): Remove, as we're now assuming stdint.
(OFFSET, EXTRA_CONTEXT_FIELDS, fstrcmp_bounded):
Prefer ptrdiff_t to int when the value could exceed INT_MAX
if the input string is long.
(fstrcmp_bounded): Check for size-calculation overflow.  Prefer
uintptr_t to size_t when the underlying value is a pointer casted
to an unsigned integer.  Avoid unnecessary 'buffer != NULL' test.
* modules/fstrcmp (Depends-on): Add stdint.
---
 ChangeLog       | 11 ++++++++++
 lib/fstrcmp.c   | 65 ++++++++++++++++++++++++++++++---------------------------
 modules/fstrcmp |  1 +
 3 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3207028..cd73197 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
 2015-02-07  Paul Eggert  <address@hidden>
 
+       fstrcmp: don't assume strlen < INT_MAX
+       * lib/fstrcmp.c: Include stddef.h and stdint.h.
+       (uintptr_t): Remove, as we're now assuming stdint.
+       (OFFSET, EXTRA_CONTEXT_FIELDS, fstrcmp_bounded):
+       Prefer ptrdiff_t to int when the value could exceed INT_MAX
+       if the input string is long.
+       (fstrcmp_bounded): Check for size-calculation overflow.  Prefer
+       uintptr_t to size_t when the underlying value is a pointer casted
+       to an unsigned integer.  Avoid unnecessary 'buffer != NULL' test.
+       * modules/fstrcmp (Depends-on): Add stdint.
+
        diffseq: prefer ptrdiff_t to ssize_t
        * lib/diffseq.h: In commentary, prefer ptrdiff_t to ssize_t.
        ptrdiff_t is the natural type for signed indexes.
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c
index 70db50a..d40e6ef 100644
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -23,7 +23,9 @@
 
 #include <string.h>
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <limits.h>
 
@@ -32,20 +34,16 @@
 #include "minmax.h"
 #include "xalloc.h"
 
-#ifndef uintptr_t
-# define uintptr_t unsigned long
-#endif
-
 
 #define ELEMENT char
 #define EQUAL(x,y) ((x) == (y))
-#define OFFSET int
+#define OFFSET ptrdiff_t
 #define EXTRA_CONTEXT_FIELDS \
   /* The number of edits beyond which the computation can be aborted. */ \
-  int edit_count_limit; \
+  ptrdiff_t edit_count_limit; \
   /* The number of edits (= number of elements inserted, plus the number of \
      elements deleted), temporarily minus edit_count_limit. */ \
-  int edit_count;
+  ptrdiff_t edit_count;
 #define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++
 #define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++
 #define EARLY_ABORT(ctxt) ctxt->edit_count > 0
@@ -61,8 +59,8 @@
    already allocated memory, store the allocated memory per thread.  Free
    it only when the thread exits.  */
 
-static gl_tls_key_t buffer_key; /* TLS key for a 'int *' */
-static gl_tls_key_t bufmax_key; /* TLS key for a 'size_t' */
+static gl_tls_key_t buffer_key; /* TLS key for a 'ptrdiff_t *' */
+static gl_tls_key_t bufmax_key; /* TLS key for a 'uintptr_t' */
 
 static void
 keys_init (void)
@@ -87,17 +85,22 @@ double
 fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
 {
   struct context ctxt;
-  int xvec_length = strlen (string1);
-  int yvec_length = strlen (string2);
-  int i;
+  size_t xvec_length = strlen (string1);
+  size_t yvec_length = strlen (string2);
+  size_t length_sum = xvec_length + yvec_length;
+  ptrdiff_t i;
 
-  size_t fdiag_len;
-  int *buffer;
-  size_t bufmax;
+  ptrdiff_t fdiag_len;
+  ptrdiff_t *buffer;
+  uintptr_t bufmax;
 
   /* short-circuit obvious comparisons */
   if (xvec_length == 0 || yvec_length == 0) /* Prob: 1% */
-    return (xvec_length == 0 && yvec_length == 0 ? 1.0 : 0.0);
+    return length_sum == 0;
+
+  if (! (xvec_length <= length_sum
+         && length_sum <= MIN (UINTPTR_MAX, PTRDIFF_MAX) - 3))
+    xalloc_die ();
 
   if (lower_bound > 0)
     {
@@ -117,9 +120,8 @@ fstrcmp_bounded (const char *string1, const char *string2, 
double lower_bound)
                 / (xvec_length + yvec_length)
              = 2 * min (xvec_length, yvec_length) / (xvec_length + 
yvec_length).
        */
-      volatile double upper_bound =
-        (double) (2 * MIN (xvec_length, yvec_length))
-        / (xvec_length + yvec_length);
+      ptrdiff_t length_min = MIN (xvec_length, yvec_length);
+      volatile double upper_bound = 2.0 * length_min / length_sum;
 
       if (upper_bound < lower_bound) /* Prob: 74% */
         /* Return an arbitrary value < LOWER_BOUND.  */
@@ -128,7 +130,7 @@ fstrcmp_bounded (const char *string1, const char *string2, 
double lower_bound)
 #if CHAR_BIT <= 8
       /* When X and Y are both small, avoid the overhead of setting up an
          array of size 256.  */
-      if (xvec_length + yvec_length >= 20) /* Prob: 99% */
+      if (length_sum >= 20) /* Prob: 99% */
         {
           /* Compute a less quick upper bound.
              Each edit is an insertion or deletion of a character, hence
@@ -148,8 +150,9 @@ fstrcmp_bounded (const char *string1, const char *string2, 
double lower_bound)
                  <= (xvec_length + yvec_length - sum_C | OCC(X,C) - OCC(Y,C) |)
                     / (xvec_length + yvec_length).
            */
-          int occ_diff[UCHAR_MAX + 1]; /* array C -> OCC(X,C) - OCC(Y,C) */
-          int sum;
+          ptrdiff_t occ_diff[UCHAR_MAX + 1]; /* array C -> OCC(X,C) - OCC(Y,C) 
*/
+          ptrdiff_t sum;
+          double dsum;
 
           /* Determine the occurrence counts in X.  */
           memset (occ_diff, 0, sizeof (occ_diff));
@@ -162,11 +165,12 @@ fstrcmp_bounded (const char *string1, const char 
*string2, double lower_bound)
           sum = 0;
           for (i = 0; i <= UCHAR_MAX; i++)
             {
-              int d = occ_diff[i];
+              ptrdiff_t d = occ_diff[i];
               sum += (d >= 0 ? d : -d);
             }
 
-          upper_bound = 1.0 - (double) sum / (xvec_length + yvec_length);
+          dsum = sum;
+          upper_bound = 1.0 - dsum / length_sum;
 
           if (upper_bound < lower_bound) /* Prob: 66% */
             /* Return an arbitrary value < LOWER_BOUND.  */
@@ -180,10 +184,10 @@ fstrcmp_bounded (const char *string1, const char 
*string2, double lower_bound)
   ctxt.yvec = string2;
 
   /* Allocate memory for fdiag and bdiag from a thread-local pool.  */
-  fdiag_len = xvec_length + yvec_length + 3;
+  fdiag_len = length_sum + 3;
   gl_once (keys_init_once, keys_init);
-  buffer = (int *) gl_tls_get (buffer_key);
-  bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key);
+  buffer = gl_tls_get (buffer_key);
+  bufmax = (uintptr_t) gl_tls_get (bufmax_key);
   if (fdiag_len > bufmax)
     {
       /* Need more memory.  */
@@ -192,9 +196,8 @@ fstrcmp_bounded (const char *string1, const char *string2, 
double lower_bound)
         bufmax = fdiag_len;
       /* Calling xrealloc would be a waste: buffer's contents does not need
          to be preserved.  */
-      if (buffer != NULL)
-        free (buffer);
-      buffer = (int *) xnmalloc (bufmax, 2 * sizeof (int));
+      free (buffer);
+      buffer = xnmalloc (bufmax, 2 * sizeof *buffer);
       gl_tls_set (buffer_key, buffer);
       gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax);
     }
@@ -213,7 +216,7 @@ fstrcmp_bounded (const char *string1, const char *string2, 
double lower_bound)
      rounding errors.  */
   ctxt.edit_count_limit =
     (lower_bound < 1.0
-     ? (int) ((xvec_length + yvec_length) * (1.0 - lower_bound + 0.000001))
+     ? (ptrdiff_t) (length_sum * (1.0 - lower_bound + 0.000001))
      : 0);
 
   /* Now do the main comparison algorithm */
diff --git a/modules/fstrcmp b/modules/fstrcmp
index 1ea0f37..77de7a0 100644
--- a/modules/fstrcmp
+++ b/modules/fstrcmp
@@ -10,6 +10,7 @@ diffseq
 lock
 tls
 minmax
+stdint
 xalloc
 
 configure.ac:
-- 
2.1.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]