[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 2/2] fstrcmp: don't assume strlen < INT_MAX
From: |
Paul Eggert |
Subject: |
[PATCH 2/2] fstrcmp: don't assume strlen < INT_MAX |
Date: |
Sat, 7 Feb 2015 18:10:14 -0800 |
* lib/fstrcmp.c: Include stddef.h and stdint.h.
(uintptr_t): Remove, as we're now assuming stdint.
(OFFSET, EXTRA_CONTEXT_FIELDS, fstrcmp_bounded):
Prefer ptrdiff_t to int when the value could exceed INT_MAX
if the input string is long.
(fstrcmp_bounded): Check for size-calculation overflow. Prefer
uintptr_t to size_t when the underlying value is a pointer casted
to an unsigned integer. Avoid unnecessary 'buffer != NULL' test.
* modules/fstrcmp (Depends-on): Add stdint.
---
ChangeLog | 11 ++++++++++
lib/fstrcmp.c | 65 ++++++++++++++++++++++++++++++---------------------------
modules/fstrcmp | 1 +
3 files changed, 46 insertions(+), 31 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 3207028..cd73197 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
2015-02-07 Paul Eggert <address@hidden>
+ fstrcmp: don't assume strlen < INT_MAX
+ * lib/fstrcmp.c: Include stddef.h and stdint.h.
+ (uintptr_t): Remove, as we're now assuming stdint.
+ (OFFSET, EXTRA_CONTEXT_FIELDS, fstrcmp_bounded):
+ Prefer ptrdiff_t to int when the value could exceed INT_MAX
+ if the input string is long.
+ (fstrcmp_bounded): Check for size-calculation overflow. Prefer
+ uintptr_t to size_t when the underlying value is a pointer casted
+ to an unsigned integer. Avoid unnecessary 'buffer != NULL' test.
+ * modules/fstrcmp (Depends-on): Add stdint.
+
diffseq: prefer ptrdiff_t to ssize_t
* lib/diffseq.h: In commentary, prefer ptrdiff_t to ssize_t.
ptrdiff_t is the natural type for signed indexes.
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c
index 70db50a..d40e6ef 100644
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -23,7 +23,9 @@
#include <string.h>
#include <stdbool.h>
+#include <stddef.h>
#include <stdio.h>
+#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
@@ -32,20 +34,16 @@
#include "minmax.h"
#include "xalloc.h"
-#ifndef uintptr_t
-# define uintptr_t unsigned long
-#endif
-
#define ELEMENT char
#define EQUAL(x,y) ((x) == (y))
-#define OFFSET int
+#define OFFSET ptrdiff_t
#define EXTRA_CONTEXT_FIELDS \
/* The number of edits beyond which the computation can be aborted. */ \
- int edit_count_limit; \
+ ptrdiff_t edit_count_limit; \
/* The number of edits (= number of elements inserted, plus the number of \
elements deleted), temporarily minus edit_count_limit. */ \
- int edit_count;
+ ptrdiff_t edit_count;
#define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++
#define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++
#define EARLY_ABORT(ctxt) ctxt->edit_count > 0
@@ -61,8 +59,8 @@
already allocated memory, store the allocated memory per thread. Free
it only when the thread exits. */
-static gl_tls_key_t buffer_key; /* TLS key for a 'int *' */
-static gl_tls_key_t bufmax_key; /* TLS key for a 'size_t' */
+static gl_tls_key_t buffer_key; /* TLS key for a 'ptrdiff_t *' */
+static gl_tls_key_t bufmax_key; /* TLS key for a 'uintptr_t' */
static void
keys_init (void)
@@ -87,17 +85,22 @@ double
fstrcmp_bounded (const char *string1, const char *string2, double lower_bound)
{
struct context ctxt;
- int xvec_length = strlen (string1);
- int yvec_length = strlen (string2);
- int i;
+ size_t xvec_length = strlen (string1);
+ size_t yvec_length = strlen (string2);
+ size_t length_sum = xvec_length + yvec_length;
+ ptrdiff_t i;
- size_t fdiag_len;
- int *buffer;
- size_t bufmax;
+ ptrdiff_t fdiag_len;
+ ptrdiff_t *buffer;
+ uintptr_t bufmax;
/* short-circuit obvious comparisons */
if (xvec_length == 0 || yvec_length == 0) /* Prob: 1% */
- return (xvec_length == 0 && yvec_length == 0 ? 1.0 : 0.0);
+ return length_sum == 0;
+
+ if (! (xvec_length <= length_sum
+ && length_sum <= MIN (UINTPTR_MAX, PTRDIFF_MAX) - 3))
+ xalloc_die ();
if (lower_bound > 0)
{
@@ -117,9 +120,8 @@ fstrcmp_bounded (const char *string1, const char *string2,
double lower_bound)
/ (xvec_length + yvec_length)
= 2 * min (xvec_length, yvec_length) / (xvec_length +
yvec_length).
*/
- volatile double upper_bound =
- (double) (2 * MIN (xvec_length, yvec_length))
- / (xvec_length + yvec_length);
+ ptrdiff_t length_min = MIN (xvec_length, yvec_length);
+ volatile double upper_bound = 2.0 * length_min / length_sum;
if (upper_bound < lower_bound) /* Prob: 74% */
/* Return an arbitrary value < LOWER_BOUND. */
@@ -128,7 +130,7 @@ fstrcmp_bounded (const char *string1, const char *string2,
double lower_bound)
#if CHAR_BIT <= 8
/* When X and Y are both small, avoid the overhead of setting up an
array of size 256. */
- if (xvec_length + yvec_length >= 20) /* Prob: 99% */
+ if (length_sum >= 20) /* Prob: 99% */
{
/* Compute a less quick upper bound.
Each edit is an insertion or deletion of a character, hence
@@ -148,8 +150,9 @@ fstrcmp_bounded (const char *string1, const char *string2,
double lower_bound)
<= (xvec_length + yvec_length - sum_C | OCC(X,C) - OCC(Y,C) |)
/ (xvec_length + yvec_length).
*/
- int occ_diff[UCHAR_MAX + 1]; /* array C -> OCC(X,C) - OCC(Y,C) */
- int sum;
+ ptrdiff_t occ_diff[UCHAR_MAX + 1]; /* array C -> OCC(X,C) - OCC(Y,C)
*/
+ ptrdiff_t sum;
+ double dsum;
/* Determine the occurrence counts in X. */
memset (occ_diff, 0, sizeof (occ_diff));
@@ -162,11 +165,12 @@ fstrcmp_bounded (const char *string1, const char
*string2, double lower_bound)
sum = 0;
for (i = 0; i <= UCHAR_MAX; i++)
{
- int d = occ_diff[i];
+ ptrdiff_t d = occ_diff[i];
sum += (d >= 0 ? d : -d);
}
- upper_bound = 1.0 - (double) sum / (xvec_length + yvec_length);
+ dsum = sum;
+ upper_bound = 1.0 - dsum / length_sum;
if (upper_bound < lower_bound) /* Prob: 66% */
/* Return an arbitrary value < LOWER_BOUND. */
@@ -180,10 +184,10 @@ fstrcmp_bounded (const char *string1, const char
*string2, double lower_bound)
ctxt.yvec = string2;
/* Allocate memory for fdiag and bdiag from a thread-local pool. */
- fdiag_len = xvec_length + yvec_length + 3;
+ fdiag_len = length_sum + 3;
gl_once (keys_init_once, keys_init);
- buffer = (int *) gl_tls_get (buffer_key);
- bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key);
+ buffer = gl_tls_get (buffer_key);
+ bufmax = (uintptr_t) gl_tls_get (bufmax_key);
if (fdiag_len > bufmax)
{
/* Need more memory. */
@@ -192,9 +196,8 @@ fstrcmp_bounded (const char *string1, const char *string2,
double lower_bound)
bufmax = fdiag_len;
/* Calling xrealloc would be a waste: buffer's contents does not need
to be preserved. */
- if (buffer != NULL)
- free (buffer);
- buffer = (int *) xnmalloc (bufmax, 2 * sizeof (int));
+ free (buffer);
+ buffer = xnmalloc (bufmax, 2 * sizeof *buffer);
gl_tls_set (buffer_key, buffer);
gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax);
}
@@ -213,7 +216,7 @@ fstrcmp_bounded (const char *string1, const char *string2,
double lower_bound)
rounding errors. */
ctxt.edit_count_limit =
(lower_bound < 1.0
- ? (int) ((xvec_length + yvec_length) * (1.0 - lower_bound + 0.000001))
+ ? (ptrdiff_t) (length_sum * (1.0 - lower_bound + 0.000001))
: 0);
/* Now do the main comparison algorithm */
diff --git a/modules/fstrcmp b/modules/fstrcmp
index 1ea0f37..77de7a0 100644
--- a/modules/fstrcmp
+++ b/modules/fstrcmp
@@ -10,6 +10,7 @@ diffseq
lock
tls
minmax
+stdint
xalloc
configure.ac:
--
2.1.0