bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

strcasestr module


From: Bruno Haible
Subject: strcasestr module
Date: Wed, 17 Aug 2005 17:39:02 +0200
User-agent: KMail/1.5

With the mbuiter module (unbounded iteration, similar to mbiter), it
is now easy to write strcasestr() in a way that works in multibyte locales.

Bruno


========================== modules/strcasestr ================================
Description:
strcasestr() function: case-insensitive search for a substring in a string.

Files:
lib/strcasestr.h
lib/strcasestr.c
m4/strcasestr.m4
m4/mbrtowc.m4

Depends-on:
mbuiter

configure.ac:
gl_FUNC_STRCASESTR

Makefile.am:
lib_SOURCES += strcasestr.h

Include:
"strcasestr.h"

License:
LGPL

Maintainer:
Bruno Haible

========================== lib/strcasestr.h ================================
/* Case-insensitive searching in a string.
   Copyright (C) 2005 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifdef __cplusplus
extern "C" {
#endif

/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
   comparison.
   Returns a pointer into HAYSTACK (with the const qualifier cast away) on
   success, or NULL if NEEDLE does not occur in HAYSTACK.  An empty NEEDLE
   matches at the very beginning, so HAYSTACK itself is returned.
   Both arguments must be NUL-terminated strings.
   Note: This function may, in multibyte locales, return success even if
   strlen (haystack) < strlen (needle) !  */
extern char *strcasestr (const char *haystack, const char *needle);

#ifdef __cplusplus
}
#endif
========================== lib/strcasestr.c ================================
/* Case-insensitive searching in a string.
   Copyright (C) 2005 Free Software Foundation, Inc.
   Written by Bruno Haible <address@hidden>, 2005.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

/* Specification.  */
#include "strcasestr.h"

#include <ctype.h>
#include <stddef.h>  /* for NULL */
#include <stdlib.h>  /* for abort, MB_CUR_MAX */
#include <string.h>  /* for memcpy */

#if HAVE_MBRTOWC
# include "mbuiter.h"
#endif

/* Map an uppercase character to its lowercase counterpart and leave any
   other value unchanged.  Callers cast to unsigned char first, as the
   <ctype.h> functions require.  Note that Ch is evaluated more than once.  */
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))

/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
   comparison.
   Returns a pointer to the start of the match inside HAYSTACK, HAYSTACK
   itself if NEEDLE is empty, or NULL if NEEDLE does not occur.
   Note: This function may, in multibyte locales, return success even if
   strlen (haystack) < strlen (needle) !  */
char *
strcasestr (const char *haystack, const char *needle)
{
  /* Be careful not to look at the entire extent of haystack or needle
     until needed.  This is useful because of these two cases:
       - haystack may be very long, and a match of needle found early,
       - needle may be very long, and not even a short initial segment of
         needle may be found in haystack.  */
#if HAVE_MBRTOWC
  if (MB_CUR_MAX > 1)
    {
      /* Multibyte locale: compare character by character, not byte by
         byte.  */
      mbui_iterator_t iter_needle;

      mbui_init (iter_needle, needle);
      if (mbui_avail (iter_needle))
        {
          /* Cache the lowercased first character of needle; it is compared
             against every haystack position.  */
          mbchar_t b;
          mbui_iterator_t iter_haystack;

          mb_copy (&b, &mbui_cur (iter_needle));
          if (b.wc_valid)
            b.wc = towlower (b.wc);

          mbui_init (iter_haystack, haystack);
          for (;; mbui_advance (iter_haystack))
            {
              mbchar_t c;

              if (!mbui_avail (iter_haystack))
                /* No match.  */
                return NULL;

              mb_copy (&c, &mbui_cur (iter_haystack));
              if (c.wc_valid)
                c.wc = towlower (c.wc);
              if (mb_equal (c, b))
                /* The first character matches.  */
                {
                  mbui_iterator_t rhaystack;
                  mbui_iterator_t rneedle;

                  /* Continue in haystack right after the candidate start,
                     without disturbing the outer iterator.  */
                  memcpy (&rhaystack, &iter_haystack, sizeof (mbui_iterator_t));
                  mbui_advance (rhaystack);

                  /* Restart needle and skip its first character, which has
                     already been matched.  */
                  mbui_init (rneedle, needle);
                  if (!mbui_avail (rneedle))
                    abort ();
                  mbui_advance (rneedle);

                  for (;; mbui_advance (rhaystack), mbui_advance (rneedle))
                    {
                      if (!mbui_avail (rneedle))
                        /* Found a match.  Return the position where it
                           starts, not the beginning of the whole
                           haystack.  */
                        return (char *) mbui_cur_ptr (iter_haystack);
                      if (!mbui_avail (rhaystack))
                        /* No match: the rest of haystack has fewer
                           characters than needle, so no later start can
                           succeed either.  */
                        return NULL;
                      if (!mb_caseequal (mbui_cur (rhaystack),
                                         mbui_cur (rneedle)))
                        /* Nothing in this round.  */
                        break;
                    }
                }
            }
        }
      else
        /* An empty needle matches at the very beginning.  */
        return (char *) haystack;
    }
  else
#endif
    {
      if (*needle != '\0')
        {
          /* Speed up the following searches of needle by caching its first
             character.  */
          unsigned char b = TOLOWER ((unsigned char) *needle);

          /* From here on, needle designates the part after its first
             character.  */
          needle++;
          for (;; haystack++)
            {
              if (*haystack == '\0')
                /* No match.  */
                return NULL;
              if (TOLOWER ((unsigned char) *haystack) == b)
                /* The first character matches.  */
                {
                  const char *rhaystack = haystack + 1;
                  const char *rneedle = needle;

                  for (;; rhaystack++, rneedle++)
                    {
                      if (*rneedle == '\0')
                        /* Found a match.  */
                        return (char *) haystack;
                      if (*rhaystack == '\0')
                        /* No match: haystack ended before needle did, so
                           no later start can succeed either.  */
                        return NULL;
                      if (TOLOWER ((unsigned char) *rhaystack)
                          != TOLOWER ((unsigned char) *rneedle))
                        /* Nothing in this round.  */
                        break;
                    }
                }
            }
        }
      else
        /* An empty needle matches at the very beginning.  */
        return (char *) haystack;
    }
}
========================== m4/strcasestr.m4 ================================
# strcasestr.m4 serial 1
dnl Copyright (C) 2005 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

AC_DEFUN([gl_FUNC_STRCASESTR],
[
  dnl The replacement is used unconditionally: no known system provides a
  dnl strcasestr() function that works correctly in multibyte locales.
  AC_LIBOBJ([strcasestr])
  AC_DEFINE([strcasestr], [rpl_strcasestr],
    [Define to rpl_strcasestr always.])
  gl_PREREQ_STRCASESTR
])

# Prerequisites of lib/strcasestr.c.
AC_DEFUN([gl_PREREQ_STRCASESTR],
[
  dnl lib/strcasestr.c tests HAVE_MBRTOWC to enable its multibyte code
  dnl path; presumably gl_FUNC_MBRTOWC is what provides that symbol
  dnl (confirm against m4/mbrtowc.m4).
  gl_FUNC_MBRTOWC
])
============================================================================





reply via email to

[Prev in Thread] Current Thread [Next in Thread]