bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: regexp regressions


From: Paul Eggert
Subject: Re: regexp regressions
Date: Sat, 20 Aug 2005 23:01:23 -0700
User-agent: Gnus/5.1007 (Gnus v5.10.7) Emacs/21.4 (gnu/linux)

Sam Steingold <address@hidden> writes:

> the latest and greatest gnulib regexp has the following regressions vs
> the previous (monolithic) version:

Sorry, I didn't understand the notation that you used in
<http://lists.gnu.org/archive/html/bug-gnulib/2005-08/msg00008.html>.

I tried to reproduce the problems by writing a C program (enclosed
below) and it seems to me that the gnulib regexp is correct in all
these test cases.  Perhaps the old regexp was broken.

Here is the output of the C program:

pattern='(^)*' string='-' result='' result[1]='' OK
pattern='([xyz])(-\2)' string='x-y' error Invalid back reference OK
pattern='((.*)\1)+' string='xxxxxx' error Invalid back reference OK
pattern='(.*)*\1' string='xx' result='xx' result[1]='x' OK
pattern='(a*)*' string='aaaa' result='aaaa' result[1]='aaaa' OK
pattern='(a*)+' string='aaaa' result='aaaa' result[1]='aaaa' OK

and here is the C program itself:

#include <sys/types.h>
#include <regex.h>
#include <stdio.h>

/* One regular-expression test case.

   PATTERN is compiled as a POSIX extended regular expression and
   matched against STRING.  OFFSET and LENGTH give the expected start
   and size of the overall match (subexpression 0).  An OFFSET of -1
   means no successful match is expected: either the pattern is
   rejected by regcomp or the string does not match.  */
struct test
{
  char const *pattern;
  char const *string;
  regoff_t offset;
  regoff_t length;
};

/* Run the single test case T, printing the pattern, the string, the
   overall match and every subexpression match (or the compile error,
   or "did not match"), followed by "OK" or "NO GOOD".

   Return 0 if the outcome agrees with T's expectation, 1 otherwise.  */
static int
attempt (struct test t)
{
  regex_t r;
  regmatch_t match[100];
  int e;
  int ok;

  printf ("pattern='%s' string='%s' ", t.pattern, t.string);
  e = regcomp (&r, t.pattern, REG_EXTENDED);
  if (e)
    {
      char buf[1000];
      regerror (e, &r, buf, sizeof buf);
      printf ("error %s ", buf);
      ok = (t.offset == (regoff_t) -1);
      /* R holds nothing to release after a failed regcomp.  */
    }
  else
    {
      /* Never ask regexec to fill more slots than MATCH has, even if
         the pattern contains a very large number of groups.  */
      size_t nmatch = r.re_nsub + 1;
      if (nmatch > sizeof match / sizeof match[0])
        nmatch = sizeof match / sizeof match[0];

      if (regexec (&r, t.string, nmatch, match, 0) == 0)
        {
          size_t i;
          printf ("result='%.*s' ",
                  (int) (match[0].rm_eo - match[0].rm_so),
                  t.string + match[0].rm_so);
          ok = (match[0].rm_so == t.offset
                && match[0].rm_eo - match[0].rm_so == t.length);
          for (i = 1; i < nmatch; i++)
            {
              if (match[i].rm_so == (regoff_t) -1)
                {
                  /* The group took no part in the match; print it as
                     empty rather than forming a pointer in front of
                     the string.  */
                  printf ("result[%zu]='' ", i);
                }
              else
                {
                  printf ("result[%zu]='%.*s' ",
                          i,
                          (int) (match[i].rm_eo - match[i].rm_so),
                          t.string + match[i].rm_so);
                }
            }
        }
      else
        {
          printf ("did not match ");
          /* MATCH is not filled in when regexec fails, so judge the
             outcome against the expectation, not against MATCH.  */
          ok = (t.offset == (regoff_t) -1);
        }

      regfree (&r);
    }

  if (ok)
    printf ("OK\n");
  else
    printf ("NO GOOD\n");

  return !ok;
}

/* Table of cases run by main.  An expected offset of -1 marks a
   pattern that regcomp is expected to reject; otherwise offset and
   length describe the expected overall match.  */
static struct test const test[] =
  {
    /* Empty match at the start of the string.  */
    { .pattern = "(^)*", .string = "-", .offset = 0, .length = 0 },

    /* Back-reference to a group that is not yet closed.  */
    { .pattern = "([xyz])(-\\2)", .string = "x-y",
      .offset = -1, .length = -1 },
    { .pattern = "((.*)\\1)+", .string = "xxxxxx",
      .offset = -1, .length = -1 },

    /* Back-reference to a repeated group.  */
    { .pattern = "(.*)*\\1", .string = "xx", .offset = 0, .length = 2 },

    /* Nested repetition should consume the whole string.  */
    { .pattern = "(a*)*", .string = "aaaa", .offset = 0, .length = 4 },
    { .pattern = "(a*)+", .string = "aaaa", .offset = 0, .length = 4 }
  };

/* Run every case in the test table.  The exit status is 0 when all
   cases pass and nonzero when at least one fails.  */
int
main (void)
{
  size_t const count = sizeof test / sizeof test[0];
  int failed = 0;
  size_t k = 0;

  while (k < count)
    {
      failed |= attempt (test[k]);
      k++;
    }

  return failed;
}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]