bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Patch] faster fnmatch


From: Ondrej Bilka
Subject: Re: [Patch] faster fnmatch
Date: Sat, 2 May 2009 18:46:46 +0200
User-agent: Mutt/1.5.18 (2008-05-17)

Hello Ralf,
On Fri, May 01, 2009 at 09:50:23PM +0200, Ralf Wildenhues wrote:

> That NULTEST thing is cute but makes me cringe.  If the second malloc
> runs out of memory, then you return, but leak memory from the first
> malloc.  So in a tight memory situation, your code starts contributing
> to the problem.
Fixed. renamed macro to NOMEMTEST which jumps to err where I free all.

Here is what I improved(benchmark attached)
*abc*
en_US.UTF-8
 user: 2.58
 user: 0.26
C
 user: 0.89
 user: 0.25
just made functions static and __builtin_expect
*[abc]bc*
en_US.UTF-8
 user: 4.04
 user: 1.71
C
 user: 2.23
 user: 0.85
hashing looks at first character instead last. We can skip mblen if they 
differ. I hope filenames with all leading chars same are rare.
*ab[a-z]*
en_US.UTF-8
 user: 2.87
 user: 1.77
C
 user: 0.85
 user: 0.82
using speculative match. Slow because every third string is matched.
casefold
*abc*
en_US.UTF-8
 user: 4.26
 user: 3.24
I convert multibyte chars to lowercase(as mb_casecmp does that too)
Its faster because fnmatch first by mbstowcs computes size then second mbstowcs 
converts. 
I allocate large buffer and put 0 at end. I check if mbstowcs rewrote it and if 
so(unlike) I allocate correct size.
C
 user: 1.00
 user: 0.71
*ab[a-z]*
en_US.UTF-8
 user: 4.62
 user: 5.07
C
 user: 1.06
 user: 1.27
because I match every third string. 

diff --git a/MODULES.html.sh b/MODULES.html.sh
index 06afa2d..03d308e 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -61,6 +61,7 @@ fenv
 float
 fmtmsg
 fnmatch
+fnmatchcomp
 ftw
 glob
 grp
@@ -418,6 +419,7 @@ fmodf
 fmodl
 fmtmsg
 fnmatch
+fnmatchcomp
 fopen
 fork
 fpathconf
diff --git a/lib/fnmatchcomp.c b/lib/fnmatchcomp.c
new file mode 100644
index 0000000..bf24472
--- /dev/null
+++ b/lib/fnmatchcomp.c
@@ -0,0 +1,618 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://     www.gnu.org/licenses/ 
> . */
+
+#include <stdint.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <nl_types.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <ctype.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <errno.h>
+#define  _GNU_SOURCE
+#include <fnmatch.h>
+#include "fnmatchcomp.h"
+
+#define  UINT4 uint32_t
+#define  UINT2 uint16_t
+#define  CHARSPERINT (sizeof (UINT4)/sizeof(char))
+#define  STREQ(a, b) (strcmp(a, b) == 0)
+#define  MAX_PATTERN_SIZE (sizeof (bracket_pattern))
+#define  NOMEMTEST(exp) if (__builtin_expect(!(exp), 0))  {errno = ENOMEM; 
goto err; }
+#define  Calloc(x) calloc(1, x)
+#define  ALLOCALIMIT(n, lim) (__builtin_expect((n < lim), 1) ? alloca(n): 
malloc(n))
+#define  ALLOCAFREE(ptr, n, lim) if (n > lim) free(ptr);
+typedef struct
+{
+  char tp;
+  char chr;
+} chr_pattern;
+typedef struct
+{                               /* We seek chars following *. This provides 
major speedup.
+                                   For some encodings we could be wrong 
because leading char was part of multibyte character. Then after successfull 
match call fnmatch to eliminate false positives  */
+  char tp;
+  char chars[CHARSPERINT];
+  short sizerest;
+  short charno;
+} star_pattern;
+typedef struct
+{
+  char tp;
+} end_pattern;
+typedef struct
+{                               /* * at end of pattern. automatic success  */
+  char tp;
+} endstar_pattern;
+#define  BRACKET_INVMATCH 1
+#define  BRACKET_SLASH 2
+#define  BRACKET_MUSTCHECK 4
+typedef struct
+{
+  char tp;
+  char flags;
+  char *chars;                  /* multibyte characters separated by 0 */
+  int sizerest;
+  uint32_t hash[(UCHAR_MAX / 32) + 1];
+  /* We look at last char of multibyte character. We look at bit with this 
index in hash. On success we check if its one of chars. */
+} bracket_pattern;
+typedef struct
+{                               /* if FNM_PERIOD was set this checks period 
after / */
+  char tp;
+} begindot_pattern;
+typedef struct
+{                               /* if FNM_LEADING_DIR was set we go after / or 
0 to endstar pattern. */
+  char tp;
+} slashend_pattern;
+
+typedef struct
+{                               /* we must call fnmatch */
+  char tp;
+  char *ptr;
+  char *prefix;
+  int flags;
+} fallback_pattern;
+typedef struct
+{                               /* we fold string and then call case sensitive 
version */
+  char tp;
+  void *states;
+} fold_pattern;
+typedef struct
+{                               /* we call our version and if succeed we check 
by fnmatch fallback */
+  char tp;
+  void *states;
+  fallback_pattern *fallback;
+} checkneeded_pattern;
+
+  /* like star but we dont go beyond / */
+typedef star_pattern starslash_pattern;
+enum PATTERNS
+{
+  PATTERN_chr,
+  PATTERN_star,
+  PATTERN_starslash,
+  PATTERN_end,
+  PATTERN_endstar,
+  PATTERN_bracket,
+  PATTERN_begindot,
+  PATTERN_slashend,
+  PATTERN_fallback,
+  PATTERN_fold,
+  PATTERN_checkneeded
+};
+union _patterns
+{
+  chr_pattern chr;
+  star_pattern star;
+  starslash_pattern starslash;
+  end_pattern end;
+  endstar_pattern endstar;
+  bracket_pattern bracket;
+  begindot_pattern begindot;
+  fallback_pattern fallback;
+  fold_pattern fold;
+  checkneeded_pattern checkneeded;
+};
+typedef union _patterns patterns;
+static void freepatterns (patterns * p);
+
+struct states
+{
+  patterns *p;
+  struct states *next;
+};
+struct _fnmatch_state
+{
+  struct states *states;        /* list of states of undrelying NFA. */
+  int flags;
+  int *refcount;
+  patterns *start;
+};
+
+int
+fnmatch_fallbacked (fnmatch_state * s)
+{
+  return (s->states->p->fallback.tp == PATTERN_fallback);
+}
+
+static int
+ascii_compatible_encoding (const char *c)
+{                               /* name is misleading should return 0 if 
characters of encoding cant overlap.
+                                   1 if encoding is stateless
+                                   2 otherwise */
+
+  if (MB_CUR_MAX == 1 || STREQ (c, "UTF-8"))
+    return 0;
+  if (STREQ (c, "BIG5") || STREQ (c, "GB18030"))
+    return 1;
+  return 2;
+}
+
+static void
+strfold (const char *str, char *buf)
+{
+  if (MB_CUR_MAX == 1)
+    {
+      while (*str)
+        *buf++ = tolower (*str++);
+      *buf = 0;
+    }
+  else
+    {
+      int i;
+      int len = 2 * strlen (str) + 1;
+      wchar_t *towc = ALLOCALIMIT (sizeof (wint_t) * (len), 1000);
+      towc[len - 1] = L'\0';
+      mbstowcs (towc, str, len);
+      if (towc[len - 1] != L'\0')
+        {
+          ALLOCAFREE (towc, sizeof (wint_t) * (len), 1000);
+          len = mbstowcs (NULL, str, 0) + 1;
+          towc = ALLOCALIMIT (sizeof (wint_t) * len, 1000);
+          mbstowcs (towc, str, len);
+        }
+      for (i = 0; towc[i] != L'\0'; i++)
+        {
+          towc[i] = towlower (towc[i]);
+        }
+      wcstombs (buf, towc, len);
+      ALLOCAFREE (towc, sizeof (wint_t) * (len), 1000);
+    }
+}
+
+static fnmatch_state *
+initfnmatchstate ()
+{
+  fnmatch_state *st;
+  NOMEMTEST (st = (fnmatch_state *) Calloc (sizeof (fnmatch_state)));
+  NOMEMTEST (st->states = Calloc (sizeof (struct states)));
+  st->states->next = NULL;
+  NOMEMTEST (st->refcount = Calloc (sizeof (int)));
+  *st->refcount = 1;
+  return st;
+err:
+  if (st)
+    {
+      free (st->states);
+      free (st->refcount);
+    }
+  free (st);
+  return NULL;
+}
+
+static fnmatch_state *
+fnmatch_fallback (const char *str, int flags)
+{
+  fnmatch_state *st;
+  NOMEMTEST (st = initfnmatchstate ());
+  NOMEMTEST (st->start = st->states->p = Calloc (sizeof (fallback_pattern)));
+  st->states->p->fallback.tp = PATTERN_fallback;
+  NOMEMTEST (st->states->p->fallback.ptr = strdup (str));
+  st->states->p->fallback.flags = flags;
+  NOMEMTEST (st->states->p->fallback.prefix = strdup (""));
+  return st;
+err:
+  fnmatch_free (st);
+  return NULL;
+}
+
+static fnmatch_state *
+fold_fallback (fnmatch_state * s)
+{
+  fnmatch_state *st;
+  NOMEMTEST (st = initfnmatchstate ());
+  NOMEMTEST (s);
+  NOMEMTEST (st->start = st->states->p = Calloc (sizeof (fold_pattern)));
+  st->states->p->fold.tp = PATTERN_fold;
+  st->states->p->fold.states = (void *) s;
+  return st;
+err:
+  fnmatch_free (s);
+  fnmatch_free (st);
+  return NULL;
+}
+
+static fnmatch_state *
+checkneeded_fallback (fnmatch_state * s, fnmatch_state * fb)
+{
+  fnmatch_state *st;
+  NOMEMTEST (st = initfnmatchstate ());
+  NOMEMTEST (s);
+  NOMEMTEST (fb);
+  NOMEMTEST (st->start = st->states->p =
+             Calloc (sizeof (checkneeded_pattern)));
+  st->states->p->checkneeded.tp = PATTERN_checkneeded;
+  st->states->p->checkneeded.states = (void *) s;
+  st->states->p->checkneeded.fallback = (void *) fb;
+  return st;
+err:
+  fnmatch_free (s);
+  fnmatch_free (fb);
+  fnmatch_free (st);
+  return NULL;
+}
+
+static int
+parse_bracket (const char *str, patterns * pat, int flags)
+{
+  char *chr, firstbyte;
+  int mlen;
+  const char *s = str;
+  int i;
+  pat->bracket.tp = PATTERN_bracket;
+  if (*s == '!' || (getenv ("POSIXLY_CORRECT") != NULL && *s == '^'))
+    {
+      pat->bracket.flags = BRACKET_INVMATCH;
+      s++;
+    }
+  if (flags & FNM_PATHNAME)
+    pat->bracket.flags |= BRACKET_SLASH;
+  if (!pat->bracket.chars)
+    {
+      if (!(pat->bracket.chars = Calloc (2 * strlen (s) + 2)))
+        return -3;
+    }
+  chr = pat->bracket.chars;
+  do
+    {
+      if (*s == 0 || *s == '-' || (*s == '[' && s[1] == ':'))
+        pat->bracket.flags |= BRACKET_MUSTCHECK | BRACKET_INVMATCH;
+      if (*s == '[' && (s[1] == '=' || s[1] == '.'))
+        return -1;              /* could be multichar sequence */
+      mlen = mblen (s, MB_CUR_MAX);
+      if (mlen < 0)
+        return -3;
+      firstbyte = *s;           /* this is bad hash for multibyte characters.
+                                   We assume that in pathnames is lot of ascii 
*/
+      pat->bracket.hash[firstbyte / 32] |= 1 << (firstbyte % 32);
+      memcpy (chr, s, mlen);
+      s += mlen;
+      chr += mlen + 1;
+    }
+  while (*s && *s != ']');
+  *chr = 0;
+  if (pat->bracket.flags & BRACKET_MUSTCHECK)
+    {                           /* bracket is too complicated to process here
+                                   we replace it with ? and when we match 
entire pattern
+                                   we call fnmatch it wasn't false positive */
+      *pat->bracket.chars = 0;
+      for (i = 0; i < (UCHAR_MAX / 32) + 1; i++)
+        pat->bracket.hash[i] = 0;
+    }
+  return s - str;
+}
+
+#define  NEXTPATTERN(type) pat->chr.tp = PATTERN_##type; pat = (patterns 
*)(((void *) pat)+sizeof (type##_pattern));
+#define  FALLBACK freepatterns(ret); free(ret);
+#define  BRACTEST(x) if ((x) == -3)  {goto err; }
+fnmatch_state *
+fnmatch_compile (const char *str, int flags)
+{
+  const char *s;
+  patterns *pat, *ret = NULL;
+  int i, pass;
+  int size = 0, patsize;
+  int checkneeded = 0;
+  char *codeset = nl_langinfo (CODESET);
+  char *buf;
+  if (ascii_compatible_encoding (codeset) == 1)
+    checkneeded = 1;
+  if (ascii_compatible_encoding (codeset) == 2)
+    {
+      return fnmatch_fallback (str, flags);
+    }
+  if (flags & FNM_CASEFOLD)
+    {
+      NOMEMTEST (buf = Calloc (2 * strlen (str) + 1));
+      strfold (str, buf);
+      fnmatch_state *st =
+        fold_fallback (fnmatch_compile (buf, flags & (~FNM_CASEFOLD)));
+      free (buf);
+      NOMEMTEST (st);
+      return st;
+    }
+
+  ret = (patterns *) Calloc ((strlen (str) + 3) * 2 * MAX_PATTERN_SIZE);
+
+  for (pass = 0; pass < 2; pass++)
+    {
+      /* two passes in first we compute values we use in second to optimize
+       */
+      patsize = size;
+      size = 0;                 /* patsize-size is minimal size of rest of 
string */
+      /* it makes loops tighter and we dont need to check if we went outsize 
string. */
+      pat = ret;
+      if (flags & FNM_PERIOD && *str != '.')
+        {
+        NEXTPATTERN (begindot)}
+      for (s = str; *s;)
+        {
+          if (flags & FNM_EXTMATCH && *(s + 1) == '(' &&
+              (*s == '*' || *s == '+' || *s == '@' || *s == '!'))
+            {
+              FALLBACK;
+              return fnmatch_fallback (str, flags);
+            }
+          switch (*s)
+            {
+            case '*':
+              while (*(s + 1) == '?')
+                {
+                  /* by specification [!]] matches anything except ] */
+                  BRACTEST (parse_bracket ("!", pat, flags));
+                  size++;
+                  pat->bracket.sizerest = patsize - size;
+                  NEXTPATTERN (bracket);
+                  s++;
+                }
+              if (*(s + 1))
+                {
+                  if (pass)
+                    {
+                      /* we will find ocurence of next characters fast. */
+                      patterns *tmppat = pat;
+                      pat->star.sizerest = patsize - size;
+                      NEXTPATTERN (star);
+                      for (i = 0; pat->chr.tp == PATTERN_chr && pat->chr.chr
+                           && i < CHARSPERINT; i++)
+                        {
+                          tmppat->star.chars[i] = pat->chr.chr;
+                          NEXTPATTERN (chr);
+                        }
+                      if (i == 3)
+                        i = 2;
+                      tmppat->star.charno = i;
+                      pat = tmppat;
+                    }
+                  if (flags & FNM_PATHNAME)
+                    {
+                      NEXTPATTERN (starslash);
+                    }
+                  else
+                    {
+                      NEXTPATTERN (star);
+                    }
+                }
+              else
+                {
+                  NEXTPATTERN (endstar);
+                }
+              s++;
+              break;
+            case '?':
+              BRACTEST (parse_bracket ("!", pat, flags));
+              size++;
+              pat->bracket.sizerest = patsize - size;
+              NEXTPATTERN (bracket);
+              s++;
+              break;
+            case '[':
+              {
+                int siz = parse_bracket (s + 1, pat, flags);
+                BRACTEST (siz);
+                if (siz < 0)
+                  {
+                    FALLBACK;
+                    return fnmatch_fallback (str, flags);
+                  }
+                size++;
+                s += siz + 2;
+                pat->bracket.sizerest = patsize - size;
+                if (pat->bracket.flags & BRACKET_MUSTCHECK)
+                  {
+                    checkneeded = 1;
+                  }
+                NEXTPATTERN (bracket);
+              }
+              break;
+            default:
+              if (*s == '\\' && (!(flags & FNM_NOESCAPE)))
+                s++;
+              {
+                int nolp = (*s == '/' && (flags & FNM_PERIOD)
+                            && (flags & FNM_PATHNAME)
+                            && *(s + 1) != '.'), mlen = mblen (s, MB_CUR_MAX);
+                if (mlen < 0)
+                  goto err;
+                for (i = 0; i < mlen; i++, s++)
+                  {
+                    pat->chr.chr = *s;
+                    size++;
+                    NEXTPATTERN (chr);
+                  }
+                if (nolp)
+                  {
+                  NEXTPATTERN (begindot)}
+              }
+              break;
+            }
+        }
+    }
+  if (flags & FNM_LEADING_DIR)
+    {
+      NEXTPATTERN (slashend);
+      NEXTPATTERN (endstar);
+    }
+  else
+    {
+      NEXTPATTERN (end);
+    }
+
+  fnmatch_state *st;
+  NOMEMTEST (st = initfnmatchstate ());
+  st->states->p = ret;
+  st->start = ret;
+  if (checkneeded)
+    st = checkneeded_fallback (st, fnmatch_fallback (str, flags));
+  return st;
+err:
+  freepatterns (ret);
+  return NULL;
+}
+
+#undef NEXTPATTERN
+#define  NEXTPATTERN(type) p = (patterns *)(((void *) p)+sizeof 
(type##_pattern));
+
+static inline int
+fnmatch2_internal (const patterns * p, const char *str, int len)
+{
+#include "fnmatchcomp_loop.h"
+}
+
+int
+fnmatch_exec (const fnmatch_state * p, const char *str)
+{
+  const struct states *s;
+  int len = strlen (str);
+  for (s = p->states; s; s = s->next)
+    {
+      int ret = fnmatch2_internal (s->p, str, len);
+      if (ret <= 0)
+        return ret;
+    }
+  return FNM_NOMATCH;
+}
+
+#define  FNMATCH_PREFIX
+static int
+fnmatch2_prefix_internal (patterns * p, const char *str, int len,
+                          fnmatch_state * ret)
+{
+#include "fnmatchcomp_loop.h"
+}
+
+fnmatch_state *
+fnmatch_prefix (const fnmatch_state * p, const char *str)
+{
+  struct states *s;
+  int len = strlen (str);
+  fnmatch_state *ret;
+  NOMEMTEST (ret = (fnmatch_state *) Calloc (sizeof (fnmatch_state)));
+  memcpy ((void *) ret, (void *) p, sizeof (fnmatch_state));
+  ret->states = NULL;
+  (*ret->refcount)++;
+  for (s = p->states; s; s = s->next)
+    {
+      fnmatch2_prefix_internal (s->p, str, len, ret);
+    }
+  return ret;
+err:
+  return NULL;
+}
+
+static void
+freepatterns (patterns * p)
+{
+  if (!p)
+    return;
+#define  SKIPPATTERN(type) case PATTERN_##type: NEXTPATTERN(type); break;
+  while (p->chr.tp != PATTERN_end && p->chr.tp != PATTERN_endstar)
+    {
+      switch (p->chr.tp)
+        {
+        case PATTERN_chr:
+          if (!p->chr.chr)
+            return;
+          NEXTPATTERN (chr);
+          break;
+          SKIPPATTERN (star);
+          SKIPPATTERN (starslash);
+          SKIPPATTERN (end);
+          SKIPPATTERN (endstar);
+        case PATTERN_bracket:
+          free (p->bracket.chars);
+          NEXTPATTERN (bracket);
+          break;
+          SKIPPATTERN (begindot);
+          SKIPPATTERN (slashend);
+        case PATTERN_fallback:
+          free (p->fallback.ptr);
+          return;
+        case PATTERN_fold:
+          return;
+        case PATTERN_checkneeded:
+          return;
+        }
+    }
+
+}
+
+void
+fnmatch_free (fnmatch_state * s)
+{                               /* fnmatch_free must process incomplete 
fnmatch_state too */
+  if (!s)
+    return;
+  struct states *st, *st2;
+  for (st = s->states; st;)
+    {
+      if (st->p)
+        {
+          switch (st->p->chr.tp)
+            {
+            case PATTERN_fallback:
+              free (st->p->fallback.prefix);
+              if (s->start != st->p)
+                free (st->p);
+              break;
+            case PATTERN_fold:
+              fnmatch_free ((fnmatch_state *) st->p->fold.states);
+              if (s->start != st->p)
+                free (st->p);
+              break;
+            case PATTERN_checkneeded:
+              fnmatch_free ((fnmatch_state *) st->p->checkneeded.states);
+              fnmatch_free ((fnmatch_state *) st->p->checkneeded.fallback);
+              if (s->start != st->p)
+                free (st->p);
+              break;
+            }
+        }
+      st2 = st->next;
+      free (st);
+      st = st2;
+    }
+  (*s->refcount)--;
+  if (!*s->refcount)
+    {
+      free (s->refcount);
+      freepatterns (s->start);
+      free (s->start);
+    }
+  free (s);
+}
diff --git a/lib/fnmatchcomp.h b/lib/fnmatchcomp.h
new file mode 100644
index 0000000..931c920
--- /dev/null
+++ b/lib/fnmatchcomp.h
@@ -0,0 +1,41 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://     www.gnu.org/licenses/ 
> . */
+
+struct _fnmatch_state;
+typedef struct _fnmatch_state fnmatch_state;
+
+  /* compile pattern */
+fnmatch_state *fnmatch_compile (const char *pattern, int flags);
+
+  /* add prefix to matched strings.
+     suppose you want match foo againist /home/foo, /home/bar/foo,
+     /home/bar/bar
+     fnmatch_state* s = fnmatch_compile("foo", 0);
+     fnmatch_state* home = fnmatch_prefix(s, "/home");
+     fnmatch_state* bar = fnmatch_prefix(home, "/bar");
+     fnmatch_exec(home, "/foo");
+     fnmatch_exec(bar, "/foo");
+     fnmatch_exec(bar, "/bar");
+   */
+fnmatch_state *fnmatch_prefix (const fnmatch_state * pattern,
+                               const char *prefix);
+
+int fnmatch_exec (const fnmatch_state * pattern, const char *string);
+
+void fnmatch_free (fnmatch_state * pattern);
+
+  /* For some encodings we decide just call fnmatch. In that case s you can 
gain some performance by checking fnmatch_fallbacked and if returns 1 call 
fnmatch yourself */
+int fnmatch_fallbacked (fnmatch_state * pattern);
diff --git a/lib/fnmatchcomp_loop.h b/lib/fnmatchcomp_loop.h
new file mode 100644
index 0000000..3bae0ec
--- /dev/null
+++ b/lib/fnmatchcomp_loop.h
@@ -0,0 +1,230 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://     www.gnu.org/licenses/ 
> . */
+
+const char *s = str, *chars;
+int i, buflen;
+char *buf;
+while (1)
+  {
+#ifndef FNMATCH_PREFIX
+    int sizerest, charno, retval;
+#undef ADDSTATE
+#define  ADDSTATE
+#else
+    patterns *pat, *pat2;
+    int _statepresent;
+    struct states *state, *stateiter;
+#undef ADDSTATE
+#define  ADDSTATE _statepresent = 0; \
+  for (stateiter = ret->states; stateiter; stateiter = stateiter->next)  {\
+  if (stateiter->p == p) _statepresent = 1; \
+  } \
+  if (!_statepresent)  {\
+  state = calloc(1, sizeof (struct states)); \
+  state->next = ret->states; \
+  ret->states = state; \
+  state->p = p; \
+  }
+    if (!*s)
+      {
+        ADDSTATE;
+        return 0;
+      }
+#endif
+    switch (p->chr.tp)
+      {
+      case PATTERN_chr:
+        if (__builtin_expect ((*s != p->chr.chr), 1))
+          return FNM_NOMATCH;
+        NEXTPATTERN (chr);
+        s++;
+        break;
+      case PATTERN_starslash:
+#undef SLASHTEST
+#define  SLASHTEST if (str[i] == '/') return FNM_NOMATCH;
+#include "fnmatchcomp_star.h"
+        break;
+      case PATTERN_star:
+#undef SLASHTEST
+#define  SLASHTEST
+#include "fnmatchcomp_star.h"
+        break;
+
+      case PATTERN_end:
+        return (*s) ? FNM_NOMATCH : 0;
+        break;
+      case PATTERN_slashend:
+        if (*s == '/' || *s == 0)
+          {
+            NEXTPATTERN (slashend);
+          }
+        else
+          return FNM_NOMATCH;
+        break;
+      case PATTERN_endstar:
+        ADDSTATE;
+        return 0;
+        break;
+      case PATTERN_bracket:
+        {
+          int mat;
+          int mlen, mbuflen;
+          if (MB_CUR_MAX == 1)
+            {
+              mlen = 1;
+              mat = (p->bracket.hash[*s / 32] & (1 << (*s % 32))) ? 1 : 0;
+            }
+          else
+            {
+              mlen = 0;
+              if (!(p->bracket.hash[*s / 32] & (1 << (*s % 32))))
+                {
+                  mat = 0;
+                  goto match;
+                }
+              mat = 1;
+              mlen = mblen (s, MB_CUR_MAX);
+              if (mlen < 0)
+                return FNM_NOMATCH;
+              chars = p->bracket.chars;
+              while (*chars)
+                {
+                  mbuflen = strlen (chars);
+                  if (!strncmp (chars, s, mlen) && mlen == mbuflen)
+                    goto match;
+                  chars += mbuflen + 1;
+                }
+              mat = 0;
+            }
+        match:
+          if (mat ^ (p->bracket.flags & BRACKET_INVMATCH))
+            {
+              if (!mlen)
+                {
+                  mlen = mblen (s, MB_CUR_MAX);
+                  if (mlen < 0)
+                    return FNM_NOMATCH;
+                }
+              if (p->bracket.flags & BRACKET_SLASH && *s == '/')
+                return FNM_NOMATCH;
+              s += mlen;
+              if (len - (str - s) < p->bracket.sizerest)
+                return FNM_NOMATCH;
+            }
+          else
+            {
+              return FNM_NOMATCH;
+            }
+          NEXTPATTERN (bracket);
+        }
+        break;
+      case PATTERN_begindot:
+        if (*s == '.')
+          return FNM_NOMATCH;
+        NEXTPATTERN (begindot);
+        break;
+      case PATTERN_fallback:
+
+        buflen = strlen (p->fallback.prefix) + len + 1;
+        NOMEMTEST (buf = ALLOCALIMIT (buflen, 1000));
+        strcpy (buf, p->fallback.prefix);
+        strcat (buf, s);
+#ifndef FNMATCH_PREFIX
+        retval = fnmatch (p->fallback.ptr, buf, p->fallback.flags);
+        ALLOCAFREE (buf, buflen, 1000);
+        return retval;
+#else
+        if (!(pat2 = malloc (sizeof (fallback_pattern))))
+          {
+            ALLOCAFREE (buf, buflen, 1000);
+            NOMEMTEST (NULL);
+          }
+        memcpy (pat2, p, sizeof (fallback_pattern));
+        if (!(pat2->fallback.prefix = strdup (buf)))
+          {
+            ALLOCAFREE (buf, buflen, 1000);
+            NOMEMTEST (NULL);
+          }
+        p = (patterns *) pat2;
+        ADDSTATE;
+        return FNM_NOMATCH;
+#endif
+        ALLOCAFREE (buf, buflen, 1000);
+
+      case PATTERN_fold:
+        buflen = 2 * len + 1;
+        NOMEMTEST (buf = (ALLOCALIMIT (buflen, 1000)));
+        strfold (str, buf);
+#ifndef FNMATCH_PREFIX
+        retval = fnmatch_exec ((fnmatch_state *) p->fold.states, buf);
+        ALLOCAFREE (buf, 2 * len + 1, 1000);
+        return retval;
+#else
+        if (!(pat2 = malloc (sizeof (fallback_pattern))))
+          {
+            ALLOCAFREE (buf, buflen, 1000);
+            NOMEMTEST (NULL);
+          }
+        pat2->fold.tp = PATTERN_fold;
+        if (!(pat2->fold.states =
+              (void *) fold_fallback (fnmatch_prefix (p->fold.states, buf))))
+          {
+            free (pat2);
+            ALLOCAFREE (buf, buflen, 1000);
+            NOMEMTEST (NULL);
+          }
+        p = (patterns *) pat2;
+        ADDSTATE;
+        ALLOCAFREE (buf, buflen, 1000);
+        return FNM_NOMATCH;
+#endif
+        break;
+      case PATTERN_checkneeded:
+#ifndef FNMATCH_PREFIX
+        {
+          int ret = fnmatch_exec ((fnmatch_state *) p->checkneeded.states, s);
+          if (ret > 0)
+            return FNM_NOMATCH;
+          if (ret < 0)
+            return ret;
+          return fnmatch_exec ((fnmatch_state *) p->checkneeded.fallback, s);
+        }
+#else
+        NOMEMTEST (pat2 = malloc (sizeof (checkneeded_pattern)));
+        pat2->checkneeded.tp = PATTERN_checkneeded;
+        if (!(pat2->checkneeded.states = (void *)
+              fnmatch_prefix ((fnmatch_state *) p->checkneeded.states, s)))
+          {
+            free (pat2);
+            NOMEMTEST (NULL);
+          }
+        if (!(pat2->checkneeded.fallback =
+              (void *) fnmatch_prefix ((fnmatch_state *) p->
+                                       checkneeded.fallback, s)))
+          {
+            fnmatch_free ((fnmatch_state *) pat2->checkneeded.states);
+            free (pat2);
+            NOMEMTEST (NULL);
+          }
+        ADDSTATE;
+        return FNM_NOMATCH;
+#endif
+        break;
+      }
+  }
+
+err:
+return -1;
diff --git a/lib/fnmatchcomp_star.h b/lib/fnmatchcomp_star.h
new file mode 100644
index 0000000..42a601d
--- /dev/null
+++ b/lib/fnmatchcomp_star.h
@@ -0,0 +1,66 @@
+  /* Copyright (C) 2009
+     Ondrej Bilka < address@hidden >
+
+     This program is free software: you can redistribute it and/or modif y
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation; either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program. If not, see < http://     www.gnu.org/licenses/ 
> . */
+
+#ifndef FNMATCH_PREFIX
+sizerest = p->star.sizerest;
+chars = p->star.chars;
+charno = p->star.charno;
+NEXTPATTERN (star);
+for (i = 0; i < charno; i++)
+  {
+    NEXTPATTERN (chr);
+  }
+
+switch (charno)                 /* we check charno character at once by 
converting to
+                                   coresponding integer type */
+  {
+  case 0:
+    for (i = s - str; i < len - sizerest; i++)
+      {
+        if (!fnmatch2_internal (p, str + i, len - i))
+          return 0;
+        SLASHTEST;
+      }
+    break;
+#define  CASEN(type, no) \
+  case no:\
+  for (i = s-str; i <= len-sizerest; i++)  {\
+  if (__builtin_expect((*((type*)(str+i)) == *(type *) chars), 0)\
+  && !fnmatch2_internal(p, str+i+no, len-i-no)) return 0; \
+  SLASHTEST \
+  } \
+  break;
+    CASEN (char, 1);
+    CASEN (UINT2, 2);
+  case 3:
+    CASEN (UINT4, 4);
+  }
+
+return FNM_NOMATCH;
+#else
+pat = p;
+NEXTPATTERN (star);
+for (i = s - str; i < len; i++)
+  {
+    fnmatch2_prefix_internal (p, str + i, len - i, ret);
+    SLASHTEST;
+  }
+
+p = pat;
+ADDSTATE;
+return FNM_NOMATCH;
+#endif
+break;
diff --git a/modules/fnmatchcomp b/modules/fnmatchcomp
new file mode 100644
index 0000000..c89b1e7
--- /dev/null
+++ b/modules/fnmatchcomp
@@ -0,0 +1,34 @@
+Description:
+ Allows you to compile fnmatch pattern. You can add to this pattern prefix
+ which will be added as prefix of any string you will match with this pattern
+
+Files:
+lib/fnmatchcomp.c
+lib/fnmatchcomp.h
+lib/fnmatchcomp_loop.h
+lib/fnmatchcomp_star.h
+
+Depends-on:
+fnmatch
+extensions
+alloca
+stdbool
+wchar
+wctype
+memchr
+memcmp
+mbsrtowcs
+mbsinit
+
+configure.ac:
+
+Makefile.am:
+
+Include:
+<fnmatchcomp.h>
+
+License:
+LGPLv2+
+
+Maintainer:
+Ondrej Bilka
-- 

Incorrect time synchronization

Attachment: bench
Description: Text document

Attachment: Makefile
Description: Text document

Attachment: ftest.c
Description: Text Data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]