grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-9-g86f6837


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-9-g86f6837
Date: Sat, 01 Mar 2014 06:24:31 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  86f6837e25adf44eeb362e02886854211bb61189 (commit)
       via  46c04437e2a929e4f97454ad0c097f9e1a39f2e3 (commit)
      from  d75266b4fd3dbfee88d81d3e87a2a9366c5d5cbb (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=86f6837e25adf44eeb362e02886854211bb61189


commit 86f6837e25adf44eeb362e02886854211bb61189
Author: Norihiro Tanaka <address@hidden>
Date:   Sat Mar 1 00:07:16 2014 +0900

    grep: remove trivial_case_ignore
    
    * src/main.c (trivial_case_ignore): Remove.
    (main): Remove its use; this optimization is no longer needed.

diff --git a/src/main.c b/src/main.c
index 3a53ee4..cf59e0e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1877,79 +1877,6 @@ parse_grep_colors (void)
    (*(s) = wctob ((wint_t) (wc)), 1) : \
    wcrtomb ((s), (wc), (ps)))
 
-/* If the newline-separated regular expressions, KEYS (with length, LEN
-   and no trailing NUL byte), are amenable to transformation into
-   otherwise equivalent case-ignoring ones, perform the transformation,
-   put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
-   and return true.  Otherwise, return false.  */
-static bool
-trivial_case_ignore (size_t len, char const *keys,
-                     size_t *new_len, char **new_keys)
-{
-  /* Perform this translation only for UTF-8.  Otherwise, this would induce
-     a 100-200x performance penalty for non-UTF8 multibyte locales.  */
-  if ( ! using_utf8 ())
-    return false;
-
-  /* FIXME: consider removing the following restriction:
-     Reject if KEYS contain ASCII '\\' or '['.  */
-  if (memchr (keys, '\\', len) || memchr (keys, '[', len))
-    return false;
-
-  /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
-     other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
-     maps to [BC], which requires MB_CUR_MAX + 3 bytes.   */
-  *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
-  char *p = *new_keys;
-
-  mbstate_t mb_state;
-  memset (&mb_state, 0, sizeof mb_state);
-  while (len)
-    {
-      wchar_t wc;
-      int n = MBRTOWC (&wc, keys, len, &mb_state);
-
-      /* For an invalid, incomplete or L'\0', skip this optimization.  */
-      if (n <= 0)
-        {
-        skip_case_ignore_optimization:
-          free (*new_keys);
-          return false;
-        }
-
-      char const *orig = keys;
-      keys += n;
-      len -= n;
-
-      if (!iswalpha (wc))
-        {
-          memcpy (p, orig, n);
-          p += n;
-        }
-      else
-        {
-          *p++ = '[';
-          memcpy (p, orig, n);
-          p += n;
-
-          wchar_t wc2 = iswupper (wc) ? towlower (wc) : towupper (wc);
-          char buf[MB_CUR_MAX];
-          int n2 = WCRTOMB (buf, wc2, &mb_state);
-          if (n2 <= 0)
-            goto skip_case_ignore_optimization;
-          assert (n2 <= MB_CUR_MAX);
-          memcpy (p, buf, n2);
-          p += n2;
-
-          *p++ = ']';
-        }
-    }
-
-  *new_len = p - *new_keys;
-
-  return true;
-}
-
 int
 main (int argc, char **argv)
 {
@@ -2344,35 +2271,6 @@ main (int argc, char **argv)
   else
     usage (EXIT_TROUBLE);
 
-  /* As currently implemented, case-insensitive matching is expensive in
-     multi-byte locales because of a few outlier locales in which some
-     characters change size when converted to upper or lower case.  To
-     accommodate those, we revert to searching the input one line at a
-     time, rather than using the much more efficient buffer search.
-     However, if we have a regular expression, /foo/i, we can convert
-     it to an equivalent case-insensitive /[fF][oO][oO]/, and thus
-     avoid the expensive read-and-process-a-line-at-a-time requirement.
-     Optimize-away the "-i" option, when possible, converting each
-     candidate alpha, C, in the regexp to [Cc].  */
-  if (match_icase)
-    {
-      size_t new_keycc;
-      char *new_keys;
-      /* It is not possible with -F, not useful with -P (pcre) and there is no
-         point when there is no regexp.  It also depends on which constructs
-         appear in the regexp.  See trivial_case_ignore for those details.  */
-      if (keycc
-          && ! (matcher
-                && (STREQ (matcher, "fgrep") || STREQ (matcher, "pcre")))
-          && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys))
-        {
-          match_icase = 0;
-          free (keys);
-          keys = new_keys;
-          keycc = new_keycc;
-        }
-    }
-
 #if MBS_SUPPORT
   if (MB_CUR_MAX > 1)
     build_mbclen_cache ();

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=46c04437e2a929e4f97454ad0c097f9e1a39f2e3


commit 46c04437e2a929e4f97454ad0c097f9e1a39f2e3
Author: Norihiro Tanaka <address@hidden>
Date:   Sat Mar 1 00:07:16 2014 +0900

    grep: remove trivial_case_ignore
    
    * src/main.c (trivial_case_ignore): Remove.
    (main): Remove its use; this optimization is no longer needed.

diff --git a/src/main.c b/src/main.c
index 3a53ee4..cf59e0e 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1877,79 +1877,6 @@ parse_grep_colors (void)
    (*(s) = wctob ((wint_t) (wc)), 1) : \
    wcrtomb ((s), (wc), (ps)))
 
-/* If the newline-separated regular expressions, KEYS (with length, LEN
-   and no trailing NUL byte), are amenable to transformation into
-   otherwise equivalent case-ignoring ones, perform the transformation,
-   put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
-   and return true.  Otherwise, return false.  */
-static bool
-trivial_case_ignore (size_t len, char const *keys,
-                     size_t *new_len, char **new_keys)
-{
-  /* Perform this translation only for UTF-8.  Otherwise, this would induce
-     a 100-200x performance penalty for non-UTF8 multibyte locales.  */
-  if ( ! using_utf8 ())
-    return false;
-
-  /* FIXME: consider removing the following restriction:
-     Reject if KEYS contain ASCII '\\' or '['.  */
-  if (memchr (keys, '\\', len) || memchr (keys, '[', len))
-    return false;
-
-  /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
-     other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
-     maps to [BC], which requires MB_CUR_MAX + 3 bytes.   */
-  *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
-  char *p = *new_keys;
-
-  mbstate_t mb_state;
-  memset (&mb_state, 0, sizeof mb_state);
-  while (len)
-    {
-      wchar_t wc;
-      int n = MBRTOWC (&wc, keys, len, &mb_state);
-
-      /* For an invalid, incomplete or L'\0', skip this optimization.  */
-      if (n <= 0)
-        {
-        skip_case_ignore_optimization:
-          free (*new_keys);
-          return false;
-        }
-
-      char const *orig = keys;
-      keys += n;
-      len -= n;
-
-      if (!iswalpha (wc))
-        {
-          memcpy (p, orig, n);
-          p += n;
-        }
-      else
-        {
-          *p++ = '[';
-          memcpy (p, orig, n);
-          p += n;
-
-          wchar_t wc2 = iswupper (wc) ? towlower (wc) : towupper (wc);
-          char buf[MB_CUR_MAX];
-          int n2 = WCRTOMB (buf, wc2, &mb_state);
-          if (n2 <= 0)
-            goto skip_case_ignore_optimization;
-          assert (n2 <= MB_CUR_MAX);
-          memcpy (p, buf, n2);
-          p += n2;
-
-          *p++ = ']';
-        }
-    }
-
-  *new_len = p - *new_keys;
-
-  return true;
-}
-
 int
 main (int argc, char **argv)
 {
@@ -2344,35 +2271,6 @@ main (int argc, char **argv)
   else
     usage (EXIT_TROUBLE);
 
-  /* As currently implemented, case-insensitive matching is expensive in
-     multi-byte locales because of a few outlier locales in which some
-     characters change size when converted to upper or lower case.  To
-     accommodate those, we revert to searching the input one line at a
-     time, rather than using the much more efficient buffer search.
-     However, if we have a regular expression, /foo/i, we can convert
-     it to an equivalent case-insensitive /[fF][oO][oO]/, and thus
-     avoid the expensive read-and-process-a-line-at-a-time requirement.
-     Optimize-away the "-i" option, when possible, converting each
-     candidate alpha, C, in the regexp to [Cc].  */
-  if (match_icase)
-    {
-      size_t new_keycc;
-      char *new_keys;
-      /* It is not possible with -F, not useful with -P (pcre) and there is no
-         point when there is no regexp.  It also depends on which constructs
-         appear in the regexp.  See trivial_case_ignore for those details.  */
-      if (keycc
-          && ! (matcher
-                && (STREQ (matcher, "fgrep") || STREQ (matcher, "pcre")))
-          && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys))
-        {
-          match_icase = 0;
-          free (keys);
-          keys = new_keys;
-          keycc = new_keycc;
-        }
-    }
-
 #if MBS_SUPPORT
   if (MB_CUR_MAX > 1)
     build_mbclen_cache ();

-----------------------------------------------------------------------

Summary of changes:
 src/main.c |  110 +++---------------------------------------------------------
 1 files changed, 5 insertions(+), 105 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]