[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.20-38-g9ea9254
From: Paul Eggert
Subject: grep branch, master, updated. v2.20-38-g9ea9254
Date: Wed, 17 Sep 2014 01:24:21 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".
The branch, master has been updated
via 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7 (commit)
via af3572e2651379566441c9d718dec7e809d3810d (commit)
via a19e4898d4fa7808fbccb0c9b0ab2cd77e3abcbe (commit)
via b5aa641ae5d2f4eeb05f0a09de9d178f8b678cb5 (commit)
via dfff75a432eec187e4c5f2c6ce99ebdadb1089c8 (commit)
via 6e319a818ed7b15b452ed2baab2f6a38d42fd1fe (commit)
via cd36abd46c5e0768606979ea75a51732062f5624 (commit)
via 564a06e761ac06c4a0bcd91ce5060118d35bf912 (commit)
via 55a0c73874bcfaa73948fd034fb34e117266d623 (commit)
via 77262184e3f573206a88374a8361cf3363122fb0 (commit)
from 845b366bef3596b33194d89a22d47e64680293b2 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=af3572e2651379566441c9d718dec7e809d3810d
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=a19e4898d4fa7808fbccb0c9b0ab2cd77e3abcbe
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=b5aa641ae5d2f4eeb05f0a09de9d178f8b678cb5
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=dfff75a432eec187e4c5f2c6ce99ebdadb1089c8
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=6e319a818ed7b15b452ed2baab2f6a38d42fd1fe
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=cd36abd46c5e0768606979ea75a51732062f5624
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=564a06e761ac06c4a0bcd91ce5060118d35bf912
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=55a0c73874bcfaa73948fd034fb34e117266d623
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=77262184e3f573206a88374a8361cf3363122fb0
commit 9ea9254ea58456b84ed2f0c1481ca91cdd325bf7
Author: Paul Eggert <address@hidden>
Date: Mon Sep 15 18:33:19 2014 -0700
grep: fix -P speedup bug with empty match
* src/pcresearch.c (NSUB): New top-level constant, replacing
'nsub' within Pexecute.
(Pcompile, Pexecute): Use it.
(Pexecute): Don't assume sub[1] is zero after a PCRE_ERROR_BADUTF8
match failure.
* tests/pcre-invalid-utf8-input: Test for this bug.
diff --git a/src/pcresearch.c b/src/pcresearch.c
index ce65758..c41f7ef 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -44,6 +44,10 @@ static pcre_jit_stack *jit_stack;
string matches when that flag is used. */
static int empty_match[2];
+/* This must be at least 2; everything after that is for performance
+ in pcre_exec. */
+enum { NSUB = 300 };
+
void
Pcompile (char const *pattern, size_t size)
{
@@ -132,8 +136,10 @@ Pcompile (char const *pattern, size_t size)
# endif
free (re);
- empty_match[false] = pcre_exec (cre, extra, "", 0, 0, PCRE_NOTBOL, NULL, 0);
- empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, NULL, 0);
+ int sub[NSUB];
+ empty_match[false] = pcre_exec (cre, extra, "", 0, 0,
+ PCRE_NOTBOL, sub, NSUB);
+ empty_match[true] = pcre_exec (cre, extra, "", 0, 0, 0, sub, NSUB);
#endif /* HAVE_LIBPCRE */
}
@@ -146,11 +152,7 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
error (EXIT_TROUBLE, 0, _("internal error"));
return -1;
#else
- /* This array must have at least two elements; everything after that
- is just for performance improvement in pcre_exec. */
- enum { nsub = 300 };
- int sub[nsub];
-
+ int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
@@ -174,15 +176,19 @@ Pexecute (char const *buf, size_t size, size_t *match_size,
{
int options = bol ? 0 : PCRE_NOTBOL;
int valid_bytes;
- e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, nsub);
+ e = pcre_exec (cre, extra, p, line_end - p, 0, options, sub, NSUB);
if (e != PCRE_ERROR_BADUTF8)
break;
valid_bytes = sub[0];
- e = (valid_bytes == 0
- ? empty_match[bol]
- : pcre_exec (cre, extra, p, valid_bytes, 0,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
- sub, nsub));
+ if (valid_bytes == 0)
+ {
+ sub[1] = 0;
+ e = empty_match[bol];
+ }
+ else
+ e = pcre_exec (cre, extra, p, valid_bytes, 0,
+ options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL,
+ sub, NSUB);
if (e != PCRE_ERROR_NOMATCH)
break;
p += valid_bytes + 1;
diff --git a/tests/pcre-invalid-utf8-input b/tests/pcre-invalid-utf8-input
index 9da4b18..78bd1cf 100755
--- a/tests/pcre-invalid-utf8-input
+++ b/tests/pcre-invalid-utf8-input
@@ -21,4 +21,9 @@ test $? -eq 0 || fail=1
LC_ALL=en_US.UTF-8 grep -P 'k$' in
test $? -eq 1 || fail=1
+echo k >exp
+
+LC_ALL=en_US.UTF-8 grep -aoP 'k*' in >out || fail=1
+compare exp out || fail=1
+
Exit $fail
-----------------------------------------------------------------------
Summary of changes:
NEWS | 4 +
bootstrap.conf | 1 +
cfg.mk | 4 +
configure.ac | 5 +
doc/grep.texi | 3 +-
src/grep.c | 398 ++++++++++++++++++++-------------------
src/grep.h | 8 +-
src/pcresearch.c | 51 ++++--
src/search.h | 19 ++
src/searchutils.c | 29 ++--
src/system.h | 9 +-
tests/backref-multibyte-slow | 2 +-
tests/high-bit-range | 2 +-
tests/invalid-multibyte-infloop | 14 ++-
tests/pcre-invalid-utf8-input | 5 +
15 files changed, 323 insertions(+), 231 deletions(-)
hooks/post-receive
--
grep
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- grep branch, master, updated. v2.20-38-g9ea9254,
Paul Eggert <=