From 6e4aa5ddf0f81cfd86303b958d3c0f93c350a028 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Thu, 26 Nov 2015 10:17:47 +0900 Subject: [PATCH] grep: make -P and -Pc report consistent results If the first 32768 bytes of a file are validly encoded, grep -P marks the file not as TEXTBIN_TEXT but as TEXTBIN_UNKNOWN, and once grep finds the first match, it marks the file as TEXTBIN_TEXT. However, grep -P -c does not perform this last step. Reported by Jaroslav Skarvada in http://debbugs.gnu.org/22028 * src/grep.c (grep): Fix this. * tests/pcre-count: Add new test. * tests/Makefile.am: Add test for this. * NEWS: Mention it. --- NEWS | 4 ++++ src/grep.c | 14 +++++++------- tests/Makefile.am | 1 + tests/pcre-count | 23 +++++++++++++++++++++++ 4 files changed, 35 insertions(+), 7 deletions(-) create mode 100755 tests/pcre-count diff --git a/NEWS b/NEWS index ac632d7..f498a5b 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ GNU grep NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + grep -P and grep -Pc now give consistent results. + [bug introduced in grep-2.21] * Noteworthy changes in release 2.22 (2015-11-01) [stable] diff --git a/src/grep.c b/src/grep.c index 2c5e09a..cd1826c 100644 --- a/src/grep.c +++ b/src/grep.c @@ -1415,13 +1415,13 @@ grep (int fd, struct stat const *st) } /* Detect whether leading context is adjacent to previous output. */ - if (lastout) - { - if (textbin == TEXTBIN_UNKNOWN) - textbin = TEXTBIN_TEXT; - if (beg != lastout) - lastout = 0; - } + if (beg != lastout) + lastout = 0; + + /* If the file's textbin has not been determined yet, assume + it's text if any matching line has already been found. */ + if (textbin == TEXTBIN_UNKNOWN && nlines) + textbin = TEXTBIN_TEXT; /* Handle some details and read more data to scan. 
*/ save = residue + lim - beg; diff --git a/tests/Makefile.am b/tests/Makefile.am index d379821..2865871 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -105,6 +105,7 @@ TESTS = \ pcre \ pcre-abort \ pcre-context \ + pcre-count \ pcre-infloop \ pcre-invalid-utf8-input \ pcre-jitstack \ diff --git a/tests/pcre-count b/tests/pcre-count new file mode 100755 index 0000000..78e1c7c --- /dev/null +++ b/tests/pcre-count @@ -0,0 +1,23 @@ +#! /bin/sh +# Check that grep -P and grep -Pc give consistent results. +# This bug affected grep versions 2.21 through 2.22. +# +# Copyright (C) 2015 Free Software Foundation, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. + +. "${srcdir=.}/init.sh"; path_prepend_ ../src +require_pcre_ + +fail=0 + +printf 'a\n%032768d\nb\x0\n%032768d\na\n' 0 0 > in + +LC_ALL=C grep -P 'a' in | wc -l > exp + +LC_ALL=C grep -Pc 'a' in > out || fail=1 +compare exp out || fail=1 + +Exit $fail -- 2.4.6