[Nano-devel] [PATCH 2/2] search: make the \b and \B anchors work correctly in both directions

nano-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Nano-devel] [PATCH 2/2] search: make the \b and \B anchors work correctly in both directions

From:	Benno Schulenberg
Subject:	[Nano-devel] [PATCH 2/2] search: make the \b and \B anchors work correctly in both directions
Date:	Sun, 15 Jan 2017 20:24:28 +0100

That is: remove the special treatment of BOW anchors, and make regexes
match against the whole line rather than against an artificially
shortened one, because the latter method creates ghost matches: matches
at the starting point of the search that aren't really matches when seen
in the context of the whole line.

This fixes https://savannah.gnu.org/bugs/?50030.
---
 src/search.c | 24 +-----------------------
 src/utils.c  | 50 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 31 insertions(+), 43 deletions(-)

diff --git a/src/search.c b/src/search.c
index af36de2b..5c212c7f 100644
--- a/src/search.c
+++ b/src/search.c
@@ -38,8 +38,6 @@ static bool history_changed = FALSE;
 #ifdef HAVE_REGEX_H
 static bool regexp_compiled = FALSE;
        /* Have we compiled any regular expressions? */
-static bool bow_anchored = FALSE;
-       /* Whether a regex starts with a beginning-of-word anchor. */
 
 /* Compile the given regular expression and store it in search_regexp.
  * Return TRUE if the expression is valid, and FALSE otherwise. */
@@ -62,10 +60,6 @@ bool regexp_init(const char *regexp)
 
     regexp_compiled = TRUE;
 
-    /* Remember whether the regex starts with a beginning-of-word anchor. */
-    bow_anchored = (strncmp(regexp, "\\<", 2) == 0 ||
-                       strncmp(regexp, "\\b", 2) == 0);
-
     return TRUE;
 }
 
@@ -302,24 +296,8 @@ int findnextstr(const char *needle, bool whole_word_only, size_t *match_len,
        if (found != NULL) {
 #ifdef HAVE_REGEX_H
            /* When doing a regex search, compute the length of the match. */
-           if (ISSET(USE_REGEXP)) {
+           if (ISSET(USE_REGEXP))
                found_len = regmatches[0].rm_eo - regmatches[0].rm_so;
-
-               /* If the regex starts with a BOW anchor, check that the found
-                * match actually is the start of a word.  If not, continue. */
-               if (bow_anchored && found != line->data) {
-                   size_t before = move_mbleft(line->data, found - line->data);
-
-                   /* If a word char is before the match, skip this match. */
-                   if (is_word_mbchar(line->data + before, FALSE)) {
-                       if (ISSET(BACKWARDS_SEARCH))
-                           from = line->data + before;
-                       else
-                           from = found + move_mbright(found, 0);
-                       continue;
-                   }
-               }
-           }
 #endif
 #ifndef DISABLE_SPELLER
            /* When we're spell checking, a match should be a separate word;
diff --git a/src/utils.c b/src/utils.c
index bfd9d13c..0029be6a 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -327,29 +327,39 @@ const char *strstrwrapper(const char *haystack, const char *needle,
 #ifdef HAVE_REGEX_H
     if (ISSET(USE_REGEXP)) {
        if (ISSET(BACKWARDS_SEARCH)) {
-           if (regexec(&search_regexp, haystack, 1, regmatches, 0) == 0 &&
-                       haystack + regmatches[0].rm_so <= start) {
-               const char *retval = haystack + regmatches[0].rm_so;
-
-               /* Search forward until there are no more matches. */
-               while (regexec(&search_regexp, retval + 1, 1,
-                       regmatches, REG_NOTBOL) == 0 &&
-                       retval + regmatches[0].rm_so + 1 <= start)
-                   retval += regmatches[0].rm_so + 1;
-               /* Finally, put the subexpression matches in global
-                * variable regmatches.  The REG_NOTBOL flag doesn't
-                * matter now. */
-               regexec(&search_regexp, retval, 10, regmatches, 0);
-               return retval;
+           size_t lasthit;
+
+           if (regexec(&search_regexp, haystack, 1, regmatches, 0) != 0)
+               return NULL;
+
+           /* A result beyond the starting point also means: no match. */
+           if (haystack + regmatches[0].rm_so > start)
+               return NULL;
+
+           lasthit = regmatches[0].rm_so;
+
+           /* Move the search range forward until there is no more match;
+            * then the last match we found is the first match backwards. */
+           while (haystack + regmatches[0].rm_so < start) {
+               lasthit = regmatches[0].rm_so;
+               regmatches[0].rm_so = move_mbright(haystack, lasthit);
+               regmatches[0].rm_eo = start - haystack;
+               if (regexec(&search_regexp, haystack, 10, regmatches,
+                                       REG_STARTEND) == REG_NOMATCH)
+                   break;
            }
-       } else if (regexec(&search_regexp, start, 10, regmatches,
-                       (start > haystack) ? REG_NOTBOL : 0) == 0) {
-           const char *retval = start + regmatches[0].rm_so;
 
-           regexec(&search_regexp, retval, 10, regmatches, 0);
-           return retval;
+           return haystack + lasthit;
        }
-       return NULL;
+
+       /* Do a forward regex search from the starting point. */
+       regmatches[0].rm_so = start - haystack;
+       regmatches[0].rm_eo = strlen(haystack);
+       if (regexec(&search_regexp, haystack, 10, regmatches,
+                               REG_STARTEND) == 0)
+           return haystack + regmatches[0].rm_so;
+       else
+           return NULL;
     }
 #endif /* HAVE_REGEX_H */
     if (ISSET(CASE_SENSITIVE)) {
-- 
2.11.0

[Prev in Thread]

Current Thread

[Next in Thread]

[Nano-devel] [PATCH 1/2] search: begin from where we are, to be able to find the first \B, Benno Schulenberg, 2017/01/15
- [Nano-devel] [PATCH 2/2] search: make the \b and \B anchors work correctly in both directions, Benno Schulenberg <=

Prev by Date: [Nano-devel] [PATCH 1/2] search: begin from where we are, to be able to find the first \B
Next by Date: Re: [Nano-devel] can we use REG_STARTEND?
Previous by thread: [Nano-devel] [PATCH 1/2] search: begin from where we are, to be able to find the first \B
Next by thread: [Nano-devel] unable to install - nano-2.7.4.tar.gz in solaris 11.3
Index(es):
- Date
- Thread