bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#5393: control message for bug 5393


From: Lars Ingebrigtsen
Subject: bug#5393: control message for bug 5393
Date: Mon, 29 Feb 2016 13:30:07 +1100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/25.1.50 (gnu/linux)

Stephen Berman <stephen.berman@gmx.net> writes:

> I think it's here:
>
> http://lists.gnu.org/archive/html/emacs-devel/2009-06/msg00094.html

I have included the patch below for easier reading.

Lookahead and lookbehind are useful, I guess, but the regex code in
Emacs is a mystery to me, so I can't really judge the patch.

Index: src/regex.c
===================================================================
RCS file: /sources/emacs/emacs/src/regex.c,v
retrieving revision 1.236
diff -u -r1.236 regex.c
--- src/regex.c 8 Jan 2009 03:15:54 -0000       1.236
+++ src/regex.c 3 Jun 2009 22:35:17 -0000
@@ -735,7 +735,14 @@
   syntaxspec,
 
        /* Matches any character whose syntax is not that specified.  */
-  notsyntaxspec
+  notsyntaxspec,
+
+  lookahead,
+  lookahead_not,
+  lookbehind,
+  lookbehind_not,
+  lookaround_succeed,
+  lookaround_fail
 
 #ifdef emacs
   ,before_dot, /* Succeeds if before point.  */
@@ -1033,6 +1040,36 @@
          fprintf (stderr, "/stop_memory/%d", *p++);
          break;
 
+        case lookahead:
+          extract_number_and_incr (&mcnt, &p);
+          fprintf (stderr, "/lookahead/%d", mcnt);
+          break;
+
+        case lookahead_not:
+          extract_number_and_incr (&mcnt, &p);
+          fprintf (stderr, "/lookahead_not/%d", mcnt);
+          break;
+
+        case lookbehind:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          fprintf (stderr, "/lookbehind/%d/%d", mcnt, mcnt2);
+          break;
+
+        case lookbehind_not:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          fprintf (stderr, "/lookbehind_not/%d/%d", mcnt, mcnt2);
+          break;
+
+        case lookaround_succeed:
+         fprintf (stderr, "/lookaround_succeed");
+          break;
+
+        case lookaround_fail:
+          fprintf (stderr, "/lookaround_fail");
+          break;
+            
        case duplicate:
          fprintf (stderr, "/duplicate/%d", *p++);
          break;
@@ -1600,11 +1637,17 @@
     }                                                                  \
   else                                                                 \
     {                                                                  \
-      regend[reg] = POP_FAILURE_POINTER ();                            \
-      regstart[reg] = POP_FAILURE_POINTER ();                          \
-      DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",           \
-                   reg, regstart[reg], regend[reg]);                   \
-    }                                                                  \
+      re_char *start, *end;                                             \
+      end = POP_FAILURE_POINTER ();                                     \
+      start = POP_FAILURE_POINTER ();                                   \
+      if (!discard_saved_regs)                                          \
+        {                                                               \
+          regstart[reg] = start;                                        \
+          regend[reg] = end;                                            \
+          DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",        \
+                        reg, regstart[reg], regend[reg]);               \
+        }                                                               \
+    }                                                                   \
 } while (0)
 
 /* Check that we are not stuck in an infinite loop.  */
@@ -1702,7 +1745,7 @@
   while (fail_stack.frame < fail_stack.avail)                          \
     POP_FAILURE_REG_OR_COUNT ();                                       \
                                                                        \
-  pat = POP_FAILURE_POINTER ();                                \
+  pat = POP_FAILURE_POINTER ();                                         \
   DEBUG_PRINT2 ("  Popping pattern %p: ", pat);                                \
   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                      \
                                                                        \
@@ -1724,6 +1767,29 @@
 } while (0) /* POP_FAILURE_POINT */
 
 
+/* Leave a lookaround: pop failure points down to the frame pushed by
+   the lookaround opcode and resume at the string position it saved.
+   Registers saved in the popped frames are discarded, not restored.  */
+#define FINISH_LOOKAROUND()                                     \
+  do {                                                          \
+    re_char *str, *pat;                                         \
+    re_opcode_t op;                                             \
+    discard_saved_regs = 1;                                     \
+    while (!FAIL_STACK_EMPTY ())                                \
+      {                                                         \
+        POP_FAILURE_POINT (str, pat);                           \
+        op = (re_opcode_t) *pat;                                \
+        if (op == lookahead || op == lookahead_not              \
+            || op == lookbehind || op == lookbehind_not)        \
+          {                                                     \
+            d = str;                                            \
+            dend = (d >= string1 && d <= end1) ? end_match_1 : end_match_2; \
+            break;                                              \
+          }                                                     \
+      }                                                         \
+    discard_saved_regs = 0;                                     \
+  } while (0)
+

 /* Registers are set to a sentinel when they haven't yet matched.  */
 #define REG_UNSET(e) ((e) == NULL)
@@ -1922,6 +1988,7 @@
   pattern_offset_t fixup_alt_jump;
   pattern_offset_t laststart_offset;
   regnum_t regnum;
+  int lookaround;
 } compile_stack_elt_t;
 
 
@@ -2522,6 +2589,8 @@
                                                 compile_stack,
                                                 regnum_t regnum));
 
+static int exact_chars_in_pattern_buffer _RE_ARGS ((struct re_pattern_buffer *bufp, re_char *p, re_char *pend));
+
 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
    Returns one of error codes defined in `regex.h', or zero for success.
 
@@ -3261,6 +3330,7 @@
            handle_open:
              {
                int shy = 0;
+                int lookaround = 0;
                regnum_t regnum = 0;
                if (p+1 < pend)
                  {
@@ -3282,6 +3352,27 @@
                              case '1': case '2': case '3': case '4':
                              case '5': case '6': case '7': case '8': case '9':
                                regnum = 10*regnum + (c - '0'); break;
+                              case '=':
+                                /* Positive lookahead assertion.  */
+                                shy = lookaround = 1;
+                                break;
+                              case '!':
+                                /* Negative lookahead assertion.  */
+                                shy = lookaround = 2;
+                                break;
+                              case '<':
+                                {
+                                  PATFETCH (c);
+                                  if (c == '=')
+                                    /* Positive lookbehind assertion.  */
+                                    shy = lookaround = -1;
+                                  else if (c == '!')
+                                    /* Negative lookbehind assertion.  */
+                                    shy = lookaround = -2;
+                                  else
+                                    FREE_STACK_RETURN (REG_BADPAT);
+                                }
+                                break;
                              default:
                                /* Only (?:...) is supported right now. */
                                FREE_STACK_RETURN (REG_BADPAT);
@@ -3328,6 +3419,7 @@
                  = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
                COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
                COMPILE_STACK_TOP.regnum = regnum;
+                COMPILE_STACK_TOP.lookaround = lookaround;
 
                /* Do not push a start_memory for groups beyond the last one
                   we can represent in the compiled pattern.  */
@@ -3377,6 +3469,7 @@
                   later groups should continue to be numbered higher,
                   as in `(ab)c(de)' -- the second group is #2.  */
                regnum_t regnum;
+                int lookaround;
 
                compile_stack.avail--;
                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
@@ -3389,13 +3482,40 @@
                /* If we've reached MAX_REGNUM groups, then this open
                   won't actually generate any code, so we'll have to
                   clear pending_exact explicitly.  */
+                lookaround = COMPILE_STACK_TOP.lookaround;
                pending_exact = 0;
 
                /* We're at the end of the group, so now we know how many
                   groups were inside this one.  */
                if (regnum <= MAX_REGNUM && regnum > 0)
                  BUF_PUSH_2 (stop_memory, regnum);
-             }
+                else if (lookaround)
+                  {
+                    if (lookaround > 0)
+                      {
+                        /* Positive/negative lookahead assertion.  */
+                        GET_BUFFER_SPACE (3);
+                        INSERT_JUMP (lookaround == 1 ? lookahead : lookahead_not, laststart, b + 4);
+                        b += 3;
+                      }
+                    else
+                      {
+                        /* Positive/negative lookbehind assertion.  */
+                        int count = exact_chars_in_pattern_buffer (bufp, laststart, b);
+                        if (count == -1) /* variable length */
+                          FREE_STACK_RETURN (REG_BADPAT);
+
+                        GET_BUFFER_SPACE (5);
+                        INSERT_JUMP2 (lookaround == -1 ? lookbehind : lookbehind_not, laststart, b + 6, count);
+                        b += 5;
+                      }
+                    
+                    /* Negative form.  */
+                    if (lookaround > 1 || lookaround < -1)
+                      BUF_PUSH (lookaround_fail);
+                    BUF_PUSH (lookaround_succeed);
+                  }
+              }
              break;
 
 
@@ -3949,10 +4069,16 @@
        /* After an alternative?         */
     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
        /* After a shy subexpression?  */
-    || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
-       && prev[-1] == '?' && prev[-2] == '('
-       && (syntax & RE_NO_BK_PARENS
-           || (prev - 3 >= pattern && prev[-3] == '\\')));
+    || ((syntax & RE_SHY_GROUPS)
+        && ((prev - 2 >= pattern
+             && prev[-1] == '?' && prev[-2] == '('
+             && (syntax & RE_NO_BK_PARENS
+                 || (prev - 3 >= pattern && prev[-3] == '\\')))
+         || (prev - 3 >= pattern
+             && (*prev == '=' || *prev == '!')
+             && prev[-1] == '<' && prev[-2] == '?' && prev[-3] == '('
+             && (syntax & RE_NO_BK_PARENS
+                 || (prev - 4 >= pattern && prev[-4] == '\\')))));
 }
 
 
@@ -4198,6 +4324,13 @@
            }
          break;
 
+        case lookahead:
+        case lookahead_not:
+        case lookbehind:
+        case lookbehind_not:
+          if (!fastmap) break;
+          return -1;
+          
       /* All cases after this match the empty string.  These end with
         `continue'.  */
 
@@ -4827,7 +4960,7 @@
        {
        case start_memory:
        case stop_memory:
-         p += 2; break;
+          p += 2; break;
        case no_op:
          p += 1; break;
        case jump:
@@ -4843,6 +4976,93 @@
   return p;
 }
 
+/* Return the number of characters the compiled pattern in [P, PEND)
+   always matches, or -1 if it contains an opcode whose width is not
+   handled here.  Zero-width opcodes and lookarounds count as 0.  */
+static int
+exact_chars_in_pattern_buffer (bufp, p, pend)
+     struct re_pattern_buffer *bufp;
+     re_char *p, *pend;
+{
+  int count = 0;
+  while (p < pend)
+    {
+      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
+       {
+        case exactn:
+          {
+            int mcnt = *p++;    /* Byte length of the literal.  */
+            int buf_charlen;
+            while (mcnt > 0) {
+              STRING_CHAR_AND_LENGTH (p, pend - p, buf_charlen);
+              p += buf_charlen;
+              mcnt -= buf_charlen;
+              count++;
+            }
+          }
+          break;
+        case start_memory:
+        case stop_memory:
+          p++;                  /* Skip the one-byte operand.  */
+          break;
+#ifdef emacs
+        case categoryspec:
+        case notcategoryspec:
+#endif /* emacs */
+        case syntaxspec:
+        case notsyntaxspec:
+          p++;                  /* Skip the operand, then fall through.  */
+        case anychar:
+          count++;
+          break;
+
+        case charset:
+        case charset_not:
+          if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
+            {
+              int mcnt;
+              p = CHARSET_RANGE_TABLE (p - 1);
+              EXTRACT_NUMBER_AND_INCR (mcnt, p);
+              p = CHARSET_RANGE_TABLE_END (p, mcnt);
+            }
+          else
+            p += 1 + CHARSET_BITMAP_SIZE (p - 1);
+          count++;
+          break;
+
+#ifdef emacs
+       case before_dot:
+       case at_dot:
+       case after_dot:
+#endif /* emacs */
+       case no_op:
+       case begline:
+       case endline:
+       case begbuf:
+       case endbuf:
+       case wordbound:
+       case notwordbound:
+       case wordbeg:
+       case wordend:
+       case symbeg:
+       case symend:
+          /* Zero width.  */
+          continue;
+        case lookahead:
+        case lookahead_not:
+        case lookbehind:
+        case lookbehind_not:
+          /* Skip past the next lookaround_succeed opcode byte.  */
+          while (p < pend && (re_opcode_t) *p++ != lookaround_succeed)
+            continue;
+          break;
+        default:
+          return -1;
+        }
+    }
+  return count;
+}
+
 /* Non-zero if "p1 matches something" implies "p2 fails".  */
 static int
 mutually_exclusive_p (bufp, p1, p2)
@@ -5200,6 +5420,9 @@
   re_char **best_regstart, **best_regend;
 #endif
 
+  /* Discard a saved register from the stack.  */
+  boolean discard_saved_regs = 0;
+
   /* Logically, this is `best_regend[0]'.  But we don't want to have to
      allocate space for that if we're not allocating space for anything
      else (see below).  Also, we never need info about register 0 for
@@ -5772,6 +5995,77 @@
          p += 1;
          break;
 
+        case lookahead:
+        case lookahead_not:
+          DEBUG_PRINT1 ((re_opcode_t) *(p - 1) == lookahead ? "EXECUTING lookahead.\n" : "EXECUTING lookahead_not.\n");
+
+          p += 2;
+          PUSH_FAILURE_POINT (p - 3, d);
+          break;
+
+        case lookbehind:
+        case lookbehind_not:
+          {
+            int mcnt, count;
+            boolean not = (re_opcode_t) *(p - 1) != lookbehind;
+
+            EXTRACT_NUMBER_AND_INCR (mcnt, p);
+            EXTRACT_NUMBER_AND_INCR (count, p);
+
+            DEBUG_PRINT2 (not
+                          ? "EXECUTING lookbehind_not %d.\n"
+                          : "EXECUTING lookbehind %d.\n", count);
+            
+            dfail = d;
+            while (d != string1 && count > 0)
+              {
+                if (d == string2)
+                  {
+                    if (!string1)
+                      break;
+                    d = end1;
+                    dend = end_match_1;
+                  }
+                
+                if (target_multibyte)
+                  {
+                    re_char *dhead = (d >= string1 && d <= end1) ? string1 : string2;
+                    PREV_CHAR_BOUNDARY (d, dhead);
+                  }
+                else
+                  d--;
+                count--;
+              }
+
+            if (count > 0)
+              {
+                if (not)
+                  {
+                    /* There is not enough string left to match,
+                       so just make it succeed here. */
+                    d = dfail;
+                    p = p - 2 + mcnt;
+                    break;
+                  }
+                else
+                  goto fail;
+              }
+
+            PUSH_FAILURE_POINT (p - 5, dfail);
+          }
+          break;
+
+        case lookaround_succeed:
+          DEBUG_PRINT1 ("EXECUTING lookaround_succeed.\n");
+          
+          FINISH_LOOKAROUND();
+          break;
+
+        case lookaround_fail:
+          DEBUG_PRINT1 ("EXECUTING lookaround_fail.\n");
+          
+          FINISH_LOOKAROUND();
+          goto fail;
 
        /* \<digit> has been turned into a `duplicate' command which is
           followed by the numeric value of <digit> as the register number.  */
@@ -6413,12 +6707,16 @@
            case on_failure_jump_loop:
            case on_failure_jump:
            case succeed_n:
+            case lookahead_not:
+            case lookbehind_not:
              d = str;
            continue_failure_jump:
              EXTRACT_NUMBER_AND_INCR (mcnt, pat);
              p = pat + mcnt;
              break;
 
+            case lookahead:
+            case lookbehind:
            case no_op:
              /* A special frame used for nastyloops. */
              goto fail;


Test cases:

;; -*-coding:utf-8-*-

;; Running count of tests executed; incremented by the `test' macro.
(defvar test-counter 0)

(defmacro test (&rest form)
  "Evaluate FORM and print one numbered result line.
The line starts with the incremented `test-counter' and ends with
`ok' if FORM returns non-nil, `fail' if it returns nil, or `invalid'
if evaluating FORM signals an error."
  `(princ-list (format "%d ... " (setq test-counter (+ test-counter 1)))
               (condition-case nil
                   (cond ((progn ,@form) 'ok)
                         (t 'fail))
                 (error 'invalid))))

(defun expect-invalid (regexp)
  "Check that using REGEXP signals an error.
The test passes only when `string-match' of REGEXP against the empty
string signals; a normal return, match or not, counts as a failure."
  (test (condition-case nil
            (progn
              (string-match regexp "")
              nil)
          (error t))))

(defun expect-match (regexp string &optional group-number group-string)
  "Check that REGEXP matches STRING.
When GROUP-NUMBER is non-nil, additionally require that the text of
match group GROUP-NUMBER is `equal' to GROUP-STRING."
  (test (when (string-match regexp string)
          (or (null group-number)
              (equal (match-string group-number string) group-string)))))

(defun expect-not-match (regexp string)
  "Check that REGEXP does not match anywhere in STRING."
  (test (null (string-match regexp string))))

;; Positive lookahead, \(?=...\): the body must match at the current
;; position, but the text it matched is not consumed.  Groups inside
;; the assertion are still recorded.
(expect-match "\\(?=\\)" "")
(expect-not-match "\\(?=a\\)" "")
(expect-match "a\\(?=b\\)b" "ab")
(expect-not-match "a\\(?=b\\)c" "ab")
(expect-match "\\(?=a\\)a" "a")
(expect-not-match "\\(?=b\\)a" "a")
(expect-match "\\(?=^\\)a" "a")
(expect-match "a\\(?=$\\)$" "a")
(expect-match "a\\(?=\\)$" "a")
(expect-match "a\\(?=.*c\\)b" "abc")
(expect-not-match "a\\(?=.*d\\)b" "abc")
(expect-match "a\\(?=b\\|c\\|d\\|e\\)" "ae")
(expect-not-match "a\\(?=b\\|c\\|d\\|e\\)" "af")
(expect-match "a\\(?=\\(b\\)\\)b" "ab" 1 "b")
(expect-match "a\\(\\(?=b\\)\\)" "ab" 1 "")
(expect-match "a\\(?=\\(b\\)\\)" "ab" 1 "b")
(expect-match "\\(a\\(?=\\(b\\)\\)\\2\\)\\1" "abab" 1 "ab")
(expect-not-match "\\(a\\)\\(?=\\(b\\)\\)\\1" "ab")
(expect-match "\\(a\\(?=b\\(?=c\\)\\)\\)" "abc" 1 "a")
(expect-not-match "\\(a\\(?=b\\(?=c\\)\\)\\)" "abd")
;; Negative lookahead, \(?!...\): succeeds only where the body does
;; NOT match at the current position; consumes no text.
(expect-not-match "\\(?!\\)" "")
(expect-match "\\(?!a\\)" "")
(expect-not-match "a\\(?!b\\)b" "ab")
(expect-match "a\\(?!b\\)c" "ac")
(expect-not-match "\\(?!a\\)a" "a")
(expect-match "\\(?!b\\)a" "a")
(expect-match "\\(?!^\\)a" "ba")
(expect-not-match "\\(?!^\\)a" "a")
(expect-not-match "a\\(?!$\\)$" "a")
(expect-not-match "a\\(?!\\)$" "a")
(expect-not-match "a\\(?!.*c\\)b" "abc")
(expect-match "a\\(?!.*d\\)b" "abc")
(expect-not-match "a\\(?!b\\|c\\|d\\|e\\)" "ae")
(expect-match "a\\(?!b\\|c\\|d\\|e\\)" "af")
(expect-match "a\\(?!\\(b\\)\\)c" "ac")
(expect-match "a\\(\\(?!b\\)\\)" "ac")
(expect-match "a\\(?!b\\(?!c\\)\\)" "abc")
(expect-not-match "a\\(?!b\\(?=\\(c\\)\\)\\)" "abc")
(expect-not-match "a\\(?!b\\(?!c\\)\\)" "abd")
;; Positive lookbehind, \(?<=...\): the body must match the text that
;; ends at the current position.  The `expect-invalid' cases at the
;; end check that bodies rejected by the compiler (back-references,
;; repetition, alternation) signal an error.
(expect-match "\\(?<=\\)" "")
(expect-not-match "\\(?<=a\\)" "")
(expect-match "\\(?<=a\\)" "a")
(expect-not-match "\\(?<=b\\)" "a")
(expect-match "\\(?<=^\\)" "")
(expect-not-match "a\\(?<=^\\)" "")
(expect-match "\\(?<=$\\)" "")
(expect-not-match "\\(?<=$\\)a" "")
(expect-match "\\(?<=a\\)b" "ab")
(expect-not-match "\\(?<=c\\)b" "ab")
(expect-match "\\(?<=\\(?<=a\\)\\)b" "ab")
(expect-not-match "\\(?<=\\(?<=b\\)\\)b" "ab")
(expect-match "\\(?<=\\(?=a\\).\\)b" "ab")
(expect-match "\\(?<=\\(a\\)\\)b\\1" "aba" 1 "a")
(expect-match "\\(?<=.\\)a" "aa")
(expect-match "\\(?<=\\(.\\)\\)a" "aa")
(expect-match "\\(?<=\\w\\)a" "aa")
(expect-not-match "\\(?<=\\w\\)a" "!a")
(expect-match "\\(?<=\\sw\\)a" "aa")
(expect-not-match "\\(?<=\\sw\\)a" "!a")
(expect-match "\\(?<=\\cg\\)a" "λa")
(expect-not-match "\\(?<=\\Cg\\)a" "λa")
(expect-match "\\(?<=[a-z]\\)" "aa")
(expect-not-match "\\(?<=[a-z]\\)a" "1a")
(expect-match "\\(?<=[^a-z]\\)" "1a")
(expect-not-match "\\(?<=[^a-z]\\)" "aa")
;; NOTE(review): "[:ascii:]" below is a plain character set, not a
;; character class; "[[:ascii:]]" may have been intended -- confirm.
(expect-match "\\(?<=[:ascii:]\\)a" "aa")
(expect-match "\\(?<=\\`\\)" "")
(expect-not-match "a\\(?<=\\`\\)" "a")
(expect-match "\\(?<=\\'\\)" "")
(expect-not-match "\\(?<=\\'\\)a" "a")
(expect-not-match "\\(?<=\\=\\)" "")
(expect-match "\\(?<=\\b\\)a" "a")
(expect-not-match "a\\(?<=\\b\\)b" "ab")
(expect-match "\\(?<=\\B\\)a" "aa")
(expect-not-match "\\(?<=\\B\\)a" " a")
(expect-match "\\(?<=\\<\\)a" "a")
(expect-not-match "a\\(?<=\\<\\)b" "ab")
(expect-match "a\\(?<=\\>\\)" "a")
(expect-not-match "a\\(?<=\\>\\)b" "ab")
(expect-match "\\(?<=\\_<\\)a" "a")
(expect-not-match "a\\(?<=\\_<\\)b" "ab")
(expect-match "a\\(?<=\\_>\\)" "a")
(expect-not-match "a\\(?<=\\_>\\)b" "ab")
(expect-invalid "\\(?<=\\(.\\)\\1\\)")  ; duplicate
(expect-invalid "\\(?<=a*\\)")          ; variable width
(expect-invalid "\\(?<=a*?\\)")         ; variable width
(expect-invalid "\\(?<=a+\\)")          ; variable width
(expect-invalid "\\(?<=a+?\\)")         ; variable width
(expect-invalid "\\(?<=a?\\)")          ; variable width
(expect-invalid "\\(?<=a??\\)")         ; variable width
(expect-invalid "\\(?<=a\\{1,4\\}\\)")  ; variable width
(expect-invalid "\\(?<=a\\|bb\\|ccc\\)") ; variable width
(expect-invalid "\\(?<=a\\{4\\}\\)")   ; fixed width but not supported yet
(expect-invalid "\\(?<=a\\|\\b\\c\\)")   ; fixed width but not supported yet
;; Negative lookbehind, \(?<!...\): succeeds only where the body does
;; NOT match the text that ends at the current position.  The
;; `expect-invalid' cases at the end mirror the positive-lookbehind
;; ones.
(expect-not-match "\\(?<!\\)" "")
(expect-match "\\(?<!a\\)" "")
(expect-match "\\(?<!a\\)" "a")
(expect-not-match "\\(?<!a\\)b" "ab")
(expect-match "\\(?<!b\\)" "a")
(expect-not-match "\\(?<!^\\)" "")
(expect-not-match "a\\(?<!^\\)" "")
(expect-not-match "\\(?<!$\\)" "")
;; NOTE(review): the next test is a positive lookbehind and repeats
;; one from the positive-lookbehind group.
(expect-match "\\(?<=a\\)b" "ab")
(expect-match "\\(?<!c\\)b" "ab")
(expect-match "\\(?<!\\(?<!a\\)\\)b" "ab")
(expect-not-match "\\(?<!\\(?<!b\\)\\)b" "ab")
(expect-match "\\(?<!\\(?!a\\).\\)b" "ab")
(expect-match "\\(?<!.\\)a" "aa")
(expect-not-match "\\(?<!.\\)b" "ab")
(expect-not-match "\\(?<!\\(.\\)\\)b" "ab")
(expect-not-match "\\(?<!\\w\\)b" "ab")
(expect-not-match "\\(?<!\\w\\)b" "ab") ; NOTE(review): same as the previous test
(expect-not-match "\\(?<!\\sw\\)b" "ab")
(expect-match "\\(?<!\\sw\\)a" "!a")
(expect-not-match "\\(?<!\\cg\\)a" "λa")
(expect-match "\\(?<!\\Cg\\)a" "λa")
(expect-match "\\(?<![a-z]\\)" "aa")
(expect-match "\\(?<![a-z]\\)a" "1a")
(expect-not-match "\\(?<![^a-z]\\)a" "1a")
;; NOTE(review): "[:ascii:]" below is a plain character set, not a
;; character class; "[[:ascii:]]" may have been intended -- confirm.
(expect-not-match "\\(?<![:ascii:]\\)b" "ab")
(expect-not-match "\\(?<!\\`\\)" "")
(expect-match "a\\(?<!\\`\\)" "a")
(expect-not-match "\\(?<!\\'\\)" "")
(expect-match "\\(?<!\\'\\)a" "a")
(expect-match "\\(?<!\\=\\)" "")
(expect-not-match "\\(?<!\\b\\)a" "a")
(expect-match "a\\(?<!\\b\\)b" "ab")
(expect-not-match "\\(?<!\\B\\)b" "ab")
(expect-match "\\(?<!\\B\\)a" " a")
(expect-not-match "\\(?<!\\<\\)a" "a")
(expect-match "a\\(?<!\\<\\)b" "ab")
(expect-not-match "a\\(?<!\\>\\)" "a")
(expect-match "a\\(?<!\\>\\)b" "ab")
(expect-not-match "\\(?<!\\_<\\)a" "a")
(expect-match "a\\(?<!\\_<\\)b" "ab")
(expect-not-match "a\\(?<!\\_>\\)" "a")
(expect-match "a\\(?<!\\_>\\)b" "ab")
(expect-invalid "\\(?<!\\(.\\)\\1\\)")  ; duplicate
(expect-invalid "\\(?<!a*\\)")          ; variable width
(expect-invalid "\\(?<!a*?\\)")         ; variable width
(expect-invalid "\\(?<!a+\\)")          ; variable width
(expect-invalid "\\(?<!a+?\\)")         ; variable width
(expect-invalid "\\(?<!a?\\)")          ; variable width
(expect-invalid "\\(?<!a??\\)")         ; variable width
(expect-invalid "\\(?<!a\\{1,4\\}\\)")  ; variable width
(expect-invalid "\\(?<!a\\|bb\\|ccc\\)") ; variable width
(expect-invalid "\\(?<!a\\{4\\}\\)")   ; fixed width but not supported yet
(expect-invalid "\\(?<!a\\|\\b\\c\\)")   ; fixed width but not supported yet

;; Lookahead and lookbehind over non-ASCII (multibyte) text: two
;; tests for each of the four assertion kinds.
(expect-match "Hello, \\(?=世界\\)" "Hello, 世界!")
(expect-not-match "Hello, \\(?=せかい\\)" "Hello, 世界!")
(expect-match "Hello, \\(?!せかい\\)" "Hello, 世界!")
(expect-not-match "Hello, \\(?!世界\\)" "Hello, 世界!")
(expect-match "\\(?<=こんにちは\\), World!" "こんにちは, World!")
(expect-not-match "\\(?<=こんにちわ\\), World!" "こんにちは, World!")
(expect-match "\\(?<!こんにちわ\\), World!" "こんにちは, World!")
(expect-not-match "\\(?<!こんにちは\\), World!" "こんにちは, World!")

(require 'cl)

;; Forward search with lookahead: point ends up after each "a" that is
;; immediately followed by "b".
(with-temp-buffer
  (insert "abracadabra")
  (goto-char (point-min))
  (test (let (hits)
          (while (re-search-forward "a\\(?=b\\)" nil t)
            (push (point) hits))
          (equal (nreverse hits) '(2 9)))))

;; Backward search with lookahead, starting from the end of the buffer:
;; point ends up at the start of each such "a".
(with-temp-buffer
  (insert "abracadabra")
  (test (let (hits)
          (while (re-search-backward "a\\(?=b\\)" nil t)
            (push (point) hits))
          (equal (nreverse hits) '(8 1)))))

;; Forward search with lookbehind: point ends up after each "b" that is
;; immediately preceded by "a".
(with-temp-buffer
  (insert "abracadabra")
  (goto-char (point-min))
  (test (let (hits)
          (while (re-search-forward "\\(?<=a\\)b" nil t)
            (push (point) hits))
          (equal (nreverse hits) '(3 10)))))

;; Backward search with lookbehind, starting from the end of the buffer.
(with-temp-buffer
  (insert "abracadabra")
  (test (let (hits)
          (while (re-search-backward "\\(?<=a\\)b" nil t)
            (push (point) hits))
          (equal (nreverse hits) '(9 2)))))

;; Lookbehind may inspect text located before the position where the
;; forward search starts.
(with-temp-buffer
  (insert "abcdebc")
  (goto-char 3)
  (test (eq 4 (re-search-forward "\\(?<=b\\)c" nil t))))

(with-temp-buffer
  (insert "abcdebc")
  (goto-char 7)
  ;; search-backward with lookahead over bound is not supported yet
  (test (eq 2 (re-search-backward "b\\(?=c\\)" nil t))))

;; Optional benchmark, run only when "perf" is among the command-line
;; arguments.  For each search direction and each regexp, it times 10
;; repetitions of searching through all of xdisp.c, then prints the
;; elapsed time and the number of matches summed over the repetitions.
(when (member "perf" argv)
  ;; Use a big file (xdisp.c) as the text to search.
  (require 'find-func)
  (let ((file (concat (or find-function-C-source-directory "~/src/emacs")
                      "/xdisp.c"))
        count)
    (with-temp-buffer
      (insert-file-contents file)
      ;; Each pair is (START-POSITION-FUNCTION . SEARCH-FUNCTION).
      (dolist (pair '((point-min . re-search-forward)
                      (point-max . re-search-backward)))
        ;; Every regexp is a single-line string literal; a line break
        ;; inside one would silently change the pattern being timed.
        (dolist (regexp '("unsigned \\(?:char\\|int\\|long\\)"
                          "unsigned \\(?=char\\|int\\|long\\)"
                          "\\(?:unsigned \\)int"
                          "\\(?<=unsigned \\)int"))
          (setq count 0)
          (princ-list regexp
                      ": "
                      (car
                       (benchmark-run
                        10
                        (progn
                          (goto-char (funcall (car pair)))
                          (while (funcall (cdr pair) regexp nil t)
                            (setq count (1+ count))))))
                      " elapsed (" count " found)"))))))


-- 
(domestic pets only, the antidote for overdose, milk.)
   bloggy blog: http://lars.ingebrigtsen.no





reply via email to

[Prev in Thread] Current Thread [Next in Thread]