emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/xr eb4dd40a92 5/7: Add check for or-pattern that could be character alternatives


From: ELPA Syncer
Subject: [elpa] externals/xr eb4dd40a92 5/7: Add check for or-pattern that could be character alternatives
Date: Tue, 1 Aug 2023 09:59:28 -0400 (EDT)

branch: externals/xr
commit eb4dd40a927d7faf909a69cb71a91833bce8069c
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Add check for or-pattern that could be character alternatives
    
    Only enabled for checks = `all`.
---
 README     |  9 +++++++++
 xr-test.el |  9 +++++++++
 xr.el      | 30 +++++++++++++++++++++++++++++-
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/README b/README
index fe9ae7cc11..5a47cd510a 100644
--- a/README
+++ b/README
@@ -143,6 +143,15 @@ The xr package can be used interactively or by other code as a library.
     A character class occurs twice in a single character alternative
     or skip set.
 
+  - Or-pattern more efficiently expressed as character alternative
+
+    When an or-pattern can be written as a character alternative, it
+    becomes more efficient and reduces regexp stack usage.
+    For example, a\|b is better written [ab], and \s-\|\sw is usually
+    better written [[:space:][:word:]]. (There is a subtle difference
+    in how syntax properties are handled but it rarely matters.)
+    This check is only enabled when CHECKS=all.
+
   - Duplicated alternative branch
 
     The same expression occurs in two different branches, like in
diff --git a/xr-test.el b/xr-test.el
index 4166766dc2..fd60cb2b21 100644
--- a/xr-test.el
+++ b/xr-test.el
@@ -499,6 +499,15 @@
                        (if (eq checks 'all)
                            '((10 . "Possibly mistyped `:?' at start of group"))
                          nil)))
+        (should
+         (equal
+          (xr-lint "%\\|[abc]\\|[[:digit:]]\\|\\s-\\|\\s_"
+                   nil checks)
+          (if (eq checks 'all)
+              '((3 . "Or-pattern more efficiently expressed as character alternative")
+                (10 . "Or-pattern more efficiently expressed as character alternative")
+                (23 . "Or-pattern more efficiently expressed as character alternative"))
+            nil)))
         ))))
 
 (ert-deftest xr-lint-repetition-of-empty ()
diff --git a/xr.el b/xr.el
index 14231ea796..e7ede29080 100644
--- a/xr.el
+++ b/xr.el
@@ -1481,6 +1481,27 @@ A-SETS and B-SETS are arguments to `any'."
 
        (_ (equal a b))))))
 
+(defun xr--char-alt-equivalent-p (x)
+  "Non-nil if X could be expressed as a combinable character alternative."
+  ;; `nonl' is excluded: it is either already warned about because of
+  ;; subsumption, or it occurs in patterns like (or nonl "\n"), which is
+  ;; just a slightly less efficient way of expressing `anychar'.
+  ;; `not'-forms are excluded as well because they usually don't combine
+  ;; in an `or'-expression to make an `any' form.
+  (pcase x
+    ((pred stringp) (= (length x) 1))   ; only single-character strings
+    ((or 'ascii 'alnum 'alpha 'blank 'cntrl 'digit 'graph
+         'lower 'multibyte 'nonascii 'print 'punct 'space
+         'unibyte 'upper 'word 'xdigit
+         'anything)
+     t)
+    (`(any . ,_) t)                     ; any `any' form, whatever its args
+    ;; Assume for this purpose that \sw and \s- are equivalent to
+    ;; [[:word:]] and [[:space:]] even though they differ in whether syntax
+    ;; properties are respected, because for most uses this doesn't matter.
+    (`(syntax ,(or 'word 'whitespace)) t)
+    (`(or . ,ys) (cl-every #'xr--char-alt-equivalent-p ys)))) ; all branches
+
 (defun xr--parse-alt (warnings purpose checks)
   (let ((alternatives nil))             ; reversed
     (push (xr--parse-seq warnings purpose checks) alternatives)
@@ -1499,7 +1520,14 @@ A-SETS and B-SETS are arguments to `any'."
            ((cl-some (lambda (branch) (xr--superset-p branch seq))
                      alternatives)
             (xr--report warnings pos
-                        "Branch matches subset of a previous branch"))))
+                        "Branch matches subset of a previous branch"))
+           ((and (eq checks 'all)
+                 (xr--char-alt-equivalent-p (car alternatives))
+                 (xr--char-alt-equivalent-p seq))
+            (xr--report
+             warnings pos
+             "Or-pattern more efficiently expressed as character alternative"))
+           ))
         (push seq alternatives)))
     (if (cdr alternatives)
         ;; Simplify (or nonl "\n") to anything



reply via email to

[Prev in Thread] Current Thread [Next in Thread]