[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
emacs-29 a6d961ae2f 2/2: Add a new tree-sitter query predicate 'pred'
From: |
Yuan Fu |
Subject: |
emacs-29 a6d961ae2f 2/2: Add a new tree-sitter query predicate 'pred' |
Date: |
Mon, 26 Dec 2022 20:50:31 -0500 (EST) |
branch: emacs-29
commit a6d961ae2fd0eb93938f2afd932f4d3cb63a0412
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>
Add a new tree-sitter query predicate 'pred'
I realized that using an arbitrary function as the predicate in
queries is very helpful for some queries I'm writing for python and
javascript, and presumably most other languages[1].
Granted, we can already filter out unwanted nodes by using a function
instead of a face for the capture name, and (1) determine whether the
captured node is valid and (2) fontify that node if it's valid.
However, such an approach is a bit more cumbersome and, more importantly,
gets in the way of another potential use of the fontification queries:
context extraction.
For example, I could use the query for the 'variable' feature to get
all the variables in a certain region. In this use-case, we want the
filtering to happen before returning the captured nodes.
Besides, the change is relatively small and straightforward: most of the
code is already there; I just need to add some boilerplate.
[1] For a code like aa.bb(cc), we want bb to be in function face,
because obviously it's a function. But for aa.bb, we want bb to be in
property face, because it's a property. In the AST, bb is always a
property, the difference between the two cases is the enclosing node:
in the first case, aa.bb is in a "call_expression" node, indicating
that bb is used as a function (a method). So we want a predicate
function that checks whether bb is used as a function or a property,
and determines whether it should be in function or property face.
* doc/lispref/parsing.texi (Pattern Matching): Update manual.
* src/treesit.c (Ftreesit_pattern_expand): Handle :pred.
(treesit_predicate_capture_name_to_node): A new function extracted
from treesit_predicate_capture_name_to_text.
(treesit_predicate_capture_name_to_text): Use the newly extracted
function.
(treesit_predicate_pred): New predicate function.
(treesit_eval_predicates): Add new predicate. Also fix a bug: we want
to AND the results of each predicate.
* test/src/treesit-tests.el (treesit--ert-pred-last-sibling): New
helper function.
(treesit-query-api): Test #pred predicate.
---
doc/lispref/parsing.texi | 14 ++++++++----
src/treesit.c | 57 +++++++++++++++++++++++++++++++++++++----------
test/src/treesit-tests.el | 13 ++++++++---
3 files changed, 65 insertions(+), 19 deletions(-)
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi
index 5d1b11935c..63741b69c2 100644
--- a/doc/lispref/parsing.texi
+++ b/doc/lispref/parsing.texi
@@ -1266,10 +1266,11 @@ example, with the following pattern:
@end example
@noindent
-tree-sitter only matches arrays where the first element equals to
-the last element. To attach a predicate to a pattern, we need to
-group them together. A predicate always starts with a @samp{#}.
-Currently there are two predicates, @code{#equal} and @code{#match}.
+tree-sitter only matches arrays where the first element equals to the
+last element. To attach a predicate to a pattern, we need to group
+them together. A predicate always starts with a @samp{#}. Currently
+there are three predicates, @code{#equal}, @code{#match}, and
+@code{#pred}.
@deffn Predicate equal arg1 arg2
Matches if @var{arg1} equals to @var{arg2}. Arguments can be either
@@ -1282,6 +1283,11 @@ Matches if the text that @var{capture-name}'s node spans
in the buffer
matches regular expression @var{regexp}. Matching is case-sensitive.
@end deffn
+@deffn Predicate pred fn &rest nodes
+Matches if function @var{fn} returns non-@code{nil} when passed each
+node in @var{nodes} as arguments.
+@end deffn
+
Note that a predicate can only refer to capture names that appear in
the same pattern. Indeed, it makes little sense to refer to capture
names in other patterns.
diff --git a/src/treesit.c b/src/treesit.c
index ecc977745a..813d4222f9 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -2170,6 +2170,8 @@ See Info node `(elisp)Pattern Matching' for detailed
explanation. */)
return build_pure_c_string ("#equal");
if (EQ (pattern, QCmatch))
return build_pure_c_string ("#match");
+ if (EQ (pattern, QCpred))
+ return build_pure_c_string ("#pred");
Lisp_Object opening_delimeter
= build_pure_c_string (VECTORP (pattern) ? "[" : "(");
Lisp_Object closing_delimiter
@@ -2269,10 +2271,10 @@ treesit_predicates_for_pattern (TSQuery *query,
uint32_t pattern_index)
return Fnreverse (result);
}
-/* Translate a capture NAME (symbol) to the text of the captured node.
+/* Translate a capture NAME (symbol) to a node.
Signals treesit-query-error if such node is not captured. */
static Lisp_Object
-treesit_predicate_capture_name_to_text (Lisp_Object name,
+treesit_predicate_capture_name_to_node (Lisp_Object name,
struct capture_range captures)
{
Lisp_Object node = Qnil;
@@ -2292,6 +2294,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name,
name, build_pure_c_string ("A predicate can only refer"
" to captured nodes in the "
"same pattern"));
+ return node;
+}
+
+/* Translate a capture NAME (symbol) to the text of the captured node.
+ Signals treesit-query-error if such node is not captured. */
+static Lisp_Object
+treesit_predicate_capture_name_to_text (Lisp_Object name,
+ struct capture_range captures)
+{
+ Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
struct buffer *old_buffer = current_buffer;
set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
@@ -2365,13 +2377,30 @@ treesit_predicate_match (Lisp_Object args, struct
capture_range captures)
return false;
}
-/* About predicates: I decide to hard-code predicates in C instead of
- implementing an extensible system where predicates are translated
- to Lisp functions, and new predicates can be added by extending a
- list of functions, because I really couldn't imagine any useful
- predicates besides equal and match. If we later found out that
- such system is indeed useful and necessary, it can be easily
- added. */
+/* Handle predicate (#pred FN ARG...). Return true if FN returns
+ non-nil; return false otherwise. The arity of FN must match the
+ number of ARGs. */
+static bool
+treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
+{
+ if (XFIXNUM (Flength (args)) < 2)
+ xsignal2 (Qtreesit_query_error,
+ build_pure_c_string ("Predicate `pred' requires "
+ "at least two arguments, "
+ "but was only given"),
+ Flength (args));
+
+ Lisp_Object fn = Fintern (XCAR (args), Qnil);
+ Lisp_Object nodes = Qnil;
+ Lisp_Object tail = XCDR (args);
+ FOR_EACH_TAIL (tail)
+ nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail),
+ captures),
+ nodes);
+ nodes = Fnreverse (nodes);
+
+ return !NILP (CALLN (Fapply, fn, nodes));
+}
/* If all predicates in PREDICATES passes, return true; otherwise
return false. */
@@ -2387,14 +2416,17 @@ treesit_eval_predicates (struct capture_range captures,
Lisp_Object predicates)
Lisp_Object fn = XCAR (predicate);
Lisp_Object args = XCDR (predicate);
if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal"))))
- pass = treesit_predicate_equal (args, captures);
+ pass &= treesit_predicate_equal (args, captures);
else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match"))))
- pass = treesit_predicate_match (args, captures);
+ pass &= treesit_predicate_match (args, captures);
+ else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred"))))
+ pass &= treesit_predicate_pred (args, captures);
else
xsignal3 (Qtreesit_query_error,
build_pure_c_string ("Invalid predicate"),
fn, build_pure_c_string ("Currently Emacs only supports"
- " equal and match predicate"));
+ " equal, match, and pred"
+ " predicate"));
}
/* If all predicates passed, add captures to result list. */
return pass;
@@ -3217,6 +3249,7 @@ syms_of_treesit (void)
DEFSYM (QCanchor, ":anchor");
DEFSYM (QCequal, ":equal");
DEFSYM (QCmatch, ":match");
+ DEFSYM (QCpred, ":pred");
DEFSYM (Qnot_found, "not-found");
DEFSYM (Qsymbol_error, "symbol-error");
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el
index 3fe59a78d0..3770a4d01e 100644
--- a/test/src/treesit-tests.el
+++ b/test/src/treesit-tests.el
@@ -335,6 +335,9 @@ BODY is the test body."
;;; Query
+(defun treesit--ert-pred-last-sibling (node)
+ (null (treesit-node-next-sibling node t)))
+
(ert-deftest treesit-query-api ()
"Tests for query API."
(skip-unless (treesit-language-available-p 'json))
@@ -357,13 +360,16 @@ BODY is the test body."
(pair key: (_) @keyword)
((_) @bob (#match \"^B.b$\" @bob))
(number) @number
-((number) @n3 (#equal \"3\" @n3)) "
+((number) @n3 (#equal \"3\" @n3))
+((number) @n3p (#pred treesit--ert-pred-last-sibling @n3p))"
;; Sexp query.
((string) @string
(pair key: (_) @keyword)
((_) @bob (:match "^B.b$" @bob))
(number) @number
- ((number) @n3 (:equal "3" @n3)))))
+ ((number) @n3 (:equal "3" @n3))
+ ((number) @n3p (:pred treesit--ert-pred-last-sibling
+ @n3p)))))
;; Test `treesit-query-compile'.
(dolist (query (list query1
(treesit-query-compile 'json query1)))
@@ -375,7 +381,8 @@ BODY is the test body."
(string . "\"Bob\"")
(bob . "Bob")
(number . "3")
- (n3 . "3"))
+ (n3 . "3")
+ (n3p . "3"))
(mapcar (lambda (entry)
(cons (car entry)
(treesit-node-text