emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

emacs-29 e492c21e81 1/5: Fix treesit_cursor_helper (bug#60267)


From: Yuan Fu
Subject: emacs-29 e492c21e81 1/5: Fix treesit_cursor_helper (bug#60267)
Date: Sat, 24 Dec 2022 03:33:27 -0500 (EST)

branch: emacs-29
commit e492c21e81040b9539139b78f6baf98df17bbaab
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>

    Fix treesit_cursor_helper (bug#60267)
    
    The cause of that bug is that in a particular parse tree, the node
    treesit_cursor_helper tries to go to is a missing node, not only is it
    a missing node, it is the first node of a subtree.  So when
    treesit_cursor_helper follows the algorithm and goes down the tree, it
    goes down the previous subtree (because that subtree's end = end_pos,
    because the target node has zero width).
    
        o
        |
     o--+-o
     |    |
     +-+  +-+-+
     | |  | | |
     o x  t o o
    
    (We ended up in x when the target is t, because t has zero width.)
    
    One way to solve it is to go back up the tree if we are at a leaf node
    and still haven't matched the target node.  That's too ugly and
    finicky so I resorted to recursion.  Now one more functions will
    return give up (treesit_node_parent) if we are in a werid parse tree
    that is super deep.  But since we already kind of give up on this kind
    of parse trees (bug#59426), it doesn't really hurt.
    
    * src/treesit.c (treesit_cursor_helper_1): New function.
    (treesit_cursor_helper): Use the new function.  Change return type to
    bool, and accept a cursor pointer.
    
    (Ftreesit_node_parent)
    (Ftreesit_search_subtree)
    (Ftreesit_search_forward)
    (Ftreesit_induce_sparse_tree): Use the new signature.
---
 src/treesit.c | 129 ++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 76 insertions(+), 53 deletions(-)

diff --git a/src/treesit.c b/src/treesit.c
index c882d45513..dc2043e610 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1762,7 +1762,7 @@ If NODE is nil, return nil.  */)
   return build_string (string);
 }
 
-static TSTreeCursor treesit_cursor_helper (TSNode, Lisp_Object);
+static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);
 
 DEFUN ("treesit-node-parent",
        Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
@@ -1778,7 +1778,10 @@ Return nil if NODE has no parent.  If NODE is nil, 
return nil.  */)
 
   TSNode treesit_node = XTS_NODE (node)->node;
   Lisp_Object parser = XTS_NODE (node)->parser;
-  TSTreeCursor cursor = treesit_cursor_helper (treesit_node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, treesit_node, parser))
+    return return_value;
+
   if (ts_tree_cursor_goto_parent (&cursor))
   {
     TSNode parent = ts_tree_cursor_current_node (&cursor);
@@ -2637,8 +2640,59 @@ treesit_assume_true (bool val)
   eassert (val == true);
 }
 
+/* Tries to move CURSOR to point to TARGET.  END_POS is the end of
+   TARGET.  If success, return true, otherwise move CURSOR back to
+   starting position and return false.  LIMIT is the recursion
+   limit.  */
+static bool
+treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
+                        uint32_t end_pos, ptrdiff_t limit)
+{
+  if (limit <= 0)
+    return false;
+
+  TSNode cursor_node = ts_tree_cursor_current_node (cursor);
+  if (ts_node_eq (cursor_node, *target))
+    return true;
+
+  if (!ts_tree_cursor_goto_first_child (cursor))
+    return false;
+
+  /* Skip nodes that definitely don't contain TARGET.  */
+  while (ts_node_end_byte (cursor_node) < end_pos)
+    {
+      if (!ts_tree_cursor_goto_next_sibling (cursor))
+       break;
+      cursor_node = ts_tree_cursor_current_node (cursor);
+    }
+
+  /* Go through each sibling that could contain TARGET.  Because of
+     missing nodes (their width is 0), there could be multiple
+     siblings that could contain TARGET.  */
+  while (ts_node_start_byte (cursor_node) <= end_pos)
+    {
+      if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
+       return true;
+
+      if (!ts_tree_cursor_goto_next_sibling (cursor))
+       break;
+      cursor_node = ts_tree_cursor_current_node (cursor);
+    }
+
+  /* Couldn't find TARGET, must be not in this subtree, move cursor
+     back and pray that other brothers and sisters can succeed.  */
+  treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
+  return false;
+}
+
 /* Create a TSTreeCursor pointing at NODE.  PARSER is the lisp parser
-   that produced NODE.
+   that produced NODE.  If success, return true, otherwise return
+   false.  This function should almost always succeed, but if the parse
+   tree is strangely too deep and exceeds the recursion limit, this
+   function will fail and return false.
+
+   If this function returns true, caller needs to free CURSOR; if
+   returns false, caller don't need to free CURSOR.
 
    The reason we need this instead of simply using ts_tree_cursor_new
    is that we have to create the cursor on the root node and traverse
@@ -2646,56 +2700,16 @@ treesit_assume_true (bool val)
    Otherwise going to sibling or parent of NODE wouldn't work.
 
    (Wow perfect filling.)  */
-static TSTreeCursor
-treesit_cursor_helper (TSNode node, Lisp_Object parser)
+static bool
+treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
 {
   uint32_t end_pos = ts_node_end_byte (node);
   TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
-  TSTreeCursor cursor = ts_tree_cursor_new (root);
-  TSNode cursor_node = ts_tree_cursor_current_node (&cursor);
-  /* This is like treesit-node-at.  We go down from the root node,
-     either to first child or next sibling, repeatedly, and finally
-     arrive at NODE.  */
-  while (!ts_node_eq (node, cursor_node))
-    {
-      treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
-      cursor_node = ts_tree_cursor_current_node (&cursor);
-      /* ts_tree_cursor_goto_first_child_for_byte is not reliable, so
-        we just go through each sibling.  */
-      while (ts_node_is_missing (cursor_node)
-            || ts_node_end_byte (cursor_node) < end_pos)
-       {
-         /* A "missing" node has zero width, so it's possible that
-            its end = NODE.end but it's not NODE, so we skip them.
-            But we need to make sure this missing node is not the
-            node we are looking for before skipping it.  */
-         if (ts_node_is_missing (cursor_node)
-             && ts_node_eq (node, cursor_node))
-           return cursor;
-         treesit_assume_true (ts_tree_cursor_goto_next_sibling (&cursor));
-         cursor_node = ts_tree_cursor_current_node (&cursor);
-       }
-      /* Right now CURSOR.end >= NODE.end.  But what if CURSOR.end =
-        NODE.end, and there are missing nodes after CURSOR, and the
-        missing node after CURSOR is the NODE we are looking for??
-        Well, create a probe and look ahead.  (This is tested by
-        treesit-cursor-helper-with-missing-node.)  */
-      TSTreeCursor probe = ts_tree_cursor_copy (&cursor);
-      TSNode probe_node;
-      while (ts_tree_cursor_goto_next_sibling (&probe))
-       {
-         probe_node = ts_tree_cursor_current_node (&probe);
-         if (!ts_node_is_missing (probe_node))
-           break;
-         if (ts_node_eq (probe_node, node))
-           {
-             ts_tree_cursor_delete (&cursor);
-             return probe;
-           }
-       }
-      ts_tree_cursor_delete (&probe);
-    }
-  return cursor;
+  *cursor = ts_tree_cursor_new (root);
+  bool success = treesit_cursor_helper_1 (cursor, &node, end_pos, 1000);
+  if (!success)
+    ts_tree_cursor_delete (cursor);
+  return success;
 }
 
 /* Move CURSOR to the next/previous sibling.  FORWARD controls the
@@ -2968,7 +2982,10 @@ Return the first matched node, or nil if none matches.  
*/)
 
   Lisp_Object parser = XTS_NODE (node)->parser;
   Lisp_Object return_value = Qnil;
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
+    return return_value;
+
   if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
                          NILP (all), the_limit, false))
     {
@@ -3022,7 +3039,10 @@ always traverse leaf nodes first, then upwards.  */)
 
   Lisp_Object parser = XTS_NODE (start)->parser;
   Lisp_Object return_value = Qnil;
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
+    return return_value;
+
   if (treesit_search_forward (&cursor, predicate, parser,
                              NILP (backward), NILP (all)))
     {
@@ -3141,7 +3161,10 @@ a regexp.  */)
 
   Lisp_Object parser = XTS_NODE (root)->parser;
   Lisp_Object parent = Fcons (Qnil, Qnil);
-  TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser);
+  TSTreeCursor cursor;
+  if (!treesit_cursor_helper (&cursor, XTS_NODE (root)->node, parser))
+    return Qnil;
+
   treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
                             the_limit, parser);
   ts_tree_cursor_delete (&cursor);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]