bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

unreachable states after conflict resolution (was: Re: LR(1) parser generator based on Pager's algorithm)


From: Joel E. Denny
Subject: unreachable states after conflict resolution (was: Re: LR(1) parser generator based on Pager's algorithm)
Date: Sun, 6 May 2007 22:57:27 -0400 (EDT)

On Sun, 1 Apr 2007, Joel E. Denny wrote:

> Menhir version 20070322 does not detect the problem.  It does at least 
> report a useless production (A: 'a' 'a') that results from the problem, 
> and Bison (sigh) fails to report even that because it fails to eliminate 
> the associated unreachable state.

I committed the following to fix this.

Index: ChangeLog
===================================================================
RCS file: /sources/bison/bison/ChangeLog,v
retrieving revision 1.1696
diff -p -u -r1.1696 ChangeLog
--- ChangeLog   7 May 2007 02:28:42 -0000       1.1696
+++ ChangeLog   7 May 2007 02:51:10 -0000
@@ -1,5 +1,27 @@
 2007-05-06  Joel E. Denny  <address@hidden>
 
+       If conflict resolution makes states unreachable, remove those states,
+       report rules that are then unused, and don't report conflicts in those
+       states.
+       * src/conflicts.c, src/conflicts.h (conflicts_update_state_numbers):
+       New global function.
+       * src/lalr.c, src/lalr.h (lalr_update_state_numbers): New global
+       function.
+       * src/main.c (main): After conflict resolution, remove the unreachable
+       states and update all data structures that reference states by number.
+       * src/state.c (state_new): Initialize each state's reachable member to
+       false.
+       (state_mark_reachable_states): New static function.
+       (state_remove_unreachable_states): New global function.
+       * src/state.h (struct state): Add member bool reachable.
+       (state_remove_unreachable_states): Prototype.
+       * tests/conflicts.at (Unreachable States After Conflict Resolution):
+       New test case.
+       * tests/existing.at (GNU pic Grammar): Update test case output now that
+       an unused rule is discovered.
+
+2007-05-06  Joel E. Denny  <address@hidden>
+
        Minor code cleanup in parser table construction.
        * src/LR0.c (new_itemsets): Use item_number_is_symbol_number.
        (new_itemsets, save_reductions): Update for rename to nitemset.
Index: src/conflicts.c
===================================================================
RCS file: /sources/bison/bison/src/conflicts.c,v
retrieving revision 1.116
diff -p -u -r1.116 conflicts.c
--- src/conflicts.c     10 Jun 2006 03:02:23 -0000      1.116
+++ src/conflicts.c     7 May 2007 02:51:10 -0000
@@ -327,6 +327,16 @@ conflicts_solve (void)
 }
 
 
+void
+conflicts_update_state_numbers (state_number old_to_new[],
+                                state_number nstates_old)
+{
+  for (state_number i = 0; i < nstates_old; ++i)
+    if (old_to_new[i] != nstates_old)
+      conflicts[old_to_new[i]] = conflicts[i];
+}
+
+
 /*---------------------------------------------.
 | Count the number of shift/reduce conflicts.  |
 `---------------------------------------------*/
Index: src/conflicts.h
===================================================================
RCS file: /sources/bison/bison/src/conflicts.h,v
retrieving revision 1.16
diff -p -u -r1.16 conflicts.h
--- src/conflicts.h     10 Jun 2006 03:02:23 -0000      1.16
+++ src/conflicts.h     7 May 2007 02:51:10 -0000
@@ -23,6 +23,19 @@
 # include "state.h"
 
 void conflicts_solve (void);
+
+/**
+ * Update state numbers recorded in internal arrays such that:
+ *   - \c nstates_old is the old number of states.
+ *   - Where \c i is the old state number, <tt>old_to_new[i]</tt> is either:
+ *     - \c nstates_old if state \c i is removed because it is unreachable.
+ *     - The new state number.
+ *   - The highest new state number is the number of remaining states - 1.
+ *   - The numerical order of the remaining states has not changed.
+ */
+void conflicts_update_state_numbers (state_number old_to_new[],
+                                     state_number nstates_old);
+
 void conflicts_print (void);
 int conflicts_total_count (void);
 void conflicts_output (FILE *out);
Index: src/lalr.c
===================================================================
RCS file: /sources/bison/bison/src/lalr.c,v
retrieving revision 1.111
diff -p -u -r1.111 lalr.c
--- src/lalr.c  7 May 2007 02:28:42 -0000       1.111
+++ src/lalr.c  7 May 2007 02:51:11 -0000
@@ -454,6 +454,37 @@ lalr (void)
 
 
 void
+lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
+{
+  goto_number ngotos_reachable = 0;
+  symbol_number nonterminal = 0;
+  aver (nsyms == nvars + ntokens);
+  for (goto_number i = 0; i < ngotos; ++i)
+    {
+      while (i == goto_map[nonterminal])
+        goto_map[nonterminal++] = ngotos_reachable;
+      /* If old_to_new[from_state[i]] = nstates_old, remove this goto
+         entry.  */
+      if (old_to_new[from_state[i]] != nstates_old)
+        {
+          /* from_state[i] is not removed, so it and thus to_state[i] are
+             reachable, so to_state[i] != nstates_old.  */
+          aver (old_to_new[to_state[i]] != nstates_old);
+          from_state[ngotos_reachable] = old_to_new[from_state[i]];
+          to_state[ngotos_reachable] = old_to_new[to_state[i]];
+          ++ngotos_reachable;
+        }
+    }
+  while (nonterminal <= nvars)
+    {
+      aver (ngotos == goto_map[nonterminal]);
+      goto_map[nonterminal++] = ngotos_reachable;
+    }
+  ngotos = ngotos_reachable;
+}
+
+
+void
 lalr_free (void)
 {
   state_number s;
Index: src/lalr.h
===================================================================
RCS file: /sources/bison/bison/src/lalr.h,v
retrieving revision 1.34
diff -p -u -r1.34 lalr.h
--- src/lalr.h  27 Jun 2006 14:09:53 -0000      1.34
+++ src/lalr.h  7 May 2007 02:51:11 -0000
@@ -46,6 +46,18 @@
 */
 void lalr (void);
 
+/**
+ * Update state numbers recorded in #goto_map, #from_state, and #to_state such
+ * that:
+ *   - \c nstates_old is the old number of states.
+ *   - Where \c i is the old state number, <tt>old_to_new[i]</tt> is either:
+ *     - \c nstates_old if state \c i is removed because it is unreachable.
+ *       Thus, remove all goto entries involving this state.
+ *     - The new state number.
+ */
+void lalr_update_state_numbers (state_number old_to_new[],
+                                state_number nstates_old);
+
 
 /** Release the information related to lookahead tokens.
 
Index: src/main.c
===================================================================
RCS file: /sources/bison/bison/src/main.c,v
retrieving revision 1.94
diff -p -u -r1.94 main.c
--- src/main.c  17 Jan 2007 08:36:07 -0000      1.94
+++ src/main.c  7 May 2007 02:51:11 -0000
@@ -115,6 +115,13 @@ main (int argc, char *argv[])
      declarations.  */
   timevar_push (TV_CONFLICTS);
   conflicts_solve ();
+  {
+    state_number old_to_new[nstates];
+    state_number nstates_old = nstates;
+    state_remove_unreachable_states (old_to_new);
+    lalr_update_state_numbers (old_to_new, nstates_old);
+    conflicts_update_state_numbers (old_to_new, nstates_old);
+  }
   conflicts_print ();
   timevar_pop (TV_CONFLICTS);
 
Index: src/state.c
===================================================================
RCS file: /sources/bison/bison/src/state.c,v
retrieving revision 1.42
diff -p -u -r1.42 state.c
--- src/state.c 15 Sep 2006 16:34:48 -0000      1.42
+++ src/state.c 7 May 2007 02:51:12 -0000
@@ -145,6 +145,7 @@ state_new (symbol_number accessing_symbo
   res->errs = NULL;
   res->consistent = 0;
   res->solved_conflicts = NULL;
+  res->reachable = false;
 
   res->nitems = nitems;
   memcpy (res->items, core, items_size);
@@ -352,6 +353,44 @@ state_hash_lookup (size_t nitems, item_n
   return entry;
 }
 
+
+/*------------------------------------------------------.
+| Mark S and all states reachable from S as reachable.  |
+`------------------------------------------------------*/
+
+static void
+state_mark_reachable_states (state *s)
+{
+  if (s->reachable)
+    return;
+  s->reachable = true;
+  for (int i = 0; i < s->transitions->num; ++i)
+    if (!TRANSITION_IS_DISABLED (s->transitions, i))
+      state_mark_reachable_states (s->transitions->states[i]);
+}
+
+void
+state_remove_unreachable_states (state_number old_to_new[])
+{
+  state_number nstates_reachable = 0;
+  state_mark_reachable_states (states[0]);
+  for (state_number i = 0; i < nstates; ++i)
+    {
+      if (states[i]->reachable)
+        {
+          states[nstates_reachable] = states[i];
+          states[nstates_reachable]->number = nstates_reachable;
+          old_to_new[i] = nstates_reachable++;
+        }
+      else
+        {
+          state_free (states[i]);
+          old_to_new[i] = nstates;
+        }
+    }
+  nstates = nstates_reachable;
+}
+
 /* All the decorated states, indexed by the state number.  */
 state **states = NULL;
 
Index: src/state.h
===================================================================
RCS file: /sources/bison/bison/src/state.h,v
retrieving revision 1.51
diff -p -u -r1.51 state.h
--- src/state.h 7 May 2007 02:28:42 -0000       1.51
+++ src/state.h 7 May 2007 02:51:12 -0000
@@ -209,6 +209,11 @@ struct state
      a human readable description of the resolution.  */
   const char *solved_conflicts;
 
+  /* Conflict resolution sometimes makes states unreachable.  Initialized to
+     false in state_new and then used by state_remove_unreachable_states after
+     conflicts_solve.  */
+  bool reachable;
+
   /* Its items.  Must be last, since ITEMS can be arbitrarily large.
      */
   size_t nitems;
@@ -248,9 +253,16 @@ state *state_hash_lookup (size_t core_si
 /* Insert STATE in the state hash table.  */
 void state_hash_insert (state *s);
 
+/* Remove unreachable states, renumber remaining states, update NSTATES, and
+   write to OLD_TO_NEW a mapping of old state numbers to new state numbers such
+   that the old value of NSTATES is written as the new state number for removed
+   states.  The size of OLD_TO_NEW must be the old value of NSTATES.  */
+void state_remove_unreachable_states (state_number old_to_new[]);
+
 /* All the states, indexed by the state number.  */
 extern state **states;
 
 /* Free all the states.  */
 void states_free (void);
+
 #endif /* !STATE_H_ */
Index: tests/conflicts.at
===================================================================
RCS file: /sources/bison/bison/tests/conflicts.at,v
retrieving revision 1.32
diff -p -u -r1.32 conflicts.at
--- tests/conflicts.at  6 Oct 2006 06:57:00 -0000       1.32
+++ tests/conflicts.at  7 May 2007 02:51:13 -0000
@@ -617,3 +617,201 @@ e:   e '+' e
 
 AT_CHECK([bison -o input.c input.y])
 AT_CLEANUP
+
+
+## ---------------------------------------------- ##
+## Unreachable States After Conflict Resolution.  ##
+## ---------------------------------------------- ##
+
+AT_SETUP([[Unreachable States After Conflict Resolution]])
+
+# If conflict resolution makes states unreachable, remove those states, report
+# rules that are then unused, and don't report conflicts in those states.  Test
+# what happens when a nonterminal becomes useless as a result of state removal
+# since that causes lalr.o's goto map to be rewritten.
+
+AT_DATA([[input.y]],
+[[%output "input.c"
+%left 'a'
+
+%%
+
+start: resolved_conflict 'a' reported_conflicts 'a' ;
+
+/* S/R conflict resolved as reduce, so the state with item
+ * (resolved_conflict: 'a' . unreachable1) and all its transition successors are
+ * unreachable, and the associated production is useless.  */
+resolved_conflict:
+    'a' unreachable1
+  | %prec 'a'
+  ;
+
+/* S/R conflict that need not be reported since it is unreachable because of
+ * the previous conflict resolution.  Nonterminal unreachable1 and all its
+ * productions are useless.  */
+unreachable1:
+    'a' unreachable2
+  |
+  ;
+
+/* Likewise for a R/R conflict and nonterminal unreachable2.  */
+unreachable2: | ;
+
+/* Make sure remaining S/R and R/R conflicts are still reported correctly even
+ * when their states are renumbered due to state removal.  */
+reported_conflicts:
+    'a'
+  | 'a'
+  |
+  ;
+
+]])
+
+AT_CHECK([[bison --report=all input.y]], 0, [],
+[[input.y: conflicts: 1 shift/reduce, 1 reduce/reduce
+input.y:12.5-20: warning: rule never reduced because of conflicts: resolved_conflict: 'a' unreachable1
+input.y:20.5-20: warning: rule never reduced because of conflicts: unreachable1: 'a' unreachable2
+input.y:21.4: warning: rule never reduced because of conflicts: unreachable1: /* empty */
+input.y:25.13: warning: rule never reduced because of conflicts: unreachable2: /* empty */
+input.y:25.16: warning: rule never reduced because of conflicts: unreachable2: /* empty */
+input.y:31.5-7: warning: rule never reduced because of conflicts: reported_conflicts: 'a'
+input.y:32.4: warning: rule never reduced because of conflicts: reported_conflicts: /* empty */
+]])
+
+AT_CHECK([[cat input.output]], 0,
+[[Rules never reduced
+
+    2 resolved_conflict: 'a' unreachable1
+
+    4 unreachable1: 'a' unreachable2
+    5             | /* empty */
+
+    6 unreachable2: /* empty */
+    7             | /* empty */
+
+    9 reported_conflicts: 'a'
+   10                   | /* empty */
+
+
+State 4 conflicts: 1 shift/reduce
+State 5 conflicts: 1 reduce/reduce
+
+
+Grammar
+
+    0 $accept: start $end
+
+    1 start: resolved_conflict 'a' reported_conflicts 'a'
+
+    2 resolved_conflict: 'a' unreachable1
+    3                  | /* empty */
+
+    4 unreachable1: 'a' unreachable2
+    5             | /* empty */
+
+    6 unreachable2: /* empty */
+    7             | /* empty */
+
+    8 reported_conflicts: 'a'
+    9                   | 'a'
+   10                   | /* empty */
+
+
+Terminals, with rules where they appear
+
+$end (0) 0
+'a' (97) 1 2 4 8 9
+error (256)
+
+
+Nonterminals, with rules where they appear
+
+$accept (4)
+    on left: 0
+start (5)
+    on left: 1, on right: 0
+resolved_conflict (6)
+    on left: 2 3, on right: 1
+unreachable1 (7)
+    on left: 4 5, on right: 2
+unreachable2 (8)
+    on left: 6 7, on right: 4
+reported_conflicts (9)
+    on left: 8 9 10, on right: 1
+
+
+state 0
+
+    0 $accept: . start $end
+    1 start: . resolved_conflict 'a' reported_conflicts 'a'
+    2 resolved_conflict: . 'a' unreachable1
+    3                  | .  ['a']
+
+    $default  reduce using rule 3 (resolved_conflict)
+
+    start              go to state 1
+    resolved_conflict  go to state 2
+
+    Conflict between rule 3 and token 'a' resolved as reduce (%left 'a').
+
+
+state 1
+
+    0 $accept: start . $end
+
+    $end  shift, and go to state 3
+
+
+state 2
+
+    1 start: resolved_conflict . 'a' reported_conflicts 'a'
+
+    'a'  shift, and go to state 4
+
+
+state 3
+
+    0 $accept: start $end .
+
+    $default  accept
+
+
+state 4
+
+    1 start: resolved_conflict 'a' . reported_conflicts 'a'
+    8 reported_conflicts: . 'a'
+    9                   | . 'a'
+   10                   | .  ['a']
+
+    'a'  shift, and go to state 5
+
+    'a'  [reduce using rule 10 (reported_conflicts)]
+
+    reported_conflicts  go to state 6
+
+
+state 5
+
+    8 reported_conflicts: 'a' .  ['a']
+    9                   | 'a' .  ['a']
+
+    'a'       reduce using rule 8 (reported_conflicts)
+    'a'       [reduce using rule 9 (reported_conflicts)]
+    $default  reduce using rule 8 (reported_conflicts)
+
+
+state 6
+
+    1 start: resolved_conflict 'a' reported_conflicts . 'a'
+
+    'a'  shift, and go to state 7
+
+
+state 7
+
+    1 start: resolved_conflict 'a' reported_conflicts 'a' .
+ 
+    $default  reduce using rule 1 (start)
+]])
+
+AT_CLEANUP
Index: tests/existing.at
===================================================================
RCS file: /sources/bison/bison/tests/existing.at,v
retrieving revision 1.10
diff -p -u -r1.10 existing.at
--- tests/existing.at   22 Jan 2006 08:02:47 -0000      1.10
+++ tests/existing.at   7 May 2007 02:51:13 -0000
@@ -1520,6 +1520,8 @@ expr:
 # Pass plenty of options, to exercise plenty of code, even if we
 # don't actually check the output.  But SEGV is watching us, and
 # so might do dmalloc.
-AT_CHECK([[bison --verbose --defines input.y]], 0, [], [])
+AT_CHECK([[bison --verbose --defines input.y]], 0, [],
+[[input.y:453.11-48: warning: rule never reduced because of conflicts: path: ORDINAL LAST object_type relative_path
+]])
 
 AT_CLEANUP




reply via email to

[Prev in Thread] Current Thread [Next in Thread]