[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 3/3] yacc: use the most appropriate integral type for state numbe
From: |
Akim Demaille |
Subject: |
[PATCH 3/3] yacc: use the most appropriate integral type for state numbers |
Date: |
Sun, 29 Sep 2019 18:34:11 +0200 |
Currently we properly use the "best" integral type for tables,
including those storing state numbers. However the variables for
state numbers used in yyparse (and its dependencies such as
yy_stack_print) still use int16_t invariably. As a consequence, very
large models overflow these variables.
Let's use the "best" type for these variables too. It turns out that
we can still use 16 bits for twice larger automata: stick to unsigned
types.
Reported by Tom Kramer.
https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html
* data/skeletons/yacc.c (yy_state_num): Be computed from YYNSTATES.
Adjust an assertion: yystate can no longer be negative, and compilers
warn about that.
* tests/linear: New.
* tests/torture.at (State number type): New.
Use it.
---
THANKS | 1 +
data/skeletons/yacc.c | 5 +--
tests/linear | 94 +++++++++++++++++++++++++++++++++++++++++++
tests/local.mk | 2 +-
tests/torture.at | 43 +++++++++++++++++---
5 files changed, 135 insertions(+), 10 deletions(-)
create mode 100755 tests/linear
diff --git a/THANKS b/THANKS
index 2cdd9b0b..569ba173 100644
--- a/THANKS
+++ b/THANKS
@@ -179,6 +179,7 @@ Tim Landscheidt address@hidden
Tim Van Holder address@hidden
Tobias Frost address@hidden
Todd Freed address@hidden
+Tom Kramer address@hidden
Tom Lane address@hidden
Tom Tromey address@hidden
Tomasz Kłoczko address@hidden
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index c15f73d4..74ef76d6 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -432,8 +432,7 @@ typedef short yytype_int16;
/* State numbers. */
-typedef yytype_int16 yy_state_num;
-
+typedef ]b4_int_type(0, m4_eval(b4_states_number - 1))[ yy_state_num;
#ifndef YY_
# if defined YYENABLE_NLS && YYENABLE_NLS
@@ -1505,7 +1504,7 @@ yynewstate:
`--------------------------------------------------------------------*/
yysetstate:
YYDPRINTF ((stderr, "Entering state %d\n", yystate));
- YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
+ YY_ASSERT (yystate < YYNSTATES);
*yyssp = yystate;
if (yyss + yystacksize - 1 <= yyssp)
diff --git a/tests/linear b/tests/linear
new file mode 100755
index 00000000..d522bd82
--- /dev/null
+++ b/tests/linear
@@ -0,0 +1,94 @@
+#! /usr/bin/env ruby
+
+# Build a grammar whose LALR(1) parser has a given number of states.
+# Useful to test edge cases (e.g., 256 and 257 states, etc.).
+
+class Linear
+ def initialize(states)
+ @states = states - 4
+ @cols = Math.sqrt(@states).to_i
+ @lines = @states / @cols
+ @rest = @states % @cols
+
+ @n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest)
+ end
+
+ def nterms
+ last = @lines + ([0, 1].include?(@rest) ? 0 : 1)
+ (0...last).map { |i| "t#{i}" }.join(' ')
+ end
+
+ def rules
+ res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n")
+ case @rest
+ when 0
+ res += ' N'
+ when 1
+ res += ' N N'
+ else
+ res += "\nt#{@lines}:#{" N" * @rest}"
+ end
+ res
+ end
+
+ def to_s
+ puts <<~EOF
+ // states: #{@states}
+ // cols: #{@cols}
+ // lines: #{@lines}
+ // rest: #{@rest}
+ // n: #{@n}
+
+ %code {
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ static int yylex (void);
+ static void yyerror (const char *msg);
+ }
+
+ %debug
+ %define api.value.type union
+ %define parse.lac full
+ %define parse.error verbose
+ %printer { fprintf (yyo, "%ld", $$); } <long>
+ %token <long> N
+
+ %%
+
+ exp: #{nterms}
+
+ #{rules}
+
+ %%
+
+ static
+ int yylex (void)
+ {
+ static long count = 0;
+ if (count++ < #{@n})
+ {
+ yylval.N = count;
+ return N;
+ }
+ else
+ return 0;
+ }
+
+ static
+ void yyerror (const char *msg)
+ {
+ fprintf (stderr, "%s\\n", msg);
+ }
+
+ int
+ main (int argc, const char **argv)
+ {
+ yydebug = !!getenv ("YYDEBUG");
+ return yyparse ();
+ }
+ EOF
+ end
+end
+
+puts Linear.new(ARGV[0].to_i).to_s
diff --git a/tests/local.mk b/tests/local.mk
index 9a2b4d51..9f55e552 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -15,7 +15,7 @@
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
-EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
+EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS)
MAINTAINERCLEANFILES += $(TESTSUITE)
diff --git a/tests/torture.at b/tests/torture.at
index b91e059a..29cbbdd3 100644
--- a/tests/torture.at
+++ b/tests/torture.at
@@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
# Ask for 200 MiB, which should be plenty even on a 64-bit host.
AT_INCREASE_DATA_SIZE(204000)
-AT_BISON_CHECK_NO_XML([-v -o input.c input.y])
+AT_BISON_CHECK_NO_XML([-o input.c input.y])
AT_COMPILE([input])
AT_PARSER_CHECK([input])
@@ -241,6 +241,42 @@ AT_CLEANUP
+## ------------------- ##
+## State number type. ##
+## ------------------- ##
+
+# AT_TEST(NUM-STATES, TYPE)
+# -------------------------
+# Check that automaton with NUM-STATES uses TYPE has state number type.
+# Check that parser works.
+
+m4_pushdef([AT_TEST],
+[AT_SETUP([State number type: $1 states])
+
+AT_BISON_OPTION_PUSHDEFS
+
+AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y])
+AT_FULL_COMPILE([input])
+AT_CHECK([grep 'define YYNSTATES *$1' input.c], [], [ignore])
+AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore])
+AT_PARSER_CHECK([input])
+
+AT_BISON_OPTION_POPDEFS
+AT_CLEANUP])
+
+AT_TEST( [256], [yytype_uint8])
+AT_TEST( [257], [yytype_uint16])
+AT_TEST([65536], [yytype_uint16])
+AT_TEST([65537], [unsigned])
+
+m4_popdef([AT_TEST])
+
+
+
+## ------------------------ ##
+## Many lookahead tokens. ##
+## ------------------------ ##
+
# AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
# --------------------------------------------------
# Create FILE-NAME, containing a self checking parser for a grammar
@@ -340,11 +376,6 @@ mv stdout $1
AT_BISON_OPTION_POPDEFS
])
-
-## ------------------------ ##
-## Many lookahead tokens. ##
-## ------------------------ ##
-
AT_SETUP([Many lookahead tokens])
AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])
--
2.23.0