[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] pkl: make the alien token recognizer smarter
From: Mohammad-Reza Nabipoor
Subject: [PATCH] pkl: make the alien token recognizer smarter
Date: Sun, 6 Oct 2024 02:01:08 +0200
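Instead of using the simplistic `$<[^>]*>' pattern to recognize delimited
alien tokens, this commit adds a set of new rules to the lexer that keep
track of nested ('<', '>') pairs, so that a delimited alien token ends at
the first '>' that balances its opening '<'.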
With this commit, an expression like `$<*0*> + $<*1>' is treated as the
addition of two separate alien tokens instead of a single
`*0*> + $<*1' token.
2024-10-06 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
* libpoke/pkl-lex.l: A set of new rules to make alien token
recognizer smarter (instead of using simplistic `$<[^>]*>'
pattern).
(make_alien_token): New helper function to
translate the alien string into a valid token.
(NOT_LT_GT): New name definition.
(ALIEN_TOKEN): New start condition.
(ALIEN_TOKEN_DELIM): Likewise.
* testsuite/poke.cmd/ios-4.pk: New test.
* testsuite/Makefile.am (EXTRA_DIST): Update.
---
Hi Jose.
This makes my life way easier in GDB when dealing with expressions like this:
(gdb) poke uint<8>[$<sizeof(buffer)>] @ $<&buffer>
which gives the following error:
<unknown>:1:9: error: can't access GDB variable 'sizeof(buffer)] @ $<&buffer'
This patch fixes this problem.
Regards,
Mohammad-Reza
ChangeLog | 13 ++
libpoke/pkl-lex.l | 280 ++++++++++++++++++++++--------------
libpoke/pkl-parser.h | 13 +-
testsuite/Makefile.am | 1 +
testsuite/poke.cmd/ios-4.pk | 7 +
5 files changed, 204 insertions(+), 110 deletions(-)
create mode 100644 testsuite/poke.cmd/ios-4.pk
diff --git a/ChangeLog b/ChangeLog
index 106f9539..9cce3edc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2024-10-06 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
+
+ * libpoke/pkl-lex.l: A set of new rules to make alien token
+ recognizer smarter (instead of using simplistic `$<[^>]*>'
+ pattern).
+ (make_alien_token): New helper function to
+ translate the alien string into a valid token.
+ (NOT_LT_GT): New name definition.
+ (ALIEN_TOKEN): New start condition.
+ (ALIEN_TOKEN_DELIM): Likewise.
+ * testsuite/poke.cmd/ios-4.pk: New test.
+ * testsuite/Makefile.am (EXTRA_DIST): Update.
+
2024-10-05 Mohammad-Reza Nabipoor <mnabipoor@gnu.org>
* doc/poke.texi: Replace old representation of weird integers
diff --git a/libpoke/pkl-lex.l b/libpoke/pkl-lex.l
index a7ec56d3..1f1b9125 100644
--- a/libpoke/pkl-lex.l
+++ b/libpoke/pkl-lex.l
@@ -224,6 +224,119 @@ build_overflow_error_msg (uint64_t value, int width)
return msg;
}
+static int
+make_alien_token (struct pkl_parser *parser, const char *text, YYSTYPE *lval,
+ int *retval, pkl_alien_token_handler_fn cb,
+ pkl_alien_dtoken_handler_fn dcb)
+{
+ char *errmsg;
+
+ if (pkl_lexical_cuckolding_p (parser->compiler)
+ && (cb != NULL || dcb != NULL))
+ {
+ struct pkl_alien_token *token
+ = (cb != NULL ? (*cb) (text, &errmsg)
+ : (*dcb) (text[0], text, &errmsg));
+
+ if (token == NULL)
+ {
+ /* Error from alien handler. */
+ parser->alien_errmsg = errmsg;
+ *retval = ALIEN;
+ return 0;
+ }
+
+ switch (token->kind)
+ {
+ case PKL_ALIEN_TOKEN_IDENTIFIER:
+ {
+ char *id = token->value.identifier;
+ lval->ast = pkl_ast_make_identifier (parser->ast, id);
+ free (id);
+ *retval = IDENTIFIER;
+ return 0;
+ }
+ case PKL_ALIEN_TOKEN_STRING:
+ {
+ pkl_ast_node type = pkl_ast_make_string_type (parser->ast);
+
+ lval->ast
+ = pkl_ast_make_string (parser->ast, token->value.string.str);
+ PKL_AST_TYPE (lval->ast) = ASTREF (type);
+
+ *retval = STR;
+ return 0;
+ }
+ case PKL_ALIEN_TOKEN_INTEGER:
+ {
+ uint64_t value = token->value.integer.magnitude;
+ int signed_p = token->value.integer.signed_p;
+ int width = token->value.integer.width;
+ pkl_ast_node type;
+
+ if (width == 0)
+ width = integer_literal_width (value, signed_p);
+ if (integer_literal_overflow_handling (&value, signed_p, width))
+ {
+ lval->exception_msg = build_overflow_error_msg (value, width);
+ *retval = LEXER_EXCEPTION;
+ return 0;
+ }
+
+ type = pkl_ast_make_integral_type (parser->ast, width, signed_p);
+
+ lval->ast = pkl_ast_make_integer (parser->ast, value);
+ PKL_AST_TYPE (lval->ast) = ASTREF (type);
+
+ *retval = INTEGER;
+ return 0;
+ }
+ case PKL_ALIEN_TOKEN_OFFSET:
+ {
+ uint64_t value = token->value.offset.magnitude;
+ int signed_p = token->value.offset.signed_p;
+ int width = token->value.offset.width;
+ pkl_ast_node unit, magnitude, magnitude_type, offset_type,
+ unit_type;
+
+ if (width == 0)
+ width = integer_literal_width (value, signed_p);
+ if (integer_literal_overflow_handling (&value, signed_p, width))
+ {
+ lval->exception_msg = build_overflow_error_msg (value, width);
+ *retval = LEXER_EXCEPTION;
+ return 0;
+ }
+
+ /* Build the offset magnitude. */
+ magnitude_type
+ = pkl_ast_make_integral_type (parser->ast, width, signed_p);
+ magnitude = pkl_ast_make_integer (parser->ast, value);
+ PKL_AST_TYPE (magnitude) = ASTREF (magnitude_type);
+
+ /* Build the offset unit. */
+ unit_type = pkl_ast_make_integral_type (parser->ast, 64, 0);
+ unit
+ = pkl_ast_make_integer (parser->ast, token->value.offset.unit);
+ PKL_AST_TYPE (unit) = ASTREF (unit_type);
+
+ /* Build the offset value itself. */
+ offset_type = pkl_ast_make_offset_type (
+ parser->ast, magnitude_type, unit, NULL /* ref_type */);
+ lval->ast = pkl_ast_make_offset (parser->ast, magnitude, unit);
+ PKL_AST_TYPE (lval->ast) = ASTREF (offset_type);
+ *retval = OFFSET;
+ return 0;
+ }
+ default:
+ PK_UNREACHABLE ();
+ break;
+ }
+ }
+
+ return -1;
+}
+
%}
NEWLINE (\r\n)|\n
@@ -244,9 +357,12 @@ A $
S ::
LT <
GT >
+NOT_LT_GT [^<>]
%x C_COMMENT
%x SHEBANG_COMMENT
+%x ALIEN_TOKEN
+%x ALIEN_TOKEN_DELIM
%%
@@ -419,122 +535,70 @@ GT >
"@!" { return NSMAP; }
"@" { return '@'; }
-({A}({L}|{D})({L}|{D}|({S}({L}|{D})))*)|({A}{LT}[^{GT}]*{GT}) {
- char *errmsg;
- pkl_alien_token_handler_fn cb = NULL;
- pkl_alien_dtoken_handler_fn dcb = NULL;
+({A}) { BEGIN(ALIEN_TOKEN); }
+<ALIEN_TOKEN>(({L}|{D})({L}|{D}|({S}({L}|{D})))*) {
+ int token;
- if (yytext[1] == '<')
- dcb = pkl_alien_dtoken_fn (yyextra->compiler);
- else
- cb = pkl_alien_token_fn (yyextra->compiler);
-
- if (pkl_lexical_cuckolding_p (yyextra->compiler)
- && (cb != NULL || dcb != NULL))
+ yylloc->first_column -= /* {A} */ 1;
+ BEGIN (INITIAL);
+ if (make_alien_token (yyextra, yytext, yylval, &token,
+ pkl_alien_token_fn (yyextra->compiler), NULL)
+ == -1)
{
- struct pkl_alien_token *token
- = (cb != NULL
- ? (*cb) (yytext + 1, &errmsg)
- : (*dcb) (yytext[1], yytext + 1, &errmsg));
-
- if (token == NULL)
- {
- /* Error from alien handler. */
- yyextra->alien_errmsg = errmsg;
- return ALIEN;
- break;
- }
-
- switch (token->kind)
- {
- case PKL_ALIEN_TOKEN_IDENTIFIER:
- {
- char *id = token->value.identifier;
- yylval->ast = pkl_ast_make_identifier (yyextra->ast,
- id);
- free (id);
- return IDENTIFIER;
- break;
- }
- case PKL_ALIEN_TOKEN_STRING:
- {
- pkl_ast_node type = pkl_ast_make_string_type (yyextra->ast);
-
- yylval->ast
- = pkl_ast_make_string (yyextra->ast, token->value.string.str);
- PKL_AST_TYPE (yylval->ast) = ASTREF (type);
-
- return STR;
- break;
- }
- case PKL_ALIEN_TOKEN_INTEGER:
- {
- uint64_t value = token->value.integer.magnitude;
- int signed_p = token->value.integer.signed_p;
- int width = token->value.integer.width;
- pkl_ast_node type;
-
- if (width == 0)
- width = integer_literal_width (value, signed_p);
- if (integer_literal_overflow_handling (&value, signed_p, width))
- {
- yylval->exception_msg = build_overflow_error_msg (value,
width);
- return LEXER_EXCEPTION;
- }
-
- type = pkl_ast_make_integral_type (yyextra->ast, width, signed_p);
-
- yylval->ast = pkl_ast_make_integer (yyextra->ast, value);
- PKL_AST_TYPE (yylval->ast) = ASTREF (type);
-
- return INTEGER;
- break;
- }
- case PKL_ALIEN_TOKEN_OFFSET:
- {
- uint64_t value = token->value.offset.magnitude;
- int signed_p = token->value.offset.signed_p;
- int width = token->value.offset.width;
- pkl_ast_node unit, magnitude, magnitude_type, offset_type,
unit_type;
+ /* Lexical cuckolding is not enabled. */
+ return PKL_TAB_UNDEF;
+ }
+ return token;
+}
+<ALIEN_TOKEN>({LT}) {
+ yyextra->alien_nest_level = 1;
+ yyextra->alien_text[0] = '<';
+ yyextra->alien_text_len = 1;
+ yylloc->first_column -= /* {A} */ 1 + /* {LT} */ 1;
+ BEGIN(ALIEN_TOKEN_DELIM);
+}
- if (width == 0)
- width = integer_literal_width (value, signed_p);
- if (integer_literal_overflow_handling (&value, signed_p, width))
- {
- yylval->exception_msg = build_overflow_error_msg (value,
width);
- return LEXER_EXCEPTION;
- }
+ /* Recognize $<...> tokens with the logic to keep track of ('<', '>')
+ pairs. */
+<ALIEN_TOKEN_DELIM>({LT}) {
+ yyextra->alien_nest_level++;
+ if (yyextra->alien_text_len == ALIEN_TEXT_BUFSZ)
+ YY_FATAL_ERROR ("alien token is too long");
+ yyextra->alien_text[yyextra->alien_text_len++] = '<';
+}
+<ALIEN_TOKEN_DELIM>({NOT_LT_GT}+) {
+ size_t len;
+
+ len = yyextra->alien_text_len + strlen (yytext);
+ if (len >= ALIEN_TEXT_BUFSZ)
+ YY_FATAL_ERROR ("alien token is too long");
+ memcpy (yyextra->alien_text + yyextra->alien_text_len,
+ yytext, strlen (yytext));
+ yyextra->alien_text_len = len;
+}
+<ALIEN_TOKEN_DELIM>({GT}) {
+ if (yyextra->alien_text_len == ALIEN_TEXT_BUFSZ)
+ YY_FATAL_ERROR ("alien token is too long");
+ yyextra->alien_text[yyextra->alien_text_len++] = '>';
- /* Build the offset magnitude. */
- magnitude_type = pkl_ast_make_integral_type (yyextra->ast, width,
signed_p);
- magnitude = pkl_ast_make_integer (yyextra->ast, value);
- PKL_AST_TYPE (magnitude) = ASTREF (magnitude_type);
+ /* This doesn't need an extra check due to reservation of
+ ALIEN_TEXT_BUFSZ+1 bytes for yyextra->alien_text. */
+ yyextra->alien_text[yyextra->alien_text_len] = '\0';
- /* Build the offset unit. */
- unit_type = pkl_ast_make_integral_type (yyextra->ast, 64, 0);
- unit = pkl_ast_make_integer (yyextra->ast,
token->value.offset.unit);
- PKL_AST_TYPE (unit) = ASTREF (unit_type);
+ if (--yyextra->alien_nest_level == 0)
+ {
+ int token;
- /* Build the offset value itself. */
- offset_type = pkl_ast_make_offset_type (yyextra->ast,
- magnitude_type,
- unit, NULL /* ref_type */);
- yylval->ast = pkl_ast_make_offset (yyextra->ast,
- magnitude, unit);
- PKL_AST_TYPE (yylval->ast) = ASTREF (offset_type);
- return OFFSET;
- break;
- }
- default:
- PK_UNREACHABLE ();
- break;
+ BEGIN(INITIAL);
+ if (make_alien_token (yyextra, yyextra->alien_text, yylval, &token,
+ NULL, pkl_alien_dtoken_fn (yyextra->compiler))
+ == -1)
+ {
+ /* Lexical cuckolding is not enabled. */
+ return PKL_TAB_UNDEF;
}
+ return token;
}
-
- /* Lexical cuckolding is not enabled. */
- yylloc->last_column -= strlen (yytext);
- RESTORE_LOC;
- REJECT;
}
'{L}({L}|{D})* {
diff --git a/libpoke/pkl-parser.h b/libpoke/pkl-parser.h
index dd63f02e..142b65e8 100644
--- a/libpoke/pkl-parser.h
+++ b/libpoke/pkl-parser.h
@@ -35,8 +35,14 @@
BOOTSTRAPPED is 1 if the compiler has been bootstrapped. 0
otherwise.
- IN_METHOD_P is 1 if we are parsing the declaration of a struct
- method. 0 otherwise. */
+ IN_METHOD_DECL_P is 1 if we are parsing the declaration of a struct
+ method. 0 otherwise.
+
+ ALIEN_NEST_LEVEL keeps track of unbalanced angle brackets (<>) in alien
+ tokens. */
+
+/* Buffer size to keep content of delimited alien tokens. */
+#define ALIEN_TEXT_BUFSZ 1024
struct pkl_parser
{
@@ -51,6 +57,9 @@ struct pkl_parser
int bootstrapped;
int in_method_decl_p;
char *alien_errmsg;
+ int alien_nest_level;
+ char alien_text[ALIEN_TEXT_BUFSZ + 1];
+ size_t alien_text_len;
pkl_ast_loc prev_loc;
uint32_t init_line;
uint32_t init_column;
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index 3dab856a..b8cdfcae 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -92,6 +92,7 @@ EXTRA_DIST = \
poke.cmd/ios-1.pk \
poke.cmd/ios-2.pk \
poke.cmd/ios-3.pk \
+ poke.cmd/ios-4.pk \
poke.cmd/mmap-1.pk \
poke.cmd/mmap-2.pk \
poke.cmd/nbd-1.pk \
diff --git a/testsuite/poke.cmd/ios-4.pk b/testsuite/poke.cmd/ios-4.pk
new file mode 100644
index 00000000..aa6bb7b1
--- /dev/null
+++ b/testsuite/poke.cmd/ios-4.pk
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+
+/* { dg-command { .mem } } */
+/* { dg-command { .mem foo bar} } */
+/* { dg-command { .mem baz } } */
+/* { dg-command { ($<foo bar> * 2 + $<baz>) == 1*2+2 } } */
+/* { dg-output "1" } */
--
2.46.2
- [PATCH] pkl: make the alien token recognizer smarter,
Mohammad-Reza Nabipoor <=