poke-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] pkl: make the alien token recognizer smarter


From: Mohammad-Reza Nabipoor
Subject: [PATCH] pkl: make the alien token recognizer smarter
Date: Sun, 6 Oct 2024 02:01:08 +0200

Instead of using the simplistic `$<[^>]*>' pattern to recognize delimited
alien tokens, this commit adds a set of new rules to the lexer that end
the token at the earliest matching '>'.

With this commit, expressions like `$<*0*> + $<*1>' are equivalent to
addition of two literals instead of a single `*0*> + $<*1' token.

2024-10-06  Mohammad-Reza Nabipoor  <mnabipoor@gnu.org>

        * libpoke/pkl-lex.l: A set of new rules to make alien token
        recognizer smarter (instead of using simplistic `$<[^>]*>'
        pattern).
        (make_alien_token): New helper function to
        translate the alien string into a valid token.
        (NOT_LT_GT): New name definition.
        (ALIEN_TOKEN): New start condition.
        (ALIEN_TOKEN_DELIM): Likewise.
        * testsuite/poke.cmd/ios-4.pk: New test.
        * testsuite/Makefile.am (EXTRA_DIST): Update.
---

Hi Jose.

This makes my life way easier in GDB when dealing with expressions like this:

  (gdb) poke uint<8>[$<sizeof(buffer)>] @ $<&buffer>

which, without this patch, gives the following error:

  <unknown>:1:9: error: can't access GDB variable 'sizeof(buffer)] @ $<&buffer'


This patch fixes this problem.

Regards,
Mohammad-Reza


 ChangeLog                   |  13 ++
 libpoke/pkl-lex.l           | 280 ++++++++++++++++++++++--------------
 libpoke/pkl-parser.h        |  13 +-
 testsuite/Makefile.am       |   1 +
 testsuite/poke.cmd/ios-4.pk |   7 +
 5 files changed, 204 insertions(+), 110 deletions(-)
 create mode 100644 testsuite/poke.cmd/ios-4.pk

diff --git a/ChangeLog b/ChangeLog
index 106f9539..9cce3edc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2024-10-06  Mohammad-Reza Nabipoor  <mnabipoor@gnu.org>
+
+       * libpoke/pkl-lex.l: A set of new rules to make alien token
+       recognizer smarter (instead of using simplistic `$<[^>]*>'
+       pattern).
+       (make_alien_token): New helper function to
+       translate the alien string into a valid token.
+       (NOT_LT_GT): New name definition.
+       (ALIEN_TOKEN): New start condition.
+       (ALIEN_TOKEN_DELIM): Likewise.
+       * testsuite/poke.cmd/ios-4.pk: New test.
+       * testsuite/Makefile.am (EXTRA_DIST): Update.
+
 2024-10-05  Mohammad-Reza Nabipoor  <mnabipoor@gnu.org>
 
        * doc/poke.texi: Replace old representation of weird integers
diff --git a/libpoke/pkl-lex.l b/libpoke/pkl-lex.l
index a7ec56d3..1f1b9125 100644
--- a/libpoke/pkl-lex.l
+++ b/libpoke/pkl-lex.l
@@ -224,6 +224,119 @@ build_overflow_error_msg (uint64_t value, int width)
   return msg;
 }
 
+static int
+make_alien_token (struct pkl_parser *parser, const char *text, YYSTYPE *lval,
+                  int *retval, pkl_alien_token_handler_fn cb,
+                  pkl_alien_dtoken_handler_fn dcb)
+{
+  char *errmsg;
+
+  if (pkl_lexical_cuckolding_p (parser->compiler)
+      && (cb != NULL || dcb != NULL))
+    {
+      struct pkl_alien_token *token
+          = (cb != NULL ? (*cb) (text, &errmsg)
+                        : (*dcb) (text[0], text, &errmsg));
+
+      if (token == NULL)
+        {
+          /* Error from alien handler.  */
+          parser->alien_errmsg = errmsg;
+          *retval = ALIEN;
+          return 0;
+        }
+
+      switch (token->kind)
+        {
+        case PKL_ALIEN_TOKEN_IDENTIFIER:
+          {
+            char *id = token->value.identifier;
+            lval->ast = pkl_ast_make_identifier (parser->ast, id);
+            free (id);
+            *retval = IDENTIFIER;
+            return 0;
+          }
+        case PKL_ALIEN_TOKEN_STRING:
+          {
+            pkl_ast_node type = pkl_ast_make_string_type (parser->ast);
+
+            lval->ast
+                = pkl_ast_make_string (parser->ast, token->value.string.str);
+            PKL_AST_TYPE (lval->ast) = ASTREF (type);
+
+            *retval = STR;
+            return 0;
+          }
+        case PKL_ALIEN_TOKEN_INTEGER:
+          {
+            uint64_t value = token->value.integer.magnitude;
+            int signed_p = token->value.integer.signed_p;
+            int width = token->value.integer.width;
+            pkl_ast_node type;
+
+            if (width == 0)
+              width = integer_literal_width (value, signed_p);
+            if (integer_literal_overflow_handling (&value, signed_p, width))
+              {
+                lval->exception_msg = build_overflow_error_msg (value, width);
+                *retval = LEXER_EXCEPTION;
+                return 0;
+              }
+
+            type = pkl_ast_make_integral_type (parser->ast, width, signed_p);
+
+            lval->ast = pkl_ast_make_integer (parser->ast, value);
+            PKL_AST_TYPE (lval->ast) = ASTREF (type);
+
+            *retval = INTEGER;
+            return 0;
+          }
+        case PKL_ALIEN_TOKEN_OFFSET:
+          {
+            uint64_t value = token->value.offset.magnitude;
+            int signed_p = token->value.offset.signed_p;
+            int width = token->value.offset.width;
+            pkl_ast_node unit, magnitude, magnitude_type, offset_type,
+                unit_type;
+
+            if (width == 0)
+              width = integer_literal_width (value, signed_p);
+            if (integer_literal_overflow_handling (&value, signed_p, width))
+              {
+                lval->exception_msg = build_overflow_error_msg (value, width);
+                *retval = LEXER_EXCEPTION;
+                return 0;
+              }
+
+            /* Build the offset magnitude.  */
+            magnitude_type
+                = pkl_ast_make_integral_type (parser->ast, width, signed_p);
+            magnitude = pkl_ast_make_integer (parser->ast, value);
+            PKL_AST_TYPE (magnitude) = ASTREF (magnitude_type);
+
+            /* Build the offset unit.  */
+            unit_type = pkl_ast_make_integral_type (parser->ast, 64, 0);
+            unit
+                = pkl_ast_make_integer (parser->ast, token->value.offset.unit);
+            PKL_AST_TYPE (unit) = ASTREF (unit_type);
+
+            /* Build the offset value itself.  */
+            offset_type = pkl_ast_make_offset_type (
+                parser->ast, magnitude_type, unit, NULL /* ref_type */);
+            lval->ast = pkl_ast_make_offset (parser->ast, magnitude, unit);
+            PKL_AST_TYPE (lval->ast) = ASTREF (offset_type);
+            *retval = OFFSET;
+            return 0;
+          }
+        default:
+          PK_UNREACHABLE ();
+          break;
+        }
+    }
+
+  return -1;
+}
+
 %}
 
 NEWLINE            (\r\n)|\n
@@ -244,9 +357,12 @@ A $
 S ::
 LT <
 GT >
+NOT_LT_GT [^<>]
 
 %x C_COMMENT
 %x SHEBANG_COMMENT
+%x ALIEN_TOKEN
+%x ALIEN_TOKEN_DELIM
 
 %%
 
@@ -419,122 +535,70 @@ GT >
 "@!"               { return NSMAP; }
 "@"                { return '@'; }
 
-({A}({L}|{D})({L}|{D}|({S}({L}|{D})))*)|({A}{LT}[^{GT}]*{GT}) {
-  char *errmsg;
-  pkl_alien_token_handler_fn cb = NULL;
-  pkl_alien_dtoken_handler_fn dcb = NULL;
+({A})              { BEGIN(ALIEN_TOKEN); }
+<ALIEN_TOKEN>(({L}|{D})({L}|{D}|({S}({L}|{D})))*) {
+  int token;
 
-  if (yytext[1] == '<')
-    dcb = pkl_alien_dtoken_fn (yyextra->compiler);
-  else
-    cb = pkl_alien_token_fn (yyextra->compiler);
-
-  if (pkl_lexical_cuckolding_p (yyextra->compiler)
-      && (cb != NULL || dcb != NULL))
+  yylloc->first_column -= /* {A} */ 1;
+  BEGIN (INITIAL);
+  if (make_alien_token (yyextra, yytext, yylval, &token,
+                        pkl_alien_token_fn (yyextra->compiler), NULL)
+      == -1)
     {
-      struct pkl_alien_token *token
-        = (cb != NULL
-           ? (*cb) (yytext + 1, &errmsg)
-           : (*dcb) (yytext[1], yytext + 1, &errmsg));
-
-      if (token == NULL)
-        {
-          /* Error from alien handler.  */
-          yyextra->alien_errmsg = errmsg;
-          return ALIEN;
-          break;
-        }
-
-      switch (token->kind)
-        {
-        case PKL_ALIEN_TOKEN_IDENTIFIER:
-          {
-            char *id = token->value.identifier;
-            yylval->ast = pkl_ast_make_identifier (yyextra->ast,
-                                                   id);
-            free (id);
-            return IDENTIFIER;
-            break;
-          }
-        case PKL_ALIEN_TOKEN_STRING:
-          {
-            pkl_ast_node type = pkl_ast_make_string_type (yyextra->ast);
-
-            yylval->ast
-              = pkl_ast_make_string (yyextra->ast, token->value.string.str);
-            PKL_AST_TYPE (yylval->ast) = ASTREF (type);
-
-            return STR;
-            break;
-          }
-        case PKL_ALIEN_TOKEN_INTEGER:
-          {
-            uint64_t value = token->value.integer.magnitude;
-            int signed_p = token->value.integer.signed_p;
-            int width = token->value.integer.width;
-            pkl_ast_node type;
-
-            if (width == 0)
-              width = integer_literal_width (value, signed_p);
-            if (integer_literal_overflow_handling (&value, signed_p, width))
-              {
-                yylval->exception_msg = build_overflow_error_msg (value, width);
-                return LEXER_EXCEPTION;
-              }
-
-            type = pkl_ast_make_integral_type (yyextra->ast, width, signed_p);
-
-            yylval->ast = pkl_ast_make_integer (yyextra->ast, value);
-            PKL_AST_TYPE (yylval->ast) = ASTREF (type);
-
-            return INTEGER;
-            break;
-          }
-        case PKL_ALIEN_TOKEN_OFFSET:
-          {
-            uint64_t value = token->value.offset.magnitude;
-            int signed_p = token->value.offset.signed_p;
-            int width = token->value.offset.width;
-            pkl_ast_node unit, magnitude, magnitude_type, offset_type, unit_type;
+      /* Lexical cuckolding is not enabled.  */
+      return PKL_TAB_UNDEF;
+    }
+  return token;
+}
+<ALIEN_TOKEN>({LT}) {
+  yyextra->alien_nest_level = 1;
+  yyextra->alien_text[0] = '<';
+  yyextra->alien_text_len = 1;
+  yylloc->first_column -= /* {A} */ 1 + /* {LT} */ 1;
+  BEGIN(ALIEN_TOKEN_DELIM);
+}
 
-            if (width == 0)
-              width = integer_literal_width (value, signed_p);
-            if (integer_literal_overflow_handling (&value, signed_p, width))
-              {
-                yylval->exception_msg = build_overflow_error_msg (value, width);
-                return LEXER_EXCEPTION;
-              }
+  /* Recognize $<...> tokens with the logic to keep track of ('<', '>')
+     pairs.  */
+<ALIEN_TOKEN_DELIM>({LT})   {
+  yyextra->alien_nest_level++;
+  if (yyextra->alien_text_len == ALIEN_TEXT_BUFSZ)
+    YY_FATAL_ERROR ("alien token is too long");
+  yyextra->alien_text[yyextra->alien_text_len++] = '<';
+}
+<ALIEN_TOKEN_DELIM>({NOT_LT_GT}+) {
+  size_t len;
+
+  len = yyextra->alien_text_len + strlen (yytext);
+  if (len >= ALIEN_TEXT_BUFSZ)
+    YY_FATAL_ERROR ("alien token is too long");
+  memcpy (yyextra->alien_text + yyextra->alien_text_len,
+          yytext, strlen (yytext));
+  yyextra->alien_text_len = len;
+}
+<ALIEN_TOKEN_DELIM>({GT})   {
+  if (yyextra->alien_text_len == ALIEN_TEXT_BUFSZ)
+    YY_FATAL_ERROR ("alien token is too long");
+  yyextra->alien_text[yyextra->alien_text_len++] = '>';
 
-            /* Build the offset magnitude.  */
-            magnitude_type = pkl_ast_make_integral_type (yyextra->ast, width, signed_p);
-            magnitude = pkl_ast_make_integer (yyextra->ast, value);
-            PKL_AST_TYPE (magnitude) = ASTREF (magnitude_type);
+  /* This doesn't need an extra check due to reservation of
+     ALIEN_TEXT_BUFSZ+1 bytes for yyextra->alien_text.  */
+  yyextra->alien_text[yyextra->alien_text_len] = '\0';
 
-            /* Build the offset unit.  */
-            unit_type = pkl_ast_make_integral_type (yyextra->ast, 64, 0);
-            unit = pkl_ast_make_integer (yyextra->ast, token->value.offset.unit);
-            PKL_AST_TYPE (unit) = ASTREF (unit_type);
+  if (--yyextra->alien_nest_level == 0)
+    {
+      int token;
 
-            /* Build the offset value itself.  */
-            offset_type = pkl_ast_make_offset_type (yyextra->ast,
-                                                    magnitude_type,
-                                                    unit, NULL /* ref_type */);
-            yylval->ast = pkl_ast_make_offset (yyextra->ast,
-                                               magnitude, unit);
-            PKL_AST_TYPE (yylval->ast) = ASTREF (offset_type);
-            return OFFSET;
-            break;
-          }
-        default:
-          PK_UNREACHABLE ();
-          break;
+      BEGIN(INITIAL);
+      if (make_alien_token (yyextra, yyextra->alien_text, yylval, &token,
+                            NULL, pkl_alien_dtoken_fn (yyextra->compiler))
+          == -1)
+        {
+          /* Lexical cuckolding is not enabled.  */
+          return PKL_TAB_UNDEF;
         }
+      return token;
     }
-
-  /* Lexical cuckolding is not enabled.  */
-  yylloc->last_column -= strlen (yytext);
-  RESTORE_LOC;
-  REJECT;
 }
 
 '{L}({L}|{D})* {
diff --git a/libpoke/pkl-parser.h b/libpoke/pkl-parser.h
index dd63f02e..142b65e8 100644
--- a/libpoke/pkl-parser.h
+++ b/libpoke/pkl-parser.h
@@ -35,8 +35,14 @@
    BOOTSTRAPPED is 1 if the compiler has been bootstrapped.  0
    otherwise.
 
-   IN_METHOD_P is 1 if we are parsing the declaration of a struct
-   method.  0 otherwise.  */
+   IN_METHOD_DECL_P is 1 if we are parsing the declaration of a struct
+   method.  0 otherwise.
+
+   ALIEN_NEST_LEVEL keeps track of unbalanced angle brackets (<>) in alien
+   tokens.  */
+
+/* Buffer size to keep content of delimited alien tokens.  */
+#define ALIEN_TEXT_BUFSZ 1024
 
 struct pkl_parser
 {
@@ -51,6 +57,9 @@ struct pkl_parser
   int bootstrapped;
   int in_method_decl_p;
   char *alien_errmsg;
+  int alien_nest_level;
+  char alien_text[ALIEN_TEXT_BUFSZ + 1];
+  size_t alien_text_len;
   pkl_ast_loc prev_loc;
   uint32_t init_line;
   uint32_t init_column;
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index 3dab856a..b8cdfcae 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -92,6 +92,7 @@ EXTRA_DIST = \
   poke.cmd/ios-1.pk \
   poke.cmd/ios-2.pk \
   poke.cmd/ios-3.pk \
+  poke.cmd/ios-4.pk \
   poke.cmd/mmap-1.pk \
   poke.cmd/mmap-2.pk \
   poke.cmd/nbd-1.pk \
diff --git a/testsuite/poke.cmd/ios-4.pk b/testsuite/poke.cmd/ios-4.pk
new file mode 100644
index 00000000..aa6bb7b1
--- /dev/null
+++ b/testsuite/poke.cmd/ios-4.pk
@@ -0,0 +1,7 @@
+/* { dg-do run } */
+
+/* { dg-command { .mem } } */
+/* { dg-command { .mem foo bar} } */
+/* { dg-command { .mem baz } } */
+/* { dg-command { ($<foo bar> * 2 + $<baz>) == 1*2+2 } } */
+/* { dg-output "1" } */
-- 
2.46.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]