guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 3/5] peg: Add expect.


From: Janneke Nieuwenhuizen
Subject: [PATCH v2 3/5] peg: Add expect.
Date: Mon, 14 Oct 2024 09:31:07 +0200

From: Rutger van Beusekom <rutger@dezyne.org>

This adds an expectation parser (expect a, 'a#') to the PEG parser
grammar.

Rationale: PEG will return #f for invalid input.  Adding expect (#)
to the grammar is a way to report syntax errors.

* module/ice-9/peg/string-peg.scm (peg-secondary->defn): Rename from
peg-primary->defn.
(cg-expect-int, cg-expect): New functions.
(expect): Add them in new parser.
(peg-primary->defn): Use it in new function.
(peg-as-peg "secondary"): Rename from "primary".
(peg-as-peg "primary"): Use it, inserting `#' as expect.
* test-suite/tests/peg.test (grammar-mapping): Update for secondary.
("PEG Grammar"): Use pass-if-equal for friendlier failure resolving.
("Parsing expect"): Test it.
* doc/ref/api-peg.texi (PEG Syntax Reference): Document it.

Co-authored-by: Janneke Nieuwenhuizen <janneke@gnu.org>
---
 doc/ref/api-peg.texi            | 21 ++++++++++++++++++++
 module/ice-9/peg/codegen.scm    | 11 +++++++++++
 module/ice-9/peg/string-peg.scm | 12 +++++++++--
 test-suite/tests/peg.test       | 35 +++++++++++++++++++++++++++++----
 4 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/doc/ref/api-peg.texi b/doc/ref/api-peg.texi
index dfa806832..df2e74d05 100644
--- a/doc/ref/api-peg.texi
+++ b/doc/ref/api-peg.texi
@@ -159,6 +159,15 @@ Would be:
  (+ "e"))
 @end lisp
 
+@deftp {PEG Pattern} expect a
+Expect to parse @var{a}.  If this succeeds, parsing continues.  If it fails,
+throw a @code{syntax-error} with the location and the failed expectation.
+
+@code{"a#"}
+
+@code{(expect a)}
+@end deftp
+
 @subsubheading Extended Syntax
 
 There is some extra syntax for S-expressions.
@@ -1037,6 +1046,7 @@ described above.  The function @code{add-peg-compiler!} is exported from
 the @code{(ice-9 peg codegen)} module.
 
 @subsubheading Debug tracing
+@anchor{Debug tracing}
 
 Due to the backtracking nature of PEG, the parser result is @code{#f}
 when it cannot match the input text.  It proves to be a big pain
@@ -1051,3 +1061,14 @@ current state of the input, as well as the parse result.
 (parameterize ((%peg:debug? #t))
   (and=> (match-pattern grammar input-text) peg:tree))
 @end lisp
+
+@subsubheading Expect parsing
+
+The best thing about PEG is its backtracking nature, which makes it
+LL(infinite).  At the same time, it severely limits the debuggability of
+the grammar, as mentioned in @ref{Debug tracing}.  The ability to stop
+backtracking is achieved by introducing the @code{#} operator to the PEG
+language.  Putting a @code{#} after a terminal or non-terminal
+indicates that its parsing must succeed; otherwise an exception is
+thrown containing the current parser state, providing a hook to produce
+informative parse errors.
diff --git a/module/ice-9/peg/codegen.scm b/module/ice-9/peg/codegen.scm
index c450be440..dd24bdac0 100644
--- a/module/ice-9/peg/codegen.scm
+++ b/module/ice-9/peg/codegen.scm
@@ -300,6 +300,16 @@ return EXP."
                                 #f
                                 #,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
 
+(define (cg-expect-int clauses accum str strlen at)
+  (syntax-case clauses ()
+    ((pat)
+     #`(or (#,(compile-peg-pattern #'pat accum) #,str #,strlen #,at)
+           (throw 'syntax-error (list #,at (syntax->datum #'pat)))))))
+
+(define (cg-expect clauses accum)
+  #`(lambda (str len pos)
+      #,(cg-expect-int clauses (baf accum) #'str #'len #'pos)))
+
 ;; Association list of functions to handle different expressions as PEGs
 (define peg-compiler-alist '())
 
@@ -317,6 +327,7 @@ return EXP."
 (add-peg-compiler! '? cg-?)
 (add-peg-compiler! 'followed-by cg-followed-by)
 (add-peg-compiler! 'not-followed-by cg-not-followed-by)
+(add-peg-compiler! 'expect cg-expect)
 
 ;; Takes an arbitrary expressions and accumulation variable, then parses it.
 ;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
index 45ed14bb1..da98a0da6 100644
--- a/module/ice-9/peg/string-peg.scm
+++ b/module/ice-9/peg/string-peg.scm
@@ -1,6 +1,6 @@
 ;;;; string-peg.scm --- representing PEG grammars as strings
 ;;;;
-;;;;   Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+;;;;   Copyright (C) 2010, 2011, 2024 Free Software Foundation, Inc.
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -42,7 +42,8 @@
 pattern <-- alternative (SLASH sp alternative)*
 alternative <-- ([!&]? sp suffix)+
 suffix <-- primary ([*+?] sp)*
-primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
+primary <-- secondary ([#] sp)?
+secondary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
 literal <-- ['] (!['] .)* ['] sp
 charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
 CCrange <-- . '-' .
@@ -73,6 +74,8 @@ RB < ']'
 (define-sexp-parser peg-suffix all
   (and peg-primary (* (and (or "*" "+" "?") peg-sp))))
 (define-sexp-parser peg-primary all
+  (and peg-secondary (? (and "#" peg-sp))))
+(define-sexp-parser peg-secondary all
   (or (and "(" peg-sp peg-pattern ")" peg-sp)
       (and "." peg-sp)
       peg-literal
@@ -188,6 +191,11 @@ RB < ']'
 
 ;; Parse a primary.
 (define (peg-primary->defn lst for-syntax)
+  (let ((inner-defn (peg-secondary->defn (cadr lst) for-syntax)))
+    (if (and (pair? (cddr lst)) (equal? (caddr lst) "#")) #`(expect #,inner-defn)
+        inner-defn)))
+
+(define (peg-secondary->defn lst for-syntax)
   (let ((el (cadr lst)))
   (cond
    ((list? el)
diff --git a/test-suite/tests/peg.test b/test-suite/tests/peg.test
index 6a8709794..b3586c891 100644
--- a/test-suite/tests/peg.test
+++ b/test-suite/tests/peg.test
@@ -33,6 +33,7 @@
     (alternative peg-alternative)
     (suffix peg-suffix)
     (primary peg-primary)
+    (secondary peg-secondary)
     (literal peg-literal)
     (charclass peg-charclass)
     (CCrange charclass-range)
@@ -63,13 +64,12 @@
   (pass-if
    "defining PEGs with PEG"
    (and (eeval `(define-peg-string-patterns ,(@@ (ice-9 peg) peg-as-peg))) #t))
-  (pass-if
+  (pass-if-equal
    "equivalence of definitions"
-   (equal?
-    (peg:tree (match-pattern (@@ (ice-9 peg) peg-grammar) (@@ (ice-9 peg) peg-as-peg)))
+      (peg:tree (match-pattern (@@ (ice-9 peg) peg-grammar) (@@ (ice-9 peg) peg-as-peg)))
     (tree-map
      grammar-transform
-     (peg:tree (match-pattern grammar (@@ (ice-9 peg) peg-as-peg)))))))
+     (peg:tree (match-pattern grammar (@@ (ice-9 peg) peg-as-peg))))))
 
 ;; A grammar for pascal-style comments from Wikipedia.
 (define comment-grammar
@@ -305,3 +305,30 @@ trace-grammar := \"foobarbarbaz\"  next: \"\"
       (with-error-to-string
         (lambda _ (and=> (match-pattern trace-grammar "foobarbarbaz")
                          peg:tree))))))
+
+(define-peg-string-patterns
+  "expect-grammar <-- one two three / .*
+one <-- 'one'#
+two <-- 'two'#
+three <-- 'three'"
+)
+
+(with-test-prefix "Parsing expect"
+  (pass-if-equal "expect okay"
+      '(expect-grammar (one "one") (two "two") (three "three"))
+      (and=> (match-pattern expect-grammar "onetwothree")
+             peg:tree))
+  (pass-if-equal "expect one"
+      '(syntax-error (0 "one"))
+    (catch 'syntax-error
+      (lambda _
+        (and=> (match-pattern expect-grammar "twothree")
+               peg:tree))
+      (lambda args args)))
+  (pass-if-equal "expect two"
+      '(syntax-error (3 "two"))
+    (catch 'syntax-error
+      (lambda _
+        (and=> (match-pattern expect-grammar "onethree")
+               peg:tree))
+      (lambda args args))))
-- 
2.46.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]