[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#54705: [PATCH v2] reader: Add Gemtext reader.
From: |
Arun Isaac |
Subject: |
bug#54705: [PATCH v2] reader: Add Gemtext reader. |
Date: |
Wed, 13 Apr 2022 01:12:32 +0530 |
* src/guile/skribilo/reader/gemtext.scm: New file.
* configure.ac: Set BUILD_GEMTEXT_READER automake conditional to true
if (srfi srfi-171) is found. Else, set it to false.
* src/guile/Makefile.am (readers): Add skribilo/reader/gemtext.scm
if BUILD_GEMTEXT_READER is true.
(EXTRA_DIST): Add skribilo/reader/gemtext.scm if BUILD_GEMTEXT_READER
is false.
* doc/user/syntax.skb (The Gemtext Syntax): New section.
* tests/readers/gemtext.test: New file.
* tests/Makefile.am (TESTS): Add readers/gemtext.test if
BUILD_GEMTEXT_READER is true.
(EXTRA_DIST): Add readers/gemtext.text if BUILD_GEMTEXT_READER is
false.
---
configure.ac | 9 +
doc/user/syntax.skb | 21 ++-
src/guile/Makefile.am | 10 ++
src/guile/skribilo/reader/gemtext.scm | 231 ++++++++++++++++++++++++++
tests/Makefile.am | 10 ++
tests/readers/gemtext.test | 133 +++++++++++++++
6 files changed, 413 insertions(+), 1 deletion(-)
create mode 100644 src/guile/skribilo/reader/gemtext.scm
create mode 100644 tests/readers/gemtext.test
diff --git a/configure.ac b/configure.ac
index 04c7eac..5ad964a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -66,6 +66,15 @@ fi
AM_CONDITIONAL([BUILD_RSS2_READER],
[test "x$have_sxml_simple$have_htmlprag" == "xyesyes"])
+# Check for SRFI-171, needed for the `gemtext' reader.
+GUILE_MODULE_AVAILABLE([have_srfi_171], [(srfi srfi-171)])
+if test "x$have_srfi_171" != "xyes"; then
+ AC_MSG_WARN([SRFI-171 needed by the `gemtext' reader is missing.])
+fi
+
+AM_CONDITIONAL([BUILD_GEMTEXT_READER],
+ [test "x$have_srfi_171" == "xyes"])
+
# Look for `convert', from ImageMagick.
AC_PATH_PROG([CONVERT], [convert])
if test "x$CONVERT" == "x"; then
diff --git a/doc/user/syntax.skb b/doc/user/syntax.skb
index 9a4070c..2de7cbd 100644
--- a/doc/user/syntax.skb
+++ b/doc/user/syntax.skb
@@ -211,7 +211,26 @@ documents that can be output in variety of formats (see
,(numref :text
[Chapter] :ident "engines")). The downside is that, being a very simple
markup-less document format, there are many things that cannot be done
using it, most notably tables, bibliographies, and cross-references.]))
-
+
+ (section :title [The Gemtext Syntax] :ident "gemtext-syntax"
+ (p [,(ref
+:url "https://gemini.circumlunar.space/docs/gemtext.gmi"
+:text "Gemtext"), the lightweight markup language used by the ,(ref
+:url "https://gemini.circumlunar.space" :text "Gemini protocol"), is
+supported as an input syntax. To use it, just pass ,(tt
+[--reader=gemtext]) to the compiler. When used programmatically, the
+Gemtext reader can be customized using the following options.])
+
+ (doc-markup 'make-gemtext-reader
+ '((:join-lines? [If ,(code "#t"), lines which are not
+separated by a blank line are joined into a single paragraph. This is
+a relaxation of the Gemtext standard, and is not done by default.])
+ (:section-numbers? [If ,(code "#t"), sections are
+numbered. Else, they are not.]))
+ :common-args '()
+ :source "skribilo/reader/gemtext.scm"
+ :idx *function-index*))
+
(section :title [The RSS 2.0 Syntax]
:ident "rss2-syntax"
diff --git a/src/guile/Makefile.am b/src/guile/Makefile.am
index 98f2873..09bb7da 100644
--- a/src/guile/Makefile.am
+++ b/src/guile/Makefile.am
@@ -1,5 +1,6 @@
# Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2012,
# 2015, 2018, 2020 Ludovic Courtès <ludo@gnu.org>
+# Copyright 2022 Arun Isaac <arunisaac@systemreboot.net>
#
# This file is part of Skribilo.
#
@@ -59,6 +60,15 @@ EXTRA_DIST += skribilo/reader/rss-2.scm
endif !BUILD_RSS2_READER
+if BUILD_GEMTEXT_READER
+
+readers += skribilo/reader/gemtext.scm
+
+else !BUILD_GEMTEXT_READER
+
+EXTRA_DIST += skribilo/reader/gemtext.scm
+
+endif !BUILD_GEMTEXT_READER
engines = \
skribilo/engine/base.scm skribilo/engine/context.scm \
diff --git a/src/guile/skribilo/reader/gemtext.scm
b/src/guile/skribilo/reader/gemtext.scm
new file mode 100644
index 0000000..7f5905c
--- /dev/null
+++ b/src/guile/skribilo/reader/gemtext.scm
@@ -0,0 +1,231 @@
+;;; gemtext.scm -- A reader for the Gemini protocol's Gemtext markup
+;;;
+;;; Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;;
+;;; This file is part of Skribilo.
+;;;
+;;; Skribilo is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; Skribilo is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with Skribilo. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (skribilo reader gemtext)
+ #:use-module (rnrs io ports)
+ #:use-module (srfi srfi-26)
+ #:use-module (srfi srfi-71)
+ #:use-module (srfi srfi-171)
+ #:use-module (ice-9 match)
+ #:use-module ((ice-9 textual-ports) #:select (unget-char unget-string))
+ #:use-module (skribilo reader)
+ #:use-module (skribilo utils syntax)
+ #:export (reader-specification
+ make-gemtext-reader))
+
+(skribilo-module-syntax)
+
+;;; Author: Arun Isaac
+;;;
+;;; Commentary:
+;;;
+;;; A reader for gemtext, the lightweight markup language used by the
+;;; Gemini protocol
+;;;
+;;; Code:
+
+(define %join-lines?
+ (make-parameter #f))
+
+(define %section-numbers?
+ (make-parameter #f))
+
+(define (string-blank? str)
+ "Return #t if STR contains only whitespace characters. Else, return
+#f."
+ (string-every char-set:whitespace str))
+
+(define (string-remove-prefix prefix str)
+ "Return STR with PREFIX removed. If PREFIX is not a prefix of STR,
+return #f."
+ (and (string-prefix? prefix str)
+ (substring str (string-length prefix))))
+
+(define (string-partition str char-pred)
+ "Return the part of STR before and after the first occurrence of
+CHAR-PRED as two values."
+ (let ((partition-index (string-index str char-pred)))
+ (if partition-index
+ (values (substring str 0 partition-index)
+ (substring str partition-index))
+ (values str #f))))
+
+(define (unget-line port line)
+ "Place the string LINE in PORT so that subsequent read operations
+will read LINE followed by a newline character."
+ (unget-char port #\newline)
+ (unget-string port line))
+
+(define (read-preformatted-text in out)
+ "Read preformatted text from port IN and write it to port OUT."
+ (let ((line (get-line in)))
+ (unless (or (eof-object? line)
+ (string-prefix? "```" line))
+ (put-string out line)
+ (newline out)
+ (read-preformatted-text in out))))
+
+(define (heading-level line)
+ "Return the level of the heading in LINE. If LINE is not a heading,
+return #f."
+ (cond
+ ((string-prefix? "### " line) 3)
+ ((string-prefix? "## " line) 2)
+ ((string-prefix? "# " line) 1)
+ (else #f)))
+
+(define (read-section-children level port)
+ "Read section elements of LEVEL from PORT. Return as a list."
+ (let ((line (get-line port)))
+ (cond
+ ;; End of file
+ ((eof-object? line) (list))
+ ;; If another heading of same or higher level begins, unget line
+ ;; and end section.
+ ((let ((heading-level (heading-level line)))
+ (and heading-level
+ (<= heading-level level)))
+ (unget-line port line)
+ (list))
+ ;; If blank line, continue.
+ ((string-blank? line)
+ (read-section-children level port))
+ ;; Else, add element and continue.
+ (else
+ (unget-line port line)
+ (cons (read-gemtext-element port)
+ (read-section-children level port))))))
+
+(define (paragraph-line? line)
+ "Return #t if LINE is a paragraph line. Else, return #f."
+ (not (or (string-blank? line)
+ (heading-level line)
+ (string-prefix? "* " line)
+ (string-prefix? ">" line)
+ (string-prefix? "=>" line)
+ (string-prefix? "```" line))))
+
+(define (link-line->item line)
+ "Convert link LINE to a skribilo ref expression."
+ (let* ((trimmed-line (string-trim (string-remove-prefix "=>" line)))
+ (url text (string-partition trimmed-line (char-set #\space #\tab))))
+ (if text
+ `(item (ref #:url ,url #:text ,(string-trim text)))
+ `(item (ref #:url ,url)))))
+
+(define (retf-unget-line port result line)
+ "Unget LINE to PORT and return RESULT. This function is used as an
+argument to ttake-while."
+ (unget-line port line)
+ result)
+
+(define (read-gemtext-element port)
+ "Read next gemtext element from PORT."
+ (let ((line (get-line port)))
+ (cond
+ ;; End of file
+ ((eof-object? line) line)
+ ;; Section
+ ((heading-level line)
+ => (lambda (level)
+ `(,(case level
+ ((1) 'section)
+ ((2) 'subsection)
+ ((3) 'subsubsection))
+ #:title ,(substring line (1+ level))
+ #:number ,(%section-numbers?)
+ ,@(read-section-children level port))))
+ ;; List
+ ((string-remove-prefix "* " line)
+ => (lambda (first-item)
+ `(itemize
+ ,@(port-transduce (compose (ttake-while (cut string-prefix? "* "
<>)
+ (cut retf-unget-line port
<> <>))
+ (tmap (lambda (line)
+ `(item ,(string-remove-prefix
"* " line)))))
+ rcons
+ (list `(item ,first-item))
+ get-line
+ port))))
+ ;; Blockquote
+ ((string-remove-prefix ">" line)
+ => (lambda (first-line)
+ (list 'blockquote
+ (if (%join-lines?)
+ (string-join
+ (port-transduce (compose (ttake-while (cut
string-prefix? ">" <>)
+ (cut
retf-unget-line port <> <>))
+ (tmap (cut string-remove-prefix
">" <>)))
+ rcons
+ (list first-line)
+ get-line
+ port)
+ " ")
+ line))))
+ ;; Link
+ ((string-prefix? "=>" line)
+ (cons 'itemize
+ (port-transduce (compose (ttake-while (cut string-prefix? "=>" <>)
+ (cut retf-unget-line port <>
<>))
+ (tmap link-line->item))
+ rcons
+ (list (link-line->item line))
+ get-line
+ port)))
+ ;; Preformatted text
+ ((string-remove-prefix "```" line)
+ => (lambda (alt-text)
+ ;; We don't use the alt text.
+ `(pre ,(call-with-output-string
+ (cut read-preformatted-text port <>)))))
+ ;; Ignore blank lines.
+ ((string-blank? line) (read-gemtext-element port))
+ ;; Paragraph
+ (else
+ (list 'paragraph
+ (if (%join-lines?)
+ (string-join
+ (port-transduce (ttake-while paragraph-line?
+ (cut retf-unget-line port <> <>))
+ rcons
+ (list line)
+ get-line
+ port)
+ " ")
+ line))))))
+
+(define* (make-gemtext-reader :key join-lines? section-numbers?)
+ "Return a gemtext reader.
+
+If JOIN-LINES? is #t, lines which are not separated by a blank line
+are joined into a single paragraph.
+
+If SECTION-NUMBERS? is #t, sections are numbered. Else, they are not."
+ (lambda (port)
+ (parameterize ((%join-lines? join-lines?)
+ (%section-numbers? section-numbers?))
+ (match (port-transduce (tmap identity)
+ rcons
+ read-gemtext-element
+ port)
+ (() (eof-object))
+ (elements `(document ,@elements))))))
+
+(define-reader gemtext "0.1" make-gemtext-reader)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8ba7637..26b05ad 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -26,5 +26,15 @@ EXTRA_DIST = $(TESTS) readers/rss-2.test
endif !BUILD_RSS2_READER
+if BUILD_GEMTEXT_READER
+
+TESTS += readers/gemtext.test
+EXTRA_DIST = $(TESTS)
+
+else !BUILD_GEMTEXT_READER
+
+EXTRA_DIST = $(TESTS) readers/gemtext.test
+
+endif !BUILD_GEMTEXT_READER
CLEANFILES = ast.log resolve.log rss-2.log location.log info.log
diff --git a/tests/readers/gemtext.test b/tests/readers/gemtext.test
new file mode 100644
index 0000000..2340dc0
--- /dev/null
+++ b/tests/readers/gemtext.test
@@ -0,0 +1,133 @@
+;;; Exercise Gemtext reader. -*- Scheme -*-
+;;;
+;;; Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;;
+;;; This file is part of Skribilo.
+;;;
+;;; Skribilo is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; Skribilo is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with Skribilo. If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (tests gemtext)
+ #:use-module (srfi srfi-64)
+ #:use-module (ice-9 match)
+ #:use-module (skribilo reader))
+
+(define make-gemtext-reader
+ (reader:make (lookup-reader 'gemtext)))
+
+(define-syntax-rule (match? exp pattern)
+ (match exp
+ (pattern #t)
+ (_ #f)))
+
+
+
+(test-begin "gemtext")
+
+(test-assert "basic gemtext document"
+ (match? (call-with-input-string "# Heading
+* Mercury
+* Gemini
+* Apollo
+## Subheading
+
+### Subsubheading
+
+> I contend that text-based websites should not exceed in size the major works
of Russian literature.
+
+# Links
+
+=>https://example.com A cool website
+=>gopher://example.com An even cooler gopherhole
+=> gemini://example.com A supremely cool Gemini capsule
+=> sftp://example.com
+
+```
+This is a preformatted block.
+```
+
+```alt
+This is a preformatted block with \"alt text\".
+```"
+ (make-gemtext-reader))
+ `(document
+ (section #:title "Heading" #:number #f
+ (itemize (item "Mercury")
+ (item "Gemini")
+ (item "Apollo"))
+ (subsection #:title "Subheading" #:number #f
+ (subsubsection #:title "Subsubheading"
#:number #f
+ (blockquote "> I contend that
text-based websites should not exceed in size the major works of Russian
literature."))))
+ (section #:title "Links" #:number #f
+ (itemize (item (ref #:url "https://example.com" #:text "A
cool website"))
+ (item (ref #:url "gopher://example.com" #:text
"An even cooler gopherhole"))
+ (item (ref #:url "gemini://example.com" #:text
"A supremely cool Gemini capsule"))
+ (item (ref #:url "sftp://example.com")))
+ (pre "This is a preformatted block.\n")
+ (pre "This is a preformatted block with \"alt
text\".\n")))))
+
+(test-assert "do not join short lines into paragraph"
+ (match? (call-with-input-string "Foo
+Bar"
+ (make-gemtext-reader))
+ `(document
+ (paragraph "Foo")
+ (paragraph "Bar"))))
+
+(test-assert "join short lines into paragraphs"
+ (match? (call-with-input-string "Foo
+Bar"
+ (make-gemtext-reader #:join-lines? #t))
+ `(document
+ (paragraph "Foo Bar"))))
+
+(test-assert "do not number sections"
+ (match? (call-with-input-string "# Foo
+## Bar"
+ (make-gemtext-reader))
+ `(document
+ (section #:title "Foo" #:number #f
+ (subsection #:title "Bar" #:number #f)))))
+
+(test-assert "number sections"
+ (match? (call-with-input-string "# Foo
+## Bar"
+ (make-gemtext-reader #:section-numbers? #t))
+ `(document
+ (section #:title "Foo" #:number #t
+ (subsection #:title "Bar" #:number #t)))))
+
+(test-assert "break up links separated by blank lines into separate lists"
+ (match? (call-with-input-string "=>https://example.com A cool website
+=>gopher://example.com An even cooler gopherhole
+
+=> gemini://example.com A supremely cool Gemini capsule
+=> sftp://example.com"
+ (make-gemtext-reader))
+ `(document
+ (itemize (item (ref #:url "https://example.com" #:text "A cool
website"))
+ (item (ref #:url "gopher://example.com" #:text "An even
cooler gopherhole")))
+ (itemize (item (ref #:url "gemini://example.com" #:text "A
supremely cool Gemini capsule"))
+ (item (ref #:url "sftp://example.com"))))))
+
+(test-assert "ignore blank lines that have a non-zero number of whitespace
characters"
+ (match? (call-with-input-string "Foo
+
+Bar"
+ (make-gemtext-reader))
+ `(document
+ (paragraph "Foo")
+ (paragraph "Bar"))))
+
+(test-end "gemtext")
--
2.35.1