emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to lisp/nxml/nxml-parse.el


From: Mark A. Hershberger
Subject: [Emacs-diffs] Changes to lisp/nxml/nxml-parse.el
Date: Fri, 23 Nov 2007 06:58:18 +0000

CVSROOT:        /sources/emacs
Module name:    emacs
Changes by:     Mark A. Hershberger <hexmode>   07/11/23 06:58:00

Index: lisp/nxml/nxml-parse.el
===================================================================
RCS file: lisp/nxml/nxml-parse.el
diff -N lisp/nxml/nxml-parse.el
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lisp/nxml/nxml-parse.el     23 Nov 2007 06:57:50 -0000      1.1
@@ -0,0 +1,320 @@
+;;; nxml-parse.el --- XML parser, sharing infrastructure with nxml-mode
+
+;; Copyright (C) 2003 Free Software Foundation, Inc.
+
+;; Author: James Clark
+;; Keywords: XML
+
+;; This program is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License as
+;; published by the Free Software Foundation; either version 2 of
+;; the License, or (at your option) any later version.
+
+;; This program is distributed in the hope that it will be
+;; useful, but WITHOUT ANY WARRANTY; without even the implied
+;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+;; PURPOSE.  See the GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public
+;; License along with this program; if not, write to the Free
+;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+;; MA 02111-1307 USA
+
+;;; Commentary:
+
+;; Entry point is `nxml-parse-file'.
+
+;;; Code:
+
+(require 'nxml-util)
+(require 'xmltok)
+(require 'nxml-enc)
+(require 'nxml-ns)
+
+;; Name of the file being parsed.  `nxml-parse-file' binds this to its
+;; FILE argument while parsing; `nxml-parse-error' and
+;; `nxml-check-xmltok-errors' pass it along when reporting an error.
+(defvar nxml-parse-file-name nil)
+
+(defvar nxml-validate-function nil
+  "Nil or a function to be called by `nxml-parse-file' to perform validation.
+The function will be called once for each start-tag or end-tag.  The
+function is passed two arguments TEXT and START-TAG.  For a start-tag,
+START-TAG is a list (NAME ATTRIBUTES) where NAME and ATTRIBUTES are in
+the same form as returned by `nxml-parse-file'.  For an end-tag,
+START-TAG is nil.  TEXT is a string containing the text immediately
+preceding the tag, or nil if there was no such text.  An empty element
+is treated as a start-tag followed by an end-tag.
+
+For a start-tag, the namespace state will be the state after
+processing the namespace declarations in the start-tag.  For an
+end-tag, the namespace state will be the state before popping the
+namespace declarations for the corresponding start-tag.
+
+The function must return nil if no error is detected or a
+cons (MESSAGE . LOCATION) where MESSAGE is a string containing
+an error message and LOCATION indicates what caused the error
+as follows:
+
+- nil indicates the tag as a whole caused it; this is always allowed;
+
+- text indicates the text caused it; this is allowed only if
+TEXT is non-nil;
+
+- tag-close indicates the close of the tag caused it; this is
+allowed only if START-TAG is non-nil;
+
+- (attribute-name . N) indicates that the name of the Nth attribute
+caused it; N counts from 0; this is allowed only if START-TAG is non-nil
+and N must be less than the number of attributes;
+
+- (attribute-value . N) indicates that the value of the Nth attribute
+caused it; N counts from 0; this is allowed only if START-TAG is non-nil
+and N must be less than the number of attributes.")
+
+(defun nxml-parse-file (file)
+  "Parse the XML document in FILE and return it as a list.
+An XML element is represented as a list (NAME ATTRIBUTES . CHILDREN).
+NAME is either a string, in the case where the name does not have a
+namespace, or a cons (NAMESPACE . LOCAL-NAME), where NAMESPACE is a
+symbol and LOCAL-NAME is a string, in the case where the name does
+have a namespace.  NAMESPACE is a keyword whose name is `:URI', where
+URI is the namespace name.  ATTRIBUTES is an alist of attributes where
+each attribute has the form (NAME . VALUE), where NAME has the same
+form as an element name, and VALUE is a string.  A namespace
+declaration is represented as an attribute whose name is
+\(:http://www.w3.org/2000/xmlns/ . LOCAL-NAME).  CHILDREN is a list
+containing strings and child elements; CHILDREN never contains two
+consecutive strings and never contains an empty string.  Processing
+instructions and comments are not represented.  The return value is a
+list representing the document element.
+
+If the XML document is not well-formed, an error having the condition
+`nxml-file-parse-error' will be signaled; the error data will be a
+list of the form \(FILE POSITION MESSAGE), where POSITION is an integer
+specifying the position where the error was detected, and MESSAGE is a
+string describing the error.
+
+The current contents of FILE will be parsed even if there is a
+modified buffer currently visiting FILE.
+
+If the variable `nxml-validate-function' is non-nil, it will be
+called twice for each element, and any reported error will be signaled
+in the same way as a well-formedness error."
+  (let ((parse-buffer (nxml-parse-find-file file)))
+    (with-current-buffer parse-buffer
+      (unwind-protect
+          (let ((nxml-parse-file-name file))
+            (nxml-parse-instance))
+        ;; Kill the parse buffer by identity rather than "whatever is
+        ;; current", so that code run during the parse (for example a
+        ;; validation function) that leaves another buffer current
+        ;; cannot cause the wrong buffer to be killed.
+        (kill-buffer parse-buffer)))))
+
+(defun nxml-parse-find-file (file)
+  "Insert the contents of FILE into the buffer \" *nXML Parse*\".
+The buffer is created if necessary and erased first.  While FILE is
+inserted, `set-auto-coding-function' is bound to `nxml-set-xml-coding'.
+Return the buffer; the caller's current buffer is left unchanged."
+  (with-current-buffer (get-buffer-create " *nXML Parse*")
+    (erase-buffer)
+    (let ((set-auto-coding-function 'nxml-set-xml-coding))
+      (insert-file-contents file))
+    (current-buffer)))
+      
+;; Scan the prolog with `xmltok-forward-prolog', report an error if
+;; `xmltok-errors' is non-nil afterwards, and then parse the document
+;; instance with `nxml-parse-instance-1'.  `nxml-parse-file' calls this
+;; with the parse buffer current.
+(defun nxml-parse-instance ()
+  ;; `xmltok-dtd' starts out as nil for the duration of the parse.
+  (let (xmltok-dtd)
+    ;; NOTE(review): `xmltok-save' and `nxml-ns-save' are defined
+    ;; elsewhere; presumably they confine tokenizer and namespace
+    ;; state changes to their bodies and return the value of the last
+    ;; body form -- confirm against their definitions.
+    (xmltok-save
+      (xmltok-forward-prolog)
+      (nxml-check-xmltok-errors)
+      (nxml-ns-save
+       (nxml-parse-instance-1)))))
+
+;; Consume tokens with `xmltok-forward' until it returns nil, building
+;; the element tree as it goes, and return the list for the document
+;; element.  Reports a parse error (via `nxml-parse-error') for
+;; multiple top-level elements, an unbalanced or extra end-tag,
+;; non-whitespace text outside the document element, and a missing
+;; document element.
+(defun nxml-parse-instance-1 ()
+  (let* ((top (cons nil nil))
+        ;; tail is a cons cell, whose cdr is nil
+        ;; additional elements will be destructively appended to tail
+        (tail top)
+        ;; stack of tails one for each open element
+        tail-stack
+        ;; list of QNames of open elements
+        open-element-tags
+        ;; list of strings buffering a text node, in reverse order
+        text
+        ;; position of beginning of first (in buffer) string in text
+        text-pos)
+    (while (xmltok-forward)
+      (nxml-check-xmltok-errors)
+      (cond ((memq xmltok-type '(start-tag end-tag empty-element))
+            ;; A tag ends the pending text node: join the buffered
+            ;; pieces into one string and append it as a child.
+            (when text
+              (setq text (apply 'concat (nreverse text)))
+              (setcdr tail (cons text nil))
+              (setq tail (cdr tail)))
+            ;; Start-tag or empty element: open a new element.
+            (when (not (eq xmltok-type 'end-tag))
+              ;; No element is open, yet something has already been
+              ;; appended after TOP: this would be a second root.
+              (when (and (not open-element-tags)
+                         (not (eq tail top)))
+                (nxml-parse-error nil "Multiple top-level elements"))
+              (setq open-element-tags
+                    (cons (xmltok-start-tag-qname)
+                          open-element-tags))
+              (nxml-ns-push-state)
+              (let ((tag (nxml-parse-start-tag)))
+                (nxml-validate-tag text text-pos tag)
+                (setq text nil)
+                (setcdr tail (cons tag nil))
+                (setq tail (cdr tail))
+                ;; Remember where the new element's siblings continue,
+                ;; then make its children append after the last cell
+                ;; of TAG, giving (NAME ATTRIBUTES . CHILDREN).
+                (setq tail-stack (cons tail tail-stack))
+                (setq tail (last tag))))
+            ;; End-tag or empty element: close the innermost element.
+            (when (not (eq xmltok-type 'start-tag))
+              (or (eq xmltok-type 'empty-element)
+                  (equal (car open-element-tags)
+                         (xmltok-end-tag-qname))
+                  (if open-element-tags
+                      (nxml-parse-error nil
+                                        "Unbalanced end-tag; expected </%s>"
+                                        (car open-element-tags))
+                    (nxml-parse-error nil "Extra end-tag")))
+              (nxml-validate-tag text text-pos nil)
+              (setq text nil)
+              (nxml-ns-pop-state)
+              (setq open-element-tags (cdr open-element-tags))
+              (setq tail (car tail-stack))
+              (setq tail-stack (cdr tail-stack)))
+            ;; Any pending text has been consumed by now.
+            (setq text-pos nil))
+           ((memq xmltok-type '(space data entity-ref char-ref cdata-section))
+            (cond (open-element-tags
+                   ;; Inside an element: buffer the text, remembering
+                   ;; where the first piece started.
+                   (unless text-pos
+                     (setq text-pos xmltok-start))
+                   (setq text
+                         (cons (nxml-current-text-string) text)))
+                  ;; Outside the document element only `space' tokens
+                  ;; are tolerated (and dropped).
+                  ((not (eq xmltok-type 'space))
+                   (nxml-parse-error
+                    nil
+                    "%s at top-level"
+                    (cdr (assq xmltok-type
+                               '((data . "Text characters")
+                                 (entity-ref . "Entity reference")
+                                 (char-ref . "Character reference")
+                                 (cdata-section . "CDATA section"))))))))))
+    (unless (cdr top)
+      (nxml-parse-error (point-max) "Missing document element"))
+    ;; TOP is a dummy head cell; the document element follows it.
+    (cadr top)))
+
+(defun nxml-parse-start-tag ()
+  "Return the list (NAME ATTRIBUTES) for the current start-tag token.
+The namespace declarations in `xmltok-namespace-attributes' are
+processed first, so that they are in effect when the prefixes of the
+element name and of the attributes in `xmltok-attributes' are looked
+up.  ATTRIBUTES holds the ordinary attributes followed by the
+namespace declarations, each group in its original order.
+
+A parse error is reported for an invalid attribute value, a duplicate
+namespace declaration, an attempt to undeclare a namespace prefix, an
+undeclared prefix, or a duplicate attribute."
+  (let (parsed-attributes
+        parsed-namespace-attributes
+        atts att prefixes prefix ns value name)
+    ;; First pass: namespace declarations.
+    (setq atts xmltok-namespace-attributes)
+    (while atts
+      (setq att (car atts))
+      (setq value (or (xmltok-attribute-value att)
+                      (nxml-parse-error nil "Invalid attribute value")))
+      (setq ns (nxml-make-namespace value))
+      ;; PREFIX is nil when the declaration sets the default
+      ;; namespace; otherwise it is the prefix being declared, taken
+      ;; from the local part of the attribute name.
+      (setq prefix (and (xmltok-attribute-prefix att)
+                        (xmltok-attribute-local-name att)))
+      (cond ((member prefix prefixes)
+             (nxml-parse-error nil "Duplicate namespace declaration"))
+            ((not prefix)
+             (nxml-ns-set-default ns))
+            (ns
+             (nxml-ns-set-prefix prefix ns))
+            ;; A prefix with a nil NS cannot be bound.
+            (t (nxml-parse-error nil "Cannot undeclare namespace prefix")))
+      (setq prefixes (cons prefix prefixes))
+      (setq parsed-namespace-attributes
+            (cons (cons (nxml-make-name nxml-xmlns-namespace-uri
+                                        (xmltok-attribute-local-name att))
+                        value)
+                  parsed-namespace-attributes))
+      (setq atts (cdr atts)))
+    ;; Element name: a prefixed name needs a declared prefix, an
+    ;; unprefixed one takes the default namespace.
+    (setq name
+          (nxml-make-name
+           (let ((prefix (xmltok-start-tag-prefix)))
+             (if prefix
+                 (or (nxml-ns-get-prefix prefix)
+                     (nxml-parse-error (1+ xmltok-start)
+                                       "Prefix `%s' undeclared"
+                                       prefix))
+               (nxml-ns-get-default)))
+           (xmltok-start-tag-local-name)))
+    ;; Second pass: ordinary attributes.  An unprefixed attribute gets
+    ;; no namespace (NS is nil), so its name is a plain string.
+    (setq atts xmltok-attributes)
+    (while atts
+      (setq att (car atts))
+      (setq ns
+            (let ((prefix (xmltok-attribute-prefix att)))
+              (and prefix
+                   (or (nxml-ns-get-prefix prefix)
+                       (nxml-parse-error (xmltok-attribute-name-start att)
+                                         "Prefix `%s' undeclared"
+                                         prefix)))))
+      (setq parsed-attributes
+            (let ((nm (nxml-make-name ns
+                                      (xmltok-attribute-local-name att))))
+              (when (assoc nm parsed-attributes)
+                (nxml-parse-error (xmltok-attribute-name-start att)
+                                  "Duplicate attribute"))
+              (cons (cons nm (or (xmltok-attribute-value att)
+                                 (nxml-parse-error nil "Invalid attribute value")))
+                    parsed-attributes)))
+      (setq atts (cdr atts)))
+    ;; We want to end up with the attributes followed by the
+    ;; namespace attributes in the same order as
+    ;; xmltok-attributes and xmltok-namespace-attributes respectively.
+    ;; Both lists were built in reverse, so put the namespace
+    ;; attributes in front and reverse the whole thing once.
+    (when parsed-namespace-attributes
+      (setq parsed-attributes
+            (nconc parsed-namespace-attributes parsed-attributes)))
+    (list name (nreverse parsed-attributes))))
+
+(defun nxml-validate-tag (text text-pos tag)
+  "Call `nxml-validate-function', if it is non-nil, for the current tag.
+TEXT is the text preceding the tag, or nil; TEXT-POS is the buffer
+position where that text starts.  TAG is the parsed start-tag as
+returned by `nxml-parse-start-tag', or nil for an end-tag.
+
+If the validation function returns (MESSAGE . LOCATION), report a
+parse error with MESSAGE at the position that LOCATION designates.
+Signal a plain error if LOCATION does not designate a position."
+  (when nxml-validate-function
+    (let ((err (funcall nxml-validate-function text tag))
+          pos)
+      (when err
+        (setq pos (nxml-validate-error-position (cdr err)
+                                                (and text text-pos)
+                                                tag))
+        (or pos (error "Incorrect return value from %s"
+                       nxml-validate-function))
+        ;; MESSAGE is plain text, not a format string: pass it through
+        ;; "%s" so that a literal `%' in it cannot break `format'.
+        (nxml-parse-error pos "%s" (car err))))))
+
+(defun nxml-validate-error-position (location text-pos tag)
+  "Return the buffer position designated by LOCATION, or nil if it is invalid.
+LOCATION is the cdr of the value returned by `nxml-validate-function'.
+TEXT-POS is the position of the text preceding the tag, or nil if
+there was none.  TAG is the parsed start-tag, or nil for an end-tag.
+
+For (attribute-name . N) and (attribute-value . N), N indexes
+`xmltok-attributes' followed by `xmltok-namespace-attributes'."
+  (cond ((null location) xmltok-start)
+        ((eq location 'text) text-pos)
+        ((eq location 'tag-close)
+         ;; Point is presumably just after the tag: step back over
+         ;; "/>" for an empty element and over ">" otherwise.
+         (and tag (- (point) (if (eq xmltok-type 'empty-element) 2 1))))
+        ((consp location)
+         (let* ((index (cdr location))
+                (att (or (nth index xmltok-attributes)
+                         (nth (- index (length xmltok-attributes))
+                              xmltok-namespace-attributes))))
+           ;; The first clause must yield nil explicitly: a `cond'
+           ;; clause consisting only of a condition returns that
+           ;; condition's value, which would be t here and would be
+           ;; mistaken by the caller for a valid position.
+           (cond ((not att) nil)
+                 ((eq (car location) 'attribute-name)
+                  (xmltok-attribute-name-start att))
+                 ((eq (car location) 'attribute-value)
+                  (xmltok-attribute-value-start att)))))))
+
+(defun nxml-make-name (ns local-name)
+  "Return the name made from namespace NS and LOCAL-NAME.
+The result is the cons (NS . LOCAL-NAME) when NS is non-nil, and
+LOCAL-NAME by itself otherwise."
+  (cond (ns (cons ns local-name))
+        (t local-name)))
+
+(defun nxml-current-text-string ()
+  "Return the text that the current token contributes, as a string.
+For `space' and `data' tokens this is the buffer text from
+`xmltok-start' to point.  For a `cdata-section' the first 9 and the
+last 3 characters of that span are dropped.  For `char-ref' and
+`entity-ref' it is `xmltok-replacement'; a parse error is reported
+when that is nil.  Any other token type yields nil."
+  (let ((kind xmltok-type)
+        (from xmltok-start)
+        (to (point)))
+    (cond ((eq kind 'cdata-section)
+           ;; "<![CDATA[" is 9 characters long and "]]>" is 3; this
+           ;; assumes the token spans the whole section.
+           (buffer-substring-no-properties (+ from 9) (- to 3)))
+          ((or (eq kind 'space) (eq kind 'data))
+           (buffer-substring-no-properties from to))
+          ((or (eq kind 'char-ref) (eq kind 'entity-ref))
+           (when (null xmltok-replacement)
+             (nxml-parse-error nil
+                               (if (eq kind 'entity-ref)
+                                   "Unresolvable entity reference"
+                                 "Reference to unsupported Unicode character")))
+           xmltok-replacement))))
+
+(defun nxml-parse-error (position &rest args)
+  "Report a parse error at POSITION for the file being parsed.
+ARGS are a format string followed by its arguments, as for `format'.
+When POSITION is nil, `xmltok-start' is used instead.  The file name
+\(`nxml-parse-file-name'), the position and the formatted message are
+handed to `nxml-signal-file-parse-error'."
+  (let ((where (or position xmltok-start))
+        (description (apply 'format args)))
+    (nxml-signal-file-parse-error nxml-parse-file-name where description)))
+
+(defun nxml-check-xmltok-errors ()
+  "Report the last element of `xmltok-errors', if the list is non-empty.
+Its start position and message are handed, together with
+`nxml-parse-file-name', to `nxml-signal-file-parse-error'.  Do
+nothing and return nil when `xmltok-errors' is nil."
+  (if xmltok-errors
+      (let* ((final-cell (last xmltok-errors))
+             (err (car final-cell)))
+        (nxml-signal-file-parse-error nxml-parse-file-name
+                                      (xmltok-error-start err)
+                                      (xmltok-error-message err)))))
+
+(provide 'nxml-parse)
+
+;;; nxml-parse.el ends here




reply via email to

[Prev in Thread] Current Thread [Next in Thread]