bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Adding namespace support to xml.el


From: Mark A. Hershberger
Subject: Adding namespace support to xml.el
Date: Thu, 12 Jun 2003 11:07:50 -0500

I'm looking for feedback on the attached patch to add namespace support
to Emacs' xml.el.

The change is backward compatible.  An optional argument (parse-ns) is
added to xml-parse-file, xml-parse-region, and xml-parse-tag.  If the
argument is non-nil, then namespace processing is done.  If it is a
cons, then the cons is used to map namespace prefixes to URIs.

With this change to accommodate namespace processing, xml.el would
produce expanded-names (http://www.w3.org/TR/xpath#dt-expanded-name).

Consider the following piece of XML:

  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
           
xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/";
           xmlns:dc="http://purl.org/dc/elements/1.1/";>
    <rdf:Description
      rdf:about="http://www.intertwingly.net/blog/1170.rdf";
      trackback:ping="http://www.intertwingly.net/blog/1170.tb";
      dc:title="Columbia"
      dc:identifier="http://www.intertwingly.net/blog/1170.html"; />
  </rdf:RDF>

Using the current xml.el, this parses to something like:

((rdf:RDF
  ((xmlns:rdf . "http://www.w3.org/1999/02/22-rdf-syntax-ns#";)
   (xmlns:trackback . "http://madskills.com/public/xml/rss/module/trackback/";)
   (xmlns:dc . "http://purl.org/dc/elements/1.1/";))
  (rdf:Description
   ((rdf:about . "http://www.intertwingly.net/blog/1170.rdf";)
    (trackback:ping . "http://www.intertwingly.net/blog/1170.tb";)
    (dc:title . "Columbia")
    (dc:identifier . "http://www.intertwingly.net/blog/1170.html";)))))

This is adequate for basic XML, but when people use namespaces (and
change the prefix from the conventional usage (e.g. using "dublin"
instead of "dc"), using the parsed data becomes much more difficult.

My proposal is that the xml.el user would be able to specify namespace
processing.  In the above situation, this would result in something like:

(({http://www\.w3\.org/1999/02/22-rdf-syntax-ns\#}RDF
  (({http://www\.w3\.org/2000/xmlns/}rdf
    . "http://www.w3.org/1999/02/22-rdf-syntax-ns#";)
   ({http://www\.w3\.org/2000/xmlns/}trackback
    . "http://madskills.com/public/xml/rss/module/trackback/";)
   ({http://www\.w3\.org/2000/xmlns/}dc
    . "http://purl.org/dc/elements/1.1/";))
  ({http://www\.w3\.org/1999/02/22-rdf-syntax-ns\#}Description
   (({http://www\.w3\.org/1999/02/22-rdf-syntax-ns\#}about
     . "http://www.intertwingly.net/blog/1170.rdf";)
    ({http://madskills\.com/public/xml/rss/module/trackback/}ping
     . "http://www.intertwingly.net/blog/1170.tb";)
    ({http://purl\.org/dc/elements/1\.1/}title . "Columbia")
    ({http://purl\.org/dc/elements/1\.1/}identifier
     . "http://www.intertwingly.net/blog/1170.html";)))))

Thoughts?

(Please note that this patch needs some work, so it isn't ready to be
committed just yet.  This is for comment only!)

--- xml.el      30 May 2003 16:02:26 -0000      1.21
+++ xml.el      12 Jun 2003 15:43:31 -0000
@@ -121,11 +121,13 @@
 ;;*******************************************************************
 
 ;;;###autoload
-(defun xml-parse-file (file &optional parse-dtd)
+(defun xml-parse-file (file &optional parse-dtd parse-ns)
   "Parse the well-formed XML file FILE.
 If FILE is already visited, use its buffer and don't kill it.
 Returns the top node with all its children.
-If PARSE-DTD is non-nil, the DTD is parsed rather than skipped."
+If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
+If PARSE-NS is non-nil, then namespace information is carried through
+the attributes."
   (let ((keep))
     (if (get-file-buffer file)
        (progn
@@ -137,7 +139,7 @@
     (let ((xml (xml-parse-region (point-min)
                                 (point-max)
                                 (current-buffer)
-                                parse-dtd)))
+                                parse-dtd parse-ns)))
       (if keep
          (goto-char keep)
        (kill-buffer (current-buffer)))
@@ -184,13 +186,15 @@
 ;;   prolog    ::=    XMLDecl? Misc* (doctypedecl Misc*)?
 
 ;;;###autoload
-(defun xml-parse-region (beg end &optional buffer parse-dtd)
+(defun xml-parse-region (beg end &optional buffer parse-dtd parse-ns)
   "Parse the region from BEG to END in BUFFER.
 If BUFFER is nil, it defaults to the current buffer.
 Returns the XML list for the region, or raises an error if the region
-is not a well-formed XML file.
+Is not a well-formed XML file.
 If PARSE-DTD is non-nil, the DTD is parsed rather than skipped,
-and returned as the first element of the list."
+and returned as the first element of the list.
+If PARSE-NS is non-nil, then namespace information is carried through
+the attributes."
   (save-restriction
     (narrow-to-region beg end)
     ;; Use fixed syntax table to ensure regexp char classes and syntax
@@ -209,7 +213,7 @@
                  (if xml
                      ;;  translation of rule [1] of XML specifications
                      (error "XML files can have only one toplevel tag")
-                   (setq result (xml-parse-tag parse-dtd))
+                   (setq result (xml-parse-tag parse-dtd parse-ns))
                    (cond
                     ((null result))
                     ((listp (car result))
@@ -219,62 +223,111 @@
                     (t
                      (add-to-list 'xml result)))))
              (goto-char (point-max))))
+         (print xml (get-buffer-create "t"))
          (if parse-dtd
              (cons dtd (nreverse xml))
            (nreverse xml)))))))
 
 
-(defun xml-parse-tag (&optional parse-dtd)
+(defun xml-parse-tag (&optional parse-dtd parse-ns)
   "Parse the tag at point.
 If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
 returned as the first element in the list.
+If PARSE-NS is non-nil then namespace information is carried in the attributes.
 Returns one of:
  - a list : the matching node
  - nil    : the point is not looking at a tag.
  - a pair : the first element is the DTD, the second is the node."
-  (cond
-   ;; Processing instructions (like the <?xml version="1.0"?> tag at the
-   ;; beginning of a document).
-   ((looking-at "<\\?")
-    (search-forward "?>")
-    (skip-syntax-forward " ")
-    (xml-parse-tag parse-dtd))
-   ;;  Character data (CDATA) sections, in which no tag should be interpreted
-   ((looking-at "<!\\[CDATA\\[")
-    (let ((pos (match-end 0)))
-      (unless (search-forward "]]>" nil t)
-       (error "CDATA section does not end anywhere in the document"))
-      (buffer-substring pos (match-beginning 0))))
-   ;;  DTD for the document
-   ((looking-at "<!DOCTYPE")
-    (let (dtd)
-      (if parse-dtd
-         (setq dtd (xml-parse-dtd))
-       (xml-skip-dtd))
+  (let ((xml-ns (if (consp parse-ns)
+                   parse-ns
+                 (if parse-ns
+                     (list
+                      (cons "" "")
+                      (cons "xmlns" "http://www.w3.org/2000/xmlns/";))))))
+    (cond
+     ;; Processing instructions (like the <?xml version="1.0"?> tag at the
+     ;; beginning of a document).
+     ((looking-at "<\\?")
+      (search-forward "?>")
+      (skip-syntax-forward " ")
+      (xml-parse-tag parse-dtd xml-ns))
+     ;;  Character data (CDATA) sections, in which no tag should be interpreted
+     ((looking-at "<!\\[CDATA\\[")
+      (let ((pos (match-end 0)))
+       (unless (search-forward "]]>" nil t)
+         (error "CDATA section does not end anywhere in the document"))
+       (buffer-substring pos (match-beginning 0))))
+     ;;  DTD for the document
+     ((looking-at "<!DOCTYPE")
+      (let (dtd)
+       (if parse-dtd
+           (setq dtd (xml-parse-dtd))
+         (xml-skip-dtd))
       (skip-syntax-forward " ")
       (if dtd
-         (cons dtd (xml-parse-tag))
-       (xml-parse-tag))))
-   ;;  skip comments
-   ((looking-at "<!--")
-    (search-forward "-->")
-    nil)
-   ;;  end tag
-   ((looking-at "</")
-    '())
-   ;;  opening tag
-   ((looking-at "<\\([^/>[:space:]]+\\)")
-    (goto-char (match-end 1))
-    (let* ((node-name (match-string 1))
-          ;; Parse the attribute list.
-          (children (list (xml-parse-attlist) (intern node-name)))
-          pos)
-
-      ;; is this an empty element ?
-      (if (looking-at "/>")
-         (progn
-           (forward-char 2)
-           (nreverse children))
+         (cons dtd (xml-parse-tag nil xml-ns))
+       (xml-parse-tag nil xml-ns))))
+     ;;  skip comments
+     ((looking-at "<!--")
+      (search-forward "-->")
+      nil)
+     ;;  end tag
+     ((looking-at "</")
+      "")
+     ;;  opening tag
+     ((looking-at "<\\([^/>[:space:]]+\\)")
+      (goto-char (match-end 1))
+      (let* ((node-name (match-string 1))
+            ;; Parse the attribute list.
+            (children (list (xml-parse-attlist) (intern node-name)))
+            pos)
+
+       ;; add the xmlns:* attrs to our cache
+       (when (consp xml-ns)
+         (mapcar
+          (lambda (attr)
+            (let* ((splitup (split-string (symbol-name (car attr)) ":"))
+                   (prefix (nth 0 splitup))
+                   (lname (nth 1 splitup)))
+              (when (string= "xmlns" prefix)
+                (setq xml-ns (append (list (cons (if lname 
+                                                     lname
+                                                   "")
+                                                 (cdr attr)))
+                                     xml-ns)))))
+          (car children))
+         (let* ((splitup (split-string (symbol-name (cadr children)) ":"))
+                (lname (or (nth 1 splitup) 
+                           (nth 0 splitup)))
+                (prefix (if (nth 1 splitup)
+                            (nth 0 splitup)
+                          "")))
+           (setcdr children (list 
+                             (intern (concat "{" 
+                                            (cdr (assoc-string prefix xml-ns))
+                                            "}" lname)))))
+
+         (mapcar
+          (lambda (attr)
+            (let* ((splitup (split-string (symbol-name (car attr)) ":"))
+                   (lname (or (nth 1 splitup)
+                              (nth 0 splitup)))
+                   (prefix (if (nth 1 splitup)
+                               (nth 0 splitup)
+                             (caar xml-ns))))
+
+              (print (cdar xml-ns) (get-buffer-create "t"))
+              
+              (setcar attr (intern (concat "{"
+                                           (cdr (assoc-string prefix xml-ns))
+                                           "}" lname)))))
+          (car children)))
+
+       ;; is this an empty element ?
+       (if (looking-at "/>")
+       (progn
+         (forward-char 2)
+         (nreverse children))
 
        ;; is this a valid start tag ?
        (if (eq (char-after) ?>)
@@ -289,7 +342,7 @@
                    (error "XML: Invalid end tag (expecting %s) at pos %d"
                           node-name (point)))
                   ((= (char-after) ?<)
-                   (let ((tag (xml-parse-tag)))
+                   (let ((tag (xml-parse-tag nil xml-ns)))
                      (when tag
                        (push tag children))))
                   (t
@@ -320,8 +373,8 @@
              (nreverse children))
          ;;  This was an invalid start tag
          (error "XML: Invalid attribute list")))))
-   (t ;; This is not a tag.
-    (error "XML: Invalid character"))))
+     (t        ;; This is not a tag.
+      (error "XML: Invalid character")))))
 
 (defun xml-parse-attlist ()
   "Return the attribute-list after point.




reply via email to

[Prev in Thread] Current Thread [Next in Thread]