Line data Source code
1 : ;;; xml.el --- XML parser -*- lexical-binding: t -*-
2 :
3 : ;; Copyright (C) 2000-2017 Free Software Foundation, Inc.
4 :
5 : ;; Author: Emmanuel Briot <briot@gnat.com>
6 : ;; Maintainer: Mark A. Hershberger <mah@everybody.org>
7 : ;; Keywords: xml, data
8 :
9 : ;; This file is part of GNU Emacs.
10 :
11 : ;; GNU Emacs is free software: you can redistribute it and/or modify
12 : ;; it under the terms of the GNU General Public License as published by
13 : ;; the Free Software Foundation, either version 3 of the License, or
14 : ;; (at your option) any later version.
15 :
16 : ;; GNU Emacs is distributed in the hope that it will be useful,
17 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 : ;; GNU General Public License for more details.
20 :
21 : ;; You should have received a copy of the GNU General Public License
22 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 :
24 : ;;; Commentary:
25 :
26 : ;; This file contains a somewhat incomplete non-validating XML parser. It
27 : ;; parses a file, and returns a list that can be used internally by
28 : ;; any other Lisp libraries.
29 :
30 : ;;; FILE FORMAT
31 :
32 : ;; The document type declaration may either be ignored or (optionally)
33 : ;; parsed, but currently the parsing will only accept element
34 : ;; declarations. The XML file is assumed to be well-formed. In case
35 : ;; of error, the parsing stops and the XML file is shown where the
36 : ;; parsing stopped.
37 : ;;
38 : ;; It also knows how to ignore comments and processing instructions.
39 : ;;
40 : ;; The XML file should have the following format:
41 : ;; <node1 attr1="name1" attr2="name2" ...>value
42 : ;; <node2 attr3="name3" attr4="name4">value2</node2>
43 : ;; <node3 attr5="name5" attr6="name6">value3</node3>
44 : ;; </node1>
45 : ;; Of course, the name of the nodes and attributes can be anything. There can
46 : ;; be any number of attributes (or none), as well as any number of children
47 : ;; below the nodes.
48 : ;;
49 : ;; There can be only top level node, but with any number of children below.
50 :
51 : ;;; LIST FORMAT
52 :
53 : ;; The functions `xml-parse-file', `xml-parse-region' and
54 : ;; `xml-parse-tag' return a list with the following format:
55 : ;;
56 : ;; xml-list ::= (node node ...)
57 : ;; node ::= (qname attribute-list . child_node_list)
58 : ;; child_node_list ::= child_node child_node ...
59 : ;; child_node ::= node | string
60 : ;; qname ::= (:namespace-uri . "name") | "name"
61 : ;; attribute_list ::= ((qname . "value") (qname . "value") ...)
62 : ;; | nil
63 : ;; string ::= "..."
64 : ;;
65 : ;; Some macros are provided to ease the parsing of this list.
66 : ;; Whitespace is preserved. Fixme: There should be a tree-walker that
67 : ;; can remove it.
68 :
69 : ;; TODO:
70 : ;; * xml:base, xml:space support
71 : ;; * more complete DOCTYPE parsing
72 : ;; * pi support
73 :
74 : ;;; Code:
75 :
76 : ;; Note that buffer-substring and match-string were formerly used in
77 : ;; several places, because the -no-properties variants remove
78 : ;; composition info. However, after some discussion on emacs-devel,
79 : ;; the consensus was that the speed of the -no-properties variants was
80 : ;; a worthwhile tradeoff especially since we're usually parsing files
81 : ;; instead of hand-crafted XML.
82 :
83 : ;;; Macros to parse the list
84 :
85 : (defconst xml-undefined-entity "?"
86 : "What to substitute for undefined entities")
87 :
88 : (defconst xml-default-ns '(("" . "")
89 : ("xml" . "http://www.w3.org/XML/1998/namespace")
90 : ("xmlns" . "http://www.w3.org/2000/xmlns/"))
91 : "Alist mapping default XML namespaces to their URIs.")
92 :
93 : (defvar xml-entity-alist
94 : '(("lt" . "<")
95 : ("gt" . ">")
96 : ("apos" . "'")
97 : ("quot" . "\"")
98 : ("amp" . "&"))
99 : "Alist mapping XML entities to their replacement text.")
100 :
101 : (defvar xml-entity-expansion-limit 20000
102 : "The maximum size of entity reference expansions.
103 : If the size of the buffer increases by this many characters while
104 : expanding entity references in a segment of character data, the
105 : XML parser signals an error. Setting this to nil removes the
106 : limit (making the parser vulnerable to XML bombs).")
107 :
108 : (defvar xml-parameter-entity-alist nil
109 : "Alist of defined XML parametric entities.")
110 :
111 : (defvar xml-sub-parser nil
112 : "Non-nil when the XML parser is parsing an XML fragment.")
113 :
114 : (defvar xml-validating-parser nil
115 : "Set to non-nil to get validity checking.")
116 :
117 : (defsubst xml-node-name (node)
118 : "Return the tag associated with NODE.
119 : Without namespace-aware parsing, the tag is a symbol.
120 :
121 : With namespace-aware parsing, the tag is a cons of a string
122 : representing the uri of the namespace with the local name of the
123 : tag. For example,
124 :
125 : <foo>
126 :
127 : would be represented by
128 :
129 : (\"\" . \"foo\").
130 :
131 : If you'd just like a plain symbol instead, use `symbol-qnames' in
132 : the PARSE-NS argument."
133 :
134 0 : (car node))
135 :
136 : (defsubst xml-node-attributes (node)
137 : "Return the list of attributes of NODE.
138 : The list can be nil."
139 0 : (nth 1 node))
140 :
141 : (defsubst xml-node-children (node)
142 : "Return the list of children of NODE.
143 : This is a list of nodes, and it can be nil."
144 0 : (cddr node))
145 :
146 : (defun xml-get-children (node child-name)
147 : "Return the children of NODE whose tag is CHILD-NAME.
148 : CHILD-NAME should match the value returned by `xml-node-name'."
149 0 : (let ((match ()))
150 0 : (dolist (child (xml-node-children node))
151 0 : (if (and (listp child)
152 0 : (equal (xml-node-name child) child-name))
153 0 : (push child match)))
154 0 : (nreverse match)))
155 :
156 : (defun xml-get-attribute-or-nil (node attribute)
157 : "Get from NODE the value of ATTRIBUTE.
158 : Return nil if the attribute was not found.
159 :
160 : See also `xml-get-attribute'."
161 0 : (cdr (assoc attribute (xml-node-attributes node))))
162 :
163 : (defsubst xml-get-attribute (node attribute)
164 : "Get from NODE the value of ATTRIBUTE.
165 : An empty string is returned if the attribute was not found.
166 :
167 : See also `xml-get-attribute-or-nil'."
168 0 : (or (xml-get-attribute-or-nil node attribute) ""))
169 :
170 : ;;; Regular expressions for XML components
171 :
172 : ;; The following regexps are used as subexpressions in regexps that
173 : ;; are `eval-when-compile'd for efficiency, so they must be defined at
174 : ;; compile time.
175 : (eval-and-compile
176 :
177 : ;; [4] NameStartChar
178 : ;; See the definition of word syntax in `xml-syntax-table'.
179 : (defconst xml-name-start-char-re (concat "[[:word:]:_]"))
180 :
181 : ;; [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7
182 : ;; | [#x0300-#x036F] | [#x203F-#x2040]
183 : (defconst xml-name-char-re (concat "[-0-9.[:word:]:_·̀-ͯ‿-⁀]"))
184 :
185 : ;; [5] Name ::= NameStartChar (NameChar)*
186 : (defconst xml-name-re (concat xml-name-start-char-re xml-name-char-re "*"))
187 :
188 : ;; [6] Names ::= Name (#x20 Name)*
189 : (defconst xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*"))
190 :
191 : ;; [7] Nmtoken ::= (NameChar)+
192 : (defconst xml-nmtoken-re (concat xml-name-char-re "+"))
193 :
194 : ;; [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
195 : (defconst xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*"))
196 :
197 : ;; [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
198 : (defconst xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)")
199 :
200 : ;; [68] EntityRef ::= '&' Name ';'
201 : (defconst xml-entity-ref (concat "&" xml-name-re ";"))
202 :
203 : (defconst xml-entity-or-char-ref-re (concat "&\\(?:#\\(x\\)?\\([0-9a-fA-F]+\\)\\|\\("
204 : xml-name-re "\\)\\);"))
205 :
206 : ;; [69] PEReference ::= '%' Name ';'
207 : (defconst xml-pe-reference-re (concat "%\\(" xml-name-re "\\);"))
208 :
209 : ;; [67] Reference ::= EntityRef | CharRef
210 : (defconst xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)"))
211 :
212 : ;; [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
213 : ;; | "'" ([^<&'] | Reference)* "'"
214 : (defconst xml-att-value-re (concat "\\(?:\"\\(?:[^&\"]\\|"
215 : xml-reference-re "\\)*\"\\|"
216 : "'\\(?:[^&']\\|" xml-reference-re
217 : "\\)*'\\)"))
218 :
219 : ;; [56] TokenizedType ::= 'ID'
220 : ;; [VC: ID] [VC: One ID / Element Type] [VC: ID Attribute Default]
221 : ;; | 'IDREF' [VC: IDREF]
222 : ;; | 'IDREFS' [VC: IDREF]
223 : ;; | 'ENTITY' [VC: Entity Name]
224 : ;; | 'ENTITIES' [VC: Entity Name]
225 : ;; | 'NMTOKEN' [VC: Name Token]
226 : ;; | 'NMTOKENS' [VC: Name Token]
227 : (defconst xml-tokenized-type-re (concat "\\(?:ID\\|IDREF\\|IDREFS\\|ENTITY\\|"
228 : "ENTITIES\\|NMTOKEN\\|NMTOKENS\\)"))
229 :
230 : ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
231 : (defconst xml-notation-type-re
232 : (concat "\\(?:NOTATION\\s-+(\\s-*" xml-name-re
233 : "\\(?:\\s-*|\\s-*" xml-name-re "\\)*\\s-*)\\)"))
234 :
235 : ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
236 : ;; [VC: Enumeration] [VC: No Duplicate Tokens]
237 : (defconst xml-enumeration-re (concat "\\(?:(\\s-*" xml-nmtoken-re
238 : "\\(?:\\s-*|\\s-*" xml-nmtoken-re
239 : "\\)*\\s-+)\\)"))
240 :
241 : ;; [57] EnumeratedType ::= NotationType | Enumeration
242 : (defconst xml-enumerated-type-re (concat "\\(?:" xml-notation-type-re
243 : "\\|" xml-enumeration-re "\\)"))
244 :
245 : ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType
246 : ;; [55] StringType ::= 'CDATA'
247 : (defconst xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re
248 : "\\|" xml-notation-type-re
249 : "\\|" xml-enumerated-type-re "\\)"))
250 :
251 : ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
252 : (defconst xml-default-decl-re (concat "\\(?:#REQUIRED\\|#IMPLIED\\|"
253 : "\\(?:#FIXED\\s-+\\)*"
254 : xml-att-value-re "\\)"))
255 :
256 : ;; [53] AttDef ::= S Name S AttType S DefaultDecl
257 : (defconst xml-att-def-re (concat "\\(?:\\s-*" xml-name-re
258 : "\\s-*" xml-att-type-re
259 : "\\s-*" xml-default-decl-re "\\)"))
260 :
261 : ;; [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
262 : ;; | "'" ([^%&'] | PEReference | Reference)* "'"
263 : (defconst xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|"
264 : xml-pe-reference-re
265 : "\\|" xml-reference-re
266 : "\\)*\"\\|'\\(?:[^%&']\\|"
267 : xml-pe-reference-re "\\|"
268 : xml-reference-re "\\)*'\\)"))
269 : ) ; End of `eval-when-compile'
270 :
271 :
272 : ;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
273 : ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
274 : ;; [76] NDataDecl ::= S 'NDATA' S
275 : ;; [73] EntityDef ::= EntityValue| (ExternalID NDataDecl?)
276 : ;; [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
277 : ;; [74] PEDef ::= EntityValue | ExternalID
278 : ;; [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
279 : ;; [70] EntityDecl ::= GEDecl | PEDecl
280 :
281 : ;; Note that this is setup so that we can do whitespace-skipping with
282 : ;; `(skip-syntax-forward " ")', inter alia. Previously this was slow
283 : ;; compared with `re-search-forward', but that has been fixed.
284 :
285 : (defvar xml-syntax-table
286 : ;; By default, characters have symbol syntax.
287 : (let ((table (make-char-table 'syntax-table '(3))))
288 : ;; The XML space chars [3], and nothing else, have space syntax.
289 : (dolist (c '(?\s ?\t ?\r ?\n))
290 : (modify-syntax-entry c " " table))
291 : ;; The characters in NameStartChar [4], aside from ':' and '_',
292 : ;; have word syntax. This is used by `xml-name-start-char-re'.
293 : (modify-syntax-entry '(?A . ?Z) "w" table)
294 : (modify-syntax-entry '(?a . ?z) "w" table)
295 : (modify-syntax-entry '(#xC0 . #xD6) "w" table)
296 : (modify-syntax-entry '(#xD8 . #XF6) "w" table)
297 : (modify-syntax-entry '(#xF8 . #X2FF) "w" table)
298 : (modify-syntax-entry '(#x370 . #X37D) "w" table)
299 : (modify-syntax-entry '(#x37F . #x1FFF) "w" table)
300 : (modify-syntax-entry '(#x200C . #x200D) "w" table)
301 : (modify-syntax-entry '(#x2070 . #x218F) "w" table)
302 : (modify-syntax-entry '(#x2C00 . #x2FEF) "w" table)
303 : (modify-syntax-entry '(#x3001 . #xD7FF) "w" table)
304 : (modify-syntax-entry '(#xF900 . #xFDCF) "w" table)
305 : (modify-syntax-entry '(#xFDF0 . #xFFFD) "w" table)
306 : (modify-syntax-entry '(#x10000 . #xEFFFF) "w" table)
307 : table)
308 : "Syntax table used by the XML parser.
309 : In this syntax table, the XML space characters [ \\t\\r\\n], and
310 : only those characters, have whitespace syntax.")
311 :
312 : ;;; Entry points:
313 :
314 : ;;;###autoload
315 : (defun xml-parse-file (file &optional parse-dtd parse-ns)
316 : "Parse the well-formed XML file FILE.
317 : Return the top node with all its children.
318 : If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
319 :
320 : If PARSE-NS is non-nil, then QNAMES are expanded. By default,
321 : the variable `xml-default-ns' is the mapping from namespaces to
322 : URIs, and expanded names will be returned as a cons
323 :
324 : (\"namespace:\" . \"foo\").
325 :
326 : If PARSE-NS is an alist, it will be used as the mapping from
327 : namespace to URIs instead.
328 :
329 : If it is the symbol `symbol-qnames', expanded names will be
330 : returned as a plain symbol `namespace:foo' instead of a cons.
331 :
332 : Both features can be combined by providing a cons cell
333 :
334 : (symbol-qnames . ALIST)."
335 0 : (with-temp-buffer
336 0 : (insert-file-contents file)
337 0 : (xml--parse-buffer parse-dtd parse-ns)))
338 :
339 : ;;;###autoload
340 : (defun xml-parse-region (&optional beg end buffer parse-dtd parse-ns)
341 : "Parse the region from BEG to END in BUFFER.
342 : Return the XML parse tree, or raise an error if the region does
343 : not contain well-formed XML.
344 :
345 : If BEG is nil, it defaults to `point-min'.
346 : If END is nil, it defaults to `point-max'.
347 : If BUFFER is nil, it defaults to the current buffer.
348 : If PARSE-DTD is non-nil, parse the DTD and return it as the first
349 : element of the list.
350 : If PARSE-NS is non-nil, then QNAMES are expanded. By default,
351 : the variable `xml-default-ns' is the mapping from namespaces to
352 : URIs, and expanded names will be returned as a cons
353 :
354 : (\"namespace:\" . \"foo\").
355 :
356 : If PARSE-NS is an alist, it will be used as the mapping from
357 : namespace to URIs instead.
358 :
359 : If it is the symbol `symbol-qnames', expanded names will be
360 : returned as a plain symbol `namespace:foo' instead of a cons.
361 :
362 : Both features can be combined by providing a cons cell
363 :
364 : (symbol-qnames . ALIST)."
365 : ;; Use fixed syntax table to ensure regexp char classes and syntax
366 : ;; specs DTRT.
367 0 : (unless buffer
368 0 : (setq buffer (current-buffer)))
369 0 : (with-temp-buffer
370 0 : (insert-buffer-substring-no-properties buffer beg end)
371 0 : (xml--parse-buffer parse-dtd parse-ns)))
372 :
373 : ;; XML [5]
374 :
375 : ;; Fixme: This needs re-writing to deal with the XML grammar properly, i.e.
376 : ;; document ::= prolog element Misc*
377 : ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
378 :
379 : (defun xml--parse-buffer (parse-dtd parse-ns)
380 0 : (with-syntax-table xml-syntax-table
381 0 : (let ((case-fold-search nil) ; XML is case-sensitive.
382 : ;; Prevent entity definitions from changing the defaults
383 0 : (xml-entity-alist xml-entity-alist)
384 0 : (xml-parameter-entity-alist xml-parameter-entity-alist)
385 : xml result dtd)
386 0 : (goto-char (point-min))
387 0 : (while (not (eobp))
388 0 : (if (search-forward "<" nil t)
389 0 : (progn
390 0 : (forward-char -1)
391 0 : (setq result (xml-parse-tag-1 parse-dtd parse-ns))
392 0 : (cond
393 0 : ((null result)
394 : ;; Not looking at an xml start tag.
395 0 : (unless (eobp)
396 0 : (forward-char 1)))
397 0 : ((and xml (not xml-sub-parser))
398 : ;; Translation of rule [1] of XML specifications
399 0 : (error "XML: (Not Well-Formed) Only one root tag allowed"))
400 0 : ((and (listp (car result))
401 0 : parse-dtd)
402 0 : (setq dtd (car result))
403 0 : (if (cdr result) ; possible leading comment
404 0 : (push (cdr result) xml)))
405 : (t
406 0 : (push result xml))))
407 0 : (goto-char (point-max))))
408 0 : (if parse-dtd
409 0 : (cons dtd (nreverse xml))
410 0 : (nreverse xml)))))
411 :
412 : (defun xml-maybe-do-ns (name default xml-ns)
413 : "Perform any namespace expansion.
414 : NAME is the name to perform the expansion on.
415 : DEFAULT is the default namespace. XML-NS is a cons of namespace
416 : names to uris. When namespace-aware parsing is off, then XML-NS
417 : is nil.
418 :
419 : During namespace-aware parsing, any name without a namespace is
420 : put into the namespace identified by DEFAULT. nil is used to
421 : specify that the name shouldn't be given a namespace.
422 : Expanded names will by default be returned as a cons. If you
423 : would like to get plain symbols instead, provide a cons cell
424 :
425 : (symbol-qnames . ALIST)
426 :
427 : in the XML-NS argument."
428 0 : (if (consp xml-ns)
429 0 : (let* ((symbol-qnames (eq (car-safe xml-ns) 'symbol-qnames))
430 0 : (nsp (string-match ":" name))
431 0 : (lname (if nsp (substring name (match-end 0)) name))
432 0 : (prefix (if nsp (substring name 0 (match-beginning 0)) default))
433 0 : (special (and (string-equal lname "xmlns") (not prefix)))
434 : ;; Setting default to nil will insure that there is not
435 : ;; matching cons in xml-ns. In which case we
436 0 : (ns (or (cdr (assoc (if special "xmlns" prefix)
437 0 : (if symbol-qnames (cdr xml-ns) xml-ns)))
438 0 : "")))
439 0 : (if (and symbol-qnames
440 0 : (not special)
441 0 : (not (string= prefix "xmlns")))
442 0 : (intern (concat ns lname))
443 0 : (cons ns (if special "" lname))))
444 0 : (intern name)))
445 :
446 : (defun xml-parse-tag (&optional parse-dtd parse-ns)
447 : "Parse the tag at point.
448 : If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
449 : returned as the first element in the list.
450 : If PARSE-NS is non-nil, expand QNAMES; for further details, see
451 : `xml-parse-region'.
452 :
453 : Return one of:
454 : - a list : the matching node
455 : - nil : the point is not looking at a tag.
456 : - a pair : the first element is the DTD, the second is the node."
457 0 : (let* ((case-fold-search nil)
458 : ;; Prevent entity definitions from changing the defaults
459 0 : (xml-entity-alist xml-entity-alist)
460 0 : (xml-parameter-entity-alist xml-parameter-entity-alist)
461 0 : (buf (current-buffer))
462 0 : (pos (point)))
463 0 : (with-temp-buffer
464 0 : (with-syntax-table xml-syntax-table
465 0 : (insert-buffer-substring-no-properties buf pos)
466 0 : (goto-char (point-min))
467 0 : (xml-parse-tag-1 parse-dtd parse-ns)))))
468 :
469 : (defun xml-parse-tag-1 (&optional parse-dtd parse-ns)
470 : "Like `xml-parse-tag', but possibly modify the buffer while working."
471 0 : (let* ((xml-validating-parser (or parse-dtd xml-validating-parser))
472 : (xml-ns
473 0 : (cond ((eq parse-ns 'symbol-qnames)
474 0 : (cons 'symbol-qnames xml-default-ns))
475 0 : ((or (consp (car-safe parse-ns))
476 0 : (and (eq (car-safe parse-ns) 'symbol-qnames)
477 0 : (listp (cdr parse-ns))))
478 0 : parse-ns)
479 0 : (parse-ns
480 0 : xml-default-ns))))
481 0 : (cond
482 : ;; Processing instructions, like <?xml version="1.0"?>.
483 0 : ((looking-at-p "<\\?")
484 0 : (search-forward "?>")
485 0 : (skip-syntax-forward " ")
486 0 : (xml-parse-tag-1 parse-dtd xml-ns))
487 : ;; Character data (CDATA) sections, in which no tag should be interpreted
488 0 : ((looking-at "<!\\[CDATA\\[")
489 0 : (let ((pos (match-end 0)))
490 0 : (unless (search-forward "]]>" nil t)
491 0 : (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
492 0 : (concat
493 0 : (buffer-substring-no-properties pos (match-beginning 0))
494 0 : (xml-parse-string))))
495 : ;; DTD for the document
496 0 : ((looking-at-p "<!DOCTYPE[ \t\n\r]")
497 0 : (let ((dtd (xml-parse-dtd parse-ns)))
498 0 : (skip-syntax-forward " ")
499 0 : (if xml-validating-parser
500 0 : (cons dtd (xml-parse-tag-1 nil xml-ns))
501 0 : (xml-parse-tag-1 nil xml-ns))))
502 : ;; skip comments
503 0 : ((looking-at-p "<!--")
504 0 : (search-forward "-->")
505 : ;; FIXME: This loses the skipped-over spaces.
506 0 : (skip-syntax-forward " ")
507 0 : (unless (eobp)
508 0 : (let ((xml-sub-parser t))
509 0 : (xml-parse-tag-1 parse-dtd xml-ns))))
510 : ;; end tag
511 0 : ((looking-at-p "</")
512 : '())
513 : ;; opening tag
514 1 : ((looking-at (eval-when-compile (concat "<\\(" xml-name-re "\\)")))
515 0 : (goto-char (match-end 1))
516 : ;; Parse this node
517 0 : (let* ((node-name (match-string-no-properties 1))
518 : ;; Parse the attribute list.
519 0 : (attrs (xml-parse-attlist xml-ns))
520 : children)
521 : ;; add the xmlns:* attrs to our cache
522 0 : (when (consp xml-ns)
523 0 : (dolist (attr attrs)
524 0 : (when (and (consp (car attr))
525 0 : (equal "http://www.w3.org/2000/xmlns/"
526 0 : (caar attr)))
527 0 : (push (cons (cdar attr) (cdr attr))
528 0 : (if (symbolp (car xml-ns))
529 0 : (cdr xml-ns)
530 0 : xml-ns)))))
531 0 : (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
532 0 : (cond
533 : ;; is this an empty element ?
534 0 : ((looking-at-p "/>")
535 0 : (forward-char 2)
536 0 : (nreverse children))
537 : ;; is this a valid start tag ?
538 0 : ((eq (char-after) ?>)
539 0 : (forward-char 1)
540 : ;; Now check that we have the right end-tag.
541 0 : (let ((end (concat "</" node-name "\\s-*>")))
542 0 : (while (not (looking-at end))
543 0 : (cond
544 0 : ((eobp)
545 0 : (error "XML: (Not Well-Formed) End of document while reading element `%s'"
546 0 : node-name))
547 0 : ((looking-at-p "</")
548 0 : (forward-char 2)
549 0 : (error "XML: (Not Well-Formed) Invalid end tag `%s' (expecting `%s')"
550 0 : (let ((pos (point)))
551 0 : (buffer-substring pos (if (re-search-forward "\\s-*>" nil t)
552 0 : (match-beginning 0)
553 0 : (point-max))))
554 0 : node-name))
555 : ;; Read a sub-element and push it onto CHILDREN.
556 0 : ((= (char-after) ?<)
557 0 : (let ((tag (xml-parse-tag-1 nil xml-ns)))
558 0 : (when tag
559 0 : (push tag children))))
560 : ;; Read some character data.
561 : (t
562 0 : (let ((expansion (xml-parse-string)))
563 0 : (push (if (stringp (car children))
564 : ;; If two strings were separated by a
565 : ;; comment, concat them.
566 0 : (concat (pop children) expansion)
567 0 : expansion)
568 0 : children)))))
569 : ;; Move point past the end-tag.
570 0 : (goto-char (match-end 0))
571 0 : (nreverse children)))
572 : ;; Otherwise this was an invalid start tag (expected ">" not found.)
573 : (t
574 0 : (error "XML: (Well-Formed) Couldn't parse tag: %s"
575 0 : (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
576 :
577 : ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
578 : (t
579 0 : (unless xml-sub-parser ; Usually, we error out.
580 0 : (error "XML: (Well-Formed) Invalid character"))
581 : ;; However, if we're parsing incrementally, then we need to deal
582 : ;; with stray CDATA.
583 0 : (let ((s (xml-parse-string)))
584 0 : (when (zerop (length s))
585 : ;; We haven't consumed any input! We must throw an error in
586 : ;; order to prevent looping forever.
587 0 : (error "XML: (Not Well-Formed) Could not parse: %s"
588 0 : (buffer-substring-no-properties
589 0 : (point) (min (+ (point) 10) (point-max)))))
590 0 : s)))))
591 :
592 : (defun xml-parse-string ()
593 : "Parse character data at point, and return it as a string.
594 : Leave point at the start of the next thing to parse. This
595 : function can modify the buffer by expanding entity and character
596 : references."
597 0 : (let ((start (point))
598 : ;; Keep track of the size of the rest of the buffer:
599 0 : (old-remaining-size (- (buffer-size) (point)))
600 : ref val)
601 0 : (while (and (not (eobp))
602 0 : (not (looking-at-p "<")))
603 : ;; Find the next < or & character.
604 0 : (skip-chars-forward "^<&")
605 0 : (when (eq (char-after) ?&)
606 : ;; If we find an entity or character reference, expand it.
607 0 : (unless (looking-at xml-entity-or-char-ref-re)
608 0 : (error "XML: (Not Well-Formed) Invalid entity reference"))
609 : ;; For a character reference, the next entity or character
610 : ;; reference must be after the replacement. [4.6] "Numerical
611 : ;; character references are expanded immediately when
612 : ;; recognized and MUST be treated as character data."
613 0 : (if (setq ref (match-string 2))
614 0 : (progn ; Numeric char reference
615 0 : (setq val (save-match-data
616 0 : (decode-char 'ucs (string-to-number
617 0 : ref (if (match-string 1) 16)))))
618 0 : (and (null val)
619 0 : xml-validating-parser
620 0 : (error "XML: (Validity) Invalid character reference `%s'"
621 0 : (match-string 0)))
622 0 : (replace-match (if val (string val) xml-undefined-entity) t t))
623 : ;; For an entity reference, search again from the start of
624 : ;; the replaced text, since the replacement can contain
625 : ;; entity or character references, or markup.
626 0 : (setq ref (match-string 3)
627 0 : val (assoc ref xml-entity-alist))
628 0 : (and (null val)
629 0 : xml-validating-parser
630 0 : (error "XML: (Validity) Undefined entity `%s'" ref))
631 0 : (replace-match (or (cdr val) xml-undefined-entity) t t)
632 0 : (goto-char (match-beginning 0)))
633 : ;; Check for XML bombs.
634 0 : (and xml-entity-expansion-limit
635 0 : (> (- (buffer-size) (point))
636 0 : (+ old-remaining-size xml-entity-expansion-limit))
637 0 : (error "XML: Entity reference expansion \
638 0 : surpassed `xml-entity-expansion-limit'"))))
639 : ;; [2.11] Clean up line breaks.
640 0 : (let ((end-marker (point-marker)))
641 0 : (goto-char start)
642 0 : (while (re-search-forward "\r\n?" end-marker t)
643 0 : (replace-match "\n" t t))
644 0 : (goto-char end-marker)
645 0 : (buffer-substring start (point)))))
646 :
647 : (defun xml-parse-attlist (&optional xml-ns)
648 : "Return the attribute-list after point.
649 : Leave point at the first non-blank character after the tag."
650 0 : (let ((attlist ())
651 : end-pos name)
652 0 : (skip-syntax-forward " ")
653 0 : (while (looking-at (eval-when-compile
654 1 : (concat "\\(" xml-name-re "\\)\\s-*=\\s-*")))
655 0 : (setq end-pos (match-end 0))
656 0 : (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
657 0 : (goto-char end-pos)
658 :
659 : ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
660 :
661 : ;; Do we have a string between quotes (or double-quotes),
662 : ;; or a simple word ?
663 0 : (if (looking-at "\"\\([^\"]*\\)\"")
664 0 : (setq end-pos (match-end 0))
665 0 : (if (looking-at "'\\([^']*\\)'")
666 0 : (setq end-pos (match-end 0))
667 0 : (error "XML: (Not Well-Formed) Attribute values must be given between quotes")))
668 :
669 : ;; Each attribute must be unique within a given element
670 0 : (if (assoc name attlist)
671 0 : (error "XML: (Not Well-Formed) Each attribute must be unique within an element"))
672 :
673 : ;; Multiple whitespace characters should be replaced with a single one
674 : ;; in the attributes
675 0 : (let ((string (match-string-no-properties 1)))
676 0 : (replace-regexp-in-string "\\s-\\{2,\\}" " " string)
677 0 : (let ((expansion (xml-substitute-special string)))
678 0 : (unless (stringp expansion)
679 : ;; We say this is the constraint. It is actually that
680 : ;; neither external entities nor "<" can be in an
681 : ;; attribute value.
682 0 : (error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements"))
683 0 : (push (cons name expansion) attlist)))
684 :
685 0 : (goto-char end-pos)
686 0 : (skip-syntax-forward " "))
687 0 : (nreverse attlist)))
688 :
689 : ;;; DTD (document type declaration)
690 :
691 : ;; The following functions know how to skip or parse the DTD of a
692 : ;; document. FIXME: it fails at least if the DTD contains conditional
693 : ;; sections.
694 :
695 : (defun xml-skip-dtd ()
696 : "Skip the DTD at point.
697 : This follows the rule [28] in the XML specifications."
698 0 : (let ((xml-validating-parser nil))
699 0 : (xml-parse-dtd)))
700 :
701 : (defun xml-parse-dtd (&optional _parse-ns)
702 : "Parse the DTD at point."
703 1 : (forward-char (eval-when-compile (length "<!DOCTYPE")))
704 0 : (skip-syntax-forward " ")
705 0 : (if (and (looking-at-p ">")
706 0 : xml-validating-parser)
707 0 : (error "XML: (Validity) Invalid DTD (expecting name of the document)"))
708 :
709 : ;; Get the name of the document
710 0 : (looking-at xml-name-re)
711 0 : (let ((dtd (list (match-string-no-properties 0) 'dtd))
712 0 : (xml-parameter-entity-alist xml-parameter-entity-alist)
713 : next-parameter-entity)
714 0 : (goto-char (match-end 0))
715 0 : (skip-syntax-forward " ")
716 :
717 : ;; External subset (XML [75])
718 0 : (cond ((looking-at "PUBLIC\\s-+")
719 0 : (goto-char (match-end 0))
720 0 : (unless (or (re-search-forward
721 : "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\""
722 0 : nil t)
723 0 : (re-search-forward
724 : "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
725 0 : nil t))
726 0 : (error "XML: Missing Public ID"))
727 0 : (let ((pubid (match-string-no-properties 1)))
728 0 : (skip-syntax-forward " ")
729 0 : (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
730 0 : (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
731 0 : (error "XML: Missing System ID"))
732 0 : (push (list pubid (match-string-no-properties 1) 'public) dtd)))
733 0 : ((looking-at "SYSTEM\\s-+")
734 0 : (goto-char (match-end 0))
735 0 : (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
736 0 : (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
737 0 : (error "XML: Missing System ID"))
738 0 : (push (list (match-string-no-properties 1) 'system) dtd)))
739 0 : (skip-syntax-forward " ")
740 :
741 0 : (if (eq (char-after) ?>)
742 :
743 : ;; No internal subset
744 0 : (forward-char)
745 :
746 : ;; Internal subset (XML [28b])
747 0 : (unless (eq (char-after) ?\[)
748 0 : (error "XML: Bad DTD"))
749 0 : (forward-char)
750 :
751 : ;; [2.8]: "markup declarations may be made up in whole or in
752 : ;; part of the replacement text of parameter entities."
753 :
754 : ;; Since parameter entities are valid only within the DTD, we
755 : ;; first search for the position of the next possible parameter
756 : ;; entity. Then, search for the next DTD element; if it ends
757 : ;; before the next parameter entity, expand the parameter entity
758 : ;; and try again.
759 0 : (setq next-parameter-entity
760 0 : (save-excursion
761 0 : (if (re-search-forward xml-pe-reference-re nil t)
762 0 : (match-beginning 0))))
763 :
764 : ;; Parse the rest of the DTD
765 : ;; Fixme: Deal with NOTATION, PIs.
766 0 : (while (not (looking-at-p "\\s-*\\]"))
767 0 : (skip-syntax-forward " ")
768 0 : (cond
769 0 : ((eobp)
770 0 : (error "XML: (Well-Formed) End of document while reading DTD"))
771 : ;; Element declaration [45]:
772 0 : ((and (looking-at (eval-when-compile
773 1 : (concat "<!ELEMENT\\s-+\\(" xml-name-re
774 1 : "\\)\\s-+\\([^>]+\\)>")))
775 0 : (or (null next-parameter-entity)
776 0 : (<= (match-end 0) next-parameter-entity)))
777 0 : (let ((element (match-string-no-properties 1))
778 0 : (type (match-string-no-properties 2))
779 0 : (end-pos (match-end 0)))
780 : ;; Translation of rule [46] of XML specifications
781 0 : (cond
782 0 : ((string-match-p "\\`EMPTY\\s-*\\'" type) ; empty declaration
783 0 : (setq type 'empty))
784 0 : ((string-match-p "\\`ANY\\s-*$" type) ; any type of contents
785 0 : (setq type 'any))
786 0 : ((string-match "\\`(\\(.*\\))\\s-*\\'" type) ; children ([47])
787 0 : (setq type (xml-parse-elem-type
788 0 : (match-string-no-properties 1 type))))
789 0 : ((string-match-p "^%[^;]+;[ \t\n\r]*\\'" type) ; substitution
790 : nil)
791 0 : (xml-validating-parser
792 0 : (error "XML: (Validity) Invalid element type in the DTD")))
793 :
794 : ;; rule [45]: the element declaration must be unique
795 0 : (and (assoc element dtd)
796 0 : xml-validating-parser
797 0 : (error "XML: (Validity) DTD element declarations must be unique (<%s>)"
798 0 : element))
799 :
800 : ;; Store the element in the DTD
801 0 : (push (list element type) dtd)
802 0 : (goto-char end-pos)))
803 :
804 : ;; Attribute-list declaration [52] (currently unsupported):
805 0 : ((and (looking-at (eval-when-compile
806 1 : (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re
807 1 : "\\)[ \t\n\r]*\\(" xml-att-def-re
808 1 : "\\)*[ \t\n\r]*>")))
809 0 : (or (null next-parameter-entity)
810 0 : (<= (match-end 0) next-parameter-entity)))
811 0 : (goto-char (match-end 0)))
812 :
813 : ;; Comments (skip to end, ignoring parameter entity):
814 0 : ((looking-at-p "<!--")
815 0 : (search-forward "-->")
816 0 : (and next-parameter-entity
817 0 : (> (point) next-parameter-entity)
818 0 : (setq next-parameter-entity
819 0 : (save-excursion
820 0 : (if (re-search-forward xml-pe-reference-re nil t)
821 0 : (match-beginning 0))))))
822 :
823 : ;; Internal entity declarations:
824 0 : ((and (looking-at (eval-when-compile
825 1 : (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
826 1 : xml-name-re "\\)[ \t\n\r]*\\("
827 1 : xml-entity-value-re "\\)[ \t\n\r]*>")))
828 0 : (or (null next-parameter-entity)
829 0 : (<= (match-end 0) next-parameter-entity)))
830 0 : (let* ((name (prog1 (match-string-no-properties 2)
831 0 : (goto-char (match-end 0))))
832 0 : (alist (if (match-string 1)
833 : 'xml-parameter-entity-alist
834 0 : 'xml-entity-alist))
835 : ;; Retrieve the deplacement text:
836 0 : (value (xml--entity-replacement-text
837 : ;; Entity value, sans quotation marks:
838 0 : (substring (match-string-no-properties 3) 1 -1))))
839 : ;; If the same entity is declared more than once, the
840 : ;; first declaration is binding.
841 0 : (unless (assoc name (symbol-value alist))
842 0 : (set alist (cons (cons name value) (symbol-value alist))))))
843 :
844 : ;; External entity declarations (currently unsupported):
845 0 : ((and (or (looking-at (eval-when-compile
846 1 : (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
847 1 : xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+"
848 1 : "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")))
849 0 : (looking-at (eval-when-compile
850 1 : (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\("
851 1 : xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+"
852 : "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\""
853 : "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
854 : "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
855 1 : "[ \t\n\r]*>"))))
856 0 : (or (null next-parameter-entity)
857 0 : (<= (match-end 0) next-parameter-entity)))
858 0 : (goto-char (match-end 0)))
859 :
860 : ;; If a parameter entity is in the way, expand it.
861 0 : (next-parameter-entity
862 0 : (save-excursion
863 0 : (goto-char next-parameter-entity)
864 0 : (unless (looking-at xml-pe-reference-re)
865 0 : (error "XML: Internal error"))
866 0 : (let* ((entity (match-string 1))
867 0 : (elt (assoc entity xml-parameter-entity-alist)))
868 0 : (if elt
869 0 : (progn
870 0 : (replace-match (cdr elt) t t)
871 : ;; The replacement can itself be a parameter entity.
872 0 : (goto-char next-parameter-entity))
873 0 : (goto-char (match-end 0))))
874 0 : (setq next-parameter-entity
875 0 : (if (re-search-forward xml-pe-reference-re nil t)
876 0 : (match-beginning 0)))))
877 :
878 : ;; Anything else is garbage (ignored if not validating).
879 0 : (xml-validating-parser
880 0 : (error "XML: (Validity) Invalid DTD item"))
881 : (t
882 0 : (skip-chars-forward "^]"))))
883 :
884 0 : (if (looking-at "\\s-*]>")
885 0 : (goto-char (match-end 0))))
886 0 : (nreverse dtd)))
887 :
888 : (defun xml--entity-replacement-text (string)
889 : "Return the replacement text for the entity value STRING.
890 : The replacement text is obtained by replacing character
891 : references and parameter-entity references."
892 0 : (let ((ref-re (eval-when-compile
893 1 : (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\("
894 1 : xml-name-re "\\)\\);")))
895 : children)
896 0 : (while (string-match ref-re string)
897 0 : (push (substring string 0 (match-beginning 0)) children)
898 0 : (let ((remainder (substring string (match-end 0)))
899 : ref val)
900 0 : (cond ((setq ref (match-string 1 string))
901 : ;; Decimal character reference
902 0 : (setq val (decode-char 'ucs (string-to-number ref)))
903 0 : (if val (push (string val) children)))
904 : ;; Hexadecimal character reference
905 0 : ((setq ref (match-string 2 string))
906 0 : (setq val (decode-char 'ucs (string-to-number ref 16)))
907 0 : (if val (push (string val) children)))
908 : ;; Parameter entity reference
909 0 : ((setq ref (match-string 3 string))
910 0 : (setq val (assoc ref xml-parameter-entity-alist))
911 0 : (and (null val)
912 0 : xml-validating-parser
913 0 : (error "XML: (Validity) Undefined parameter entity `%s'" ref))
914 0 : (push (or (cdr val) xml-undefined-entity) children)))
915 0 : (setq string remainder)))
916 0 : (mapconcat 'identity (nreverse (cons string children)) "")))
917 :
918 : (defun xml-parse-elem-type (string)
919 : "Convert element type STRING into a Lisp structure."
920 :
921 0 : (let (elem modifier)
922 0 : (if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
923 0 : (progn
924 0 : (setq elem (match-string-no-properties 1 string)
925 0 : modifier (match-string-no-properties 2 string))
926 0 : (if (string-match-p "|" elem)
927 0 : (setq elem (cons 'choice
928 0 : (mapcar 'xml-parse-elem-type
929 0 : (split-string elem "|"))))
930 0 : (if (string-match-p "," elem)
931 0 : (setq elem (cons 'seq
932 0 : (mapcar 'xml-parse-elem-type
933 0 : (split-string elem ",")))))))
934 0 : (if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
935 0 : (setq elem (match-string-no-properties 1 string)
936 0 : modifier (match-string-no-properties 2 string))))
937 :
938 0 : (if (and (stringp elem) (string= elem "#PCDATA"))
939 0 : (setq elem 'pcdata))
940 :
941 0 : (cond
942 0 : ((string= modifier "+")
943 0 : (list '+ elem))
944 0 : ((string= modifier "*")
945 0 : (list '* elem))
946 0 : ((string= modifier "?")
947 0 : (list '\? elem))
948 : (t
949 0 : elem))))
950 :
951 : ;;; Substituting special XML sequences
952 :
953 : (defun xml-substitute-special (string)
954 : "Return STRING, after substituting entity and character references.
955 : STRING is assumed to occur in an XML attribute value."
956 0 : (let ((strlen (length string))
957 : children)
958 0 : (while (string-match xml-entity-or-char-ref-re string)
959 0 : (push (substring string 0 (match-beginning 0)) children)
960 0 : (let* ((remainder (substring string (match-end 0)))
961 0 : (is-hex (match-string 1 string)) ; Is it a hex numeric reference?
962 0 : (ref (match-string 2 string))) ; Numeric part of reference
963 0 : (if ref
964 : ;; [4.6] Character references are included as
965 : ;; character data.
966 0 : (let ((val (decode-char 'ucs (string-to-number ref (if is-hex 16)))))
967 0 : (push (cond (val (string val))
968 0 : (xml-validating-parser
969 0 : (error "XML: (Validity) Undefined character `x%s'" ref))
970 0 : (t xml-undefined-entity))
971 0 : children)
972 0 : (setq string remainder
973 0 : strlen (length string)))
974 : ;; [4.4.5] Entity references are "included in literal".
975 : ;; Note that we don't need do anything special to treat
976 : ;; quotes as normal data characters.
977 0 : (setq ref (match-string 3 string)) ; entity name
978 0 : (let ((val (or (cdr (assoc ref xml-entity-alist))
979 0 : (if xml-validating-parser
980 0 : (error "XML: (Validity) Undefined entity `%s'" ref)
981 0 : xml-undefined-entity))))
982 0 : (setq string (concat val remainder)))
983 0 : (and xml-entity-expansion-limit
984 0 : (> (length string) (+ strlen xml-entity-expansion-limit))
985 0 : (error "XML: Passed `xml-entity-expansion-limit' while expanding `&%s;'"
986 0 : ref)))))
987 0 : (mapconcat 'identity (nreverse (cons string children)) "")))
988 :
989 : (defun xml-substitute-numeric-entities (string)
990 : "Substitute SGML numeric entities by their respective utf characters.
991 : This function replaces numeric entities in the input STRING and
992 : returns the modified string. For example \"*\" gets replaced
993 : by \"*\"."
994 0 : (if (and string (stringp string))
995 0 : (let ((start 0))
996 0 : (while (string-match "&#\\([0-9]+\\);" string start)
997 0 : (ignore-errors
998 0 : (setq string (replace-match
999 0 : (string (read (substring string
1000 0 : (match-beginning 1)
1001 0 : (match-end 1))))
1002 0 : nil nil string)))
1003 0 : (setq start (1+ (match-beginning 0))))
1004 0 : string)
1005 0 : nil))
1006 :
1007 : ;;; Printing a parse tree (mainly for debugging).
1008 :
1009 : (defun xml-debug-print (xml &optional indent-string)
1010 : "Outputs the XML in the current buffer.
1011 : XML can be a tree or a list of nodes.
1012 : The first line is indented with the optional INDENT-STRING."
1013 0 : (setq indent-string (or indent-string ""))
1014 0 : (dolist (node xml)
1015 0 : (xml-debug-print-internal node indent-string)))
1016 :
1017 : (defalias 'xml-print 'xml-debug-print)
1018 :
1019 : (defun xml-escape-string (string)
1020 : "Convert STRING into a string containing valid XML character data.
1021 : Replace occurrences of &<>\\='\" in STRING with their default XML
1022 : entity references (e.g., replace each & with &).
1023 :
1024 : XML character data must not contain & or < characters, nor the >
1025 : character under some circumstances. The XML spec does not impose
1026 : restriction on \" or \\=', but we just substitute for these too
1027 : \(as is permitted by the spec)."
1028 0 : (with-temp-buffer
1029 0 : (insert string)
1030 0 : (dolist (substitution '(("&" . "&")
1031 : ("<" . "<")
1032 : (">" . ">")
1033 : ("'" . "'")
1034 : ("\"" . """)))
1035 0 : (goto-char (point-min))
1036 0 : (while (search-forward (car substitution) nil t)
1037 0 : (replace-match (cdr substitution) t t nil)))
1038 0 : (buffer-string)))
1039 :
1040 : (defun xml-debug-print-internal (xml indent-string)
1041 : "Outputs the XML tree in the current buffer.
1042 : The first line is indented with INDENT-STRING."
1043 0 : (let ((tree xml)
1044 : attlist)
1045 0 : (insert indent-string ?< (symbol-name (xml-node-name tree)))
1046 :
1047 : ;; output the attribute list
1048 0 : (setq attlist (xml-node-attributes tree))
1049 0 : (while attlist
1050 0 : (insert ?\ (symbol-name (caar attlist)) "=\""
1051 0 : (xml-escape-string (cdar attlist)) ?\")
1052 0 : (setq attlist (cdr attlist)))
1053 :
1054 0 : (setq tree (xml-node-children tree))
1055 :
1056 0 : (if (null tree)
1057 0 : (insert ?/ ?>)
1058 0 : (insert ?>)
1059 :
1060 : ;; output the children
1061 0 : (dolist (node tree)
1062 0 : (cond
1063 0 : ((listp node)
1064 0 : (insert ?\n)
1065 0 : (xml-debug-print-internal node (concat indent-string " ")))
1066 0 : ((stringp node)
1067 0 : (insert (xml-escape-string node)))
1068 : (t
1069 0 : (error "Invalid XML tree"))))
1070 :
1071 0 : (when (not (and (null (cdr tree))
1072 0 : (stringp (car tree))))
1073 0 : (insert ?\n indent-string))
1074 0 : (insert ?< ?/ (symbol-name (xml-node-name xml)) ?>))))
1075 :
1076 : (provide 'xml)
1077 :
1078 : ;;; xml.el ends here
|