Line data Source code
1 : ;;; url-parse.el --- Uniform Resource Locator parser -*- lexical-binding: t -*-
2 :
3 : ;; Copyright (C) 1996-1999, 2004-2017 Free Software Foundation, Inc.
4 :
5 : ;; Keywords: comm, data, processes
6 :
7 : ;; This file is part of GNU Emacs.
8 : ;;
9 : ;; GNU Emacs is free software: you can redistribute it and/or modify
10 : ;; it under the terms of the GNU General Public License as published by
11 : ;; the Free Software Foundation, either version 3 of the License, or
12 : ;; (at your option) any later version.
13 :
14 : ;; GNU Emacs is distributed in the hope that it will be useful,
15 : ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : ;; GNU General Public License for more details.
18 :
19 : ;; You should have received a copy of the GNU General Public License
20 : ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
21 :
22 : ;;; Commentary:
23 :
24 : ;;; Code:
25 :
26 : (require 'url-vars)
27 : (require 'auth-source)
28 : (eval-when-compile (require 'cl-lib))
29 :
30 : (autoload 'url-scheme-get-property "url-methods")
31 :
32 : (cl-defstruct (url
33 : (:constructor nil)
34 : (:constructor url-parse-make-urlobj
35 : (&optional type user password host portspec filename
36 : target attributes fullness))
37 : (:copier nil))
38 : type user password host portspec filename target attributes fullness
39 : silent (use-cookies t))
40 :
41 : (defsubst url-port (urlobj)
42 : "Return the port number for the URL specified by URLOBJ.
43 : If the port spec is nil (i.e. URLOBJ specifies no port number),
44 : return the default port number for URLOBJ's scheme."
45 : (declare (gv-setter (lambda (port) `(setf (url-portspec ,urlobj) ,port))))
46 0 : (or (url-portspec urlobj)
47 0 : (if (url-type urlobj)
48 0 : (url-scheme-get-property (url-type urlobj) 'default-port))))
49 :
50 : (defun url-path-and-query (urlobj)
51 : "Return the path and query components of URLOBJ.
52 : These two components are stored together in the FILENAME slot of
53 : the object. The return value of this function is (PATH . QUERY),
54 : where each of PATH and QUERY are strings or nil."
55 0 : (let ((name (url-filename urlobj))
56 : path query)
57 0 : (when name
58 0 : (if (string-match "\\?" name)
59 0 : (setq path (substring name 0 (match-beginning 0))
60 0 : query (substring name (match-end 0)))
61 0 : (setq path name)))
62 0 : (cons path query)))
63 :
64 : (defun url-port-if-non-default (urlobj)
65 : "Return the port number specified by URLOBJ, if it is not the default.
66 : If the specified port number is the default, return nil."
67 0 : (let ((port (url-portspec urlobj))
68 : type)
69 0 : (and port
70 0 : (or (null (setq type (url-type urlobj)))
71 0 : (not (equal port (url-scheme-get-property type 'default-port))))
72 0 : port)))
73 :
74 : ;;;###autoload
75 : (defun url-recreate-url (urlobj)
76 : "Recreate a URL string from the parsed URLOBJ."
77 0 : (let* ((type (url-type urlobj))
78 0 : (user (url-user urlobj))
79 0 : (pass (url-password urlobj))
80 0 : (host (url-host urlobj))
81 : ;; RFC 3986: "omit the port component and its : delimiter if
82 : ;; port is empty or if its value would be the same as that of
83 : ;; the scheme's default."
84 0 : (port (url-port-if-non-default urlobj))
85 0 : (file (url-filename urlobj))
86 0 : (frag (url-target urlobj)))
87 0 : (concat (if type (concat type ":"))
88 0 : (if (url-fullness urlobj) "//")
89 0 : (if (or user pass)
90 0 : (concat user
91 0 : (if pass (concat ":" pass))
92 0 : "@"))
93 0 : host
94 0 : (if port (format ":%d" (url-port urlobj)))
95 0 : (or file "/")
96 0 : (if frag (concat "#" frag)))))
97 :
98 : (defun url-recreate-url-attributes (urlobj)
99 : "Recreate the attributes of an URL string from the parsed URLOBJ."
100 : (declare (obsolete nil "24.3"))
101 0 : (when (url-attributes urlobj)
102 0 : (concat ";"
103 0 : (mapconcat (lambda (x)
104 0 : (if (cdr x)
105 0 : (concat (car x) "=" (cdr x))
106 0 : (car x)))
107 0 : (url-attributes urlobj) ";"))))
108 :
109 : ;;;###autoload
110 : (defun url-generic-parse-url (url)
111 : "Return an URL-struct of the parts of URL.
112 : The CL-style struct contains the following fields:
113 :
114 : TYPE is the URI scheme (string or nil).
115 : USER is the user name (string or nil).
116 : PASSWORD is the password (string [deprecated] or nil).
117 : HOST is the host (a registered name, IP literal in square
118 : brackets, or IPv4 address in dotted-decimal form).
119 : PORTSPEC is the specified port (a number), or nil.
120 : FILENAME is the path AND the query component of the URI.
121 : TARGET is the fragment identifier component (used to refer to a
122 : subordinate resource, e.g. a part of a webpage).
123 : ATTRIBUTES is nil; this slot originally stored the attribute and
124 : value alists for IMAP URIs, but this feature was removed
125 : since it conflicts with RFC 3986.
126 : FULLNESS is non-nil if the hierarchical sequence component of
127 : the URL starts with two slashes, \"//\".
128 :
129 : The parser follows RFC 3986, except that it also tries to handle
130 : URIs that are not fully specified (e.g. lacking TYPE), and it
131 : does not check for or perform %-encoding.
132 :
133 : Here is an example. The URL
134 :
135 : foo://bob:pass@example.com:42/a/b/c.dtb?type=animal&name=narwhal#nose
136 :
137 : parses to
138 :
139 : TYPE = \"foo\"
140 : USER = \"bob\"
141 : PASSWORD = \"pass\"
142 : HOST = \"example.com\"
143 : PORTSPEC = 42
144 : FILENAME = \"/a/b/c.dtb?type=animal&name=narwhal\"
145 : TARGET = \"nose\"
146 : ATTRIBUTES = nil
147 : FULLNESS = t"
148 0 : (if (null url)
149 0 : (url-parse-make-urlobj)
150 0 : (with-temp-buffer
151 : ;; Don't let those temp-buffer modifications accidentally
152 : ;; deactivate the mark of the current-buffer.
153 0 : (let ((deactivate-mark nil))
154 0 : (set-syntax-table url-parse-syntax-table)
155 0 : (erase-buffer)
156 0 : (insert url)
157 0 : (goto-char (point-min))
158 0 : (let ((save-pos (point))
159 : scheme user pass host port file fragment full
160 : (inhibit-read-only t))
161 :
162 : ;; 3.1. Scheme
163 : ;; This is nil for a URI that is not fully specified.
164 0 : (when (looking-at "\\([a-zA-Z][-a-zA-Z0-9+.]*\\):")
165 0 : (goto-char (match-end 0))
166 0 : (setq save-pos (point))
167 0 : (setq scheme (downcase (match-string 1))))
168 :
169 : ;; 3.2. Authority
170 0 : (when (looking-at "//")
171 0 : (setq full t)
172 0 : (forward-char 2)
173 0 : (setq save-pos (point))
174 0 : (skip-chars-forward "^/?#")
175 0 : (setq host (buffer-substring save-pos (point)))
176 : ;; 3.2.1 User Information
177 0 : (if (string-match "^\\([^@]+\\)@" host)
178 0 : (setq user (match-string 1 host)
179 0 : host (substring host (match-end 0))))
180 0 : (if (and user (string-match "\\`\\([^:]*\\):\\(.*\\)" user))
181 0 : (setq pass (match-string 2 user)
182 0 : user (match-string 1 user)))
183 0 : (cond
184 : ;; IPv6 literal address.
185 0 : ((string-match "^\\(\\[[^]]+\\]\\)\\(?::\\([0-9]*\\)\\)?$" host)
186 0 : (setq port (match-string 2 host)
187 0 : host (match-string 1 host)))
188 : ;; Registered name or IPv4 address.
189 0 : ((string-match ":\\([0-9]*\\)$" host)
190 0 : (setq port (match-string 1 host)
191 0 : host (substring host 0 (match-beginning 0)))))
192 0 : (cond ((equal port "")
193 0 : (setq port nil))
194 0 : (port
195 0 : (setq port (string-to-number port))))
196 0 : (setq host (downcase host)))
197 :
198 : ;; Now point is on the / ? or # which terminates the
199 : ;; authority, or at the end of the URI, or (if there is no
200 : ;; authority) at the beginning of the absolute path.
201 :
202 0 : (setq save-pos (point))
203 0 : (if (string= "data" scheme)
204 : ;; For the "data" URI scheme, all the rest is the FILE.
205 0 : (setq file (buffer-substring save-pos (point-max)))
206 : ;; For hysterical raisins, our data structure returns the
207 : ;; path and query components together in one slot.
208 : ;; 3.3. Path
209 0 : (skip-chars-forward "^?#")
210 : ;; 3.4. Query
211 0 : (when (looking-at "?")
212 0 : (skip-chars-forward "^#"))
213 0 : (setq file (buffer-substring save-pos (point)))
214 : ;; 3.5 Fragment
215 0 : (when (looking-at "#")
216 0 : (let ((opoint (point)))
217 0 : (forward-char 1)
218 0 : (setq fragment (buffer-substring (point) (point-max)))
219 0 : (delete-region opoint (point-max)))))
220 :
221 0 : (if (and host (string-match "%[0-9][0-9]" host))
222 0 : (setq host (url-unhex-string host)))
223 0 : (url-parse-make-urlobj scheme user pass host port file
224 0 : fragment nil full))))))
225 :
226 : (defmacro url-bit-for-url (method lookfor url)
227 2 : `(let* ((urlobj (url-generic-parse-url ,url))
228 2 : (bit (funcall ,method urlobj))
229 : (methods (list 'url-recreate-url
230 : 'url-host))
231 : auth-info)
232 : (while (and (not bit) (> (length methods) 0))
233 : (setq auth-info (auth-source-search
234 : :max 1
235 : :host (funcall (pop methods) urlobj)
236 : :port (url-type urlobj)))
237 2 : (setq bit (plist-get (nth 0 auth-info) ,lookfor))
238 : (when (functionp bit)
239 : (setq bit (funcall bit))))
240 2 : bit))
241 :
242 : (defun url-user-for-url (url)
243 : "Attempt to use .authinfo to find a user for this URL."
244 0 : (url-bit-for-url 'url-user :user url))
245 :
246 : (defun url-password-for-url (url)
247 : "Attempt to use .authinfo to find a password for this URL."
248 0 : (url-bit-for-url 'url-password :secret url))
249 :
250 : (provide 'url-parse)
251 :
252 : ;;; url-parse.el ends here
|