>From 2ab0fc7b9067f81712714dcb62c40d08fe05ab7a Mon Sep 17 00:00:00 2001 From: "Peder O. Klingenberg" Date: Thu, 13 Jul 2017 00:27:58 +0200 Subject: [PATCH] Make eww (optionally) send Referer-headers * lisp/net/eww.el (eww-render): Set url-current-lastloc to the url we are rendering, to get the referer header right on subsequent requests. * lisp/url/url-queue.el (url-queue): New struct member context-buffer for keeping track of the context a queued job started from. (url-queue-retrieve): Store the current buffer in the queue object. (url-queue-start-retrieve): Make sure url-retrieve is called in the context of the original buffer, if available. * doc/misc/url.texi (Customization): Describe the new user option url-lastloc-privacy. * lisp/url/url-http.el (url-http--get-referer): New function to determine which referer to send, if any, considering the users privacy settings and the target url we are visiting. (url-http-referer): New variable keeping track of the referer computed by url-http--get-referer (url-http-create-request): Use url-http-referer instead of the optional argument to set up the referer header. Leave checking of privacy settings to url-http--get-referer. (url-http): Set up url-http-referer by using url-http--get-referer. * lisp/url/url-util.el (url-domain): New function to determine the domain of a given URL. * lisp/url/url-vars.el (url-current-lastloc): New variable to keep track of the desired "last location" (referer header). (url-lastloc-privacy): New custom setting for more fine-grained control over how lastloc (referer) is sent to servers. (Bug#27012) --- doc/misc/url.texi | 14 ++++++++++++++ etc/NEWS | 16 ++++++++++++++++ lisp/net/eww.el | 7 +++++-- lisp/url/url-http.el | 52 +++++++++++++++++++++++++++++++++++++++------------ lisp/url/url-queue.el | 18 +++++++++++------- lisp/url/url-util.el | 29 ++++++++++++++++++++++++++++ lisp/url/url-vars.el | 28 ++++++++++++++++++++++++++- 7 files changed, 142 insertions(+), 22 deletions(-) diff --git a/doc/misc/url.texi b/doc/misc/url.texi index a3c625edce..c7532c0c32 100644 --- a/doc/misc/url.texi +++ b/doc/misc/url.texi @@ -1303,6 +1303,20 @@ Customization @end defopt @defopt url-privacy-level @end defopt +@defopt url-lastloc-privacy +Provided @code{lastloc} is not prohibited by @code{url-privacy-level}, +this determines who we send our last location to. @code{none} means +we include our last location in every outgoing request. +@code{domain-match} means we send it only if the domain of our last +location matches the domain of the URI we are requesting. +@code{host-match} means we only send our last location back to the +same host. The default is @code{domain-match}. + +Using @code{domain-match} for this option requires emacs to make one +or more DNS requests each time a new host is contacted, to determine +the domain of the host. Results of these lookups are cached, so +repeated visits do not require repeated domain lookups. +@end defopt @defopt url-uncompressor-alist @end defopt @defopt url-passwd-entry-func diff --git a/etc/NEWS b/etc/NEWS index dc9393c87d..90f96ad550 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -653,6 +653,11 @@ replaced by the real images asynchronously, which will also now respect width/height HTML specs (unless they specify widths/heights bigger than the current window). +--- +*** EWW now sends Referer headers +Provided they are allowed by 'url-privacy-level' and +'url-lastloc-privacy'. + ** Ido *** The commands 'find-alternate-file-other-window', @@ -871,6 +876,17 @@ domain. *** 'url-user-agent' now defaults to 'default', and the User-Agent string is computed dynamically based on 'url-privacy-level'. +--- +*** url-http now uses a buffer local variable to determine the referer +for a request. Previously, there was an optional argument to +'url-http-create-request' to set up a referer, but no callers were +using it. Now, callers can set up 'url-current-lastloc' in a buffer +before calling 'url-retrieve'. + ++++ +*** New user option 'url-lastloc-privacy' providing fine-grained +control over who we send referer-headers to. + ** VC and related modes --- diff --git a/lisp/net/eww.el b/lisp/net/eww.el index fe31657914..e922b4f834 100644 --- a/lisp/net/eww.el +++ b/lisp/net/eww.el @@ -271,7 +271,7 @@ eww (insert (format "Loading %s..." url)) (goto-char (point-min))) (url-retrieve url 'eww-render - (list url nil (current-buffer)))) + (list url nil (current-buffer)))) (defun eww--dwim-expand-url (url) (setq url (string-trim url)) @@ -359,7 +359,10 @@ eww-render ;; Save the https peer status. (plist-put eww-data :peer (plist-get status :peer)) ;; Make buffer listings more informative. - (setq list-buffers-directory url)) + (setq list-buffers-directory url) + ;; Let the URL library have a handle to the current URL for + ;; referer purposes. + (setq url-current-lastloc (url-generic-parse-url url))) (unwind-protect (progn (cond diff --git a/lisp/url/url-http.el b/lisp/url/url-http.el index 06d32861b2..35a0f06d48 100644 --- a/lisp/url/url-http.el +++ b/lisp/url/url-http.el @@ -54,6 +54,7 @@ url-http-response-version (defvar url-http-target-url) (defvar url-http-transfer-encoding) (defvar url-show-status) +(defvar url-http-referer) (require 'url-gw) (require 'url-parse) @@ -238,6 +239,34 @@ url-http--user-agent-default-string emacs-info os-info)) " "))) +(defun url-http--get-referer (url) + (url-http-debug "getting referer from buffer: buffer:%S target-url:%S lastloc:%S" (current-buffer) url url-current-lastloc) + (when url-current-lastloc + (if (not (url-p url-current-lastloc)) + (setq url-current-lastloc (url-generic-parse-url url-current-lastloc))) + (let* ((referer url-current-lastloc) + (referer-string (url-recreate-url referer))) + (when (and (not (memq url-privacy-level '(low high paranoid))) + (not (and (listp url-privacy-level) + (memq 'lastloc url-privacy-level)))) + ;; url-privacy-level allows referer. But url-lastloc-privacy + ;; may restrict who we send it to. + (cl-case url-lastloc-privacy + (host-match + (let ((referer-host (url-host referer)) + (url-host (url-host url))) + (when (string= referer-host url-host) + referer-string))) + (domain-match + (let ((referer-domain (url-domain referer)) + (url-domain (url-domain url))) + (when (and referer-domain + url-domain + (string= referer-domain url-domain)) + referer-string))) + (otherwise + referer-string)))))) + ;; Building an HTTP request (defun url-http-user-agent-string () "Compute a User-Agent string. @@ -254,8 +283,9 @@ url-http-user-agent-string ((eq url-user-agent 'default) (url-http--user-agent-default-string)))))) (if ua-string (format "User-Agent: %s\r\n" (string-trim ua-string)) ""))) -(defun url-http-create-request (&optional ref-url) - "Create an HTTP request for `url-http-target-url', referred to by REF-URL." +(defun url-http-create-request () + "Create an HTTP request for `url-http-target-url', using `url-http-referer' +as the Referer-header (subject to `url-privacy-level'." (let* ((extra-headers) (request nil) (no-cache (cdr-safe (assoc "Pragma" url-http-extra-headers))) @@ -274,7 +304,8 @@ url-http-create-request (url-get-authentication (or (and (boundp 'proxy-info) proxy-info) - url-http-target-url) nil 'any nil)))) + url-http-target-url) nil 'any nil))) + (ref-url url-http-referer)) (if (equal "" real-fname) (setq real-fname "/")) (setq no-cache (and no-cache (string-match "no-cache" no-cache))) @@ -288,12 +319,6 @@ url-http-create-request (string= ref-url ""))) (setq ref-url nil)) - ;; We do not want to expose the referrer if the user is paranoid. - (if (or (memq url-privacy-level '(low high paranoid)) - (and (listp url-privacy-level) - (memq 'lastloc url-privacy-level))) - (setq ref-url nil)) - ;; url-http-extra-headers contains an assoc-list of ;; header/value pairs that we need to put into the request. (setq extra-headers (mapconcat @@ -1255,7 +1280,8 @@ url-http (mime-accept-string url-mime-accept-string) (buffer (or retry-buffer (generate-new-buffer - (format " *http %s:%d*" (url-host url) (url-port url)))))) + (format " *http %s:%d*" (url-host url) (url-port url))))) + (referer (url-http--get-referer url))) (if (not connection) ;; Failed to open the connection for some reason (progn @@ -1290,7 +1316,8 @@ url-http url-http-no-retry url-http-connection-opened url-mime-accept-string - url-http-proxy)) + url-http-proxy + url-http-referer)) (set (make-local-variable var) nil)) (setq url-http-method (or url-request-method "GET") @@ -1308,7 +1335,8 @@ url-http url-http-no-retry retry-buffer url-http-connection-opened nil url-mime-accept-string mime-accept-string - url-http-proxy url-using-proxy) + url-http-proxy url-using-proxy + url-http-referer referer) (set-process-buffer connection buffer) (set-process-filter connection 'url-http-generic-filter) diff --git a/lisp/url/url-queue.el b/lisp/url/url-queue.el index dd1699bd08..db0c16c793 100644 --- a/lisp/url/url-queue.el +++ b/lisp/url/url-queue.el @@ -52,7 +52,7 @@ url-queue-progress-timer (cl-defstruct url-queue url callback cbargs silentp buffer start-time pre-triggered - inhibit-cookiesp) + inhibit-cookiesp context-buffer) ;;;###autoload (defun url-queue-retrieve (url callback &optional cbargs silent inhibit-cookies) @@ -67,7 +67,8 @@ url-queue-retrieve :callback callback :cbargs cbargs :silentp silent - :inhibit-cookiesp inhibit-cookies)))) + :inhibit-cookiesp inhibit-cookies + :context-buffer (current-buffer))))) (url-queue-setup-runners)) ;; To ensure asynch behavior, we start the required number of queue @@ -147,11 +148,14 @@ url-queue-remove-jobs-from-host (defun url-queue-start-retrieve (job) (setf (url-queue-buffer job) (ignore-errors - (let ((url-request-noninteractive t)) - (url-retrieve (url-queue-url job) - #'url-queue-callback-function (list job) - (url-queue-silentp job) - (url-queue-inhibit-cookiesp job)))))) + (with-current-buffer (if (buffer-live-p (url-queue-context-buffer job)) + (url-queue-context-buffer job) + (current-buffer)) + (let ((url-request-noninteractive t)) + (url-retrieve (url-queue-url job) + #'url-queue-callback-function (list job) + (url-queue-silentp job) + (url-queue-inhibit-cookiesp job))))))) (defun url-queue-prune-old-entries () (let (dead-jobs) diff --git a/lisp/url/url-util.el b/lisp/url/url-util.el index a3c9655ebd..6be0a05314 100644 --- a/lisp/url/url-util.el +++ b/lisp/url/url-util.el @@ -627,6 +627,35 @@ url-make-private-file (error "Danger: `%s' is a symbolic link" file)) (set-file-modes file #o0600)))) +(autoload 'dns-query "dns") + +(defvar url--domain-cache (make-hash-table :test 'equal :size 17) + "Cache to minimize dns lookups.") + +;;;###autoload +(defun url-domain (url) + "Return the domain of the host of the url, or nil if url does +not contain a registered name." + ;; Determining the domain of a name can not be done with simple + ;; textual manipulations. a.b.c is either host a in domain b.c + ;; (www.google.com), or domain a.b.c with no separate host + ;; (bbc.co.uk). Instead of guessing based on tld (which in any case + ;; may be inaccurate in the face of subdelegations), we look for + ;; domain delegations in DNS. + ;; + ;; Domain delegations change rarely enough that we won't bother with + ;; cache invalidation, I think. + (let* ((host-parts (split-string (url-host url) "\\.")) + (result (gethash host-parts url--domain-cache 'not-found))) + (when (eq result 'not-found) + (setq result + (cl-loop for parts on host-parts + for dom = (mapconcat #'identity parts ".") + when (dns-query dom 'SOA) + return dom)) + (puthash host-parts result url--domain-cache)) + result)) + (provide 'url-util) ;;; url-util.el ends here diff --git a/lisp/url/url-vars.el b/lisp/url/url-vars.el index f08779f695..41ed6d57d8 100644 --- a/lisp/url/url-vars.el +++ b/lisp/url/url-vars.el @@ -60,10 +60,18 @@ url-current-object (defvar url-current-mime-headers nil "A parsed representation of the MIME headers for the current URL.") +(defvar url-current-lastloc nil + "A parsed representation of the URL to be considered as the last location. +Use of this value on outbound connections is subject to +`url-privacy-level' and `url-lastloc-privacy'. This is never set +by the url library, applications are expected to set this +variable in buffers representing a displayed location.") + (mapc 'make-variable-buffer-local '( url-current-object url-current-mime-headers + url-current-lastloc )) (defcustom url-honor-refresh-requests t @@ -117,7 +125,7 @@ url-privacy-level email -- the email address os -- the operating system info emacs -- the version of Emacs -lastloc -- the last location +lastloc -- the last location (see also `url-lastloc-privacy') agent -- do not send the User-Agent string cookies -- never accept HTTP cookies @@ -150,6 +158,24 @@ url-privacy-level (const :tag "No cookies" :value cookie))) :group 'url) +(defcustom url-lastloc-privacy 'domain-match + "Further restrictions on sending the last location. +This value is only consulted if `url-privacy-level' permits +sending last location in the first place. + +Valid values are: +none -- Always send last location. +domain-match -- Send last location if the new location is within the + same domain +host-match -- Send last location if the new location is on the + same host +" + :version "26.1" + :type '(radio (const :tag "Always send" none) + (const :tag "Domains match" domain-match) + (const :tag "Hosts match" host-match)) + :group 'url) + (defvar url-inhibit-uncompression nil "Do not do decompression if non-nil.") (defcustom url-uncompressor-alist '((".z" . "x-gzip") -- 2.11.0