bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Bug-wget] [PATCH] Disable automatic wget headers.


From: Tim Rühsen
Subject: Re: [Bug-wget] [PATCH] Disable automatic wget headers.
Date: Sat, 27 Apr 2019 19:26:05 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.6.1

Hi,

thanks for your contribution !

On a first (quick) glimpse, it looks pretty good to me.

There are just a few things you could improve:

- please use the same coding style as the existing code, e.g.
space after function names, space after 'if' and 'for', space after a
cast ().

+          if (!disabled_header("Authorisation") && auth_err == RETROK)
typo: "Authorization"

+  for (p = s; *p && *p != '\0' && !c_isspace (*p); p++)
checking "*p != '\0'" is redunndant, it is the same as *p

- please add documentation in doc/wget.texi

- please add one or more tests in testenv/

- please add "disable-header=" into fuzz/wget_options_fuzzer.dict plus
the used header names (fixed strings like "Authorization"). This is for
helping the fuzzer to generate input that covers the new code.

Did you think about corner cases, e.g. what happens when a user puts
disableheader=referer into .wgetrc and later wants to override it on the
command line via --header="Referer=..." ?

Regards, Tim


On 27.04.19 18:34, adham elkarn wrote:
> From: sulfastor <address@hidden>
> 
> Hello,
> We've worked on this features (bug #54769 
> (https://savannah.gnu.org/bugs/?54769)) taking in account the comments made 
> by Darshit some time ago. We would like you to review our changes and code. 
> We thank you for your helping us improving our programming skills.
> 
> * src/http.c: removed disabled headers before its creation
> * src/init.c: added new functions to check user disabled headers, disable 
> headers
> * src/main.c: added new option disable-header, added help description
> * src/options.h: added new option disable-header
> 
> From bug #54769 (https://savannah.gnu.org/bugs/?54769).
> Some servers doesn't handle well some headers. A --disable-header option will 
> ensure a request header
> will not be included on the request. In addition a empty header value in 
> --header="headername: " will also disable
> the request header.
> 
> Signed-off-by: sulfastor <address@hidden>, adham elkarn <address@hidden>
> ---
>  src/http.c    | 137 ++++++++++++++++++++++++++++++++++----------------
>  src/init.c    |  43 ++++++++++++++++
>  src/main.c    |   4 ++
>  src/options.h |   1 +
>  4 files changed, 142 insertions(+), 43 deletions(-)
> 
> diff --git a/src/http.c b/src/http.c
> index 289d1101..79cd2168 100644
> --- a/src/http.c
> +++ b/src/http.c
> @@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, 
> const char *);
>  static bool known_authentication_scheme_p (const char *, const char *);
>  static void ensure_extension (struct http_stat *, const char *, int *);
>  static void load_cookies (void);
> +static bool disabled_header(char*);
>  
>  static bool cookies_loaded_p;
>  static struct cookie_jar *wget_cookie_jar;
> @@ -152,6 +153,8 @@ struct request {
>    int hcount, hcapacity;
>  };
>  
> +/* Forward decls. */
> +static bool request_remove_header (struct request*, const char*);
>  
>  /* Create a new, empty request. Set the request's method and its
>     arguments.  METHOD should be a literal string (or it should outlive
> @@ -245,6 +248,13 @@ request_set_header (struct request *req, const char 
> *name, const char *value,
>        return;
>      }
>  
> +  /* A empty value is a disabled header; so remove it from the request */
> +  if (!*value)
> +    {
> +      request_remove_header(req, name);
> +      return;
> +    }
> +
>    for (i = 0; i < req->hcount; i++)
>      {
>        hdr = &req->headers[i];
> @@ -436,7 +446,7 @@ maybe_send_basic_creds (const char *hostname, const char 
> *user,
>        DEBUGP (("Host %s has not issued a general basic challenge.\n",
>                quote (hostname)));
>      }
> -  if (do_challenge)
> +  if (!disabled_header("Authorization") && do_challenge)
>      {
>        request_set_header (req, "Authorization",
>                            basic_authentication_encode (user, passwd),
> @@ -1770,23 +1780,29 @@ read_response_body (struct http_stat *hs, int sock, 
> FILE *fp, wgint contlen,
>  
>  #ifdef __VMS
>  #define SET_USER_AGENT(req) do {                                         \
> -  if (!opt.useragent)                                                    \
> -    request_set_header (req, "User-Agent",                               \
> -                        aprintf ("Wget/%s (VMS %s %s)",                  \
> -                        version_string, vms_arch(), vms_vers()),         \
> -                        rel_value);                                      \
> -  else if (*opt.useragent)                                               \
> -    request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
> +  if(!disabled_header("User-Agent"))                                  \
> +    {                                                                    \
> +      if (!opt.useragent)                                             \
> +     request_set_header (req, "User-Agent",                           \
> +                         aprintf ("Wget/%s (VMS %s %s)",              \
> +                         version_string, vms_arch(), vms_vers()),     \
> +                         rel_value);                                  \
> +      else if (*opt.useragent)                                           \
> +     request_set_header (req, "User-Agent", opt.useragent, rel_none); \
> +    }                                                                        
>  \
>  } while (0)
>  #else /* def __VMS */
>  #define SET_USER_AGENT(req) do {                                         \
> -  if (!opt.useragent)                                                    \
> -    request_set_header (req, "User-Agent",                               \
> -                        aprintf ("Wget/%s (%s)",                         \
> -                        version_string, OS_TYPE),                        \
> -                        rel_value);                                      \
> -  else if (*opt.useragent)                                               \
> -    request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
> +  if(!disabled_header("User-Agent"))                                     \
> +    {                                                                    \
> +      if (!opt.useragent)                                                \
> +     request_set_header (req, "User-Agent",                           \
> +                         aprintf ("Wget/%s (%s)",                     \
> +                         version_string, OS_TYPE),                    \
> +                         rel_value);                                  \
> +      else if (*opt.useragent)                                           \
> +     request_set_header (req, "User-Agent", opt.useragent, rel_none); \
> +    }                                                                    \
>  } while (0)
>  #endif /* def __VMS [else] */
>  
> @@ -1842,6 +1858,24 @@ time_to_rfc1123 (time_t time, char *buf, size_t 
> bufsize)
>    return RETROK;
>  }
>  
> +static bool
> +disabled_header (char* header_name)
> +{
> +  char** p = opt.disabled_headers;  
> +  char *s;
> +  
> +  if(!p)
> +    return 0;
> +
> +  for(;*p != NULL;++p) {
> +    s = strchrnul(header_name, ':');
> +    if (!strncmp(header_name, *p, (size_t)(s - header_name)))
> +      return 1;
> +  }
> +  
> +  return 0;
> +}
> +
>  static struct request *
>  initialize_request (const struct url *u, struct http_stat *hs, int *dt, 
> struct url *proxy,
>                      bool inhibit_keep_alive, bool *basic_auth_finished,
> @@ -1874,15 +1908,17 @@ initialize_request (const struct url *u, struct 
> http_stat *hs, int *dt, struct u
>        meth_arg = url_full_path (u);
>      req = request_new (meth, meth_arg);
>    }
> -
> -  request_set_header (req, "Referer", (char *) hs->referer, rel_none);
> +  if(!disabled_header("Referer"))
> +    request_set_header (req, "Referer", (char *) hs->referer, rel_none);
>    if (*dt & SEND_NOCACHE)
>      {
>        /* Cache-Control MUST be obeyed by all HTTP/1.1 caching mechanisms...  
> */
> -      request_set_header (req, "Cache-Control", "no-cache", rel_none);
> +      if(!disabled_header("Cache-Control"))
> +     request_set_header (req, "Cache-Control", "no-cache", rel_none);
>  
>        /* ... but some HTTP/1.0 caches doesn't implement Cache-Control.  */
> -      request_set_header (req, "Pragma", "no-cache", rel_none);
> +      if(!disabled_header("Pragma"))
> +     request_set_header (req, "Pragma", "no-cache", rel_none);
>      }
>    if (*dt & IF_MODIFIED_SINCE)
>      {
> @@ -1896,21 +1932,27 @@ initialize_request (const struct url *u, struct 
> http_stat *hs, int *dt, struct u
>                                    "time.\n"));
>            strcpy (strtime, "Thu, 01 Jan 1970 00:00:00 GMT");
>          }
> -      request_set_header (req, "If-Modified-Since", xstrdup (strtime), 
> rel_value);
> +      if(!disabled_header("If-Modified-Since"))
> +     request_set_header (req, "If-Modified-Since", xstrdup (strtime), 
> rel_value);
>      }
> -  if (hs->restval)
> +  if (!disabled_header("Range") && hs->restval)
>      request_set_header (req, "Range",
>                          aprintf ("bytes=%s-",
>                                   number_to_static_string (hs->restval)),
>                          rel_value);
> +
>    SET_USER_AGENT (req);
> -  request_set_header (req, "Accept", "*/*", rel_none);
> +
> +  if(!disabled_header("Accept"))    
> +    request_set_header (req, "Accept", "*/*", rel_none);
> +  
>  #ifdef HAVE_LIBZ
> -  if (opt.compression != compression_none)
> +  if (!disabled_header("Accept-Encoding") && opt.compression != 
> compression_none)
>      request_set_header (req, "Accept-Encoding", "gzip", rel_none);
>    else
>  #endif
> -    request_set_header (req, "Accept-Encoding", "identity", rel_none);
> +    if(!disabled_header("Accept-Encoding"))
> +      request_set_header (req, "Accept-Encoding", "identity", rel_none);
>  
>    /* Find the username with priority */
>    if (u->user)
> @@ -1966,17 +2008,19 @@ initialize_request (const struct url *u, struct 
> http_stat *hs, int *dt, struct u
>      };
>      int add_port = u->port != scheme_default_port (u->scheme);
>      int add_squares = strchr (u->host, ':') != NULL;
> -    request_set_header (req, "Host",
> -                        aprintf (hfmt[add_port][add_squares], u->host, 
> u->port),
> -                        rel_value);
> +    if(!disabled_header("Host"))
> +      request_set_header (req, "Host",
> +                       aprintf (hfmt[add_port][add_squares], u->host, 
> u->port),
> +                       rel_value);
>    }
>  
> -  if (inhibit_keep_alive)
> +  if (!disabled_header("Connection") && inhibit_keep_alive)
>      request_set_header (req, "Connection", "Close", rel_none);
>    else
>      {
> -      request_set_header (req, "Connection", "Keep-Alive", rel_none);
> -      if (proxy)
> +      if (!disabled_header("Connection"))
> +     request_set_header (req, "Connection", "Keep-Alive", rel_none);
> +      if (!disabled_header("Proxy-Connection") && proxy)
>          request_set_header (req, "Proxy-Connection", "Keep-Alive", rel_none);
>      }
>  
> @@ -1985,8 +2029,9 @@ initialize_request (const struct url *u, struct 
> http_stat *hs, int *dt, struct u
>  
>        if (opt.body_data || opt.body_file)
>          {
> -          request_set_header (req, "Content-Type",
> -                              "application/x-www-form-urlencoded", rel_none);
> +       if (!disabled_header("Content-Type"))
> +         request_set_header (req, "Content-Type",
> +                             "application/x-www-form-urlencoded", rel_none);
>  
>            if (opt.body_data)
>              *body_data_size = strlen (opt.body_data);
> @@ -2002,11 +2047,13 @@ initialize_request (const struct url *u, struct 
> http_stat *hs, int *dt, struct u
>                    return NULL;
>                  }
>              }
> -          request_set_header (req, "Content-Length",
> -                              xstrdup (number_to_static_string 
> (*body_data_size)),
> -                              rel_value);
> +       if (!disabled_header("Content-Length"))
> +         request_set_header (req, "Content-Length",
> +                             xstrdup (number_to_static_string 
> (*body_data_size)),
> +                             rel_value);
>          }
> -      else if (c_strcasecmp (opt.method, "post") == 0
> +      else if (!disabled_header("Content-Length")
> +            && c_strcasecmp (opt.method, "post") == 0
>                 || c_strcasecmp (opt.method, "put") == 0
>                 || c_strcasecmp (opt.method, "patch") == 0)
>          request_set_header (req, "Content-Length", "0", rel_none);
> @@ -2043,7 +2090,8 @@ initialize_proxy_configuration (const struct url *u, 
> struct request *req,
>  #ifdef HAVE_SSL
>    if (u->scheme != SCHEME_HTTPS)
>  #endif
> -    request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
> +    if (!disabled_header("Proxy-Authorization"))
> +      request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value);
>  }
>  
>  static uerr_t
> @@ -2133,8 +2181,9 @@ establish_connection (const struct url *u, const struct 
> url **conn_ref,
>               CONNECT method to request passthrough.  */
>            struct request *connreq = request_new ("CONNECT",
>                                aprintf ("%s:%d", u->host, u->port));
> +
>            SET_USER_AGENT (connreq);
> -          if (proxyauth)
> +          if (!disabled_header("Proxy-Authorization") && proxyauth)
>              {
>                request_set_header (connreq, "Proxy-Authorization",
>                                    *proxyauth, rel_value);
> @@ -2143,9 +2192,10 @@ establish_connection (const struct url *u, const 
> struct url **conn_ref,
>                   the regular request below.  */
>                *proxyauth = NULL;
>              }
> -          request_set_header (connreq, "Host",
> -                              aprintf ("%s:%d", u->host, u->port),
> -                              rel_value);
> +       if (!disabled_header("Host"))
> +         request_set_header (connreq, "Host",
> +                             aprintf ("%s:%d", u->host, u->port),
> +                             rel_value);
>  
>            write_error = request_send (connreq, sock, 0);
>            request_free (&connreq);
> @@ -2456,7 +2506,7 @@ check_auth (const struct url *u, char *user, char 
> *passwd, struct response *resp
>            auth_err = *auth_stat;
>            xfree (auth_stat);
>            xfree (pth);
> -          if (auth_err == RETROK)
> +          if (!disabled_header("Authorisation") && auth_err == RETROK)
>              {
>                request_set_header (req, "Authorization", value, rel_value);
>  
> @@ -3264,7 +3314,8 @@ gethttp (const struct url *u, struct url *original_url, 
> struct http_stat *hs,
>      {
>        int i;
>        for (i = 0; opt.user_headers[i]; i++)
> -        request_set_user_header (req, opt.user_headers[i]);
> +     if (!disabled_header(opt.user_headers[i]))
> +       request_set_user_header (req, opt.user_headers[i]);
>      }
>  
>    proxyauth = NULL;
> diff --git a/src/init.c b/src/init.c
> index 9b6665a6..a2b6f311 100644
> --- a/src/init.c
> +++ b/src/init.c
> @@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression);
>  #endif
>  CMD_DECLARE (cmd_spec_dirstruct);
>  CMD_DECLARE (cmd_spec_header);
> +CMD_DECLARE (cmd_dis_header);
>  CMD_DECLARE (cmd_spec_warc_header);
>  CMD_DECLARE (cmd_spec_htmlify);
>  CMD_DECLARE (cmd_spec_mirror);
> @@ -183,6 +184,7 @@ static const struct {
>    { "deleteafter",      &opt.delete_after,      cmd_boolean },
>    { "dirprefix",        &opt.dir_prefix,        cmd_directory },
>    { "dirstruct",        NULL,                   cmd_spec_dirstruct },
> +  { "disableheader",    NULL,                   cmd_dis_header},
>    { "dnscache",         &opt.dns_cache,         cmd_boolean },
>  #ifdef HAVE_LIBCARES
>    { "dnsservers",       &opt.dns_servers,       cmd_string },
> @@ -398,6 +400,7 @@ defaults (void)
>    opt.metalink_index = -1;
>  #endif
>  
> +  opt.disabled_headers = NULL;
>    opt.cookies = true;
>    opt.verbose = -1;
>    opt.ntry = 20;
> @@ -1459,6 +1462,7 @@ cmd_cert_type (const char *com, const char *val, void 
> *place)
>     options specially.  */
>  
>  static bool check_user_specified_header (const char *);
> +static bool check_user_disabled_header (const char *);
>  
>  #ifdef HAVE_LIBZ
>  static bool
> @@ -1493,6 +1497,27 @@ cmd_spec_dirstruct (const char *com, const char *val, 
> void *place_ignored _GL_UN
>    return true;
>  }
>  
> +static bool
> +cmd_dis_header (const char *com, const char *val, void *place_ignored 
> _GL_UNUSED)
> +{
> +  /* Empty value means reset the list of headers. */
> +  if (*val == '\0')
> +    {
> +      free_vec (opt.disabled_headers);
> +      opt.disabled_headers = NULL;
> +      return true;
> +    }
> +
> +  if (!check_user_disabled_header (val))
> +    {
> +      fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
> +               exec_name, com, quote (val));
> +      return false;
> +    }
> +  opt.disabled_headers = vec_append (opt.disabled_headers, val);
> +  return true;
> +}
> +
>  static bool
>  cmd_spec_header (const char *com, const char *val, void *place_ignored 
> _GL_UNUSED)
>  {
> @@ -1850,6 +1875,23 @@ simple_atof (const char *beg, const char *end, double 
> *dest)
>     contain a colon preceded by non-white-space characters and must not
>     contain newlines.  */
>  
> +
> +static bool
> +check_user_disabled_header (const char* s)
> +{
> +  const char *p;
> +
> +  for (p = s; *p && *p != '\0' && !c_isspace (*p); p++)
> +    ;
> +
> +  if (p == s)
> +    return false;
> +  /* The header MUST NOT contain newlines.  */
> +  if (strchr (s, '\n'))
> +    return false;
> +  return true;
> +}
> +
>  static bool
>  check_user_specified_header (const char *s)
>  {
> @@ -1977,6 +2019,7 @@ cleanup (void)
>    xfree (opt.http_passwd);
>    xfree (opt.dot_style);
>    free_vec (opt.user_headers);
> +  free_vec (opt.disabled_headers);
>    free_vec (opt.warc_user_headers);
>  # ifdef HAVE_SSL
>    xfree (opt.cert_file);
> diff --git a/src/main.c b/src/main.c
> index 65b7f3f3..a73b39d9 100644
> --- a/src/main.c
> +++ b/src/main.c
> @@ -304,6 +304,7 @@ static struct cmdline_option option_data[] =
>      { "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
>      { "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
>      { "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
> +    { "disable-header", 0, OPT_VALUE, "disableheader", -1 },
>      { "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
>  #ifdef HAVE_LIBCARES
>      { "dns-servers", 0, OPT_VALUE, "dnsservers", -1 },
> @@ -544,6 +545,7 @@ init_switches (void)
>               identical to "--foo", except it has opposite meaning and
>               it doesn't allow an argument.  */
>            longopt = &long_options[o++];
> +
>            longopt->name = no_prefix (cmdopt->long_name);
>            longopt->has_arg = no_argument;
>            /* Mask the value so we'll be able to recognize that we're
> @@ -792,6 +794,8 @@ HTTP options:\n"),
>         --ignore-length             ignore 'Content-Length' header field\n"),
>      N_("\
>         --header=STRING             insert STRING among the headers\n"),
> +    N_("\
> +       --disable-header=STRING     disable STRING among the headers\n"),
>  #ifdef HAVE_LIBZ
>      N_("\
>         --compression=TYPE          choose compression, one of auto, gzip and 
> none. (default: none)\n"),
> diff --git a/src/options.h b/src/options.h
> index 881e2b2e..5559c6fa 100644
> --- a/src/options.h
> +++ b/src/options.h
> @@ -147,6 +147,7 @@ struct options
>    char *http_user;              /* HTTP username. */
>    char *http_passwd;            /* HTTP password. */
>    char **user_headers;          /* User-defined header(s). */
> +  char **disabled_headers;       /* User-disabled header(s) */
>    bool http_keep_alive;         /* whether we use keep-alive */
>  
>    bool use_proxy;               /* Do we use proxy? */
> 

Attachment: signature.asc
Description: OpenPGP digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]