paulo@0: /* paulo@0: * $Id: http_request.c,v 1.25 2005/01/04 15:03:41 mkern Exp $ paulo@0: * paulo@0: * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) paulo@0: * paulo@0: * This program is free software; you can redistribute it and/or modify it paulo@0: * under the terms of the GNU General Public License as published by the paulo@0: * Free Software Foundation; either version 2, or (at your option) any paulo@0: * later version. paulo@0: * paulo@0: * This program is distributed in the hope that it will be useful, but paulo@0: * WITHOUT ANY WARRANTY; without even the implied warranty of paulo@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU paulo@0: * General Public License for more details. paulo@0: */ paulo@0: paulo@0: #include "gt_gnutella.h" paulo@0: #include "gt_version.h" paulo@0: paulo@0: #include "gt_accept.h" paulo@0: #include "gt_web_cache.h" paulo@0: paulo@0: #include "http_request.h" paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: #define MAX_REDIRECTS (5) paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static void decode_chunked_data (int fd, input_id id, TCPC *c); paulo@0: static void read_chunked_header (int fd, input_id id, TCPC *c); paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* paulo@0: * Dummy callbacks paulo@0: */ paulo@0: paulo@0: static void dummy_close (HttpRequest *r, int code) paulo@0: { paulo@0: return; paulo@0: } paulo@0: paulo@0: static BOOL dummy_recv (HttpRequest *r, char *d, size_t l) paulo@0: { paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL dummy_add_header (HttpRequest *r, Dataset **d) paulo@0: { paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL dummy_redirect (HttpRequest *r, const char *h, const char *p) paulo@0: { paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: BOOL gt_http_url_parse (char *value, char **r_host, char **r_path) paulo@0: { paulo@0: char *host_name; paulo@0: paulo@0: if (r_host) paulo@0: *r_host = NULL; paulo@0: if (r_path) paulo@0: *r_path = NULL; paulo@0: paulo@0: string_sep (&value, "http://"); paulo@0: paulo@0: /* divide the url in two parts */ paulo@0: host_name = string_sep (&value, "/"); paulo@0: paulo@0: if (r_host) paulo@0: *r_host = host_name; paulo@0: paulo@0: if (r_path) paulo@0: *r_path = STRING_NOTNULL (value); paulo@0: paulo@0: if (!host_name || host_name[0] == 0) paulo@0: return FALSE; paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static void setup_dummy_functbl (HttpRequest *r) paulo@0: { paulo@0: r->close_req_func = dummy_close; paulo@0: r->recv_func = dummy_recv; paulo@0: r->add_header_func = dummy_add_header; paulo@0: r->redirect_func = dummy_redirect; paulo@0: } paulo@0: paulo@0: HttpRequest *gt_http_request_new (const char *url, const char *request) paulo@0: { paulo@0: HttpRequest *req; paulo@0: char *dup; paulo@0: char *host; paulo@0: char *path; paulo@0: paulo@0: if (!(dup = STRDUP (url))) paulo@0: return NULL; paulo@0: paulo@0: if (!gt_http_url_parse (dup, &host, &path) || paulo@0: !(req = MALLOC (sizeof (HttpRequest)))) paulo@0: { paulo@0: free (dup); paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: req->host = STRDUP (host); paulo@0: req->path = STRDUP (path); paulo@0: req->request = STRDUP (request); paulo@0: req->timeout = 0; paulo@0: req->redirects = 0; paulo@0: req->headers = NULL; paulo@0: paulo@0: /* setup functbl */ paulo@0: setup_dummy_functbl (req); paulo@0: paulo@0: free (dup); paulo@0: paulo@0: return req; paulo@0: } paulo@0: paulo@0: static void gt_http_request_free (HttpRequest *req) paulo@0: { paulo@0: if (!req) paulo@0: return; paulo@0: paulo@0: dataset_clear (req->headers); paulo@0: paulo@0: free (req->host); paulo@0: free (req->path); paulo@0: free (req->request); paulo@0: paulo@0: free (req); paulo@0: } paulo@0: paulo@0: void gt_http_request_close (HttpRequest *req, int error_code) paulo@0: { paulo@0: /* notify the callback */ paulo@0: req->close_req_func (req, error_code); paulo@0: paulo@0: if (req->c) paulo@0: tcp_close (req->c); paulo@0: paulo@0: timer_remove_zero (&req->timeout); paulo@0: paulo@0: gt_http_request_free (req); paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static BOOL request_timeout (HttpRequest *req) paulo@0: { paulo@0: GT->DBGFN (GT, "request to %s timed out", req->host); paulo@0: gt_http_request_close (req, -1); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: void gt_http_request_set_timeout (HttpRequest *req, time_t time) paulo@0: { paulo@0: if (!req) paulo@0: return; paulo@0: paulo@0: if (req->timeout) paulo@0: timer_remove (req->timeout); paulo@0: paulo@0: req->timeout = timer_add (time, (TimerCallback)request_timeout, req); paulo@0: } paulo@0: paulo@0: void gt_http_request_set_proxy (HttpRequest *req, const char *proxy) paulo@0: { paulo@0: free (req->proxy); paulo@0: req->proxy = NULL; paulo@0: paulo@0: if (!proxy) paulo@0: return; paulo@0: paulo@0: req->proxy = STRDUP (proxy); paulo@0: } paulo@0: paulo@0: void gt_http_request_set_conn (HttpRequest *req, TCPC *c) paulo@0: { paulo@0: assert (c->udata == NULL); paulo@0: assert (req->c == NULL); paulo@0: paulo@0: req->c = c; paulo@0: c->udata = req; paulo@0: } paulo@0: paulo@0: void gt_http_request_set_max_len (HttpRequest *req, size_t max_len) paulo@0: { paulo@0: req->max_len = max_len; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static BOOL write_data (HttpRequest *req, char *data, size_t len) paulo@0: { paulo@0: if (!req) paulo@0: return FALSE; paulo@0: paulo@0: req->recvd_len += len; paulo@0: paulo@0: /* check if we overflowed the max length the user wants to receive */ paulo@0: if (req->max_len > 0 && req->recvd_len > req->max_len) paulo@0: { paulo@0: GT->DBGFN (GT, "%s sent %lu bytes overflowing max length of %lu", paulo@0: req->host, req->recvd_len, req->max_len); paulo@0: gt_http_request_close (req, -1); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: /* send the data to the listener */ paulo@0: if (req->recv_func (req, data, len) == FALSE) paulo@0: { paulo@0: gt_http_request_close (req, -1); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static void write_header (ds_data_t *key, ds_data_t *value, String *s) paulo@0: { paulo@0: char *header = key->data; paulo@0: char *field = value->data; paulo@0: paulo@0: string_appendf (s, "%s: %s\r\n", header, field); paulo@0: } paulo@0: paulo@0: static int http_send (TCPC *c, char *command, char *request, paulo@0: Dataset *headers) paulo@0: { paulo@0: String *s; paulo@0: int ret; paulo@0: paulo@0: if (!command || !request) paulo@0: return -1; paulo@0: paulo@0: if (!(s = string_new (NULL, 0, 0, TRUE))) paulo@0: return -1; paulo@0: paulo@0: string_appendf (s, "%s %s HTTP/1.1\r\n", command, request); paulo@0: paulo@0: dataset_foreach (headers, DS_FOREACH(write_header), s); paulo@0: string_append (s, "\r\n"); paulo@0: paulo@0: GT->DBGSOCK (GT, c, " sending:\n%s", s->str); paulo@0: paulo@0: ret = tcp_send (c, s->str, s->len); paulo@0: string_free (s); paulo@0: paulo@0: return ret; paulo@0: } paulo@0: paulo@0: static HttpRequest *get_request (TCPC *c) paulo@0: { paulo@0: return c->udata; paulo@0: } paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static void decode_chunked_data (int fd, input_id id, TCPC *c) paulo@0: { paulo@0: HttpRequest *req; paulo@0: FDBuf *buf; paulo@0: char *data; paulo@0: int data_len = 0; paulo@0: int n; paulo@0: paulo@0: req = get_request (c); paulo@0: paulo@0: if (!req->size) paulo@0: { paulo@0: gt_http_request_close (req, 200); paulo@0: return; paulo@0: } paulo@0: paulo@0: buf = tcp_readbuf (c); paulo@0: paulo@0: if ((n = fdbuf_fill (buf, req->size)) < 0) paulo@0: { paulo@0: GT->DBGFN (GT, "error on host %s: %s", req->host, GIFT_NETERROR ()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (gt_fdbuf_full (buf)) paulo@0: { paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (n > 0) paulo@0: return; paulo@0: paulo@0: data = fdbuf_data (buf, &data_len); paulo@0: fdbuf_release (buf); paulo@0: paulo@0: if (!write_data (req, data, data_len)) paulo@0: return; paulo@0: paulo@0: input_remove (id); paulo@0: input_add (fd, c, INPUT_READ, paulo@0: (InputCallback)read_chunked_header, TIMEOUT_DEF); paulo@0: } paulo@0: paulo@0: static void read_chunked_header (int fd, input_id id, TCPC *c) paulo@0: { paulo@0: HttpRequest *req; paulo@0: FDBuf *buf; paulo@0: char *response; paulo@0: int n; paulo@0: paulo@0: req = get_request (c); paulo@0: buf = tcp_readbuf (c); paulo@0: paulo@0: if ((n = fdbuf_delim (buf, "\n")) < 0) paulo@0: { paulo@0: GT->DBGFN (GT, "error on %s: %s", req->host, GIFT_NETERROR ()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (gt_fdbuf_full (buf)) paulo@0: { paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (n > 0) paulo@0: return; paulo@0: paulo@0: response = fdbuf_data (buf, NULL); paulo@0: fdbuf_release (buf); paulo@0: paulo@0: /* read the chunk size, its a hexadecimal integer */ paulo@0: req->size = strtoul (response, NULL, 16); paulo@0: GT->DBGFN (GT, "server sent chunk size of %lu", req->size); paulo@0: paulo@0: if (req->size == ULONG_MAX) paulo@0: { paulo@0: GT->DBGFN (GT, "overflow reading chunk size: %s", GIFT_STRERROR ()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (req->size == 0) paulo@0: { paulo@0: /* ok, done */ paulo@0: if (!write_data (req, NULL, 0)) paulo@0: return; paulo@0: paulo@0: /* there could be a CRLF at the end. should we read it? paulo@0: * To avoid screwing up persistent http, yes.. */ paulo@0: gt_http_request_close (req, 200); paulo@0: return; paulo@0: } paulo@0: paulo@0: input_remove (id); paulo@0: input_add (fd, c, INPUT_READ, paulo@0: (InputCallback)decode_chunked_data, TIMEOUT_DEF); paulo@0: } paulo@0: paulo@0: /* read the amount of data specified by Content-Length: */ paulo@0: static void read_file (int fd, input_id id, TCPC *c) paulo@0: { paulo@0: HttpRequest *req; paulo@0: FDBuf *buf; paulo@0: int n; paulo@0: size_t len; paulo@0: unsigned char *data; paulo@0: paulo@0: req = get_request (c); paulo@0: paulo@0: if (!req->size) paulo@0: { paulo@0: gt_http_request_close (req, 200); paulo@0: return; paulo@0: } paulo@0: paulo@0: buf = tcp_readbuf (c); paulo@0: paulo@0: if ((n = fdbuf_fill (buf, req->size)) < 0) paulo@0: { paulo@0: GT->DBGFN (GT, "error from %s: %s", req->host, GIFT_NETERROR ()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (n > 0) paulo@0: return; paulo@0: paulo@0: data = fdbuf_data (buf, &len); paulo@0: fdbuf_release (buf); paulo@0: paulo@0: if (!write_data (req, data, len)) paulo@0: return; paulo@0: paulo@0: /* paulo@0: * We've read all the data, the total length of the request being provided paulo@0: * by fdbuf_fill(). Now send the closing notification to our callback. paulo@0: */ paulo@0: if (!write_data (req, NULL, 0)) paulo@0: return; paulo@0: paulo@0: /* success */ paulo@0: gt_http_request_close (req, 200); paulo@0: } paulo@0: paulo@0: /* callback to read when no Content-Length: header is provided */ paulo@0: static void read_until_eof (int fd, input_id id, TCPC *c) paulo@0: { paulo@0: char data[RW_BUFFER]; paulo@0: int n; paulo@0: HttpRequest *req; paulo@0: paulo@0: req = get_request (c); paulo@0: paulo@0: if ((n = tcp_recv (c, data, sizeof (data) - 1)) < 0) paulo@0: { paulo@0: GT->DBGFN (GT, "error from %s: %s", req->host, GIFT_NETERROR()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: /* terminate the buffer */ paulo@0: data[n] = 0; paulo@0: paulo@0: if (n == 0) paulo@0: { paulo@0: /* signal to the listener that EOF was reached */ paulo@0: if (!write_data (req, NULL, 0)) paulo@0: return; paulo@0: paulo@0: gt_http_request_close (req, 200); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (!write_data (req, data, n)) paulo@0: return; paulo@0: } paulo@0: paulo@0: static void reset_request (HttpRequest *req, const char *host, paulo@0: const char *path) paulo@0: { paulo@0: free (req->host); paulo@0: free (req->path); paulo@0: req->host = STRDUP (host); paulo@0: req->path = STRDUP (path); paulo@0: paulo@0: dataset_clear (req->headers); paulo@0: req->headers = NULL; paulo@0: } paulo@0: paulo@0: /* paulo@0: * This will do a limited redirect on the same connection. paulo@0: * One bug is it doesn't care if the Location header posts a different port, paulo@0: */ paulo@0: static void handle_redirect (HttpRequest *req, int code) paulo@0: { paulo@0: char *new_host; paulo@0: char *new_path; paulo@0: char *location; paulo@0: paulo@0: /* make sure the Location: header points to the same host */ paulo@0: location = dataset_lookupstr (req->headers, "location"); paulo@0: paulo@0: /* butchers Location header, but it will be freed soon anyway */ paulo@0: if (!location || paulo@0: !gt_http_url_parse (location, &new_host, &new_path)) paulo@0: { paulo@0: gt_http_request_close (req, code); paulo@0: return; paulo@0: } paulo@0: paulo@0: assert (new_host != NULL); paulo@0: paulo@0: if (++req->redirects >= MAX_REDIRECTS) paulo@0: { paulo@0: GT->DBGSOCK (GT, req->c, "Too many redirects"); paulo@0: gt_http_request_close (req, code); paulo@0: return; paulo@0: } paulo@0: paulo@0: /* paulo@0: * Let the caller know we're redirecting so it can reset it's ancilliary paulo@0: * data. paulo@0: */ paulo@0: if (req->redirect_func (req, new_host, new_path) == FALSE) paulo@0: { paulo@0: gt_http_request_close (req, code); paulo@0: return; paulo@0: } paulo@0: paulo@0: /* setup the new request */ paulo@0: reset_request (req, new_host, new_path); paulo@0: paulo@0: /* restart the request */ paulo@0: input_remove_all (req->c->fd); paulo@0: input_add (req->c->fd, req->c, INPUT_WRITE, paulo@0: (InputCallback)gt_http_request_handle, TIMEOUT_DEF); paulo@0: } paulo@0: paulo@0: static BOOL parse_server_response (char *reply, HttpRequest *req) paulo@0: { paulo@0: char *response; paulo@0: int code; /* 200, 404, ... */ paulo@0: paulo@0: response = string_sep (&reply, "\r\n"); paulo@0: paulo@0: if (!response) paulo@0: return FALSE; paulo@0: paulo@0: /* */ string_sep (&response, " "); /* shift past HTTP/1.1 */ paulo@0: code = ATOI (string_sep (&response, " ")); /* shift past 200 */ paulo@0: paulo@0: /* parse the headers */ paulo@0: gt_http_header_parse (reply, &req->headers); paulo@0: paulo@0: if (code >= 200 && code <= 299) paulo@0: return TRUE; paulo@0: paulo@0: /* redirection */ paulo@0: if (code >= 300 && code <= 399) paulo@0: { paulo@0: handle_redirect (req, code); paulo@0: return FALSE; /* stop this request */ paulo@0: } paulo@0: paulo@0: /* request error: could blacklist the server in recv_callback */ paulo@0: GT->DBGFN (GT, "error parsing response from %s, closing", req->host); paulo@0: gt_http_request_close (req, code); paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: static void read_headers (int fd, input_id id, TCPC *c) paulo@0: { paulo@0: HttpRequest *req; paulo@0: FDBuf *buf; paulo@0: char *response; paulo@0: size_t response_len = 0; paulo@0: char *encoding; paulo@0: char *len_str; paulo@0: int n; paulo@0: paulo@0: req = get_request (c); paulo@0: buf = tcp_readbuf (c); paulo@0: paulo@0: if ((n = fdbuf_delim (buf, "\n")) < 0) paulo@0: { paulo@0: GT->DBGFN (GT, "error reading from %s: %s", net_peer_ip (c->fd), paulo@0: GIFT_NETERROR ()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (gt_fdbuf_full (buf)) paulo@0: { paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (n > 0) paulo@0: return; paulo@0: paulo@0: response = fdbuf_data (buf, &response_len); paulo@0: paulo@0: if (response_len >= req->max_len) paulo@0: { paulo@0: GT->DBGFN (GT, "headers too large(%lu)", (long)response_len); paulo@0: gt_http_request_close (req, -1); paulo@0: } paulo@0: paulo@0: if (!gt_http_header_terminated (response, response_len)) paulo@0: return; paulo@0: paulo@0: fdbuf_release (buf); paulo@0: GT->DBGFN (GT, "response=\n%s", response); paulo@0: paulo@0: if (!parse_server_response (response, req)) paulo@0: return; paulo@0: paulo@0: input_remove (id); paulo@0: paulo@0: encoding = dataset_lookupstr (req->headers, "transfer-encoding"); paulo@0: paulo@0: if (encoding && !strcasecmp (encoding, "chunked")) paulo@0: { paulo@0: input_add (fd, c, INPUT_READ, paulo@0: (InputCallback)read_chunked_header, TIMEOUT_DEF); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (!(len_str = dataset_lookupstr (req->headers, "content-length"))) paulo@0: { paulo@0: GT->warn (GT, "no Content-Length header from %s", req->host); paulo@0: input_add (fd, c, INPUT_READ, paulo@0: (InputCallback)read_until_eof, TIMEOUT_DEF); paulo@0: return; paulo@0: } paulo@0: paulo@0: req->size = ATOUL (len_str); paulo@0: paulo@0: if (req->max_len > 0 && req->size >= req->max_len) paulo@0: { paulo@0: GT->DBGFN (GT, "bad size (%s) in content length field for %s", paulo@0: len_str, req->host); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: input_add (fd, c, INPUT_READ, paulo@0: (InputCallback)read_file, TIMEOUT_DEF); paulo@0: } paulo@0: paulo@0: /* paulo@0: * Determine the part after the GET. If proxied, this need to be a complete paulo@0: * URL, and otherwise should be a simple path. paulo@0: */ paulo@0: static void append_request_line (String *s, HttpRequest *req) paulo@0: { paulo@0: if (req->proxy) paulo@0: string_appendf (s, "http://%s", req->host); paulo@0: paulo@0: string_appendf (s, "/%s", STRING_NOTNULL(req->path)); paulo@0: } paulo@0: paulo@0: static int send_request (HttpRequest *req) paulo@0: { paulo@0: Dataset *headers = NULL; paulo@0: String *s; paulo@0: int ret; paulo@0: paulo@0: if (!(s = string_new (NULL, 0, 0, TRUE))) paulo@0: return -1; paulo@0: paulo@0: append_request_line (s, req); paulo@0: paulo@0: if (!string_isempty (req->request)) paulo@0: string_appendf (s, "?%s", req->request); paulo@0: paulo@0: dataset_insertstr (&headers, "Host", req->host); /* required by HTTP/1.1 */ paulo@0: dataset_insertstr (&headers, "User-Agent", gt_version ()); paulo@0: paulo@0: if (req->add_header_func (req, &headers) == FALSE) paulo@0: { paulo@0: /* Hmm, this is our error, what should the error code be */ paulo@0: gt_http_request_close (req, -1); paulo@0: dataset_clear (headers); paulo@0: string_free (s); paulo@0: return -1; paulo@0: } paulo@0: paulo@0: ret = http_send (req->c, "GET", s->str, headers); paulo@0: paulo@0: dataset_clear (headers); paulo@0: string_free (s); paulo@0: paulo@0: return ret; paulo@0: } paulo@0: paulo@0: void gt_http_request_handle (int fd, input_id id, TCPC *c) paulo@0: { paulo@0: HttpRequest *req; paulo@0: paulo@0: req = get_request (c); paulo@0: paulo@0: if (send_request (req) <= 0) paulo@0: { paulo@0: GT->DBGFN (GT, "send failed: %s", GIFT_NETERROR()); paulo@0: gt_http_request_close (req, -1); paulo@0: return; paulo@0: } paulo@0: paulo@0: input_remove (id); paulo@0: input_add (fd, c, INPUT_READ, paulo@0: (InputCallback)read_headers, TIMEOUT_DEF); paulo@0: }