view src/http_request.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line source
1 /*
2 * $Id: http_request.c,v 1.25 2005/01/04 15:03:41 mkern Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
17 #include "gt_gnutella.h"
18 #include "gt_version.h"
20 #include "gt_accept.h"
21 #include "gt_web_cache.h"
23 #include "http_request.h"
25 /*****************************************************************************/
27 #define MAX_REDIRECTS (5)
29 /*****************************************************************************/
31 static void decode_chunked_data (int fd, input_id id, TCPC *c);
32 static void read_chunked_header (int fd, input_id id, TCPC *c);
34 /*****************************************************************************/
36 /*
37 * Dummy callbacks
38 */
40 static void dummy_close (HttpRequest *r, int code)
41 {
42 return;
43 }
45 static BOOL dummy_recv (HttpRequest *r, char *d, size_t l)
46 {
47 return TRUE;
48 }
50 static BOOL dummy_add_header (HttpRequest *r, Dataset **d)
51 {
52 return TRUE;
53 }
55 static BOOL dummy_redirect (HttpRequest *r, const char *h, const char *p)
56 {
57 return TRUE;
58 }
60 /*****************************************************************************/
62 BOOL gt_http_url_parse (char *value, char **r_host, char **r_path)
63 {
64 char *host_name;
66 if (r_host)
67 *r_host = NULL;
68 if (r_path)
69 *r_path = NULL;
71 string_sep (&value, "http://");
73 /* divide the url in two parts */
74 host_name = string_sep (&value, "/");
76 if (r_host)
77 *r_host = host_name;
79 if (r_path)
80 *r_path = STRING_NOTNULL (value);
82 if (!host_name || host_name[0] == 0)
83 return FALSE;
85 return TRUE;
86 }
88 static void setup_dummy_functbl (HttpRequest *r)
89 {
90 r->close_req_func = dummy_close;
91 r->recv_func = dummy_recv;
92 r->add_header_func = dummy_add_header;
93 r->redirect_func = dummy_redirect;
94 }
96 HttpRequest *gt_http_request_new (const char *url, const char *request)
97 {
98 HttpRequest *req;
99 char *dup;
100 char *host;
101 char *path;
103 if (!(dup = STRDUP (url)))
104 return NULL;
106 if (!gt_http_url_parse (dup, &host, &path) ||
107 !(req = MALLOC (sizeof (HttpRequest))))
108 {
109 free (dup);
110 return NULL;
111 }
113 req->host = STRDUP (host);
114 req->path = STRDUP (path);
115 req->request = STRDUP (request);
116 req->timeout = 0;
117 req->redirects = 0;
118 req->headers = NULL;
120 /* setup functbl */
121 setup_dummy_functbl (req);
123 free (dup);
125 return req;
126 }
128 static void gt_http_request_free (HttpRequest *req)
129 {
130 if (!req)
131 return;
133 dataset_clear (req->headers);
135 free (req->host);
136 free (req->path);
137 free (req->request);
139 free (req);
140 }
142 void gt_http_request_close (HttpRequest *req, int error_code)
143 {
144 /* notify the callback */
145 req->close_req_func (req, error_code);
147 if (req->c)
148 tcp_close (req->c);
150 timer_remove_zero (&req->timeout);
152 gt_http_request_free (req);
153 }
155 /*****************************************************************************/
157 static BOOL request_timeout (HttpRequest *req)
158 {
159 GT->DBGFN (GT, "request to %s timed out", req->host);
160 gt_http_request_close (req, -1);
161 return FALSE;
162 }
164 void gt_http_request_set_timeout (HttpRequest *req, time_t time)
165 {
166 if (!req)
167 return;
169 if (req->timeout)
170 timer_remove (req->timeout);
172 req->timeout = timer_add (time, (TimerCallback)request_timeout, req);
173 }
175 void gt_http_request_set_proxy (HttpRequest *req, const char *proxy)
176 {
177 free (req->proxy);
178 req->proxy = NULL;
180 if (!proxy)
181 return;
183 req->proxy = STRDUP (proxy);
184 }
186 void gt_http_request_set_conn (HttpRequest *req, TCPC *c)
187 {
188 assert (c->udata == NULL);
189 assert (req->c == NULL);
191 req->c = c;
192 c->udata = req;
193 }
195 void gt_http_request_set_max_len (HttpRequest *req, size_t max_len)
196 {
197 req->max_len = max_len;
198 }
200 /*****************************************************************************/
202 static BOOL write_data (HttpRequest *req, char *data, size_t len)
203 {
204 if (!req)
205 return FALSE;
207 req->recvd_len += len;
209 /* check if we overflowed the max length the user wants to receive */
210 if (req->max_len > 0 && req->recvd_len > req->max_len)
211 {
212 GT->DBGFN (GT, "%s sent %lu bytes overflowing max length of %lu",
213 req->host, req->recvd_len, req->max_len);
214 gt_http_request_close (req, -1);
215 return FALSE;
216 }
218 /* send the data to the listener */
219 if (req->recv_func (req, data, len) == FALSE)
220 {
221 gt_http_request_close (req, -1);
222 return FALSE;
223 }
225 return TRUE;
226 }
228 /*****************************************************************************/
230 static void write_header (ds_data_t *key, ds_data_t *value, String *s)
231 {
232 char *header = key->data;
233 char *field = value->data;
235 string_appendf (s, "%s: %s\r\n", header, field);
236 }
238 static int http_send (TCPC *c, char *command, char *request,
239 Dataset *headers)
240 {
241 String *s;
242 int ret;
244 if (!command || !request)
245 return -1;
247 if (!(s = string_new (NULL, 0, 0, TRUE)))
248 return -1;
250 string_appendf (s, "%s %s HTTP/1.1\r\n", command, request);
252 dataset_foreach (headers, DS_FOREACH(write_header), s);
253 string_append (s, "\r\n");
255 GT->DBGSOCK (GT, c, "<http_request.c> sending:\n%s", s->str);
257 ret = tcp_send (c, s->str, s->len);
258 string_free (s);
260 return ret;
261 }
263 static HttpRequest *get_request (TCPC *c)
264 {
265 return c->udata;
266 }
268 /*****************************************************************************/
270 static void decode_chunked_data (int fd, input_id id, TCPC *c)
271 {
272 HttpRequest *req;
273 FDBuf *buf;
274 char *data;
275 int data_len = 0;
276 int n;
278 req = get_request (c);
280 if (!req->size)
281 {
282 gt_http_request_close (req, 200);
283 return;
284 }
286 buf = tcp_readbuf (c);
288 if ((n = fdbuf_fill (buf, req->size)) < 0)
289 {
290 GT->DBGFN (GT, "error on host %s: %s", req->host, GIFT_NETERROR ());
291 gt_http_request_close (req, -1);
292 return;
293 }
295 if (gt_fdbuf_full (buf))
296 {
297 gt_http_request_close (req, -1);
298 return;
299 }
301 if (n > 0)
302 return;
304 data = fdbuf_data (buf, &data_len);
305 fdbuf_release (buf);
307 if (!write_data (req, data, data_len))
308 return;
310 input_remove (id);
311 input_add (fd, c, INPUT_READ,
312 (InputCallback)read_chunked_header, TIMEOUT_DEF);
313 }
315 static void read_chunked_header (int fd, input_id id, TCPC *c)
316 {
317 HttpRequest *req;
318 FDBuf *buf;
319 char *response;
320 int n;
322 req = get_request (c);
323 buf = tcp_readbuf (c);
325 if ((n = fdbuf_delim (buf, "\n")) < 0)
326 {
327 GT->DBGFN (GT, "error on %s: %s", req->host, GIFT_NETERROR ());
328 gt_http_request_close (req, -1);
329 return;
330 }
332 if (gt_fdbuf_full (buf))
333 {
334 gt_http_request_close (req, -1);
335 return;
336 }
338 if (n > 0)
339 return;
341 response = fdbuf_data (buf, NULL);
342 fdbuf_release (buf);
344 /* read the chunk size, its a hexadecimal integer */
345 req->size = strtoul (response, NULL, 16);
346 GT->DBGFN (GT, "server sent chunk size of %lu", req->size);
348 if (req->size == ULONG_MAX)
349 {
350 GT->DBGFN (GT, "overflow reading chunk size: %s", GIFT_STRERROR ());
351 gt_http_request_close (req, -1);
352 return;
353 }
355 if (req->size == 0)
356 {
357 /* ok, done */
358 if (!write_data (req, NULL, 0))
359 return;
361 /* there could be a CRLF at the end. should we read it?
362 * To avoid screwing up persistent http, yes.. */
363 gt_http_request_close (req, 200);
364 return;
365 }
367 input_remove (id);
368 input_add (fd, c, INPUT_READ,
369 (InputCallback)decode_chunked_data, TIMEOUT_DEF);
370 }
372 /* read the amount of data specified by Content-Length: */
373 static void read_file (int fd, input_id id, TCPC *c)
374 {
375 HttpRequest *req;
376 FDBuf *buf;
377 int n;
378 size_t len;
379 unsigned char *data;
381 req = get_request (c);
383 if (!req->size)
384 {
385 gt_http_request_close (req, 200);
386 return;
387 }
389 buf = tcp_readbuf (c);
391 if ((n = fdbuf_fill (buf, req->size)) < 0)
392 {
393 GT->DBGFN (GT, "error from %s: %s", req->host, GIFT_NETERROR ());
394 gt_http_request_close (req, -1);
395 return;
396 }
398 if (n > 0)
399 return;
401 data = fdbuf_data (buf, &len);
402 fdbuf_release (buf);
404 if (!write_data (req, data, len))
405 return;
407 /*
408 * We've read all the data, the total length of the request being provided
409 * by fdbuf_fill(). Now send the closing notification to our callback.
410 */
411 if (!write_data (req, NULL, 0))
412 return;
414 /* success */
415 gt_http_request_close (req, 200);
416 }
418 /* callback to read when no Content-Length: header is provided */
419 static void read_until_eof (int fd, input_id id, TCPC *c)
420 {
421 char data[RW_BUFFER];
422 int n;
423 HttpRequest *req;
425 req = get_request (c);
427 if ((n = tcp_recv (c, data, sizeof (data) - 1)) < 0)
428 {
429 GT->DBGFN (GT, "error from %s: %s", req->host, GIFT_NETERROR());
430 gt_http_request_close (req, -1);
431 return;
432 }
434 /* terminate the buffer */
435 data[n] = 0;
437 if (n == 0)
438 {
439 /* signal to the listener that EOF was reached */
440 if (!write_data (req, NULL, 0))
441 return;
443 gt_http_request_close (req, 200);
444 return;
445 }
447 if (!write_data (req, data, n))
448 return;
449 }
451 static void reset_request (HttpRequest *req, const char *host,
452 const char *path)
453 {
454 free (req->host);
455 free (req->path);
456 req->host = STRDUP (host);
457 req->path = STRDUP (path);
459 dataset_clear (req->headers);
460 req->headers = NULL;
461 }
463 /*
464 * This will do a limited redirect on the same connection.
465 * One bug is it doesn't care if the Location header posts a different port,
466 */
467 static void handle_redirect (HttpRequest *req, int code)
468 {
469 char *new_host;
470 char *new_path;
471 char *location;
473 /* make sure the Location: header points to the same host */
474 location = dataset_lookupstr (req->headers, "location");
476 /* butchers Location header, but it will be freed soon anyway */
477 if (!location ||
478 !gt_http_url_parse (location, &new_host, &new_path))
479 {
480 gt_http_request_close (req, code);
481 return;
482 }
484 assert (new_host != NULL);
486 if (++req->redirects >= MAX_REDIRECTS)
487 {
488 GT->DBGSOCK (GT, req->c, "Too many redirects");
489 gt_http_request_close (req, code);
490 return;
491 }
493 /*
494 * Let the caller know we're redirecting so it can reset it's ancilliary
495 * data.
496 */
497 if (req->redirect_func (req, new_host, new_path) == FALSE)
498 {
499 gt_http_request_close (req, code);
500 return;
501 }
503 /* setup the new request */
504 reset_request (req, new_host, new_path);
506 /* restart the request */
507 input_remove_all (req->c->fd);
508 input_add (req->c->fd, req->c, INPUT_WRITE,
509 (InputCallback)gt_http_request_handle, TIMEOUT_DEF);
510 }
512 static BOOL parse_server_response (char *reply, HttpRequest *req)
513 {
514 char *response;
515 int code; /* 200, 404, ... */
517 response = string_sep (&reply, "\r\n");
519 if (!response)
520 return FALSE;
522 /* */ string_sep (&response, " "); /* shift past HTTP/1.1 */
523 code = ATOI (string_sep (&response, " ")); /* shift past 200 */
525 /* parse the headers */
526 gt_http_header_parse (reply, &req->headers);
528 if (code >= 200 && code <= 299)
529 return TRUE;
531 /* redirection */
532 if (code >= 300 && code <= 399)
533 {
534 handle_redirect (req, code);
535 return FALSE; /* stop this request */
536 }
538 /* request error: could blacklist the server in recv_callback */
539 GT->DBGFN (GT, "error parsing response from %s, closing", req->host);
540 gt_http_request_close (req, code);
542 return FALSE;
543 }
545 static void read_headers (int fd, input_id id, TCPC *c)
546 {
547 HttpRequest *req;
548 FDBuf *buf;
549 char *response;
550 size_t response_len = 0;
551 char *encoding;
552 char *len_str;
553 int n;
555 req = get_request (c);
556 buf = tcp_readbuf (c);
558 if ((n = fdbuf_delim (buf, "\n")) < 0)
559 {
560 GT->DBGFN (GT, "error reading from %s: %s", net_peer_ip (c->fd),
561 GIFT_NETERROR ());
562 gt_http_request_close (req, -1);
563 return;
564 }
566 if (gt_fdbuf_full (buf))
567 {
568 gt_http_request_close (req, -1);
569 return;
570 }
572 if (n > 0)
573 return;
575 response = fdbuf_data (buf, &response_len);
577 if (response_len >= req->max_len)
578 {
579 GT->DBGFN (GT, "headers too large(%lu)", (long)response_len);
580 gt_http_request_close (req, -1);
581 }
583 if (!gt_http_header_terminated (response, response_len))
584 return;
586 fdbuf_release (buf);
587 GT->DBGFN (GT, "response=\n%s", response);
589 if (!parse_server_response (response, req))
590 return;
592 input_remove (id);
594 encoding = dataset_lookupstr (req->headers, "transfer-encoding");
596 if (encoding && !strcasecmp (encoding, "chunked"))
597 {
598 input_add (fd, c, INPUT_READ,
599 (InputCallback)read_chunked_header, TIMEOUT_DEF);
600 return;
601 }
603 if (!(len_str = dataset_lookupstr (req->headers, "content-length")))
604 {
605 GT->warn (GT, "no Content-Length header from %s", req->host);
606 input_add (fd, c, INPUT_READ,
607 (InputCallback)read_until_eof, TIMEOUT_DEF);
608 return;
609 }
611 req->size = ATOUL (len_str);
613 if (req->max_len > 0 && req->size >= req->max_len)
614 {
615 GT->DBGFN (GT, "bad size (%s) in content length field for %s",
616 len_str, req->host);
617 gt_http_request_close (req, -1);
618 return;
619 }
621 input_add (fd, c, INPUT_READ,
622 (InputCallback)read_file, TIMEOUT_DEF);
623 }
625 /*
626 * Determine the part after the GET. If proxied, this need to be a complete
627 * URL, and otherwise should be a simple path.
628 */
629 static void append_request_line (String *s, HttpRequest *req)
630 {
631 if (req->proxy)
632 string_appendf (s, "http://%s", req->host);
634 string_appendf (s, "/%s", STRING_NOTNULL(req->path));
635 }
637 static int send_request (HttpRequest *req)
638 {
639 Dataset *headers = NULL;
640 String *s;
641 int ret;
643 if (!(s = string_new (NULL, 0, 0, TRUE)))
644 return -1;
646 append_request_line (s, req);
648 if (!string_isempty (req->request))
649 string_appendf (s, "?%s", req->request);
651 dataset_insertstr (&headers, "Host", req->host); /* required by HTTP/1.1 */
652 dataset_insertstr (&headers, "User-Agent", gt_version ());
654 if (req->add_header_func (req, &headers) == FALSE)
655 {
656 /* Hmm, this is our error, what should the error code be */
657 gt_http_request_close (req, -1);
658 dataset_clear (headers);
659 string_free (s);
660 return -1;
661 }
663 ret = http_send (req->c, "GET", s->str, headers);
665 dataset_clear (headers);
666 string_free (s);
668 return ret;
669 }
671 void gt_http_request_handle (int fd, input_id id, TCPC *c)
672 {
673 HttpRequest *req;
675 req = get_request (c);
677 if (send_request (req) <= 0)
678 {
679 GT->DBGFN (GT, "send failed: %s", GIFT_NETERROR());
680 gt_http_request_close (req, -1);
681 return;
682 }
684 input_remove (id);
685 input_add (fd, c, INPUT_READ,
686 (InputCallback)read_headers, TIMEOUT_DEF);
687 }