Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
comparison src/gt_web_cache.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bce3edf59d1d |
---|---|
1 /* | |
2 * $Id: gt_web_cache.c,v 1.65 2006/08/06 16:53:36 hexwab Exp $ | |
3 * | |
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify it | |
7 * under the terms of the GNU General Public License as published by the | |
8 * Free Software Foundation; either version 2, or (at your option) any | |
9 * later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, but | |
12 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * General Public License for more details. | |
15 */ | |
16 | |
17 #include "gt_gnutella.h" | |
18 | |
19 #include "file_cache.h" | |
20 #include "http_request.h" | |
21 | |
22 #include "gt_connect.h" | |
23 #include "gt_node.h" | |
24 #include "gt_node_list.h" | |
25 #include "gt_netorg.h" | |
26 | |
27 #include "gt_web_cache.h" | |
28 #include "gt_conf.h" | |
29 | |
30 #include "dns.h" | |
31 | |
32 /*****************************************************************************/ | |
33 | |
34 /* minimum time to wait before reconnecting to a webcache */ | |
35 #define CACHE_RETRY_TIME (8 * EHOURS) | |
36 | |
37 /*****************************************************************************/ | |
38 | |
39 /* number of times we have hit some gwebcaches */ | |
40 static int cache_hits; | |
41 | |
42 /* the absolute next time we will allow ourselves to access a cache */ | |
43 static time_t next_atime; | |
44 | |
45 /* amount of time to layoff the caches once we've received some data */ | |
46 static time_t backoff_time = 1 * EHOURS; | |
47 | |
48 /* holds all the caches */ | |
49 static FileCache *web_caches; | |
50 | |
51 /* proxy server to contact */ | |
52 static char *gt_proxy_server; | |
53 | |
54 /* webcaches that succeeded connecting, but returned errors or an | |
55 * unparseable response */ | |
56 static FileCache *bad_caches; | |
57 | |
58 /* whether we are in the process of checking the caches */ | |
59 static BOOL checking_caches; | |
60 | |
61 /*****************************************************************************/ | |
62 | |
63 static void parse_hostfile_response (HttpRequest *http_req, char *hosts_file); | |
64 static void parse_urlfile_response (HttpRequest *http_req, char *url_file); | |
65 | |
66 /*****************************************************************************/ | |
67 | |
68 /* parse the extended data in the webcaches file, now its just mtime */ | |
69 static BOOL parse_web_cache_value (char *value, time_t *r_atime) | |
70 { | |
71 time_t atime; | |
72 | |
73 if ((atime = ATOUL (value)) == (unsigned long) -1) | |
74 atime = 0; | |
75 | |
76 if (r_atime) | |
77 *r_atime = atime; | |
78 | |
79 return TRUE; | |
80 } | |
81 | |
82 /*****************************************************************************/ | |
83 | |
/* Build a heap-allocated "http://host/path" URL; the caller frees it. */
static char *new_webcache_url (const char *host, const char *path)
{
    const char *safe_path = STRING_NOTNULL (path);

    return stringf_dup ("http://%s/%s", host, safe_path);
}
88 | |
89 static void ban_webcache (HttpRequest *req, const char *why) | |
90 { | |
91 char *url; | |
92 | |
93 url = new_webcache_url (req->host, req->path); | |
94 GT->dbg (GT, "banning webcache %s", url); | |
95 | |
96 file_cache_insert (bad_caches, url, why); | |
97 file_cache_sync (bad_caches); | |
98 | |
99 free (url); | |
100 } | |
101 | |
102 static void insert_webcache (const char *host_name, const char *remote_path, | |
103 time_t atime) | |
104 { | |
105 char *url; | |
106 char *field; | |
107 | |
108 url = new_webcache_url (host_name, remote_path); | |
109 field = stringf_dup ("%lu", atime); | |
110 | |
111 file_cache_insert (web_caches, url, field); | |
112 | |
113 free (url); | |
114 free (field); | |
115 } | |
116 | |
117 /*****************************************************************************/ | |
118 | |
119 static void handle_close_request (HttpRequest *req, int error_code) | |
120 { | |
121 String *s; | |
122 | |
123 if (error_code < 0 || error_code < 200 || error_code >= 300) | |
124 { | |
125 if (error_code == -1) | |
126 { | |
127 /* the error was our fault, out of mem, etc. dont do anything */ | |
128 GT->DBGFN (GT, "connect to server %s failed for some reason", | |
129 req->host); | |
130 } | |
131 else | |
132 { | |
133 char err[32]; | |
134 | |
135 snprintf (err, sizeof(err), "Received error %d", error_code); | |
136 | |
137 /* | |
138 * Not found, internal server error, or too many redirects: ban | |
139 * the server's URL | |
140 */ | |
141 GT->DBGFN (GT, "server %s returned error %i", req->host, | |
142 error_code); | |
143 ban_webcache (req, err); | |
144 } | |
145 } | |
146 | |
147 /* TODO: this assumes this is the one hostfile request flying around, | |
148 * and not a urlfile request, which probably needs to be handled | |
149 * separately */ | |
150 checking_caches = FALSE; | |
151 | |
152 if ((s = req->data)) | |
153 string_free (s); | |
154 } | |
155 | |
/*
 * Parse the newline-separated "ip:port" list a webcache returns for a
 * hostfile request, registering each entry as a potential ultrapeer.
 * Caches that reply with an error string or HTML are banned.
 */
static void parse_hostfile_response (HttpRequest *http_req, char *host_file)
{
    int hosts = 0;
    GtNode *node;
    time_t now;

    if (!host_file)
    {
        GT->DBGFN (GT, "empty host file from %s", http_req->host);
        return;
    }

    GT->DBGFN (GT, "hostfile from server = %s", host_file);

    now = time (NULL);

    /*
     * If the response starts with "ERROR" (or pseudo-html '<' char), ban the
     * webcache.
     */
    if (!strncasecmp (host_file, "ERROR", sizeof ("ERROR") - 1) ||
        host_file[0] == '<')
    {
        ban_webcache (http_req, "Malformed response content");
        return;
    }

    while (host_file && *host_file)
    {
        char *host;
        in_addr_t ip;
        in_port_t port;

        /* take the next line; each line should be "<ip>:<port>" */
        host = string_sep_set (&host_file, "\r\n");

        ip = net_ip (string_sep (&host, ":"));
        port = ATOI (host);

        /* silently skip unparseable entries */
        if (!port || !ip || ip == INADDR_NONE)
            continue;

        GT->DBGFN (GT, "registering %s:%hu (from cache %s)", net_ip_str (ip),
                   port, http_req->host);

        /* register the hosts as ultrapeers */
        node = gt_node_register (ip, port, GT_NODE_ULTRA);
        hosts++;

        if (!node)
            continue;

        /* set the vitality on this node to preserve it across restarts */
        node->vitality = now;

        /* might be connected already */
        if (node->state != GT_NODE_DISCONNECTED)
            continue;

        /* try to connect to the first 5 */
        if (hosts <= 5 && gt_conn_need_connections (GT_NODE_ULTRA))
            gt_connect (node);

        /* don't allow the cache to register an infinite number of hosts */
        if (hosts >= 50)
            break;
    }

    /* save the nodes we added to disk so we dont hit the caches again */
    gt_node_list_save ();

    /*
     * Do an exponential backoff from the caches. If we were online and
     * able to receive data, we should be getting node information
     * some other way now.
     */
    if (hosts >= 5)
    {
        next_atime = now + backoff_time;
        backoff_time *= 2;
    }
}
237 | |
/*
 * Parse the newline-separated list of alternate webcache URLs returned
 * for a urlfile request, adding at most two previously-unknown caches
 * to the local webcache database.
 */
static void parse_urlfile_response (HttpRequest *http_req, char *url_file)
{
    int caches = 0;

    if (!url_file)
    {
        GT->DBGFN (GT, "empty url file from %s", http_req->host);
        return;
    }

    GT->DBGFN (GT, "urlfile from server = %s", url_file);

    while (url_file && *url_file)
    {
        char *url;
        char *host_name;
        char *remote_path;

        /* take the next line */
        url = string_sep_set (&url_file, "\r\n");

        /* skip past http:// */
        string_sep (&url, "http://");

        host_name = string_sep (&url, "/");
        remote_path = url;

        /* NOTE: remote_path is possibly empty */
        if (!host_name)
            continue;

        /* rebuild a normalized URL (stringf presumably returns a static
         * buffer, hence no free here -- TODO confirm) */
        url = stringf ("http://%s/%s", host_name, STRING_NOTNULL(remote_path));

        /* if the webcache is already in our db, skip it */
        if (file_cache_lookup (web_caches, url))
            continue;

        /*
         * Only allow caches to register two more caches: this
         * small number helps to avoid our list of caches getting
         * polluted.
         */
        if (++caches > 2)
            break;

        /* format is: <url> <last time visited> */
        file_cache_insert (web_caches, url, "0");
    }

    /* sync the pending web caches to disk */
    file_cache_sync (web_caches);
}
289 | |
290 static void end_request (HttpRequest *req, char *data) | |
291 { | |
292 char *str = req->request; | |
293 | |
294 if (str && !strncmp (str, "hostfile", strlen ("hostfile"))) | |
295 parse_hostfile_response (req, data); | |
296 else if (str && !strncmp (str, "urlfile", strlen ("urlfile"))) | |
297 parse_urlfile_response (req, data); | |
298 else | |
299 abort (); | |
300 } | |
301 | |
302 /*****************************************************************************/ | |
303 | |
/*
 * Return TRUE if newname is in the same domain as oldname. For example,
 * "new.gwc.example.com", "example.com", and "cache.example.com" are all
 * considered in the same domain as "www.example.com".
 *
 * This is called on redirects, to make sure the cache can't redirect to an
 * innocent site as part of a DDoS attack.
 *
 * NOTE(review): currently hard-disabled -- the function always returns
 * FALSE, so every redirect is refused.  The real comparison is compiled
 * out below with #if 0.
 */
static BOOL in_same_domain (const char *oldname, const char *newname)
{
    return FALSE;
#if 0
    const char *p;
    const char *largest = NULL;
    int periods = 0;

    p = newname;

    /* get the largest common substring */
    while (p != NULL)
    {
        if ((largest = strstr (oldname, p)))
            break;

        /* advance to next domain part */
        p = strchr (p + 1, '.');
    }

    if (!largest)
        return FALSE;

    /*
     * Make sure the substring matches completely to the end. This will
     * actually fail when it shouldn't if one name includes the '.' toplevel
     * domain and one doesn't. Oh well.
     */
    if (strcmp (largest, p) != 0)
        return FALSE;

    /*
     * Count the number of periods to find the number of subdomains in the
     * largest common substring.
     */
    for (p = largest; *p != 0; p++)
    {
        if (*p == '.')
            periods++;
    }

    /*
     * If the last character is the root '.', subtract one, since we are
     * looking for the number of common subdomains, and the root is shared by
     * all names.
     */
    if (largest[strlen (largest) - 1] == '.')
        periods--;

    /*
     * If there are two periods, at least two toplevel domains match.
     */
    if (periods >= 2)
        return TRUE;

    /*
     * If there is only one period shared, the names MAY be in the same
     * domain: one of the names has to be completely contained within the
     * other, such as the case of "foo.example.com" and "example.com".
     */
    if (periods == 1 &&
        (strcmp (largest, oldname) == 0 || strcmp (largest, newname) == 0))
    {
        return TRUE;
    }

    /* not in same domain */
    return FALSE;
#endif
}
382 | |
/*
 * Called when the webcache sends a 300-level response with a provided
 * Location: header. Have to make sure the domain the cache directs us
 * to is the same.
 *
 * NOTE(review): in_same_domain() currently always returns FALSE, so the
 * code below the check never runs -- all redirects are refused.
 */
static BOOL handle_redirect (HttpRequest *req, const char *new_host,
                             const char *new_path)
{
    assert (new_host != NULL);

    if (in_same_domain (req->host, new_host) == FALSE)
        return FALSE;

    /* might want to do something else if the ban list later becomes per host
     * rather than per URL */
    ban_webcache (req, "Redirected");

    GT->DBGFN (GT, "Redirecting to new webcache %s/%s", new_host, new_path);

    /* record the redirect target with a fresh access time */
    insert_webcache (new_host, new_path, time (NULL));
    file_cache_sync (web_caches);

    return TRUE;
}
407 | |
408 /*****************************************************************************/ | |
409 | |
/*
 * Incoming-data callback for the HTTP request.  Accumulates the body in
 * a String hanging off req->data; a NULL data argument signals EOF, at
 * which point the complete response is handed to end_request().
 */
static BOOL handle_recv (HttpRequest *req, char *data, size_t len)
{
    String *s;

    /* EOF */
    if (!data)
    {
        char *str = NULL;

        if ((s = req->data))
            str = s->str;

        GT->DBGFN (GT, "read %s from server %s", str, req->host);
        end_request (req, str);

        /* clear data link
         * NOTE(review): the String is not string_free'd here, and
         * handle_close_request() will now see req->data == NULL -- unless
         * ownership passes elsewhere, this looks like a leak; verify */
        req->data = NULL;

        return TRUE;
    }

    if (!len)
        return TRUE;

    GT->DBGFN (GT, "server sent us: %s", data);

    /* lazily create the accumulation buffer on first data */
    if (!(s = req->data) && !(s = req->data = string_new (NULL, 0, 0, TRUE)))
        return FALSE;

    if (string_append (s, data) != len)
    {
        GT->DBGFN (GT, "string append failed");
        return FALSE;
    }

    return TRUE;
}
447 | |
448 /*****************************************************************************/ | |
449 | |
/* Header callback: add extra request headers before the request is sent. */
static BOOL handle_add_headers (HttpRequest *req, Dataset **headers)
{
    /* don't let intermediaries cache our request, I think */
    dataset_insertstr (headers, "Cache-Control", "no-cache");

    return TRUE;
}
457 | |
458 /*****************************************************************************/ | |
459 | |
460 static BOOL parse_host_and_port (char **r_host, in_port_t *r_port) | |
461 { | |
462 char *str; | |
463 char *host; | |
464 long port; | |
465 | |
466 str = *r_host; | |
467 | |
468 if (r_port) | |
469 *r_port = 80; | |
470 | |
471 /* skip leading 'http://' if found */ | |
472 if (strstr (str, "http://")) | |
473 str += strlen ("http://"); | |
474 | |
475 host = string_sep (&str, ":"); | |
476 | |
477 if (!host) | |
478 return FALSE; | |
479 | |
480 *r_host = host; | |
481 | |
482 if (str && !string_isempty (str)) | |
483 { | |
484 port = gift_strtol (str); | |
485 | |
486 /* make sure port is valid */ | |
487 if (port <= 0 || port >= 65536) | |
488 return FALSE; | |
489 | |
490 *r_port = port; | |
491 } | |
492 | |
493 return TRUE; | |
494 } | |
495 | |
/*
 * Resolve http_name ("host[:port]", optionally with a leading
 * "http://") and open a TCP connection to it.  Returns the new
 * connection or NULL on parse/lookup/connect failure.  Bans the
 * webcache if its name resolves to a local address, so a cache cannot
 * point us at our own network.
 */
static TCPC *open_http_connection (HttpRequest *req, const char *http_name)
{
    in_addr_t ip;
    in_port_t port;
    char *str;
    char *name;
    TCPC *c;
    struct hostent *host;

    if (!http_name)
        return NULL;

    /* work on a private copy: parsing modifies the string in place */
    if (!(str = STRDUP (http_name)))
        return NULL;

    name = str;

    if (!parse_host_and_port (&name, &port))
    {
        GT->DBGFN (GT, "error parsing hostname \"%s\"", str);
        free (str);
        return NULL;
    }

    if (!(host = gt_dns_lookup (name)))
    {
        free (str);
        return NULL;
    }

    /* ip is in network-order already */
    memcpy (&ip, host->h_addr, MIN (host->h_length, sizeof (ip)));

    if (net_match_host (ip, "LOCAL"))
    {
        free (str);
        ban_webcache (req, "Resolved to local IP");
        return NULL;
    }

    c = tcp_open (ip, port, FALSE);
    if (!c)
    {
        GT->DBGFN (GT, "couldn't open connection to %s [%s]: %s",
                   http_name, net_ip_str (ip), GIFT_NETERROR());
    }

    free (str);
    return c;
}
546 | |
547 /* return the name we have to lookup */ | |
548 static char *get_http_name (char *name) | |
549 { | |
550 char *proxy; | |
551 char *host; | |
552 | |
553 host = name; | |
554 proxy = HTTP_PROXY; | |
555 | |
556 string_trim (proxy); | |
557 | |
558 if (proxy && !string_isempty (proxy)) | |
559 { | |
560 /* connect to the proxy instead */ | |
561 if (STRCMP (proxy, gt_proxy_server) != 0) | |
562 { | |
563 GT->DBGFN (GT, "using proxy server %s", proxy); | |
564 free (gt_proxy_server); | |
565 gt_proxy_server = STRDUP (proxy); | |
566 } | |
567 | |
568 host = proxy; | |
569 } | |
570 | |
571 return host; | |
572 } | |
573 | |
574 static void check_dns_error (const char *name, HttpRequest *req) | |
575 { | |
576 int error; | |
577 | |
578 error = gt_dns_get_errno (); | |
579 | |
580 if (!error) | |
581 return; | |
582 | |
583 GT->DBGFN (GT, "lookup failed on \"%s\": %s", name, gt_dns_strerror(error)); | |
584 | |
585 /* ban the host, but only if not using a proxy server */ | |
586 if (error == HOST_NOT_FOUND && gt_proxy_server == NULL) | |
587 { | |
588 GT->DBGFN (GT, "webcache \"%s\" not in DNS. banning", name); | |
589 ban_webcache (req, "Host not found in DNS"); | |
590 return; | |
591 } | |
592 } | |
593 | |
/*
 * Build and launch an HTTP request (a "hostfile=..." or "urlfile=..."
 * query) to the webcache at host_name/remote_path, wiring up the
 * callbacks defined above.  Returns TRUE if the request was started.
 */
static BOOL make_request (char *host_name, char *remote_path, char *request)
{
    HttpRequest *req;
    TCPC *c;
    char *resolve_name;
    char *url;

    url = stringf_dup ("http://%s/%s", host_name, STRING_NOTNULL(remote_path));

    if (!(req = gt_http_request_new (url, request)))
    {
        free (url);
        return FALSE;
    }

    free (url);

    /* may be the proxy's name rather than the cache's */
    resolve_name = get_http_name (host_name);

    /* clear any stale resolver error so check_dns_error() only sees ours */
    gt_dns_set_errno (0);

    if (!(c = open_http_connection (req, resolve_name)))
    {
        check_dns_error (resolve_name, req);
        gt_http_request_close (req, -1);
        return FALSE;
    }

    GT->DBGFN (GT, "opening connection to %s [%s]",
               resolve_name, net_ip_str (c->host));

    req->recv_func = handle_recv;
    req->add_header_func = handle_add_headers;
    req->close_req_func = handle_close_request;
    req->redirect_func = handle_redirect;

    gt_http_request_set_conn (req, c);                /* setup references */
    gt_http_request_set_proxy (req, gt_proxy_server); /* maybe use proxy */
    gt_http_request_set_timeout (req, 2 * MINUTES);   /* don't wait forever */
    gt_http_request_set_max_len (req, 65536);         /* don't read forever */

    input_add (c->fd, c, INPUT_WRITE,
               (InputCallback)gt_http_request_handle, TIMEOUT_DEF);

    return TRUE;
}
640 | |
641 /*****************************************************************************/ | |
642 | |
/* State carried through foreach_rand_cache() while picking a cache. */
struct find_rand_args
{
    int n;          /* count of eligible caches seen so far (starts at 1) */
    time_t now;     /* current time, for the retry-interval check */
    char *url;      /* currently selected cache URL (strdup'd) */
    char *field;    /* value field of the selected cache (strdup'd) */
};
650 | |
/*
 * Dataset iterator: get a random cache from the webcaches dataset in a
 * single pass (reservoir-style selection -- each eligible entry
 * replaces the current pick with probability 1/n).  Recently-retried
 * entries, unparseable URLs and known-bad caches are skipped.
 */
static void foreach_rand_cache (ds_data_t *key, ds_data_t *value,
                                struct find_rand_args *args)
{
    time_t atime;
    float range = args->n;
    char *str;
    char *url = key->data;
    char *hostname, *path;
    int ret;

    if (!parse_web_cache_value (value->data, &atime))
        return;

    /* skip the cache entirely if we've retried too soon */
    if (args->now - atime < CACHE_RETRY_TIME)
        return;

    /*
     * Make sure the cache has a parseable url
     *
     * TODO: This is ugly, it really should be parsed into a
     * a data structure once instead.
     */
    str = STRDUP (url);
    ret = gt_http_url_parse (str, &hostname, &path);
    free (str);

    if (!ret)
    {
        GT->warn (GT, "bad webcache url \"%s\" from %s/gwebcaches",
                  key->data, gift_conf_path ("Gnutella"));
        return;
    }

    /* decrease probability of selecting the next web cache */
    args->n++;

    /*
     * Select this webcache with probability 1/n.
     *
     * Also select this cache if we haven't chosen one yet, which may be the
     * case on if the index of the cache is > 0 when there are banned caches.
     */
    if (args->url == NULL ||
        range * rand() / (RAND_MAX + 1.0) < 1.0)
    {
        char *keystr = key->data;
        char *valuestr = value->data;

        /* check if this is a bad gwebcache */
        if (file_cache_lookup (bad_caches, url))
        {
#if 1
            GT->warn (GT, "skipping webcache %s, in bad gwebcaches", url);
#endif
            /* pretend we didn't select this to ensure equal distribution */
            args->n--;

            return;
        }

        /* free the old values */
        free (args->url);
        free (args->field);

        args->url = STRDUP (keystr);
        args->field = STRDUP (valuestr);
    }
}
721 | |
722 static BOOL get_random_cache (time_t now, char **r_host_name, | |
723 char **r_remote_path) | |
724 { | |
725 int ret; | |
726 struct find_rand_args args; | |
727 | |
728 args.n = 1; /* initial probability */ | |
729 args.now = now; /* current time */ | |
730 args.url = NULL; | |
731 args.field = NULL; | |
732 | |
733 dataset_foreach (web_caches->d, DS_FOREACH(foreach_rand_cache), &args); | |
734 | |
735 if (!args.url) | |
736 { | |
737 GT->DBGFN (GT, "couldn't find random cache"); | |
738 return FALSE; | |
739 } | |
740 | |
741 ret = gt_http_url_parse (args.url, r_host_name, r_remote_path); | |
742 | |
743 if (!*r_host_name || !*r_remote_path) | |
744 { | |
745 free (args.url); | |
746 free (args.field); | |
747 return FALSE; | |
748 } | |
749 | |
750 *r_host_name = STRDUP (*r_host_name); | |
751 *r_remote_path = STRDUP (*r_remote_path); | |
752 | |
753 /* free the original buffer */ | |
754 free (args.url); | |
755 free (args.field); | |
756 | |
757 return ret; | |
758 } | |
759 | |
/*
 * Hit up to max_requests randomly chosen webcaches with a hostfile
 * request, refreshing each contacted cache's access time.  A no-op if a
 * check is already in flight.
 */
static void access_gwebcaches (void)
{
    int len;
    char *host_name;
    char *remote_path;
    time_t now;
    int host_requests = 0;
#if 0
    int url_requests = 0;
#endif
    int max_requests = 1;
    BOOL ret;
    BOOL need_sync;

    /*
     * We may get called while a check of the gwebcaches is already
     * in progress.
     */
    if (checking_caches)
    {
        GT->DBGFN (GT, "Access already in progress");
        return;
    }

    now = time (NULL);

    len = dataset_length (web_caches->d);

    /* don't try to contact more caches than we know about */
    if (max_requests > len)
        max_requests = len;

    need_sync = FALSE;

    while (host_requests < max_requests)
    {
        if (!get_random_cache (now, &host_name, &remote_path))
        {
            GT->DBGFN (GT, "error looking up cache");
            break;
        }

#if 0
        /* make a url request sometimes to keep the cache file up to date, but
         * mostly ask for hosts */
        if (10.0 * rand() / (RAND_MAX + 1.0) < 1.0)
        {
            ret = make_request (host_name, remote_path,
                                "urlfile=1&client=GIFT&version=" GT_VERSION);
            url_requests++;
        }
        else
#endif
        {
            ret = make_request (host_name, remote_path,
                                "hostfile=1&client=GIFT&version=" GT_VERSION);

            if (ret)
                checking_caches = TRUE;

            host_requests++;
        }

        if (ret)
        {
            GT->DBGFN (GT, "hitting web cache [total cache hits %u] "
                       "(cache: http://%s/%s)", cache_hits,
                       host_name, STRING_NOTNULL(remote_path));

            cache_hits++;
            need_sync = TRUE;

            /* reset the atime for the cache */
            insert_webcache (host_name, remote_path, now);
        }

        free (host_name);
        free (remote_path);
    }

    /* only sync when we successfully accessed a cache */
    if (need_sync)
        file_cache_sync (web_caches);
}
843 | |
844 static BOOL webcache_update (void *udata) | |
845 { | |
846 char *webcache_file; | |
847 int web_exists; | |
848 time_t now; | |
849 size_t nodes_len; | |
850 struct stat st; | |
851 | |
852 if (GNUTELLA_LOCAL_MODE) | |
853 return TRUE; | |
854 | |
855 now = time (NULL); | |
856 nodes_len = gt_conn_length (GT_NODE_NONE, GT_NODE_ANY); | |
857 | |
858 /* | |
859 * If we've already accessed the caches successfully, we won't | |
860 * allow another access to go through, _unless_ the node list | |
861 * is small enough, in which case it could be we really do need | |
862 * to access the caches. | |
863 */ | |
864 if (now < next_atime && nodes_len >= 20) | |
865 return FALSE; | |
866 | |
867 webcache_file = STRDUP (gift_conf_path ("Gnutella/gwebcaches")); | |
868 web_exists = file_stat (webcache_file, &st); | |
869 | |
870 if (!web_exists) | |
871 { | |
872 GIFT_ERROR (("gwebcaches file doesn't exist")); | |
873 return FALSE; | |
874 } | |
875 | |
876 /* | |
877 * next_atime, the absolute next time we allow ourselves to contact the | |
878 * caches, gets set when we sucessfully access the caches, and if we | |
879 * manage to get some hosts from a cache we access in an exponentially | |
880 * decreasing interval. | |
881 */ | |
882 access_gwebcaches (); | |
883 | |
884 free (webcache_file); | |
885 return TRUE; | |
886 } | |
887 | |
888 /*****************************************************************************/ | |
889 | |
/* Public entry point: trigger a webcache access now (still subject to
 * the throttling checks inside webcache_update()). */
void gt_web_cache_update (void)
{
    webcache_update (NULL);
}
894 | |
895 BOOL gt_web_cache_init (void) | |
896 { | |
897 /* | |
898 * Copy the gwebcaches file to from the data dir to | |
899 * ~/.giFT/Gnutella if it is newer or if ~/.giFT/Gnutella/gwebcaches | |
900 * doesn't exist. | |
901 */ | |
902 gt_config_load_file ("Gnutella/gwebcaches", TRUE, FALSE); | |
903 | |
904 web_caches = file_cache_new (gift_conf_path ("Gnutella/gwebcaches")); | |
905 bad_caches = file_cache_new (gift_conf_path ("Gnutella/bad_gwebcaches")); | |
906 | |
907 if (!web_caches) | |
908 return FALSE; | |
909 | |
910 return TRUE; | |
911 } | |
912 | |
913 void gt_web_cache_cleanup (void) | |
914 { | |
915 file_cache_free (web_caches); | |
916 web_caches = NULL; | |
917 | |
918 file_cache_free (bad_caches); | |
919 bad_caches = NULL; | |
920 | |
921 cache_hits = 0; | |
922 next_atime = 0; | |
923 | |
924 checking_caches = FALSE; | |
925 } |