view src/gt_search.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line source
1 /*
2 * $Id: gt_search.c,v 1.60 2004/11/29 12:32:12 mkern Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
17 #include "gt_gnutella.h"
19 #include "gt_node.h"
20 #include "gt_node_list.h"
21 #include "gt_share.h"
22 #include "gt_share_file.h"
23 #include "gt_packet.h"
24 #include "gt_search.h"
25 #include "gt_xfer.h"
27 #include "sha1.h"
29 #include "encoding/url.h" /* gt_url_decode */
31 #include "transfer/download.h"
32 #include "transfer/source.h"
34 #include <libgift/mime.h>
36 /******************************************************************************/
38 /* how often we check if the search has timed out */
39 #define TIMEOUT_CHECK_INTERVAL (20 * SECONDS)
41 /* after this many results, no more search submissions will occur */
42 #define RESULTS_BACKOFF (200)
44 /*
45 * Gnutella searches don't notify when they are done. So, we close the
46 * search after the following critieria are met:
47 *
48 * - we have submitted the search to at least 3 Ultrapeers
49 * [MIN_NODES]
50 * - at least 3 minutes have passed since we last submitted to an ultrapeer
51 * [MIN_SUBMIT_WAIT]
52 * - no results have been seen in the last minute
53 * [MIN_RESULT_WAIT]
54 *
55 * This means the fastest we'll time out a search is 3 minutes if
56 * we submit to 3 ultrapeers immediately and get no results within
57 * 1 minute of the 3 minute time limit.
58 *
59 * For hash searches, we wait for 2 * MIN_SUBMIT_WAIT, because the other
60 * factors won't come into play.
61 *
62 * There is also a large timeout for searches that receive no results
63 * [ANCIENT_TIME]. Searches that exceed this age and haven't received
64 * any results in the same time will automatically be cancelled, regardless of
65 * other critieria.
66 */
67 #define MIN_NODES (3) /* ultrapeers */
68 #define MIN_SUBMIT_WAIT (3 * EMINUTES)
69 #define MIN_RESULT_WAIT (1 * EMINUTES)
70 #define ANCIENT_TIME (10 * EMINUTES)
72 /******************************************************************************/
74 /* active keyword and hash searches from this node */
75 static List *active_searches;
77 /* probability of the next hash search not being dropped */
78 static double locate_pass_prob;
80 /******************************************************************************/
82 static BOOL finish_search (GtSearch *search)
83 {
84 GT->DBGFN (GT, "search query for \"%s\" timed out", search->query);
85 gt_search_free (search);
86 return FALSE;
87 }
89 static BOOL search_is_ancient (GtSearch *search, time_t now)
90 {
91 if (difftime (now, search->start) < ANCIENT_TIME)
92 return FALSE;
94 /*
95 * If the search is greater than ANCIENT_TIME and hasn't received
96 * a result in the same time, consider it ancient.
97 */
98 if (search->last_result == 0)
99 return TRUE;
101 if (difftime (now, search->last_result) >= ANCIENT_TIME)
102 return TRUE;
104 return FALSE;
105 }
107 /*
108 * search_timeout: check if the search needs to be closed.
109 *
110 * Its impossible to guarantee this will not close the search too early.
111 * It is more likely to miss results if bandwidth is being dedicated to
112 * other purposes besides reading Gnutella messages, or if the TTL and
113 * consequently the latency of the search is high.
114 *
115 * TODO: this should take into account that we may have disconnected
116 * from the nodes we submitted the search to. Perhaps, have
117 * a list of the submitted nodes, and make sure the list len >=
118 * MIN_NODES (but this may run into trouble with not submitting
119 * searches with results >= RESULTS_BACKOFF...)
120 */
121 static BOOL search_timeout (GtSearch *search)
122 {
123 time_t now;
124 double submit_wait;
125 double result_wait;
127 time (&now);
129 /* check if this search is really old and should be expired */
130 if (search_is_ancient (search, now))
131 return finish_search (search);
133 if (search->submitted < MIN_NODES)
134 return TRUE;
136 submit_wait = MIN_SUBMIT_WAIT;
137 result_wait = MIN_RESULT_WAIT;
139 /* hash searches get very few results, so give them a longer base time */
140 if (search->type == GT_SEARCH_HASH)
141 submit_wait *= 2;
143 /*
144 * If the search has lots of results, don't wait as long.
145 *
146 * RESULTS_BACKOFF is a conservative value for not submitting to other
147 * nodes when we already have plenty of results, and we want to be a
148 * little less conservative here, so multiply RESULTS_BACKOFF by 2.
149 */
150 if (search->results >= 2 * RESULTS_BACKOFF)
151 {
152 submit_wait /= 2;
153 result_wait /= 2;
154 }
156 if (difftime (now, search->last_submit) < submit_wait)
157 return TRUE;
159 if (difftime (now, search->last_result) < result_wait)
160 return TRUE;
162 /* the search has timed out */
163 return finish_search (search);
164 }
166 /*****************************************************************************/
168 GtSearch *gt_search_new (IFEvent *event, char *query, gt_search_type_t type)
169 {
170 GtSearch *search;
172 if (!(search = malloc (sizeof (GtSearch))))
173 return NULL;
175 memset (search, 0, sizeof (GtSearch));
177 search->event = event;
178 search->type = type;
179 search->guid = gt_guid_new ();
180 search->query = STRDUP (query);
181 search->results = 0;
182 search->start = time (NULL);
184 search->timeout_timer = timer_add (TIMEOUT_CHECK_INTERVAL,
185 (TimerCallback)search_timeout,
186 search);
188 GT->DBGFN (GT, "new search \"%s\"", query);
190 active_searches = list_prepend (active_searches, search);
192 return search;
193 }
195 void gt_search_free (GtSearch *search)
196 {
197 if (!search)
198 return;
200 if (!list_find (active_searches, search))
201 {
202 GIFT_ERROR (("couldn't find search %p (query:'%s')",
203 search, search->query));
204 return;
205 }
207 if (search->timeout_timer)
208 timer_remove (search->timeout_timer);
210 if (search->event)
211 GT->search_complete (GT, search->event);
213 /* NOTE: search_complete may have removed the search by calling
214 * gt_search_disable */
215 active_searches = list_remove (active_searches, search);
217 free (search->hash);
218 free (search->realm);
219 free (search->guid);
220 free (search->query);
221 free (search);
222 }
224 static int find_by_event (GtSearch *search, IFEvent *event)
225 {
226 if (search->event == event)
227 return 0;
229 return -1;
230 }
232 void gt_search_disable (IFEvent *event)
233 {
234 List *ls;
235 GtSearch *search;
237 ls = list_find_custom (active_searches, event,
238 (CompareFunc) find_by_event);
240 if (!ls)
241 {
242 GT->DBGFN (GT, "didnt find search id %p", (long) event);
243 return;
244 }
246 search = ls->data;
248 GT->DBGFN (GT, "disabled search event %p (query '%s')", event, search->query);
249 search->event = NULL;
250 }
252 /******************************************************************************/
254 static int find_by_guid (GtSearch *a, GtSearch *b)
255 {
256 return gt_guid_cmp (a->guid, b->guid);
257 }
259 GtSearch *gt_search_find (gt_guid_t *guid)
260 {
261 GtSearch key;
262 List *l;
264 key.guid = guid;
266 l = list_find_custom (active_searches, &key, (CompareFunc) find_by_guid);
268 if (!l)
269 return NULL;
271 return l->data;
272 }
274 static BOOL search_matches_realm (GtSearch *search, GtShare *share)
275 {
276 char *mime;
278 if (!search->realm)
279 return TRUE;
281 if (!(mime = mime_type (share->filename)))
282 return FALSE;
284 if (strstr (mime, search->realm))
285 return TRUE;
287 if (!STRCMP (search->realm, "text"))
288 {
289 /* HACK: special case application/pdf */
290 if (strstr (mime, "pdf"))
291 return TRUE;
293 /* HACK: special case application/msword */
294 if (strstr (mime, "doc"))
295 return TRUE;
296 }
298 return FALSE;
299 }
301 static BOOL search_matches_hash (GtSearch *search, Share *file)
302 {
303 Hash *hash;
304 char *str;
305 int ret;
307 if (search->type != GT_SEARCH_HASH)
308 return TRUE;
310 if (!(hash = share_get_hash (file, "SHA1")))
311 {
312 GT->DBGFN (GT, "bad result for hash query");
313 return FALSE;
314 }
316 if (!(str = hash_dsp (hash)))
317 return FALSE;
319 ret = strcmp (search->hash, hashstr_data (str));
321 free (str);
323 return (ret == 0);
324 }
326 /*
327 * We have to filter out backslashes from the name to workaround a bug
328 * in lib/file.c.
329 */
330 static void set_display_name (Share *share, const char *path)
331 {
332 char *p;
333 char *disp_name;
335 if (!(p = disp_name = STRDUP (path)))
336 return;
338 while (*p)
339 {
340 if (*p == '\\')
341 *p = '_';
342 p++;
343 }
345 /* NOTE: this makes the GtShare->filename invalid because it shares memory
346 * with the Share */
347 share_set_path (share, disp_name);
348 free (disp_name);
349 }
351 void gt_search_reply (GtSearch *search, TCPC *c, in_addr_t host,
352 in_port_t gt_port, gt_guid_t *client_guid,
353 int availability, BOOL firewalled,
354 FileShare *file)
355 {
356 char server[128];
357 char *url;
358 char *host_str;
359 char *path;
360 GtShare *share;
361 GtNode *node;
362 BOOL is_local;
364 node = GT_NODE(c);
366 if (!search->event)
367 return;
369 if (gt_is_local_ip (host, node->ip))
370 is_local = TRUE;
371 else
372 is_local = FALSE;
374 /* derive firewalled status if the address is local */
375 if (is_local)
376 firewalled = TRUE;
378 /* if they are firewalled and so are we, don't bother.
379 * NOTE: if we have a download proxy, we shouldnt do this */
380 if (firewalled && GT_SELF->firewalled)
381 return;
383 if (!(share = share_get_udata (file, GT->name)))
384 return;
386 /* check if the mimetype for the result matches the query (i.e. this does
387 * client-side filtering) */
388 if (!search_matches_realm (search, share))
389 return;
391 /* match against the hash if this is a hash search */
392 if (!search_matches_hash (search, file))
393 return;
395 /* get the whole path (result may have '/' separators) */
396 path = file->path;
397 assert (path != NULL);
399 url = gt_source_url_new (path, share->index, host, gt_port,
400 node->ip, node->gt_port,
401 firewalled, client_guid);
403 if (!url)
404 return;
406 /* workaround bug in lib/file.c */
407 set_display_name (file, path);
409 /* print out the server data so we know which connection to
410 * talk to when sending a push request */
411 snprintf (server, sizeof (server) - 1, "%s:%hu",
412 net_ip_str (node->ip), node->gt_port);
414 if (is_local)
415 {
416 /* use the Client GUID for the user if the remote connection is
417 * on the Internet and the host is 0 or local */
418 host_str = stringf_dup ("%s@%s", net_ip_str (host),
419 gt_guid_str (client_guid));
420 }
421 else
422 {
423 /* Just use a plain host for cleanliness */
424 host_str = stringf_dup ("%s", net_ip_str (host));
425 }
427 GT->search_result (GT, search->event, host_str, server,
428 url, availability, file);
430 /* update statistics */
431 search->results++;
432 time (&search->last_result);
434 free (host_str);
435 free (url);
436 }
438 /******************************************************************************/
440 static uint8_t get_search_ttl (GtNode *node, gt_search_type_t type)
441 {
442 char *max_ttl;
443 uint8_t ttl = 0;
445 if ((max_ttl = dataset_lookupstr (node->hdr, "x-max-ttl")))
446 ttl = ATOI (max_ttl);
448 if (ttl > GT_SEARCH_TTL || ttl == 0)
449 ttl = GT_SEARCH_TTL;
451 /* ok because locates are rate-limited */
452 #if 0
453 if (type == GT_SEARCH_HASH)
454 ttl = 1;
455 #endif
457 return ttl;
458 }
460 static TCPC *broadcast_search (TCPC *c, GtNode *node, GtSearch *search)
461 {
462 gt_query_flags_t flags;
463 uint8_t ttl;
464 char *hash = NULL;
465 GtPacket *pkt;
467 /* set this query as having flags to be interpolated */
468 flags = QF_HAS_FLAGS;
470 /* request that only non-firewalled nodes respond if we are firewalled
471 * NOTE: if we ever support a download proxy, need to unset this */
472 if (GT_SELF->firewalled)
473 flags |= QF_ONLY_NON_FW;
475 #ifdef USE_LIBXML2
476 flags |= QF_WANTS_XML;
477 #endif /* USE_LIBXML2 */
479 ttl = get_search_ttl (node, search->type);
481 if (search->type == GT_SEARCH_HASH && !search->hash)
482 {
483 GT->DBGFN (GT, "trying to search for \"%s\" without a hash?!?",
484 search->query);
485 return NULL;
486 }
488 if (!(pkt = gt_packet_new (GT_MSG_QUERY, ttl, search->guid)))
489 return NULL;
491 gt_packet_put_uint16 (pkt, flags);
492 gt_packet_put_str (pkt, search->query);
494 if (search->hash)
495 hash = stringf_dup ("urn:sha1:%s", search->hash);
497 if (hash)
498 gt_packet_put_str (pkt, hash);
500 gt_packet_send (c, pkt);
501 gt_packet_free (pkt);
503 free (hash);
505 /* TODO: check error return from gt_packet_send_fmt! */
506 search->submitted++;
507 time (&search->last_submit);
509 return NULL;
510 }
512 static BOOL submit_search (GtSearch *search, TCPC *c)
513 {
514 if (search->results >= RESULTS_BACKOFF)
515 {
516 /* still count the search as submitted to this node */
517 search->submitted++;
518 return FALSE;
519 }
521 broadcast_search (c, GT_NODE(c), search);
522 return FALSE;
523 }
525 static BOOL submit_searches (TCPC *c)
526 {
527 list_foreach (active_searches, (ListForeachFunc)submit_search, c);
528 GT_NODE(c)->search_timer = 0;
529 return FALSE;
530 }
532 static BOOL reset_submit (GtSearch *search, time_t *now)
533 {
534 if (search->results >= RESULTS_BACKOFF)
535 return FALSE;
537 search->last_submit = *now;
538 return FALSE;
539 }
541 void gt_searches_submit (TCPC *c, time_t delay_ms)
542 {
543 time_t now;
545 /* reset each search timeout because we will submit each search soon */
546 time (&now);
547 list_foreach (active_searches, (ListForeachFunc)reset_submit, &now);
549 /* submit the searches once after a delay */
550 if (!GT_NODE(c)->search_timer)
551 {
552 GT_NODE(c)->search_timer = timer_add (delay_ms,
553 (TimerCallback)submit_searches, c);
554 }
555 }
557 BOOL gnutella_search (Protocol *p, IFEvent *event, char *query, char *exclude,
558 char *realm, Dataset *meta)
559 {
560 GtSearch *search;
562 search = gt_search_new (event, query, GT_SEARCH_KEYWORD);
563 search->realm = STRDUP (realm);
565 gt_conn_foreach (GT_CONN_FOREACH(broadcast_search), search,
566 GT_NODE_NONE, GT_NODE_CONNECTED, 0);
568 return TRUE;
569 }
571 /*****************************************************************************/
573 /*
574 * Using the hash, grab words to stuff in the query section by looking at the
575 * download list.
576 */
577 char *get_query_words (char *htype, char *hash)
578 {
579 Source *src;
580 GtSource *gt_src;
581 char *dup;
583 if (htype && strcmp (htype, "SHA1") != 0)
584 {
585 GT->DBGFN (GT, "htype != \"SHA1\"!?: %s", htype);
586 return NULL;
587 }
589 /* HACK: need gift's prefix */
590 if (!(dup = stringf_dup ("SHA1:%s", hash)))
591 return NULL;
593 src = gt_download_lookup (dup);
594 free (dup);
596 if (!src)
597 return NULL;
599 if (!(gt_src = src->udata))
600 {
601 GT->DBGFN (GT, "gt_src == NULL?!?!");
602 return NULL;
603 }
605 return gt_url_decode (gt_src->filename);
606 }
608 /*
609 * Returns TRUE if the current locate is ok to send and FALSE if it should be
610 * dropped to rate-limit locates. To determine that, we assign the locate a
611 * "probability of passage". Then we roll dice and if it's less than the
612 * probability, accept.
613 *
614 * For each locate attempt the probability of success for the next locate is
615 * halved, down to a minimum of 0.01%. For each minute that passes since the
616 * last locate, the probability of the locate succeeding increases by 1%.
617 */
618 static BOOL should_send_locate (void)
619 {
620 static time_t last_locate = 0;
621 time_t now;
622 double n;
623 BOOL passed;
625 time (&now);
627 if (last_locate == 0)
628 locate_pass_prob = 100.0;
629 else
630 locate_pass_prob += difftime (now, last_locate) / (1.0 * EMINUTES);
632 last_locate = now;
634 if (locate_pass_prob > 100.0)
635 locate_pass_prob = 100.0;
637 /* hmm, should this be removed? */
638 if (locate_pass_prob < 0.01)
639 locate_pass_prob = 0.01;
641 n = 100.0 * rand() / (RAND_MAX + 1.0);
643 GT->DBGFN (GT, "locate_pass_prob=%f n=%f", locate_pass_prob, n);
644 passed = BOOL_EXPR (n < locate_pass_prob);
646 /* drop next chance of succeeding */
647 locate_pass_prob /= 2;
649 return passed;
650 }
652 BOOL gnutella_locate (Protocol *p, IFEvent *event, char *htype, char *hash)
653 {
654 GtSearch *search;
655 unsigned char *bin;
656 char *fname;
658 /* Only locate hashes which are valid on Gnutella. */
659 if (STRCMP (htype, "SHA1"))
660 return FALSE;
662 GT->DBGFN (GT, "new hash search: %s", hash);
664 /* sha1_bin() needs a string of at least 32 characters */
665 if (STRLEN (hash) < 32)
666 return FALSE;
668 /* skip the hash if it's not parseable in base32 */
669 if (!(bin = sha1_bin (hash)))
670 return FALSE;
672 free (bin);
674 /* rate-limit locate searches to save bandwidth */
675 if (should_send_locate () == FALSE)
676 {
677 GT->DBGFN (GT, "dropping locate for %s "
678 "(too many searches in short period)", hash);
679 return FALSE;
680 }
682 /* make sure the hash is uppercase (canonical form on Gnet) */
683 string_upper (hash);
685 /*
686 * Look for a download with this hash, to put those words in the query to
687 * reduce the bandwidth consumed through QRP.
688 */
689 if (!(fname = get_query_words (htype, hash)))
690 fname = STRDUP ("");
692 if (!(search = gt_search_new (event, fname, GT_SEARCH_HASH)))
693 {
694 free (fname);
695 return FALSE;
696 }
698 free (fname);
700 search->hash = STRDUP (hash);
702 gt_conn_foreach (GT_CONN_FOREACH(broadcast_search), search, GT_NODE_NONE,
703 GT_NODE_CONNECTED, 0);
705 return TRUE;
706 }
708 void gnutella_search_cancel (Protocol *p, IFEvent *event)
709 {
710 gt_search_disable (event);
711 }
713 /*****************************************************************************/
715 void gt_search_init (void)
716 {
717 /* nothing */
718 }
720 BOOL rm_search (GtSearch *search, void *udata)
721 {
722 gt_search_free (search);
724 /* return FALSE here because gt_search_free() removes search from list */
725 return FALSE;
726 }
728 void gt_search_cleanup (void)
729 {
730 list_foreach_remove (active_searches, (ListForeachFunc)rm_search, NULL);
731 assert (active_searches == NULL);
732 }