diff src/message/query.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/message/query.c	Sat Feb 20 21:18:28 2010 -0800
     1.3 @@ -0,0 +1,529 @@
     1.4 +/*
     1.5 + * $Id: query.c,v 1.10 2004/06/04 15:44:59 hipnod Exp $
     1.6 + *
     1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
     1.8 + *
     1.9 + * This program is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License as published by the
    1.11 + * Free Software Foundation; either version 2, or (at your option) any
    1.12 + * later version.
    1.13 + *
    1.14 + * This program is distributed in the hope that it will be useful, but
    1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of
    1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.17 + * General Public License for more details.
    1.18 + */
    1.19 +
    1.20 +#include "gt_gnutella.h"
    1.21 +#include "message/msg_handler.h"
    1.22 +
    1.23 +#include "sha1.h"
    1.24 +#include "xml.h"
    1.25 +
    1.26 +#include "gt_share.h"
    1.27 +#include "gt_share_file.h"
    1.28 +#include "gt_share_state.h"
    1.29 +
    1.30 +#include "gt_search.h"
    1.31 +#include "gt_search_exec.h"
    1.32 +#include "gt_urn.h"
    1.33 +
    1.34 +#include "transfer/push_proxy.h"
    1.35 +
    1.36 +/*****************************************************************************/
    1.37 +
/*
 * Integer config value read from the key "search/log_result_packets" on
 * every use (the "=0" suffix is presumably the default -- confirm against
 * gt_config_get_int).  When non-zero, transmit_results() logs the hit count
 * of each reply packet it sends.
 */
#define LOG_RESULT_PACKETS    gt_config_get_int("search/log_result_packets=0")
    1.39 +
    1.40 +/*****************************************************************************/
    1.41 +
/*
 * State of the query-hit reply being assembled for one incoming query.
 * Allocated in gt_msg_query() and released (together with the guid copy)
 * by query_request_result_free() once all results have been handed over.
 */
typedef struct gt_search_reply
{
	uint8_t     ttl;      /* ttl handed to gt_packet_new() for each reply packet */
	uint8_t     results;  /* number of results on the current packet */
	GtPacket   *packet;   /* the current packet to stack results on */
	gt_guid_t  *guid;     /* copy of the query's guid, used for the reply packets */
} gt_search_reply_t;
    1.49 +
    1.50 +/*****************************************************************************/
    1.51 +
/*
 * Cache of recent queries, keyed by the query guid (GT_GUID_LEN bytes) with
 * the time_t at which the guid was first seen as the value.  Used by
 * query_cache_lookup() to detect duplicate queries.
 * TODO: flush this on plugin unload
 */
static Dataset    *query_cache    = NULL;

/*
 * Timer that flushes the old cache entries; stays 0 until
 * query_cache_lookup() registers it after the first insert.
 */
static timer_id    flush_timer    = 0;
    1.57 +
    1.58 +/*****************************************************************************/
    1.59 +
    1.60 +static BOOL is_printable (const char *s)
    1.61 +{
    1.62 +	while (*s)
    1.63 +	{
    1.64 +		if (!isprint (*s))
    1.65 +			return FALSE;
    1.66 +
    1.67 +		s++;
    1.68 +	}
    1.69 +
    1.70 +	return TRUE;
    1.71 +}
    1.72 +
    1.73 +static void parse_text_meta (const char *data, Dataset **meta)
    1.74 +{
    1.75 +	int      rate, freq, min, sec;
    1.76 +	int      n;
    1.77 +	char    *lower;
    1.78 +
    1.79 +	if (!data)
    1.80 +		return;
    1.81 +
    1.82 +	/* only ASCII strings are plaintext metadata */
    1.83 +	if (!is_printable (data))
    1.84 +		return;
    1.85 +
    1.86 +	/* skip strings that start with "urn:", we know what those are */
    1.87 +	if (!strncasecmp (data, "urn:", 4))
    1.88 +		return;
    1.89 +
    1.90 +	if (!(lower = STRDUP (data)))
    1.91 +		return;
    1.92 +
    1.93 +	string_lower (lower);
    1.94 +	n = sscanf (lower, "%d kbps %d khz %d:%d", &rate, &freq, &min, &sec);
    1.95 +
    1.96 +	/* try again with a slightly different format if it failed */
    1.97 +	if (n != 4)
    1.98 +		n = sscanf (lower, "%d kbps(vbr) %d khz %d:%d", &rate, &freq, &min, &sec);
    1.99 +
   1.100 +	free (lower);
   1.101 +
   1.102 +	if (n != 4)
   1.103 +	{
   1.104 +#if 0
   1.105 +		static int warned = 0;
   1.106 +
   1.107 +		if (warned++ < 4)
   1.108 +			GT->DBGFN (GT, "unknown plaintext metadata?: %s", data);
   1.109 +#endif
   1.110 +
   1.111 +		return;
   1.112 +	}
   1.113 +
   1.114 +	/* XXX: actually this should be META_DEBUG */
   1.115 +	if (XML_DEBUG)
   1.116 +		GT->DBGFN (GT, "parsed %d kbps %d khz %d:%d", rate, freq, min, sec);
   1.117 +
   1.118 +	dataset_insertstr (meta, "bitrate",   stringf ("%li", rate * 1000));
   1.119 +	dataset_insertstr (meta, "frequency", stringf ("%u", freq * 1000));
   1.120 +	dataset_insertstr (meta, "duration",  stringf ("%i", min * 60 + sec));
   1.121 +}
   1.122 +
   1.123 +void gt_parse_extended_data (char *ext_block, gt_urn_t **r_urn,
   1.124 +                             Dataset **r_meta)
   1.125 +{
   1.126 +	gt_urn_t  *urn = NULL;
   1.127 +	char      *ext;
   1.128 +
   1.129 +	if (r_urn)
   1.130 +		*r_urn = NULL;
   1.131 +	if (r_meta)
   1.132 +		*r_meta = NULL;
   1.133 +
   1.134 +	if (!ext_block)
   1.135 +		return;
   1.136 +
   1.137 +	/*
   1.138 +	 * 0x1c is the separator character for so-called "GEM"
   1.139 +	 * (Gnutella-Extension Mechanism) extensions.
   1.140 +	 */
   1.141 +	while ((ext = string_sep (&ext_block, "\x1c")))
   1.142 +	{
   1.143 +		if (string_isempty (ext))
   1.144 +			break;
   1.145 +
   1.146 +		if (r_urn && (urn = gt_urn_parse (ext)))
   1.147 +		{
   1.148 +			free (*r_urn);
   1.149 +			*r_urn = urn;
   1.150 +		}
   1.151 +
   1.152 +		if (r_meta)
   1.153 +		{
   1.154 +			parse_text_meta (ext, r_meta);
   1.155 +			gt_xml_parse (ext, r_meta);
   1.156 +		}
   1.157 +	}
   1.158 +}
   1.159 +
   1.160 +static BOOL append_result (GtPacket *packet, FileShare *file)
   1.161 +{
   1.162 +	GtShare    *share;
   1.163 +	Hash       *hash;
   1.164 +
   1.165 +	if (!(share = share_get_udata (file, GT->name)))
   1.166 +		return FALSE;
   1.167 +
   1.168 +	/* search results
   1.169 +	 * format: <index#> <file size> <file name> <extra data(include hash)> */
   1.170 +	gt_packet_put_uint32 (packet, share->index);
   1.171 +	gt_packet_put_uint32 (packet, file->size);
   1.172 +	gt_packet_put_str    (packet, share->filename);
   1.173 +
   1.174 +	/*
   1.175 +	 * This is the information that goes "between the nulls" in a
   1.176 +	 * query hit. The first null comes after the filename.
   1.177 +	 *
   1.178 +	 * This is a bit specific and icky. It should be abstracted away.
   1.179 +	 */
   1.180 +	if ((hash = share_get_hash (file, "SHA1")))
   1.181 +	{
   1.182 +		char *sha1;
   1.183 +
   1.184 +		assert (hash->len == SHA1_BINSIZE);
   1.185 +
   1.186 +		if ((sha1 = sha1_string (hash->data)))
   1.187 +		{
   1.188 +			char  buf[128];
   1.189 +			int   len;
   1.190 +
   1.191 +			/* make the hash be uppercase */
   1.192 +			string_upper (sha1);
   1.193 +
   1.194 +			len = strlen (sha1);
   1.195 +			assert (len == SHA1_STRLEN);
   1.196 +
   1.197 +			snprintf (buf, sizeof (buf) - 1, "urn:sha1:%s", sha1);
   1.198 +			len += strlen ("urn:sha1:");
   1.199 +
   1.200 +			gt_packet_put_ustr (packet, buf, len);
   1.201 +			free (sha1);
   1.202 +		}
   1.203 +	}
   1.204 +
   1.205 +	/* put the second null there */
   1.206 +	gt_packet_put_uint8 (packet, 0);
   1.207 +
   1.208 +	if (gt_packet_error (packet))
   1.209 +	{
   1.210 +		gt_packet_free (packet);
   1.211 +		return FALSE;
   1.212 +	}
   1.213 +
   1.214 +	return TRUE;
   1.215 +}
   1.216 +
   1.217 +/* add a trailer to the packets */
   1.218 +static void transmit_results (TCPC *c, GtPacket *packet, uint8_t hits)
   1.219 +{
   1.220 +	gt_eqhd1_t eqhd1 = EQHD1_EMPTY;
   1.221 +	gt_eqhd2_t eqhd2 = EQHD2_EMPTY;
   1.222 +	uint8_t   *ggep;
   1.223 +	size_t     ggep_len;
   1.224 +
   1.225 +	/* set the push bit as significant */
   1.226 +	eqhd2 |= EQHD2_HAS_PUSH;
   1.227 +	/* set the busy bit as significant */
   1.228 + 	eqhd1 |= EQHD1_HAS_BUSY;
   1.229 +
   1.230 +	/*
   1.231 +	 * We shouldnt mark ourselves firewalled if the destination is
   1.232 +	 * a local ip address and ttl == 1. However, for greater TTLs,
   1.233 +	 * there's no knowing if we should mark it or not...
   1.234 +	 */
   1.235 +	if (GT_SELF->firewalled)
   1.236 +		eqhd1 |= EQHD1_PUSH_FLAG;
   1.237 +
   1.238 +	if (upload_availability () == 0)
   1.239 +		eqhd2 |= EQHD2_BUSY_FLAG;
   1.240 +
   1.241 +	/* add the query hit descriptor
   1.242 +	 * <vendor id> <length> <qhd_data1> <qhd_data2> <private_data> */
   1.243 +	gt_packet_put_ustr   (packet, (const unsigned char *)"GIFT", 4);
   1.244 +	gt_packet_put_uint8  (packet, 2);
   1.245 +	gt_packet_put_uint8  (packet, eqhd1);
   1.246 +	gt_packet_put_uint8  (packet, eqhd2);
   1.247 +
   1.248 +	/* append GGEP block (only contains PUSH proxies for now) */
   1.249 +	if (gt_push_proxy_get_ggep_block (&ggep, &ggep_len))
   1.250 +	    gt_packet_put_ustr (packet, ggep, ggep_len);
   1.251 +
   1.252 +	/* client identifier */
   1.253 +	gt_packet_put_ustr (packet, GT_SELF_GUID, 16);
   1.254 +
   1.255 +	if (gt_packet_error (packet))
   1.256 +	{
   1.257 +		gt_packet_free (packet);
   1.258 +		return;
   1.259 +	}
   1.260 +
   1.261 +#if 0
   1.262 +	GT->DBGFN (GT, "packet before twiddling result number: (will twiddle %i)", hits);
   1.263 +	TRACE_MEM (packet->data, packet->len);
   1.264 +#endif
   1.265 +
   1.266 +	/* rewind the packet to the search hit count and replace the hitcount
   1.267 +	 * it is the first byte after the header
   1.268 +	 * XXX: this should use a facility of gt_packet */
   1.269 +	packet->data[GNUTELLA_HDR_LEN] = hits;
   1.270 +
   1.271 +#if 0
   1.272 +	GT->DBGFN (GT, "packet after twiddling:");
   1.273 +	TRACE_MEM (packet->data, packet->len);
   1.274 +#endif
   1.275 +
   1.276 +	if (LOG_RESULT_PACKETS)
   1.277 +		GT->dbg (GT, "transmitting %i", hits);
   1.278 +
   1.279 +	/* send the reply along the path to the node that queried us */
   1.280 +	gt_packet_send (c, packet);
   1.281 +	gt_packet_free (packet);
   1.282 +}
   1.283 +
   1.284 +static BOOL query_request_result (TCPC *c, FileShare *file,
   1.285 +                                  gt_search_reply_t *reply)
   1.286 +{
   1.287 +	GtPacket *packet;
   1.288 +
   1.289 +	if (!file)
   1.290 +	{
   1.291 +		/* send any remaining data */
   1.292 +		if (reply->packet)
   1.293 +			transmit_results (c, reply->packet, reply->results);
   1.294 +
   1.295 +		return FALSE;
   1.296 +	}
   1.297 +
   1.298 +	packet = reply->packet;
   1.299 +
   1.300 +	if (packet)
   1.301 +	{
   1.302 +		/* send the packet if the max results per packet is reached
   1.303 +		 * or the size of the packet is large */
   1.304 +		if (reply->results == 255 || gt_packet_payload_len (packet) > 2000)
   1.305 +		{
   1.306 +			transmit_results (c, packet, reply->results);
   1.307 +
   1.308 +			reply->packet  = NULL;
   1.309 +			reply->results = 0;
   1.310 +
   1.311 +			/* handle this item again */
   1.312 +			return TRUE;
   1.313 +		}
   1.314 +
   1.315 +		if (append_result (packet, file))
   1.316 +			reply->results++;
   1.317 +
   1.318 +		return FALSE;
   1.319 +	}
   1.320 +
   1.321 +	/* allocate a packet */
   1.322 +	if (!(packet = gt_packet_new (GT_MSG_QUERY_REPLY, reply->ttl, reply->guid)))
   1.323 +	{
   1.324 +		GIFT_ERROR (("mem failure?"));
   1.325 +		return FALSE;
   1.326 +	}
   1.327 +
   1.328 +	/* setup the search header */
   1.329 +	gt_packet_put_uint8  (packet, 0);
   1.330 +	gt_packet_put_port   (packet, GT_SELF->gt_port);
   1.331 +	gt_packet_put_ip     (packet, GT_NODE(c)->my_ip);
   1.332 +	gt_packet_put_uint32 (packet, 0); /* speed (kbits) */
   1.333 +
   1.334 +	if (gt_packet_error (packet))
   1.335 +	{
   1.336 +		GIFT_ERROR (("failed seting up search result packet"));
   1.337 +		gt_packet_free (packet);
   1.338 +		return FALSE;
   1.339 +	}
   1.340 +
   1.341 +	reply->packet = packet;
   1.342 +
   1.343 +	/* handle this item again */
   1.344 +	return TRUE;
   1.345 +}
   1.346 +
   1.347 +static BOOL query_request_result_free (TCPC *c, FileShare *file,
   1.348 +                                       gt_search_reply_t *reply)
   1.349 +{
   1.350 +	GtShare *share;
   1.351 +
   1.352 +	if (!file)
   1.353 +	{
   1.354 +		free (reply->guid);
   1.355 +		free (reply);
   1.356 +		return FALSE;
   1.357 +	}
   1.358 +
   1.359 +	/* just a sanity check */
   1.360 +	if (file && !(share = share_get_udata (file, GT->name)))
   1.361 +		return FALSE;
   1.362 +
   1.363 +	return FALSE;
   1.364 +}
   1.365 +
   1.366 +/* This emulates the old queue interface */
   1.367 +static BOOL send_result (FileShare *file, void **args)
   1.368 +{
   1.369 +	TCPC              *c     = args[0];
   1.370 +	gt_search_reply_t *reply = args[1];
   1.371 +
   1.372 +	while (query_request_result (c, file, reply))
   1.373 +		;
   1.374 +
   1.375 +	query_request_result_free (c, file, reply);
   1.376 +	return TRUE;
   1.377 +}
   1.378 +
   1.379 +static void send_results (TCPC *c, List *results, gt_search_reply_t *reply)
   1.380 +{
   1.381 +	void *args[2];
   1.382 +
   1.383 +	args[0] = c;
   1.384 +	args[1] = reply;
   1.385 +
   1.386 +	results = list_foreach_remove (results, (ListForeachFunc)send_result, args);
   1.387 +	assert (results == NULL);
   1.388 +
   1.389 +	query_request_result (c, NULL, reply);
   1.390 +	query_request_result_free (c, NULL, reply);
   1.391 +}
   1.392 +
   1.393 +static int flush_old (ds_data_t *key, ds_data_t *value, time_t *now)
   1.394 +{
   1.395 +	time_t *timestamp = value->data;
   1.396 +
   1.397 +	if (*now - *timestamp >= 10 * EMINUTES)
   1.398 +		return DS_CONTINUE | DS_REMOVE;
   1.399 +
   1.400 +	return DS_CONTINUE;
   1.401 +}
   1.402 +
   1.403 +static BOOL flush_qcache (Dataset *cache)
   1.404 +{
   1.405 +	time_t now = time (NULL);
   1.406 +
   1.407 +	assert (query_cache != NULL);
   1.408 +	dataset_foreach_ex (query_cache, DS_FOREACH_EX(flush_old), &now);
   1.409 +
   1.410 +	return TRUE;
   1.411 +}
   1.412 +
   1.413 +/* TODO: need to break up this file soon to isolate these things */
   1.414 +static BOOL query_cache_lookup (gt_guid_t *guid)
   1.415 +{
   1.416 +	time_t now;
   1.417 +
   1.418 +	if (dataset_lookup (query_cache, guid, GT_GUID_LEN))
   1.419 +		return TRUE;
   1.420 +
   1.421 +	/* limit the maximum length the query cache can grow */
   1.422 +	if (dataset_length (query_cache) >= 2000)
   1.423 +		return FALSE;
   1.424 +
   1.425 +	/* add the guid for catching duplicates next time */
   1.426 +	now = time (NULL);
   1.427 +	dataset_insert (&query_cache, guid, GT_GUID_LEN, &now, sizeof (now));
   1.428 +
   1.429 +	if (!flush_timer)
   1.430 +	{
   1.431 +		flush_timer = timer_add (5 * MINUTES, (TimerCallback)flush_qcache,
   1.432 +		                         NULL);
   1.433 +	}
   1.434 +
   1.435 +	return FALSE;
   1.436 +}
   1.437 +
   1.438 +GT_MSG_HANDLER(gt_msg_query)
   1.439 +{
   1.440 +	char              *query;
   1.441 +	char              *extended;
   1.442 +	gt_guid_t         *guid;
   1.443 +	gt_urn_t          *urn;
   1.444 +	List              *list;
   1.445 +	uint8_t            ttl;
   1.446 +	uint8_t            hops;
   1.447 +	unsigned char     *hash;
   1.448 +	gt_query_flags_t   flags;
   1.449 +	gt_search_type_t   type;
   1.450 +	gt_search_reply_t *reply;
   1.451 +
   1.452 +	flags     = gt_packet_get_uint16 (packet);
   1.453 +	query     = gt_packet_get_str    (packet);
   1.454 +	extended  = gt_packet_get_str    (packet);
   1.455 +
   1.456 +	guid = gt_packet_guid (packet);
   1.457 +
   1.458 +	/*
   1.459 +	 * TODO: node->share_state can be null here, if the node hasn't
   1.460 +	 * successfully handshaked yet.  Should fix this by storing messages until
   1.461 +	 * handshake is complete.
   1.462 +	 */
   1.463 +	if (node->share_state && node->share_state->hidden)
   1.464 +		return;
   1.465 +
   1.466 +	/* don't reply if the host is firewalled and we are too */
   1.467 +	if ((flags & QF_HAS_FLAGS) && (flags & QF_ONLY_NON_FW) &&
   1.468 +	    GT_SELF->firewalled)
   1.469 +	{
   1.470 +		return;
   1.471 +	}
   1.472 +
   1.473 +	/* don't reply if this is our own search -- TODO: substitute a
   1.474 +	 * full-fledged routing table */
   1.475 +	if (gt_search_find (guid))
   1.476 +	{
   1.477 +		if (MSG_DEBUG)
   1.478 +		{
   1.479 +			GT->dbg (GT, "not searching, own search (guid %s)",
   1.480 +			         gt_guid_str (guid));
   1.481 +		}
   1.482 +
   1.483 +		return;
   1.484 +	}
   1.485 +
   1.486 +	/* check if we've handled this search already */
   1.487 +	if (query_cache_lookup (guid))
   1.488 +	{
   1.489 +		if (MSG_DEBUG)
   1.490 +			GT->DBGSOCK (GT, c, "duplicate search (%s)", gt_guid_str (guid));
   1.491 +
   1.492 +		return;
   1.493 +	}
   1.494 +
   1.495 +	gt_parse_extended_data (extended, &urn, NULL);
   1.496 +
   1.497 +	/* WARNING: this assumes sha1 */
   1.498 +	hash = gt_urn_data (urn);
   1.499 +
   1.500 +	if (hash)
   1.501 +		type = GT_SEARCH_HASH;
   1.502 +	else
   1.503 +		type = GT_SEARCH_KEYWORD;
   1.504 +
   1.505 +#if 0
   1.506 +	GT->DBGFN (GT, "min_speed = %hu, query = '%s', extended data = '%s'",
   1.507 +	           min_speed, query, extended);
   1.508 +#endif
   1.509 +
   1.510 +	ttl  = gt_packet_ttl  (packet);
   1.511 +	hops = gt_packet_hops (packet);
   1.512 +
   1.513 +	list = gt_search_exec (query, type, urn, ttl, hops);
   1.514 +	free (urn);
   1.515 +
   1.516 +	if (!list)
   1.517 +		return;
   1.518 +
   1.519 +	if (!(reply = MALLOC (sizeof (gt_search_reply_t))))
   1.520 +	{
   1.521 +		list_free (list);
   1.522 +		return;
   1.523 +	}
   1.524 +
   1.525 +	/* set the ttl of the reply to be +1 the hops the request travelled */
   1.526 +	reply->ttl = gt_packet_hops (packet) + 1;
   1.527 +
   1.528 +	/* use the guid of the packet in replying to results */
   1.529 +	reply->guid = gt_guid_dup (guid);
   1.530 +
   1.531 +	send_results (c, list, reply);
   1.532 +}