paulo@0: /* paulo@0: * $Id: query.c,v 1.10 2004/06/04 15:44:59 hipnod Exp $ paulo@0: * paulo@0: * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) paulo@0: * paulo@0: * This program is free software; you can redistribute it and/or modify it paulo@0: * under the terms of the GNU General Public License as published by the paulo@0: * Free Software Foundation; either version 2, or (at your option) any paulo@0: * later version. paulo@0: * paulo@0: * This program is distributed in the hope that it will be useful, but paulo@0: * WITHOUT ANY WARRANTY; without even the implied warranty of paulo@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU paulo@0: * General Public License for more details. paulo@0: */ paulo@0: paulo@0: #include "gt_gnutella.h" paulo@0: #include "message/msg_handler.h" paulo@0: paulo@0: #include "sha1.h" paulo@0: #include "xml.h" paulo@0: paulo@0: #include "gt_share.h" paulo@0: #include "gt_share_file.h" paulo@0: #include "gt_share_state.h" paulo@0: paulo@0: #include "gt_search.h" paulo@0: #include "gt_search_exec.h" paulo@0: #include "gt_urn.h" paulo@0: paulo@0: #include "transfer/push_proxy.h" paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: #define LOG_RESULT_PACKETS gt_config_get_int("search/log_result_packets=0") paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: typedef struct gt_search_reply paulo@0: { paulo@0: uint8_t ttl; paulo@0: uint8_t results; /* number of results on the current packet */ paulo@0: GtPacket *packet; /* the current packet to stack results on */ paulo@0: gt_guid_t *guid; paulo@0: } gt_search_reply_t; paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: /* cache of recent queries TODO: flush this on plugin unload */ paulo@0: static Dataset *query_cache = NULL; paulo@0: paulo@0: /* flushes the old cache entries */ paulo@0: static timer_id flush_timer = 0; paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static BOOL is_printable (const char *s) paulo@0: { paulo@0: while (*s) paulo@0: { paulo@0: if (!isprint (*s)) paulo@0: return FALSE; paulo@0: paulo@0: s++; paulo@0: } paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static void parse_text_meta (const char *data, Dataset **meta) paulo@0: { paulo@0: int rate, freq, min, sec; paulo@0: int n; paulo@0: char *lower; paulo@0: paulo@0: if (!data) paulo@0: return; paulo@0: paulo@0: /* only ASCII strings are plaintext metadata */ paulo@0: if (!is_printable (data)) paulo@0: return; paulo@0: paulo@0: /* skip strings that start with "urn:", we know what those are */ paulo@0: if (!strncasecmp (data, "urn:", 4)) paulo@0: return; paulo@0: paulo@0: if (!(lower = STRDUP (data))) paulo@0: return; paulo@0: paulo@0: string_lower (lower); paulo@0: n = sscanf (lower, "%d kbps %d khz %d:%d", &rate, &freq, &min, &sec); paulo@0: paulo@0: /* try again with a slightly different format if it failed */ paulo@0: if (n != 4) paulo@0: n = sscanf (lower, "%d kbps(vbr) %d khz %d:%d", &rate, &freq, &min, &sec); paulo@0: paulo@0: free (lower); paulo@0: paulo@0: if (n != 4) paulo@0: { paulo@0: #if 0 paulo@0: static int warned = 0; paulo@0: paulo@0: if (warned++ < 4) paulo@0: GT->DBGFN (GT, "unknown plaintext metadata?: %s", data); paulo@0: #endif paulo@0: paulo@0: return; paulo@0: } paulo@0: paulo@0: /* XXX: actually this should be META_DEBUG */ paulo@0: if (XML_DEBUG) paulo@0: GT->DBGFN (GT, "parsed %d kbps %d khz %d:%d", rate, freq, min, sec); paulo@0: paulo@0: dataset_insertstr (meta, "bitrate", stringf ("%li", rate * 1000)); paulo@0: dataset_insertstr (meta, "frequency", stringf ("%u", freq * 1000)); paulo@0: dataset_insertstr (meta, "duration", stringf ("%i", min * 60 + sec)); paulo@0: } paulo@0: paulo@0: void gt_parse_extended_data (char *ext_block, gt_urn_t **r_urn, paulo@0: Dataset **r_meta) paulo@0: { paulo@0: gt_urn_t *urn = NULL; paulo@0: char *ext; paulo@0: paulo@0: if (r_urn) paulo@0: *r_urn = NULL; paulo@0: if (r_meta) paulo@0: *r_meta = NULL; paulo@0: paulo@0: if (!ext_block) paulo@0: return; paulo@0: paulo@0: /* paulo@0: * 0x1c is the separator character for so-called "GEM" paulo@0: * (Gnutella-Extension Mechanism) extensions. paulo@0: */ paulo@0: while ((ext = string_sep (&ext_block, "\x1c"))) paulo@0: { paulo@0: if (string_isempty (ext)) paulo@0: break; paulo@0: paulo@0: if (r_urn && (urn = gt_urn_parse (ext))) paulo@0: { paulo@0: free (*r_urn); paulo@0: *r_urn = urn; paulo@0: } paulo@0: paulo@0: if (r_meta) paulo@0: { paulo@0: parse_text_meta (ext, r_meta); paulo@0: gt_xml_parse (ext, r_meta); paulo@0: } paulo@0: } paulo@0: } paulo@0: paulo@0: static BOOL append_result (GtPacket *packet, FileShare *file) paulo@0: { paulo@0: GtShare *share; paulo@0: Hash *hash; paulo@0: paulo@0: if (!(share = share_get_udata (file, GT->name))) paulo@0: return FALSE; paulo@0: paulo@0: /* search results paulo@0: * format: */ paulo@0: gt_packet_put_uint32 (packet, share->index); paulo@0: gt_packet_put_uint32 (packet, file->size); paulo@0: gt_packet_put_str (packet, share->filename); paulo@0: paulo@0: /* paulo@0: * This is the information that goes "between the nulls" in a paulo@0: * query hit. The first null comes after the filename. paulo@0: * paulo@0: * This is a bit specific and icky. It should be abstracted away. paulo@0: */ paulo@0: if ((hash = share_get_hash (file, "SHA1"))) paulo@0: { paulo@0: char *sha1; paulo@0: paulo@0: assert (hash->len == SHA1_BINSIZE); paulo@0: paulo@0: if ((sha1 = sha1_string (hash->data))) paulo@0: { paulo@0: char buf[128]; paulo@0: int len; paulo@0: paulo@0: /* make the hash be uppercase */ paulo@0: string_upper (sha1); paulo@0: paulo@0: len = strlen (sha1); paulo@0: assert (len == SHA1_STRLEN); paulo@0: paulo@0: snprintf (buf, sizeof (buf) - 1, "urn:sha1:%s", sha1); paulo@0: len += strlen ("urn:sha1:"); paulo@0: paulo@0: gt_packet_put_ustr (packet, buf, len); paulo@0: free (sha1); paulo@0: } paulo@0: } paulo@0: paulo@0: /* put the second null there */ paulo@0: gt_packet_put_uint8 (packet, 0); paulo@0: paulo@0: if (gt_packet_error (packet)) paulo@0: { paulo@0: gt_packet_free (packet); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: /* add a trailer to the packets */ paulo@0: static void transmit_results (TCPC *c, GtPacket *packet, uint8_t hits) paulo@0: { paulo@0: gt_eqhd1_t eqhd1 = EQHD1_EMPTY; paulo@0: gt_eqhd2_t eqhd2 = EQHD2_EMPTY; paulo@0: uint8_t *ggep; paulo@0: size_t ggep_len; paulo@0: paulo@0: /* set the push bit as significant */ paulo@0: eqhd2 |= EQHD2_HAS_PUSH; paulo@0: /* set the busy bit as significant */ paulo@0: eqhd1 |= EQHD1_HAS_BUSY; paulo@0: paulo@0: /* paulo@0: * We shouldnt mark ourselves firewalled if the destination is paulo@0: * a local ip address and ttl == 1. However, for greater TTLs, paulo@0: * there's no knowing if we should mark it or not... paulo@0: */ paulo@0: if (GT_SELF->firewalled) paulo@0: eqhd1 |= EQHD1_PUSH_FLAG; paulo@0: paulo@0: if (upload_availability () == 0) paulo@0: eqhd2 |= EQHD2_BUSY_FLAG; paulo@0: paulo@0: /* add the query hit descriptor paulo@0: * */ paulo@0: gt_packet_put_ustr (packet, (const unsigned char *)"GIFT", 4); paulo@0: gt_packet_put_uint8 (packet, 2); paulo@0: gt_packet_put_uint8 (packet, eqhd1); paulo@0: gt_packet_put_uint8 (packet, eqhd2); paulo@0: paulo@0: /* append GGEP block (only contains PUSH proxies for now) */ paulo@0: if (gt_push_proxy_get_ggep_block (&ggep, &ggep_len)) paulo@0: gt_packet_put_ustr (packet, ggep, ggep_len); paulo@0: paulo@0: /* client identifier */ paulo@0: gt_packet_put_ustr (packet, GT_SELF_GUID, 16); paulo@0: paulo@0: if (gt_packet_error (packet)) paulo@0: { paulo@0: gt_packet_free (packet); paulo@0: return; paulo@0: } paulo@0: paulo@0: #if 0 paulo@0: GT->DBGFN (GT, "packet before twiddling result number: (will twiddle %i)", hits); paulo@0: TRACE_MEM (packet->data, packet->len); paulo@0: #endif paulo@0: paulo@0: /* rewind the packet to the search hit count and replace the hitcount paulo@0: * it is the first byte after the header paulo@0: * XXX: this should use a facility of gt_packet */ paulo@0: packet->data[GNUTELLA_HDR_LEN] = hits; paulo@0: paulo@0: #if 0 paulo@0: GT->DBGFN (GT, "packet after twiddling:"); paulo@0: TRACE_MEM (packet->data, packet->len); paulo@0: #endif paulo@0: paulo@0: if (LOG_RESULT_PACKETS) paulo@0: GT->dbg (GT, "transmitting %i", hits); paulo@0: paulo@0: /* send the reply along the path to the node that queried us */ paulo@0: gt_packet_send (c, packet); paulo@0: gt_packet_free (packet); paulo@0: } paulo@0: paulo@0: static BOOL query_request_result (TCPC *c, FileShare *file, paulo@0: gt_search_reply_t *reply) paulo@0: { paulo@0: GtPacket *packet; paulo@0: paulo@0: if (!file) paulo@0: { paulo@0: /* send any remaining data */ paulo@0: if (reply->packet) paulo@0: transmit_results (c, reply->packet, reply->results); paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: packet = reply->packet; paulo@0: paulo@0: if (packet) paulo@0: { paulo@0: /* send the packet if the max results per packet is reached paulo@0: * or the size of the packet is large */ paulo@0: if (reply->results == 255 || gt_packet_payload_len (packet) > 2000) paulo@0: { paulo@0: transmit_results (c, packet, reply->results); paulo@0: paulo@0: reply->packet = NULL; paulo@0: reply->results = 0; paulo@0: paulo@0: /* handle this item again */ paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: if (append_result (packet, file)) paulo@0: reply->results++; paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: /* allocate a packet */ paulo@0: if (!(packet = gt_packet_new (GT_MSG_QUERY_REPLY, reply->ttl, reply->guid))) paulo@0: { paulo@0: GIFT_ERROR (("mem failure?")); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: /* setup the search header */ paulo@0: gt_packet_put_uint8 (packet, 0); paulo@0: gt_packet_put_port (packet, GT_SELF->gt_port); paulo@0: gt_packet_put_ip (packet, GT_NODE(c)->my_ip); paulo@0: gt_packet_put_uint32 (packet, 0); /* speed (kbits) */ paulo@0: paulo@0: if (gt_packet_error (packet)) paulo@0: { paulo@0: GIFT_ERROR (("failed seting up search result packet")); paulo@0: gt_packet_free (packet); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: reply->packet = packet; paulo@0: paulo@0: /* handle this item again */ paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static BOOL query_request_result_free (TCPC *c, FileShare *file, paulo@0: gt_search_reply_t *reply) paulo@0: { paulo@0: GtShare *share; paulo@0: paulo@0: if (!file) paulo@0: { paulo@0: free (reply->guid); paulo@0: free (reply); paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: /* just a sanity check */ paulo@0: if (file && !(share = share_get_udata (file, GT->name))) paulo@0: return FALSE; paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: /* This emulates the old queue interface */ paulo@0: static BOOL send_result (FileShare *file, void **args) paulo@0: { paulo@0: TCPC *c = args[0]; paulo@0: gt_search_reply_t *reply = args[1]; paulo@0: paulo@0: while (query_request_result (c, file, reply)) paulo@0: ; paulo@0: paulo@0: query_request_result_free (c, file, reply); paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: static void send_results (TCPC *c, List *results, gt_search_reply_t *reply) paulo@0: { paulo@0: void *args[2]; paulo@0: paulo@0: args[0] = c; paulo@0: args[1] = reply; paulo@0: paulo@0: results = list_foreach_remove (results, (ListForeachFunc)send_result, args); paulo@0: assert (results == NULL); paulo@0: paulo@0: query_request_result (c, NULL, reply); paulo@0: query_request_result_free (c, NULL, reply); paulo@0: } paulo@0: paulo@0: static int flush_old (ds_data_t *key, ds_data_t *value, time_t *now) paulo@0: { paulo@0: time_t *timestamp = value->data; paulo@0: paulo@0: if (*now - *timestamp >= 10 * EMINUTES) paulo@0: return DS_CONTINUE | DS_REMOVE; paulo@0: paulo@0: return DS_CONTINUE; paulo@0: } paulo@0: paulo@0: static BOOL flush_qcache (Dataset *cache) paulo@0: { paulo@0: time_t now = time (NULL); paulo@0: paulo@0: assert (query_cache != NULL); paulo@0: dataset_foreach_ex (query_cache, DS_FOREACH_EX(flush_old), &now); paulo@0: paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: /* TODO: need to break up this file soon to isolate these things */ paulo@0: static BOOL query_cache_lookup (gt_guid_t *guid) paulo@0: { paulo@0: time_t now; paulo@0: paulo@0: if (dataset_lookup (query_cache, guid, GT_GUID_LEN)) paulo@0: return TRUE; paulo@0: paulo@0: /* limit the maximum length the query cache can grow */ paulo@0: if (dataset_length (query_cache) >= 2000) paulo@0: return FALSE; paulo@0: paulo@0: /* add the guid for catching duplicates next time */ paulo@0: now = time (NULL); paulo@0: dataset_insert (&query_cache, guid, GT_GUID_LEN, &now, sizeof (now)); paulo@0: paulo@0: if (!flush_timer) paulo@0: { paulo@0: flush_timer = timer_add (5 * MINUTES, (TimerCallback)flush_qcache, paulo@0: NULL); paulo@0: } paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: GT_MSG_HANDLER(gt_msg_query) paulo@0: { paulo@0: char *query; paulo@0: char *extended; paulo@0: gt_guid_t *guid; paulo@0: gt_urn_t *urn; paulo@0: List *list; paulo@0: uint8_t ttl; paulo@0: uint8_t hops; paulo@0: unsigned char *hash; paulo@0: gt_query_flags_t flags; paulo@0: gt_search_type_t type; paulo@0: gt_search_reply_t *reply; paulo@0: paulo@0: flags = gt_packet_get_uint16 (packet); paulo@0: query = gt_packet_get_str (packet); paulo@0: extended = gt_packet_get_str (packet); paulo@0: paulo@0: guid = gt_packet_guid (packet); paulo@0: paulo@0: /* paulo@0: * TODO: node->share_state can be null here, if the node hasn't paulo@0: * successfully handshaked yet. Should fix this by storing messages until paulo@0: * handshake is complete. paulo@0: */ paulo@0: if (node->share_state && node->share_state->hidden) paulo@0: return; paulo@0: paulo@0: /* don't reply if the host is firewalled and we are too */ paulo@0: if ((flags & QF_HAS_FLAGS) && (flags & QF_ONLY_NON_FW) && paulo@0: GT_SELF->firewalled) paulo@0: { paulo@0: return; paulo@0: } paulo@0: paulo@0: /* don't reply if this is our own search -- TODO: substitute a paulo@0: * full-fledged routing table */ paulo@0: if (gt_search_find (guid)) paulo@0: { paulo@0: if (MSG_DEBUG) paulo@0: { paulo@0: GT->dbg (GT, "not searching, own search (guid %s)", paulo@0: gt_guid_str (guid)); paulo@0: } paulo@0: paulo@0: return; paulo@0: } paulo@0: paulo@0: /* check if we've handled this search already */ paulo@0: if (query_cache_lookup (guid)) paulo@0: { paulo@0: if (MSG_DEBUG) paulo@0: GT->DBGSOCK (GT, c, "duplicate search (%s)", gt_guid_str (guid)); paulo@0: paulo@0: return; paulo@0: } paulo@0: paulo@0: gt_parse_extended_data (extended, &urn, NULL); paulo@0: paulo@0: /* WARNING: this assumes sha1 */ paulo@0: hash = gt_urn_data (urn); paulo@0: paulo@0: if (hash) paulo@0: type = GT_SEARCH_HASH; paulo@0: else paulo@0: type = GT_SEARCH_KEYWORD; paulo@0: paulo@0: #if 0 paulo@0: GT->DBGFN (GT, "min_speed = %hu, query = '%s', extended data = '%s'", paulo@0: min_speed, query, extended); paulo@0: #endif paulo@0: paulo@0: ttl = gt_packet_ttl (packet); paulo@0: hops = gt_packet_hops (packet); paulo@0: paulo@0: list = gt_search_exec (query, type, urn, ttl, hops); paulo@0: free (urn); paulo@0: paulo@0: if (!list) paulo@0: return; paulo@0: paulo@0: if (!(reply = MALLOC (sizeof (gt_search_reply_t)))) paulo@0: { paulo@0: list_free (list); paulo@0: return; paulo@0: } paulo@0: paulo@0: /* set the ttl of the reply to be +1 the hops the request travelled */ paulo@0: reply->ttl = gt_packet_hops (packet) + 1; paulo@0: paulo@0: /* use the guid of the packet in replying to results */ paulo@0: reply->guid = gt_guid_dup (guid); paulo@0: paulo@0: send_results (c, list, reply); paulo@0: }