paulo@0: /* paulo@0: * $Id: gt_search_exec.c,v 1.23 2004/01/04 05:15:28 hipnod Exp $ paulo@0: * paulo@0: * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) paulo@0: * paulo@0: * This program is free software; you can redistribute it and/or modify it paulo@0: * under the terms of the GNU General Public License as published by the paulo@0: * Free Software Foundation; either version 2, or (at your option) any paulo@0: * later version. paulo@0: * paulo@0: * This program is distributed in the hope that it will be useful, but paulo@0: * WITHOUT ANY WARRANTY; without even the implied warranty of paulo@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU paulo@0: * General Public License for more details. paulo@0: */ paulo@0: paulo@0: #include "gt_gnutella.h" paulo@0: paulo@0: #include "gt_search.h" paulo@0: #include "gt_search_exec.h" paulo@0: paulo@0: #include "gt_query_route.h" /* QRP_DELIMITERS */ paulo@0: #include "gt_share.h" /* gt_share_local_lookup_by_urn */ paulo@0: #include "gt_share_file.h" paulo@0: paulo@0: #include "sha1.h" /* sha1_string */ paulo@0: #include "trie.h" paulo@0: paulo@0: #include paulo@0: paulo@0: /******************************************************************************/ paulo@0: paulo@0: #define MAX_RESULTS 200 paulo@0: paulo@0: /******************************************************************************/ paulo@0: paulo@0: static Trie *gt_search_trie; paulo@0: paulo@0: static StopWatch *search_sw; paulo@0: paulo@0: /******************************************************************************/ paulo@0: paulo@0: GtTokenSet *gt_token_set_new () paulo@0: { paulo@0: GtTokenSet *ts; paulo@0: paulo@0: if (!(ts = MALLOC (sizeof (GtTokenSet)))) paulo@0: return NULL; paulo@0: paulo@0: return ts; paulo@0: } paulo@0: paulo@0: void gt_token_set_free (GtTokenSet *ts) paulo@0: { paulo@0: if (!ts) paulo@0: return; paulo@0: paulo@0: free (ts->data); paulo@0: free (ts); paulo@0: } paulo@0: paulo@0: void gt_token_set_append (GtTokenSet *ts, uint32_t token) paulo@0: { paulo@0: if (ts->len >= ts->data_len) paulo@0: { paulo@0: uint32_t *new_tokens; paulo@0: paulo@0: ts->data_len += 8; paulo@0: new_tokens = realloc (ts->data, ts->data_len * sizeof (uint32_t)); paulo@0: paulo@0: assert (new_tokens != NULL); paulo@0: ts->data = new_tokens; paulo@0: } paulo@0: paulo@0: ts->data[ts->len++] = token; paulo@0: } paulo@0: paulo@0: /******************************************************************************/ paulo@0: paulo@0: static List *by_hash (unsigned char *hash, size_t *n) paulo@0: { paulo@0: FileShare *file; paulo@0: char *str; paulo@0: char *urn; paulo@0: paulo@0: *n = 0; paulo@0: paulo@0: /* ugly that this is so sha1 specific:need a urn abstraction desperately */ paulo@0: if (!(str = sha1_string (hash))) paulo@0: return NULL; paulo@0: paulo@0: urn = stringf_dup ("urn:sha1:%s", str); paulo@0: free (str); paulo@0: paulo@0: if (!(file = gt_share_local_lookup_by_urn (urn))) paulo@0: { paulo@0: free (urn); paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: if (LOG_RESULTS) paulo@0: { paulo@0: GT->DBGFN (GT, "Wuh-HOO! Answered a query-by-hash (%s) for (%s)", paulo@0: urn, share_get_hpath (file)); paulo@0: } paulo@0: paulo@0: *n = 1; paulo@0: free (urn); paulo@0: paulo@0: return list_append (NULL, file); paulo@0: } paulo@0: paulo@0: /******************************************************************************/ paulo@0: paulo@0: /* some defines for list_find_custom that work like DS_BREAK, etc. */ paulo@0: #define LIST_BREAK 0 paulo@0: #define LIST_CONTINUE 1 paulo@0: paulo@0: static int search_slowly (Share *file, void **cmp) paulo@0: { paulo@0: GtTokenSet *tokens = cmp[0]; paulo@0: List **results = cmp[1]; paulo@0: int *max_results = cmp[2]; paulo@0: int *count = cmp[3]; paulo@0: GtShare *share; paulo@0: int i, j; paulo@0: int matched; paulo@0: GtTokenSet *cmp_tokens; paulo@0: paulo@0: if (*count >= *max_results) paulo@0: return LIST_BREAK; paulo@0: paulo@0: if (!(share = share_get_udata (file, GT->name))) paulo@0: return LIST_CONTINUE; paulo@0: paulo@0: cmp_tokens = share->tokens; paulo@0: matched = 0; paulo@0: paulo@0: for (i = 0; i < tokens->len; i++) paulo@0: { paulo@0: int old_matched = matched; paulo@0: paulo@0: for (j = 0; j < cmp_tokens->len; j++) paulo@0: { paulo@0: if (tokens->data[i] == cmp_tokens->data[j]) paulo@0: { paulo@0: matched++; paulo@0: break; paulo@0: } paulo@0: } paulo@0: paulo@0: if (matched == old_matched) paulo@0: break; paulo@0: } paulo@0: paulo@0: if (matched >= tokens->len) paulo@0: { paulo@0: *results = list_prepend (*results, file); paulo@0: (*count)++; paulo@0: } paulo@0: paulo@0: return LIST_CONTINUE; paulo@0: } paulo@0: paulo@0: /* paulo@0: * Find the smallest list corresponding to a word in the query paulo@0: * that is in the trie of shares. paulo@0: */ paulo@0: static List *find_smallest (char *query) paulo@0: { paulo@0: char *str; paulo@0: char *str0; paulo@0: List *smallest = NULL; paulo@0: size_t smallest_size = 0; paulo@0: char *tok; paulo@0: size_t size; paulo@0: paulo@0: if (!(str = str0 = STRDUP (query))) paulo@0: return NULL; paulo@0: paulo@0: string_lower (str); paulo@0: paulo@0: while ((tok = string_sep_set (&str, QRP_DELIMITERS))) paulo@0: { paulo@0: List *list; paulo@0: paulo@0: if (string_isempty (tok)) paulo@0: continue; paulo@0: paulo@0: if (!(list = trie_lookup (gt_search_trie, tok))) paulo@0: { paulo@0: /* no shares have this token: therefore no match */ paulo@0: smallest = NULL; paulo@0: smallest_size = 0; paulo@0: break; paulo@0: } paulo@0: paulo@0: if ((size = list_length (list)) < smallest_size || smallest_size == 0) paulo@0: { paulo@0: /* keep this one */ paulo@0: smallest_size = size; paulo@0: smallest = list; paulo@0: } paulo@0: } paulo@0: paulo@0: free (str0); paulo@0: paulo@0: if (LOG_RESULTS) paulo@0: GT->DBGFN (GT, "scanning list of %d size", smallest_size); paulo@0: paulo@0: return smallest; paulo@0: } paulo@0: paulo@0: /* paulo@0: * Look up the list of all shares for each word in the query in the paulo@0: * Trie of all local shares. Find the keyword with the smallest paulo@0: * list, then iterate the list looking for shares. paulo@0: */ paulo@0: static List *by_keyword (char *query, size_t max_results, size_t *n) paulo@0: { paulo@0: GtTokenSet *tokens; paulo@0: List *results = NULL; paulo@0: List *smallest; paulo@0: void *cmp[4]; paulo@0: paulo@0: if (!query || string_isempty (query)) paulo@0: return NULL; paulo@0: paulo@0: if (!(tokens = gt_share_tokenize (query))) paulo@0: return NULL; paulo@0: paulo@0: cmp[0] = tokens; paulo@0: cmp[1] = &results; paulo@0: cmp[2] = &max_results; paulo@0: cmp[3] = n; paulo@0: paulo@0: smallest = find_smallest (query); paulo@0: list_find_custom (smallest, cmp, (ListForeachFunc)search_slowly); paulo@0: paulo@0: gt_token_set_free (tokens); paulo@0: return results; paulo@0: } paulo@0: paulo@0: List *gt_search_exec (char *query, gt_search_type_t type, void *extended, paulo@0: uint8_t ttl, uint8_t hops) paulo@0: { paulo@0: List *results; paulo@0: int max_res = MAX_RESULTS; paulo@0: double elapsed; paulo@0: unsigned char *hash; paulo@0: size_t n = 0; paulo@0: paulo@0: stopwatch_start (search_sw); paulo@0: paulo@0: hash = extended; paulo@0: paulo@0: switch (type) paulo@0: { paulo@0: case GT_SEARCH_KEYWORD: results = by_keyword (query, max_res, &n); break; paulo@0: case GT_SEARCH_HASH: results = by_hash (hash, &n); break; paulo@0: default: abort (); paulo@0: } paulo@0: paulo@0: stopwatch_stop (search_sw); paulo@0: elapsed = stopwatch_elapsed (search_sw, NULL); paulo@0: paulo@0: if (LOG_RESULTS) paulo@0: { paulo@0: GT->dbg (GT, "results: [%03d] [%d|%d] %.06fs (%s)", n, paulo@0: ttl, hops, elapsed, query); paulo@0: } paulo@0: paulo@0: return results; paulo@0: } paulo@0: paulo@0: /******************************************************************************/ paulo@0: paulo@0: static void add_share (Trie *trie, char *tok, Share *share) paulo@0: { paulo@0: List *list; paulo@0: paulo@0: list = trie_lookup (trie, tok); paulo@0: paulo@0: /* the share may already be in the list if the filename has paulo@0: * two words that are the same */ paulo@0: if (list_find (list, share)) paulo@0: return; paulo@0: paulo@0: list = list_prepend (list, share); paulo@0: paulo@0: trie_remove (trie, tok); paulo@0: trie_insert (trie, tok, list); paulo@0: } paulo@0: paulo@0: static void del_share (Trie *trie, char *tok, Share *share) paulo@0: { paulo@0: List *list; paulo@0: paulo@0: list = trie_lookup (trie, tok); paulo@0: list = list_remove (list, share); paulo@0: paulo@0: trie_remove (trie, tok); paulo@0: paulo@0: if (!list) paulo@0: return; paulo@0: paulo@0: trie_insert (trie, tok, list); paulo@0: } paulo@0: paulo@0: static void search_trie_change (Trie *trie, Share *share, BOOL add) paulo@0: { paulo@0: char *tok; paulo@0: char *str0, *str; paulo@0: paulo@0: if (!(str0 = str = STRDUP (share_get_hpath (share)))) paulo@0: return; paulo@0: paulo@0: string_lower (str); paulo@0: paulo@0: while ((tok = string_sep_set (&str, QRP_DELIMITERS))) paulo@0: { paulo@0: if (string_isempty (tok)) paulo@0: continue; paulo@0: paulo@0: if (add) paulo@0: add_share (trie, tok, share); paulo@0: else paulo@0: del_share (trie, tok, share); paulo@0: } paulo@0: paulo@0: free (str0); paulo@0: } paulo@0: paulo@0: void gt_search_exec_add (Share *share) paulo@0: { paulo@0: search_trie_change (gt_search_trie, share, TRUE); paulo@0: } paulo@0: paulo@0: void gt_search_exec_remove (Share *share) paulo@0: { paulo@0: search_trie_change (gt_search_trie, share, FALSE); paulo@0: } paulo@0: paulo@0: void gt_search_exec_sync (void) paulo@0: { paulo@0: #if 0 paulo@0: printf ("SHARE TRIE:\n"); paulo@0: trie_print (gt_search_trie); paulo@0: #endif paulo@0: } paulo@0: paulo@0: void gt_search_exec_init (void) paulo@0: { paulo@0: gt_search_trie = trie_new (); paulo@0: paulo@0: search_sw = stopwatch_new (FALSE); paulo@0: } paulo@0: paulo@0: void gt_search_exec_cleanup (void) paulo@0: { paulo@0: trie_free (gt_search_trie); paulo@0: gt_search_trie = NULL; paulo@0: paulo@0: stopwatch_free (search_sw); paulo@0: search_sw = NULL; paulo@0: }