Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
diff src/gt_search_exec.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/gt_search_exec.c Sat Feb 20 21:18:28 2010 -0800 1.3 @@ -0,0 +1,366 @@ 1.4 +/* 1.5 + * $Id: gt_search_exec.c,v 1.23 2004/01/04 05:15:28 hipnod Exp $ 1.6 + * 1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) 1.8 + * 1.9 + * This program is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License as published by the 1.11 + * Free Software Foundation; either version 2, or (at your option) any 1.12 + * later version. 1.13 + * 1.14 + * This program is distributed in the hope that it will be useful, but 1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of 1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.17 + * General Public License for more details. 1.18 + */ 1.19 + 1.20 +#include "gt_gnutella.h" 1.21 + 1.22 +#include "gt_search.h" 1.23 +#include "gt_search_exec.h" 1.24 + 1.25 +#include "gt_query_route.h" /* QRP_DELIMITERS */ 1.26 +#include "gt_share.h" /* gt_share_local_lookup_by_urn */ 1.27 +#include "gt_share_file.h" 1.28 + 1.29 +#include "sha1.h" /* sha1_string */ 1.30 +#include "trie.h" 1.31 + 1.32 +#include <libgift/stopwatch.h> 1.33 + 1.34 +/******************************************************************************/ 1.35 + 1.36 +#define MAX_RESULTS 200 1.37 + 1.38 +/******************************************************************************/ 1.39 + 1.40 +static Trie *gt_search_trie; 1.41 + 1.42 +static StopWatch *search_sw; 1.43 + 1.44 +/******************************************************************************/ 1.45 + 1.46 +GtTokenSet *gt_token_set_new () 1.47 +{ 1.48 + GtTokenSet *ts; 1.49 + 1.50 + if (!(ts = MALLOC (sizeof (GtTokenSet)))) 1.51 + return NULL; 1.52 + 1.53 + return ts; 1.54 +} 1.55 + 1.56 +void gt_token_set_free (GtTokenSet *ts) 1.57 +{ 1.58 + if (!ts) 1.59 + return; 1.60 + 1.61 + free (ts->data); 1.62 + free (ts); 1.63 +} 1.64 + 1.65 +void gt_token_set_append (GtTokenSet *ts, uint32_t token) 1.66 +{ 1.67 + if (ts->len >= ts->data_len) 1.68 + { 1.69 + uint32_t *new_tokens; 1.70 + 1.71 + ts->data_len += 8; 1.72 + new_tokens = realloc (ts->data, ts->data_len * sizeof (uint32_t)); 1.73 + 1.74 + assert (new_tokens != NULL); 1.75 + ts->data = new_tokens; 1.76 + } 1.77 + 1.78 + ts->data[ts->len++] = token; 1.79 +} 1.80 + 1.81 +/******************************************************************************/ 1.82 + 1.83 +static List *by_hash (unsigned char *hash, size_t *n) 1.84 +{ 1.85 + FileShare *file; 1.86 + char *str; 1.87 + char *urn; 1.88 + 1.89 + *n = 0; 1.90 + 1.91 + /* ugly that this is so sha1 specific:need a urn abstraction desperately */ 1.92 + if (!(str = sha1_string (hash))) 1.93 + return NULL; 1.94 + 1.95 + urn = stringf_dup ("urn:sha1:%s", str); 1.96 + free (str); 1.97 + 1.98 + if (!(file = gt_share_local_lookup_by_urn (urn))) 1.99 + { 1.100 + free (urn); 1.101 + return NULL; 1.102 + } 1.103 + 1.104 + if (LOG_RESULTS) 1.105 + { 1.106 + GT->DBGFN (GT, "Wuh-HOO! Answered a query-by-hash (%s) for (%s)", 1.107 + urn, share_get_hpath (file)); 1.108 + } 1.109 + 1.110 + *n = 1; 1.111 + free (urn); 1.112 + 1.113 + return list_append (NULL, file); 1.114 +} 1.115 + 1.116 +/******************************************************************************/ 1.117 + 1.118 +/* some defines for list_find_custom that work like DS_BREAK, etc. */ 1.119 +#define LIST_BREAK 0 1.120 +#define LIST_CONTINUE 1 1.121 + 1.122 +static int search_slowly (Share *file, void **cmp) 1.123 +{ 1.124 + GtTokenSet *tokens = cmp[0]; 1.125 + List **results = cmp[1]; 1.126 + int *max_results = cmp[2]; 1.127 + int *count = cmp[3]; 1.128 + GtShare *share; 1.129 + int i, j; 1.130 + int matched; 1.131 + GtTokenSet *cmp_tokens; 1.132 + 1.133 + if (*count >= *max_results) 1.134 + return LIST_BREAK; 1.135 + 1.136 + if (!(share = share_get_udata (file, GT->name))) 1.137 + return LIST_CONTINUE; 1.138 + 1.139 + cmp_tokens = share->tokens; 1.140 + matched = 0; 1.141 + 1.142 + for (i = 0; i < tokens->len; i++) 1.143 + { 1.144 + int old_matched = matched; 1.145 + 1.146 + for (j = 0; j < cmp_tokens->len; j++) 1.147 + { 1.148 + if (tokens->data[i] == cmp_tokens->data[j]) 1.149 + { 1.150 + matched++; 1.151 + break; 1.152 + } 1.153 + } 1.154 + 1.155 + if (matched == old_matched) 1.156 + break; 1.157 + } 1.158 + 1.159 + if (matched >= tokens->len) 1.160 + { 1.161 + *results = list_prepend (*results, file); 1.162 + (*count)++; 1.163 + } 1.164 + 1.165 + return LIST_CONTINUE; 1.166 +} 1.167 + 1.168 +/* 1.169 + * Find the smallest list corresponding to a word in the query 1.170 + * that is in the trie of shares. 1.171 + */ 1.172 +static List *find_smallest (char *query) 1.173 +{ 1.174 + char *str; 1.175 + char *str0; 1.176 + List *smallest = NULL; 1.177 + size_t smallest_size = 0; 1.178 + char *tok; 1.179 + size_t size; 1.180 + 1.181 + if (!(str = str0 = STRDUP (query))) 1.182 + return NULL; 1.183 + 1.184 + string_lower (str); 1.185 + 1.186 + while ((tok = string_sep_set (&str, QRP_DELIMITERS))) 1.187 + { 1.188 + List *list; 1.189 + 1.190 + if (string_isempty (tok)) 1.191 + continue; 1.192 + 1.193 + if (!(list = trie_lookup (gt_search_trie, tok))) 1.194 + { 1.195 + /* no shares have this token: therefore no match */ 1.196 + smallest = NULL; 1.197 + smallest_size = 0; 1.198 + break; 1.199 + } 1.200 + 1.201 + if ((size = list_length (list)) < smallest_size || smallest_size == 0) 1.202 + { 1.203 + /* keep this one */ 1.204 + smallest_size = size; 1.205 + smallest = list; 1.206 + } 1.207 + } 1.208 + 1.209 + free (str0); 1.210 + 1.211 + if (LOG_RESULTS) 1.212 + GT->DBGFN (GT, "scanning list of %d size", smallest_size); 1.213 + 1.214 + return smallest; 1.215 +} 1.216 + 1.217 +/* 1.218 + * Look up the list of all shares for each word in the query in the 1.219 + * Trie of all local shares. Find the keyword with the smallest 1.220 + * list, then iterate the list looking for shares. 1.221 + */ 1.222 +static List *by_keyword (char *query, size_t max_results, size_t *n) 1.223 +{ 1.224 + GtTokenSet *tokens; 1.225 + List *results = NULL; 1.226 + List *smallest; 1.227 + void *cmp[4]; 1.228 + 1.229 + if (!query || string_isempty (query)) 1.230 + return NULL; 1.231 + 1.232 + if (!(tokens = gt_share_tokenize (query))) 1.233 + return NULL; 1.234 + 1.235 + cmp[0] = tokens; 1.236 + cmp[1] = &results; 1.237 + cmp[2] = &max_results; 1.238 + cmp[3] = n; 1.239 + 1.240 + smallest = find_smallest (query); 1.241 + list_find_custom (smallest, cmp, (ListForeachFunc)search_slowly); 1.242 + 1.243 + gt_token_set_free (tokens); 1.244 + return results; 1.245 +} 1.246 + 1.247 +List *gt_search_exec (char *query, gt_search_type_t type, void *extended, 1.248 + uint8_t ttl, uint8_t hops) 1.249 +{ 1.250 + List *results; 1.251 + int max_res = MAX_RESULTS; 1.252 + double elapsed; 1.253 + unsigned char *hash; 1.254 + size_t n = 0; 1.255 + 1.256 + stopwatch_start (search_sw); 1.257 + 1.258 + hash = extended; 1.259 + 1.260 + switch (type) 1.261 + { 1.262 + case GT_SEARCH_KEYWORD: results = by_keyword (query, max_res, &n); break; 1.263 + case GT_SEARCH_HASH: results = by_hash (hash, &n); break; 1.264 + default: abort (); 1.265 + } 1.266 + 1.267 + stopwatch_stop (search_sw); 1.268 + elapsed = stopwatch_elapsed (search_sw, NULL); 1.269 + 1.270 + if (LOG_RESULTS) 1.271 + { 1.272 + GT->dbg (GT, "results: [%03d] [%d|%d] %.06fs (%s)", n, 1.273 + ttl, hops, elapsed, query); 1.274 + } 1.275 + 1.276 + return results; 1.277 +} 1.278 + 1.279 +/******************************************************************************/ 1.280 + 1.281 +static void add_share (Trie *trie, char *tok, Share *share) 1.282 +{ 1.283 + List *list; 1.284 + 1.285 + list = trie_lookup (trie, tok); 1.286 + 1.287 + /* the share may already be in the list if the filename has 1.288 + * two words that are the same */ 1.289 + if (list_find (list, share)) 1.290 + return; 1.291 + 1.292 + list = list_prepend (list, share); 1.293 + 1.294 + trie_remove (trie, tok); 1.295 + trie_insert (trie, tok, list); 1.296 +} 1.297 + 1.298 +static void del_share (Trie *trie, char *tok, Share *share) 1.299 +{ 1.300 + List *list; 1.301 + 1.302 + list = trie_lookup (trie, tok); 1.303 + list = list_remove (list, share); 1.304 + 1.305 + trie_remove (trie, tok); 1.306 + 1.307 + if (!list) 1.308 + return; 1.309 + 1.310 + trie_insert (trie, tok, list); 1.311 +} 1.312 + 1.313 +static void search_trie_change (Trie *trie, Share *share, BOOL add) 1.314 +{ 1.315 + char *tok; 1.316 + char *str0, *str; 1.317 + 1.318 + if (!(str0 = str = STRDUP (share_get_hpath (share)))) 1.319 + return; 1.320 + 1.321 + string_lower (str); 1.322 + 1.323 + while ((tok = string_sep_set (&str, QRP_DELIMITERS))) 1.324 + { 1.325 + if (string_isempty (tok)) 1.326 + continue; 1.327 + 1.328 + if (add) 1.329 + add_share (trie, tok, share); 1.330 + else 1.331 + del_share (trie, tok, share); 1.332 + } 1.333 + 1.334 + free (str0); 1.335 +} 1.336 + 1.337 +void gt_search_exec_add (Share *share) 1.338 +{ 1.339 + search_trie_change (gt_search_trie, share, TRUE); 1.340 +} 1.341 + 1.342 +void gt_search_exec_remove (Share *share) 1.343 +{ 1.344 + search_trie_change (gt_search_trie, share, FALSE); 1.345 +} 1.346 + 1.347 +void gt_search_exec_sync (void) 1.348 +{ 1.349 +#if 0 1.350 + printf ("SHARE TRIE:\n"); 1.351 + trie_print (gt_search_trie); 1.352 +#endif 1.353 +} 1.354 + 1.355 +void gt_search_exec_init (void) 1.356 +{ 1.357 + gt_search_trie = trie_new (); 1.358 + 1.359 + search_sw = stopwatch_new (FALSE); 1.360 +} 1.361 + 1.362 +void gt_search_exec_cleanup (void) 1.363 +{ 1.364 + trie_free (gt_search_trie); 1.365 + gt_search_trie = NULL; 1.366 + 1.367 + stopwatch_free (search_sw); 1.368 + search_sw = NULL; 1.369 +}