comparison src/gt_search_exec.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3151ebfc5c18
1 /*
2 * $Id: gt_search_exec.c,v 1.23 2004/01/04 05:15:28 hipnod Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
16
17 #include "gt_gnutella.h"
18
19 #include "gt_search.h"
20 #include "gt_search_exec.h"
21
22 #include "gt_query_route.h" /* QRP_DELIMITERS */
23 #include "gt_share.h" /* gt_share_local_lookup_by_urn */
24 #include "gt_share_file.h"
25
26 #include "sha1.h" /* sha1_string */
27 #include "trie.h"
28
29 #include <libgift/stopwatch.h>
30
31 /******************************************************************************/
32
33 #define MAX_RESULTS 200
34
35 /******************************************************************************/
36
37 static Trie *gt_search_trie;
38
39 static StopWatch *search_sw;
40
41 /******************************************************************************/
42
43 GtTokenSet *gt_token_set_new ()
44 {
45 GtTokenSet *ts;
46
47 if (!(ts = MALLOC (sizeof (GtTokenSet))))
48 return NULL;
49
50 return ts;
51 }
52
53 void gt_token_set_free (GtTokenSet *ts)
54 {
55 if (!ts)
56 return;
57
58 free (ts->data);
59 free (ts);
60 }
61
62 void gt_token_set_append (GtTokenSet *ts, uint32_t token)
63 {
64 if (ts->len >= ts->data_len)
65 {
66 uint32_t *new_tokens;
67
68 ts->data_len += 8;
69 new_tokens = realloc (ts->data, ts->data_len * sizeof (uint32_t));
70
71 assert (new_tokens != NULL);
72 ts->data = new_tokens;
73 }
74
75 ts->data[ts->len++] = token;
76 }
77
78 /******************************************************************************/
79
80 static List *by_hash (unsigned char *hash, size_t *n)
81 {
82 FileShare *file;
83 char *str;
84 char *urn;
85
86 *n = 0;
87
88 /* ugly that this is so sha1 specific:need a urn abstraction desperately */
89 if (!(str = sha1_string (hash)))
90 return NULL;
91
92 urn = stringf_dup ("urn:sha1:%s", str);
93 free (str);
94
95 if (!(file = gt_share_local_lookup_by_urn (urn)))
96 {
97 free (urn);
98 return NULL;
99 }
100
101 if (LOG_RESULTS)
102 {
103 GT->DBGFN (GT, "Wuh-HOO! Answered a query-by-hash (%s) for (%s)",
104 urn, share_get_hpath (file));
105 }
106
107 *n = 1;
108 free (urn);
109
110 return list_append (NULL, file);
111 }
112
113 /******************************************************************************/
114
115 /* some defines for list_find_custom that work like DS_BREAK, etc. */
116 #define LIST_BREAK 0
117 #define LIST_CONTINUE 1
118
119 static int search_slowly (Share *file, void **cmp)
120 {
121 GtTokenSet *tokens = cmp[0];
122 List **results = cmp[1];
123 int *max_results = cmp[2];
124 int *count = cmp[3];
125 GtShare *share;
126 int i, j;
127 int matched;
128 GtTokenSet *cmp_tokens;
129
130 if (*count >= *max_results)
131 return LIST_BREAK;
132
133 if (!(share = share_get_udata (file, GT->name)))
134 return LIST_CONTINUE;
135
136 cmp_tokens = share->tokens;
137 matched = 0;
138
139 for (i = 0; i < tokens->len; i++)
140 {
141 int old_matched = matched;
142
143 for (j = 0; j < cmp_tokens->len; j++)
144 {
145 if (tokens->data[i] == cmp_tokens->data[j])
146 {
147 matched++;
148 break;
149 }
150 }
151
152 if (matched == old_matched)
153 break;
154 }
155
156 if (matched >= tokens->len)
157 {
158 *results = list_prepend (*results, file);
159 (*count)++;
160 }
161
162 return LIST_CONTINUE;
163 }
164
165 /*
166 * Find the smallest list corresponding to a word in the query
167 * that is in the trie of shares.
168 */
169 static List *find_smallest (char *query)
170 {
171 char *str;
172 char *str0;
173 List *smallest = NULL;
174 size_t smallest_size = 0;
175 char *tok;
176 size_t size;
177
178 if (!(str = str0 = STRDUP (query)))
179 return NULL;
180
181 string_lower (str);
182
183 while ((tok = string_sep_set (&str, QRP_DELIMITERS)))
184 {
185 List *list;
186
187 if (string_isempty (tok))
188 continue;
189
190 if (!(list = trie_lookup (gt_search_trie, tok)))
191 {
192 /* no shares have this token: therefore no match */
193 smallest = NULL;
194 smallest_size = 0;
195 break;
196 }
197
198 if ((size = list_length (list)) < smallest_size || smallest_size == 0)
199 {
200 /* keep this one */
201 smallest_size = size;
202 smallest = list;
203 }
204 }
205
206 free (str0);
207
208 if (LOG_RESULTS)
209 GT->DBGFN (GT, "scanning list of %d size", smallest_size);
210
211 return smallest;
212 }
213
214 /*
215 * Look up the list of all shares for each word in the query in the
216 * Trie of all local shares. Find the keyword with the smallest
217 * list, then iterate the list looking for shares.
218 */
219 static List *by_keyword (char *query, size_t max_results, size_t *n)
220 {
221 GtTokenSet *tokens;
222 List *results = NULL;
223 List *smallest;
224 void *cmp[4];
225
226 if (!query || string_isempty (query))
227 return NULL;
228
229 if (!(tokens = gt_share_tokenize (query)))
230 return NULL;
231
232 cmp[0] = tokens;
233 cmp[1] = &results;
234 cmp[2] = &max_results;
235 cmp[3] = n;
236
237 smallest = find_smallest (query);
238 list_find_custom (smallest, cmp, (ListForeachFunc)search_slowly);
239
240 gt_token_set_free (tokens);
241 return results;
242 }
243
244 List *gt_search_exec (char *query, gt_search_type_t type, void *extended,
245 uint8_t ttl, uint8_t hops)
246 {
247 List *results;
248 int max_res = MAX_RESULTS;
249 double elapsed;
250 unsigned char *hash;
251 size_t n = 0;
252
253 stopwatch_start (search_sw);
254
255 hash = extended;
256
257 switch (type)
258 {
259 case GT_SEARCH_KEYWORD: results = by_keyword (query, max_res, &n); break;
260 case GT_SEARCH_HASH: results = by_hash (hash, &n); break;
261 default: abort ();
262 }
263
264 stopwatch_stop (search_sw);
265 elapsed = stopwatch_elapsed (search_sw, NULL);
266
267 if (LOG_RESULTS)
268 {
269 GT->dbg (GT, "results: [%03d] [%d|%d] %.06fs (%s)", n,
270 ttl, hops, elapsed, query);
271 }
272
273 return results;
274 }
275
276 /******************************************************************************/
277
278 static void add_share (Trie *trie, char *tok, Share *share)
279 {
280 List *list;
281
282 list = trie_lookup (trie, tok);
283
284 /* the share may already be in the list if the filename has
285 * two words that are the same */
286 if (list_find (list, share))
287 return;
288
289 list = list_prepend (list, share);
290
291 trie_remove (trie, tok);
292 trie_insert (trie, tok, list);
293 }
294
295 static void del_share (Trie *trie, char *tok, Share *share)
296 {
297 List *list;
298
299 list = trie_lookup (trie, tok);
300 list = list_remove (list, share);
301
302 trie_remove (trie, tok);
303
304 if (!list)
305 return;
306
307 trie_insert (trie, tok, list);
308 }
309
310 static void search_trie_change (Trie *trie, Share *share, BOOL add)
311 {
312 char *tok;
313 char *str0, *str;
314
315 if (!(str0 = str = STRDUP (share_get_hpath (share))))
316 return;
317
318 string_lower (str);
319
320 while ((tok = string_sep_set (&str, QRP_DELIMITERS)))
321 {
322 if (string_isempty (tok))
323 continue;
324
325 if (add)
326 add_share (trie, tok, share);
327 else
328 del_share (trie, tok, share);
329 }
330
331 free (str0);
332 }
333
334 void gt_search_exec_add (Share *share)
335 {
336 search_trie_change (gt_search_trie, share, TRUE);
337 }
338
339 void gt_search_exec_remove (Share *share)
340 {
341 search_trie_change (gt_search_trie, share, FALSE);
342 }
343
344 void gt_search_exec_sync (void)
345 {
346 #if 0
347 printf ("SHARE TRIE:\n");
348 trie_print (gt_search_trie);
349 #endif
350 }
351
352 void gt_search_exec_init (void)
353 {
354 gt_search_trie = trie_new ();
355
356 search_sw = stopwatch_new (FALSE);
357 }
358
359 void gt_search_exec_cleanup (void)
360 {
361 trie_free (gt_search_trie);
362 gt_search_trie = NULL;
363
364 stopwatch_free (search_sw);
365 search_sw = NULL;
366 }