rev |
line source |
paulo@0
|
1 /*
|
paulo@0
|
2 * $Id: gt_search_exec.c,v 1.23 2004/01/04 05:15:28 hipnod Exp $
|
paulo@0
|
3 *
|
paulo@0
|
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
|
paulo@0
|
5 *
|
paulo@0
|
6 * This program is free software; you can redistribute it and/or modify it
|
paulo@0
|
7 * under the terms of the GNU General Public License as published by the
|
paulo@0
|
8 * Free Software Foundation; either version 2, or (at your option) any
|
paulo@0
|
9 * later version.
|
paulo@0
|
10 *
|
paulo@0
|
11 * This program is distributed in the hope that it will be useful, but
|
paulo@0
|
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
|
paulo@0
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
paulo@0
|
14 * General Public License for more details.
|
paulo@0
|
15 */
|
paulo@0
|
16
|
paulo@0
|
17 #include "gt_gnutella.h"
|
paulo@0
|
18
|
paulo@0
|
19 #include "gt_search.h"
|
paulo@0
|
20 #include "gt_search_exec.h"
|
paulo@0
|
21
|
paulo@0
|
22 #include "gt_query_route.h" /* QRP_DELIMITERS */
|
paulo@0
|
23 #include "gt_share.h" /* gt_share_local_lookup_by_urn */
|
paulo@0
|
24 #include "gt_share_file.h"
|
paulo@0
|
25
|
paulo@0
|
26 #include "sha1.h" /* sha1_string */
|
paulo@0
|
27 #include "trie.h"
|
paulo@0
|
28
|
paulo@0
|
29 #include <libgift/stopwatch.h>
|
paulo@0
|
30
|
paulo@0
|
31 /******************************************************************************/
|
paulo@0
|
32
|
paulo@0
|
33 #define MAX_RESULTS 200
|
paulo@0
|
34
|
paulo@0
|
35 /******************************************************************************/
|
paulo@0
|
36
|
paulo@0
|
37 static Trie *gt_search_trie;
|
paulo@0
|
38
|
paulo@0
|
39 static StopWatch *search_sw;
|
paulo@0
|
40
|
paulo@0
|
41 /******************************************************************************/
|
paulo@0
|
42
|
paulo@0
|
43 GtTokenSet *gt_token_set_new ()
|
paulo@0
|
44 {
|
paulo@0
|
45 GtTokenSet *ts;
|
paulo@0
|
46
|
paulo@0
|
47 if (!(ts = MALLOC (sizeof (GtTokenSet))))
|
paulo@0
|
48 return NULL;
|
paulo@0
|
49
|
paulo@0
|
50 return ts;
|
paulo@0
|
51 }
|
paulo@0
|
52
|
paulo@0
|
53 void gt_token_set_free (GtTokenSet *ts)
|
paulo@0
|
54 {
|
paulo@0
|
55 if (!ts)
|
paulo@0
|
56 return;
|
paulo@0
|
57
|
paulo@0
|
58 free (ts->data);
|
paulo@0
|
59 free (ts);
|
paulo@0
|
60 }
|
paulo@0
|
61
|
paulo@0
|
62 void gt_token_set_append (GtTokenSet *ts, uint32_t token)
|
paulo@0
|
63 {
|
paulo@0
|
64 if (ts->len >= ts->data_len)
|
paulo@0
|
65 {
|
paulo@0
|
66 uint32_t *new_tokens;
|
paulo@0
|
67
|
paulo@0
|
68 ts->data_len += 8;
|
paulo@0
|
69 new_tokens = realloc (ts->data, ts->data_len * sizeof (uint32_t));
|
paulo@0
|
70
|
paulo@0
|
71 assert (new_tokens != NULL);
|
paulo@0
|
72 ts->data = new_tokens;
|
paulo@0
|
73 }
|
paulo@0
|
74
|
paulo@0
|
75 ts->data[ts->len++] = token;
|
paulo@0
|
76 }
|
paulo@0
|
77
|
paulo@0
|
78 /******************************************************************************/
|
paulo@0
|
79
|
paulo@0
|
80 static List *by_hash (unsigned char *hash, size_t *n)
|
paulo@0
|
81 {
|
paulo@0
|
82 FileShare *file;
|
paulo@0
|
83 char *str;
|
paulo@0
|
84 char *urn;
|
paulo@0
|
85
|
paulo@0
|
86 *n = 0;
|
paulo@0
|
87
|
paulo@0
|
88 /* ugly that this is so sha1 specific:need a urn abstraction desperately */
|
paulo@0
|
89 if (!(str = sha1_string (hash)))
|
paulo@0
|
90 return NULL;
|
paulo@0
|
91
|
paulo@0
|
92 urn = stringf_dup ("urn:sha1:%s", str);
|
paulo@0
|
93 free (str);
|
paulo@0
|
94
|
paulo@0
|
95 if (!(file = gt_share_local_lookup_by_urn (urn)))
|
paulo@0
|
96 {
|
paulo@0
|
97 free (urn);
|
paulo@0
|
98 return NULL;
|
paulo@0
|
99 }
|
paulo@0
|
100
|
paulo@0
|
101 if (LOG_RESULTS)
|
paulo@0
|
102 {
|
paulo@0
|
103 GT->DBGFN (GT, "Wuh-HOO! Answered a query-by-hash (%s) for (%s)",
|
paulo@0
|
104 urn, share_get_hpath (file));
|
paulo@0
|
105 }
|
paulo@0
|
106
|
paulo@0
|
107 *n = 1;
|
paulo@0
|
108 free (urn);
|
paulo@0
|
109
|
paulo@0
|
110 return list_append (NULL, file);
|
paulo@0
|
111 }
|
paulo@0
|
112
|
paulo@0
|
113 /******************************************************************************/
|
paulo@0
|
114
|
paulo@0
|
115 /* some defines for list_find_custom that work like DS_BREAK, etc. */
|
paulo@0
|
116 #define LIST_BREAK 0
|
paulo@0
|
117 #define LIST_CONTINUE 1
|
paulo@0
|
118
|
paulo@0
|
119 static int search_slowly (Share *file, void **cmp)
|
paulo@0
|
120 {
|
paulo@0
|
121 GtTokenSet *tokens = cmp[0];
|
paulo@0
|
122 List **results = cmp[1];
|
paulo@0
|
123 int *max_results = cmp[2];
|
paulo@0
|
124 int *count = cmp[3];
|
paulo@0
|
125 GtShare *share;
|
paulo@0
|
126 int i, j;
|
paulo@0
|
127 int matched;
|
paulo@0
|
128 GtTokenSet *cmp_tokens;
|
paulo@0
|
129
|
paulo@0
|
130 if (*count >= *max_results)
|
paulo@0
|
131 return LIST_BREAK;
|
paulo@0
|
132
|
paulo@0
|
133 if (!(share = share_get_udata (file, GT->name)))
|
paulo@0
|
134 return LIST_CONTINUE;
|
paulo@0
|
135
|
paulo@0
|
136 cmp_tokens = share->tokens;
|
paulo@0
|
137 matched = 0;
|
paulo@0
|
138
|
paulo@0
|
139 for (i = 0; i < tokens->len; i++)
|
paulo@0
|
140 {
|
paulo@0
|
141 int old_matched = matched;
|
paulo@0
|
142
|
paulo@0
|
143 for (j = 0; j < cmp_tokens->len; j++)
|
paulo@0
|
144 {
|
paulo@0
|
145 if (tokens->data[i] == cmp_tokens->data[j])
|
paulo@0
|
146 {
|
paulo@0
|
147 matched++;
|
paulo@0
|
148 break;
|
paulo@0
|
149 }
|
paulo@0
|
150 }
|
paulo@0
|
151
|
paulo@0
|
152 if (matched == old_matched)
|
paulo@0
|
153 break;
|
paulo@0
|
154 }
|
paulo@0
|
155
|
paulo@0
|
156 if (matched >= tokens->len)
|
paulo@0
|
157 {
|
paulo@0
|
158 *results = list_prepend (*results, file);
|
paulo@0
|
159 (*count)++;
|
paulo@0
|
160 }
|
paulo@0
|
161
|
paulo@0
|
162 return LIST_CONTINUE;
|
paulo@0
|
163 }
|
paulo@0
|
164
|
paulo@0
|
165 /*
|
paulo@0
|
166 * Find the smallest list corresponding to a word in the query
|
paulo@0
|
167 * that is in the trie of shares.
|
paulo@0
|
168 */
|
paulo@0
|
169 static List *find_smallest (char *query)
|
paulo@0
|
170 {
|
paulo@0
|
171 char *str;
|
paulo@0
|
172 char *str0;
|
paulo@0
|
173 List *smallest = NULL;
|
paulo@0
|
174 size_t smallest_size = 0;
|
paulo@0
|
175 char *tok;
|
paulo@0
|
176 size_t size;
|
paulo@0
|
177
|
paulo@0
|
178 if (!(str = str0 = STRDUP (query)))
|
paulo@0
|
179 return NULL;
|
paulo@0
|
180
|
paulo@0
|
181 string_lower (str);
|
paulo@0
|
182
|
paulo@0
|
183 while ((tok = string_sep_set (&str, QRP_DELIMITERS)))
|
paulo@0
|
184 {
|
paulo@0
|
185 List *list;
|
paulo@0
|
186
|
paulo@0
|
187 if (string_isempty (tok))
|
paulo@0
|
188 continue;
|
paulo@0
|
189
|
paulo@0
|
190 if (!(list = trie_lookup (gt_search_trie, tok)))
|
paulo@0
|
191 {
|
paulo@0
|
192 /* no shares have this token: therefore no match */
|
paulo@0
|
193 smallest = NULL;
|
paulo@0
|
194 smallest_size = 0;
|
paulo@0
|
195 break;
|
paulo@0
|
196 }
|
paulo@0
|
197
|
paulo@0
|
198 if ((size = list_length (list)) < smallest_size || smallest_size == 0)
|
paulo@0
|
199 {
|
paulo@0
|
200 /* keep this one */
|
paulo@0
|
201 smallest_size = size;
|
paulo@0
|
202 smallest = list;
|
paulo@0
|
203 }
|
paulo@0
|
204 }
|
paulo@0
|
205
|
paulo@0
|
206 free (str0);
|
paulo@0
|
207
|
paulo@0
|
208 if (LOG_RESULTS)
|
paulo@0
|
209 GT->DBGFN (GT, "scanning list of %d size", smallest_size);
|
paulo@0
|
210
|
paulo@0
|
211 return smallest;
|
paulo@0
|
212 }
|
paulo@0
|
213
|
paulo@0
|
214 /*
|
paulo@0
|
215 * Look up the list of all shares for each word in the query in the
|
paulo@0
|
216 * Trie of all local shares. Find the keyword with the smallest
|
paulo@0
|
217 * list, then iterate the list looking for shares.
|
paulo@0
|
218 */
|
paulo@0
|
219 static List *by_keyword (char *query, size_t max_results, size_t *n)
|
paulo@0
|
220 {
|
paulo@0
|
221 GtTokenSet *tokens;
|
paulo@0
|
222 List *results = NULL;
|
paulo@0
|
223 List *smallest;
|
paulo@0
|
224 void *cmp[4];
|
paulo@0
|
225
|
paulo@0
|
226 if (!query || string_isempty (query))
|
paulo@0
|
227 return NULL;
|
paulo@0
|
228
|
paulo@0
|
229 if (!(tokens = gt_share_tokenize (query)))
|
paulo@0
|
230 return NULL;
|
paulo@0
|
231
|
paulo@0
|
232 cmp[0] = tokens;
|
paulo@0
|
233 cmp[1] = &results;
|
paulo@0
|
234 cmp[2] = &max_results;
|
paulo@0
|
235 cmp[3] = n;
|
paulo@0
|
236
|
paulo@0
|
237 smallest = find_smallest (query);
|
paulo@0
|
238 list_find_custom (smallest, cmp, (ListForeachFunc)search_slowly);
|
paulo@0
|
239
|
paulo@0
|
240 gt_token_set_free (tokens);
|
paulo@0
|
241 return results;
|
paulo@0
|
242 }
|
paulo@0
|
243
|
paulo@0
|
244 List *gt_search_exec (char *query, gt_search_type_t type, void *extended,
|
paulo@0
|
245 uint8_t ttl, uint8_t hops)
|
paulo@0
|
246 {
|
paulo@0
|
247 List *results;
|
paulo@0
|
248 int max_res = MAX_RESULTS;
|
paulo@0
|
249 double elapsed;
|
paulo@0
|
250 unsigned char *hash;
|
paulo@0
|
251 size_t n = 0;
|
paulo@0
|
252
|
paulo@0
|
253 stopwatch_start (search_sw);
|
paulo@0
|
254
|
paulo@0
|
255 hash = extended;
|
paulo@0
|
256
|
paulo@0
|
257 switch (type)
|
paulo@0
|
258 {
|
paulo@0
|
259 case GT_SEARCH_KEYWORD: results = by_keyword (query, max_res, &n); break;
|
paulo@0
|
260 case GT_SEARCH_HASH: results = by_hash (hash, &n); break;
|
paulo@0
|
261 default: abort ();
|
paulo@0
|
262 }
|
paulo@0
|
263
|
paulo@0
|
264 stopwatch_stop (search_sw);
|
paulo@0
|
265 elapsed = stopwatch_elapsed (search_sw, NULL);
|
paulo@0
|
266
|
paulo@0
|
267 if (LOG_RESULTS)
|
paulo@0
|
268 {
|
paulo@0
|
269 GT->dbg (GT, "results: [%03d] [%d|%d] %.06fs (%s)", n,
|
paulo@0
|
270 ttl, hops, elapsed, query);
|
paulo@0
|
271 }
|
paulo@0
|
272
|
paulo@0
|
273 return results;
|
paulo@0
|
274 }
|
paulo@0
|
275
|
paulo@0
|
276 /******************************************************************************/
|
paulo@0
|
277
|
paulo@0
|
278 static void add_share (Trie *trie, char *tok, Share *share)
|
paulo@0
|
279 {
|
paulo@0
|
280 List *list;
|
paulo@0
|
281
|
paulo@0
|
282 list = trie_lookup (trie, tok);
|
paulo@0
|
283
|
paulo@0
|
284 /* the share may already be in the list if the filename has
|
paulo@0
|
285 * two words that are the same */
|
paulo@0
|
286 if (list_find (list, share))
|
paulo@0
|
287 return;
|
paulo@0
|
288
|
paulo@0
|
289 list = list_prepend (list, share);
|
paulo@0
|
290
|
paulo@0
|
291 trie_remove (trie, tok);
|
paulo@0
|
292 trie_insert (trie, tok, list);
|
paulo@0
|
293 }
|
paulo@0
|
294
|
paulo@0
|
295 static void del_share (Trie *trie, char *tok, Share *share)
|
paulo@0
|
296 {
|
paulo@0
|
297 List *list;
|
paulo@0
|
298
|
paulo@0
|
299 list = trie_lookup (trie, tok);
|
paulo@0
|
300 list = list_remove (list, share);
|
paulo@0
|
301
|
paulo@0
|
302 trie_remove (trie, tok);
|
paulo@0
|
303
|
paulo@0
|
304 if (!list)
|
paulo@0
|
305 return;
|
paulo@0
|
306
|
paulo@0
|
307 trie_insert (trie, tok, list);
|
paulo@0
|
308 }
|
paulo@0
|
309
|
paulo@0
|
310 static void search_trie_change (Trie *trie, Share *share, BOOL add)
|
paulo@0
|
311 {
|
paulo@0
|
312 char *tok;
|
paulo@0
|
313 char *str0, *str;
|
paulo@0
|
314
|
paulo@0
|
315 if (!(str0 = str = STRDUP (share_get_hpath (share))))
|
paulo@0
|
316 return;
|
paulo@0
|
317
|
paulo@0
|
318 string_lower (str);
|
paulo@0
|
319
|
paulo@0
|
320 while ((tok = string_sep_set (&str, QRP_DELIMITERS)))
|
paulo@0
|
321 {
|
paulo@0
|
322 if (string_isempty (tok))
|
paulo@0
|
323 continue;
|
paulo@0
|
324
|
paulo@0
|
325 if (add)
|
paulo@0
|
326 add_share (trie, tok, share);
|
paulo@0
|
327 else
|
paulo@0
|
328 del_share (trie, tok, share);
|
paulo@0
|
329 }
|
paulo@0
|
330
|
paulo@0
|
331 free (str0);
|
paulo@0
|
332 }
|
paulo@0
|
333
|
paulo@0
|
334 void gt_search_exec_add (Share *share)
|
paulo@0
|
335 {
|
paulo@0
|
336 search_trie_change (gt_search_trie, share, TRUE);
|
paulo@0
|
337 }
|
paulo@0
|
338
|
paulo@0
|
339 void gt_search_exec_remove (Share *share)
|
paulo@0
|
340 {
|
paulo@0
|
341 search_trie_change (gt_search_trie, share, FALSE);
|
paulo@0
|
342 }
|
paulo@0
|
343
|
paulo@0
|
344 void gt_search_exec_sync (void)
|
paulo@0
|
345 {
|
paulo@0
|
346 #if 0
|
paulo@0
|
347 printf ("SHARE TRIE:\n");
|
paulo@0
|
348 trie_print (gt_search_trie);
|
paulo@0
|
349 #endif
|
paulo@0
|
350 }
|
paulo@0
|
351
|
paulo@0
|
352 void gt_search_exec_init (void)
|
paulo@0
|
353 {
|
paulo@0
|
354 gt_search_trie = trie_new ();
|
paulo@0
|
355
|
paulo@0
|
356 search_sw = stopwatch_new (FALSE);
|
paulo@0
|
357 }
|
paulo@0
|
358
|
paulo@0
|
359 void gt_search_exec_cleanup (void)
|
paulo@0
|
360 {
|
paulo@0
|
361 trie_free (gt_search_trie);
|
paulo@0
|
362 gt_search_trie = NULL;
|
paulo@0
|
363
|
paulo@0
|
364 stopwatch_free (search_sw);
|
paulo@0
|
365 search_sw = NULL;
|
paulo@0
|
366 }
|