Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
comparison src/gt_search_exec.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3151ebfc5c18 |
---|---|
1 /* | |
2 * $Id: gt_search_exec.c,v 1.23 2004/01/04 05:15:28 hipnod Exp $ | |
3 * | |
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify it | |
7 * under the terms of the GNU General Public License as published by the | |
8 * Free Software Foundation; either version 2, or (at your option) any | |
9 * later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, but | |
12 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * General Public License for more details. | |
15 */ | |
16 | |
17 #include "gt_gnutella.h" | |
18 | |
19 #include "gt_search.h" | |
20 #include "gt_search_exec.h" | |
21 | |
22 #include "gt_query_route.h" /* QRP_DELIMITERS */ | |
23 #include "gt_share.h" /* gt_share_local_lookup_by_urn */ | |
24 #include "gt_share_file.h" | |
25 | |
26 #include "sha1.h" /* sha1_string */ | |
27 #include "trie.h" | |
28 | |
29 #include <libgift/stopwatch.h> | |
30 | |
31 /******************************************************************************/ | |
32 | |
33 #define MAX_RESULTS 200 | |
34 | |
35 /******************************************************************************/ | |
36 | |
37 static Trie *gt_search_trie; | |
38 | |
39 static StopWatch *search_sw; | |
40 | |
41 /******************************************************************************/ | |
42 | |
43 GtTokenSet *gt_token_set_new () | |
44 { | |
45 GtTokenSet *ts; | |
46 | |
47 if (!(ts = MALLOC (sizeof (GtTokenSet)))) | |
48 return NULL; | |
49 | |
50 return ts; | |
51 } | |
52 | |
53 void gt_token_set_free (GtTokenSet *ts) | |
54 { | |
55 if (!ts) | |
56 return; | |
57 | |
58 free (ts->data); | |
59 free (ts); | |
60 } | |
61 | |
62 void gt_token_set_append (GtTokenSet *ts, uint32_t token) | |
63 { | |
64 if (ts->len >= ts->data_len) | |
65 { | |
66 uint32_t *new_tokens; | |
67 | |
68 ts->data_len += 8; | |
69 new_tokens = realloc (ts->data, ts->data_len * sizeof (uint32_t)); | |
70 | |
71 assert (new_tokens != NULL); | |
72 ts->data = new_tokens; | |
73 } | |
74 | |
75 ts->data[ts->len++] = token; | |
76 } | |
77 | |
78 /******************************************************************************/ | |
79 | |
80 static List *by_hash (unsigned char *hash, size_t *n) | |
81 { | |
82 FileShare *file; | |
83 char *str; | |
84 char *urn; | |
85 | |
86 *n = 0; | |
87 | |
88 /* ugly that this is so sha1 specific:need a urn abstraction desperately */ | |
89 if (!(str = sha1_string (hash))) | |
90 return NULL; | |
91 | |
92 urn = stringf_dup ("urn:sha1:%s", str); | |
93 free (str); | |
94 | |
95 if (!(file = gt_share_local_lookup_by_urn (urn))) | |
96 { | |
97 free (urn); | |
98 return NULL; | |
99 } | |
100 | |
101 if (LOG_RESULTS) | |
102 { | |
103 GT->DBGFN (GT, "Wuh-HOO! Answered a query-by-hash (%s) for (%s)", | |
104 urn, share_get_hpath (file)); | |
105 } | |
106 | |
107 *n = 1; | |
108 free (urn); | |
109 | |
110 return list_append (NULL, file); | |
111 } | |
112 | |
113 /******************************************************************************/ | |
114 | |
115 /* some defines for list_find_custom that work like DS_BREAK, etc. */ | |
116 #define LIST_BREAK 0 | |
117 #define LIST_CONTINUE 1 | |
118 | |
119 static int search_slowly (Share *file, void **cmp) | |
120 { | |
121 GtTokenSet *tokens = cmp[0]; | |
122 List **results = cmp[1]; | |
123 int *max_results = cmp[2]; | |
124 int *count = cmp[3]; | |
125 GtShare *share; | |
126 int i, j; | |
127 int matched; | |
128 GtTokenSet *cmp_tokens; | |
129 | |
130 if (*count >= *max_results) | |
131 return LIST_BREAK; | |
132 | |
133 if (!(share = share_get_udata (file, GT->name))) | |
134 return LIST_CONTINUE; | |
135 | |
136 cmp_tokens = share->tokens; | |
137 matched = 0; | |
138 | |
139 for (i = 0; i < tokens->len; i++) | |
140 { | |
141 int old_matched = matched; | |
142 | |
143 for (j = 0; j < cmp_tokens->len; j++) | |
144 { | |
145 if (tokens->data[i] == cmp_tokens->data[j]) | |
146 { | |
147 matched++; | |
148 break; | |
149 } | |
150 } | |
151 | |
152 if (matched == old_matched) | |
153 break; | |
154 } | |
155 | |
156 if (matched >= tokens->len) | |
157 { | |
158 *results = list_prepend (*results, file); | |
159 (*count)++; | |
160 } | |
161 | |
162 return LIST_CONTINUE; | |
163 } | |
164 | |
165 /* | |
166 * Find the smallest list corresponding to a word in the query | |
167 * that is in the trie of shares. | |
168 */ | |
169 static List *find_smallest (char *query) | |
170 { | |
171 char *str; | |
172 char *str0; | |
173 List *smallest = NULL; | |
174 size_t smallest_size = 0; | |
175 char *tok; | |
176 size_t size; | |
177 | |
178 if (!(str = str0 = STRDUP (query))) | |
179 return NULL; | |
180 | |
181 string_lower (str); | |
182 | |
183 while ((tok = string_sep_set (&str, QRP_DELIMITERS))) | |
184 { | |
185 List *list; | |
186 | |
187 if (string_isempty (tok)) | |
188 continue; | |
189 | |
190 if (!(list = trie_lookup (gt_search_trie, tok))) | |
191 { | |
192 /* no shares have this token: therefore no match */ | |
193 smallest = NULL; | |
194 smallest_size = 0; | |
195 break; | |
196 } | |
197 | |
198 if ((size = list_length (list)) < smallest_size || smallest_size == 0) | |
199 { | |
200 /* keep this one */ | |
201 smallest_size = size; | |
202 smallest = list; | |
203 } | |
204 } | |
205 | |
206 free (str0); | |
207 | |
208 if (LOG_RESULTS) | |
209 GT->DBGFN (GT, "scanning list of %d size", smallest_size); | |
210 | |
211 return smallest; | |
212 } | |
213 | |
214 /* | |
215 * Look up the list of all shares for each word in the query in the | |
216 * Trie of all local shares. Find the keyword with the smallest | |
217 * list, then iterate the list looking for shares. | |
218 */ | |
219 static List *by_keyword (char *query, size_t max_results, size_t *n) | |
220 { | |
221 GtTokenSet *tokens; | |
222 List *results = NULL; | |
223 List *smallest; | |
224 void *cmp[4]; | |
225 | |
226 if (!query || string_isempty (query)) | |
227 return NULL; | |
228 | |
229 if (!(tokens = gt_share_tokenize (query))) | |
230 return NULL; | |
231 | |
232 cmp[0] = tokens; | |
233 cmp[1] = &results; | |
234 cmp[2] = &max_results; | |
235 cmp[3] = n; | |
236 | |
237 smallest = find_smallest (query); | |
238 list_find_custom (smallest, cmp, (ListForeachFunc)search_slowly); | |
239 | |
240 gt_token_set_free (tokens); | |
241 return results; | |
242 } | |
243 | |
244 List *gt_search_exec (char *query, gt_search_type_t type, void *extended, | |
245 uint8_t ttl, uint8_t hops) | |
246 { | |
247 List *results; | |
248 int max_res = MAX_RESULTS; | |
249 double elapsed; | |
250 unsigned char *hash; | |
251 size_t n = 0; | |
252 | |
253 stopwatch_start (search_sw); | |
254 | |
255 hash = extended; | |
256 | |
257 switch (type) | |
258 { | |
259 case GT_SEARCH_KEYWORD: results = by_keyword (query, max_res, &n); break; | |
260 case GT_SEARCH_HASH: results = by_hash (hash, &n); break; | |
261 default: abort (); | |
262 } | |
263 | |
264 stopwatch_stop (search_sw); | |
265 elapsed = stopwatch_elapsed (search_sw, NULL); | |
266 | |
267 if (LOG_RESULTS) | |
268 { | |
269 GT->dbg (GT, "results: [%03d] [%d|%d] %.06fs (%s)", n, | |
270 ttl, hops, elapsed, query); | |
271 } | |
272 | |
273 return results; | |
274 } | |
275 | |
276 /******************************************************************************/ | |
277 | |
278 static void add_share (Trie *trie, char *tok, Share *share) | |
279 { | |
280 List *list; | |
281 | |
282 list = trie_lookup (trie, tok); | |
283 | |
284 /* the share may already be in the list if the filename has | |
285 * two words that are the same */ | |
286 if (list_find (list, share)) | |
287 return; | |
288 | |
289 list = list_prepend (list, share); | |
290 | |
291 trie_remove (trie, tok); | |
292 trie_insert (trie, tok, list); | |
293 } | |
294 | |
295 static void del_share (Trie *trie, char *tok, Share *share) | |
296 { | |
297 List *list; | |
298 | |
299 list = trie_lookup (trie, tok); | |
300 list = list_remove (list, share); | |
301 | |
302 trie_remove (trie, tok); | |
303 | |
304 if (!list) | |
305 return; | |
306 | |
307 trie_insert (trie, tok, list); | |
308 } | |
309 | |
310 static void search_trie_change (Trie *trie, Share *share, BOOL add) | |
311 { | |
312 char *tok; | |
313 char *str0, *str; | |
314 | |
315 if (!(str0 = str = STRDUP (share_get_hpath (share)))) | |
316 return; | |
317 | |
318 string_lower (str); | |
319 | |
320 while ((tok = string_sep_set (&str, QRP_DELIMITERS))) | |
321 { | |
322 if (string_isempty (tok)) | |
323 continue; | |
324 | |
325 if (add) | |
326 add_share (trie, tok, share); | |
327 else | |
328 del_share (trie, tok, share); | |
329 } | |
330 | |
331 free (str0); | |
332 } | |
333 | |
334 void gt_search_exec_add (Share *share) | |
335 { | |
336 search_trie_change (gt_search_trie, share, TRUE); | |
337 } | |
338 | |
339 void gt_search_exec_remove (Share *share) | |
340 { | |
341 search_trie_change (gt_search_trie, share, FALSE); | |
342 } | |
343 | |
344 void gt_search_exec_sync (void) | |
345 { | |
346 #if 0 | |
347 printf ("SHARE TRIE:\n"); | |
348 trie_print (gt_search_trie); | |
349 #endif | |
350 } | |
351 | |
352 void gt_search_exec_init (void) | |
353 { | |
354 gt_search_trie = trie_new (); | |
355 | |
356 search_sw = stopwatch_new (FALSE); | |
357 } | |
358 | |
359 void gt_search_exec_cleanup (void) | |
360 { | |
361 trie_free (gt_search_trie); | |
362 gt_search_trie = NULL; | |
363 | |
364 stopwatch_free (search_sw); | |
365 search_sw = NULL; | |
366 } |