rev |
line source |
paulo@0
|
1 /*
|
paulo@0
|
2 * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $
|
paulo@0
|
3 *
|
paulo@0
|
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
|
paulo@0
|
5 *
|
paulo@0
|
6 * This program is free software; you can redistribute it and/or modify it
|
paulo@0
|
7 * under the terms of the GNU General Public License as published by the
|
paulo@0
|
8 * Free Software Foundation; either version 2, or (at your option) any
|
paulo@0
|
9 * later version.
|
paulo@0
|
10 *
|
paulo@0
|
11 * This program is distributed in the hope that it will be useful, but
|
paulo@0
|
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
|
paulo@0
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
paulo@0
|
14 * General Public License for more details.
|
paulo@0
|
15 */
|
paulo@0
|
16
|
paulo@0
|
17 #ifdef STANDALONE
|
paulo@0
|
18 #include <libgift/libgift.h>
|
paulo@0
|
19 #else
|
paulo@0
|
20 #include "gt_gnutella.h"
|
paulo@0
|
21 #endif /* STANDALONE */
|
paulo@0
|
22
|
paulo@0
|
23 #include "trie.h"
|
paulo@0
|
24
|
paulo@0
|
25 /*****************************************************************************/
|
paulo@0
|
26
|
paulo@0
|
27 static Trie *trie_alloc (char c)
|
paulo@0
|
28 {
|
paulo@0
|
29 Trie *trie;
|
paulo@0
|
30
|
paulo@0
|
31 if (!(trie = MALLOC (sizeof (Trie))))
|
paulo@0
|
32 return NULL;
|
paulo@0
|
33
|
paulo@0
|
34 trie->c = c;
|
paulo@0
|
35
|
paulo@0
|
36 return trie;
|
paulo@0
|
37 }
|
paulo@0
|
38
|
paulo@0
|
39 Trie *trie_new (void)
|
paulo@0
|
40 {
|
paulo@0
|
41 /*
|
paulo@0
|
42 * Null won't match any character in the trie, so use
|
paulo@0
|
43 * that as a sentinel in the root node.
|
paulo@0
|
44 */
|
paulo@0
|
45 return trie_alloc (0);
|
paulo@0
|
46 }
|
paulo@0
|
47
|
paulo@0
|
48 static int free_children (Trie *trie, void *udata)
|
paulo@0
|
49 {
|
paulo@0
|
50 trie_free (trie);
|
paulo@0
|
51 return TRUE;
|
paulo@0
|
52 }
|
paulo@0
|
53
|
paulo@0
|
54 void trie_free (Trie *trie)
|
paulo@0
|
55 {
|
paulo@0
|
56 List *children;
|
paulo@0
|
57
|
paulo@0
|
58 if (!trie)
|
paulo@0
|
59 return;
|
paulo@0
|
60
|
paulo@0
|
61 children = trie->children;
|
paulo@0
|
62
|
paulo@0
|
63 if (trie->terminal_node)
|
paulo@0
|
64 {
|
paulo@0
|
65 /* the first data item is the data item for this node */
|
paulo@0
|
66 children = list_remove_link (children, children);
|
paulo@0
|
67 }
|
paulo@0
|
68
|
paulo@0
|
69 list_foreach_remove (children, (ListForeachFunc)free_children, NULL);
|
paulo@0
|
70 free (trie);
|
paulo@0
|
71 }
|
paulo@0
|
72
|
paulo@0
|
73 static Trie *find_node (Trie *trie, char c)
|
paulo@0
|
74 {
|
paulo@0
|
75 List *children;
|
paulo@0
|
76 List *ptr;
|
paulo@0
|
77
|
paulo@0
|
78 children = trie->children;
|
paulo@0
|
79
|
paulo@0
|
80 /*
|
paulo@0
|
81 * If this is a terminal node, skip the data list
|
paulo@0
|
82 * that is in the first position.
|
paulo@0
|
83 */
|
paulo@0
|
84 if (trie->terminal_node)
|
paulo@0
|
85 children = children->next;
|
paulo@0
|
86
|
paulo@0
|
87 /*
|
paulo@0
|
88 * Could use list_find_custom here, but we want
|
paulo@0
|
89 * this to be as fast as possible.
|
paulo@0
|
90 *
|
paulo@0
|
91 * Should probably use a realloc'd array for the
|
paulo@0
|
92 * child list instead.
|
paulo@0
|
93 */
|
paulo@0
|
94 for (ptr = children; ptr; ptr = ptr->next)
|
paulo@0
|
95 {
|
paulo@0
|
96 Trie *node = ptr->data;
|
paulo@0
|
97
|
paulo@0
|
98 if (node->c == c)
|
paulo@0
|
99 break;
|
paulo@0
|
100 }
|
paulo@0
|
101
|
paulo@0
|
102 #undef TEST_MOVE
|
paulo@0
|
103 #ifdef TEST_MOVE
|
paulo@0
|
104 if (ptr)
|
paulo@0
|
105 {
|
paulo@0
|
106 void *data = ptr->data;
|
paulo@0
|
107 int index;
|
paulo@0
|
108
|
paulo@0
|
109 /* remove from the old position */
|
paulo@0
|
110 trie->children = list_remove_link (trie->children, ptr);
|
paulo@0
|
111
|
paulo@0
|
112 /* insert at the beginning of the list, taking into account data
|
paulo@0
|
113 * list at the head if this is a terminal node */
|
paulo@0
|
114 index = trie->terminal_node;
|
paulo@0
|
115 trie->children = list_insert (trie->children, index, data);
|
paulo@0
|
116
|
paulo@0
|
117 return data;
|
paulo@0
|
118 }
|
paulo@0
|
119
|
paulo@0
|
120 return NULL;
|
paulo@0
|
121 #else
|
paulo@0
|
122 /* we directly access result->data here for efficiency */
|
paulo@0
|
123 return ptr ? ptr->data : NULL;
|
paulo@0
|
124 #endif
|
paulo@0
|
125 }
|
paulo@0
|
126
|
paulo@0
|
127 /*
|
paulo@0
|
128 * Find a node in the Trie.
|
paulo@0
|
129 *
|
paulo@0
|
130 * @param trie The Trie to look in
|
paulo@0
|
131 * @param s The string to search for
|
paulo@0
|
132 * @param alloc Whether to allocate space for non-existent nodes during
|
paulo@0
|
133 * lookup
|
paulo@0
|
134 */
|
paulo@0
|
135 static Trie *t_node_lookup (Trie *trie, char *s, int alloc)
|
paulo@0
|
136 {
|
paulo@0
|
137 Trie *result;
|
paulo@0
|
138 char c;
|
paulo@0
|
139
|
paulo@0
|
140 while ((c = *s++))
|
paulo@0
|
141 {
|
paulo@0
|
142 if (!trie)
|
paulo@0
|
143 break;
|
paulo@0
|
144
|
paulo@0
|
145 result = find_node (trie, c);
|
paulo@0
|
146
|
paulo@0
|
147 if (!result && alloc)
|
paulo@0
|
148 {
|
paulo@0
|
149 if (!(result = trie_alloc (c)))
|
paulo@0
|
150 return NULL;
|
paulo@0
|
151
|
paulo@0
|
152 trie->children = list_append (trie->children, result);
|
paulo@0
|
153 }
|
paulo@0
|
154
|
paulo@0
|
155 trie = result;
|
paulo@0
|
156 }
|
paulo@0
|
157
|
paulo@0
|
158 return trie;
|
paulo@0
|
159 }
|
paulo@0
|
160
|
paulo@0
|
161 void *trie_lookup (Trie *trie, char *s)
|
paulo@0
|
162 {
|
paulo@0
|
163 Trie *node;
|
paulo@0
|
164
|
paulo@0
|
165 node = t_node_lookup (trie, s, FALSE);
|
paulo@0
|
166
|
paulo@0
|
167 if (!node)
|
paulo@0
|
168 return NULL;
|
paulo@0
|
169
|
paulo@0
|
170 if (node->terminal_node)
|
paulo@0
|
171 return list_nth_data (node->children, 0);
|
paulo@0
|
172
|
paulo@0
|
173 return NULL;
|
paulo@0
|
174 }
|
paulo@0
|
175
|
paulo@0
|
176 void trie_insert (Trie *trie, char *s, void *value)
|
paulo@0
|
177 {
|
paulo@0
|
178 Trie *node;
|
paulo@0
|
179 void *data;
|
paulo@0
|
180 List *head;
|
paulo@0
|
181
|
paulo@0
|
182 node = t_node_lookup (trie, s, TRUE);
|
paulo@0
|
183
|
paulo@0
|
184 if (!node)
|
paulo@0
|
185 {
|
paulo@0
|
186 /* must be memory allocation error */
|
paulo@0
|
187 assert (0);
|
paulo@0
|
188 return;
|
paulo@0
|
189 }
|
paulo@0
|
190
|
paulo@0
|
191 if (!node->terminal_node)
|
paulo@0
|
192 {
|
paulo@0
|
193 /* could be a mem allocation error here... */
|
paulo@0
|
194 node->children = list_prepend (node->children, value);
|
paulo@0
|
195
|
paulo@0
|
196 /* convert this node to a terminal node, with the data list
|
paulo@0
|
197 * in the first position of ->children */
|
paulo@0
|
198 node->terminal_node = TRUE;
|
paulo@0
|
199 return;
|
paulo@0
|
200 }
|
paulo@0
|
201
|
paulo@0
|
202 /*
|
paulo@0
|
203 * This should not happen unless the user didn't call
|
paulo@0
|
204 * remove first. That may leak memory, so assert here temporarily.
|
paulo@0
|
205 */
|
paulo@0
|
206 assert (0);
|
paulo@0
|
207
|
paulo@0
|
208 head = list_nth (node->children, 0);
|
paulo@0
|
209 data = list_nth_data (node->children, 0);
|
paulo@0
|
210
|
paulo@0
|
211 /*
|
paulo@0
|
212 * Remove the first item, then insert a new list.
|
paulo@0
|
213 */
|
paulo@0
|
214 node->children = list_remove_link (node->children, head);
|
paulo@0
|
215
|
paulo@0
|
216 /* insert the new data list back */
|
paulo@0
|
217 node->children = list_prepend (node->children, value);
|
paulo@0
|
218 }
|
paulo@0
|
219
|
paulo@0
|
220 int trie_is_empty (Trie *trie)
|
paulo@0
|
221 {
|
paulo@0
|
222 if (trie->children == NULL)
|
paulo@0
|
223 return TRUE;
|
paulo@0
|
224
|
paulo@0
|
225 return FALSE;
|
paulo@0
|
226 }
|
paulo@0
|
227
|
paulo@0
|
228 static void remove_if_empty (Trie *root, Trie *child)
|
paulo@0
|
229 {
|
paulo@0
|
230 if (!trie_is_empty (child))
|
paulo@0
|
231 return;
|
paulo@0
|
232
|
paulo@0
|
233 root->children = list_remove (root->children, child);
|
paulo@0
|
234 trie_free (child);
|
paulo@0
|
235 }
|
paulo@0
|
236
|
paulo@0
|
237 static void t_remove_node (Trie *trie)
|
paulo@0
|
238 {
|
paulo@0
|
239 void *value;
|
paulo@0
|
240 List *value_ptr;
|
paulo@0
|
241
|
paulo@0
|
242 if (!trie->terminal_node)
|
paulo@0
|
243 return;
|
paulo@0
|
244
|
paulo@0
|
245 #if 0
|
paulo@0
|
246 /* this assertion is broken due to duplicates */
|
paulo@0
|
247 assert (trie->terminal_node == TRUE);
|
paulo@0
|
248 #endif
|
paulo@0
|
249
|
paulo@0
|
250 value_ptr = list_nth (trie->children, 0);
|
paulo@0
|
251 value = list_nth_data (trie->children, 0);
|
paulo@0
|
252
|
paulo@0
|
253 #if 0
|
paulo@0
|
254 /* these will falsely trigger for files that have the same
|
paulo@0
|
255 * token in them twice, and end up getting removed twice */
|
paulo@0
|
256 assert (list_length (data_list) > 0);
|
paulo@0
|
257 assert (list_find (data_list, value) != NULL);
|
paulo@0
|
258 #endif
|
paulo@0
|
259
|
paulo@0
|
260 /* remove the old data list */
|
paulo@0
|
261 trie->children = list_remove_link (trie->children, value_ptr);
|
paulo@0
|
262 trie->terminal_node = FALSE;
|
paulo@0
|
263 }
|
paulo@0
|
264
|
paulo@0
|
265 void trie_remove (Trie *trie, char *s)
|
paulo@0
|
266 {
|
paulo@0
|
267 Trie *child;
|
paulo@0
|
268
|
paulo@0
|
269 /* remove the node if we found it */
|
paulo@0
|
270 if (string_isempty (s))
|
paulo@0
|
271 {
|
paulo@0
|
272 t_remove_node (trie);
|
paulo@0
|
273 return;
|
paulo@0
|
274 }
|
paulo@0
|
275
|
paulo@0
|
276 child = find_node (trie, *s);
|
paulo@0
|
277 s++;
|
paulo@0
|
278
|
paulo@0
|
279 if (!child)
|
paulo@0
|
280 return;
|
paulo@0
|
281 #if 0
|
paulo@0
|
282 assert (child != NULL);
|
paulo@0
|
283 #endif
|
paulo@0
|
284
|
paulo@0
|
285 /* recursively remove all nodes */
|
paulo@0
|
286 trie_remove (child, s);
|
paulo@0
|
287
|
paulo@0
|
288 /* remove this node if it has no children anymore */
|
paulo@0
|
289 remove_if_empty (trie, child);
|
paulo@0
|
290 }
|
paulo@0
|
291
|
paulo@0
|
292 static void print_children (List *children)
|
paulo@0
|
293 {
|
paulo@0
|
294 List *ptr;
|
paulo@0
|
295 int printed_open = FALSE;
|
paulo@0
|
296 Trie *trie;
|
paulo@0
|
297
|
paulo@0
|
298 for (ptr = children; ptr; ptr = list_next (ptr))
|
paulo@0
|
299 {
|
paulo@0
|
300 if (!printed_open)
|
paulo@0
|
301 {
|
paulo@0
|
302 printf ("{ ");
|
paulo@0
|
303 printed_open = TRUE;
|
paulo@0
|
304 }
|
paulo@0
|
305
|
paulo@0
|
306 trie = list_nth_data (ptr, 0);
|
paulo@0
|
307 trie_print (trie);
|
paulo@0
|
308
|
paulo@0
|
309 if (list_next (ptr))
|
paulo@0
|
310 printf(",");
|
paulo@0
|
311 }
|
paulo@0
|
312
|
paulo@0
|
313 if (children)
|
paulo@0
|
314 printf (" }");
|
paulo@0
|
315 }
|
paulo@0
|
316
|
paulo@0
|
317 void trie_print (Trie *trie)
|
paulo@0
|
318 {
|
paulo@0
|
319 List *children;
|
paulo@0
|
320
|
paulo@0
|
321 if (trie->c)
|
paulo@0
|
322 printf ("%c", trie->c);
|
paulo@0
|
323
|
paulo@0
|
324 children = trie->children;
|
paulo@0
|
325
|
paulo@0
|
326 if (trie->terminal_node)
|
paulo@0
|
327 {
|
paulo@0
|
328 printf ("*");
|
paulo@0
|
329 children = list_next (children);
|
paulo@0
|
330 }
|
paulo@0
|
331
|
paulo@0
|
332 print_children (children);
|
paulo@0
|
333 }
|
paulo@0
|
334
|
paulo@0
|
335 #ifdef STANDALONE
|
paulo@0
|
336 int main(int argc, char **argv)
|
paulo@0
|
337 {
|
paulo@0
|
338 List *args = NULL;
|
paulo@0
|
339 List *ptr;
|
paulo@0
|
340 Trie *trie;
|
paulo@0
|
341 int i;
|
paulo@0
|
342
|
paulo@0
|
343 trie = trie_new ();
|
paulo@0
|
344
|
paulo@0
|
345 for (i = 1; i < argc; i++)
|
paulo@0
|
346 {
|
paulo@0
|
347 trie_insert (trie, argv[i], argv[i]);
|
paulo@0
|
348 args = list_prepend (args, argv[i]);
|
paulo@0
|
349 }
|
paulo@0
|
350
|
paulo@0
|
351 trie_print (trie);
|
paulo@0
|
352 printf ("\n");
|
paulo@0
|
353
|
paulo@0
|
354 while ((ptr = args))
|
paulo@0
|
355 {
|
paulo@0
|
356 trie_remove (trie, ptr->data);
|
paulo@0
|
357 args = list_remove_link (args, ptr);
|
paulo@0
|
358 }
|
paulo@0
|
359
|
paulo@0
|
360 trie_insert (trie, "book", "book");
|
paulo@0
|
361 trie_insert (trie, "boo", "boo");
|
paulo@0
|
362 trie_print (trie);
|
paulo@0
|
363 printf ("\n");
|
paulo@0
|
364 trie_remove (trie, "book");
|
paulo@0
|
365 trie_remove (trie, "boo");
|
paulo@0
|
366
|
paulo@0
|
367 trie_print (trie);
|
paulo@0
|
368 printf ("\n");
|
paulo@0
|
369 #if 0
|
paulo@0
|
370 trie_free (trie);
|
paulo@0
|
371 #endif
|
paulo@0
|
372
|
paulo@0
|
373 return 0;
|
paulo@0
|
374 }
|
paulo@0
|
375 #endif /* STANDALONE */
|