comparison src/trie.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:23a42db59e77
1 /*
2 * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
16
17 #ifdef STANDALONE
18 #include <libgift/libgift.h>
19 #else
20 #include "gt_gnutella.h"
21 #endif /* STANDALONE */
22
23 #include "trie.h"
24
25 /*****************************************************************************/
26
27 static Trie *trie_alloc (char c)
28 {
29 Trie *trie;
30
31 if (!(trie = MALLOC (sizeof (Trie))))
32 return NULL;
33
34 trie->c = c;
35
36 return trie;
37 }
38
39 Trie *trie_new (void)
40 {
41 /*
42 * Null won't match any character in the trie, so use
43 * that as a sentinel in the root node.
44 */
45 return trie_alloc (0);
46 }
47
48 static int free_children (Trie *trie, void *udata)
49 {
50 trie_free (trie);
51 return TRUE;
52 }
53
54 void trie_free (Trie *trie)
55 {
56 List *children;
57
58 if (!trie)
59 return;
60
61 children = trie->children;
62
63 if (trie->terminal_node)
64 {
65 /* the first data item is the data item for this node */
66 children = list_remove_link (children, children);
67 }
68
69 list_foreach_remove (children, (ListForeachFunc)free_children, NULL);
70 free (trie);
71 }
72
73 static Trie *find_node (Trie *trie, char c)
74 {
75 List *children;
76 List *ptr;
77
78 children = trie->children;
79
80 /*
81 * If this is a terminal node, skip the data list
82 * that is in the first position.
83 */
84 if (trie->terminal_node)
85 children = children->next;
86
87 /*
88 * Could use list_find_custom here, but we want
89 * this to be as fast as possible.
90 *
91 * Should probably use a realloc'd array for the
92 * child list instead.
93 */
94 for (ptr = children; ptr; ptr = ptr->next)
95 {
96 Trie *node = ptr->data;
97
98 if (node->c == c)
99 break;
100 }
101
102 #undef TEST_MOVE
103 #ifdef TEST_MOVE
104 if (ptr)
105 {
106 void *data = ptr->data;
107 int index;
108
109 /* remove from the old position */
110 trie->children = list_remove_link (trie->children, ptr);
111
112 /* insert at the beginning of the list, taking into account data
113 * list at the head if this is a terminal node */
114 index = trie->terminal_node;
115 trie->children = list_insert (trie->children, index, data);
116
117 return data;
118 }
119
120 return NULL;
121 #else
122 /* we directly access result->data here for efficiency */
123 return ptr ? ptr->data : NULL;
124 #endif
125 }
126
127 /*
128 * Find a node in the Trie.
129 *
130 * @param trie The Trie to look in
131 * @param s The string to search for
132 * @param alloc Whether to allocate space for non-existent nodes during
133 * lookup
134 */
135 static Trie *t_node_lookup (Trie *trie, char *s, int alloc)
136 {
137 Trie *result;
138 char c;
139
140 while ((c = *s++))
141 {
142 if (!trie)
143 break;
144
145 result = find_node (trie, c);
146
147 if (!result && alloc)
148 {
149 if (!(result = trie_alloc (c)))
150 return NULL;
151
152 trie->children = list_append (trie->children, result);
153 }
154
155 trie = result;
156 }
157
158 return trie;
159 }
160
161 void *trie_lookup (Trie *trie, char *s)
162 {
163 Trie *node;
164
165 node = t_node_lookup (trie, s, FALSE);
166
167 if (!node)
168 return NULL;
169
170 if (node->terminal_node)
171 return list_nth_data (node->children, 0);
172
173 return NULL;
174 }
175
176 void trie_insert (Trie *trie, char *s, void *value)
177 {
178 Trie *node;
179 void *data;
180 List *head;
181
182 node = t_node_lookup (trie, s, TRUE);
183
184 if (!node)
185 {
186 /* must be memory allocation error */
187 assert (0);
188 return;
189 }
190
191 if (!node->terminal_node)
192 {
193 /* could be a mem allocation error here... */
194 node->children = list_prepend (node->children, value);
195
196 /* convert this node to a terminal node, with the data list
197 * in the first position of ->children */
198 node->terminal_node = TRUE;
199 return;
200 }
201
202 /*
203 * This should not happen unless the user didn't call
204 * remove first. That may leak memory, so assert here temporarily.
205 */
206 assert (0);
207
208 head = list_nth (node->children, 0);
209 data = list_nth_data (node->children, 0);
210
211 /*
212 * Remove the first item, then insert a new list.
213 */
214 node->children = list_remove_link (node->children, head);
215
216 /* insert the new data list back */
217 node->children = list_prepend (node->children, value);
218 }
219
220 int trie_is_empty (Trie *trie)
221 {
222 if (trie->children == NULL)
223 return TRUE;
224
225 return FALSE;
226 }
227
228 static void remove_if_empty (Trie *root, Trie *child)
229 {
230 if (!trie_is_empty (child))
231 return;
232
233 root->children = list_remove (root->children, child);
234 trie_free (child);
235 }
236
237 static void t_remove_node (Trie *trie)
238 {
239 void *value;
240 List *value_ptr;
241
242 if (!trie->terminal_node)
243 return;
244
245 #if 0
246 /* this assertion is broken due to duplicates */
247 assert (trie->terminal_node == TRUE);
248 #endif
249
250 value_ptr = list_nth (trie->children, 0);
251 value = list_nth_data (trie->children, 0);
252
253 #if 0
254 /* these will falsely trigger for files that have the same
255 * token in them twice, and end up getting removed twice */
256 assert (list_length (data_list) > 0);
257 assert (list_find (data_list, value) != NULL);
258 #endif
259
260 /* remove the old data list */
261 trie->children = list_remove_link (trie->children, value_ptr);
262 trie->terminal_node = FALSE;
263 }
264
265 void trie_remove (Trie *trie, char *s)
266 {
267 Trie *child;
268
269 /* remove the node if we found it */
270 if (string_isempty (s))
271 {
272 t_remove_node (trie);
273 return;
274 }
275
276 child = find_node (trie, *s);
277 s++;
278
279 if (!child)
280 return;
281 #if 0
282 assert (child != NULL);
283 #endif
284
285 /* recursively remove all nodes */
286 trie_remove (child, s);
287
288 /* remove this node if it has no children anymore */
289 remove_if_empty (trie, child);
290 }
291
292 static void print_children (List *children)
293 {
294 List *ptr;
295 int printed_open = FALSE;
296 Trie *trie;
297
298 for (ptr = children; ptr; ptr = list_next (ptr))
299 {
300 if (!printed_open)
301 {
302 printf ("{ ");
303 printed_open = TRUE;
304 }
305
306 trie = list_nth_data (ptr, 0);
307 trie_print (trie);
308
309 if (list_next (ptr))
310 printf(",");
311 }
312
313 if (children)
314 printf (" }");
315 }
316
317 void trie_print (Trie *trie)
318 {
319 List *children;
320
321 if (trie->c)
322 printf ("%c", trie->c);
323
324 children = trie->children;
325
326 if (trie->terminal_node)
327 {
328 printf ("*");
329 children = list_next (children);
330 }
331
332 print_children (children);
333 }
334
335 #ifdef STANDALONE
336 int main(int argc, char **argv)
337 {
338 List *args = NULL;
339 List *ptr;
340 Trie *trie;
341 int i;
342
343 trie = trie_new ();
344
345 for (i = 1; i < argc; i++)
346 {
347 trie_insert (trie, argv[i], argv[i]);
348 args = list_prepend (args, argv[i]);
349 }
350
351 trie_print (trie);
352 printf ("\n");
353
354 while ((ptr = args))
355 {
356 trie_remove (trie, ptr->data);
357 args = list_remove_link (args, ptr);
358 }
359
360 trie_insert (trie, "book", "book");
361 trie_insert (trie, "boo", "boo");
362 trie_print (trie);
363 printf ("\n");
364 trie_remove (trie, "book");
365 trie_remove (trie, "boo");
366
367 trie_print (trie);
368 printf ("\n");
369 #if 0
370 trie_free (trie);
371 #endif
372
373 return 0;
374 }
375 #endif /* STANDALONE */