view src/trie.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line source
1 /*
2 * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
17 #ifdef STANDALONE
18 #include <libgift/libgift.h>
19 #else
20 #include "gt_gnutella.h"
21 #endif /* STANDALONE */
23 #include "trie.h"
25 /*****************************************************************************/
27 static Trie *trie_alloc (char c)
28 {
29 Trie *trie;
31 if (!(trie = MALLOC (sizeof (Trie))))
32 return NULL;
34 trie->c = c;
36 return trie;
37 }
39 Trie *trie_new (void)
40 {
41 /*
42 * Null won't match any character in the trie, so use
43 * that as a sentinel in the root node.
44 */
45 return trie_alloc (0);
46 }
48 static int free_children (Trie *trie, void *udata)
49 {
50 trie_free (trie);
51 return TRUE;
52 }
54 void trie_free (Trie *trie)
55 {
56 List *children;
58 if (!trie)
59 return;
61 children = trie->children;
63 if (trie->terminal_node)
64 {
65 /* the first data item is the data item for this node */
66 children = list_remove_link (children, children);
67 }
69 list_foreach_remove (children, (ListForeachFunc)free_children, NULL);
70 free (trie);
71 }
73 static Trie *find_node (Trie *trie, char c)
74 {
75 List *children;
76 List *ptr;
78 children = trie->children;
80 /*
81 * If this is a terminal node, skip the data list
82 * that is in the first position.
83 */
84 if (trie->terminal_node)
85 children = children->next;
87 /*
88 * Could use list_find_custom here, but we want
89 * this to be as fast as possible.
90 *
91 * Should probably use a realloc'd array for the
92 * child list instead.
93 */
94 for (ptr = children; ptr; ptr = ptr->next)
95 {
96 Trie *node = ptr->data;
98 if (node->c == c)
99 break;
100 }
102 #undef TEST_MOVE
103 #ifdef TEST_MOVE
104 if (ptr)
105 {
106 void *data = ptr->data;
107 int index;
109 /* remove from the old position */
110 trie->children = list_remove_link (trie->children, ptr);
112 /* insert at the beginning of the list, taking into account data
113 * list at the head if this is a terminal node */
114 index = trie->terminal_node;
115 trie->children = list_insert (trie->children, index, data);
117 return data;
118 }
120 return NULL;
121 #else
122 /* we directly access result->data here for efficiency */
123 return ptr ? ptr->data : NULL;
124 #endif
125 }
127 /*
128 * Find a node in the Trie.
129 *
130 * @param trie The Trie to look in
131 * @param s The string to search for
132 * @param alloc Whether to allocate space for non-existent nodes during
133 * lookup
134 */
135 static Trie *t_node_lookup (Trie *trie, char *s, int alloc)
136 {
137 Trie *result;
138 char c;
140 while ((c = *s++))
141 {
142 if (!trie)
143 break;
145 result = find_node (trie, c);
147 if (!result && alloc)
148 {
149 if (!(result = trie_alloc (c)))
150 return NULL;
152 trie->children = list_append (trie->children, result);
153 }
155 trie = result;
156 }
158 return trie;
159 }
161 void *trie_lookup (Trie *trie, char *s)
162 {
163 Trie *node;
165 node = t_node_lookup (trie, s, FALSE);
167 if (!node)
168 return NULL;
170 if (node->terminal_node)
171 return list_nth_data (node->children, 0);
173 return NULL;
174 }
176 void trie_insert (Trie *trie, char *s, void *value)
177 {
178 Trie *node;
179 void *data;
180 List *head;
182 node = t_node_lookup (trie, s, TRUE);
184 if (!node)
185 {
186 /* must be memory allocation error */
187 assert (0);
188 return;
189 }
191 if (!node->terminal_node)
192 {
193 /* could be a mem allocation error here... */
194 node->children = list_prepend (node->children, value);
196 /* convert this node to a terminal node, with the data list
197 * in the first position of ->children */
198 node->terminal_node = TRUE;
199 return;
200 }
202 /*
203 * This should not happen unless the user didn't call
204 * remove first. That may leak memory, so assert here temporarily.
205 */
206 assert (0);
208 head = list_nth (node->children, 0);
209 data = list_nth_data (node->children, 0);
211 /*
212 * Remove the first item, then insert a new list.
213 */
214 node->children = list_remove_link (node->children, head);
216 /* insert the new data list back */
217 node->children = list_prepend (node->children, value);
218 }
220 int trie_is_empty (Trie *trie)
221 {
222 if (trie->children == NULL)
223 return TRUE;
225 return FALSE;
226 }
228 static void remove_if_empty (Trie *root, Trie *child)
229 {
230 if (!trie_is_empty (child))
231 return;
233 root->children = list_remove (root->children, child);
234 trie_free (child);
235 }
237 static void t_remove_node (Trie *trie)
238 {
239 void *value;
240 List *value_ptr;
242 if (!trie->terminal_node)
243 return;
245 #if 0
246 /* this assertion is broken due to duplicates */
247 assert (trie->terminal_node == TRUE);
248 #endif
250 value_ptr = list_nth (trie->children, 0);
251 value = list_nth_data (trie->children, 0);
253 #if 0
254 /* these will falsely trigger for files that have the same
255 * token in them twice, and end up getting removed twice */
256 assert (list_length (data_list) > 0);
257 assert (list_find (data_list, value) != NULL);
258 #endif
260 /* remove the old data list */
261 trie->children = list_remove_link (trie->children, value_ptr);
262 trie->terminal_node = FALSE;
263 }
265 void trie_remove (Trie *trie, char *s)
266 {
267 Trie *child;
269 /* remove the node if we found it */
270 if (string_isempty (s))
271 {
272 t_remove_node (trie);
273 return;
274 }
276 child = find_node (trie, *s);
277 s++;
279 if (!child)
280 return;
281 #if 0
282 assert (child != NULL);
283 #endif
285 /* recursively remove all nodes */
286 trie_remove (child, s);
288 /* remove this node if it has no children anymore */
289 remove_if_empty (trie, child);
290 }
292 static void print_children (List *children)
293 {
294 List *ptr;
295 int printed_open = FALSE;
296 Trie *trie;
298 for (ptr = children; ptr; ptr = list_next (ptr))
299 {
300 if (!printed_open)
301 {
302 printf ("{ ");
303 printed_open = TRUE;
304 }
306 trie = list_nth_data (ptr, 0);
307 trie_print (trie);
309 if (list_next (ptr))
310 printf(",");
311 }
313 if (children)
314 printf (" }");
315 }
317 void trie_print (Trie *trie)
318 {
319 List *children;
321 if (trie->c)
322 printf ("%c", trie->c);
324 children = trie->children;
326 if (trie->terminal_node)
327 {
328 printf ("*");
329 children = list_next (children);
330 }
332 print_children (children);
333 }
335 #ifdef STANDALONE
336 int main(int argc, char **argv)
337 {
338 List *args = NULL;
339 List *ptr;
340 Trie *trie;
341 int i;
343 trie = trie_new ();
345 for (i = 1; i < argc; i++)
346 {
347 trie_insert (trie, argv[i], argv[i]);
348 args = list_prepend (args, argv[i]);
349 }
351 trie_print (trie);
352 printf ("\n");
354 while ((ptr = args))
355 {
356 trie_remove (trie, ptr->data);
357 args = list_remove_link (args, ptr);
358 }
360 trie_insert (trie, "book", "book");
361 trie_insert (trie, "boo", "boo");
362 trie_print (trie);
363 printf ("\n");
364 trie_remove (trie, "book");
365 trie_remove (trie, "boo");
367 trie_print (trie);
368 printf ("\n");
369 #if 0
370 trie_free (trie);
371 #endif
373 return 0;
374 }
375 #endif /* STANDALONE */