annotate src/trie.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
rev   line source
paulo@0 1 /*
paulo@0 2 * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $
paulo@0 3 *
paulo@0 4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
paulo@0 5 *
paulo@0 6 * This program is free software; you can redistribute it and/or modify it
paulo@0 7 * under the terms of the GNU General Public License as published by the
paulo@0 8 * Free Software Foundation; either version 2, or (at your option) any
paulo@0 9 * later version.
paulo@0 10 *
paulo@0 11 * This program is distributed in the hope that it will be useful, but
paulo@0 12 * WITHOUT ANY WARRANTY; without even the implied warranty of
paulo@0 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
paulo@0 14 * General Public License for more details.
paulo@0 15 */
paulo@0 16
paulo@0 17 #ifdef STANDALONE
paulo@0 18 #include <libgift/libgift.h>
paulo@0 19 #else
paulo@0 20 #include "gt_gnutella.h"
paulo@0 21 #endif /* STANDALONE */
paulo@0 22
paulo@0 23 #include "trie.h"
paulo@0 24
paulo@0 25 /*****************************************************************************/
paulo@0 26
paulo@0 27 static Trie *trie_alloc (char c)
paulo@0 28 {
paulo@0 29 Trie *trie;
paulo@0 30
paulo@0 31 if (!(trie = MALLOC (sizeof (Trie))))
paulo@0 32 return NULL;
paulo@0 33
paulo@0 34 trie->c = c;
paulo@0 35
paulo@0 36 return trie;
paulo@0 37 }
paulo@0 38
paulo@0 39 Trie *trie_new (void)
paulo@0 40 {
paulo@0 41 /*
paulo@0 42 * Null won't match any character in the trie, so use
paulo@0 43 * that as a sentinel in the root node.
paulo@0 44 */
paulo@0 45 return trie_alloc (0);
paulo@0 46 }
paulo@0 47
paulo@0 48 static int free_children (Trie *trie, void *udata)
paulo@0 49 {
paulo@0 50 trie_free (trie);
paulo@0 51 return TRUE;
paulo@0 52 }
paulo@0 53
paulo@0 54 void trie_free (Trie *trie)
paulo@0 55 {
paulo@0 56 List *children;
paulo@0 57
paulo@0 58 if (!trie)
paulo@0 59 return;
paulo@0 60
paulo@0 61 children = trie->children;
paulo@0 62
paulo@0 63 if (trie->terminal_node)
paulo@0 64 {
paulo@0 65 /* the first data item is the data item for this node */
paulo@0 66 children = list_remove_link (children, children);
paulo@0 67 }
paulo@0 68
paulo@0 69 list_foreach_remove (children, (ListForeachFunc)free_children, NULL);
paulo@0 70 free (trie);
paulo@0 71 }
paulo@0 72
paulo@0 73 static Trie *find_node (Trie *trie, char c)
paulo@0 74 {
paulo@0 75 List *children;
paulo@0 76 List *ptr;
paulo@0 77
paulo@0 78 children = trie->children;
paulo@0 79
paulo@0 80 /*
paulo@0 81 * If this is a terminal node, skip the data list
paulo@0 82 * that is in the first position.
paulo@0 83 */
paulo@0 84 if (trie->terminal_node)
paulo@0 85 children = children->next;
paulo@0 86
paulo@0 87 /*
paulo@0 88 * Could use list_find_custom here, but we want
paulo@0 89 * this to be as fast as possible.
paulo@0 90 *
paulo@0 91 * Should probably use a realloc'd array for the
paulo@0 92 * child list instead.
paulo@0 93 */
paulo@0 94 for (ptr = children; ptr; ptr = ptr->next)
paulo@0 95 {
paulo@0 96 Trie *node = ptr->data;
paulo@0 97
paulo@0 98 if (node->c == c)
paulo@0 99 break;
paulo@0 100 }
paulo@0 101
paulo@0 102 #undef TEST_MOVE
paulo@0 103 #ifdef TEST_MOVE
paulo@0 104 if (ptr)
paulo@0 105 {
paulo@0 106 void *data = ptr->data;
paulo@0 107 int index;
paulo@0 108
paulo@0 109 /* remove from the old position */
paulo@0 110 trie->children = list_remove_link (trie->children, ptr);
paulo@0 111
paulo@0 112 /* insert at the beginning of the list, taking into account data
paulo@0 113 * list at the head if this is a terminal node */
paulo@0 114 index = trie->terminal_node;
paulo@0 115 trie->children = list_insert (trie->children, index, data);
paulo@0 116
paulo@0 117 return data;
paulo@0 118 }
paulo@0 119
paulo@0 120 return NULL;
paulo@0 121 #else
paulo@0 122 /* we directly access result->data here for efficiency */
paulo@0 123 return ptr ? ptr->data : NULL;
paulo@0 124 #endif
paulo@0 125 }
paulo@0 126
paulo@0 127 /*
paulo@0 128 * Find a node in the Trie.
paulo@0 129 *
paulo@0 130 * @param trie The Trie to look in
paulo@0 131 * @param s The string to search for
paulo@0 132 * @param alloc Whether to allocate space for non-existent nodes during
paulo@0 133 * lookup
paulo@0 134 */
paulo@0 135 static Trie *t_node_lookup (Trie *trie, char *s, int alloc)
paulo@0 136 {
paulo@0 137 Trie *result;
paulo@0 138 char c;
paulo@0 139
paulo@0 140 while ((c = *s++))
paulo@0 141 {
paulo@0 142 if (!trie)
paulo@0 143 break;
paulo@0 144
paulo@0 145 result = find_node (trie, c);
paulo@0 146
paulo@0 147 if (!result && alloc)
paulo@0 148 {
paulo@0 149 if (!(result = trie_alloc (c)))
paulo@0 150 return NULL;
paulo@0 151
paulo@0 152 trie->children = list_append (trie->children, result);
paulo@0 153 }
paulo@0 154
paulo@0 155 trie = result;
paulo@0 156 }
paulo@0 157
paulo@0 158 return trie;
paulo@0 159 }
paulo@0 160
paulo@0 161 void *trie_lookup (Trie *trie, char *s)
paulo@0 162 {
paulo@0 163 Trie *node;
paulo@0 164
paulo@0 165 node = t_node_lookup (trie, s, FALSE);
paulo@0 166
paulo@0 167 if (!node)
paulo@0 168 return NULL;
paulo@0 169
paulo@0 170 if (node->terminal_node)
paulo@0 171 return list_nth_data (node->children, 0);
paulo@0 172
paulo@0 173 return NULL;
paulo@0 174 }
paulo@0 175
paulo@0 176 void trie_insert (Trie *trie, char *s, void *value)
paulo@0 177 {
paulo@0 178 Trie *node;
paulo@0 179 void *data;
paulo@0 180 List *head;
paulo@0 181
paulo@0 182 node = t_node_lookup (trie, s, TRUE);
paulo@0 183
paulo@0 184 if (!node)
paulo@0 185 {
paulo@0 186 /* must be memory allocation error */
paulo@0 187 assert (0);
paulo@0 188 return;
paulo@0 189 }
paulo@0 190
paulo@0 191 if (!node->terminal_node)
paulo@0 192 {
paulo@0 193 /* could be a mem allocation error here... */
paulo@0 194 node->children = list_prepend (node->children, value);
paulo@0 195
paulo@0 196 /* convert this node to a terminal node, with the data list
paulo@0 197 * in the first position of ->children */
paulo@0 198 node->terminal_node = TRUE;
paulo@0 199 return;
paulo@0 200 }
paulo@0 201
paulo@0 202 /*
paulo@0 203 * This should not happen unless the user didn't call
paulo@0 204 * remove first. That may leak memory, so assert here temporarily.
paulo@0 205 */
paulo@0 206 assert (0);
paulo@0 207
paulo@0 208 head = list_nth (node->children, 0);
paulo@0 209 data = list_nth_data (node->children, 0);
paulo@0 210
paulo@0 211 /*
paulo@0 212 * Remove the first item, then insert a new list.
paulo@0 213 */
paulo@0 214 node->children = list_remove_link (node->children, head);
paulo@0 215
paulo@0 216 /* insert the new data list back */
paulo@0 217 node->children = list_prepend (node->children, value);
paulo@0 218 }
paulo@0 219
paulo@0 220 int trie_is_empty (Trie *trie)
paulo@0 221 {
paulo@0 222 if (trie->children == NULL)
paulo@0 223 return TRUE;
paulo@0 224
paulo@0 225 return FALSE;
paulo@0 226 }
paulo@0 227
paulo@0 228 static void remove_if_empty (Trie *root, Trie *child)
paulo@0 229 {
paulo@0 230 if (!trie_is_empty (child))
paulo@0 231 return;
paulo@0 232
paulo@0 233 root->children = list_remove (root->children, child);
paulo@0 234 trie_free (child);
paulo@0 235 }
paulo@0 236
paulo@0 237 static void t_remove_node (Trie *trie)
paulo@0 238 {
paulo@0 239 void *value;
paulo@0 240 List *value_ptr;
paulo@0 241
paulo@0 242 if (!trie->terminal_node)
paulo@0 243 return;
paulo@0 244
paulo@0 245 #if 0
paulo@0 246 /* this assertion is broken due to duplicates */
paulo@0 247 assert (trie->terminal_node == TRUE);
paulo@0 248 #endif
paulo@0 249
paulo@0 250 value_ptr = list_nth (trie->children, 0);
paulo@0 251 value = list_nth_data (trie->children, 0);
paulo@0 252
paulo@0 253 #if 0
paulo@0 254 /* these will falsely trigger for files that have the same
paulo@0 255 * token in them twice, and end up getting removed twice */
paulo@0 256 assert (list_length (data_list) > 0);
paulo@0 257 assert (list_find (data_list, value) != NULL);
paulo@0 258 #endif
paulo@0 259
paulo@0 260 /* remove the old data list */
paulo@0 261 trie->children = list_remove_link (trie->children, value_ptr);
paulo@0 262 trie->terminal_node = FALSE;
paulo@0 263 }
paulo@0 264
paulo@0 265 void trie_remove (Trie *trie, char *s)
paulo@0 266 {
paulo@0 267 Trie *child;
paulo@0 268
paulo@0 269 /* remove the node if we found it */
paulo@0 270 if (string_isempty (s))
paulo@0 271 {
paulo@0 272 t_remove_node (trie);
paulo@0 273 return;
paulo@0 274 }
paulo@0 275
paulo@0 276 child = find_node (trie, *s);
paulo@0 277 s++;
paulo@0 278
paulo@0 279 if (!child)
paulo@0 280 return;
paulo@0 281 #if 0
paulo@0 282 assert (child != NULL);
paulo@0 283 #endif
paulo@0 284
paulo@0 285 /* recursively remove all nodes */
paulo@0 286 trie_remove (child, s);
paulo@0 287
paulo@0 288 /* remove this node if it has no children anymore */
paulo@0 289 remove_if_empty (trie, child);
paulo@0 290 }
paulo@0 291
paulo@0 292 static void print_children (List *children)
paulo@0 293 {
paulo@0 294 List *ptr;
paulo@0 295 int printed_open = FALSE;
paulo@0 296 Trie *trie;
paulo@0 297
paulo@0 298 for (ptr = children; ptr; ptr = list_next (ptr))
paulo@0 299 {
paulo@0 300 if (!printed_open)
paulo@0 301 {
paulo@0 302 printf ("{ ");
paulo@0 303 printed_open = TRUE;
paulo@0 304 }
paulo@0 305
paulo@0 306 trie = list_nth_data (ptr, 0);
paulo@0 307 trie_print (trie);
paulo@0 308
paulo@0 309 if (list_next (ptr))
paulo@0 310 printf(",");
paulo@0 311 }
paulo@0 312
paulo@0 313 if (children)
paulo@0 314 printf (" }");
paulo@0 315 }
paulo@0 316
paulo@0 317 void trie_print (Trie *trie)
paulo@0 318 {
paulo@0 319 List *children;
paulo@0 320
paulo@0 321 if (trie->c)
paulo@0 322 printf ("%c", trie->c);
paulo@0 323
paulo@0 324 children = trie->children;
paulo@0 325
paulo@0 326 if (trie->terminal_node)
paulo@0 327 {
paulo@0 328 printf ("*");
paulo@0 329 children = list_next (children);
paulo@0 330 }
paulo@0 331
paulo@0 332 print_children (children);
paulo@0 333 }
paulo@0 334
paulo@0 335 #ifdef STANDALONE
paulo@0 336 int main(int argc, char **argv)
paulo@0 337 {
paulo@0 338 List *args = NULL;
paulo@0 339 List *ptr;
paulo@0 340 Trie *trie;
paulo@0 341 int i;
paulo@0 342
paulo@0 343 trie = trie_new ();
paulo@0 344
paulo@0 345 for (i = 1; i < argc; i++)
paulo@0 346 {
paulo@0 347 trie_insert (trie, argv[i], argv[i]);
paulo@0 348 args = list_prepend (args, argv[i]);
paulo@0 349 }
paulo@0 350
paulo@0 351 trie_print (trie);
paulo@0 352 printf ("\n");
paulo@0 353
paulo@0 354 while ((ptr = args))
paulo@0 355 {
paulo@0 356 trie_remove (trie, ptr->data);
paulo@0 357 args = list_remove_link (args, ptr);
paulo@0 358 }
paulo@0 359
paulo@0 360 trie_insert (trie, "book", "book");
paulo@0 361 trie_insert (trie, "boo", "boo");
paulo@0 362 trie_print (trie);
paulo@0 363 printf ("\n");
paulo@0 364 trie_remove (trie, "book");
paulo@0 365 trie_remove (trie, "boo");
paulo@0 366
paulo@0 367 trie_print (trie);
paulo@0 368 printf ("\n");
paulo@0 369 #if 0
paulo@0 370 trie_free (trie);
paulo@0 371 #endif
paulo@0 372
paulo@0 373 return 0;
paulo@0 374 }
paulo@0 375 #endif /* STANDALONE */