Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
diff src/trie.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/trie.c Sat Feb 20 21:18:28 2010 -0800 1.3 @@ -0,0 +1,375 @@ 1.4 +/* 1.5 + * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $ 1.6 + * 1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) 1.8 + * 1.9 + * This program is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License as published by the 1.11 + * Free Software Foundation; either version 2, or (at your option) any 1.12 + * later version. 1.13 + * 1.14 + * This program is distributed in the hope that it will be useful, but 1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of 1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.17 + * General Public License for more details. 1.18 + */ 1.19 + 1.20 +#ifdef STANDALONE 1.21 +#include <libgift/libgift.h> 1.22 +#else 1.23 +#include "gt_gnutella.h" 1.24 +#endif /* STANDALONE */ 1.25 + 1.26 +#include "trie.h" 1.27 + 1.28 +/*****************************************************************************/ 1.29 + 1.30 +static Trie *trie_alloc (char c) 1.31 +{ 1.32 + Trie *trie; 1.33 + 1.34 + if (!(trie = MALLOC (sizeof (Trie)))) 1.35 + return NULL; 1.36 + 1.37 + trie->c = c; 1.38 + 1.39 + return trie; 1.40 +} 1.41 + 1.42 +Trie *trie_new (void) 1.43 +{ 1.44 + /* 1.45 + * Null won't match any character in the trie, so use 1.46 + * that as a sentinel in the root node. 1.47 + */ 1.48 + return trie_alloc (0); 1.49 +} 1.50 + 1.51 +static int free_children (Trie *trie, void *udata) 1.52 +{ 1.53 + trie_free (trie); 1.54 + return TRUE; 1.55 +} 1.56 + 1.57 +void trie_free (Trie *trie) 1.58 +{ 1.59 + List *children; 1.60 + 1.61 + if (!trie) 1.62 + return; 1.63 + 1.64 + children = trie->children; 1.65 + 1.66 + if (trie->terminal_node) 1.67 + { 1.68 + /* the first data item is the data item for this node */ 1.69 + children = list_remove_link (children, children); 1.70 + } 1.71 + 1.72 + list_foreach_remove (children, (ListForeachFunc)free_children, NULL); 1.73 + free (trie); 1.74 +} 1.75 + 1.76 +static Trie *find_node (Trie *trie, char c) 1.77 +{ 1.78 + List *children; 1.79 + List *ptr; 1.80 + 1.81 + children = trie->children; 1.82 + 1.83 + /* 1.84 + * If this is a terminal node, skip the data list 1.85 + * that is in the first position. 1.86 + */ 1.87 + if (trie->terminal_node) 1.88 + children = children->next; 1.89 + 1.90 + /* 1.91 + * Could use list_find_custom here, but we want 1.92 + * this to be as fast as possible. 1.93 + * 1.94 + * Should probably use a realloc'd array for the 1.95 + * child list instead. 1.96 + */ 1.97 + for (ptr = children; ptr; ptr = ptr->next) 1.98 + { 1.99 + Trie *node = ptr->data; 1.100 + 1.101 + if (node->c == c) 1.102 + break; 1.103 + } 1.104 + 1.105 +#undef TEST_MOVE 1.106 +#ifdef TEST_MOVE 1.107 + if (ptr) 1.108 + { 1.109 + void *data = ptr->data; 1.110 + int index; 1.111 + 1.112 + /* remove from the old position */ 1.113 + trie->children = list_remove_link (trie->children, ptr); 1.114 + 1.115 + /* insert at the beginning of the list, taking into account data 1.116 + * list at the head if this is a terminal node */ 1.117 + index = trie->terminal_node; 1.118 + trie->children = list_insert (trie->children, index, data); 1.119 + 1.120 + return data; 1.121 + } 1.122 + 1.123 + return NULL; 1.124 +#else 1.125 + /* we directly access result->data here for efficiency */ 1.126 + return ptr ? ptr->data : NULL; 1.127 +#endif 1.128 +} 1.129 + 1.130 +/* 1.131 + * Find a node in the Trie. 1.132 + * 1.133 + * @param trie The Trie to look in 1.134 + * @param s The string to search for 1.135 + * @param alloc Whether to allocate space for non-existent nodes during 1.136 + * lookup 1.137 + */ 1.138 +static Trie *t_node_lookup (Trie *trie, char *s, int alloc) 1.139 +{ 1.140 + Trie *result; 1.141 + char c; 1.142 + 1.143 + while ((c = *s++)) 1.144 + { 1.145 + if (!trie) 1.146 + break; 1.147 + 1.148 + result = find_node (trie, c); 1.149 + 1.150 + if (!result && alloc) 1.151 + { 1.152 + if (!(result = trie_alloc (c))) 1.153 + return NULL; 1.154 + 1.155 + trie->children = list_append (trie->children, result); 1.156 + } 1.157 + 1.158 + trie = result; 1.159 + } 1.160 + 1.161 + return trie; 1.162 +} 1.163 + 1.164 +void *trie_lookup (Trie *trie, char *s) 1.165 +{ 1.166 + Trie *node; 1.167 + 1.168 + node = t_node_lookup (trie, s, FALSE); 1.169 + 1.170 + if (!node) 1.171 + return NULL; 1.172 + 1.173 + if (node->terminal_node) 1.174 + return list_nth_data (node->children, 0); 1.175 + 1.176 + return NULL; 1.177 +} 1.178 + 1.179 +void trie_insert (Trie *trie, char *s, void *value) 1.180 +{ 1.181 + Trie *node; 1.182 + void *data; 1.183 + List *head; 1.184 + 1.185 + node = t_node_lookup (trie, s, TRUE); 1.186 + 1.187 + if (!node) 1.188 + { 1.189 + /* must be memory allocation error */ 1.190 + assert (0); 1.191 + return; 1.192 + } 1.193 + 1.194 + if (!node->terminal_node) 1.195 + { 1.196 + /* could be a mem allocation error here... */ 1.197 + node->children = list_prepend (node->children, value); 1.198 + 1.199 + /* convert this node to a terminal node, with the data list 1.200 + * in the first position of ->children */ 1.201 + node->terminal_node = TRUE; 1.202 + return; 1.203 + } 1.204 + 1.205 + /* 1.206 + * This should not happen unless the user didn't call 1.207 + * remove first. That may leak memory, so assert here temporarily. 1.208 + */ 1.209 + assert (0); 1.210 + 1.211 + head = list_nth (node->children, 0); 1.212 + data = list_nth_data (node->children, 0); 1.213 + 1.214 + /* 1.215 + * Remove the first item, then insert a new list. 1.216 + */ 1.217 + node->children = list_remove_link (node->children, head); 1.218 + 1.219 + /* insert the new data list back */ 1.220 + node->children = list_prepend (node->children, value); 1.221 +} 1.222 + 1.223 +int trie_is_empty (Trie *trie) 1.224 +{ 1.225 + if (trie->children == NULL) 1.226 + return TRUE; 1.227 + 1.228 + return FALSE; 1.229 +} 1.230 + 1.231 +static void remove_if_empty (Trie *root, Trie *child) 1.232 +{ 1.233 + if (!trie_is_empty (child)) 1.234 + return; 1.235 + 1.236 + root->children = list_remove (root->children, child); 1.237 + trie_free (child); 1.238 +} 1.239 + 1.240 +static void t_remove_node (Trie *trie) 1.241 +{ 1.242 + void *value; 1.243 + List *value_ptr; 1.244 + 1.245 + if (!trie->terminal_node) 1.246 + return; 1.247 + 1.248 +#if 0 1.249 + /* this assertion is broken due to duplicates */ 1.250 + assert (trie->terminal_node == TRUE); 1.251 +#endif 1.252 + 1.253 + value_ptr = list_nth (trie->children, 0); 1.254 + value = list_nth_data (trie->children, 0); 1.255 + 1.256 +#if 0 1.257 + /* these will falsely trigger for files that have the same 1.258 + * token in them twice, and end up getting removed twice */ 1.259 + assert (list_length (data_list) > 0); 1.260 + assert (list_find (data_list, value) != NULL); 1.261 +#endif 1.262 + 1.263 + /* remove the old data list */ 1.264 + trie->children = list_remove_link (trie->children, value_ptr); 1.265 + trie->terminal_node = FALSE; 1.266 +} 1.267 + 1.268 +void trie_remove (Trie *trie, char *s) 1.269 +{ 1.270 + Trie *child; 1.271 + 1.272 + /* remove the node if we found it */ 1.273 + if (string_isempty (s)) 1.274 + { 1.275 + t_remove_node (trie); 1.276 + return; 1.277 + } 1.278 + 1.279 + child = find_node (trie, *s); 1.280 + s++; 1.281 + 1.282 + if (!child) 1.283 + return; 1.284 +#if 0 1.285 + assert (child != NULL); 1.286 +#endif 1.287 + 1.288 + /* recursively remove all nodes */ 1.289 + trie_remove (child, s); 1.290 + 1.291 + /* remove this node if it has no children anymore */ 1.292 + remove_if_empty (trie, child); 1.293 +} 1.294 + 1.295 +static void print_children (List *children) 1.296 +{ 1.297 + List *ptr; 1.298 + int printed_open = FALSE; 1.299 + Trie *trie; 1.300 + 1.301 + for (ptr = children; ptr; ptr = list_next (ptr)) 1.302 + { 1.303 + if (!printed_open) 1.304 + { 1.305 + printf ("{ "); 1.306 + printed_open = TRUE; 1.307 + } 1.308 + 1.309 + trie = list_nth_data (ptr, 0); 1.310 + trie_print (trie); 1.311 + 1.312 + if (list_next (ptr)) 1.313 + printf(","); 1.314 + } 1.315 + 1.316 + if (children) 1.317 + printf (" }"); 1.318 +} 1.319 + 1.320 +void trie_print (Trie *trie) 1.321 +{ 1.322 + List *children; 1.323 + 1.324 + if (trie->c) 1.325 + printf ("%c", trie->c); 1.326 + 1.327 + children = trie->children; 1.328 + 1.329 + if (trie->terminal_node) 1.330 + { 1.331 + printf ("*"); 1.332 + children = list_next (children); 1.333 + } 1.334 + 1.335 + print_children (children); 1.336 +} 1.337 + 1.338 +#ifdef STANDALONE 1.339 +int main(int argc, char **argv) 1.340 +{ 1.341 + List *args = NULL; 1.342 + List *ptr; 1.343 + Trie *trie; 1.344 + int i; 1.345 + 1.346 + trie = trie_new (); 1.347 + 1.348 + for (i = 1; i < argc; i++) 1.349 + { 1.350 + trie_insert (trie, argv[i], argv[i]); 1.351 + args = list_prepend (args, argv[i]); 1.352 + } 1.353 + 1.354 + trie_print (trie); 1.355 + printf ("\n"); 1.356 + 1.357 + while ((ptr = args)) 1.358 + { 1.359 + trie_remove (trie, ptr->data); 1.360 + args = list_remove_link (args, ptr); 1.361 + } 1.362 + 1.363 + trie_insert (trie, "book", "book"); 1.364 + trie_insert (trie, "boo", "boo"); 1.365 + trie_print (trie); 1.366 + printf ("\n"); 1.367 + trie_remove (trie, "book"); 1.368 + trie_remove (trie, "boo"); 1.369 + 1.370 + trie_print (trie); 1.371 + printf ("\n"); 1.372 +#if 0 1.373 + trie_free (trie); 1.374 +#endif 1.375 + 1.376 + return 0; 1.377 +} 1.378 +#endif /* STANDALONE */