paulo@0: /* paulo@0: * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $ paulo@0: * paulo@0: * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) paulo@0: * paulo@0: * This program is free software; you can redistribute it and/or modify it paulo@0: * under the terms of the GNU General Public License as published by the paulo@0: * Free Software Foundation; either version 2, or (at your option) any paulo@0: * later version. paulo@0: * paulo@0: * This program is distributed in the hope that it will be useful, but paulo@0: * WITHOUT ANY WARRANTY; without even the implied warranty of paulo@0: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU paulo@0: * General Public License for more details. paulo@0: */ paulo@0: paulo@0: #ifdef STANDALONE paulo@0: #include paulo@0: #else paulo@0: #include "gt_gnutella.h" paulo@0: #endif /* STANDALONE */ paulo@0: paulo@0: #include "trie.h" paulo@0: paulo@0: /*****************************************************************************/ paulo@0: paulo@0: static Trie *trie_alloc (char c) paulo@0: { paulo@0: Trie *trie; paulo@0: paulo@0: if (!(trie = MALLOC (sizeof (Trie)))) paulo@0: return NULL; paulo@0: paulo@0: trie->c = c; paulo@0: paulo@0: return trie; paulo@0: } paulo@0: paulo@0: Trie *trie_new (void) paulo@0: { paulo@0: /* paulo@0: * Null won't match any character in the trie, so use paulo@0: * that as a sentinel in the root node. paulo@0: */ paulo@0: return trie_alloc (0); paulo@0: } paulo@0: paulo@0: static int free_children (Trie *trie, void *udata) paulo@0: { paulo@0: trie_free (trie); paulo@0: return TRUE; paulo@0: } paulo@0: paulo@0: void trie_free (Trie *trie) paulo@0: { paulo@0: List *children; paulo@0: paulo@0: if (!trie) paulo@0: return; paulo@0: paulo@0: children = trie->children; paulo@0: paulo@0: if (trie->terminal_node) paulo@0: { paulo@0: /* the first data item is the data item for this node */ paulo@0: children = list_remove_link (children, children); paulo@0: } paulo@0: paulo@0: list_foreach_remove (children, (ListForeachFunc)free_children, NULL); paulo@0: free (trie); paulo@0: } paulo@0: paulo@0: static Trie *find_node (Trie *trie, char c) paulo@0: { paulo@0: List *children; paulo@0: List *ptr; paulo@0: paulo@0: children = trie->children; paulo@0: paulo@0: /* paulo@0: * If this is a terminal node, skip the data list paulo@0: * that is in the first position. paulo@0: */ paulo@0: if (trie->terminal_node) paulo@0: children = children->next; paulo@0: paulo@0: /* paulo@0: * Could use list_find_custom here, but we want paulo@0: * this to be as fast as possible. paulo@0: * paulo@0: * Should probably use a realloc'd array for the paulo@0: * child list instead. paulo@0: */ paulo@0: for (ptr = children; ptr; ptr = ptr->next) paulo@0: { paulo@0: Trie *node = ptr->data; paulo@0: paulo@0: if (node->c == c) paulo@0: break; paulo@0: } paulo@0: paulo@0: #undef TEST_MOVE paulo@0: #ifdef TEST_MOVE paulo@0: if (ptr) paulo@0: { paulo@0: void *data = ptr->data; paulo@0: int index; paulo@0: paulo@0: /* remove from the old position */ paulo@0: trie->children = list_remove_link (trie->children, ptr); paulo@0: paulo@0: /* insert at the beginning of the list, taking into account data paulo@0: * list at the head if this is a terminal node */ paulo@0: index = trie->terminal_node; paulo@0: trie->children = list_insert (trie->children, index, data); paulo@0: paulo@0: return data; paulo@0: } paulo@0: paulo@0: return NULL; paulo@0: #else paulo@0: /* we directly access result->data here for efficiency */ paulo@0: return ptr ? ptr->data : NULL; paulo@0: #endif paulo@0: } paulo@0: paulo@0: /* paulo@0: * Find a node in the Trie. paulo@0: * paulo@0: * @param trie The Trie to look in paulo@0: * @param s The string to search for paulo@0: * @param alloc Whether to allocate space for non-existent nodes during paulo@0: * lookup paulo@0: */ paulo@0: static Trie *t_node_lookup (Trie *trie, char *s, int alloc) paulo@0: { paulo@0: Trie *result; paulo@0: char c; paulo@0: paulo@0: while ((c = *s++)) paulo@0: { paulo@0: if (!trie) paulo@0: break; paulo@0: paulo@0: result = find_node (trie, c); paulo@0: paulo@0: if (!result && alloc) paulo@0: { paulo@0: if (!(result = trie_alloc (c))) paulo@0: return NULL; paulo@0: paulo@0: trie->children = list_append (trie->children, result); paulo@0: } paulo@0: paulo@0: trie = result; paulo@0: } paulo@0: paulo@0: return trie; paulo@0: } paulo@0: paulo@0: void *trie_lookup (Trie *trie, char *s) paulo@0: { paulo@0: Trie *node; paulo@0: paulo@0: node = t_node_lookup (trie, s, FALSE); paulo@0: paulo@0: if (!node) paulo@0: return NULL; paulo@0: paulo@0: if (node->terminal_node) paulo@0: return list_nth_data (node->children, 0); paulo@0: paulo@0: return NULL; paulo@0: } paulo@0: paulo@0: void trie_insert (Trie *trie, char *s, void *value) paulo@0: { paulo@0: Trie *node; paulo@0: void *data; paulo@0: List *head; paulo@0: paulo@0: node = t_node_lookup (trie, s, TRUE); paulo@0: paulo@0: if (!node) paulo@0: { paulo@0: /* must be memory allocation error */ paulo@0: assert (0); paulo@0: return; paulo@0: } paulo@0: paulo@0: if (!node->terminal_node) paulo@0: { paulo@0: /* could be a mem allocation error here... */ paulo@0: node->children = list_prepend (node->children, value); paulo@0: paulo@0: /* convert this node to a terminal node, with the data list paulo@0: * in the first position of ->children */ paulo@0: node->terminal_node = TRUE; paulo@0: return; paulo@0: } paulo@0: paulo@0: /* paulo@0: * This should not happen unless the user didn't call paulo@0: * remove first. That may leak memory, so assert here temporarily. paulo@0: */ paulo@0: assert (0); paulo@0: paulo@0: head = list_nth (node->children, 0); paulo@0: data = list_nth_data (node->children, 0); paulo@0: paulo@0: /* paulo@0: * Remove the first item, then insert a new list. paulo@0: */ paulo@0: node->children = list_remove_link (node->children, head); paulo@0: paulo@0: /* insert the new data list back */ paulo@0: node->children = list_prepend (node->children, value); paulo@0: } paulo@0: paulo@0: int trie_is_empty (Trie *trie) paulo@0: { paulo@0: if (trie->children == NULL) paulo@0: return TRUE; paulo@0: paulo@0: return FALSE; paulo@0: } paulo@0: paulo@0: static void remove_if_empty (Trie *root, Trie *child) paulo@0: { paulo@0: if (!trie_is_empty (child)) paulo@0: return; paulo@0: paulo@0: root->children = list_remove (root->children, child); paulo@0: trie_free (child); paulo@0: } paulo@0: paulo@0: static void t_remove_node (Trie *trie) paulo@0: { paulo@0: void *value; paulo@0: List *value_ptr; paulo@0: paulo@0: if (!trie->terminal_node) paulo@0: return; paulo@0: paulo@0: #if 0 paulo@0: /* this assertion is broken due to duplicates */ paulo@0: assert (trie->terminal_node == TRUE); paulo@0: #endif paulo@0: paulo@0: value_ptr = list_nth (trie->children, 0); paulo@0: value = list_nth_data (trie->children, 0); paulo@0: paulo@0: #if 0 paulo@0: /* these will falsely trigger for files that have the same paulo@0: * token in them twice, and end up getting removed twice */ paulo@0: assert (list_length (data_list) > 0); paulo@0: assert (list_find (data_list, value) != NULL); paulo@0: #endif paulo@0: paulo@0: /* remove the old data list */ paulo@0: trie->children = list_remove_link (trie->children, value_ptr); paulo@0: trie->terminal_node = FALSE; paulo@0: } paulo@0: paulo@0: void trie_remove (Trie *trie, char *s) paulo@0: { paulo@0: Trie *child; paulo@0: paulo@0: /* remove the node if we found it */ paulo@0: if (string_isempty (s)) paulo@0: { paulo@0: t_remove_node (trie); paulo@0: return; paulo@0: } paulo@0: paulo@0: child = find_node (trie, *s); paulo@0: s++; paulo@0: paulo@0: if (!child) paulo@0: return; paulo@0: #if 0 paulo@0: assert (child != NULL); paulo@0: #endif paulo@0: paulo@0: /* recursively remove all nodes */ paulo@0: trie_remove (child, s); paulo@0: paulo@0: /* remove this node if it has no children anymore */ paulo@0: remove_if_empty (trie, child); paulo@0: } paulo@0: paulo@0: static void print_children (List *children) paulo@0: { paulo@0: List *ptr; paulo@0: int printed_open = FALSE; paulo@0: Trie *trie; paulo@0: paulo@0: for (ptr = children; ptr; ptr = list_next (ptr)) paulo@0: { paulo@0: if (!printed_open) paulo@0: { paulo@0: printf ("{ "); paulo@0: printed_open = TRUE; paulo@0: } paulo@0: paulo@0: trie = list_nth_data (ptr, 0); paulo@0: trie_print (trie); paulo@0: paulo@0: if (list_next (ptr)) paulo@0: printf(","); paulo@0: } paulo@0: paulo@0: if (children) paulo@0: printf (" }"); paulo@0: } paulo@0: paulo@0: void trie_print (Trie *trie) paulo@0: { paulo@0: List *children; paulo@0: paulo@0: if (trie->c) paulo@0: printf ("%c", trie->c); paulo@0: paulo@0: children = trie->children; paulo@0: paulo@0: if (trie->terminal_node) paulo@0: { paulo@0: printf ("*"); paulo@0: children = list_next (children); paulo@0: } paulo@0: paulo@0: print_children (children); paulo@0: } paulo@0: paulo@0: #ifdef STANDALONE paulo@0: int main(int argc, char **argv) paulo@0: { paulo@0: List *args = NULL; paulo@0: List *ptr; paulo@0: Trie *trie; paulo@0: int i; paulo@0: paulo@0: trie = trie_new (); paulo@0: paulo@0: for (i = 1; i < argc; i++) paulo@0: { paulo@0: trie_insert (trie, argv[i], argv[i]); paulo@0: args = list_prepend (args, argv[i]); paulo@0: } paulo@0: paulo@0: trie_print (trie); paulo@0: printf ("\n"); paulo@0: paulo@0: while ((ptr = args)) paulo@0: { paulo@0: trie_remove (trie, ptr->data); paulo@0: args = list_remove_link (args, ptr); paulo@0: } paulo@0: paulo@0: trie_insert (trie, "book", "book"); paulo@0: trie_insert (trie, "boo", "boo"); paulo@0: trie_print (trie); paulo@0: printf ("\n"); paulo@0: trie_remove (trie, "book"); paulo@0: trie_remove (trie, "boo"); paulo@0: paulo@0: trie_print (trie); paulo@0: printf ("\n"); paulo@0: #if 0 paulo@0: trie_free (trie); paulo@0: #endif paulo@0: paulo@0: return 0; paulo@0: } paulo@0: #endif /* STANDALONE */