diff src/trie.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/trie.c	Sat Feb 20 21:18:28 2010 -0800
     1.3 @@ -0,0 +1,375 @@
     1.4 +/*
     1.5 + * $Id: trie.c,v 1.6 2005/01/04 14:58:42 mkern Exp $
     1.6 + *
     1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
     1.8 + *
     1.9 + * This program is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License as published by the
    1.11 + * Free Software Foundation; either version 2, or (at your option) any
    1.12 + * later version.
    1.13 + *
    1.14 + * This program is distributed in the hope that it will be useful, but
    1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of
    1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.17 + * General Public License for more details.
    1.18 + */
    1.19 +
    1.20 +#ifdef STANDALONE
    1.21 +#include <libgift/libgift.h>
    1.22 +#else
    1.23 +#include "gt_gnutella.h"
    1.24 +#endif /* STANDALONE */
    1.25 +
    1.26 +#include "trie.h"
    1.27 +
    1.28 +/*****************************************************************************/
    1.29 +
    1.30 +static Trie *trie_alloc (char c)
    1.31 +{
    1.32 +	Trie *trie;
    1.33 +
    1.34 +	if (!(trie = MALLOC (sizeof (Trie))))
    1.35 +		return NULL;
    1.36 +
    1.37 +	trie->c = c;
    1.38 +
    1.39 +	return trie;
    1.40 +}
    1.41 +
    1.42 +Trie *trie_new (void)
    1.43 +{
    1.44 +	/*
    1.45 +	 * Null won't match any character in the trie, so use 
    1.46 +	 * that as a sentinel in the root node.
    1.47 +	 */
    1.48 +	return trie_alloc (0);
    1.49 +}
    1.50 +
    1.51 +static int free_children (Trie *trie, void *udata)
    1.52 +{
    1.53 +	trie_free (trie);
    1.54 +	return TRUE;
    1.55 +}
    1.56 +		
    1.57 +void trie_free (Trie *trie)
    1.58 +{
    1.59 +	List *children;
    1.60 +
    1.61 +	if (!trie)
    1.62 +		return;
    1.63 +
    1.64 +	children = trie->children;
    1.65 +
    1.66 +	if (trie->terminal_node)
    1.67 +	{
    1.68 +		/* the first data item is the data item for this node */
    1.69 +		children = list_remove_link (children, children);
    1.70 +	}
    1.71 +
    1.72 +	list_foreach_remove (children, (ListForeachFunc)free_children, NULL);
    1.73 +	free (trie);
    1.74 +}
    1.75 +
    1.76 +static Trie *find_node (Trie *trie, char c)
    1.77 +{
    1.78 +	List   *children;
    1.79 +	List   *ptr;
    1.80 +
    1.81 +	children = trie->children;
    1.82 +
    1.83 +	/*
    1.84 +	 * If this is a terminal node, skip the data list
    1.85 +	 * that is in the first position.
    1.86 +	 */
    1.87 +	if (trie->terminal_node)
    1.88 +		children = children->next;
    1.89 +
    1.90 +	/*
    1.91 +	 * Could use list_find_custom here, but we want
    1.92 +	 * this to be as fast as possible.
    1.93 +	 *
    1.94 +	 * Should probably use a realloc'd array for the 
    1.95 +	 * child list instead.
    1.96 +	 */
    1.97 +	for (ptr = children; ptr; ptr = ptr->next)
    1.98 +	{
    1.99 +		Trie *node = ptr->data;
   1.100 +
   1.101 +		if (node->c == c)
   1.102 +			break;
   1.103 +	}
   1.104 +
   1.105 +#undef TEST_MOVE
   1.106 +#ifdef TEST_MOVE
   1.107 +	if (ptr)
   1.108 +	{
   1.109 +		void *data = ptr->data;
   1.110 +		int   index;
   1.111 +
   1.112 +		/* remove from the old position */
   1.113 +		trie->children = list_remove_link (trie->children, ptr);
   1.114 +
   1.115 +		/* insert at the beginning of the list, taking into account data
   1.116 +		 * list at the head if this is a terminal node */
   1.117 +		index = trie->terminal_node;
   1.118 +		trie->children = list_insert (trie->children, index, data);
   1.119 +
   1.120 +		return data;
   1.121 +	}
   1.122 +
   1.123 +	return NULL;
   1.124 +#else
   1.125 +	/* we directly access result->data here for efficiency */
   1.126 +	return ptr ? ptr->data : NULL;
   1.127 +#endif
   1.128 +}
   1.129 +
   1.130 +/*
   1.131 + * Find a node in the Trie. 
   1.132 + *
   1.133 + * @param trie  The Trie to look in
   1.134 + * @param s     The string to search for
   1.135 + * @param alloc Whether to allocate space for non-existent nodes during
   1.136 + *              lookup
   1.137 + */
   1.138 +static Trie *t_node_lookup (Trie *trie, char *s, int alloc)
   1.139 +{
   1.140 +	Trie   *result;
   1.141 +	char    c;
   1.142 +
   1.143 +	while ((c = *s++))
   1.144 +	{
   1.145 +		if (!trie)
   1.146 +			break;
   1.147 +
   1.148 +		result = find_node (trie, c);
   1.149 +
   1.150 +		if (!result && alloc)
   1.151 +		{
   1.152 +			if (!(result = trie_alloc (c)))
   1.153 +				return NULL;
   1.154 +
   1.155 +			trie->children = list_append (trie->children, result);
   1.156 +		}
   1.157 +
   1.158 +		trie = result;
   1.159 +	}
   1.160 +
   1.161 +	return trie;
   1.162 +}
   1.163 +
   1.164 +void *trie_lookup (Trie *trie, char *s)
   1.165 +{
   1.166 +	Trie *node;
   1.167 +
   1.168 +	node = t_node_lookup (trie, s, FALSE);
   1.169 +
   1.170 +	if (!node)
   1.171 +		return NULL;
   1.172 +
   1.173 +	if (node->terminal_node)
   1.174 +		return list_nth_data (node->children, 0);
   1.175 +
   1.176 +	return NULL;
   1.177 +}
   1.178 +
   1.179 +void trie_insert (Trie *trie, char *s, void *value)
   1.180 +{
   1.181 +	Trie   *node;
   1.182 +	void   *data;
   1.183 +	List   *head;
   1.184 +
   1.185 +	node = t_node_lookup (trie, s, TRUE);
   1.186 +
   1.187 +	if (!node)
   1.188 +	{
   1.189 +		/* must be memory allocation error */
   1.190 +		assert (0);
   1.191 +		return;
   1.192 +	}
   1.193 +
   1.194 +	if (!node->terminal_node)
   1.195 +	{
   1.196 +		/* could be a mem allocation error here... */
   1.197 +		node->children = list_prepend (node->children, value);
   1.198 +
   1.199 +		/* convert this node to a terminal node, with the data list
   1.200 +		 * in the first position of ->children */
   1.201 +		node->terminal_node = TRUE;
   1.202 +		return;
   1.203 +	}
   1.204 +	
   1.205 +	/* 
   1.206 +	 * This should not happen unless the user didn't call 
   1.207 +	 * remove first. That may leak memory, so assert here temporarily.
   1.208 +	 */
   1.209 +	assert (0);
   1.210 +
   1.211 +	head = list_nth (node->children, 0);
   1.212 +	data = list_nth_data (node->children, 0);
   1.213 +
   1.214 +	/* 
   1.215 +	 * Remove the first item, then insert a new list.
   1.216 +	 */
   1.217 +	node->children = list_remove_link (node->children, head);
   1.218 +
   1.219 +	/* insert the new data list back */
   1.220 +	node->children = list_prepend (node->children, value);
   1.221 +}
   1.222 +
   1.223 +int trie_is_empty (Trie *trie)
   1.224 +{
   1.225 +	if (trie->children == NULL)
   1.226 +		return TRUE;
   1.227 +
   1.228 +	return FALSE;
   1.229 +}
   1.230 +
   1.231 +static void remove_if_empty (Trie *root, Trie *child)
   1.232 +{
   1.233 +	if (!trie_is_empty (child))
   1.234 +		return;
   1.235 +
   1.236 +	root->children = list_remove (root->children, child);
   1.237 +	trie_free (child);
   1.238 +}
   1.239 +
   1.240 +static void t_remove_node (Trie *trie)
   1.241 +{
   1.242 +	void   *value;
   1.243 +	List   *value_ptr;
   1.244 +
   1.245 +	if (!trie->terminal_node)
   1.246 +		return;
   1.247 +
   1.248 +#if 0
   1.249 +	/* this assertion is broken due to duplicates */
   1.250 +	assert (trie->terminal_node == TRUE);
   1.251 +#endif
   1.252 +	
   1.253 +	value_ptr = list_nth (trie->children, 0);
   1.254 +	value     = list_nth_data (trie->children, 0);
   1.255 +
   1.256 +#if 0
   1.257 +	/* these will falsely trigger for files that have the same
   1.258 +	 * token in them twice, and end up getting removed twice */
   1.259 +	assert (list_length (data_list) > 0);
   1.260 +	assert (list_find (data_list, value) != NULL);
   1.261 +#endif
   1.262 +
   1.263 +	/* remove the old data list */
   1.264 +	trie->children      = list_remove_link (trie->children, value_ptr);
   1.265 +	trie->terminal_node = FALSE;
   1.266 +}
   1.267 +
   1.268 +void trie_remove (Trie *trie, char *s)
   1.269 +{
   1.270 +	Trie *child;
   1.271 +
   1.272 +	/* remove the node if we found it */
   1.273 +	if (string_isempty (s))
   1.274 +	{
   1.275 +		t_remove_node (trie);
   1.276 +		return;
   1.277 +	}
   1.278 +
   1.279 +	child = find_node (trie, *s);
   1.280 +	s++;
   1.281 +
   1.282 +	if (!child)
   1.283 +		return;
   1.284 +#if 0
   1.285 +	assert (child != NULL);
   1.286 +#endif
   1.287 +
   1.288 +	/* recursively remove all nodes */
   1.289 +	trie_remove (child, s);
   1.290 +
   1.291 +	/* remove this node if it has no children anymore */
   1.292 +	remove_if_empty (trie, child);
   1.293 +}
   1.294 +
   1.295 +static void print_children (List *children)
   1.296 +{
   1.297 +	List   *ptr;
   1.298 +	int     printed_open = FALSE;
   1.299 +	Trie   *trie;
   1.300 +
   1.301 +	for (ptr = children; ptr; ptr = list_next (ptr))
   1.302 +	{
   1.303 +		if (!printed_open)
   1.304 +		{
   1.305 +			printf ("{ ");
   1.306 +			printed_open = TRUE;
   1.307 +		}
   1.308 +
   1.309 +		trie = list_nth_data (ptr, 0);
   1.310 +		trie_print (trie);
   1.311 +
   1.312 +		if (list_next (ptr))
   1.313 +			printf(",");
   1.314 +	}
   1.315 +
   1.316 +	if (children)
   1.317 +		printf (" }");
   1.318 +}
   1.319 +
   1.320 +void trie_print (Trie *trie)
   1.321 +{
   1.322 +	List *children;
   1.323 +
   1.324 +	if (trie->c)
   1.325 +		printf ("%c", trie->c);
   1.326 +
   1.327 +	children = trie->children;
   1.328 +
   1.329 +	if (trie->terminal_node)
   1.330 +	{
   1.331 +		printf ("*");
   1.332 +		children = list_next (children);
   1.333 +	}
   1.334 +
   1.335 +	print_children (children);
   1.336 +}
   1.337 +
   1.338 +#ifdef STANDALONE
   1.339 +int main(int argc, char **argv)
   1.340 +{
   1.341 +	List *args = NULL;
   1.342 +	List *ptr;
   1.343 +	Trie *trie;
   1.344 +	int   i;
   1.345 +
   1.346 +	trie = trie_new ();
   1.347 +
   1.348 +	for (i = 1; i < argc; i++)
   1.349 +	{
   1.350 +		trie_insert (trie, argv[i], argv[i]);
   1.351 +		args = list_prepend (args, argv[i]);
   1.352 +	}
   1.353 +
   1.354 +	trie_print (trie);
   1.355 +	printf ("\n");
   1.356 +
   1.357 +	while ((ptr = args))
   1.358 +	{
   1.359 +		trie_remove (trie, ptr->data);
   1.360 +		args = list_remove_link (args, ptr);
   1.361 +	}
   1.362 +
   1.363 +	trie_insert (trie, "book", "book");
   1.364 +	trie_insert (trie, "boo", "boo");
   1.365 +	trie_print (trie);
   1.366 +	printf ("\n");
   1.367 +	trie_remove (trie, "book");
   1.368 +	trie_remove (trie, "boo");
   1.369 +
   1.370 +	trie_print (trie);
   1.371 +	printf ("\n");
   1.372 +#if 0
   1.373 +	trie_free (trie);
   1.374 +#endif
   1.375 +
   1.376 +	return 0;
   1.377 +}
   1.378 +#endif /* STANDALONE */