diff src/xml.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/xml.c	Sat Feb 20 21:18:28 2010 -0800
     1.3 @@ -0,0 +1,384 @@
     1.4 +/*
     1.5 + * $Id: xml.c,v 1.10 2004/04/13 07:25:18 hipnod Exp $
     1.6 + *
     1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
     1.8 + *
     1.9 + * This program is free software; you can redistribute it and/or modify it
    1.10 + * under the terms of the GNU General Public License as published by the
    1.11 + * Free Software Foundation; either version 2, or (at your option) any
    1.12 + * later version.
    1.13 + *
    1.14 + * This program is distributed in the hope that it will be useful, but
    1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of
    1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    1.17 + * General Public License for more details.
    1.18 + */
    1.19 +
    1.20 +#include "gt_gnutella.h"
    1.21 +
    1.22 +#include <zlib.h>
    1.23 +
    1.24 +#ifdef USE_LIBXML2
    1.25 +#include <libxml/parser.h>         /* xmlParseMemory() */
    1.26 +#include <libxml/xmlerror.h>       /* xmlSetGenericErrorFunc() */
    1.27 +#endif /* USE_LIBXML2 */
    1.28 +
    1.29 +#include "xml.h"
    1.30 +
    1.31 +/*****************************************************************************/
    1.32 +
    1.33 +#ifndef USE_LIBXML2
    1.34 +/*
    1.35 + * Stand-in used when the plugin is built without libxml2: nothing is
    1.36 + * parsed and the call always reports failure.
    1.37 + */
    1.38 +BOOL gt_xml_parse (const char *xml, Dataset **ret)
    1.39 +{
    1.40 +	(void)xml;
    1.41 +	(void)ret;
    1.42 +
    1.43 +	return FALSE;
    1.44 +}
    1.38 +
    1.39 +/*
    1.40 + * Stand-in used when the plugin is built without libxml2: the XML block
    1.41 + * is ignored, no share is modified, and the call always reports failure.
    1.42 + */
    1.43 +BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
    1.44 +                           size_t shares_len)
    1.45 +{
    1.46 +	(void)xml;
    1.47 +	(void)bin_len;
    1.48 +	(void)shares;
    1.49 +	(void)shares_len;
    1.50 +
    1.51 +	return FALSE;
    1.52 +}
    1.44 +#endif /* !USE_LIBXML2 */
    1.45 +
    1.46 +/*****************************************************************************/
    1.47 +
    1.48 +/* the rest of this file is conditional on using libxml */
    1.49 +#ifdef USE_LIBXML2
    1.50 +
    1.51 +/*****************************************************************************/
    1.52 +
    1.53 +#define MAX_XML_BUFSIZE  65536  /* inflate_xml() stops growing xml_buf once it reaches this size */
    1.54 +
    1.55 +static char      *xml_buf;     /* shared output buffer for decompressing xml */
    1.56 +static size_t     xml_buf_size;  /* bytes currently allocated for xml_buf */
    1.57 +static z_stream   zxml;  /* zlib stream state, set up again on every inflate attempt */
    1.58 +
    1.59 +/*****************************************************************************/
    1.60 +
    1.61 +/*
    1.62 + * Debugging helper: walk a list of sibling nodes depth-first and log the
    1.63 + * name of every node that has no children.  `ret' is only passed along to
    1.64 + * the recursive calls; nothing is written to it.
    1.65 + */
    1.66 +static void print_nodes (xmlNodePtr node, Dataset **ret)
    1.67 +{
    1.68 +	xmlNodePtr cur;
    1.69 +
    1.70 +	for (cur = node; cur != NULL; cur = cur->next)
    1.71 +	{
    1.72 +		if (!cur->xmlChildrenNode)
    1.73 +		{
    1.74 +			/* leaf node: report its name */
    1.75 +			GT->DBGFN (GT, "name=%s", cur->name);
    1.76 +			continue;
    1.77 +		}
    1.78 +
    1.79 +		/* interior node: descend into the children */
    1.80 +		print_nodes (cur->xmlChildrenNode, ret);
    1.81 +	}
    1.82 +}
    1.77 +
    1.78 +/*
    1.79 + * Debug-only parser for a single NUL-terminated XML document: logs the
    1.80 + * leaf node names via print_nodes().
    1.81 + *
    1.82 + * Returns FALSE when XML_DEBUG is off, when `xml' is NULL or does not
    1.83 + * start with '<', or when libxml cannot parse it; TRUE otherwise.
    1.84 + */
    1.85 +BOOL gt_xml_parse (const char *xml, Dataset **ret)
    1.86 +{
    1.87 +	xmlDocPtr parsed;
    1.88 +
    1.89 +	/* disabled unless debugging because it doesn't work anyway: needs to
    1.90 +	 * share code with gt_xml_parse_indexed() */
    1.91 +	if (!XML_DEBUG)
    1.92 +		return FALSE;
    1.93 +
    1.94 +	/* only documents starting with '<' are handed to the parser */
    1.95 +	if (xml == NULL || xml[0] != '<')
    1.96 +		return FALSE;
    1.97 +
    1.98 +	parsed = xmlParseMemory (xml, strlen (xml));
    1.99 +	if (parsed == NULL)
   1.100 +		return FALSE;
   1.101 +
   1.102 +	print_nodes (parsed->xmlChildrenNode, ret);
   1.103 +	xmlFreeDoc (parsed);
   1.104 +
   1.105 +	return TRUE;
   1.106 +}
   1.100 +
   1.101 +/*
   1.102 + * Insert one attribute into the `children' dataset, translating some
   1.103 + * attribute names/values found in Gnutella XML to the ones giFT uses:
   1.104 + *
   1.105 + *   "bitrate" - the value gets "000" appended
   1.106 + *   "seconds" - stored under the key "duration"
   1.107 + *
   1.108 + * Keys are matched case-insensitively.  Nothing is inserted when `key'
   1.109 + * or `value' is NULL, or when the rewritten bitrate cannot be allocated.
   1.110 + */
   1.111 +static void add_child (Dataset **children, const char *key, const char *value)
   1.112 +{
   1.113 +	char *dup = NULL;
   1.114 +
   1.115 +	if (!key || !value)
   1.116 +		return;
   1.117 +
   1.118 +	if (!strcasecmp (key, "bitrate"))
   1.119 +	{
   1.120 +		/* don't hand a NULL value to the dataset if allocation failed */
   1.121 +		if (!(dup = stringf_dup ("%s000", value)))
   1.122 +			return;
   1.123 +
   1.124 +		value = dup;
   1.125 +	}
   1.126 +	else if (!strcasecmp (key, "seconds"))
   1.127 +	{
   1.128 +		key = "duration";
   1.129 +	}
   1.130 +
   1.131 +	dataset_insertstr (children, key, value);
   1.132 +
   1.133 +	/* dup is released right away; this relies on dataset_insertstr()
   1.134 +	 * storing its own copy of the value */
   1.135 +	free (dup);
   1.136 +}
   1.125 +
   1.126 +/*
   1.127 + * Build a dataset holding every attribute of `node' as a key->value
   1.128 + * string pair, after the renaming done by add_child().  Each pair is
   1.129 + * also logged when XML_DEBUG is set.
   1.130 + *
   1.131 + * Returns the dataset, or NULL when the node has no attributes.  The
   1.132 + * caller disposes of the result.
   1.133 + */
   1.134 +static Dataset *collect_attributes (xmlNode *node)
   1.135 +{
   1.136 +	const xmlAttr *attr;
   1.137 +	Dataset       *children = NULL;
   1.138 +	BOOL           do_log   = XML_DEBUG;
   1.139 +
   1.140 +	for (attr = node->properties; attr != NULL; attr = attr->next)
   1.141 +	{
   1.142 +		xmlChar *str;
   1.143 +
   1.144 +		/* is there an easier way to get attribute content? */
   1.145 +		str = xmlGetProp (node, attr->name);
   1.146 +
   1.147 +		if (do_log)
   1.148 +		{
   1.149 +			/* never pass NULL to a %s conversion */
   1.150 +			GT->dbg (GT, "name=%s content=%s",
   1.151 +			         (const char *)attr->name,
   1.152 +			         str ? (const char *)str : "(null)");
   1.153 +		}
   1.154 +
   1.155 +		/* add the key->value pair to the dataset; add_child() ignores
   1.156 +		 * a NULL value */
   1.157 +		add_child (&children, (const char *)attr->name,
   1.158 +		           (const char *)str);
   1.159 +
   1.160 +		/* xmlGetProp() allocates with libxml's allocator, so the copy
   1.161 +		 * must be released with xmlFree() rather than free() */
   1.162 +		if (str)
   1.163 +			xmlFree (str);
   1.164 +	}
   1.165 +
   1.166 +	return children;
   1.167 +}
   1.155 +
   1.156 +/*
   1.157 + * dataset_foreach() callback: copy one key/value pair of the attribute
   1.158 + * dataset into the metadata of `share'.
   1.159 + */
   1.160 +static void set_meta_foreach (ds_data_t *key, ds_data_t *value, Share *share)
   1.161 +{
   1.162 +	share_set_meta (share, key->data, value->data);
   1.163 +}
   1.163 +
   1.164 +/*
   1.165 + * Apply one element's attributes to the share it names.
   1.166 + *
   1.167 + * The "index" attribute in `children' selects the entry of `shares'.
   1.168 + * Nothing happens when the attribute is missing, when the index is not
   1.169 + * below `shares_len', or when that slot is empty.  Otherwise "index" is
   1.170 + * removed from `children' and every remaining pair is set as metadata.
   1.171 + */
   1.172 +static void set_share_meta (Share **shares, size_t shares_len,
   1.173 +                            Dataset *children)
   1.174 +{
   1.175 +	char   *idx_attr;
   1.176 +	size_t  slot;
   1.177 +	Share  *target;
   1.178 +
   1.179 +	idx_attr = dataset_lookupstr (children, "index");
   1.180 +	if (idx_attr == NULL)
   1.181 +		return;
   1.182 +
   1.183 +	slot = gift_strtoul (idx_attr);
   1.184 +
   1.185 +	/* reject indices outside the array as well as empty slots */
   1.186 +	if (slot >= shares_len || shares[slot] == NULL)
   1.187 +		return;
   1.188 +
   1.189 +	target = shares[slot];
   1.190 +
   1.191 +	/* the index itself is not metadata */
   1.192 +	dataset_removestr (children, "index");
   1.193 +
   1.194 +	dataset_foreach (children, DS_FOREACH(set_meta_foreach), target);
   1.195 +}
   1.190 +
   1.191 +/*
   1.192 + * Walk the direct children of the document's root element and apply the
   1.193 + * attributes of each one to the share selected by its "index" attribute.
   1.194 + * A document without a root element is ignored.
   1.195 + */
   1.196 +static void set_metadata_from_indexed_xml (Share **shares, size_t shares_len,
   1.197 +                                           xmlDoc *doc)
   1.198 +{
   1.199 +	xmlNode *root;
   1.200 +	xmlNode *child;
   1.201 +
   1.202 +	root = xmlDocGetRootElement (doc);
   1.203 +	if (root == NULL)
   1.204 +		return;
   1.205 +
   1.206 +	child = root->xmlChildrenNode;
   1.207 +
   1.208 +	while (child != NULL)
   1.209 +	{
   1.210 +		Dataset *attrs = collect_attributes (child);
   1.211 +
   1.212 +		set_share_meta (shares, shares_len, attrs);
   1.213 +
   1.214 +		/* the attribute set is only needed for this one element */
   1.215 +		dataset_clear (attrs);
   1.216 +
   1.217 +		child = child->next;
   1.218 +	}
   1.219 +}
   1.209 +
   1.210 +/*
   1.211 + * Run one complete inflate pass over `bin_len' bytes at `xml', writing
   1.212 + * into the shared xml_buf.  One byte of xml_buf is held back so the
   1.213 + * caller can append a terminator.
   1.214 + *
   1.215 + * Returns the zlib status of the pass.  inflate_xml() treats
   1.216 + * Z_STREAM_END as success and Z_BUF_ERROR as "retry with a bigger
   1.217 + * buffer".  Z_STREAM_ERROR is returned when no output buffer exists.
   1.218 + */
   1.219 +static int try_inflate_xml (const char *xml, size_t bin_len)
   1.220 +{
   1.221 +	int ret;
   1.222 +
   1.223 +	/* without a buffer, `xml_buf_size - 1' below would wrap around */
   1.224 +	if (!xml_buf || xml_buf_size == 0)
   1.225 +		return Z_STREAM_ERROR;
   1.226 +
   1.227 +	/* set zlib allocation data */
   1.228 +	zxml.zalloc    = Z_NULL;
   1.229 +	zxml.zfree     = Z_NULL;
   1.230 +	zxml.opaque    = Z_NULL;
   1.231 +
   1.232 +	/* set the input parameters; zlib's byte pointers are Bytef * */
   1.233 +	zxml.next_in   = (Bytef *)xml;
   1.234 +	zxml.avail_in  = bin_len;
   1.235 +
   1.236 +	/* set the output parameters, leaving room for a terminator */
   1.237 +	zxml.next_out  = (Bytef *)xml_buf;
   1.238 +	zxml.avail_out = xml_buf_size - 1;
   1.239 +
   1.240 +	if ((ret = inflateInit (&zxml)) != Z_OK)
   1.241 +		return ret;
   1.242 +
   1.243 +	/* Z_FINISH: the whole stream is expected to inflate in one call */
   1.244 +	ret = inflate (&zxml, Z_FINISH);
   1.245 +	inflateEnd (&zxml);
   1.246 +
   1.247 +	return ret;
   1.248 +}
   1.235 +
   1.236 +/*
   1.237 + * Inflate `bin_len' bytes of compressed XML into the shared xml_buf,
   1.238 + * doubling the buffer after each Z_BUF_ERROR for as long as it is
   1.239 + * smaller than MAX_XML_BUFSIZE.
   1.240 + *
   1.241 + * Returns the NUL-terminated text (owned by this file and overwritten by
   1.242 + * the next call), or NULL when the buffer cannot be grown or the data
   1.243 + * does not inflate to a complete stream.
   1.244 + */
   1.245 +static const char *inflate_xml (const char *xml, size_t bin_len)
   1.246 +{
   1.247 +	size_t  text_len;
   1.248 +	int     status;
   1.249 +
   1.250 +	for (;;)
   1.251 +	{
   1.252 +		size_t  grown_size;
   1.253 +		char   *grown;
   1.254 +
   1.255 +		status = try_inflate_xml (xml, bin_len);
   1.256 +
   1.257 +		/* stop unless a bigger buffer is both needed and still allowed */
   1.258 +		if (status != Z_BUF_ERROR || xml_buf_size >= MAX_XML_BUFSIZE)
   1.259 +			break;
   1.260 +
   1.261 +		grown_size = xml_buf_size * 2;
   1.262 +		grown      = realloc (xml_buf, grown_size);
   1.263 +
   1.264 +		if (grown == NULL)
   1.265 +			return NULL;
   1.266 +
   1.267 +		xml_buf      = grown;
   1.268 +		xml_buf_size = grown_size;
   1.269 +	}
   1.270 +
   1.271 +	if (status != Z_STREAM_END)
   1.272 +		return NULL;
   1.273 +
   1.274 +	/* null terminate (the now hopefully plaintext) XML; the last byte of
   1.275 +	 * the buffer was kept free for this */
   1.276 +	text_len = (xml_buf_size - 1) - zxml.avail_out;
   1.277 +	xml_buf[text_len] = 0;
   1.278 +
   1.279 +	if (XML_DEBUG)
   1.280 +		GT->dbg (GT, "inflated xml: %s", xml_buf);
   1.281 +
   1.282 +	return xml_buf;
   1.283 +}
   1.270 +
   1.271 +/*
   1.272 + * Parse the XML block of a query hit and apply the metadata it carries to
   1.273 + * the shares it refers to through each element's "index" attribute.
   1.274 + *
   1.275 + *   xml        - start of the block, optionally prefixed with an encoding
   1.276 + *                tag: "{}" or "{plaintext}" for plain text, "{deflate}"
   1.277 + *                for zlib-compressed data
   1.278 + *   bin_len    - number of bytes available at `xml', tag included
   1.279 + *   shares     - array the "index" attributes point into
   1.280 + *   shares_len - number of entries in `shares'
   1.281 + *
   1.282 + * Returns TRUE if every document in the block parsed, FALSE on malformed
   1.283 + * input, decompression failure, allocation failure or a parse error.
   1.284 + * Metadata from documents parsed before a failure stays applied.
   1.285 + */
   1.286 +BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
   1.287 +                           size_t shares_len)
   1.288 +{
   1.289 +	const size_t plain_len   = sizeof ("{plaintext}") - 1;
   1.290 +	const size_t deflate_len = sizeof ("{deflate}") - 1;
   1.291 +	xmlDoc      *doc;
   1.292 +	char        *copy        = NULL;   /* terminated copy of plain input */
   1.293 +	BOOL         inflated    = FALSE;
   1.294 +	BOOL         ok          = TRUE;
   1.295 +	size_t       xml_len;
   1.296 +	const char  *next;
   1.297 +	const char  *ptr;
   1.298 +
   1.299 +	if (!xml || bin_len <= 4)
   1.300 +		return FALSE;
   1.301 +
   1.302 +	/*
   1.303 +	 * Look for the encoding type, currently possible
   1.304 +	 * encoding values are: "{}" meaning plain text, "{plaintext}",
   1.305 +	 * and "{deflate}".  bin_len is reduced together with xml so that it
   1.306 +	 * always counts the bytes remaining after the tag.
   1.307 +	 */
   1.308 +	if (!strncmp (xml, "{}", 2))
   1.309 +	{
   1.310 +		xml     += 2;
   1.311 +		bin_len -= 2;
   1.312 +	}
   1.313 +	else if (bin_len >= plain_len &&
   1.314 +	         !strncasecmp (xml, "{plaintext}", plain_len))
   1.315 +	{
   1.316 +		xml     += plain_len;
   1.317 +		bin_len -= plain_len;
   1.318 +	}
   1.319 +	else if (bin_len >= deflate_len &&
   1.320 +	         !strncasecmp (xml, "{deflate}", deflate_len))
   1.321 +	{
   1.322 +		/*
   1.323 +		 * Only the bytes after the tag are compressed data; passing the
   1.324 +		 * full bin_len would let zlib read past the end of the input.
   1.325 +		 * A trailing terminator is not subtracted because some servents
   1.326 +		 * (MRPH) don't terminate the XML.
   1.327 +		 */
   1.328 +		xml = inflate_xml (xml + deflate_len, bin_len - deflate_len);
   1.329 +
   1.330 +		if (XML_DEBUG)
   1.331 +			assert (xml != NULL);    /* assume valid input */
   1.332 +
   1.333 +		if (!xml)
   1.334 +			return FALSE;
   1.335 +
   1.336 +		inflated = TRUE;
   1.337 +	}
   1.338 +
   1.339 +	/*
   1.340 +	 * inflate_xml() terminates its output, but plain text arrives as-is
   1.341 +	 * and is not always terminated.  strlen()/strstr() below need a
   1.342 +	 * terminator, so work on a terminated copy when there is none within
   1.343 +	 * the bin_len bytes we were given.
   1.344 +	 */
   1.345 +	if (!inflated && !memchr (xml, 0, bin_len))
   1.346 +	{
   1.347 +		if (!(copy = malloc (bin_len + 1)))
   1.348 +			return FALSE;
   1.349 +
   1.350 +		memcpy (copy, xml, bin_len);
   1.351 +		copy[bin_len] = 0;
   1.352 +		xml = copy;
   1.353 +	}
   1.354 +
   1.355 +	xml_len = strlen (xml);
   1.356 +
   1.357 +	/*
   1.358 +	 * The XML block is a sequence of XML documents, separated by the <?xml
   1.359 +	 * version="1.0"> document prefix.  Parse each one separately.
   1.360 +	 */
   1.361 +	for (ptr = xml; ptr != NULL; ptr = next)
   1.362 +	{
   1.363 +		size_t chunk_len;
   1.364 +
   1.365 +		if (ptr[0] != '<')
   1.366 +		{
   1.367 +			ok = FALSE;
   1.368 +			break;
   1.369 +		}
   1.370 +
   1.371 +		next = strstr (ptr + 1, "<?xml");
   1.372 +
   1.373 +		/* the last document runs to the end of the remaining text */
   1.374 +		chunk_len = xml_len;
   1.375 +		if (next)
   1.376 +			chunk_len = next - ptr;
   1.377 +
   1.378 +		if (!(doc = xmlParseMemory (ptr, chunk_len)))
   1.379 +		{
   1.380 +			ok = FALSE;
   1.381 +			break;
   1.382 +		}
   1.383 +
   1.384 +		xml_len -= chunk_len;
   1.385 +
   1.386 +		set_metadata_from_indexed_xml (shares, shares_len, doc);
   1.387 +		xmlFreeDoc (doc);
   1.388 +	}
   1.389 +
   1.390 +	free (copy);
   1.391 +
   1.392 +	return ok;
   1.393 +}
   1.341 +
   1.342 +/*
   1.343 + * Error callback handed to libxml in gt_xml_init(); gets called when
   1.344 + * there are parsing errors.  The message is formatted and logged only
   1.345 + * when XML_DEBUG is set; `udata' is not used.
   1.346 + */
   1.347 +static void error_handler_func (void *udata, const char *msg, ...)
   1.348 +{
   1.349 +	char     text[1024];
   1.350 +	va_list  ap;
   1.351 +
   1.352 +	/* messages stay hidden outside of debugging until it is figured out
   1.353 +	 * why namespace errors get reported (parsing still seems to work...) */
   1.354 +	if (XML_DEBUG)
   1.355 +	{
   1.356 +		va_start (ap, msg);
   1.357 +		vsnprintf (text, sizeof (text) - 1, msg, ap);
   1.358 +		va_end (ap);
   1.359 +
   1.360 +		GT->DBGFN (GT, "xml parse error: %s", text);
   1.361 +	}
   1.362 +}
   1.359 +
   1.360 +/*****************************************************************************/
   1.361 +
   1.362 +#endif /* USE_LIBXML2 */
   1.363 +
   1.364 +/*****************************************************************************/
   1.365 +
   1.366 +/*
   1.367 + * Set up the XML support: route libxml's error messages through
   1.368 + * error_handler_func() and allocate the initial decompression buffer.
   1.369 + * Does nothing when built without libxml2.  Calling it again replaces
   1.370 + * the buffer instead of leaking the previous one.
   1.371 + */
   1.372 +void gt_xml_init (void)
   1.373 +{
   1.374 +#ifdef USE_LIBXML2
   1.375 +	enum { INITIAL_XML_BUFSIZE = 32 };
   1.376 +
   1.377 +	/* so libxml doesn't print messages on stderr */
   1.378 +	xmlSetGenericErrorFunc (NULL, error_handler_func);
   1.379 +
   1.380 +	/* release a buffer left over from an earlier init; no-op on NULL */
   1.381 +	free (xml_buf);
   1.382 +
   1.383 +	xml_buf = malloc (INITIAL_XML_BUFSIZE);
   1.384 +	assert (xml_buf != NULL);
   1.385 +
   1.386 +	/* the assert vanishes under NDEBUG, so never record a size for a
   1.387 +	 * buffer that was not actually allocated */
   1.388 +	xml_buf_size = xml_buf ? INITIAL_XML_BUFSIZE : 0;
   1.389 +
   1.390 +	memset (&zxml, 0, sizeof (zxml));
   1.391 +#endif /* USE_LIBXML2 */
   1.392 +}
   1.379 +
   1.380 +/*
   1.381 + * Release the decompression buffer and reset its bookkeeping.  Does
   1.382 + * nothing when built without libxml2.
   1.383 + */
   1.384 +void gt_xml_cleanup (void)
   1.385 +{
   1.386 +#ifdef USE_LIBXML2
   1.387 +	xml_buf_size = 0;
   1.388 +
   1.389 +	free (xml_buf);
   1.390 +	xml_buf = NULL;
   1.391 +#endif /* USE_LIBXML2 */
   1.392 +}