Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
diff src/xml.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/xml.c Sat Feb 20 21:18:28 2010 -0800 1.3 @@ -0,0 +1,384 @@ 1.4 +/* 1.5 + * $Id: xml.c,v 1.10 2004/04/13 07:25:18 hipnod Exp $ 1.6 + * 1.7 + * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) 1.8 + * 1.9 + * This program is free software; you can redistribute it and/or modify it 1.10 + * under the terms of the GNU General Public License as published by the 1.11 + * Free Software Foundation; either version 2, or (at your option) any 1.12 + * later version. 1.13 + * 1.14 + * This program is distributed in the hope that it will be useful, but 1.15 + * WITHOUT ANY WARRANTY; without even the implied warranty of 1.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1.17 + * General Public License for more details. 1.18 + */ 1.19 + 1.20 +#include "gt_gnutella.h" 1.21 + 1.22 +#include <zlib.h> 1.23 + 1.24 +#ifdef USE_LIBXML2 1.25 +#include <libxml/parser.h> /* xmlParseMemory() */ 1.26 +#include <libxml/xmlerror.h> /* xmlSetGenericErrorFunc() */ 1.27 +#endif /* USE_LIBXML2 */ 1.28 + 1.29 +#include "xml.h" 1.30 + 1.31 +/*****************************************************************************/ 1.32 + 1.33 +#ifndef USE_LIBXML2 1.34 +BOOL gt_xml_parse (const char *xml, Dataset **ret) 1.35 +{ 1.36 + return FALSE; 1.37 +} 1.38 + 1.39 +BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares, 1.40 + size_t shares_len) 1.41 +{ 1.42 + return FALSE; 1.43 +} 1.44 +#endif /* !USE_LIBXML2 */ 1.45 + 1.46 +/*****************************************************************************/ 1.47 + 1.48 +/* the rest of this file is conditional on using libxml */ 1.49 +#ifdef USE_LIBXML2 1.50 + 1.51 +/*****************************************************************************/ 1.52 + 1.53 +#define MAX_XML_BUFSIZE 65536 1.54 + 1.55 +static char *xml_buf; /* for decompressing xml */ 1.56 +static size_t xml_buf_size; 1.57 +static z_stream zxml; 1.58 + 1.59 +/*****************************************************************************/ 1.60 + 1.61 +static void print_nodes (xmlNodePtr node, Dataset **ret) 1.62 +{ 1.63 + while (node != NULL) 1.64 + { 1.65 + /* 1.66 + * If this node has no children, it is a leaf node, 1.67 + * so set the metadata from it. 1.68 + */ 1.69 + if (node->xmlChildrenNode) 1.70 + print_nodes (node->xmlChildrenNode, ret); 1.71 + else 1.72 + GT->DBGFN (GT, "name=%s", node->name); 1.73 + 1.74 + node = node->next; 1.75 + } 1.76 +} 1.77 + 1.78 +BOOL gt_xml_parse (const char *xml, Dataset **ret) 1.79 +{ 1.80 + xmlDocPtr doc; 1.81 + 1.82 + /* disable for now because it doesn't work anyway: need to share 1.83 + * code with parse_indexed */ 1.84 + if (!XML_DEBUG) 1.85 + return FALSE; 1.86 + 1.87 + /* only parse documents starting with '<' */ 1.88 + if (!xml || xml[0] != '<') 1.89 + return FALSE; 1.90 + 1.91 + if (!(doc = xmlParseMemory (xml, strlen (xml)))) 1.92 + return FALSE; 1.93 + 1.94 + print_nodes (doc->xmlChildrenNode, ret); 1.95 + 1.96 + xmlFreeDoc (doc); 1.97 + 1.98 + return TRUE; 1.99 +} 1.100 + 1.101 +static void add_child (Dataset **children, const char *key, const char *value) 1.102 +{ 1.103 + char *dup = NULL; 1.104 + 1.105 + if (!key || !value) 1.106 + return; 1.107 + 1.108 + /* 1.109 + * Hack to map some of the attributes from XML documents found 1.110 + * on Gnutella to ones peddled by giFT. 1.111 + */ 1.112 + if (!strcasecmp (key, "bitrate")) 1.113 + { 1.114 + dup = stringf_dup ("%s000", value); 1.115 + value = dup; 1.116 + } 1.117 + else if (!strcasecmp (key, "seconds")) 1.118 + { 1.119 + key = "duration"; 1.120 + } 1.121 + 1.122 + dataset_insertstr (children, key, value); 1.123 + free (dup); 1.124 +} 1.125 + 1.126 +static Dataset *collect_attributes (xmlNode *node) 1.127 +{ 1.128 + const xmlAttr *attr; 1.129 + Dataset *children = NULL; 1.130 + BOOL do_log = XML_DEBUG; 1.131 + 1.132 + for (attr = node->properties; attr != NULL; attr = attr->next) 1.133 + { 1.134 + xmlChar *str; 1.135 + 1.136 + /* is there an easier way to get attribute content? */ 1.137 + str = xmlGetProp (node, attr->name); 1.138 + 1.139 + if (do_log) 1.140 + { 1.141 + GT->dbg (GT, "name=%s content=%s", 1.142 + (const char *)attr->name, (const char *)str); 1.143 + } 1.144 + 1.145 + /* add the key->value pair to the dataset */ 1.146 + add_child (&children, (const char *)attr->name, 1.147 + (const char *)str); 1.148 + 1.149 + /* xmlGetProp() allocates memory */ 1.150 + free (str); 1.151 + } 1.152 + 1.153 + return children; 1.154 +} 1.155 + 1.156 +static void set_meta_foreach (ds_data_t *key, ds_data_t *value, Share *share) 1.157 +{ 1.158 + char *meta_key = key->data; 1.159 + char *meta_val = value->data; 1.160 + 1.161 + share_set_meta (share, meta_key, meta_val); 1.162 +} 1.163 + 1.164 +static void set_share_meta (Share **shares, size_t shares_len, 1.165 + Dataset *children) 1.166 +{ 1.167 + char *index_str; 1.168 + size_t index; 1.169 + 1.170 + /* 1.171 + * Lookup the "index" attribute, and use that to determine 1.172 + * which Share the XML applies to. 1.173 + */ 1.174 + if (!(index_str = dataset_lookupstr (children, "index"))) 1.175 + return; 1.176 + 1.177 + index = gift_strtoul (index_str); 1.178 + 1.179 + if (index >= shares_len) 1.180 + return; 1.181 + 1.182 + if (!shares[index]) 1.183 + return; 1.184 + 1.185 + /* skip the index attribute */ 1.186 + dataset_removestr (children, "index"); 1.187 + 1.188 + dataset_foreach (children, DS_FOREACH(set_meta_foreach), shares[index]); 1.189 +} 1.190 + 1.191 +static void set_metadata_from_indexed_xml (Share **shares, size_t shares_len, 1.192 + xmlDoc *doc) 1.193 +{ 1.194 + xmlNode *node; 1.195 + 1.196 + if (!(node = xmlDocGetRootElement (doc))) 1.197 + return; 1.198 + 1.199 + for (node = node->xmlChildrenNode; node != NULL; node = node->next) 1.200 + { 1.201 + Dataset *children; 1.202 + 1.203 + children = collect_attributes (node); 1.204 + 1.205 + set_share_meta (shares, shares_len, children); 1.206 + dataset_clear (children); 1.207 + } 1.208 +} 1.209 + 1.210 +static int try_inflate_xml (const char *xml, size_t bin_len) 1.211 +{ 1.212 + int ret; 1.213 + 1.214 + /* set zlib allocation data */ 1.215 + zxml.zalloc = Z_NULL; 1.216 + zxml.zfree = Z_NULL; 1.217 + zxml.opaque = Z_NULL; 1.218 + 1.219 + /* set the input parameters */ 1.220 + zxml.next_in = (char *)xml; 1.221 + zxml.avail_in = bin_len; 1.222 + 1.223 + /* set the output parameters */ 1.224 + zxml.next_out = xml_buf; 1.225 + zxml.avail_out = xml_buf_size - 1; 1.226 + 1.227 + if ((ret = inflateInit (&zxml)) != Z_OK) 1.228 + return ret; 1.229 + 1.230 + ret = inflate (&zxml, Z_FINISH); 1.231 + inflateEnd (&zxml); 1.232 + 1.233 + return ret; 1.234 +} 1.235 + 1.236 +static const char *inflate_xml (const char *xml, size_t bin_len) 1.237 +{ 1.238 + size_t xml_len; 1.239 + int ret; 1.240 + 1.241 + ret = try_inflate_xml (xml, bin_len); 1.242 + 1.243 + if (ret == Z_BUF_ERROR && xml_buf_size < MAX_XML_BUFSIZE) 1.244 + { 1.245 + size_t newsize = xml_buf_size * 2; 1.246 + char *newbuf; 1.247 + 1.248 + if (!(newbuf = realloc (xml_buf, newsize))) 1.249 + return NULL; 1.250 + 1.251 + xml_buf = newbuf; 1.252 + xml_buf_size = newsize; 1.253 + 1.254 + /* retry with bigger buffer */ 1.255 + return inflate_xml (xml, bin_len); 1.256 + } 1.257 + 1.258 + if (ret != Z_STREAM_END) 1.259 + return NULL; 1.260 + 1.261 + /* null terminate (the now hopefully plaintext) XML */ 1.262 + xml_len = (xml_buf_size - 1) - zxml.avail_out; 1.263 + xml_buf[xml_len] = 0; 1.264 + 1.265 + if (XML_DEBUG) 1.266 + GT->dbg (GT, "inflated xml: %s", xml_buf); 1.267 + 1.268 + return xml_buf; 1.269 +} 1.270 + 1.271 +BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares, 1.272 + size_t shares_len) 1.273 +{ 1.274 + xmlDoc *doc; 1.275 + size_t xml_len; 1.276 + const char *next; 1.277 + const char *ptr; 1.278 + 1.279 + if (!xml || bin_len <= 4) 1.280 + return FALSE; 1.281 + 1.282 + /* 1.283 + * Look for the encoding type, currently possible 1.284 + * encoding values are: "{}" meaning plain text, "{plaintext}", 1.285 + * and "{deflate}". 1.286 + */ 1.287 + 1.288 + if (!strncmp (xml, "{}", 2)) 1.289 + { 1.290 + xml += 2; 1.291 + } 1.292 + else if (bin_len >= sizeof("{plaintext}") - 1 && 1.293 + !strncasecmp (xml, "{plaintext}", sizeof("{plaintext}") - 1)) 1.294 + { 1.295 + xml += sizeof("{plaintext}") - 1; 1.296 + } 1.297 + else if (bin_len >= sizeof("{deflate}") - 1 && 1.298 + !strncasecmp (xml, "{deflate}", sizeof("{deflate}") - 1)) 1.299 + { 1.300 + /* the len passed here should be bin_len - 1, but some servents (MRPH) 1.301 + * don't terminate the XML */ 1.302 + xml = inflate_xml (xml + sizeof("{deflate}") - 1, bin_len); 1.303 + 1.304 + if (XML_DEBUG) 1.305 + assert (xml != NULL); /* assume valid input */ 1.306 + 1.307 + if (!xml) 1.308 + return FALSE; 1.309 + } 1.310 + 1.311 + xml_len = strlen (xml); 1.312 + 1.313 + /* 1.314 + * The XML block is a sequence of XML documents, separated by the <?xml 1.315 + * version="1.0"> document prefix. Parse each one separately. 1.316 + */ 1.317 + for (ptr = xml; ptr != NULL; ptr = next) 1.318 + { 1.319 + size_t chunk_len; 1.320 + 1.321 + if (ptr[0] != '<') 1.322 + return FALSE; 1.323 + 1.324 + next = strstr (ptr + 1, "<?xml"); 1.325 + 1.326 + chunk_len = xml_len; 1.327 + if (next) 1.328 + chunk_len = next - ptr; 1.329 + 1.330 + if (!(doc = xmlParseMemory (ptr, chunk_len))) 1.331 + return FALSE; 1.332 + 1.333 + xml_len -= chunk_len; 1.334 + 1.335 + set_metadata_from_indexed_xml (shares, shares_len, doc); 1.336 + xmlFreeDoc (doc); 1.337 + } 1.338 + 1.339 + return TRUE; 1.340 +} 1.341 + 1.342 +/* gets called when there are parsing errors */ 1.343 +static void error_handler_func (void *udata, const char *msg, ...) 1.344 +{ 1.345 + char buf[1024]; 1.346 + va_list args; 1.347 + 1.348 + /* this is here until i figure out why i get a message about 1.349 + * namespace errors (but it still seems to work...) */ 1.350 + if (!XML_DEBUG) 1.351 + return; 1.352 + 1.353 + va_start (args, msg); 1.354 + vsnprintf (buf, sizeof (buf) - 1, msg, args); 1.355 + va_end (args); 1.356 + 1.357 + GT->DBGFN (GT, "xml parse error: %s", buf); 1.358 +} 1.359 + 1.360 +/*****************************************************************************/ 1.361 + 1.362 +#endif /* USE_LIBXML2 */ 1.363 + 1.364 +/*****************************************************************************/ 1.365 + 1.366 +void gt_xml_init (void) 1.367 +{ 1.368 +#ifdef USE_LIBXML2 1.369 + /* so libxml doesn't print messages on stderr */ 1.370 + xmlSetGenericErrorFunc (NULL, error_handler_func); 1.371 + 1.372 + xml_buf = malloc (32); 1.373 + assert (xml_buf != NULL); 1.374 + xml_buf_size = 32; 1.375 + 1.376 + memset (&zxml, 0, sizeof (zxml)); 1.377 +#endif /* USE_LIBXML2 */ 1.378 +} 1.379 + 1.380 +void gt_xml_cleanup (void) 1.381 +{ 1.382 +#ifdef USE_LIBXML2 1.383 + free (xml_buf); 1.384 + xml_buf = NULL; 1.385 + xml_buf_size = 0; 1.386 +#endif /* USE_LIBXML2 */ 1.387 +}