annotate src/xml.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
rev   line source
paulo@0 1 /*
paulo@0 2 * $Id: xml.c,v 1.10 2004/04/13 07:25:18 hipnod Exp $
paulo@0 3 *
paulo@0 4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
paulo@0 5 *
paulo@0 6 * This program is free software; you can redistribute it and/or modify it
paulo@0 7 * under the terms of the GNU General Public License as published by the
paulo@0 8 * Free Software Foundation; either version 2, or (at your option) any
paulo@0 9 * later version.
paulo@0 10 *
paulo@0 11 * This program is distributed in the hope that it will be useful, but
paulo@0 12 * WITHOUT ANY WARRANTY; without even the implied warranty of
paulo@0 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
paulo@0 14 * General Public License for more details.
paulo@0 15 */
paulo@0 16
paulo@0 17 #include "gt_gnutella.h"
paulo@0 18
paulo@0 19 #include <zlib.h>
paulo@0 20
paulo@0 21 #ifdef USE_LIBXML2
paulo@0 22 #include <libxml/parser.h> /* xmlParseMemory() */
paulo@0 23 #include <libxml/xmlerror.h> /* xmlSetGenericErrorFunc() */
paulo@0 24 #endif /* USE_LIBXML2 */
paulo@0 25
paulo@0 26 #include "xml.h"
paulo@0 27
paulo@0 28 /*****************************************************************************/
paulo@0 29
paulo@0 30 #ifndef USE_LIBXML2
paulo@0 31 BOOL gt_xml_parse (const char *xml, Dataset **ret)
paulo@0 32 {
paulo@0 33 return FALSE;
paulo@0 34 }
paulo@0 35
paulo@0 36 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
paulo@0 37 size_t shares_len)
paulo@0 38 {
paulo@0 39 return FALSE;
paulo@0 40 }
paulo@0 41 #endif /* !USE_LIBXML2 */
paulo@0 42
paulo@0 43 /*****************************************************************************/
paulo@0 44
paulo@0 45 /* the rest of this file is conditional on using libxml */
paulo@0 46 #ifdef USE_LIBXML2
paulo@0 47
paulo@0 48 /*****************************************************************************/
paulo@0 49
paulo@0 50 #define MAX_XML_BUFSIZE 65536
paulo@0 51
paulo@0 52 static char *xml_buf; /* for decompressing xml */
paulo@0 53 static size_t xml_buf_size;
paulo@0 54 static z_stream zxml;
paulo@0 55
paulo@0 56 /*****************************************************************************/
paulo@0 57
paulo@0 58 static void print_nodes (xmlNodePtr node, Dataset **ret)
paulo@0 59 {
paulo@0 60 while (node != NULL)
paulo@0 61 {
paulo@0 62 /*
paulo@0 63 * If this node has no children, it is a leaf node,
paulo@0 64 * so set the metadata from it.
paulo@0 65 */
paulo@0 66 if (node->xmlChildrenNode)
paulo@0 67 print_nodes (node->xmlChildrenNode, ret);
paulo@0 68 else
paulo@0 69 GT->DBGFN (GT, "name=%s", node->name);
paulo@0 70
paulo@0 71 node = node->next;
paulo@0 72 }
paulo@0 73 }
paulo@0 74
paulo@0 75 BOOL gt_xml_parse (const char *xml, Dataset **ret)
paulo@0 76 {
paulo@0 77 xmlDocPtr doc;
paulo@0 78
paulo@0 79 /* disable for now because it doesn't work anyway: need to share
paulo@0 80 * code with parse_indexed */
paulo@0 81 if (!XML_DEBUG)
paulo@0 82 return FALSE;
paulo@0 83
paulo@0 84 /* only parse documents starting with '<' */
paulo@0 85 if (!xml || xml[0] != '<')
paulo@0 86 return FALSE;
paulo@0 87
paulo@0 88 if (!(doc = xmlParseMemory (xml, strlen (xml))))
paulo@0 89 return FALSE;
paulo@0 90
paulo@0 91 print_nodes (doc->xmlChildrenNode, ret);
paulo@0 92
paulo@0 93 xmlFreeDoc (doc);
paulo@0 94
paulo@0 95 return TRUE;
paulo@0 96 }
paulo@0 97
paulo@0 98 static void add_child (Dataset **children, const char *key, const char *value)
paulo@0 99 {
paulo@0 100 char *dup = NULL;
paulo@0 101
paulo@0 102 if (!key || !value)
paulo@0 103 return;
paulo@0 104
paulo@0 105 /*
paulo@0 106 * Hack to map some of the attributes from XML documents found
paulo@0 107 * on Gnutella to ones peddled by giFT.
paulo@0 108 */
paulo@0 109 if (!strcasecmp (key, "bitrate"))
paulo@0 110 {
paulo@0 111 dup = stringf_dup ("%s000", value);
paulo@0 112 value = dup;
paulo@0 113 }
paulo@0 114 else if (!strcasecmp (key, "seconds"))
paulo@0 115 {
paulo@0 116 key = "duration";
paulo@0 117 }
paulo@0 118
paulo@0 119 dataset_insertstr (children, key, value);
paulo@0 120 free (dup);
paulo@0 121 }
paulo@0 122
paulo@0 123 static Dataset *collect_attributes (xmlNode *node)
paulo@0 124 {
paulo@0 125 const xmlAttr *attr;
paulo@0 126 Dataset *children = NULL;
paulo@0 127 BOOL do_log = XML_DEBUG;
paulo@0 128
paulo@0 129 for (attr = node->properties; attr != NULL; attr = attr->next)
paulo@0 130 {
paulo@0 131 xmlChar *str;
paulo@0 132
paulo@0 133 /* is there an easier way to get attribute content? */
paulo@0 134 str = xmlGetProp (node, attr->name);
paulo@0 135
paulo@0 136 if (do_log)
paulo@0 137 {
paulo@0 138 GT->dbg (GT, "name=%s content=%s",
paulo@0 139 (const char *)attr->name, (const char *)str);
paulo@0 140 }
paulo@0 141
paulo@0 142 /* add the key->value pair to the dataset */
paulo@0 143 add_child (&children, (const char *)attr->name,
paulo@0 144 (const char *)str);
paulo@0 145
paulo@0 146 /* xmlGetProp() allocates memory */
paulo@0 147 free (str);
paulo@0 148 }
paulo@0 149
paulo@0 150 return children;
paulo@0 151 }
paulo@0 152
paulo@0 153 static void set_meta_foreach (ds_data_t *key, ds_data_t *value, Share *share)
paulo@0 154 {
paulo@0 155 char *meta_key = key->data;
paulo@0 156 char *meta_val = value->data;
paulo@0 157
paulo@0 158 share_set_meta (share, meta_key, meta_val);
paulo@0 159 }
paulo@0 160
paulo@0 161 static void set_share_meta (Share **shares, size_t shares_len,
paulo@0 162 Dataset *children)
paulo@0 163 {
paulo@0 164 char *index_str;
paulo@0 165 size_t index;
paulo@0 166
paulo@0 167 /*
paulo@0 168 * Lookup the "index" attribute, and use that to determine
paulo@0 169 * which Share the XML applies to.
paulo@0 170 */
paulo@0 171 if (!(index_str = dataset_lookupstr (children, "index")))
paulo@0 172 return;
paulo@0 173
paulo@0 174 index = gift_strtoul (index_str);
paulo@0 175
paulo@0 176 if (index >= shares_len)
paulo@0 177 return;
paulo@0 178
paulo@0 179 if (!shares[index])
paulo@0 180 return;
paulo@0 181
paulo@0 182 /* skip the index attribute */
paulo@0 183 dataset_removestr (children, "index");
paulo@0 184
paulo@0 185 dataset_foreach (children, DS_FOREACH(set_meta_foreach), shares[index]);
paulo@0 186 }
paulo@0 187
paulo@0 188 static void set_metadata_from_indexed_xml (Share **shares, size_t shares_len,
paulo@0 189 xmlDoc *doc)
paulo@0 190 {
paulo@0 191 xmlNode *node;
paulo@0 192
paulo@0 193 if (!(node = xmlDocGetRootElement (doc)))
paulo@0 194 return;
paulo@0 195
paulo@0 196 for (node = node->xmlChildrenNode; node != NULL; node = node->next)
paulo@0 197 {
paulo@0 198 Dataset *children;
paulo@0 199
paulo@0 200 children = collect_attributes (node);
paulo@0 201
paulo@0 202 set_share_meta (shares, shares_len, children);
paulo@0 203 dataset_clear (children);
paulo@0 204 }
paulo@0 205 }
paulo@0 206
paulo@0 207 static int try_inflate_xml (const char *xml, size_t bin_len)
paulo@0 208 {
paulo@0 209 int ret;
paulo@0 210
paulo@0 211 /* set zlib allocation data */
paulo@0 212 zxml.zalloc = Z_NULL;
paulo@0 213 zxml.zfree = Z_NULL;
paulo@0 214 zxml.opaque = Z_NULL;
paulo@0 215
paulo@0 216 /* set the input parameters */
paulo@0 217 zxml.next_in = (char *)xml;
paulo@0 218 zxml.avail_in = bin_len;
paulo@0 219
paulo@0 220 /* set the output parameters */
paulo@0 221 zxml.next_out = xml_buf;
paulo@0 222 zxml.avail_out = xml_buf_size - 1;
paulo@0 223
paulo@0 224 if ((ret = inflateInit (&zxml)) != Z_OK)
paulo@0 225 return ret;
paulo@0 226
paulo@0 227 ret = inflate (&zxml, Z_FINISH);
paulo@0 228 inflateEnd (&zxml);
paulo@0 229
paulo@0 230 return ret;
paulo@0 231 }
paulo@0 232
paulo@0 233 static const char *inflate_xml (const char *xml, size_t bin_len)
paulo@0 234 {
paulo@0 235 size_t xml_len;
paulo@0 236 int ret;
paulo@0 237
paulo@0 238 ret = try_inflate_xml (xml, bin_len);
paulo@0 239
paulo@0 240 if (ret == Z_BUF_ERROR && xml_buf_size < MAX_XML_BUFSIZE)
paulo@0 241 {
paulo@0 242 size_t newsize = xml_buf_size * 2;
paulo@0 243 char *newbuf;
paulo@0 244
paulo@0 245 if (!(newbuf = realloc (xml_buf, newsize)))
paulo@0 246 return NULL;
paulo@0 247
paulo@0 248 xml_buf = newbuf;
paulo@0 249 xml_buf_size = newsize;
paulo@0 250
paulo@0 251 /* retry with bigger buffer */
paulo@0 252 return inflate_xml (xml, bin_len);
paulo@0 253 }
paulo@0 254
paulo@0 255 if (ret != Z_STREAM_END)
paulo@0 256 return NULL;
paulo@0 257
paulo@0 258 /* null terminate (the now hopefully plaintext) XML */
paulo@0 259 xml_len = (xml_buf_size - 1) - zxml.avail_out;
paulo@0 260 xml_buf[xml_len] = 0;
paulo@0 261
paulo@0 262 if (XML_DEBUG)
paulo@0 263 GT->dbg (GT, "inflated xml: %s", xml_buf);
paulo@0 264
paulo@0 265 return xml_buf;
paulo@0 266 }
paulo@0 267
paulo@0 268 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
paulo@0 269 size_t shares_len)
paulo@0 270 {
paulo@0 271 xmlDoc *doc;
paulo@0 272 size_t xml_len;
paulo@0 273 const char *next;
paulo@0 274 const char *ptr;
paulo@0 275
paulo@0 276 if (!xml || bin_len <= 4)
paulo@0 277 return FALSE;
paulo@0 278
paulo@0 279 /*
paulo@0 280 * Look for the encoding type, currently possible
paulo@0 281 * encoding values are: "{}" meaning plain text, "{plaintext}",
paulo@0 282 * and "{deflate}".
paulo@0 283 */
paulo@0 284
paulo@0 285 if (!strncmp (xml, "{}", 2))
paulo@0 286 {
paulo@0 287 xml += 2;
paulo@0 288 }
paulo@0 289 else if (bin_len >= sizeof("{plaintext}") - 1 &&
paulo@0 290 !strncasecmp (xml, "{plaintext}", sizeof("{plaintext}") - 1))
paulo@0 291 {
paulo@0 292 xml += sizeof("{plaintext}") - 1;
paulo@0 293 }
paulo@0 294 else if (bin_len >= sizeof("{deflate}") - 1 &&
paulo@0 295 !strncasecmp (xml, "{deflate}", sizeof("{deflate}") - 1))
paulo@0 296 {
paulo@0 297 /* the len passed here should be bin_len - 1, but some servents (MRPH)
paulo@0 298 * don't terminate the XML */
paulo@0 299 xml = inflate_xml (xml + sizeof("{deflate}") - 1, bin_len);
paulo@0 300
paulo@0 301 if (XML_DEBUG)
paulo@0 302 assert (xml != NULL); /* assume valid input */
paulo@0 303
paulo@0 304 if (!xml)
paulo@0 305 return FALSE;
paulo@0 306 }
paulo@0 307
paulo@0 308 xml_len = strlen (xml);
paulo@0 309
paulo@0 310 /*
paulo@0 311 * The XML block is a sequence of XML documents, separated by the <?xml
paulo@0 312 * version="1.0"> document prefix. Parse each one separately.
paulo@0 313 */
paulo@0 314 for (ptr = xml; ptr != NULL; ptr = next)
paulo@0 315 {
paulo@0 316 size_t chunk_len;
paulo@0 317
paulo@0 318 if (ptr[0] != '<')
paulo@0 319 return FALSE;
paulo@0 320
paulo@0 321 next = strstr (ptr + 1, "<?xml");
paulo@0 322
paulo@0 323 chunk_len = xml_len;
paulo@0 324 if (next)
paulo@0 325 chunk_len = next - ptr;
paulo@0 326
paulo@0 327 if (!(doc = xmlParseMemory (ptr, chunk_len)))
paulo@0 328 return FALSE;
paulo@0 329
paulo@0 330 xml_len -= chunk_len;
paulo@0 331
paulo@0 332 set_metadata_from_indexed_xml (shares, shares_len, doc);
paulo@0 333 xmlFreeDoc (doc);
paulo@0 334 }
paulo@0 335
paulo@0 336 return TRUE;
paulo@0 337 }
paulo@0 338
paulo@0 339 /* gets called when there are parsing errors */
paulo@0 340 static void error_handler_func (void *udata, const char *msg, ...)
paulo@0 341 {
paulo@0 342 char buf[1024];
paulo@0 343 va_list args;
paulo@0 344
paulo@0 345 /* this is here until i figure out why i get a message about
paulo@0 346 * namespace errors (but it still seems to work...) */
paulo@0 347 if (!XML_DEBUG)
paulo@0 348 return;
paulo@0 349
paulo@0 350 va_start (args, msg);
paulo@0 351 vsnprintf (buf, sizeof (buf) - 1, msg, args);
paulo@0 352 va_end (args);
paulo@0 353
paulo@0 354 GT->DBGFN (GT, "xml parse error: %s", buf);
paulo@0 355 }
paulo@0 356
paulo@0 357 /*****************************************************************************/
paulo@0 358
paulo@0 359 #endif /* USE_LIBXML2 */
paulo@0 360
paulo@0 361 /*****************************************************************************/
paulo@0 362
/*
 * Set up the XML subsystem.  With libxml2 support this installs the error
 * callback, clears the zlib stream state and allocates the initial 32-byte
 * decompression buffer; without it this is a no-op.
 */
void gt_xml_init (void)
{
#ifdef USE_LIBXML2
	/* route libxml's messages to our handler instead of stderr */
	xmlSetGenericErrorFunc (NULL, error_handler_func);

	memset (&zxml, 0, sizeof (zxml));

	/* start small; the buffer is grown on demand when inflating */
	xml_buf_size = 32;
	xml_buf      = malloc (xml_buf_size);
	assert (xml_buf != NULL);
#endif /* USE_LIBXML2 */
}
paulo@0 376
/*
 * Tear down the XML subsystem: release the decompression buffer and reset
 * its bookkeeping.  A no-op when built without libxml2.
 */
void gt_xml_cleanup (void)
{
#ifdef USE_LIBXML2
	char *old_buf = xml_buf;

	/* reset the globals first so they never refer to freed memory */
	xml_buf      = NULL;
	xml_buf_size = 0;

	free (old_buf);
#endif /* USE_LIBXML2 */
}