view src/xml.c @ 0:d39e1d0d75b6

initial add
author paulo@hit-nxdomain.opendns.com
date Sat, 20 Feb 2010 21:18:28 -0800
parents
children
line source
1 /*
2 * $Id: xml.c,v 1.10 2004/04/13 07:25:18 hipnod Exp $
3 *
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
17 #include "gt_gnutella.h"
19 #include <zlib.h>
21 #ifdef USE_LIBXML2
22 #include <libxml/parser.h> /* xmlParseMemory() */
23 #include <libxml/xmlerror.h> /* xmlSetGenericErrorFunc() */
24 #endif /* USE_LIBXML2 */
26 #include "xml.h"
28 /*****************************************************************************/
30 #ifndef USE_LIBXML2
31 BOOL gt_xml_parse (const char *xml, Dataset **ret)
32 {
33 return FALSE;
34 }
36 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
37 size_t shares_len)
38 {
39 return FALSE;
40 }
41 #endif /* !USE_LIBXML2 */
43 /*****************************************************************************/
45 /* the rest of this file is conditional on using libxml */
46 #ifdef USE_LIBXML2
48 /*****************************************************************************/
50 #define MAX_XML_BUFSIZE 65536
52 static char *xml_buf; /* for decompressing xml */
53 static size_t xml_buf_size;
54 static z_stream zxml;
56 /*****************************************************************************/
58 static void print_nodes (xmlNodePtr node, Dataset **ret)
59 {
60 while (node != NULL)
61 {
62 /*
63 * If this node has no children, it is a leaf node,
64 * so set the metadata from it.
65 */
66 if (node->xmlChildrenNode)
67 print_nodes (node->xmlChildrenNode, ret);
68 else
69 GT->DBGFN (GT, "name=%s", node->name);
71 node = node->next;
72 }
73 }
75 BOOL gt_xml_parse (const char *xml, Dataset **ret)
76 {
77 xmlDocPtr doc;
79 /* disable for now because it doesn't work anyway: need to share
80 * code with parse_indexed */
81 if (!XML_DEBUG)
82 return FALSE;
84 /* only parse documents starting with '<' */
85 if (!xml || xml[0] != '<')
86 return FALSE;
88 if (!(doc = xmlParseMemory (xml, strlen (xml))))
89 return FALSE;
91 print_nodes (doc->xmlChildrenNode, ret);
93 xmlFreeDoc (doc);
95 return TRUE;
96 }
98 static void add_child (Dataset **children, const char *key, const char *value)
99 {
100 char *dup = NULL;
102 if (!key || !value)
103 return;
105 /*
106 * Hack to map some of the attributes from XML documents found
107 * on Gnutella to ones peddled by giFT.
108 */
109 if (!strcasecmp (key, "bitrate"))
110 {
111 dup = stringf_dup ("%s000", value);
112 value = dup;
113 }
114 else if (!strcasecmp (key, "seconds"))
115 {
116 key = "duration";
117 }
119 dataset_insertstr (children, key, value);
120 free (dup);
121 }
123 static Dataset *collect_attributes (xmlNode *node)
124 {
125 const xmlAttr *attr;
126 Dataset *children = NULL;
127 BOOL do_log = XML_DEBUG;
129 for (attr = node->properties; attr != NULL; attr = attr->next)
130 {
131 xmlChar *str;
133 /* is there an easier way to get attribute content? */
134 str = xmlGetProp (node, attr->name);
136 if (do_log)
137 {
138 GT->dbg (GT, "name=%s content=%s",
139 (const char *)attr->name, (const char *)str);
140 }
142 /* add the key->value pair to the dataset */
143 add_child (&children, (const char *)attr->name,
144 (const char *)str);
146 /* xmlGetProp() allocates memory */
147 free (str);
148 }
150 return children;
151 }
153 static void set_meta_foreach (ds_data_t *key, ds_data_t *value, Share *share)
154 {
155 char *meta_key = key->data;
156 char *meta_val = value->data;
158 share_set_meta (share, meta_key, meta_val);
159 }
161 static void set_share_meta (Share **shares, size_t shares_len,
162 Dataset *children)
163 {
164 char *index_str;
165 size_t index;
167 /*
168 * Lookup the "index" attribute, and use that to determine
169 * which Share the XML applies to.
170 */
171 if (!(index_str = dataset_lookupstr (children, "index")))
172 return;
174 index = gift_strtoul (index_str);
176 if (index >= shares_len)
177 return;
179 if (!shares[index])
180 return;
182 /* skip the index attribute */
183 dataset_removestr (children, "index");
185 dataset_foreach (children, DS_FOREACH(set_meta_foreach), shares[index]);
186 }
188 static void set_metadata_from_indexed_xml (Share **shares, size_t shares_len,
189 xmlDoc *doc)
190 {
191 xmlNode *node;
193 if (!(node = xmlDocGetRootElement (doc)))
194 return;
196 for (node = node->xmlChildrenNode; node != NULL; node = node->next)
197 {
198 Dataset *children;
200 children = collect_attributes (node);
202 set_share_meta (shares, shares_len, children);
203 dataset_clear (children);
204 }
205 }
207 static int try_inflate_xml (const char *xml, size_t bin_len)
208 {
209 int ret;
211 /* set zlib allocation data */
212 zxml.zalloc = Z_NULL;
213 zxml.zfree = Z_NULL;
214 zxml.opaque = Z_NULL;
216 /* set the input parameters */
217 zxml.next_in = (char *)xml;
218 zxml.avail_in = bin_len;
220 /* set the output parameters */
221 zxml.next_out = xml_buf;
222 zxml.avail_out = xml_buf_size - 1;
224 if ((ret = inflateInit (&zxml)) != Z_OK)
225 return ret;
227 ret = inflate (&zxml, Z_FINISH);
228 inflateEnd (&zxml);
230 return ret;
231 }
233 static const char *inflate_xml (const char *xml, size_t bin_len)
234 {
235 size_t xml_len;
236 int ret;
238 ret = try_inflate_xml (xml, bin_len);
240 if (ret == Z_BUF_ERROR && xml_buf_size < MAX_XML_BUFSIZE)
241 {
242 size_t newsize = xml_buf_size * 2;
243 char *newbuf;
245 if (!(newbuf = realloc (xml_buf, newsize)))
246 return NULL;
248 xml_buf = newbuf;
249 xml_buf_size = newsize;
251 /* retry with bigger buffer */
252 return inflate_xml (xml, bin_len);
253 }
255 if (ret != Z_STREAM_END)
256 return NULL;
258 /* null terminate (the now hopefully plaintext) XML */
259 xml_len = (xml_buf_size - 1) - zxml.avail_out;
260 xml_buf[xml_len] = 0;
262 if (XML_DEBUG)
263 GT->dbg (GT, "inflated xml: %s", xml_buf);
265 return xml_buf;
266 }
268 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
269 size_t shares_len)
270 {
271 xmlDoc *doc;
272 size_t xml_len;
273 const char *next;
274 const char *ptr;
276 if (!xml || bin_len <= 4)
277 return FALSE;
279 /*
280 * Look for the encoding type, currently possible
281 * encoding values are: "{}" meaning plain text, "{plaintext}",
282 * and "{deflate}".
283 */
285 if (!strncmp (xml, "{}", 2))
286 {
287 xml += 2;
288 }
289 else if (bin_len >= sizeof("{plaintext}") - 1 &&
290 !strncasecmp (xml, "{plaintext}", sizeof("{plaintext}") - 1))
291 {
292 xml += sizeof("{plaintext}") - 1;
293 }
294 else if (bin_len >= sizeof("{deflate}") - 1 &&
295 !strncasecmp (xml, "{deflate}", sizeof("{deflate}") - 1))
296 {
297 /* the len passed here should be bin_len - 1, but some servents (MRPH)
298 * don't terminate the XML */
299 xml = inflate_xml (xml + sizeof("{deflate}") - 1, bin_len);
301 if (XML_DEBUG)
302 assert (xml != NULL); /* assume valid input */
304 if (!xml)
305 return FALSE;
306 }
308 xml_len = strlen (xml);
310 /*
311 * The XML block is a sequence of XML documents, separated by the <?xml
312 * version="1.0"> document prefix. Parse each one separately.
313 */
314 for (ptr = xml; ptr != NULL; ptr = next)
315 {
316 size_t chunk_len;
318 if (ptr[0] != '<')
319 return FALSE;
321 next = strstr (ptr + 1, "<?xml");
323 chunk_len = xml_len;
324 if (next)
325 chunk_len = next - ptr;
327 if (!(doc = xmlParseMemory (ptr, chunk_len)))
328 return FALSE;
330 xml_len -= chunk_len;
332 set_metadata_from_indexed_xml (shares, shares_len, doc);
333 xmlFreeDoc (doc);
334 }
336 return TRUE;
337 }
339 /* gets called when there are parsing errors */
340 static void error_handler_func (void *udata, const char *msg, ...)
341 {
342 char buf[1024];
343 va_list args;
345 /* this is here until i figure out why i get a message about
346 * namespace errors (but it still seems to work...) */
347 if (!XML_DEBUG)
348 return;
350 va_start (args, msg);
351 vsnprintf (buf, sizeof (buf) - 1, msg, args);
352 va_end (args);
354 GT->DBGFN (GT, "xml parse error: %s", buf);
355 }
357 /*****************************************************************************/
359 #endif /* USE_LIBXML2 */
361 /*****************************************************************************/
/*
 * Set up the XML subsystem.  With libxml support compiled in this resets
 * the shared zlib stream, routes libxml's error messages to
 * error_handler_func(), and allocates the initial 32-byte inflate buffer.
 * Without libxml it does nothing.
 */
void gt_xml_init (void)
{
#ifdef USE_LIBXML2
	/* start from a clean zlib stream */
	memset (&zxml, 0, sizeof (zxml));

	/* so libxml doesn't print messages on stderr */
	xmlSetGenericErrorFunc (NULL, error_handler_func);

	/* small initial buffer; inflate_xml() grows it on demand */
	xml_buf = malloc (32);
	assert (xml_buf != NULL);
	xml_buf_size = 32;
#endif /* USE_LIBXML2 */
}
/*
 * Release the inflate buffer allocated by gt_xml_init() and reset the
 * bookkeeping for it.  Does nothing when built without libxml.
 */
void gt_xml_cleanup (void)
{
#ifdef USE_LIBXML2
	char *old_buf = xml_buf;

	/* clear the globals first so they never refer to freed memory */
	xml_buf      = NULL;
	xml_buf_size = 0;

	free (old_buf);
#endif /* USE_LIBXML2 */
}