rev |
line source |
paulo@0
|
1 /*
|
paulo@0
|
2 * $Id: xml.c,v 1.10 2004/04/13 07:25:18 hipnod Exp $
|
paulo@0
|
3 *
|
paulo@0
|
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net)
|
paulo@0
|
5 *
|
paulo@0
|
6 * This program is free software; you can redistribute it and/or modify it
|
paulo@0
|
7 * under the terms of the GNU General Public License as published by the
|
paulo@0
|
8 * Free Software Foundation; either version 2, or (at your option) any
|
paulo@0
|
9 * later version.
|
paulo@0
|
10 *
|
paulo@0
|
11 * This program is distributed in the hope that it will be useful, but
|
paulo@0
|
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
|
paulo@0
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
paulo@0
|
14 * General Public License for more details.
|
paulo@0
|
15 */
|
paulo@0
|
16
|
paulo@0
|
17 #include "gt_gnutella.h"
|
paulo@0
|
18
|
paulo@0
|
19 #include <zlib.h>
|
paulo@0
|
20
|
paulo@0
|
21 #ifdef USE_LIBXML2
|
paulo@0
|
22 #include <libxml/parser.h> /* xmlParseMemory() */
|
paulo@0
|
23 #include <libxml/xmlerror.h> /* xmlSetGenericErrorFunc() */
|
paulo@0
|
24 #endif /* USE_LIBXML2 */
|
paulo@0
|
25
|
paulo@0
|
26 #include "xml.h"
|
paulo@0
|
27
|
paulo@0
|
28 /*****************************************************************************/
|
paulo@0
|
29
|
paulo@0
|
30 #ifndef USE_LIBXML2
|
paulo@0
|
31 BOOL gt_xml_parse (const char *xml, Dataset **ret)
|
paulo@0
|
32 {
|
paulo@0
|
33 return FALSE;
|
paulo@0
|
34 }
|
paulo@0
|
35
|
paulo@0
|
36 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
|
paulo@0
|
37 size_t shares_len)
|
paulo@0
|
38 {
|
paulo@0
|
39 return FALSE;
|
paulo@0
|
40 }
|
paulo@0
|
41 #endif /* !USE_LIBXML2 */
|
paulo@0
|
42
|
paulo@0
|
43 /*****************************************************************************/
|
paulo@0
|
44
|
paulo@0
|
45 /* the rest of this file is conditional on using libxml */
|
paulo@0
|
46 #ifdef USE_LIBXML2
|
paulo@0
|
47
|
paulo@0
|
48 /*****************************************************************************/
|
paulo@0
|
49
|
paulo@0
|
50 #define MAX_XML_BUFSIZE 65536
|
paulo@0
|
51
|
paulo@0
|
52 static char *xml_buf; /* for decompressing xml */
|
paulo@0
|
53 static size_t xml_buf_size;
|
paulo@0
|
54 static z_stream zxml;
|
paulo@0
|
55
|
paulo@0
|
56 /*****************************************************************************/
|
paulo@0
|
57
|
paulo@0
|
58 static void print_nodes (xmlNodePtr node, Dataset **ret)
|
paulo@0
|
59 {
|
paulo@0
|
60 while (node != NULL)
|
paulo@0
|
61 {
|
paulo@0
|
62 /*
|
paulo@0
|
63 * If this node has no children, it is a leaf node,
|
paulo@0
|
64 * so set the metadata from it.
|
paulo@0
|
65 */
|
paulo@0
|
66 if (node->xmlChildrenNode)
|
paulo@0
|
67 print_nodes (node->xmlChildrenNode, ret);
|
paulo@0
|
68 else
|
paulo@0
|
69 GT->DBGFN (GT, "name=%s", node->name);
|
paulo@0
|
70
|
paulo@0
|
71 node = node->next;
|
paulo@0
|
72 }
|
paulo@0
|
73 }
|
paulo@0
|
74
|
paulo@0
|
75 BOOL gt_xml_parse (const char *xml, Dataset **ret)
|
paulo@0
|
76 {
|
paulo@0
|
77 xmlDocPtr doc;
|
paulo@0
|
78
|
paulo@0
|
79 /* disable for now because it doesn't work anyway: need to share
|
paulo@0
|
80 * code with parse_indexed */
|
paulo@0
|
81 if (!XML_DEBUG)
|
paulo@0
|
82 return FALSE;
|
paulo@0
|
83
|
paulo@0
|
84 /* only parse documents starting with '<' */
|
paulo@0
|
85 if (!xml || xml[0] != '<')
|
paulo@0
|
86 return FALSE;
|
paulo@0
|
87
|
paulo@0
|
88 if (!(doc = xmlParseMemory (xml, strlen (xml))))
|
paulo@0
|
89 return FALSE;
|
paulo@0
|
90
|
paulo@0
|
91 print_nodes (doc->xmlChildrenNode, ret);
|
paulo@0
|
92
|
paulo@0
|
93 xmlFreeDoc (doc);
|
paulo@0
|
94
|
paulo@0
|
95 return TRUE;
|
paulo@0
|
96 }
|
paulo@0
|
97
|
paulo@0
|
98 static void add_child (Dataset **children, const char *key, const char *value)
|
paulo@0
|
99 {
|
paulo@0
|
100 char *dup = NULL;
|
paulo@0
|
101
|
paulo@0
|
102 if (!key || !value)
|
paulo@0
|
103 return;
|
paulo@0
|
104
|
paulo@0
|
105 /*
|
paulo@0
|
106 * Hack to map some of the attributes from XML documents found
|
paulo@0
|
107 * on Gnutella to ones peddled by giFT.
|
paulo@0
|
108 */
|
paulo@0
|
109 if (!strcasecmp (key, "bitrate"))
|
paulo@0
|
110 {
|
paulo@0
|
111 dup = stringf_dup ("%s000", value);
|
paulo@0
|
112 value = dup;
|
paulo@0
|
113 }
|
paulo@0
|
114 else if (!strcasecmp (key, "seconds"))
|
paulo@0
|
115 {
|
paulo@0
|
116 key = "duration";
|
paulo@0
|
117 }
|
paulo@0
|
118
|
paulo@0
|
119 dataset_insertstr (children, key, value);
|
paulo@0
|
120 free (dup);
|
paulo@0
|
121 }
|
paulo@0
|
122
|
paulo@0
|
123 static Dataset *collect_attributes (xmlNode *node)
|
paulo@0
|
124 {
|
paulo@0
|
125 const xmlAttr *attr;
|
paulo@0
|
126 Dataset *children = NULL;
|
paulo@0
|
127 BOOL do_log = XML_DEBUG;
|
paulo@0
|
128
|
paulo@0
|
129 for (attr = node->properties; attr != NULL; attr = attr->next)
|
paulo@0
|
130 {
|
paulo@0
|
131 xmlChar *str;
|
paulo@0
|
132
|
paulo@0
|
133 /* is there an easier way to get attribute content? */
|
paulo@0
|
134 str = xmlGetProp (node, attr->name);
|
paulo@0
|
135
|
paulo@0
|
136 if (do_log)
|
paulo@0
|
137 {
|
paulo@0
|
138 GT->dbg (GT, "name=%s content=%s",
|
paulo@0
|
139 (const char *)attr->name, (const char *)str);
|
paulo@0
|
140 }
|
paulo@0
|
141
|
paulo@0
|
142 /* add the key->value pair to the dataset */
|
paulo@0
|
143 add_child (&children, (const char *)attr->name,
|
paulo@0
|
144 (const char *)str);
|
paulo@0
|
145
|
paulo@0
|
146 /* xmlGetProp() allocates memory */
|
paulo@0
|
147 free (str);
|
paulo@0
|
148 }
|
paulo@0
|
149
|
paulo@0
|
150 return children;
|
paulo@0
|
151 }
|
paulo@0
|
152
|
paulo@0
|
153 static void set_meta_foreach (ds_data_t *key, ds_data_t *value, Share *share)
|
paulo@0
|
154 {
|
paulo@0
|
155 char *meta_key = key->data;
|
paulo@0
|
156 char *meta_val = value->data;
|
paulo@0
|
157
|
paulo@0
|
158 share_set_meta (share, meta_key, meta_val);
|
paulo@0
|
159 }
|
paulo@0
|
160
|
paulo@0
|
161 static void set_share_meta (Share **shares, size_t shares_len,
|
paulo@0
|
162 Dataset *children)
|
paulo@0
|
163 {
|
paulo@0
|
164 char *index_str;
|
paulo@0
|
165 size_t index;
|
paulo@0
|
166
|
paulo@0
|
167 /*
|
paulo@0
|
168 * Lookup the "index" attribute, and use that to determine
|
paulo@0
|
169 * which Share the XML applies to.
|
paulo@0
|
170 */
|
paulo@0
|
171 if (!(index_str = dataset_lookupstr (children, "index")))
|
paulo@0
|
172 return;
|
paulo@0
|
173
|
paulo@0
|
174 index = gift_strtoul (index_str);
|
paulo@0
|
175
|
paulo@0
|
176 if (index >= shares_len)
|
paulo@0
|
177 return;
|
paulo@0
|
178
|
paulo@0
|
179 if (!shares[index])
|
paulo@0
|
180 return;
|
paulo@0
|
181
|
paulo@0
|
182 /* skip the index attribute */
|
paulo@0
|
183 dataset_removestr (children, "index");
|
paulo@0
|
184
|
paulo@0
|
185 dataset_foreach (children, DS_FOREACH(set_meta_foreach), shares[index]);
|
paulo@0
|
186 }
|
paulo@0
|
187
|
paulo@0
|
188 static void set_metadata_from_indexed_xml (Share **shares, size_t shares_len,
|
paulo@0
|
189 xmlDoc *doc)
|
paulo@0
|
190 {
|
paulo@0
|
191 xmlNode *node;
|
paulo@0
|
192
|
paulo@0
|
193 if (!(node = xmlDocGetRootElement (doc)))
|
paulo@0
|
194 return;
|
paulo@0
|
195
|
paulo@0
|
196 for (node = node->xmlChildrenNode; node != NULL; node = node->next)
|
paulo@0
|
197 {
|
paulo@0
|
198 Dataset *children;
|
paulo@0
|
199
|
paulo@0
|
200 children = collect_attributes (node);
|
paulo@0
|
201
|
paulo@0
|
202 set_share_meta (shares, shares_len, children);
|
paulo@0
|
203 dataset_clear (children);
|
paulo@0
|
204 }
|
paulo@0
|
205 }
|
paulo@0
|
206
|
paulo@0
|
207 static int try_inflate_xml (const char *xml, size_t bin_len)
|
paulo@0
|
208 {
|
paulo@0
|
209 int ret;
|
paulo@0
|
210
|
paulo@0
|
211 /* set zlib allocation data */
|
paulo@0
|
212 zxml.zalloc = Z_NULL;
|
paulo@0
|
213 zxml.zfree = Z_NULL;
|
paulo@0
|
214 zxml.opaque = Z_NULL;
|
paulo@0
|
215
|
paulo@0
|
216 /* set the input parameters */
|
paulo@0
|
217 zxml.next_in = (char *)xml;
|
paulo@0
|
218 zxml.avail_in = bin_len;
|
paulo@0
|
219
|
paulo@0
|
220 /* set the output parameters */
|
paulo@0
|
221 zxml.next_out = xml_buf;
|
paulo@0
|
222 zxml.avail_out = xml_buf_size - 1;
|
paulo@0
|
223
|
paulo@0
|
224 if ((ret = inflateInit (&zxml)) != Z_OK)
|
paulo@0
|
225 return ret;
|
paulo@0
|
226
|
paulo@0
|
227 ret = inflate (&zxml, Z_FINISH);
|
paulo@0
|
228 inflateEnd (&zxml);
|
paulo@0
|
229
|
paulo@0
|
230 return ret;
|
paulo@0
|
231 }
|
paulo@0
|
232
|
paulo@0
|
233 static const char *inflate_xml (const char *xml, size_t bin_len)
|
paulo@0
|
234 {
|
paulo@0
|
235 size_t xml_len;
|
paulo@0
|
236 int ret;
|
paulo@0
|
237
|
paulo@0
|
238 ret = try_inflate_xml (xml, bin_len);
|
paulo@0
|
239
|
paulo@0
|
240 if (ret == Z_BUF_ERROR && xml_buf_size < MAX_XML_BUFSIZE)
|
paulo@0
|
241 {
|
paulo@0
|
242 size_t newsize = xml_buf_size * 2;
|
paulo@0
|
243 char *newbuf;
|
paulo@0
|
244
|
paulo@0
|
245 if (!(newbuf = realloc (xml_buf, newsize)))
|
paulo@0
|
246 return NULL;
|
paulo@0
|
247
|
paulo@0
|
248 xml_buf = newbuf;
|
paulo@0
|
249 xml_buf_size = newsize;
|
paulo@0
|
250
|
paulo@0
|
251 /* retry with bigger buffer */
|
paulo@0
|
252 return inflate_xml (xml, bin_len);
|
paulo@0
|
253 }
|
paulo@0
|
254
|
paulo@0
|
255 if (ret != Z_STREAM_END)
|
paulo@0
|
256 return NULL;
|
paulo@0
|
257
|
paulo@0
|
258 /* null terminate (the now hopefully plaintext) XML */
|
paulo@0
|
259 xml_len = (xml_buf_size - 1) - zxml.avail_out;
|
paulo@0
|
260 xml_buf[xml_len] = 0;
|
paulo@0
|
261
|
paulo@0
|
262 if (XML_DEBUG)
|
paulo@0
|
263 GT->dbg (GT, "inflated xml: %s", xml_buf);
|
paulo@0
|
264
|
paulo@0
|
265 return xml_buf;
|
paulo@0
|
266 }
|
paulo@0
|
267
|
paulo@0
|
268 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares,
|
paulo@0
|
269 size_t shares_len)
|
paulo@0
|
270 {
|
paulo@0
|
271 xmlDoc *doc;
|
paulo@0
|
272 size_t xml_len;
|
paulo@0
|
273 const char *next;
|
paulo@0
|
274 const char *ptr;
|
paulo@0
|
275
|
paulo@0
|
276 if (!xml || bin_len <= 4)
|
paulo@0
|
277 return FALSE;
|
paulo@0
|
278
|
paulo@0
|
279 /*
|
paulo@0
|
280 * Look for the encoding type, currently possible
|
paulo@0
|
281 * encoding values are: "{}" meaning plain text, "{plaintext}",
|
paulo@0
|
282 * and "{deflate}".
|
paulo@0
|
283 */
|
paulo@0
|
284
|
paulo@0
|
285 if (!strncmp (xml, "{}", 2))
|
paulo@0
|
286 {
|
paulo@0
|
287 xml += 2;
|
paulo@0
|
288 }
|
paulo@0
|
289 else if (bin_len >= sizeof("{plaintext}") - 1 &&
|
paulo@0
|
290 !strncasecmp (xml, "{plaintext}", sizeof("{plaintext}") - 1))
|
paulo@0
|
291 {
|
paulo@0
|
292 xml += sizeof("{plaintext}") - 1;
|
paulo@0
|
293 }
|
paulo@0
|
294 else if (bin_len >= sizeof("{deflate}") - 1 &&
|
paulo@0
|
295 !strncasecmp (xml, "{deflate}", sizeof("{deflate}") - 1))
|
paulo@0
|
296 {
|
paulo@0
|
297 /* the len passed here should be bin_len - 1, but some servents (MRPH)
|
paulo@0
|
298 * don't terminate the XML */
|
paulo@0
|
299 xml = inflate_xml (xml + sizeof("{deflate}") - 1, bin_len);
|
paulo@0
|
300
|
paulo@0
|
301 if (XML_DEBUG)
|
paulo@0
|
302 assert (xml != NULL); /* assume valid input */
|
paulo@0
|
303
|
paulo@0
|
304 if (!xml)
|
paulo@0
|
305 return FALSE;
|
paulo@0
|
306 }
|
paulo@0
|
307
|
paulo@0
|
308 xml_len = strlen (xml);
|
paulo@0
|
309
|
paulo@0
|
310 /*
|
paulo@0
|
311 * The XML block is a sequence of XML documents, separated by the <?xml
|
paulo@0
|
312 * version="1.0"> document prefix. Parse each one separately.
|
paulo@0
|
313 */
|
paulo@0
|
314 for (ptr = xml; ptr != NULL; ptr = next)
|
paulo@0
|
315 {
|
paulo@0
|
316 size_t chunk_len;
|
paulo@0
|
317
|
paulo@0
|
318 if (ptr[0] != '<')
|
paulo@0
|
319 return FALSE;
|
paulo@0
|
320
|
paulo@0
|
321 next = strstr (ptr + 1, "<?xml");
|
paulo@0
|
322
|
paulo@0
|
323 chunk_len = xml_len;
|
paulo@0
|
324 if (next)
|
paulo@0
|
325 chunk_len = next - ptr;
|
paulo@0
|
326
|
paulo@0
|
327 if (!(doc = xmlParseMemory (ptr, chunk_len)))
|
paulo@0
|
328 return FALSE;
|
paulo@0
|
329
|
paulo@0
|
330 xml_len -= chunk_len;
|
paulo@0
|
331
|
paulo@0
|
332 set_metadata_from_indexed_xml (shares, shares_len, doc);
|
paulo@0
|
333 xmlFreeDoc (doc);
|
paulo@0
|
334 }
|
paulo@0
|
335
|
paulo@0
|
336 return TRUE;
|
paulo@0
|
337 }
|
paulo@0
|
338
|
paulo@0
|
339 /* gets called when there are parsing errors */
|
paulo@0
|
340 static void error_handler_func (void *udata, const char *msg, ...)
|
paulo@0
|
341 {
|
paulo@0
|
342 char buf[1024];
|
paulo@0
|
343 va_list args;
|
paulo@0
|
344
|
paulo@0
|
345 /* this is here until i figure out why i get a message about
|
paulo@0
|
346 * namespace errors (but it still seems to work...) */
|
paulo@0
|
347 if (!XML_DEBUG)
|
paulo@0
|
348 return;
|
paulo@0
|
349
|
paulo@0
|
350 va_start (args, msg);
|
paulo@0
|
351 vsnprintf (buf, sizeof (buf) - 1, msg, args);
|
paulo@0
|
352 va_end (args);
|
paulo@0
|
353
|
paulo@0
|
354 GT->DBGFN (GT, "xml parse error: %s", buf);
|
paulo@0
|
355 }
|
paulo@0
|
356
|
paulo@0
|
357 /*****************************************************************************/
|
paulo@0
|
358
|
paulo@0
|
359 #endif /* USE_LIBXML2 */
|
paulo@0
|
360
|
paulo@0
|
361 /*****************************************************************************/
|
paulo@0
|
362
|
paulo@0
|
/*
 * Set up the XML subsystem.  Without libxml2 support this does nothing.
 *
 * With libxml2, libxml's generic error output is routed to
 * error_handler_func(), the decompression buffer is allocated with its
 * initial size, and the shared zlib stream is zeroed.
 */
void gt_xml_init (void)
{
#ifdef USE_LIBXML2
	const size_t initial_size = 32;

	/* so libxml doesn't print messages on stderr */
	xmlSetGenericErrorFunc (NULL, error_handler_func);

	/* start small; inflate_xml() grows the buffer on demand */
	xml_buf_size = initial_size;
	xml_buf      = malloc (initial_size);
	assert (xml_buf != NULL);

	memset (&zxml, 0, sizeof (zxml));
#endif /* USE_LIBXML2 */
}
|
paulo@0
|
376
|
paulo@0
|
/*
 * Tear down the XML subsystem.  Without libxml2 support this does
 * nothing; with it, the decompression buffer is released and the
 * bookkeeping for it is reset.
 */
void gt_xml_cleanup (void)
{
#ifdef USE_LIBXML2
	/* forget the size first, then release and clear the buffer */
	xml_buf_size = 0;

	free (xml_buf);
	xml_buf = NULL;
#endif /* USE_LIBXML2 */
}
|