Mercurial > hg > index.fcgi > gift-gnutella > gift-gnutella-0.0.11-1pba
comparison src/xml.c @ 0:d39e1d0d75b6
initial add
author | paulo@hit-nxdomain.opendns.com |
---|---|
date | Sat, 20 Feb 2010 21:18:28 -0800 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6d09d72091b2 |
---|---|
1 /* | |
2 * $Id: xml.c,v 1.10 2004/04/13 07:25:18 hipnod Exp $ | |
3 * | |
4 * Copyright (C) 2001-2003 giFT project (gift.sourceforge.net) | |
5 * | |
6 * This program is free software; you can redistribute it and/or modify it | |
7 * under the terms of the GNU General Public License as published by the | |
8 * Free Software Foundation; either version 2, or (at your option) any | |
9 * later version. | |
10 * | |
11 * This program is distributed in the hope that it will be useful, but | |
12 * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 * General Public License for more details. | |
15 */ | |
16 | |
17 #include "gt_gnutella.h" | |
18 | |
19 #include <zlib.h> | |
20 | |
21 #ifdef USE_LIBXML2 | |
22 #include <libxml/parser.h> /* xmlParseMemory() */ | |
23 #include <libxml/xmlerror.h> /* xmlSetGenericErrorFunc() */ | |
24 #endif /* USE_LIBXML2 */ | |
25 | |
26 #include "xml.h" | |
27 | |
28 /*****************************************************************************/ | |
29 | |
30 #ifndef USE_LIBXML2 | |
31 BOOL gt_xml_parse (const char *xml, Dataset **ret) | |
32 { | |
33 return FALSE; | |
34 } | |
35 | |
36 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares, | |
37 size_t shares_len) | |
38 { | |
39 return FALSE; | |
40 } | |
41 #endif /* !USE_LIBXML2 */ | |
42 | |
43 /*****************************************************************************/ | |
44 | |
45 /* the rest of this file is conditional on using libxml */ | |
46 #ifdef USE_LIBXML2 | |
47 | |
48 /*****************************************************************************/ | |
49 | |
50 #define MAX_XML_BUFSIZE 65536 | |
51 | |
52 static char *xml_buf; /* for decompressing xml */ | |
53 static size_t xml_buf_size; | |
54 static z_stream zxml; | |
55 | |
56 /*****************************************************************************/ | |
57 | |
58 static void print_nodes (xmlNodePtr node, Dataset **ret) | |
59 { | |
60 while (node != NULL) | |
61 { | |
62 /* | |
63 * If this node has no children, it is a leaf node, | |
64 * so set the metadata from it. | |
65 */ | |
66 if (node->xmlChildrenNode) | |
67 print_nodes (node->xmlChildrenNode, ret); | |
68 else | |
69 GT->DBGFN (GT, "name=%s", node->name); | |
70 | |
71 node = node->next; | |
72 } | |
73 } | |
74 | |
75 BOOL gt_xml_parse (const char *xml, Dataset **ret) | |
76 { | |
77 xmlDocPtr doc; | |
78 | |
79 /* disable for now because it doesn't work anyway: need to share | |
80 * code with parse_indexed */ | |
81 if (!XML_DEBUG) | |
82 return FALSE; | |
83 | |
84 /* only parse documents starting with '<' */ | |
85 if (!xml || xml[0] != '<') | |
86 return FALSE; | |
87 | |
88 if (!(doc = xmlParseMemory (xml, strlen (xml)))) | |
89 return FALSE; | |
90 | |
91 print_nodes (doc->xmlChildrenNode, ret); | |
92 | |
93 xmlFreeDoc (doc); | |
94 | |
95 return TRUE; | |
96 } | |
97 | |
98 static void add_child (Dataset **children, const char *key, const char *value) | |
99 { | |
100 char *dup = NULL; | |
101 | |
102 if (!key || !value) | |
103 return; | |
104 | |
105 /* | |
106 * Hack to map some of the attributes from XML documents found | |
107 * on Gnutella to ones peddled by giFT. | |
108 */ | |
109 if (!strcasecmp (key, "bitrate")) | |
110 { | |
111 dup = stringf_dup ("%s000", value); | |
112 value = dup; | |
113 } | |
114 else if (!strcasecmp (key, "seconds")) | |
115 { | |
116 key = "duration"; | |
117 } | |
118 | |
119 dataset_insertstr (children, key, value); | |
120 free (dup); | |
121 } | |
122 | |
123 static Dataset *collect_attributes (xmlNode *node) | |
124 { | |
125 const xmlAttr *attr; | |
126 Dataset *children = NULL; | |
127 BOOL do_log = XML_DEBUG; | |
128 | |
129 for (attr = node->properties; attr != NULL; attr = attr->next) | |
130 { | |
131 xmlChar *str; | |
132 | |
133 /* is there an easier way to get attribute content? */ | |
134 str = xmlGetProp (node, attr->name); | |
135 | |
136 if (do_log) | |
137 { | |
138 GT->dbg (GT, "name=%s content=%s", | |
139 (const char *)attr->name, (const char *)str); | |
140 } | |
141 | |
142 /* add the key->value pair to the dataset */ | |
143 add_child (&children, (const char *)attr->name, | |
144 (const char *)str); | |
145 | |
146 /* xmlGetProp() allocates memory */ | |
147 free (str); | |
148 } | |
149 | |
150 return children; | |
151 } | |
152 | |
153 static void set_meta_foreach (ds_data_t *key, ds_data_t *value, Share *share) | |
154 { | |
155 char *meta_key = key->data; | |
156 char *meta_val = value->data; | |
157 | |
158 share_set_meta (share, meta_key, meta_val); | |
159 } | |
160 | |
161 static void set_share_meta (Share **shares, size_t shares_len, | |
162 Dataset *children) | |
163 { | |
164 char *index_str; | |
165 size_t index; | |
166 | |
167 /* | |
168 * Lookup the "index" attribute, and use that to determine | |
169 * which Share the XML applies to. | |
170 */ | |
171 if (!(index_str = dataset_lookupstr (children, "index"))) | |
172 return; | |
173 | |
174 index = gift_strtoul (index_str); | |
175 | |
176 if (index >= shares_len) | |
177 return; | |
178 | |
179 if (!shares[index]) | |
180 return; | |
181 | |
182 /* skip the index attribute */ | |
183 dataset_removestr (children, "index"); | |
184 | |
185 dataset_foreach (children, DS_FOREACH(set_meta_foreach), shares[index]); | |
186 } | |
187 | |
188 static void set_metadata_from_indexed_xml (Share **shares, size_t shares_len, | |
189 xmlDoc *doc) | |
190 { | |
191 xmlNode *node; | |
192 | |
193 if (!(node = xmlDocGetRootElement (doc))) | |
194 return; | |
195 | |
196 for (node = node->xmlChildrenNode; node != NULL; node = node->next) | |
197 { | |
198 Dataset *children; | |
199 | |
200 children = collect_attributes (node); | |
201 | |
202 set_share_meta (shares, shares_len, children); | |
203 dataset_clear (children); | |
204 } | |
205 } | |
206 | |
207 static int try_inflate_xml (const char *xml, size_t bin_len) | |
208 { | |
209 int ret; | |
210 | |
211 /* set zlib allocation data */ | |
212 zxml.zalloc = Z_NULL; | |
213 zxml.zfree = Z_NULL; | |
214 zxml.opaque = Z_NULL; | |
215 | |
216 /* set the input parameters */ | |
217 zxml.next_in = (char *)xml; | |
218 zxml.avail_in = bin_len; | |
219 | |
220 /* set the output parameters */ | |
221 zxml.next_out = xml_buf; | |
222 zxml.avail_out = xml_buf_size - 1; | |
223 | |
224 if ((ret = inflateInit (&zxml)) != Z_OK) | |
225 return ret; | |
226 | |
227 ret = inflate (&zxml, Z_FINISH); | |
228 inflateEnd (&zxml); | |
229 | |
230 return ret; | |
231 } | |
232 | |
233 static const char *inflate_xml (const char *xml, size_t bin_len) | |
234 { | |
235 size_t xml_len; | |
236 int ret; | |
237 | |
238 ret = try_inflate_xml (xml, bin_len); | |
239 | |
240 if (ret == Z_BUF_ERROR && xml_buf_size < MAX_XML_BUFSIZE) | |
241 { | |
242 size_t newsize = xml_buf_size * 2; | |
243 char *newbuf; | |
244 | |
245 if (!(newbuf = realloc (xml_buf, newsize))) | |
246 return NULL; | |
247 | |
248 xml_buf = newbuf; | |
249 xml_buf_size = newsize; | |
250 | |
251 /* retry with bigger buffer */ | |
252 return inflate_xml (xml, bin_len); | |
253 } | |
254 | |
255 if (ret != Z_STREAM_END) | |
256 return NULL; | |
257 | |
258 /* null terminate (the now hopefully plaintext) XML */ | |
259 xml_len = (xml_buf_size - 1) - zxml.avail_out; | |
260 xml_buf[xml_len] = 0; | |
261 | |
262 if (XML_DEBUG) | |
263 GT->dbg (GT, "inflated xml: %s", xml_buf); | |
264 | |
265 return xml_buf; | |
266 } | |
267 | |
268 BOOL gt_xml_parse_indexed (const char *xml, size_t bin_len, Share **shares, | |
269 size_t shares_len) | |
270 { | |
271 xmlDoc *doc; | |
272 size_t xml_len; | |
273 const char *next; | |
274 const char *ptr; | |
275 | |
276 if (!xml || bin_len <= 4) | |
277 return FALSE; | |
278 | |
279 /* | |
280 * Look for the encoding type, currently possible | |
281 * encoding values are: "{}" meaning plain text, "{plaintext}", | |
282 * and "{deflate}". | |
283 */ | |
284 | |
285 if (!strncmp (xml, "{}", 2)) | |
286 { | |
287 xml += 2; | |
288 } | |
289 else if (bin_len >= sizeof("{plaintext}") - 1 && | |
290 !strncasecmp (xml, "{plaintext}", sizeof("{plaintext}") - 1)) | |
291 { | |
292 xml += sizeof("{plaintext}") - 1; | |
293 } | |
294 else if (bin_len >= sizeof("{deflate}") - 1 && | |
295 !strncasecmp (xml, "{deflate}", sizeof("{deflate}") - 1)) | |
296 { | |
297 /* the len passed here should be bin_len - 1, but some servents (MRPH) | |
298 * don't terminate the XML */ | |
299 xml = inflate_xml (xml + sizeof("{deflate}") - 1, bin_len); | |
300 | |
301 if (XML_DEBUG) | |
302 assert (xml != NULL); /* assume valid input */ | |
303 | |
304 if (!xml) | |
305 return FALSE; | |
306 } | |
307 | |
308 xml_len = strlen (xml); | |
309 | |
310 /* | |
311 * The XML block is a sequence of XML documents, separated by the <?xml | |
312 * version="1.0"> document prefix. Parse each one separately. | |
313 */ | |
314 for (ptr = xml; ptr != NULL; ptr = next) | |
315 { | |
316 size_t chunk_len; | |
317 | |
318 if (ptr[0] != '<') | |
319 return FALSE; | |
320 | |
321 next = strstr (ptr + 1, "<?xml"); | |
322 | |
323 chunk_len = xml_len; | |
324 if (next) | |
325 chunk_len = next - ptr; | |
326 | |
327 if (!(doc = xmlParseMemory (ptr, chunk_len))) | |
328 return FALSE; | |
329 | |
330 xml_len -= chunk_len; | |
331 | |
332 set_metadata_from_indexed_xml (shares, shares_len, doc); | |
333 xmlFreeDoc (doc); | |
334 } | |
335 | |
336 return TRUE; | |
337 } | |
338 | |
339 /* gets called when there are parsing errors */ | |
340 static void error_handler_func (void *udata, const char *msg, ...) | |
341 { | |
342 char buf[1024]; | |
343 va_list args; | |
344 | |
345 /* this is here until i figure out why i get a message about | |
346 * namespace errors (but it still seems to work...) */ | |
347 if (!XML_DEBUG) | |
348 return; | |
349 | |
350 va_start (args, msg); | |
351 vsnprintf (buf, sizeof (buf) - 1, msg, args); | |
352 va_end (args); | |
353 | |
354 GT->DBGFN (GT, "xml parse error: %s", buf); | |
355 } | |
356 | |
357 /*****************************************************************************/ | |
358 | |
359 #endif /* USE_LIBXML2 */ | |
360 | |
361 /*****************************************************************************/ | |
362 | |
/*
 * Set up the XML support: route libxml's error messages to
 * error_handler_func(), allocate the initial 32-byte inflate buffer and
 * zero the zlib stream.  Does nothing when built without libxml2.
 */
void gt_xml_init (void)
{
#ifdef USE_LIBXML2
	/* so libxml doesn't print messages on stderr */
	xmlSetGenericErrorFunc (NULL, error_handler_func);

	/* small starting size; inflate_xml() grows the buffer on demand */
	xml_buf_size = 32;
	xml_buf      = malloc (xml_buf_size);
	assert (xml_buf != NULL);

	memset (&zxml, 0, sizeof (zxml));
#endif /* USE_LIBXML2 */
}
376 | |
/*
 * Release the inflate buffer and reset its bookkeeping to "no buffer".
 * Does nothing when built without libxml2.
 */
void gt_xml_cleanup (void)
{
#ifdef USE_LIBXML2
	free (xml_buf);

	xml_buf_size = 0;
	xml_buf      = NULL;
#endif /* USE_LIBXML2 */
}
384 } |