/* $Id$ */
/*
 * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
20#include <pjlib-util/xml.h>
21#include <pjlib-util/scanner.h>
22#include <pj/except.h>
23#include <pj/pool.h>
24#include <pj/string.h>
25#include <pj/log.h>
26#include <pj/os.h>
27
/* Exception id thrown by on_syntax_error(). */
#define EX_SYNTAX_ERROR 12

/* File name passed as the sender argument of PJ_LOG(). */
#define THIS_FILE "xml.c"

/*
 * Syntax-error hook: unconditionally throws EX_SYNTAX_ERROR.
 * It is both registered with the scanner in pj_xml_parse() and called
 * directly by the parser. The scanner argument is not inspected.
 */
static void on_syntax_error(struct pj_scanner *scanner)
{
    PJ_UNUSED_ARG(scanner);
    PJ_THROW(EX_SYNTAX_ERROR);
}
36
37static pj_xml_node *alloc_node( pj_pool_t *pool )
38{
39 pj_xml_node *node;
40
41 node = PJ_POOL_ZALLOC_T(pool, pj_xml_node);
42 pj_list_init( &node->attr_head );
43 pj_list_init( &node->node_head );
44
45 return node;
46}
47
48static pj_xml_attr *alloc_attr( pj_pool_t *pool )
49{
50 return PJ_POOL_ZALLOC_T(pool, pj_xml_attr);
51}
52
53/* This is a recursive function! */
54static pj_xml_node *xml_parse_node( pj_pool_t *pool, pj_scanner *scanner)
55{
56 pj_xml_node *node;
57 pj_str_t end_name;
58
59 PJ_CHECK_STACK();
60
61 if (*scanner->curptr != '<')
62 on_syntax_error(scanner);
63
64 /* Handle Processing Instructino (PI) construct (i.e. "<?") */
65 if (*scanner->curptr == '<' && *(scanner->curptr+1) == '?') {
66 pj_scan_advance_n(scanner, 2, PJ_FALSE);
67 for (;;) {
68 pj_str_t dummy;
69 pj_scan_get_until_ch(scanner, '?', &dummy);
70 if (*scanner->curptr=='?' && *(scanner->curptr+1)=='>') {
71 pj_scan_advance_n(scanner, 2, PJ_TRUE);
72 break;
73 } else {
74 pj_scan_advance_n(scanner, 1, PJ_FALSE);
75 }
76 }
77 return xml_parse_node(pool, scanner);
78 }
79
80 /* Handle comments construct (i.e. "<!") */
81 if (pj_scan_strcmp(scanner, "<!", 2) == 0) {
82 pj_scan_advance_n(scanner, 2, PJ_FALSE);
83 for (;;) {
84 pj_str_t dummy;
85 pj_scan_get_until_ch(scanner, '>', &dummy);
86 if (pj_scan_strcmp(scanner, ">", 1) == 0) {
87 pj_scan_advance_n(scanner, 1, PJ_TRUE);
88 break;
89 } else {
90 pj_scan_advance_n(scanner, 1, PJ_FALSE);
91 }
92 }
93 return xml_parse_node(pool, scanner);
94 }
95
96 /* Alloc node. */
97 node = alloc_node(pool);
98
99 /* Get '<' */
100 pj_scan_get_char(scanner);
101
102 /* Get node name. */
103 pj_scan_get_until_chr( scanner, " />\t\r\n", &node->name);
104
105 /* Get attributes. */
106 while (*scanner->curptr != '>' && *scanner->curptr != '/') {
107 pj_xml_attr *attr = alloc_attr(pool);
108
109 pj_scan_get_until_chr( scanner, "=> \t\r\n", &attr->name);
110 if (*scanner->curptr == '=') {
111 pj_scan_get_char( scanner );
112 pj_scan_get_quotes(scanner, "\"'", "\"'", 2, &attr->value);
113 /* remove quote characters */
114 ++attr->value.ptr;
115 attr->value.slen -= 2;
116 }
117
118 pj_list_push_back( &node->attr_head, attr );
119 }
120
121 if (*scanner->curptr == '/') {
122 pj_scan_get_char(scanner);
123 if (pj_scan_get_char(scanner) != '>')
124 on_syntax_error(scanner);
125 return node;
126 }
127
128 /* Enclosing bracket. */
129 if (pj_scan_get_char(scanner) != '>')
130 on_syntax_error(scanner);
131
132 /* Sub nodes. */
133 while (*scanner->curptr == '<' && *(scanner->curptr+1) != '/') {
134 pj_xml_node *sub_node = xml_parse_node(pool, scanner);
135 pj_list_push_back( &node->node_head, sub_node );
136 }
137
138 /* Content. */
139 if (!pj_scan_is_eof(scanner) && *scanner->curptr != '<') {
140 pj_scan_get_until_ch(scanner, '<', &node->content);
141 }
142
143 /* Enclosing node. */
144 if (pj_scan_get_char(scanner) != '<' || pj_scan_get_char(scanner) != '/')
145 on_syntax_error(scanner);
146
147 pj_scan_get_until_chr(scanner, " \t>", &end_name);
148
149 /* Compare name. */
150 if (pj_stricmp(&node->name, &end_name) != 0)
151 on_syntax_error(scanner);
152
153 /* Enclosing '>' */
154 if (pj_scan_get_char(scanner) != '>')
155 on_syntax_error(scanner);
156
157 return node;
158}
159
160PJ_DEF(pj_xml_node*) pj_xml_parse( pj_pool_t *pool, char *msg, pj_size_t len)
161{
162 pj_xml_node *node = NULL;
163 pj_scanner scanner;
164 PJ_USE_EXCEPTION;
165
166 if (!msg || !len || !pool)
167 return NULL;
168
169 pj_scan_init( &scanner, msg, len,
170 PJ_SCAN_AUTOSKIP_WS|PJ_SCAN_AUTOSKIP_NEWLINE,
171 &on_syntax_error);
172 PJ_TRY {
173 node = xml_parse_node(pool, &scanner);
174 }
175 PJ_CATCH_ANY {
176 PJ_LOG(4,(THIS_FILE, "Syntax error parsing XML in line %d column %d",
177 scanner.line, pj_scan_get_col(&scanner)));
178 }
179 PJ_END;
180 pj_scan_fini( &scanner );
181 return node;
182}
183
184/* This is a recursive function. */
185static int xml_print_node( const pj_xml_node *node, int indent,
186 char *buf, pj_size_t len )
187{
188 int i;
189 char *p = buf;
190 pj_xml_attr *attr;
191 pj_xml_node *sub_node;
192
193#define SIZE_LEFT() ((int)(len - (p-buf)))
194
195 PJ_CHECK_STACK();
196
197 /* Print name. */
198 if (SIZE_LEFT() < node->name.slen + indent + 5)
199 return -1;
200 for (i=0; i<indent; ++i)
201 *p++ = ' ';
202 *p++ = '<';
203 pj_memcpy(p, node->name.ptr, node->name.slen);
204 p += node->name.slen;
205
206 /* Print attributes. */
207 attr = node->attr_head.next;
208 while (attr != &node->attr_head) {
209
210 if (SIZE_LEFT() < attr->name.slen + attr->value.slen + 4)
211 return -1;
212
213 *p++ = ' ';
214
215 /* Attribute name. */
216 pj_memcpy(p, attr->name.ptr, attr->name.slen);
217 p += attr->name.slen;
218
219 /* Attribute value. */
220 if (attr->value.slen) {
221 *p++ = '=';
222 *p++ = '"';
223 pj_memcpy(p, attr->value.ptr, attr->value.slen);
224 p += attr->value.slen;
225 *p++ = '"';
226 }
227
228 attr = attr->next;
229 }
230
231 /* Check for empty node. */
232 if (node->content.slen==0 &&
233 node->node_head.next==(pj_xml_node*)&node->node_head)
234 {
235 *p++ = ' ';
236 *p++ = '/';
237 *p++ = '>';
238 return (int)(p-buf);
239 }
240
241 /* Enclosing '>' */
242 if (SIZE_LEFT() < 1) return -1;
243 *p++ = '>';
244
245 /* Print sub nodes. */
246 sub_node = node->node_head.next;
247 while (sub_node != (pj_xml_node*)&node->node_head) {
248 int printed;
249
250 if (SIZE_LEFT() < indent + 3)
251 return -1;
252 //*p++ = '\r';
253 *p++ = '\n';
254
255 printed = xml_print_node(sub_node, indent + 1, p, SIZE_LEFT());
256 if (printed < 0)
257 return -1;
258
259 p += printed;
260 sub_node = sub_node->next;
261 }
262
263 /* Content. */
264 if (node->content.slen) {
265 if (SIZE_LEFT() < node->content.slen) return -1;
266 pj_memcpy(p, node->content.ptr, node->content.slen);
267 p += node->content.slen;
268 }
269
270 /* Enclosing node. */
271 if (node->node_head.next != (pj_xml_node*)&node->node_head) {
272 if (SIZE_LEFT() < node->name.slen + 5 + indent)
273 return -1;
274 //*p++ = '\r';
275 *p++ = '\n';
276 for (i=0; i<indent; ++i)
277 *p++ = ' ';
278 } else {
279 if (SIZE_LEFT() < node->name.slen + 3)
280 return -1;
281 }
282 *p++ = '<';
283 *p++ = '/';
284 pj_memcpy(p, node->name.ptr, node->name.slen);
285 p += node->name.slen;
286 *p++ = '>';
287
288#undef SIZE_LEFT
289
290 return (int)(p-buf);
291}
292
293PJ_DEF(int) pj_xml_print(const pj_xml_node *node, char *buf, pj_size_t len,
294 pj_bool_t include_prolog)
295{
296 int prolog_len = 0;
297 int printed;
298
299 if (!node || !buf || !len)
300 return 0;
301
302 if (include_prolog) {
303 pj_str_t prolog = {"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39};
304 if ((int)len < prolog.slen)
305 return -1;
306 pj_memcpy(buf, prolog.ptr, prolog.slen);
307 prolog_len = (int)prolog.slen;
308 }
309
310 printed = xml_print_node(node, 0, buf+prolog_len, len-prolog_len) + prolog_len;
311 if (printed > 0 && len-printed >= 1) {
312 buf[printed++] = '\n';
313 }
314 return printed;
315}
316
317PJ_DEF(pj_xml_node*) pj_xml_node_new(pj_pool_t *pool, const pj_str_t *name)
318{
319 pj_xml_node *node = alloc_node(pool);
320 pj_strdup(pool, &node->name, name);
321 return node;
322}
323
324PJ_DEF(pj_xml_attr*) pj_xml_attr_new( pj_pool_t *pool, const pj_str_t *name,
325 const pj_str_t *value)
326{
327 pj_xml_attr *attr = alloc_attr(pool);
328 pj_strdup( pool, &attr->name, name);
329 pj_strdup( pool, &attr->value, value);
330 return attr;
331}
332
333PJ_DEF(void) pj_xml_add_node( pj_xml_node *parent, pj_xml_node *node )
334{
335 pj_list_push_back(&parent->node_head, node);
336}
337
338PJ_DEF(void) pj_xml_add_attr( pj_xml_node *node, pj_xml_attr *attr )
339{
340 pj_list_push_back(&node->attr_head, attr);
341}
342
343PJ_DEF(pj_xml_node*) pj_xml_find_node(const pj_xml_node *parent,
344 const pj_str_t *name)
345{
346 const pj_xml_node *node = parent->node_head.next;
347
348 PJ_CHECK_STACK();
349
350 while (node != (void*)&parent->node_head) {
351 if (pj_stricmp(&node->name, name) == 0)
352 return (pj_xml_node*)node;
353 node = node->next;
354 }
355 return NULL;
356}
357
358PJ_DEF(pj_xml_node*) pj_xml_find_node_rec(const pj_xml_node *parent,
359 const pj_str_t *name)
360{
361 const pj_xml_node *node = parent->node_head.next;
362
363 PJ_CHECK_STACK();
364
365 while (node != (void*)&parent->node_head) {
366 pj_xml_node *found;
367 if (pj_stricmp(&node->name, name) == 0)
368 return (pj_xml_node*)node;
369 found = pj_xml_find_node_rec(node, name);
370 if (found)
371 return (pj_xml_node*)found;
372 node = node->next;
373 }
374 return NULL;
375}
376
377PJ_DEF(pj_xml_node*) pj_xml_find_next_node( const pj_xml_node *parent,
378 const pj_xml_node *node,
379 const pj_str_t *name)
380{
381 PJ_CHECK_STACK();
382
383 node = node->next;
384 while (node != (void*)&parent->node_head) {
385 if (pj_stricmp(&node->name, name) == 0)
386 return (pj_xml_node*)node;
387 node = node->next;
388 }
389 return NULL;
390}
391
392
393PJ_DEF(pj_xml_attr*) pj_xml_find_attr( const pj_xml_node *node,
394 const pj_str_t *name,
395 const pj_str_t *value)
396{
397 const pj_xml_attr *attr = node->attr_head.next;
398 while (attr != (void*)&node->attr_head) {
399 if (pj_stricmp(&attr->name, name)==0) {
400 if (value) {
401 if (pj_stricmp(&attr->value, value)==0)
402 return (pj_xml_attr*)attr;
403 } else {
404 return (pj_xml_attr*)attr;
405 }
406 }
407 attr = attr->next;
408 }
409 return NULL;
410}
411
412
413
414PJ_DEF(pj_xml_node*) pj_xml_find( const pj_xml_node *parent,
415 const pj_str_t *name,
416 const void *data,
417 pj_bool_t (*match)(const pj_xml_node *,
418 const void*))
419{
420 const pj_xml_node *node = (const pj_xml_node *)parent->node_head.next;
421
422 if (!name && !match)
423 return NULL;
424
425 while (node != (const pj_xml_node*) &parent->node_head) {
426 if (name) {
427 if (pj_stricmp(&node->name, name)!=0) {
428 node = node->next;
429 continue;
430 }
431 }
432 if (match) {
433 if (match(node, data))
434 return (pj_xml_node*)node;
435 } else {
436 return (pj_xml_node*)node;
437 }
438
439 node = node->next;
440 }
441 return NULL;
442}
443
444PJ_DEF(pj_xml_node*) pj_xml_find_rec( const pj_xml_node *parent,
445 const pj_str_t *name,
446 const void *data,
447 pj_bool_t (*match)(const pj_xml_node*,
448 const void*))
449{
450 const pj_xml_node *node = (const pj_xml_node *)parent->node_head.next;
451
452 if (!name && !match)
453 return NULL;
454
455 while (node != (const pj_xml_node*) &parent->node_head) {
456 pj_xml_node *found;
457
458 if (name) {
459 if (pj_stricmp(&node->name, name)==0) {
460 if (match) {
461 if (match(node, data))
462 return (pj_xml_node*)node;
463 } else {
464 return (pj_xml_node*)node;
465 }
466 }
467
468 } else if (match) {
469 if (match(node, data))
470 return (pj_xml_node*)node;
471 }
472
473 found = pj_xml_find_rec(node, name, data, match);
474 if (found)
475 return found;
476
477 node = node->next;
478 }
479 return NULL;
480}
481
482PJ_DEF(pj_xml_node*) pj_xml_clone( pj_pool_t *pool, const pj_xml_node *rhs)
483{
484 pj_xml_node *node;
485 const pj_xml_attr *r_attr;
486 const pj_xml_node *child;
487
488 node = alloc_node(pool);
489
490 pj_strdup(pool, &node->name, &rhs->name);
491 pj_strdup(pool, &node->content, &rhs->content);
492
493 /* Clone all attributes */
494 r_attr = rhs->attr_head.next;
495 while (r_attr != &rhs->attr_head) {
496
497 pj_xml_attr *attr;
498
499 attr = alloc_attr(pool);
500 pj_strdup(pool, &attr->name, &r_attr->name);
501 pj_strdup(pool, &attr->value, &r_attr->value);
502
503 pj_list_push_back(&node->attr_head, attr);
504
505 r_attr = r_attr->next;
506 }
507
508 /* Clone all child nodes. */
509 child = rhs->node_head.next;
510 while (child != (pj_xml_node*) &rhs->node_head) {
511 pj_xml_node *new_child;
512
513 new_child = pj_xml_clone(pool, child);
514 pj_list_push_back(&node->node_head, new_child);
515
516 child = child->next;
517 }
518
519 return node;
520}