Tristan Matthews | 0a329cc | 2013-07-17 13:20:14 -0400 | [diff] [blame] | 1 | /* $Id$ */ |
| 2 | /* |
| 3 | * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com) |
| 4 | * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License as published by |
| 8 | * the Free Software Foundation; either version 2 of the License, or |
| 9 | * (at your option) any later version. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * GNU General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License |
| 17 | * along with this program; if not, write to the Free Software |
| 18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
| 19 | */ |
| 20 | #include <pjlib-util/xml.h> |
| 21 | #include <pjlib-util/scanner.h> |
| 22 | #include <pj/except.h> |
| 23 | #include <pj/pool.h> |
| 24 | #include <pj/string.h> |
| 25 | #include <pj/log.h> |
| 26 | #include <pj/os.h> |
| 27 | |
| 28 | #define EX_SYNTAX_ERROR 12 |
| 29 | #define THIS_FILE "xml.c" |
| 30 | |
| 31 | static void on_syntax_error(struct pj_scanner *scanner) |
| 32 | { |
| 33 | PJ_UNUSED_ARG(scanner); |
| 34 | PJ_THROW(EX_SYNTAX_ERROR); |
| 35 | } |
| 36 | |
| 37 | static pj_xml_node *alloc_node( pj_pool_t *pool ) |
| 38 | { |
| 39 | pj_xml_node *node; |
| 40 | |
| 41 | node = PJ_POOL_ZALLOC_T(pool, pj_xml_node); |
| 42 | pj_list_init( &node->attr_head ); |
| 43 | pj_list_init( &node->node_head ); |
| 44 | |
| 45 | return node; |
| 46 | } |
| 47 | |
| 48 | static pj_xml_attr *alloc_attr( pj_pool_t *pool ) |
| 49 | { |
| 50 | return PJ_POOL_ZALLOC_T(pool, pj_xml_attr); |
| 51 | } |
| 52 | |
| 53 | /* This is a recursive function! */ |
| 54 | static pj_xml_node *xml_parse_node( pj_pool_t *pool, pj_scanner *scanner) |
| 55 | { |
| 56 | pj_xml_node *node; |
| 57 | pj_str_t end_name; |
| 58 | |
| 59 | PJ_CHECK_STACK(); |
| 60 | |
| 61 | if (*scanner->curptr != '<') |
| 62 | on_syntax_error(scanner); |
| 63 | |
| 64 | /* Handle Processing Instructino (PI) construct (i.e. "<?") */ |
| 65 | if (*scanner->curptr == '<' && *(scanner->curptr+1) == '?') { |
| 66 | pj_scan_advance_n(scanner, 2, PJ_FALSE); |
| 67 | for (;;) { |
| 68 | pj_str_t dummy; |
| 69 | pj_scan_get_until_ch(scanner, '?', &dummy); |
| 70 | if (*scanner->curptr=='?' && *(scanner->curptr+1)=='>') { |
| 71 | pj_scan_advance_n(scanner, 2, PJ_TRUE); |
| 72 | break; |
| 73 | } else { |
| 74 | pj_scan_advance_n(scanner, 1, PJ_FALSE); |
| 75 | } |
| 76 | } |
| 77 | return xml_parse_node(pool, scanner); |
| 78 | } |
| 79 | |
| 80 | /* Handle comments construct (i.e. "<!") */ |
| 81 | if (pj_scan_strcmp(scanner, "<!", 2) == 0) { |
| 82 | pj_scan_advance_n(scanner, 2, PJ_FALSE); |
| 83 | for (;;) { |
| 84 | pj_str_t dummy; |
| 85 | pj_scan_get_until_ch(scanner, '>', &dummy); |
| 86 | if (pj_scan_strcmp(scanner, ">", 1) == 0) { |
| 87 | pj_scan_advance_n(scanner, 1, PJ_TRUE); |
| 88 | break; |
| 89 | } else { |
| 90 | pj_scan_advance_n(scanner, 1, PJ_FALSE); |
| 91 | } |
| 92 | } |
| 93 | return xml_parse_node(pool, scanner); |
| 94 | } |
| 95 | |
| 96 | /* Alloc node. */ |
| 97 | node = alloc_node(pool); |
| 98 | |
| 99 | /* Get '<' */ |
| 100 | pj_scan_get_char(scanner); |
| 101 | |
| 102 | /* Get node name. */ |
| 103 | pj_scan_get_until_chr( scanner, " />\t\r\n", &node->name); |
| 104 | |
| 105 | /* Get attributes. */ |
| 106 | while (*scanner->curptr != '>' && *scanner->curptr != '/') { |
| 107 | pj_xml_attr *attr = alloc_attr(pool); |
| 108 | |
| 109 | pj_scan_get_until_chr( scanner, "=> \t\r\n", &attr->name); |
| 110 | if (*scanner->curptr == '=') { |
| 111 | pj_scan_get_char( scanner ); |
| 112 | pj_scan_get_quotes(scanner, "\"'", "\"'", 2, &attr->value); |
| 113 | /* remove quote characters */ |
| 114 | ++attr->value.ptr; |
| 115 | attr->value.slen -= 2; |
| 116 | } |
| 117 | |
| 118 | pj_list_push_back( &node->attr_head, attr ); |
| 119 | } |
| 120 | |
| 121 | if (*scanner->curptr == '/') { |
| 122 | pj_scan_get_char(scanner); |
| 123 | if (pj_scan_get_char(scanner) != '>') |
| 124 | on_syntax_error(scanner); |
| 125 | return node; |
| 126 | } |
| 127 | |
| 128 | /* Enclosing bracket. */ |
| 129 | if (pj_scan_get_char(scanner) != '>') |
| 130 | on_syntax_error(scanner); |
| 131 | |
| 132 | /* Sub nodes. */ |
| 133 | while (*scanner->curptr == '<' && *(scanner->curptr+1) != '/') { |
| 134 | pj_xml_node *sub_node = xml_parse_node(pool, scanner); |
| 135 | pj_list_push_back( &node->node_head, sub_node ); |
| 136 | } |
| 137 | |
| 138 | /* Content. */ |
| 139 | if (!pj_scan_is_eof(scanner) && *scanner->curptr != '<') { |
| 140 | pj_scan_get_until_ch(scanner, '<', &node->content); |
| 141 | } |
| 142 | |
| 143 | /* Enclosing node. */ |
| 144 | if (pj_scan_get_char(scanner) != '<' || pj_scan_get_char(scanner) != '/') |
| 145 | on_syntax_error(scanner); |
| 146 | |
| 147 | pj_scan_get_until_chr(scanner, " \t>", &end_name); |
| 148 | |
| 149 | /* Compare name. */ |
| 150 | if (pj_stricmp(&node->name, &end_name) != 0) |
| 151 | on_syntax_error(scanner); |
| 152 | |
| 153 | /* Enclosing '>' */ |
| 154 | if (pj_scan_get_char(scanner) != '>') |
| 155 | on_syntax_error(scanner); |
| 156 | |
| 157 | return node; |
| 158 | } |
| 159 | |
| 160 | PJ_DEF(pj_xml_node*) pj_xml_parse( pj_pool_t *pool, char *msg, pj_size_t len) |
| 161 | { |
| 162 | pj_xml_node *node = NULL; |
| 163 | pj_scanner scanner; |
| 164 | PJ_USE_EXCEPTION; |
| 165 | |
| 166 | if (!msg || !len || !pool) |
| 167 | return NULL; |
| 168 | |
| 169 | pj_scan_init( &scanner, msg, len, |
| 170 | PJ_SCAN_AUTOSKIP_WS|PJ_SCAN_AUTOSKIP_NEWLINE, |
| 171 | &on_syntax_error); |
| 172 | PJ_TRY { |
| 173 | node = xml_parse_node(pool, &scanner); |
| 174 | } |
| 175 | PJ_CATCH_ANY { |
| 176 | PJ_LOG(4,(THIS_FILE, "Syntax error parsing XML in line %d column %d", |
| 177 | scanner.line, pj_scan_get_col(&scanner))); |
| 178 | } |
| 179 | PJ_END; |
| 180 | pj_scan_fini( &scanner ); |
| 181 | return node; |
| 182 | } |
| 183 | |
| 184 | /* This is a recursive function. */ |
| 185 | static int xml_print_node( const pj_xml_node *node, int indent, |
| 186 | char *buf, pj_size_t len ) |
| 187 | { |
| 188 | int i; |
| 189 | char *p = buf; |
| 190 | pj_xml_attr *attr; |
| 191 | pj_xml_node *sub_node; |
| 192 | |
| 193 | #define SIZE_LEFT() ((int)(len - (p-buf))) |
| 194 | |
| 195 | PJ_CHECK_STACK(); |
| 196 | |
| 197 | /* Print name. */ |
| 198 | if (SIZE_LEFT() < node->name.slen + indent + 5) |
| 199 | return -1; |
| 200 | for (i=0; i<indent; ++i) |
| 201 | *p++ = ' '; |
| 202 | *p++ = '<'; |
| 203 | pj_memcpy(p, node->name.ptr, node->name.slen); |
| 204 | p += node->name.slen; |
| 205 | |
| 206 | /* Print attributes. */ |
| 207 | attr = node->attr_head.next; |
| 208 | while (attr != &node->attr_head) { |
| 209 | |
| 210 | if (SIZE_LEFT() < attr->name.slen + attr->value.slen + 4) |
| 211 | return -1; |
| 212 | |
| 213 | *p++ = ' '; |
| 214 | |
| 215 | /* Attribute name. */ |
| 216 | pj_memcpy(p, attr->name.ptr, attr->name.slen); |
| 217 | p += attr->name.slen; |
| 218 | |
| 219 | /* Attribute value. */ |
| 220 | if (attr->value.slen) { |
| 221 | *p++ = '='; |
| 222 | *p++ = '"'; |
| 223 | pj_memcpy(p, attr->value.ptr, attr->value.slen); |
| 224 | p += attr->value.slen; |
| 225 | *p++ = '"'; |
| 226 | } |
| 227 | |
| 228 | attr = attr->next; |
| 229 | } |
| 230 | |
| 231 | /* Check for empty node. */ |
| 232 | if (node->content.slen==0 && |
| 233 | node->node_head.next==(pj_xml_node*)&node->node_head) |
| 234 | { |
| 235 | *p++ = ' '; |
| 236 | *p++ = '/'; |
| 237 | *p++ = '>'; |
| 238 | return (int)(p-buf); |
| 239 | } |
| 240 | |
| 241 | /* Enclosing '>' */ |
| 242 | if (SIZE_LEFT() < 1) return -1; |
| 243 | *p++ = '>'; |
| 244 | |
| 245 | /* Print sub nodes. */ |
| 246 | sub_node = node->node_head.next; |
| 247 | while (sub_node != (pj_xml_node*)&node->node_head) { |
| 248 | int printed; |
| 249 | |
| 250 | if (SIZE_LEFT() < indent + 3) |
| 251 | return -1; |
| 252 | //*p++ = '\r'; |
| 253 | *p++ = '\n'; |
| 254 | |
| 255 | printed = xml_print_node(sub_node, indent + 1, p, SIZE_LEFT()); |
| 256 | if (printed < 0) |
| 257 | return -1; |
| 258 | |
| 259 | p += printed; |
| 260 | sub_node = sub_node->next; |
| 261 | } |
| 262 | |
| 263 | /* Content. */ |
| 264 | if (node->content.slen) { |
| 265 | if (SIZE_LEFT() < node->content.slen) return -1; |
| 266 | pj_memcpy(p, node->content.ptr, node->content.slen); |
| 267 | p += node->content.slen; |
| 268 | } |
| 269 | |
| 270 | /* Enclosing node. */ |
| 271 | if (node->node_head.next != (pj_xml_node*)&node->node_head) { |
| 272 | if (SIZE_LEFT() < node->name.slen + 5 + indent) |
| 273 | return -1; |
| 274 | //*p++ = '\r'; |
| 275 | *p++ = '\n'; |
| 276 | for (i=0; i<indent; ++i) |
| 277 | *p++ = ' '; |
| 278 | } else { |
| 279 | if (SIZE_LEFT() < node->name.slen + 3) |
| 280 | return -1; |
| 281 | } |
| 282 | *p++ = '<'; |
| 283 | *p++ = '/'; |
| 284 | pj_memcpy(p, node->name.ptr, node->name.slen); |
| 285 | p += node->name.slen; |
| 286 | *p++ = '>'; |
| 287 | |
| 288 | #undef SIZE_LEFT |
| 289 | |
| 290 | return (int)(p-buf); |
| 291 | } |
| 292 | |
| 293 | PJ_DEF(int) pj_xml_print(const pj_xml_node *node, char *buf, pj_size_t len, |
| 294 | pj_bool_t include_prolog) |
| 295 | { |
| 296 | int prolog_len = 0; |
| 297 | int printed; |
| 298 | |
| 299 | if (!node || !buf || !len) |
| 300 | return 0; |
| 301 | |
| 302 | if (include_prolog) { |
| 303 | pj_str_t prolog = {"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39}; |
| 304 | if ((int)len < prolog.slen) |
| 305 | return -1; |
| 306 | pj_memcpy(buf, prolog.ptr, prolog.slen); |
| 307 | prolog_len = (int)prolog.slen; |
| 308 | } |
| 309 | |
| 310 | printed = xml_print_node(node, 0, buf+prolog_len, len-prolog_len) + prolog_len; |
| 311 | if (printed > 0 && len-printed >= 1) { |
| 312 | buf[printed++] = '\n'; |
| 313 | } |
| 314 | return printed; |
| 315 | } |
| 316 | |
| 317 | PJ_DEF(pj_xml_node*) pj_xml_node_new(pj_pool_t *pool, const pj_str_t *name) |
| 318 | { |
| 319 | pj_xml_node *node = alloc_node(pool); |
| 320 | pj_strdup(pool, &node->name, name); |
| 321 | return node; |
| 322 | } |
| 323 | |
| 324 | PJ_DEF(pj_xml_attr*) pj_xml_attr_new( pj_pool_t *pool, const pj_str_t *name, |
| 325 | const pj_str_t *value) |
| 326 | { |
| 327 | pj_xml_attr *attr = alloc_attr(pool); |
| 328 | pj_strdup( pool, &attr->name, name); |
| 329 | pj_strdup( pool, &attr->value, value); |
| 330 | return attr; |
| 331 | } |
| 332 | |
| 333 | PJ_DEF(void) pj_xml_add_node( pj_xml_node *parent, pj_xml_node *node ) |
| 334 | { |
| 335 | pj_list_push_back(&parent->node_head, node); |
| 336 | } |
| 337 | |
| 338 | PJ_DEF(void) pj_xml_add_attr( pj_xml_node *node, pj_xml_attr *attr ) |
| 339 | { |
| 340 | pj_list_push_back(&node->attr_head, attr); |
| 341 | } |
| 342 | |
| 343 | PJ_DEF(pj_xml_node*) pj_xml_find_node(const pj_xml_node *parent, |
| 344 | const pj_str_t *name) |
| 345 | { |
| 346 | const pj_xml_node *node = parent->node_head.next; |
| 347 | |
| 348 | PJ_CHECK_STACK(); |
| 349 | |
| 350 | while (node != (void*)&parent->node_head) { |
| 351 | if (pj_stricmp(&node->name, name) == 0) |
| 352 | return (pj_xml_node*)node; |
| 353 | node = node->next; |
| 354 | } |
| 355 | return NULL; |
| 356 | } |
| 357 | |
| 358 | PJ_DEF(pj_xml_node*) pj_xml_find_node_rec(const pj_xml_node *parent, |
| 359 | const pj_str_t *name) |
| 360 | { |
| 361 | const pj_xml_node *node = parent->node_head.next; |
| 362 | |
| 363 | PJ_CHECK_STACK(); |
| 364 | |
| 365 | while (node != (void*)&parent->node_head) { |
| 366 | pj_xml_node *found; |
| 367 | if (pj_stricmp(&node->name, name) == 0) |
| 368 | return (pj_xml_node*)node; |
| 369 | found = pj_xml_find_node_rec(node, name); |
| 370 | if (found) |
| 371 | return (pj_xml_node*)found; |
| 372 | node = node->next; |
| 373 | } |
| 374 | return NULL; |
| 375 | } |
| 376 | |
| 377 | PJ_DEF(pj_xml_node*) pj_xml_find_next_node( const pj_xml_node *parent, |
| 378 | const pj_xml_node *node, |
| 379 | const pj_str_t *name) |
| 380 | { |
| 381 | PJ_CHECK_STACK(); |
| 382 | |
| 383 | node = node->next; |
| 384 | while (node != (void*)&parent->node_head) { |
| 385 | if (pj_stricmp(&node->name, name) == 0) |
| 386 | return (pj_xml_node*)node; |
| 387 | node = node->next; |
| 388 | } |
| 389 | return NULL; |
| 390 | } |
| 391 | |
| 392 | |
| 393 | PJ_DEF(pj_xml_attr*) pj_xml_find_attr( const pj_xml_node *node, |
| 394 | const pj_str_t *name, |
| 395 | const pj_str_t *value) |
| 396 | { |
| 397 | const pj_xml_attr *attr = node->attr_head.next; |
| 398 | while (attr != (void*)&node->attr_head) { |
| 399 | if (pj_stricmp(&attr->name, name)==0) { |
| 400 | if (value) { |
| 401 | if (pj_stricmp(&attr->value, value)==0) |
| 402 | return (pj_xml_attr*)attr; |
| 403 | } else { |
| 404 | return (pj_xml_attr*)attr; |
| 405 | } |
| 406 | } |
| 407 | attr = attr->next; |
| 408 | } |
| 409 | return NULL; |
| 410 | } |
| 411 | |
| 412 | |
| 413 | |
| 414 | PJ_DEF(pj_xml_node*) pj_xml_find( const pj_xml_node *parent, |
| 415 | const pj_str_t *name, |
| 416 | const void *data, |
| 417 | pj_bool_t (*match)(const pj_xml_node *, |
| 418 | const void*)) |
| 419 | { |
| 420 | const pj_xml_node *node = (const pj_xml_node *)parent->node_head.next; |
| 421 | |
| 422 | if (!name && !match) |
| 423 | return NULL; |
| 424 | |
| 425 | while (node != (const pj_xml_node*) &parent->node_head) { |
| 426 | if (name) { |
| 427 | if (pj_stricmp(&node->name, name)!=0) { |
| 428 | node = node->next; |
| 429 | continue; |
| 430 | } |
| 431 | } |
| 432 | if (match) { |
| 433 | if (match(node, data)) |
| 434 | return (pj_xml_node*)node; |
| 435 | } else { |
| 436 | return (pj_xml_node*)node; |
| 437 | } |
| 438 | |
| 439 | node = node->next; |
| 440 | } |
| 441 | return NULL; |
| 442 | } |
| 443 | |
| 444 | PJ_DEF(pj_xml_node*) pj_xml_find_rec( const pj_xml_node *parent, |
| 445 | const pj_str_t *name, |
| 446 | const void *data, |
| 447 | pj_bool_t (*match)(const pj_xml_node*, |
| 448 | const void*)) |
| 449 | { |
| 450 | const pj_xml_node *node = (const pj_xml_node *)parent->node_head.next; |
| 451 | |
| 452 | if (!name && !match) |
| 453 | return NULL; |
| 454 | |
| 455 | while (node != (const pj_xml_node*) &parent->node_head) { |
| 456 | pj_xml_node *found; |
| 457 | |
| 458 | if (name) { |
| 459 | if (pj_stricmp(&node->name, name)==0) { |
| 460 | if (match) { |
| 461 | if (match(node, data)) |
| 462 | return (pj_xml_node*)node; |
| 463 | } else { |
| 464 | return (pj_xml_node*)node; |
| 465 | } |
| 466 | } |
| 467 | |
| 468 | } else if (match) { |
| 469 | if (match(node, data)) |
| 470 | return (pj_xml_node*)node; |
| 471 | } |
| 472 | |
| 473 | found = pj_xml_find_rec(node, name, data, match); |
| 474 | if (found) |
| 475 | return found; |
| 476 | |
| 477 | node = node->next; |
| 478 | } |
| 479 | return NULL; |
| 480 | } |
| 481 | |
| 482 | PJ_DEF(pj_xml_node*) pj_xml_clone( pj_pool_t *pool, const pj_xml_node *rhs) |
| 483 | { |
| 484 | pj_xml_node *node; |
| 485 | const pj_xml_attr *r_attr; |
| 486 | const pj_xml_node *child; |
| 487 | |
| 488 | node = alloc_node(pool); |
| 489 | |
| 490 | pj_strdup(pool, &node->name, &rhs->name); |
| 491 | pj_strdup(pool, &node->content, &rhs->content); |
| 492 | |
| 493 | /* Clone all attributes */ |
| 494 | r_attr = rhs->attr_head.next; |
| 495 | while (r_attr != &rhs->attr_head) { |
| 496 | |
| 497 | pj_xml_attr *attr; |
| 498 | |
| 499 | attr = alloc_attr(pool); |
| 500 | pj_strdup(pool, &attr->name, &r_attr->name); |
| 501 | pj_strdup(pool, &attr->value, &r_attr->value); |
| 502 | |
| 503 | pj_list_push_back(&node->attr_head, attr); |
| 504 | |
| 505 | r_attr = r_attr->next; |
| 506 | } |
| 507 | |
| 508 | /* Clone all child nodes. */ |
| 509 | child = rhs->node_head.next; |
| 510 | while (child != (pj_xml_node*) &rhs->node_head) { |
| 511 | pj_xml_node *new_child; |
| 512 | |
| 513 | new_child = pj_xml_clone(pool, child); |
| 514 | pj_list_push_back(&node->node_head, new_child); |
| 515 | |
| 516 | child = child->next; |
| 517 | } |
| 518 | |
| 519 | return node; |
| 520 | } |