/* $Id$ */
/*
 * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
20#include <pjlib-util/scanner.h>
21#include <pj/ctype.h>
22#include <pj/string.h>
23#include <pj/except.h>
24#include <pj/os.h>
25#include <pj/errno.h>
26#include <pj/assert.h>
27
/* True when c is a blank (space or horizontal tab). Evaluates c twice. */
#define PJ_SCAN_IS_SPACE(c)             ((c)==' ' || (c)=='\t')
/* True when c is a carriage return or a line feed. Evaluates c twice. */
#define PJ_SCAN_IS_NEWLINE(c)           ((c)=='\r' || (c)=='\n')
/* Cheap pre-test used before calling the whitespace skipper: true for any
 * character code not greater than 32.
 */
#define PJ_SCAN_IS_PROBABLY_SPACE(c)    ((c) <= 32)
/* True while pointer s has not reached the end of the buffer. Expects a
 * variable named `scanner` to be in scope at the point of use. The argument
 * is parenthesized so that expression arguments expand as intended.
 */
#define PJ_SCAN_CHECK_EOF(s)            ((s) != scanner->end)
32
33
34#if defined(PJ_SCANNER_USE_BITWISE) && PJ_SCANNER_USE_BITWISE != 0
35# include "scanner_cis_bitwise.c"
36#else
37# include "scanner_cis_uint.c"
38#endif
39
40
41static void pj_scan_syntax_err(pj_scanner *scanner)
42{
43 (*scanner->callback)(scanner);
44}
45
46
47PJ_DEF(void) pj_cis_add_range(pj_cis_t *cis, int cstart, int cend)
48{
49 /* Can not set zero. This is the requirement of the parser. */
50 pj_assert(cstart > 0);
51
52 while (cstart != cend) {
53 PJ_CIS_SET(cis, cstart);
54 ++cstart;
55 }
56}
57
58PJ_DEF(void) pj_cis_add_alpha(pj_cis_t *cis)
59{
60 pj_cis_add_range( cis, 'a', 'z'+1);
61 pj_cis_add_range( cis, 'A', 'Z'+1);
62}
63
64PJ_DEF(void) pj_cis_add_num(pj_cis_t *cis)
65{
66 pj_cis_add_range( cis, '0', '9'+1);
67}
68
69PJ_DEF(void) pj_cis_add_str( pj_cis_t *cis, const char *str)
70{
71 while (*str) {
72 PJ_CIS_SET(cis, *str);
73 ++str;
74 }
75}
76
77PJ_DEF(void) pj_cis_add_cis( pj_cis_t *cis, const pj_cis_t *rhs)
78{
79 int i;
80 for (i=0; i<256; ++i) {
81 if (PJ_CIS_ISSET(rhs, i))
82 PJ_CIS_SET(cis, i);
83 }
84}
85
86PJ_DEF(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend)
87{
88 while (cstart != cend) {
89 PJ_CIS_CLR(cis, cstart);
90 cstart++;
91 }
92}
93
94PJ_DEF(void) pj_cis_del_str( pj_cis_t *cis, const char *str)
95{
96 while (*str) {
97 PJ_CIS_CLR(cis, *str);
98 ++str;
99 }
100}
101
102PJ_DEF(void) pj_cis_invert( pj_cis_t *cis )
103{
104 unsigned i;
105 /* Can not set zero. This is the requirement of the parser. */
106 for (i=1; i<256; ++i) {
107 if (PJ_CIS_ISSET(cis,i))
108 PJ_CIS_CLR(cis,i);
109 else
110 PJ_CIS_SET(cis,i);
111 }
112}
113
114PJ_DEF(void) pj_scan_init( pj_scanner *scanner, char *bufstart,
115 pj_size_t buflen, unsigned options,
116 pj_syn_err_func_ptr callback )
117{
118 PJ_CHECK_STACK();
119
120 scanner->begin = scanner->curptr = bufstart;
121 scanner->end = bufstart + buflen;
122 scanner->line = 1;
123 scanner->start_line = scanner->begin;
124 scanner->callback = callback;
125 scanner->skip_ws = options;
126
127 if (scanner->skip_ws)
128 pj_scan_skip_whitespace(scanner);
129}
130
131
132PJ_DEF(void) pj_scan_fini( pj_scanner *scanner )
133{
134 PJ_CHECK_STACK();
135 PJ_UNUSED_ARG(scanner);
136}
137
138PJ_DEF(void) pj_scan_skip_whitespace( pj_scanner *scanner )
139{
140 register char *s = scanner->curptr;
141
142 while (PJ_SCAN_IS_SPACE(*s)) {
143 ++s;
144 }
145
146 if (PJ_SCAN_IS_NEWLINE(*s) && (scanner->skip_ws & PJ_SCAN_AUTOSKIP_NEWLINE)) {
147 for (;;) {
148 if (*s == '\r') {
149 ++s;
150 if (*s == '\n') ++s;
151 ++scanner->line;
152 scanner->curptr = scanner->start_line = s;
153 } else if (*s == '\n') {
154 ++s;
155 ++scanner->line;
156 scanner->curptr = scanner->start_line = s;
157 } else if (PJ_SCAN_IS_SPACE(*s)) {
158 do {
159 ++s;
160 } while (PJ_SCAN_IS_SPACE(*s));
161 } else {
162 break;
163 }
164 }
165 }
166
167 if (PJ_SCAN_IS_NEWLINE(*s) && (scanner->skip_ws & PJ_SCAN_AUTOSKIP_WS_HEADER)==PJ_SCAN_AUTOSKIP_WS_HEADER) {
168 /* Check for header continuation. */
169 scanner->curptr = s;
170
171 if (*s == '\r') {
172 ++s;
173 }
174 if (*s == '\n') {
175 ++s;
176 }
177 scanner->start_line = s;
178
179 if (PJ_SCAN_IS_SPACE(*s)) {
180 register char *t = s;
181 do {
182 ++t;
183 } while (PJ_SCAN_IS_SPACE(*t));
184
185 ++scanner->line;
186 scanner->curptr = t;
187 }
188 } else {
189 scanner->curptr = s;
190 }
191}
192
193PJ_DEF(void) pj_scan_skip_line( pj_scanner *scanner )
194{
195 char *s = pj_ansi_strchr(scanner->curptr, '\n');
196 if (!s) {
197 scanner->curptr = scanner->end;
198 } else {
199 scanner->curptr = scanner->start_line = s+1;
200 scanner->line++;
201 }
202}
203
204PJ_DEF(int) pj_scan_peek( pj_scanner *scanner,
205 const pj_cis_t *spec, pj_str_t *out)
206{
207 register char *s = scanner->curptr;
208
209 if (s >= scanner->end) {
210 pj_scan_syntax_err(scanner);
211 return -1;
212 }
213
214 /* Don't need to check EOF with PJ_SCAN_CHECK_EOF(s) */
215 while (pj_cis_match(spec, *s))
216 ++s;
217
218 pj_strset3(out, scanner->curptr, s);
219 return *s;
220}
221
222
223PJ_DEF(int) pj_scan_peek_n( pj_scanner *scanner,
224 pj_size_t len, pj_str_t *out)
225{
226 char *endpos = scanner->curptr + len;
227
228 if (endpos > scanner->end) {
229 pj_scan_syntax_err(scanner);
230 return -1;
231 }
232
233 pj_strset(out, scanner->curptr, len);
234 return *endpos;
235}
236
237
238PJ_DEF(int) pj_scan_peek_until( pj_scanner *scanner,
239 const pj_cis_t *spec,
240 pj_str_t *out)
241{
242 register char *s = scanner->curptr;
243
244 if (s >= scanner->end) {
245 pj_scan_syntax_err(scanner);
246 return -1;
247 }
248
249 while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match( spec, *s))
250 ++s;
251
252 pj_strset3(out, scanner->curptr, s);
253 return *s;
254}
255
256
257PJ_DEF(void) pj_scan_get( pj_scanner *scanner,
258 const pj_cis_t *spec, pj_str_t *out)
259{
260 register char *s = scanner->curptr;
261
262 pj_assert(pj_cis_match(spec,0)==0);
263
264 /* EOF is detected implicitly */
265 if (!pj_cis_match(spec, *s)) {
266 pj_scan_syntax_err(scanner);
267 return;
268 }
269
270 do {
271 ++s;
272 } while (pj_cis_match(spec, *s));
273 /* No need to check EOF here (PJ_SCAN_CHECK_EOF(s)) because
274 * buffer is NULL terminated and pj_cis_match(spec,0) should be
275 * false.
276 */
277
278 pj_strset3(out, scanner->curptr, s);
279
280 scanner->curptr = s;
281
282 if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
283 pj_scan_skip_whitespace(scanner);
284 }
285}
286
287
288PJ_DEF(void) pj_scan_get_unescape( pj_scanner *scanner,
289 const pj_cis_t *spec, pj_str_t *out)
290{
291 register char *s = scanner->curptr;
292 char *dst = s;
293
294 pj_assert(pj_cis_match(spec,0)==0);
295
296 /* Must not match character '%' */
297 pj_assert(pj_cis_match(spec,'%')==0);
298
299 /* EOF is detected implicitly */
300 if (!pj_cis_match(spec, *s) && *s != '%') {
301 pj_scan_syntax_err(scanner);
302 return;
303 }
304
305 out->ptr = s;
306 do {
307 if (*s == '%') {
308 if (s+3 <= scanner->end && pj_isxdigit(*(s+1)) &&
309 pj_isxdigit(*(s+2)))
310 {
311 *dst = (pj_uint8_t) ((pj_hex_digit_to_val(*(s+1)) << 4) +
312 pj_hex_digit_to_val(*(s+2)));
313 ++dst;
314 s += 3;
315 } else {
316 *dst++ = *s++;
317 *dst++ = *s++;
318 break;
319 }
320 }
321
322 if (pj_cis_match(spec, *s)) {
323 char *start = s;
324 do {
325 ++s;
326 } while (pj_cis_match(spec, *s));
327
328 if (dst != start) pj_memmove(dst, start, s-start);
329 dst += (s-start);
330 }
331
332 } while (*s == '%');
333
334 scanner->curptr = s;
335 out->slen = (dst - out->ptr);
336
337 if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
338 pj_scan_skip_whitespace(scanner);
339 }
340}
341
342
343PJ_DEF(void) pj_scan_get_quote( pj_scanner *scanner,
344 int begin_quote, int end_quote,
345 pj_str_t *out)
346{
347 char beg = (char)begin_quote;
348 char end = (char)end_quote;
349 pj_scan_get_quotes(scanner, &beg, &end, 1, out);
350}
351
352PJ_DEF(void) pj_scan_get_quotes(pj_scanner *scanner,
353 const char *begin_quote, const char *end_quote,
354 int qsize, pj_str_t *out)
355{
356 register char *s = scanner->curptr;
357 int qpair = -1;
358 int i;
359
360 pj_assert(qsize > 0);
361
362 /* Check and eat the begin_quote. */
363 for (i = 0; i < qsize; ++i) {
364 if (*s == begin_quote[i]) {
365 qpair = i;
366 break;
367 }
368 }
369 if (qpair == -1) {
370 pj_scan_syntax_err(scanner);
371 return;
372 }
373 ++s;
374
375 /* Loop until end_quote is found.
376 */
377 do {
378 /* loop until end_quote is found. */
379 while (PJ_SCAN_CHECK_EOF(s) && *s != '\n' && *s != end_quote[qpair]) {
380 ++s;
381 }
382
383 /* check that no backslash character precedes the end_quote. */
384 if (*s == end_quote[qpair]) {
385 if (*(s-1) == '\\') {
386 char *q = s-2;
387 char *r = s-2;
388
389 while (r != scanner->begin && *r == '\\') {
390 --r;
391 }
392 /* break from main loop if we have odd number of backslashes */
393 if (((unsigned)(q-r) & 0x01) == 1) {
394 break;
395 }
396 ++s;
397 } else {
398 /* end_quote is not preceeded by backslash. break now. */
399 break;
400 }
401 } else {
402 /* loop ended by non-end_quote character. break now. */
403 break;
404 }
405 } while (1);
406
407 /* Check and eat the end quote. */
408 if (*s != end_quote[qpair]) {
409 pj_scan_syntax_err(scanner);
410 return;
411 }
412 ++s;
413
414 pj_strset3(out, scanner->curptr, s);
415
416 scanner->curptr = s;
417
418 if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
419 pj_scan_skip_whitespace(scanner);
420 }
421}
422
423
424PJ_DEF(void) pj_scan_get_n( pj_scanner *scanner,
425 unsigned N, pj_str_t *out)
426{
427 if (scanner->curptr + N > scanner->end) {
428 pj_scan_syntax_err(scanner);
429 return;
430 }
431
432 pj_strset(out, scanner->curptr, N);
433
434 scanner->curptr += N;
435
436 if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) {
437 pj_scan_skip_whitespace(scanner);
438 }
439}
440
441
442PJ_DEF(int) pj_scan_get_char( pj_scanner *scanner )
443{
444 int chr = *scanner->curptr;
445
446 if (!chr) {
447 pj_scan_syntax_err(scanner);
448 return 0;
449 }
450
451 ++scanner->curptr;
452
453 if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) {
454 pj_scan_skip_whitespace(scanner);
455 }
456 return chr;
457}
458
459
460PJ_DEF(void) pj_scan_get_newline( pj_scanner *scanner )
461{
462 if (!PJ_SCAN_IS_NEWLINE(*scanner->curptr)) {
463 pj_scan_syntax_err(scanner);
464 return;
465 }
466
467 if (*scanner->curptr == '\r') {
468 ++scanner->curptr;
469 }
470 if (*scanner->curptr == '\n') {
471 ++scanner->curptr;
472 }
473
474 ++scanner->line;
475 scanner->start_line = scanner->curptr;
476
477 /**
478 * This probably is a bug, see PROTOS test #2480.
479 * This would cause scanner to incorrectly eat two new lines, e.g.
480 * when parsing:
481 *
482 * Content-Length: 120\r\n
483 * \r\n
484 * <space><space><space>...
485 *
486 * When pj_scan_get_newline() is called to parse the first newline
487 * in the Content-Length header, it will eat the second newline
488 * too because it thinks that it's a header continuation.
489 *
490 * if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && scanner->skip_ws) {
491 * pj_scan_skip_whitespace(scanner);
492 * }
493 */
494}
495
496
497PJ_DEF(void) pj_scan_get_until( pj_scanner *scanner,
498 const pj_cis_t *spec, pj_str_t *out)
499{
500 register char *s = scanner->curptr;
501
502 if (s >= scanner->end) {
503 pj_scan_syntax_err(scanner);
504 return;
505 }
506
507 while (PJ_SCAN_CHECK_EOF(s) && !pj_cis_match(spec, *s)) {
508 ++s;
509 }
510
511 pj_strset3(out, scanner->curptr, s);
512
513 scanner->curptr = s;
514
515 if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
516 pj_scan_skip_whitespace(scanner);
517 }
518}
519
520
521PJ_DEF(void) pj_scan_get_until_ch( pj_scanner *scanner,
522 int until_char, pj_str_t *out)
523{
524 register char *s = scanner->curptr;
525
526 if (s >= scanner->end) {
527 pj_scan_syntax_err(scanner);
528 return;
529 }
530
531 while (PJ_SCAN_CHECK_EOF(s) && *s != until_char) {
532 ++s;
533 }
534
535 pj_strset3(out, scanner->curptr, s);
536
537 scanner->curptr = s;
538
539 if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
540 pj_scan_skip_whitespace(scanner);
541 }
542}
543
544
545PJ_DEF(void) pj_scan_get_until_chr( pj_scanner *scanner,
546 const char *until_spec, pj_str_t *out)
547{
548 register char *s = scanner->curptr;
549 pj_size_t speclen;
550
551 if (s >= scanner->end) {
552 pj_scan_syntax_err(scanner);
553 return;
554 }
555
556 speclen = strlen(until_spec);
557 while (PJ_SCAN_CHECK_EOF(s) && !memchr(until_spec, *s, speclen)) {
558 ++s;
559 }
560
561 pj_strset3(out, scanner->curptr, s);
562
563 scanner->curptr = s;
564
565 if (PJ_SCAN_IS_PROBABLY_SPACE(*s) && scanner->skip_ws) {
566 pj_scan_skip_whitespace(scanner);
567 }
568}
569
570PJ_DEF(void) pj_scan_advance_n( pj_scanner *scanner,
571 unsigned N, pj_bool_t skip_ws)
572{
573 if (scanner->curptr + N > scanner->end) {
574 pj_scan_syntax_err(scanner);
575 return;
576 }
577
578 scanner->curptr += N;
579
580 if (PJ_SCAN_IS_PROBABLY_SPACE(*scanner->curptr) && skip_ws) {
581 pj_scan_skip_whitespace(scanner);
582 }
583}
584
585
586PJ_DEF(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len)
587{
588 if (scanner->curptr + len > scanner->end) {
589 pj_scan_syntax_err(scanner);
590 return -1;
591 }
592 return strncmp(scanner->curptr, s, len);
593}
594
595
596PJ_DEF(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len)
597{
598 if (scanner->curptr + len > scanner->end) {
599 pj_scan_syntax_err(scanner);
600 return -1;
601 }
602 return pj_ansi_strnicmp(scanner->curptr, s, len);
603}
604
605PJ_DEF(int) pj_scan_stricmp_alnum( pj_scanner *scanner, const char *s,
606 int len)
607{
608 if (scanner->curptr + len > scanner->end) {
609 pj_scan_syntax_err(scanner);
610 return -1;
611 }
612 return strnicmp_alnum(scanner->curptr, s, len);
613}
614
615PJ_DEF(void) pj_scan_save_state( const pj_scanner *scanner,
616 pj_scan_state *state)
617{
618 state->curptr = scanner->curptr;
619 state->line = scanner->line;
620 state->start_line = scanner->start_line;
621}
622
623
624PJ_DEF(void) pj_scan_restore_state( pj_scanner *scanner,
625 pj_scan_state *state)
626{
627 scanner->curptr = state->curptr;
628 scanner->line = state->line;
629 scanner->start_line = state->start_line;
630}
631
632