blob: 0a95e117767945a8998ca7d001d2ed50206d1f84 [file] [log] [blame]
Alexandre Lision67916dd2014-01-24 13:33:04 -05001/* $Id$ */
2/*
3 * Copyright (C) 2008-2011 Teluu Inc. (http://www.teluu.com)
4 * Copyright (C) 2003-2008 Benny Prijono <benny@prijono.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20#ifndef __PJ_SCANNER_H__
21#define __PJ_SCANNER_H__
22
23/**
24 * @file scanner.h
25 * @brief Text Scanning.
26 */
27
28#include <pjlib-util/types.h>
29
30PJ_BEGIN_DECL
31
32/**
33 * @defgroup PJ_SCAN Fast Text Scanning
34 * @ingroup PJLIB_TEXT
35 * @brief Text scanning utility.
36 *
37 * This module provides fast text scanning functions.
38 *
39 * @{
40 */
41#if defined(PJ_SCANNER_USE_BITWISE) && PJ_SCANNER_USE_BITWISE != 0
42# include <pjlib-util/scanner_cis_bitwise.h>
43#else
44# include <pjlib-util/scanner_cis_uint.h>
45#endif
46
47/**
48 * Initialize scanner input specification buffer.
49 *
50 * @param cs_buf The scanner character specification.
51 */
52PJ_DECL(void) pj_cis_buf_init(pj_cis_buf_t *cs_buf);
53
54/**
55 * Create a new input specification.
56 *
57 * @param cs_buf Specification buffer.
58 * @param cis Character input specification to be initialized.
59 *
60 * @return PJ_SUCCESS if new specification has been successfully
61 * created, or PJ_ETOOMANY if there are already too many
62 * specifications in the buffer.
63 */
64PJ_DECL(pj_status_t) pj_cis_init(pj_cis_buf_t *cs_buf, pj_cis_t *cis);
65
66/**
67 * Create a new input specification based on an existing specification.
68 *
69 * @param new_cis The new specification to be initialized.
70 * @param existing The existing specification, from which the input
71 * bitmask will be copied to the new specification.
72 *
73 * @return PJ_SUCCESS if new specification has been successfully
74 * created, or PJ_ETOOMANY if there are already too many
75 * specifications in the buffer.
76 */
77PJ_DECL(pj_status_t) pj_cis_dup(pj_cis_t *new_cis, pj_cis_t *existing);
78
79/**
80 * Add the characters in the specified range '[cstart, cend)' to the
81 * specification (the last character itself ('cend') is not added).
82 *
83 * @param cis The scanner character specification.
84 * @param cstart The first character in the range.
85 * @param cend The next character after the last character in the range.
86 */
87PJ_DECL(void) pj_cis_add_range( pj_cis_t *cis, int cstart, int cend);
88
89/**
90 * Add alphabetic characters to the specification.
91 *
92 * @param cis The scanner character specification.
93 */
94PJ_DECL(void) pj_cis_add_alpha( pj_cis_t *cis);
95
96/**
97 * Add numeric characters to the specification.
98 *
99 * @param cis The scanner character specification.
100 */
101PJ_DECL(void) pj_cis_add_num( pj_cis_t *cis);
102
103/**
104 * Add the characters in the string to the specification.
105 *
106 * @param cis The scanner character specification.
107 * @param str The string.
108 */
109PJ_DECL(void) pj_cis_add_str( pj_cis_t *cis, const char *str);
110
111/**
112 * Add specification from another specification.
113 *
114 * @param cis The specification to which the characters are added.
115 * @param rhs The specification to be copied.
116 */
117PJ_DECL(void) pj_cis_add_cis( pj_cis_t *cis, const pj_cis_t *rhs);
118
119/**
120 * Delete characters in the specified range from the specification.
121 *
122 * @param cis The scanner character specification.
123 * @param cstart The first character in the range.
124 * @param cend The next character after the last character in the range.
125 */
126PJ_DECL(void) pj_cis_del_range( pj_cis_t *cis, int cstart, int cend);
127
128/**
129 * Delete characters in the specified string from the specification.
130 *
131 * @param cis The scanner character specification.
132 * @param str The string.
133 */
134PJ_DECL(void) pj_cis_del_str( pj_cis_t *cis, const char *str);
135
136/**
137 * Invert specification.
138 *
139 * @param cis The scanner character specification.
140 */
141PJ_DECL(void) pj_cis_invert( pj_cis_t *cis );
142
143/**
144 * Check whether the specified character belongs to the specification.
145 *
146 * @param cis The scanner character specification.
147 * @param c The character to check for matching.
148 *
149 * @return Non-zero if match (not necessarily one).
150 */
151PJ_INLINE(int) pj_cis_match( const pj_cis_t *cis, pj_uint8_t c )
152{
153 return PJ_CIS_ISSET(cis, c);
154}
155
156
/**
 * Flags for scanner.
 */
enum
{
    /** This flag specifies that the scanner should automatically skip
	whitespaces.
     */
    PJ_SCAN_AUTOSKIP_WS = 1,

    /** This flag specifies that the scanner should automatically skip
	SIP header continuation. This flag implies PJ_SCAN_AUTOSKIP_WS
	(its value, 3, has the PJ_SCAN_AUTOSKIP_WS bit set).
     */
    PJ_SCAN_AUTOSKIP_WS_HEADER = 3,

    /** Auto-skip new lines.
     */
    PJ_SCAN_AUTOSKIP_NEWLINE = 4
};
176
177
178/* Forward decl. */
179struct pj_scanner;
180
181
182/**
183 * The callback function type to be called by the scanner when it encounters
184 * syntax error.
185 *
186 * @param scanner The scanner instance that calls the callback.
187 */
188typedef void (*pj_syn_err_func_ptr)(struct pj_scanner *scanner);
189
190
191/**
192 * The text scanner structure.
193 */
194typedef struct pj_scanner
195{
196 char *begin; /**< Start of input buffer. */
197 char *end; /**< End of input buffer. */
198 char *curptr; /**< Current pointer. */
199 int line; /**< Current line. */
200 char *start_line; /**< Where current line starts. */
201 int skip_ws; /**< Skip whitespace flag. */
202 pj_syn_err_func_ptr callback; /**< Syntax error callback. */
203} pj_scanner;
204
205
/**
 * This structure can be used by the application to store the state of the
 * parser, so that the scanner can be rolled back to this state when
 * necessary. It records the scanning position only (current pointer, line
 * number and start of the current line).
 */
typedef struct pj_scan_state
{
    /** Current scanner's pointer. */
    char *curptr;

    /** Current line. */
    int line;

    /** Start of current line. */
    char *start_line;
} pj_scan_state;
216
217
218/**
219 * Initialize the scanner. Note that the input string buffer must have
220 * a length of at least buflen+1 because the scanner will NUL-terminate the
221 * string during initialization.
222 *
223 * @param scanner The scanner to be initialized.
224 * @param bufstart The input buffer to scan. Note that bufstart[buflen] will be
225 * filled with a NUL char until the scanner is destroyed, so
226 * the actual buffer length must be at least buflen+1.
227 * @param buflen The length of the input buffer, which normally is
228 * strlen(bufstart).
229 * @param options Zero, or combination of PJ_SCAN_AUTOSKIP_WS or
230 * PJ_SCAN_AUTOSKIP_WS_HEADER
231 * @param callback Callback to be called when the scanner encounters syntax
232 * error condition.
233 */
234PJ_DECL(void) pj_scan_init( pj_scanner *scanner, char *bufstart,
235 pj_size_t buflen,
236 unsigned options,
237 pj_syn_err_func_ptr callback );
238
239
240/**
241 * Call this function when application has finished using the scanner.
242 *
243 * @param scanner The scanner.
244 */
245PJ_DECL(void) pj_scan_fini( pj_scanner *scanner );
246
247
248/**
249 * Determine whether the EOF condition for the scanner has been met.
250 *
251 * @param scanner The scanner.
252 *
253 * @return Non-zero if scanner is EOF.
254 */
255PJ_INLINE(int) pj_scan_is_eof( const pj_scanner *scanner)
256{
257 return scanner->curptr >= scanner->end;
258}
259
260
261/**
262 * Peek strings in current position according to parameter spec, and return
263 * the strings in parameter out. The current scanner position will not be
264 * moved. If the scanner is already in EOF state, syntax error callback will
265 * be called.
266 *
267 * @param scanner The scanner.
268 * @param spec The spec to match input string.
269 * @param out String to store the result.
270 *
271 * @return the character right after the peek-ed position or zero if there's
272 * no more characters.
273 */
274PJ_DECL(int) pj_scan_peek( pj_scanner *scanner,
275 const pj_cis_t *spec, pj_str_t *out);
276
277
278/**
279 * Peek len characters in current position, and return them in out parameter.
280 * Note that whitespaces or newlines will be returned as it is, regardless
281 * of PJ_SCAN_AUTOSKIP_WS settings. If fewer than len characters are left,
282 * syntax error callback will be called.
283 *
284 * @param scanner The scanner.
285 * @param len Length to peek.
286 * @param out String to store the result.
287 *
288 * @return the character right after the peek-ed position or zero if there's
289 * no more characters.
290 */
291PJ_DECL(int) pj_scan_peek_n( pj_scanner *scanner,
292 pj_size_t len, pj_str_t *out);
293
294
295/**
296 * Peek strings in current position until spec is matched, and return
297 * the strings in parameter out. The current scanner position will not be
298 * moved. If the scanner is already in EOF state, syntax error callback will
299 * be called.
300 *
301 * @param scanner The scanner.
302 * @param spec The peeking will stop when the input match this spec.
303 * @param out String to store the result.
304 *
305 * @return the character right after the peek-ed position.
306 */
307PJ_DECL(int) pj_scan_peek_until( pj_scanner *scanner,
308 const pj_cis_t *spec,
309 pj_str_t *out);
310
311
312/**
313 * Get characters from the buffer according to the spec, and return them
314 * in out parameter. The scanner will attempt to get as many characters as
315 * possible as long as the spec matches. If the first character doesn't
316 * match the spec, or scanner is already in EOF when this function is called,
317 * an exception will be thrown.
318 *
319 * @param scanner The scanner.
320 * @param spec The spec to match input string.
321 * @param out String to store the result.
322 */
323PJ_DECL(void) pj_scan_get( pj_scanner *scanner,
324 const pj_cis_t *spec, pj_str_t *out);
325
326
327/**
328 * Just like #pj_scan_get(), but additionally performs unescaping when
329 * escaped ('%') character is found. The input spec MUST NOT contain the
330 * specification for the '%' character.
331 *
332 * @param scanner The scanner.
333 * @param spec The spec to match input string.
334 * @param out String to store the result.
335 */
336PJ_DECL(void) pj_scan_get_unescape( pj_scanner *scanner,
337 const pj_cis_t *spec, pj_str_t *out);
338
339
340/**
341 * Get characters between quotes. If current input doesn't match begin_quote,
342 * syntax error will be thrown. Note that the resulting string will contain
343 * the enclosing quote.
344 *
345 * @param scanner The scanner.
346 * @param begin_quote The character to begin the quote.
347 * @param end_quote The character to end the quote.
348 * @param out String to store the result.
349 */
350PJ_DECL(void) pj_scan_get_quote( pj_scanner *scanner,
351 int begin_quote, int end_quote,
352 pj_str_t *out);
353
354/**
355 * Get characters between quotes. If current input doesn't match begin_quote,
356 * syntax error will be thrown. Note that the resulting string will contain
357 * the enclosing quote.
358 *
359 * @param scanner The scanner.
360 * @param begin_quotes The character array to begin the quotes. For example,
361 * the two characters " and '.
362 * @param end_quotes The character array to end the quotes. The position
363 * found in the begin_quotes array will be used to match
364 * the end quotes. So if the begin_quotes was the array
365 * of "'< the end_quotes should be "'>. If begin_quotes
366 * matched the ' then the end_quotes will look for ' to
367 * match at the end.
368 * @param qsize The size of the begin_quotes and end_quotes arrays.
369 * @param out String to store the result.
370 */
371PJ_DECL(void) pj_scan_get_quotes(pj_scanner *scanner,
372 const char *begin_quotes,
373 const char *end_quotes, int qsize,
374 pj_str_t *out);
375
376
377/**
378 * Get N characters from the scanner.
379 *
380 * @param scanner The scanner.
381 * @param N Number of characters to get.
382 * @param out String to store the result.
383 */
384PJ_DECL(void) pj_scan_get_n( pj_scanner *scanner,
385 unsigned N, pj_str_t *out);
386
387
388/**
389 * Get one character from the scanner.
390 *
391 * @param scanner The scanner.
392 *
393 * @return The character.
394 */
395PJ_DECL(int) pj_scan_get_char( pj_scanner *scanner );
396
397
398/**
399 * Get characters from the scanner and move the scanner position until the
400 * current character matches the spec.
401 *
402 * @param scanner The scanner.
403 * @param spec Get until the input match this spec.
404 * @param out String to store the result.
405 */
406PJ_DECL(void) pj_scan_get_until( pj_scanner *scanner,
407 const pj_cis_t *spec, pj_str_t *out);
408
409
410/**
411 * Get characters from the scanner and move the scanner position until the
412 * current character matches until_char.
413 *
414 * @param scanner The scanner.
415 * @param until_char Get until the input match this character.
416 * @param out String to store the result.
417 */
418PJ_DECL(void) pj_scan_get_until_ch( pj_scanner *scanner,
419 int until_char, pj_str_t *out);
420
421
422/**
423 * Get characters from the scanner and move the scanner position until the
423 * current character matches any of the characters in until_spec.
425 *
426 * @param scanner The scanner.
427 * @param until_spec Get until the input match any of these characters.
428 * @param out String to store the result.
429 */
430PJ_DECL(void) pj_scan_get_until_chr( pj_scanner *scanner,
431 const char *until_spec, pj_str_t *out);
432
433/**
434 * Advance the scanner N characters, and skip whitespace
435 * if necessary.
436 *
437 * @param scanner The scanner.
438 * @param N Number of characters to skip.
439 * @param skip Flag to specify whether whitespace should be skipped
440 * after skipping the characters.
441 */
442PJ_DECL(void) pj_scan_advance_n( pj_scanner *scanner,
443 unsigned N, pj_bool_t skip);
444
445
446/**
447 * Compare string in current position with the specified string.
448 *
449 * @param scanner The scanner.
450 * @param s The string to compare with.
451 * @param len Length of the string to compare.
452 *
453 * @return zero, <0, or >0 (just like strcmp()).
454 */
455PJ_DECL(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len);
456
457
458/**
459 * Case-less string comparison of current position with the specified
460 * string.
461 *
462 * @param scanner The scanner.
463 * @param s The string to compare with.
464 * @param len Length of the string to compare with.
465 *
466 * @return zero, <0, or >0 (just like strcmp()).
467 */
468PJ_DECL(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len);
469
470/**
471 * Perform case insensitive string comparison of string in current position,
472 * knowing that the string to compare only consists of alphanumeric
473 * characters.
474 *
475 * Note that unlike #pj_scan_stricmp, this function can only return zero or
476 * -1.
477 *
478 * @param scanner The scanner.
479 * @param s The string to compare with.
480 * @param len Length of the string to compare with.
481 *
482 * @return zero if equal or -1.
483 *
484 * @see strnicmp_alnum, pj_stricmp_alnum
485 */
486PJ_DECL(int) pj_scan_stricmp_alnum( pj_scanner *scanner, const char *s,
487 int len);
488
489
490/**
491 * Get a newline from the scanner. A newline is defined as '\\n', or '\\r', or
492 * "\\r\\n". If current input is not newline, syntax error will be thrown.
493 *
494 * @param scanner The scanner.
495 */
496PJ_DECL(void) pj_scan_get_newline( pj_scanner *scanner );
497
498
499/**
500 * Manually skip whitespaces according to flag that was specified when
501 * the scanner was initialized.
502 *
503 * @param scanner The scanner.
504 */
505PJ_DECL(void) pj_scan_skip_whitespace( pj_scanner *scanner );
506
507
508/**
509 * Skip current line.
510 *
511 * @param scanner The scanner.
512 */
513PJ_DECL(void) pj_scan_skip_line( pj_scanner *scanner );
514
515/**
516 * Save the full scanner state.
517 *
518 * @param scanner The scanner.
519 * @param state Variable to store scanner's state.
520 */
521PJ_DECL(void) pj_scan_save_state( const pj_scanner *scanner,
522 pj_scan_state *state);
523
524
525/**
526 * Restore the full scanner state.
527 * Note that this would not restore the string if application has modified
528 * it. This will only restore the scanner scanning position.
529 *
530 * @param scanner The scanner.
531 * @param state State of the scanner.
532 */
533PJ_DECL(void) pj_scan_restore_state( pj_scanner *scanner,
534 pj_scan_state *state);
535
536/**
537 * Get current column position.
538 *
539 * @param scanner The scanner.
540 *
541 * @return The column position.
542 */
543PJ_INLINE(int) pj_scan_get_col( const pj_scanner *scanner )
544{
545 return (int)(scanner->curptr - scanner->start_line);
546}
547
548/**
549 * @}
550 */
551
552
553PJ_END_DECL
554
555#endif
556