blob: f1b0b133b56577539cbe416f89d6e6691693575e [file] [log] [blame]
Benny Prijono4766ffe2005-11-01 17:56:59 +00001/* $Id$
2 *
3 */
Benny Prijonodd859a62005-11-01 16:42:51 +00004
5#ifndef __PJ_PARSER_H__
6#define __PJ_PARSER_H__
7
8/**
9 * @file scanner.h
10 * @brief Text Scanning.
11 */
12
13#include <pj/types.h>
14
15PJ_BEGIN_DECL
16
17/**
18 * @defgroup PJ_SCAN Text Scanning
19 * @ingroup PJ_MISC
20 * @brief
21 * Text scanning utility.
22 */
23
24/**
25 * @defgroup PJ_CHARSPEC Character Filter Specification
26 * @ingroup PJ_SCAN
27 * @brief
28 * The type pj_char_spec is a specification of character set used in
29 * scanner. Application can define multiple character specs, such as to
30 * scan alpha numerics, numbers, tokens, etc.
31 * @{
32 */
33
34/**
35 * Element type of a #pj_char_spec table: each element holds the membership
36 * flag of one character.
37 */
38typedef pj_uint8_t pj_char_spec_element_t;
39
40/**
41 * A character specification, implemented as an array of boolean flags. Each
42 * flag records the membership of one character in the spec: if the flag at
43 * a given position is non-zero, the character at that position belongs to
44 * the specification; if it is zero, the character does not belong to it.
45 */
46typedef pj_char_spec_element_t pj_char_spec[256];
47/* Note: it has to be 256 (not 128) to cater for extended characters in the input. */
48
49/**
50 * Initialize a character specification.
51 * @param cs The character specification to initialize.
52 */
53PJ_DECL(void) pj_cs_init( pj_char_spec cs);
54
55/**
56 * Mark a single character as a member of the specification.
57 * @param cs The character specification to modify.
58 * @param c The character whose membership is set to TRUE.
59 */
60PJ_DECL(void) pj_cs_set( pj_char_spec cs, int c);
61
62/**
63 * Add every character in the half-open range '[cstart, cend)' to the
64 * specification; the end character 'cend' itself is not added.
65 * @param cs The character specification to modify.
66 * @param cstart The first character in the range (inclusive).
67 * @param cend The character right after the last one in the range (exclusive).
68 */
69PJ_DECL(void) pj_cs_add_range( pj_char_spec cs, int cstart, int cend);
70
71/**
72 * Add the alphabetic characters to the specification.
73 * @param cs The character specification to modify.
74 */
75PJ_DECL(void) pj_cs_add_alpha( pj_char_spec cs);
76
77/**
78 * Add the numeric characters to the specification.
79 * @param cs The character specification to modify.
80 */
81PJ_DECL(void) pj_cs_add_num( pj_char_spec cs);
82
83/**
84 * Add each character that appears in the string to the specification.
85 * @param cs The character specification to modify.
86 * @param str The string whose characters are added. NOTE(review): no length is passed, so presumably NUL-terminated - confirm.
87 */
88PJ_DECL(void) pj_cs_add_str( pj_char_spec cs, const char *str);
89
90/**
91 * Remove the characters in the specified range from the specification.
92 * @param cs The character specification to modify.
93 * @param cstart The first character in the range.
94 * @param cend The character right after the last character in the range.
95 */
96PJ_DECL(void) pj_cs_del_range( pj_char_spec cs, int cstart, int cend);
97
98/**
99 * Remove each character that appears in the string from the specification.
100 * @param cs The character specification to modify.
101 * @param str The string whose characters are removed. NOTE(review): no length is passed, so presumably NUL-terminated - confirm.
102 */
103PJ_DECL(void) pj_cs_del_str( pj_char_spec cs, const char *str);
104
105/**
106 * Invert the specification.
107 * @param cs The character specification to invert.
108 */
109PJ_DECL(void) pj_cs_invert( pj_char_spec cs );
110
111/**
112 * Check whether the specified character belongs to the specification.
113 * @param cs the scanner character specification.
114 * @param c the character to check for matching.
115 */
116PJ_INLINE(int) pj_cs_match( const pj_char_spec cs, int c )
117{
118 return cs[c];
119}
120
121/**
122 * @}
123 */
124
125/**
126 * @defgroup PJ_SCANNER Text Scanner
127 * @ingroup PJ_SCAN
128 * @{
129 */
130
/**
 * Option flags for the scanner.
 */
enum
{
    /** This flag specifies that the scanner should automatically skip
	whitespaces.
     */
    PJ_SCAN_AUTOSKIP_WS = 1,

    /** This flag specifies that the scanner should automatically skip
	SIP header continuation. The value contains the PJ_SCAN_AUTOSKIP_WS
	bit, which is why this flag implies PJ_SCAN_AUTOSKIP_WS.
     */
    PJ_SCAN_AUTOSKIP_WS_HEADER = PJ_SCAN_AUTOSKIP_WS | 2,

    /** Automatically skip new lines.
     */
    PJ_SCAN_AUTOSKIP_NEWLINE = 4,
};
150
151
152/* Forward declaration, needed by the callback type declared below. */
153struct pj_scanner;
154
155
156/**
157 * Type of the callback function that the scanner calls when it encounters
158 * a syntax error.
159 * @param scanner The scanner instance that calls the callback.
160 */
161typedef void (*pj_syn_err_func_ptr)(struct pj_scanner *scanner);
162
163
164/**
165 * The text scanner structure: the input buffer bounds, the current scanning
 * position (pointer, line and column), and the scanner's settings.
166 */
167typedef struct pj_scanner
168{
169 char *begin; /**< Start of input buffer. */
170 char *end; /**< End of input buffer; pj_scan_is_eof() reports EOF once curptr reaches it. */
171 char *curptr; /**< Current scanning position in the buffer. */
172 int line; /**< Current line. */
173 int col; /**< Current column. */
174 int skip_ws; /**< Skip whitespace flag. NOTE(review): presumably the PJ_SCAN_AUTOSKIP_* options given to pj_scan_init() - confirm. */
175 pj_syn_err_func_ptr callback; /**< Syntax error callback. */
176} pj_scanner;
177
178
179/**
180 * This structure can be used by the application to store the state of the
181 * scanner (see pj_scan_save_state()), so that the scanner can be rolled back
 * to this state when necessary (see pj_scan_restore_state()).
182 */
183typedef struct pj_scan_state
184{
185 char *curptr; /**< The scanner's current pointer. */
186 int line; /**< Current line. */
187 int col; /**< Current column. */
188} pj_scan_state;
189
190
191/**
192 * Initialize the scanner. Note that the input string buffer must have
193 * length at least buflen+1 because the scanner will NUL-terminate the
194 * string during initialization.
195 *
196 * @param scanner The scanner to be initialized.
197 * @param bufstart The input buffer to scan. Note that bufstart[buflen] will
198 * hold a NUL character until the scanner is destroyed, so
199 * the actual buffer length must be at least buflen+1.
200 * @param buflen The length of the input buffer, which normally is
201 * strlen(bufstart).
202 * @param options Zero, or combination of PJ_SCAN_AUTOSKIP_WS or
203 * PJ_SCAN_AUTOSKIP_WS_HEADER.
 * NOTE(review): PJ_SCAN_AUTOSKIP_NEWLINE is declared in the
 * same flag enum but is not listed here - confirm whether
 * it is accepted.
204 * @param callback Callback to be called when the scanner encounters a syntax
205 * error condition.
206 */
207PJ_DECL(void) pj_scan_init( pj_scanner *scanner, char *bufstart, int buflen,
208 unsigned options,
209 pj_syn_err_func_ptr callback );
210
211
212/**
213 * Call this function when the application has finished using the scanner.
214 *
215 * @param scanner The scanner that is no longer needed.
216 */
217PJ_DECL(void) pj_scan_fini( pj_scanner *scanner );
218
219
220/**
221 * Determine whether the EOF condition for the scanner has been met.
222 *
223 * @param scanner The scanner.
224 *
225 * @return Non-zero if scanner is EOF.
226 */
227PJ_INLINE(int) pj_scan_is_eof( const pj_scanner *scanner)
228{
229 return scanner->curptr >= scanner->end;
230}
231
232
233/**
234 * Peek the string at the current position according to parameter spec, and
235 * return it in parameter out. The current scanner position will not be
236 * moved. If the scanner is already in EOF state, the syntax error callback
237 * will be called.
238 *
239 * @param scanner The scanner.
240 * @param spec The spec to match the input string against.
241 * @param out String to store the result.
242 *
243 * @return the character right after the peek-ed position or zero if there are
244 * no more characters.
245 */
246PJ_DECL(int) pj_scan_peek( pj_scanner *scanner,
247 const pj_char_spec spec, pj_str_t *out);
248
249
250/**
251 * Peek len characters at the current position, and return them in parameter
252 * out. Note that whitespaces or newlines will be returned as they are,
253 * regardless of PJ_SCAN_AUTOSKIP_WS settings. If fewer than len characters
254 * are left, the syntax error callback will be called.
255 *
256 * @param scanner The scanner.
257 * @param len Number of characters to peek.
258 * @param out String to store the result.
259 *
260 * @return the character right after the peek-ed position or zero if there are
261 * no more characters.
262 */
263PJ_DECL(int) pj_scan_peek_n( pj_scanner *scanner,
264 pj_size_t len, pj_str_t *out);
265
266
267/**
268 * Peek the string at the current position until spec is matched, and return
269 * it in parameter out. The current scanner position will not be
270 * moved. If the scanner is already in EOF state, the syntax error callback
271 * will be called.
272 *
273 * @param scanner The scanner.
274 * @param spec The peeking stops when the input matches this spec.
275 * @param out String to store the result.
276 *
277 * @return the character right after the peek-ed position.
278 */
279PJ_DECL(int) pj_scan_peek_until( pj_scanner *scanner,
280 const pj_char_spec spec,
281 pj_str_t *out);
282
283
284/**
285 * Get characters from the buffer according to the spec, and return them
286 * in parameter out. The scanner will attempt to get as many characters as
287 * possible as long as the spec matches. If the first character doesn't
288 * match the spec, or the scanner is already at EOF when this function is
289 * called, a syntax error is raised.
 * NOTE(review): the original text said "an exception will be thrown";
 * presumably this happens through the scanner's syntax error callback, as
 * described for the peek functions - confirm.
290 *
291 * @param scanner The scanner.
292 * @param spec The spec to match the input string against.
293 * @param out String to store the result.
294 */
295PJ_DECL(void) pj_scan_get( pj_scanner *scanner,
296 const pj_char_spec spec, pj_str_t *out);
297
298
299/**
300 * Get the characters between quotes. If the current input doesn't match
301 * begin_quote, a syntax error will be raised.
302 *
303 * @param scanner The scanner.
304 * @param begin_quote The character that begins the quote.
305 * @param end_quote The character that ends the quote.
306 * @param out String to store the result.
307 */
308PJ_DECL(void) pj_scan_get_quote( pj_scanner *scanner,
309 int begin_quote, int end_quote,
310 pj_str_t *out);
311
312/**
313 * Get N characters from the scanner and return them in parameter out.
314 *
315 * @param scanner The scanner.
316 * @param N Number of characters to get.
317 * @param out String to store the result.
318 */
319PJ_DECL(void) pj_scan_get_n( pj_scanner *scanner,
320 unsigned N, pj_str_t *out);
321
322
323/**
324 * Get one character from the scanner.
325 *
326 * @param scanner The scanner.
327 *
328 * @return NOTE(review): presumably the character that was obtained (the
 * original documentation left the return value unspecified) -
 * confirm against the implementation.
329 */
330PJ_DECL(int) pj_scan_get_char( pj_scanner *scanner );
331
332
333/**
334 * Get a newline from the scanner. A newline is defined as '\\n', or '\\r', or
335 * "\\r\\n". If the current input is not a newline, a syntax error will be raised.
336 *
337 * @param scanner The scanner.
338 */
339PJ_DECL(void) pj_scan_get_newline( pj_scanner *scanner );
340
341
342/**
343 * Get characters from the scanner and move the scanner position until the
344 * current character matches the spec.
345 *
346 * @param scanner The scanner.
347 * @param spec Get characters until the input matches this spec.
348 * @param out String to store the result.
349 */
350PJ_DECL(void) pj_scan_get_until( pj_scanner *scanner,
351 const pj_char_spec spec, pj_str_t *out);
352
353
354/**
355 * Get characters from the scanner and move the scanner position until the
356 * current character matches until_char.
357 *
358 * @param scanner The scanner.
359 * @param until_char Get characters until the input matches this character.
360 * @param out String to store the result.
361 */
362PJ_DECL(void) pj_scan_get_until_ch( pj_scanner *scanner,
363 int until_char, pj_str_t *out);
364
365
366/**
367 * Get characters from the scanner and move the scanner position until the
368 * current character matches any of the characters in until_spec.
369 *
370 * @param scanner The scanner.
371 * @param until_spec Get characters until the input matches any of these characters.
372 * @param out String to store the result.
373 */
374PJ_DECL(void) pj_scan_get_until_chr( pj_scanner *scanner,
375 const char *until_spec, pj_str_t *out);
376
377/**
378 * Advance the scanner by N characters, and optionally skip whitespace
379 * afterwards.
380 *
381 * @param scanner The scanner.
382 * @param N Number of characters to skip.
383 * @param skip Flag to specify whether whitespace should be skipped
384 * after skipping the characters.
385 */
386PJ_DECL(void) pj_scan_advance_n( pj_scanner *scanner,
387 unsigned N, pj_bool_t skip);
388
389
390/**
391 * Compare the string at the current position with the specified string.
392 *
393 * @param scanner The scanner.
394 * @param s The string to compare with.
395 * @param len Length of the string to compare.
396 *
397 * @return zero, <0, or >0 (just like strcmp()).
398 */
399PJ_DECL(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len);
400
401
402/**
403 * Case-insensitive comparison of the string at the current position with
404 * the specified string.
405 *
406 * @param scanner The scanner.
407 * @param s The string to compare with.
408 * @param len Length of the string to compare with.
409 *
410 * @return zero, <0, or >0 (just like strcmp()).
411 */
412PJ_DECL(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len);
413
414
415/**
416 * Manually skip whitespaces according to the flag that was specified when
417 * the scanner was initialized.
418 *
419 * @param scanner The scanner.
420 */
421PJ_DECL(void) pj_scan_skip_whitespace( pj_scanner *scanner );
422
423
424/**
425 * Save the full scanner state so that it can be restored later.
426 *
427 * @param scanner The scanner.
428 * @param state Variable that receives the scanner's state.
429 */
430PJ_DECL(void) pj_scan_save_state( pj_scanner *scanner, pj_scan_state *state);
431
432
433/**
434 * Restore the full scanner state.
435 * Note that this does not restore the string if the application has modified
436 * it. Only the scanner's scanning position is restored.
437 *
438 * @param scanner The scanner.
439 * @param state The previously saved state of the scanner.
440 */
441PJ_DECL(void) pj_scan_restore_state( pj_scanner *scanner,
442 pj_scan_state *state);
443
444/**
445 * @}
446 */
447
448#if PJ_FUNCTIONS_ARE_INLINED
449# include "scanner_i.h"
450#endif
451
452
453PJ_END_DECL
454
455#endif
456