blob: 5a82ff91946b83a1de6a4e16afb707138e5809f7 [file] [log] [blame]
Benny Prijonodd859a62005-11-01 16:42:51 +00001/* $Header: /pjproject-0.3/pjlib/include/pj/scanner.h 10 10/14/05 12:26a Bennylp $ */
2
3#ifndef __PJ_PARSER_H__
4#define __PJ_PARSER_H__
5
6/**
7 * @file scanner.h
8 * @brief Text Scanning.
9 */
10
11#include <pj/types.h>
12
13PJ_BEGIN_DECL
14
15/**
16 * @defgroup PJ_SCAN Text Scanning
17 * @ingroup PJ_MISC
18 * @brief
19 * Text scanning utility.
20 */
21
22/**
23 * @defgroup PJ_CHARSPEC Character Filter Specification
24 * @ingroup PJ_SCAN
25 * @brief
26 * The type pj_char_spec is a specification of character set used in
27 * scanner. Application can define multiple character specs, such as to
28 * scan alpha numerics, numbers, tokens, etc.
29 * @{
30 */
31
32/**
33 * This describes the type of individual character specification in
34 * #pj_char_spec.
35 */
36typedef pj_uint8_t pj_char_spec_element_t;
37
38/**
39 * The character specification is implemented as array of boolean flags. Each
40 * flag indicates the membership of the character in the spec. If the flag
41 * at one position is non-zero, then the character at that position belongs
42 * to the specification, and vice versa.
43 */
44typedef pj_char_spec_element_t pj_char_spec[256];
45// Note: it's got to be 256 (not 128) to cater for extended character in input.
46
47/**
48 * Initialize character spec.
49 * @param cs the scanner character specification.
50 */
51PJ_DECL(void) pj_cs_init( pj_char_spec cs);
52
53/**
54 * Set the membership of the specified character to TRUE.
55 * @param cs the scanner character specification.
56 * @param c the character.
57 */
58PJ_DECL(void) pj_cs_set( pj_char_spec cs, int c);
59
60/**
61 * Add the characters in the specified range '[cstart, cend)' to the
62 * specification (the last character itself ('cend') is not added).
63 * @param cs the scanner character specification.
64 * @param cstart the first character in the range.
65 * @param cend the next character after the last character in the range.
66 */
67PJ_DECL(void) pj_cs_add_range( pj_char_spec cs, int cstart, int cend);
68
69/**
70 * Add alphabetic characters to the specification.
71 * @param cs the scanner character specification.
72 */
73PJ_DECL(void) pj_cs_add_alpha( pj_char_spec cs);
74
75/**
76 * Add numeric characters to the specification.
77 * @param cs the scanner character specification.
78 */
79PJ_DECL(void) pj_cs_add_num( pj_char_spec cs);
80
81/**
82 * Add the characters in the string to the specification.
83 * @param cs the scanner character specification.
84 * @param str the string.
85 */
86PJ_DECL(void) pj_cs_add_str( pj_char_spec cs, const char *str);
87
88/**
89 * Delete characters in the specified range from the specification.
90 * @param cs the scanner character specification.
91 * @param cstart the first character in the range.
92 * @param cend the next character after the last character in the range.
93 */
94PJ_DECL(void) pj_cs_del_range( pj_char_spec cs, int cstart, int cend);
95
96/**
97 * Delete characters in the specified string from the specification.
98 * @param cs the scanner character specification.
99 * @param str the string.
100 */
101PJ_DECL(void) pj_cs_del_str( pj_char_spec cs, const char *str);
102
103/**
104 * Invert specification.
105 * @param cs the scanner character specification.
106 */
107PJ_DECL(void) pj_cs_invert( pj_char_spec cs );
108
109/**
110 * Check whether the specified character belongs to the specification.
111 * @param cs the scanner character specification.
112 * @param c the character to check for matching.
113 */
114PJ_INLINE(int) pj_cs_match( const pj_char_spec cs, int c )
115{
116 return cs[c];
117}
118
119/**
120 * @}
121 */
122
123/**
124 * @defgroup PJ_SCANNER Text Scanner
125 * @ingroup PJ_SCAN
126 * @{
127 */
128
/**
 * Flags for scanner.
 */
enum
{
    /** This flag specifies that the scanner should automatically skip
     *  whitespaces.
     */
    PJ_SCAN_AUTOSKIP_WS = 1,

    /** This flag specifies that the scanner should automatically skip
     *  SIP header continuation. This flag implies PJ_SCAN_AUTOSKIP_WS
     *  (its value includes the PJ_SCAN_AUTOSKIP_WS bit).
     */
    PJ_SCAN_AUTOSKIP_WS_HEADER = 3,

    /** Auto-skip new lines.
     */
    PJ_SCAN_AUTOSKIP_NEWLINE = 4
};
148
149
/* Forward declaration, so that the callback type can refer to the scanner. */
struct pj_scanner;


/**
 * The callback function type to be called by the scanner when it encounters
 * a syntax error.
 *
 * @param scanner   The scanner instance that calls the callback.
 */
typedef void (*pj_syn_err_func_ptr)(struct pj_scanner *scanner);
160
161
162/**
163 * The text scanner structure.
164 */
165typedef struct pj_scanner
166{
167 char *begin; /**< Start of input buffer. */
168 char *end; /**< End of input buffer. */
169 char *curptr; /**< Current pointer. */
170 int line; /**< Current line. */
171 int col; /**< Current column. */
172 int skip_ws; /**< Skip whitespace flag. */
173 pj_syn_err_func_ptr callback; /**< Syntax error callback. */
174} pj_scanner;
175
176
/**
 * This structure can be used by the application to store the state of the
 * parser, so that the scanner can be rolled back to this state when
 * necessary.
 */
typedef struct pj_scan_state
{
    char *curptr;   /**< Current scanner's pointer. */
    int   line;     /**< Current line.              */
    int   col;      /**< Current column.            */
} pj_scan_state;
187
188
189/**
190 * Initialize the scanner. Note that the input string buffer must have
191 * length at least buflen+1 because the scanner will NULL terminate the
192 * string during initialization.
193 *
194 * @param scanner The scanner to be initialized.
195 * @param bufstart The input buffer to scan. Note that buffer[buflen] will be
196 * filled with NULL char until scanner is destroyed, so
197 * the actual buffer length must be at least buflen+1.
198 * @param buflen The length of the input buffer, which normally is
199 * strlen(bufstart).
200 * @param options Zero, or combination of PJ_SCAN_AUTOSKIP_WS or
201 * PJ_SCAN_AUTOSKIP_WS_HEADER
202 * @param callback Callback to be called when the scanner encounters syntax
203 * error condition.
204 */
205PJ_DECL(void) pj_scan_init( pj_scanner *scanner, char *bufstart, int buflen,
206 unsigned options,
207 pj_syn_err_func_ptr callback );
208
209
210/**
211 * Call this function when application has finished using the scanner.
212 *
213 * @param scanner The scanner.
214 */
215PJ_DECL(void) pj_scan_fini( pj_scanner *scanner );
216
217
218/**
219 * Determine whether the EOF condition for the scanner has been met.
220 *
221 * @param scanner The scanner.
222 *
223 * @return Non-zero if scanner is EOF.
224 */
225PJ_INLINE(int) pj_scan_is_eof( const pj_scanner *scanner)
226{
227 return scanner->curptr >= scanner->end;
228}
229
230
231/**
232 * Peek strings in current position according to parameter spec, and return
233 * the strings in parameter out. The current scanner position will not be
234 * moved. If the scanner is already in EOF state, syntax error callback will
235 * be called thrown.
236 *
237 * @param scanner The scanner.
238 * @param spec The spec to match input string.
239 * @param out String to store the result.
240 *
241 * @return the character right after the peek-ed position or zero if there's
242 * no more characters.
243 */
244PJ_DECL(int) pj_scan_peek( pj_scanner *scanner,
245 const pj_char_spec spec, pj_str_t *out);
246
247
248/**
249 * Peek len characters in current position, and return them in out parameter.
250 * Note that whitespaces or newlines will be returned as it is, regardless
251 * of PJ_SCAN_AUTOSKIP_WS settings. If the character left is less than len,
252 * syntax error callback will be called.
253 *
254 * @param scanner The scanner.
255 * @param len Length to peek.
256 * @param out String to store the result.
257 *
258 * @return the character right after the peek-ed position or zero if there's
259 * no more characters.
260 */
261PJ_DECL(int) pj_scan_peek_n( pj_scanner *scanner,
262 pj_size_t len, pj_str_t *out);
263
264
265/**
266 * Peek strings in current position until spec is matched, and return
267 * the strings in parameter out. The current scanner position will not be
268 * moved. If the scanner is already in EOF state, syntax error callback will
269 * be called.
270 *
271 * @param scanner The scanner.
272 * @param spec The peeking will stop when the input match this spec.
273 * @param out String to store the result.
274 *
275 * @return the character right after the peek-ed position.
276 */
277PJ_DECL(int) pj_scan_peek_until( pj_scanner *scanner,
278 const pj_char_spec spec,
279 pj_str_t *out);
280
281
282/**
283 * Get characters from the buffer according to the spec, and return them
284 * in out parameter. The scanner will attempt to get as many characters as
285 * possible as long as the spec matches. If the first character doesn't
286 * match the spec, or scanner is already in EOF when this function is called,
287 * an exception will be thrown.
288 *
289 * @param scanner The scanner.
290 * @param spec The spec to match input string.
291 * @param out String to store the result.
292 */
293PJ_DECL(void) pj_scan_get( pj_scanner *scanner,
294 const pj_char_spec spec, pj_str_t *out);
295
296
297/**
298 * Get characters between quotes. If current input doesn't match begin_quote,
299 * syntax error will be thrown.
300 *
301 * @param scanner The scanner.
302 * @param begin_quote The character to begin the quote.
303 * @param end_quote The character to end the quote.
304 * @param out String to store the result.
305 */
306PJ_DECL(void) pj_scan_get_quote( pj_scanner *scanner,
307 int begin_quote, int end_quote,
308 pj_str_t *out);
309
310/**
311 * Get N characters from the scanner.
312 *
313 * @param scanner The scanner.
314 * @param N Number of characters to get.
315 * @param out String to store the result.
316 */
317PJ_DECL(void) pj_scan_get_n( pj_scanner *scanner,
318 unsigned N, pj_str_t *out);
319
320
321/**
322 * Get one character from the scanner.
323 *
324 * @param scanner The scanner.
325 *
326 * @return (unknown)
327 */
328PJ_DECL(int) pj_scan_get_char( pj_scanner *scanner );
329
330
331/**
332 * Get a newline from the scanner. A newline is defined as '\\n', or '\\r', or
333 * "\\r\\n". If current input is not newline, syntax error will be thrown.
334 *
335 * @param scanner The scanner.
336 */
337PJ_DECL(void) pj_scan_get_newline( pj_scanner *scanner );
338
339
340/**
341 * Get characters from the scanner and move the scanner position until the
342 * current character matches the spec.
343 *
344 * @param scanner The scanner.
345 * @param spec Get until the input match this spec.
346 * @param out String to store the result.
347 */
348PJ_DECL(void) pj_scan_get_until( pj_scanner *scanner,
349 const pj_char_spec spec, pj_str_t *out);
350
351
352/**
353 * Get characters from the scanner and move the scanner position until the
354 * current character matches until_char.
355 *
356 * @param scanner The scanner.
357 * @param until_char Get until the input match this character.
358 * @param out String to store the result.
359 */
360PJ_DECL(void) pj_scan_get_until_ch( pj_scanner *scanner,
361 int until_char, pj_str_t *out);
362
363
364/**
365 * Get characters from the scanner and move the scanner position until the
366 * current character matches until_char.
367 *
368 * @param scanner The scanner.
369 * @param until_spec Get until the input match any of these characters.
370 * @param out String to store the result.
371 */
372PJ_DECL(void) pj_scan_get_until_chr( pj_scanner *scanner,
373 const char *until_spec, pj_str_t *out);
374
375/**
376 * Advance the scanner N characters, and skip whitespace
377 * if necessary.
378 *
379 * @param scanner The scanner.
380 * @param N Number of characters to skip.
381 * @param skip Flag to specify whether whitespace should be skipped
382 * after skipping the characters.
383 */
384PJ_DECL(void) pj_scan_advance_n( pj_scanner *scanner,
385 unsigned N, pj_bool_t skip);
386
387
388/**
389 * Compare string in current position with the specified string.
390 *
391 * @param scanner The scanner.
392 * @param s The string to compare with.
393 * @param len Length of the string to compare.
394 *
395 * @return zero, <0, or >0 (just like strcmp()).
396 */
397PJ_DECL(int) pj_scan_strcmp( pj_scanner *scanner, const char *s, int len);
398
399
400/**
401 * Case-less string comparison of current position with the specified
402 * string.
403 *
404 * @param scanner The scanner.
405 * @param s The string to compare with.
406 * @param len Length of the string to compare with.
407 *
408 * @return zero, <0, or >0 (just like strcmp()).
409 */
410PJ_DECL(int) pj_scan_stricmp( pj_scanner *scanner, const char *s, int len);
411
412
413/**
414 * Manually skip whitespaces according to flag that was specified when
415 * the scanner was initialized.
416 *
417 * @param scanner The scanner.
418 */
419PJ_DECL(void) pj_scan_skip_whitespace( pj_scanner *scanner );
420
421
422/**
423 * Save the full scanner state.
424 *
425 * @param scanner The scanner.
426 * @param state Variable to store scanner's state.
427 */
428PJ_DECL(void) pj_scan_save_state( pj_scanner *scanner, pj_scan_state *state);
429
430
431/**
432 * Restore the full scanner state.
433 * Note that this would not restore the string if application has modified
434 * it. This will only restore the scanner scanning position.
435 *
436 * @param scanner The scanner.
437 * @param state State of the scanner.
438 */
439PJ_DECL(void) pj_scan_restore_state( pj_scanner *scanner,
440 pj_scan_state *state);
441
442/**
443 * @}
444 */
445
446#if PJ_FUNCTIONS_ARE_INLINED
447# include "scanner_i.h"
448#endif
449
450
451PJ_END_DECL
452
453#endif
454