Blame - jni/libpcre/sources/doc/html/pcredemo.html - jami-client-android

blob: cbe03e1159f548c4d5e3d54d7ae759c52417a8f8 [file] [log] [blame]

Tristan Matthews	0461646	2013-11-14 16:09:34 -0500	[diff] [blame]	1	<html>
				2	<head>
				3	<title>pcredemo specification</title>
				4	</head>
				5	<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
				6	<h1>pcredemo man page</h1>
				7	<p>
				8	Return to the <a href="index.html">PCRE index page</a>.
				9	</p>
				10	<p>
				11	This page is part of the PCRE HTML documentation. It was generated automatically
				12	from the original man page. If there is any nonsense in it, please consult the
				13	man page, in case the conversion went wrong.
				14	<br>
				15	<ul>
				16	</ul>
				17	<PRE>
				18	/*************************************************
				19	* PCRE DEMONSTRATION PROGRAM *
				20	*************************************************/
				21
				22	/* This is a demonstration program to illustrate the most straightforward ways
				23	of calling the PCRE regular expression library from a C program. See the
				24	pcresample documentation for a short discussion ("man pcresample" if you have
				25	the PCRE man pages installed).
				26
				27	In Unix-like environments, if PCRE is installed in your standard system
				28	libraries, you should be able to compile this program using this command:
				29
				30	gcc -Wall pcredemo.c -lpcre -o pcredemo
				31
				32	If PCRE is not installed in a standard place, it is likely to be installed with
				33	support for the pkg-config mechanism. If you have pkg-config, you can compile
				34	this program using this command:
				35
				36	gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
				37
				38	If you do not have pkg-config, you may have to use this:
				39
				40	gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
				41	-R/usr/local/lib -lpcre -o pcredemo
				42
				43	Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
				44	library files for PCRE are installed on your system. Only some operating
				45	systems (e.g. Solaris) use the -R option.
				46
				47	Building under Windows:
				48
				49	If you want to statically link this program against a non-dll .a file, you must
				50	define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
				51	pcre_free() exported functions will be declared __declspec(dllimport), with
				52	unwanted results. So in this environment, uncomment the following line. */
				53
				54	/* #define PCRE_STATIC */
				55
				56	#include <stdio.h>
				57	#include <string.h>
				58	#include <pcre.h>
				59
				60	#define OVECCOUNT 30 /* should be a multiple of 3 */
				61
				62
				63	int main(int argc, char **argv)
				64	{
				65	pcre *re;
				66	const char *error;
				67	char *pattern;
				68	char *subject;
				69	unsigned char *name_table;
				70	unsigned int option_bits;
				71	int erroffset;
				72	int find_all;
				73	int crlf_is_newline;
				74	int namecount;
				75	int name_entry_size;
				76	int ovector[OVECCOUNT];
				77	int subject_length;
				78	int rc, i;
				79	int utf8;
				80
				81
				82	/**************************************************************************
				83	* First, sort out the command line. There is only one possible option at *
				84	* the moment, "-g" to request repeated matching to find all occurrences, *
				85	* like Perl's /g option. We set the variable find_all to a non-zero value *
				86	* if the -g option is present. Apart from that, there must be exactly two *
				87	* arguments. *
				88	**************************************************************************/
				89
				90	find_all = 0;
				91	for (i = 1; i < argc; i++)
				92	{
				93	if (strcmp(argv[i], "-g") == 0) find_all = 1;
				94	else break;
				95	}
				96
				97	/* After the options, we require exactly two arguments, which are the pattern,
				98	and the subject string. */
				99
				100	if (argc - i != 2)
				101	{
				102	printf("Two arguments required: a regex and a subject string\n");
				103	return 1;
				104	}
				105
				106	pattern = argv[i];
				107	subject = argv[i+1];
				108	subject_length = (int)strlen(subject);
				109
				110
				111	/*************************************************************************
				112	* Now we are going to compile the regular expression pattern, and handle *
				113	* any errors that are detected. *
				114	*************************************************************************/
				115
				116	re = pcre_compile(
				117	pattern, /* the pattern */
				118	0, /* default options */
				119	&error, /* for error message */
				120	&erroffset, /* for error offset */
				121	NULL); /* use default character tables */
				122
				123	/* Compilation failed: print the error message and exit */
				124
				125	if (re == NULL)
				126	{
				127	printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
				128	return 1;
				129	}
				130
				131
				132	/*************************************************************************
				133	* If the compilation succeeded, we call PCRE again, in order to do a *
				134	* pattern match against the subject string. This does just ONE match. If *
				135	* further matching is needed, it will be done below. *
				136	*************************************************************************/
				137
				138	rc = pcre_exec(
				139	re, /* the compiled pattern */
				140	NULL, /* no extra data - we didn't study the pattern */
				141	subject, /* the subject string */
				142	subject_length, /* the length of the subject */
				143	0, /* start at offset 0 in the subject */
				144	0, /* default options */
				145	ovector, /* output vector for substring information */
				146	OVECCOUNT); /* number of elements in the output vector */
				147
				148	/* Matching failed: handle error cases */
				149
				150	if (rc < 0)
				151	{
				152	switch(rc)
				153	{
				154	case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
				155	/*
				156	Handle other special cases if you like
				157	*/
				158	default: printf("Matching error %d\n", rc); break;
				159	}
				160	pcre_free(re); /* Release memory used for the compiled pattern */
				161	return 1;
				162	}
				163
				164	/* Match succeeded */
				165
				166	printf("\nMatch succeeded at offset %d\n", ovector[0]);
				167
				168
				169	/*************************************************************************
				170	* We have found the first match within the subject string. If the output *
				171	* vector wasn't big enough, say so. Then output any substrings that were *
				172	* captured. *
				173	*************************************************************************/
				174
				175	/* The output vector wasn't big enough */
				176
				177	if (rc == 0)
				178	{
				179	rc = OVECCOUNT/3;
				180	printf("ovector only has room for %d captured substrings\n", rc - 1);
				181	}
				182
				183	/* Show substrings stored in the output vector by number. Obviously, in a real
				184	application you might want to do things other than print them. */
				185
				186	for (i = 0; i < rc; i++)
				187	{
				188	char *substring_start = subject + ovector[2*i];
				189	int substring_length = ovector[2*i+1] - ovector[2*i];
				190	printf("%2d: %.*s\n", i, substring_length, substring_start);
				191	}
				192
				193
				194	/**************************************************************************
				195	* That concludes the basic part of this demonstration program. We have *
				196	* compiled a pattern, and performed a single match. The code that follows *
				197	* shows first how to access named substrings, and then how to code for *
				198	* repeated matches on the same subject. *
				199	**************************************************************************/
				200
				201	/* See if there are any named substrings, and if so, show them by name. First
				202	we have to extract the count of named parentheses from the pattern. */
				203
				204	(void)pcre_fullinfo(
				205	re, /* the compiled pattern */
				206	NULL, /* no extra data - we didn't study the pattern */
				207	PCRE_INFO_NAMECOUNT, /* number of named substrings */
				208	&namecount); /* where to put the answer */
				209
				210	if (namecount <= 0) printf("No named substrings\n"); else
				211	{
				212	unsigned char *tabptr;
				213	printf("Named substrings\n");
				214
				215	/* Before we can access the substrings, we must extract the table for
				216	translating names to numbers, and the size of each entry in the table. */
				217
				218	(void)pcre_fullinfo(
				219	re, /* the compiled pattern */
				220	NULL, /* no extra data - we didn't study the pattern */
				221	PCRE_INFO_NAMETABLE, /* address of the table */
				222	&name_table); /* where to put the answer */
				223
				224	(void)pcre_fullinfo(
				225	re, /* the compiled pattern */
				226	NULL, /* no extra data - we didn't study the pattern */
				227	PCRE_INFO_NAMEENTRYSIZE, /* size of each entry in the table */
				228	&name_entry_size); /* where to put the answer */
				229
				230	/* Now we can scan the table and, for each entry, print the number, the name,
				231	and the substring itself. */
				232
				233	tabptr = name_table;
				234	for (i = 0; i < namecount; i++)
				235	{
				236	int n = (tabptr[0] << 8) | tabptr[1];
				237	printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
				238	ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
				239	tabptr += name_entry_size;
				240	}
				241	}
				242
				243
				244	/*************************************************************************
				245	* If the "-g" option was given on the command line, we want to continue *
				246	* to search for additional matches in the subject string, in a similar *
				247	* way to the /g option in Perl. This turns out to be trickier than you *
				248	* might think because of the possibility of matching an empty string. *
				249	* What happens is as follows: *
				250	* *
				251	* If the previous match was NOT for an empty string, we can just start *
				252	* the next match at the end of the previous one. *
				253	* *
				254	* If the previous match WAS for an empty string, we can't do that, as it *
				255	* would lead to an infinite loop. Instead, a special call of pcre_exec() *
				256	* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set. *
				257	* The first of these tells PCRE that an empty string at the start of the *
				258	* subject is not a valid match; other possibilities must be tried. The *
				259	* second flag restricts PCRE to one match attempt at the initial string *
				260	* position. If this match succeeds, an alternative to the empty string *
				261	* match has been found, and we can print it and proceed round the loop, *
				262	* advancing by the length of whatever was found. If this match does not *
				263	* succeed, we still stay in the loop, advancing by just one character. *
				264	* In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
				265	* more than one byte. *
				266	* *
				267	* However, there is a complication concerned with newlines. When the *
				268	* newline convention is such that CRLF is a valid newline, we must *
				269	* advance by two characters rather than one. The newline convention can *
				270	* be set in the regex by (*CR), etc.; if not, we must find the default. *
				271	*************************************************************************/
				272
				273	if (!find_all) /* Check for -g */
				274	{
				275	pcre_free(re); /* Release the memory used for the compiled pattern */
				276	return 0; /* Finish unless -g was given */
				277	}
				278
				279	/* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
				280	sequence. First, find the options with which the regex was compiled; extract
				281	the UTF-8 state, and mask off all but the newline options. */
				282
				283	(void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
				284	utf8 = option_bits & PCRE_UTF8;
				285	option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
				286	PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
				287
				288	/* If no newline options were set, find the default newline convention from the
				289	build configuration. */
				290
				291	if (option_bits == 0)
				292	{
				293	int d;
				294	(void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
				295	/* Note that these values are always the ASCII ones, even in
				296	EBCDIC environments. CR = 13, NL = 10. */
				297	option_bits = (d == 13)? PCRE_NEWLINE_CR :
				298	(d == 10)? PCRE_NEWLINE_LF :
				299	(d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
				300	(d == -2)? PCRE_NEWLINE_ANYCRLF :
				301	(d == -1)? PCRE_NEWLINE_ANY : 0;
				302	}
				303
				304	/* See if CRLF is a valid newline sequence. */
				305
				306	crlf_is_newline =
				307	option_bits == PCRE_NEWLINE_ANY ||
				308	option_bits == PCRE_NEWLINE_CRLF ||
				309	option_bits == PCRE_NEWLINE_ANYCRLF;
				310
				311	/* Loop for second and subsequent matches */
				312
				313	for (;;)
				314	{
				315	int options = 0; /* Normally no options */
				316	int start_offset = ovector[1]; /* Start at end of previous match */
				317
				318	/* If the previous match was for an empty string, we are finished if we are
				319	at the end of the subject. Otherwise, arrange to run another match at the
				320	same point to see if a non-empty match can be found. */
				321
				322	if (ovector[0] == ovector[1])
				323	{
				324	if (ovector[0] == subject_length) break;
				325	options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
				326	}
				327
				328	/* Run the next matching operation */
				329
				330	rc = pcre_exec(
				331	re, /* the compiled pattern */
				332	NULL, /* no extra data - we didn't study the pattern */
				333	subject, /* the subject string */
				334	subject_length, /* the length of the subject */
				335	start_offset, /* starting offset in the subject */
				336	options, /* options */
				337	ovector, /* output vector for substring information */
				338	OVECCOUNT); /* number of elements in the output vector */
				339
				340	/* This time, a result of NOMATCH isn't an error. If the value in "options"
				341	is zero, it just means we have found all possible matches, so the loop ends.
				342	Otherwise, it means we have failed to find a non-empty-string match at a
				343	point where there was a previous empty-string match. In this case, we do what
				344	Perl does: advance the matching position by one character, and continue. We
				345	do this by setting the "end of previous match" offset, because that is picked
				346	up at the top of the loop as the point at which to start again.
				347
				348	There are two complications: (a) When CRLF is a valid newline sequence, and
				349	the current position is just before it, advance by an extra byte. (b)
				350	Otherwise we must ensure that we skip an entire UTF-8 character if we are in
				351	UTF-8 mode. */
				352
				353	if (rc == PCRE_ERROR_NOMATCH)
				354	{
				355	if (options == 0) break; /* All matches found */
				356	ovector[1] = start_offset + 1; /* Advance one byte */
				357	if (crlf_is_newline && /* If CRLF is newline & */
				358	start_offset < subject_length - 1 && /* we are at CRLF, */
				359	subject[start_offset] == '\r' &&
				360	subject[start_offset + 1] == '\n')
				361	ovector[1] += 1; /* Advance by one more. */
				362	else if (utf8) /* Otherwise, ensure we */
				363	{ /* advance a whole UTF-8 */
				364	while (ovector[1] < subject_length) /* character. */
				365	{
				366	if ((subject[ovector[1]] & 0xc0) != 0x80) break;
				367	ovector[1] += 1;
				368	}
				369	}
				370	continue; /* Go round the loop again */
				371	}
				372
				373	/* Other matching errors are not recoverable. */
				374
				375	if (rc < 0)
				376	{
				377	printf("Matching error %d\n", rc);
				378	pcre_free(re); /* Release memory used for the compiled pattern */
				379	return 1;
				380	}
				381
				382	/* Match succeeded */
				383
				384	printf("\nMatch succeeded again at offset %d\n", ovector[0]);
				385
				386	/* The match succeeded, but the output vector wasn't big enough. */
				387
				388	if (rc == 0)
				389	{
				390	rc = OVECCOUNT/3;
				391	printf("ovector only has room for %d captured substrings\n", rc - 1);
				392	}
				393
				394	/* As before, show substrings stored in the output vector by number, and then
				395	also any named substrings. */
				396
				397	for (i = 0; i < rc; i++)
				398	{
				399	char *substring_start = subject + ovector[2*i];
				400	int substring_length = ovector[2*i+1] - ovector[2*i];
				401	printf("%2d: %.*s\n", i, substring_length, substring_start);
				402	}
				403
				404	if (namecount <= 0) printf("No named substrings\n"); else
				405	{
				406	unsigned char *tabptr = name_table;
				407	printf("Named substrings\n");
				408	for (i = 0; i < namecount; i++)
				409	{
				410	int n = (tabptr[0] << 8) | tabptr[1];
				411	printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
				412	ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
				413	tabptr += name_entry_size;
				414	}
				415	}
				416	} /* End of loop to find second and subsequent matches */
				417
				418	printf("\n");
				419	pcre_free(re); /* Release memory used for the compiled pattern */
				420	return 0;
				421	}
				422
				423	/* End of pcredemo.c */
				424	<p>
				425	Return to the <a href="index.html">PCRE index page</a>.
				426	</p>