Blame - jni/libpcre/pcre_dfa_exec.c - jami-client-android

blob: 1ffbe3be842076e742a463ecb770a7acfafa128b [file] [log] [blame]

Tristan Matthews	0461646	2013-11-14 16:09:34 -0500	[diff] [blame^]	1	/*************************************************
				2	* Perl-Compatible Regular Expressions *
				3	*************************************************/
				4
				5	/* PCRE is a library of functions to support regular expressions whose syntax
				6	and semantics are as close as possible to those of the Perl 5 language (but see
				7	below for why this module is different).
				8
				9	Written by Philip Hazel
				10	Copyright (c) 1997-2011 University of Cambridge
				11
				12	-----------------------------------------------------------------------------
				13	Redistribution and use in source and binary forms, with or without
				14	modification, are permitted provided that the following conditions are met:
				15
				16	* Redistributions of source code must retain the above copyright notice,
				17	this list of conditions and the following disclaimer.
				18
				19	* Redistributions in binary form must reproduce the above copyright
				20	notice, this list of conditions and the following disclaimer in the
				21	documentation and/or other materials provided with the distribution.
				22
				23	* Neither the name of the University of Cambridge nor the names of its
				24	contributors may be used to endorse or promote products derived from
				25	this software without specific prior written permission.
				26
				27	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				28	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				29	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				30	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				31	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				32	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				33	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				34	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				35	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				36	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				37	POSSIBILITY OF SUCH DAMAGE.
				38	-----------------------------------------------------------------------------
				39	*/
				40
				41
				42	/* This module contains the external function pcre_dfa_exec(), which is an
				43	alternative matching function that uses a sort of DFA algorithm (not a true
				44	FSM). This is NOT Perl-compatible, but it has advantages in certain
				45	applications. */
				46
				47
				48	/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
				49	the performance of his patterns greatly. I could not use it as it stood, as it
				50	was not thread safe, and made assumptions about pattern sizes. Also, it caused
				51	test 7 to loop, and test 9 to crash with a segfault.
				52
				53	The issue is the check for duplicate states, which is done by a simple linear
				54	search up the state list. (Grep for "duplicate" below to find the code.) For
				55	many patterns, there will never be many states active at one time, so a simple
				56	linear search is fine. In patterns that have many active states, it might be a
				57	bottleneck. The suggested code used an indexing scheme to remember which states
				58	had previously been used for each character, and avoided the linear search when
				59	it knew there was no chance of a duplicate. This was implemented when adding
				60	states to the state lists.
				61
				62	I wrote some thread-safe, not-limited code to try something similar at the time
				63	of checking for duplicates (instead of when adding states), using index vectors
				64	on the stack. It did give a 13% improvement with one specially constructed
				65	pattern for certain subject strings, but on other strings and on many of the
				66	simpler patterns in the test suite it did worse. The major problem, I think,
				67	was the extra time to initialize the index. This had to be done for each call
				68	of internal_dfa_exec(). (The supplied patch used a static vector, initialized
				69	only once - I suspect this was the cause of the problems with the tests.)
				70
				71	Overall, I concluded that the gains in some cases did not outweigh the losses
				72	in others, so I abandoned this code. */
				73
				74
				75
				76	#ifdef HAVE_CONFIG_H
				77	#include "config.h"
				78	#endif
				79
				80	#define NLBLOCK md /* Block containing newline information */
				81	#define PSSTART start_subject /* Field containing processed string start */
				82	#define PSEND end_subject /* Field containing processed string end */
				83
				84	#include "pcre_internal.h"
				85
				86
				87	/* For use to indent debugging output */
				88
				89	#define SP " "
				90
				91
				92	/*************************************************
				93	* Code parameters and static tables *
				94	*************************************************/
				95
				96	/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
				97	into others, under special conditions. A gap of 20 between the blocks should be
				98	enough. The resulting opcodes don't have to be less than 256 because they are
				99	never stored, so we push them well clear of the normal opcodes. */
				100
				101	#define OP_PROP_EXTRA 300
				102	#define OP_EXTUNI_EXTRA 320
				103	#define OP_ANYNL_EXTRA 340
				104	#define OP_HSPACE_EXTRA 360
				105	#define OP_VSPACE_EXTRA 380
				106
				107
				108	/* This table identifies those opcodes that are followed immediately by a
				109	character that is to be tested in some way. This makes it possible to
				110	centralize the loading of these characters. In the case of Type * etc, the
				111	"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
				112	small value. Non-zero values in the table are the offsets from the opcode where
				113	the character is to be found. *NOTE* If the start of this table is
				114	modified, the three tables that follow must also be modified. */
				115
				116	static const uschar coptable[] = {
				117	0, /* End */
				118	0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
				119	0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
				120	0, 0, 0, /* Any, AllAny, Anybyte */
				121	0, 0, /* \P, \p */
				122	0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
				123	0, /* \X */
				124	0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
				125	1, /* Char */
				126	1, /* Chari */
				127	1, /* not */
				128	1, /* noti */
				129	/* Positive single-char repeats */
				130	1, 1, 1, 1, 1, 1, /* , ?, +, +?, ?, ?? */
				131	3, 3, 3, /* upto, minupto, exact */
				132	1, 1, 1, 3, /* +, ++, ?+, upto+ /
				133	1, 1, 1, 1, 1, 1, /* I, ?I, +I, +?I, ?I, ??I */
				134	3, 3, 3, /* upto I, minupto I, exact I */
				135	1, 1, 1, 3, /* +I, ++I, ?+I, upto+I /
				136	/* Negative single-char repeats - only for chars < 256 */
				137	1, 1, 1, 1, 1, 1, /* NOT , ?, +, +?, ?, ?? */
				138	3, 3, 3, /* NOT upto, minupto, exact */
				139	1, 1, 1, 3, /* NOT +, ++, ?+, upto+ /
				140	1, 1, 1, 1, 1, 1, /* NOT I, ?I, +I, +?I, ?I, ??I */
				141	3, 3, 3, /* NOT upto I, minupto I, exact I */
				142	1, 1, 1, 3, /* NOT +I, ++I, ?+I, upto+I /
				143	/* Positive type repeats */
				144	1, 1, 1, 1, 1, 1, /* Type , ?, +, +?, ?, ?? */
				145	3, 3, 3, /* Type upto, minupto, exact */
				146	1, 1, 1, 3, /* Type +, ++, ?+, upto+ /
				147	/* Character class & ref repeats */
				148	0, 0, 0, 0, 0, 0, /* , ?, +, +?, ?, ?? */
				149	0, 0, /* CRRANGE, CRMINRANGE */
				150	0, /* CLASS */
				151	0, /* NCLASS */
				152	0, /* XCLASS - variable length */
				153	0, /* REF */
				154	0, /* REFI */
				155	0, /* RECURSE */
				156	0, /* CALLOUT */
				157	0, /* Alt */
				158	0, /* Ket */
				159	0, /* KetRmax */
				160	0, /* KetRmin */
				161	0, /* KetRpos */
				162	0, /* Reverse */
				163	0, /* Assert */
				164	0, /* Assert not */
				165	0, /* Assert behind */
				166	0, /* Assert behind not */
				167	0, 0, /* ONCE, ONCE_NC */
				168	0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
				169	0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
				170	0, 0, /* CREF, NCREF */
				171	0, 0, /* RREF, NRREF */
				172	0, /* DEF */
				173	0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
				174	0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
				175	0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
				176	0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
				177	0, 0 /* CLOSE, SKIPZERO */
				178	};
				179
				180	/* This table identifies those opcodes that inspect a character. It is used to
				181	remember the fact that a character could have been inspected when the end of
				182	the subject is reached. *NOTE* If the start of this table is modified, the
				183	two tables that follow must also be modified. */
				184
				185	static const uschar poptable[] = {
				186	0, /* End */
				187	0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
				188	1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
				189	1, 1, 1, /* Any, AllAny, Anybyte */
				190	1, 1, /* \P, \p */
				191	1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
				192	1, /* \X */
				193	0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
				194	1, /* Char */
				195	1, /* Chari */
				196	1, /* not */
				197	1, /* noti */
				198	/* Positive single-char repeats */
				199	1, 1, 1, 1, 1, 1, /* , ?, +, +?, ?, ?? */
				200	1, 1, 1, /* upto, minupto, exact */
				201	1, 1, 1, 1, /* +, ++, ?+, upto+ /
				202	1, 1, 1, 1, 1, 1, /* I, ?I, +I, +?I, ?I, ??I */
				203	1, 1, 1, /* upto I, minupto I, exact I */
				204	1, 1, 1, 1, /* +I, ++I, ?+I, upto+I /
				205	/* Negative single-char repeats - only for chars < 256 */
				206	1, 1, 1, 1, 1, 1, /* NOT , ?, +, +?, ?, ?? */
				207	1, 1, 1, /* NOT upto, minupto, exact */
				208	1, 1, 1, 1, /* NOT +, ++, ?+, upto+ /
				209	1, 1, 1, 1, 1, 1, /* NOT I, ?I, +I, +?I, ?I, ??I */
				210	1, 1, 1, /* NOT upto I, minupto I, exact I */
				211	1, 1, 1, 1, /* NOT +I, ++I, ?+I, upto+I /
				212	/* Positive type repeats */
				213	1, 1, 1, 1, 1, 1, /* Type , ?, +, +?, ?, ?? */
				214	1, 1, 1, /* Type upto, minupto, exact */
				215	1, 1, 1, 1, /* Type +, ++, ?+, upto+ /
				216	/* Character class & ref repeats */
				217	1, 1, 1, 1, 1, 1, /* , ?, +, +?, ?, ?? */
				218	1, 1, /* CRRANGE, CRMINRANGE */
				219	1, /* CLASS */
				220	1, /* NCLASS */
				221	1, /* XCLASS - variable length */
				222	0, /* REF */
				223	0, /* REFI */
				224	0, /* RECURSE */
				225	0, /* CALLOUT */
				226	0, /* Alt */
				227	0, /* Ket */
				228	0, /* KetRmax */
				229	0, /* KetRmin */
				230	0, /* KetRpos */
				231	0, /* Reverse */
				232	0, /* Assert */
				233	0, /* Assert not */
				234	0, /* Assert behind */
				235	0, /* Assert behind not */
				236	0, 0, /* ONCE, ONCE_NC */
				237	0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
				238	0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
				239	0, 0, /* CREF, NCREF */
				240	0, 0, /* RREF, NRREF */
				241	0, /* DEF */
				242	0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
				243	0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
				244	0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
				245	0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
				246	0, 0 /* CLOSE, SKIPZERO */
				247	};
				248
				249	/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
				250	and \w */
				251
				252	static const uschar toptable1[] = {
				253	0, 0, 0, 0, 0, 0,
				254	ctype_digit, ctype_digit,
				255	ctype_space, ctype_space,
				256	ctype_word, ctype_word,
				257	0, 0 /* OP_ANY, OP_ALLANY */
				258	};
				259
				260	static const uschar toptable2[] = {
				261	0, 0, 0, 0, 0, 0,
				262	ctype_digit, 0,
				263	ctype_space, 0,
				264	ctype_word, 0,
				265	1, 1 /* OP_ANY, OP_ALLANY */
				266	};
				267
				268
				269	/* Structure for holding data about a particular state, which is in effect the
				270	current data for an active path through the match tree. It must consist
				271	entirely of ints because the working vector we are passed, and which we put
				272	these structures in, is a vector of ints. */
				273
				274	typedef struct stateblock {
				275	int offset; /* Offset to opcode */
				276	int count; /* Count for repeats */
				277	int data; /* Some use extra data */
				278	} stateblock;
				279
				280	#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
				281
				282
				283	#ifdef PCRE_DEBUG
				284	/*************************************************
				285	* Print character string *
				286	*************************************************/
				287
				288	/* Character string printing function for debugging.
				289
				290	Arguments:
				291	p points to string
				292	length number of bytes
				293	f where to print
				294
				295	Returns: nothing
				296	*/
				297
				298	static void
				299	pchars(unsigned char p, int length, FILE f)
				300	{
				301	int c;
				302	while (length-- > 0)
				303	{
				304	if (isprint(c = *(p++)))
				305	fprintf(f, "%c", c);
				306	else
				307	fprintf(f, "\\x%02x", c);
				308	}
				309	}
				310	#endif
				311
				312
				313
				314	/*************************************************
				315	* Execute a Regular Expression - DFA engine *
				316	*************************************************/
				317
				318	/* This internal function applies a compiled pattern to a subject string,
				319	starting at a given point, using a DFA engine. This function is called from the
				320	external one, possibly multiple times if the pattern is not anchored. The
				321	function calls itself recursively for some kinds of subpattern.
				322
				323	Arguments:
				324	md the match_data block with fixed information
				325	this_start_code the opening bracket of this subexpression's code
				326	current_subject where we currently are in the subject string
				327	start_offset start offset in the subject string
				328	offsets vector to contain the matching string offsets
				329	offsetcount size of same
				330	workspace vector of workspace
				331	wscount size of same
				332	rlevel function call recursion level
				333
				334	Returns: > 0 => number of match offset pairs placed in offsets
				335	= 0 => offsets overflowed; longest matches are present
				336	-1 => failed to match
				337	< -1 => some kind of unexpected problem
				338
				339	The following macros are used for adding states to the two state vectors (one
				340	for the current character, one for the following character). */
				341
				342	#define ADD_ACTIVE(x,y) \
				343	if (active_count++ < wscount) \
				344	{ \
				345	next_active_state->offset = (x); \
				346	next_active_state->count = (y); \
				347	next_active_state++; \
				348	DPRINTF(("%.sADD_ACTIVE(%d,%d)\n", rlevel2-2, SP, (x), (y))); \
				349	} \
				350	else return PCRE_ERROR_DFA_WSSIZE
				351
				352	#define ADD_ACTIVE_DATA(x,y,z) \
				353	if (active_count++ < wscount) \
				354	{ \
				355	next_active_state->offset = (x); \
				356	next_active_state->count = (y); \
				357	next_active_state->data = (z); \
				358	next_active_state++; \
				359	DPRINTF(("%.sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel2-2, SP, (x), (y), (z))); \
				360	} \
				361	else return PCRE_ERROR_DFA_WSSIZE
				362
				363	#define ADD_NEW(x,y) \
				364	if (new_count++ < wscount) \
				365	{ \
				366	next_new_state->offset = (x); \
				367	next_new_state->count = (y); \
				368	next_new_state++; \
				369	DPRINTF(("%.sADD_NEW(%d,%d)\n", rlevel2-2, SP, (x), (y))); \
				370	} \
				371	else return PCRE_ERROR_DFA_WSSIZE
				372
				373	#define ADD_NEW_DATA(x,y,z) \
				374	if (new_count++ < wscount) \
				375	{ \
				376	next_new_state->offset = (x); \
				377	next_new_state->count = (y); \
				378	next_new_state->data = (z); \
				379	next_new_state++; \
				380	DPRINTF(("%.sADD_NEW_DATA(%d,%d,%d)\n", rlevel2-2, SP, (x), (y), (z))); \
				381	} \
				382	else return PCRE_ERROR_DFA_WSSIZE
				383
				384	/* And now, here is the code */
				385
				386	static int
				387	internal_dfa_exec(
				388	dfa_match_data *md,
				389	const uschar *this_start_code,
				390	const uschar *current_subject,
				391	int start_offset,
				392	int *offsets,
				393	int offsetcount,
				394	int *workspace,
				395	int wscount,
				396	int rlevel)
				397	{
				398	stateblock active_states, new_states, *temp_states;
				399	stateblock next_active_state, next_new_state;
				400
				401	const uschar ctypes, lcc, *fcc;
				402	const uschar *ptr;
				403	const uschar end_code, first_op;
				404
				405	dfa_recursion_info new_recursive;
				406
				407	int active_count, new_count, match_count;
				408
				409	/* Some fields in the md block are frequently referenced, so we load them into
				410	independent variables in the hope that this will perform better. */
				411
				412	const uschar *start_subject = md->start_subject;
				413	const uschar *end_subject = md->end_subject;
				414	const uschar *start_code = md->start_code;
				415
				416	#ifdef SUPPORT_UTF8
				417	BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
				418	#else
				419	BOOL utf8 = FALSE;
				420	#endif
				421
				422	rlevel++;
				423	offsetcount &= (-2);
				424
				425	wscount -= 2;
				426	wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
				427	(2 * INTS_PER_STATEBLOCK);
				428
				429	DPRINTF(("\n%.*s---------------------\n"
				430	"%.*sCall to internal_dfa_exec f=%d\n",
				431	rlevel2-2, SP, rlevel2-2, SP, rlevel));
				432
				433	ctypes = md->tables + ctypes_offset;
				434	lcc = md->tables + lcc_offset;
				435	fcc = md->tables + fcc_offset;
				436
				437	match_count = PCRE_ERROR_NOMATCH; /* A negative number */
				438
				439	active_states = (stateblock *)(workspace + 2);
				440	next_new_state = new_states = active_states + wscount;
				441	new_count = 0;
				442
				443	first_op = this_start_code + 1 + LINK_SIZE +
				444	((this_start_code == OP_CBRA \|\| this_start_code == OP_SCBRA \|\|
				445	this_start_code == OP_CBRAPOS \|\| this_start_code == OP_SCBRAPOS)? 2:0);
				446
				447	/* The first thing in any (sub) pattern is a bracket of some sort. Push all
				448	the alternative states onto the list, and find out where the end is. This
				449	makes it possible to use this function recursively, when we want to stop at a
				450	matching internal ket rather than at the end.
				451
				452	If the first opcode in the first alternative is OP_REVERSE, we are dealing with
				453	a backward assertion. In that case, we have to find out the maximum amount to
				454	move back, and set up each alternative appropriately. */
				455
				456	if (*first_op == OP_REVERSE)
				457	{
				458	int max_back = 0;
				459	int gone_back;
				460
				461	end_code = this_start_code;
				462	do
				463	{
				464	int back = GET(end_code, 2+LINK_SIZE);
				465	if (back > max_back) max_back = back;
				466	end_code += GET(end_code, 1);
				467	}
				468	while (*end_code == OP_ALT);
				469
				470	/* If we can't go back the amount required for the longest lookbehind
				471	pattern, go back as far as we can; some alternatives may still be viable. */
				472
				473	#ifdef SUPPORT_UTF8
				474	/* In character mode we have to step back character by character */
				475
				476	if (utf8)
				477	{
				478	for (gone_back = 0; gone_back < max_back; gone_back++)
				479	{
				480	if (current_subject <= start_subject) break;
				481	current_subject--;
				482	while (current_subject > start_subject &&
				483	(*current_subject & 0xc0) == 0x80)
				484	current_subject--;
				485	}
				486	}
				487	else
				488	#endif
				489
				490	/* In byte-mode we can do this quickly. */
				491
				492	{
				493	gone_back = (current_subject - max_back < start_subject)?
				494	(int)(current_subject - start_subject) : max_back;
				495	current_subject -= gone_back;
				496	}
				497
				498	/* Save the earliest consulted character */
				499
				500	if (current_subject < md->start_used_ptr)
				501	md->start_used_ptr = current_subject;
				502
				503	/* Now we can process the individual branches. */
				504
				505	end_code = this_start_code;
				506	do
				507	{
				508	int back = GET(end_code, 2+LINK_SIZE);
				509	if (back <= gone_back)
				510	{
				511	int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
				512	ADD_NEW_DATA(-bstate, 0, gone_back - back);
				513	}
				514	end_code += GET(end_code, 1);
				515	}
				516	while (*end_code == OP_ALT);
				517	}
				518
				519	/* This is the code for a "normal" subpattern (not a backward assertion). The
				520	start of a whole pattern is always one of these. If we are at the top level,
				521	we may be asked to restart matching from the same point that we reached for a
				522	previous partial match. We still have to scan through the top-level branches to
				523	find the end state. */
				524
				525	else
				526	{
				527	end_code = this_start_code;
				528
				529	/* Restarting */
				530
				531	if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
				532	{
				533	do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
				534	new_count = workspace[1];
				535	if (!workspace[0])
				536	memcpy(new_states, active_states, new_count * sizeof(stateblock));
				537	}
				538
				539	/* Not restarting */
				540
				541	else
				542	{
				543	int length = 1 + LINK_SIZE +
				544	((this_start_code == OP_CBRA \|\| this_start_code == OP_SCBRA \|\|
				545	this_start_code == OP_CBRAPOS \|\| this_start_code == OP_SCBRAPOS)?
				546	2:0);
				547	do
				548	{
				549	ADD_NEW((int)(end_code - start_code + length), 0);
				550	end_code += GET(end_code, 1);
				551	length = 1 + LINK_SIZE;
				552	}
				553	while (*end_code == OP_ALT);
				554	}
				555	}
				556
				557	workspace[0] = 0; /* Bit indicating which vector is current */
				558
				559	DPRINTF(("%.sEnd state = %d\n", rlevel2-2, SP, end_code - start_code));
				560
				561	/* Loop for scanning the subject */
				562
				563	ptr = current_subject;
				564	for (;;)
				565	{
				566	int i, j;
				567	int clen, dlen;
				568	unsigned int c, d;
				569	int forced_fail = 0;
				570	BOOL could_continue = FALSE;
				571
				572	/* Make the new state list into the active state list and empty the
				573	new state list. */
				574
				575	temp_states = active_states;
				576	active_states = new_states;
				577	new_states = temp_states;
				578	active_count = new_count;
				579	new_count = 0;
				580
				581	workspace[0] ^= 1; /* Remember for the restarting feature */
				582	workspace[1] = active_count;
				583
				584	#ifdef PCRE_DEBUG
				585	printf("%.sNext character: rest of subject = \"", rlevel2-2, SP);
				586	pchars((uschar )ptr, strlen((char )ptr), stdout);
				587	printf("\"\n");
				588
				589	printf("%.sActive states: ", rlevel2-2, SP);
				590	for (i = 0; i < active_count; i++)
				591	printf("%d/%d ", active_states[i].offset, active_states[i].count);
				592	printf("\n");
				593	#endif
				594
				595	/* Set the pointers for adding new states */
				596
				597	next_active_state = active_states + active_count;
				598	next_new_state = new_states;
				599
				600	/* Load the current character from the subject outside the loop, as many
				601	different states may want to look at it, and we assume that at least one
				602	will. */
				603
				604	if (ptr < end_subject)
				605	{
				606	clen = 1; /* Number of bytes in the character */
				607	#ifdef SUPPORT_UTF8
				608	if (utf8) { GETCHARLEN(c, ptr, clen); } else
				609	#endif /* SUPPORT_UTF8 */
				610	c = *ptr;
				611	}
				612	else
				613	{
				614	clen = 0; /* This indicates the end of the subject */
				615	c = NOTACHAR; /* This value should never actually be used */
				616	}
				617
				618	/* Scan up the active states and act on each one. The result of an action
				619	may be to add more states to the currently active list (e.g. on hitting a
				620	parenthesis) or it may be to put states on the new list, for considering
				621	when we move the character pointer on. */
				622
				623	for (i = 0; i < active_count; i++)
				624	{
				625	stateblock *current_state = active_states + i;
				626	BOOL caseless = FALSE;
				627	const uschar *code;
				628	int state_offset = current_state->offset;
				629	int count, codevalue, rrc;
				630
				631	#ifdef PCRE_DEBUG
				632	printf ("%.sProcessing state %d c=", rlevel2-2, SP, state_offset);
				633	if (clen == 0) printf("EOL\n");
				634	else if (c > 32 && c < 127) printf("'%c'\n", c);
				635	else printf("0x%02x\n", c);
				636	#endif
				637
				638	/* A negative offset is a special case meaning "hold off going to this
				639	(negated) state until the number of characters in the data field have
				640	been skipped". */
				641
				642	if (state_offset < 0)
				643	{
				644	if (current_state->data > 0)
				645	{
				646	DPRINTF(("%.sSkipping this character\n", rlevel2-2, SP));
				647	ADD_NEW_DATA(state_offset, current_state->count,
				648	current_state->data - 1);
				649	continue;
				650	}
				651	else
				652	{
				653	current_state->offset = state_offset = -state_offset;
				654	}
				655	}
				656
				657	/* Check for a duplicate state with the same count, and skip if found.
				658	See the note at the head of this module about the possibility of improving
				659	performance here. */
				660
				661	for (j = 0; j < i; j++)
				662	{
				663	if (active_states[j].offset == state_offset &&
				664	active_states[j].count == current_state->count)
				665	{
				666	DPRINTF(("%.sDuplicate state: skipped\n", rlevel2-2, SP));
				667	goto NEXT_ACTIVE_STATE;
				668	}
				669	}
				670
				671	/* The state offset is the offset to the opcode */
				672
				673	code = start_code + state_offset;
				674	codevalue = *code;
				675
				676	/* If this opcode inspects a character, but we are at the end of the
				677	subject, remember the fact for use when testing for a partial match. */
				678
				679	if (clen == 0 && poptable[codevalue] != 0)
				680	could_continue = TRUE;
				681
				682	/* If this opcode is followed by an inline character, load it. It is
				683	tempting to test for the presence of a subject character here, but that
				684	is wrong, because sometimes zero repetitions of the subject are
				685	permitted.
				686
				687	We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
				688	argument that is not a data character - but is always one byte long. We
				689	have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
				690	this case. To keep the other cases fast, convert these ones to new opcodes.
				691	*/
				692
				693	if (coptable[codevalue] > 0)
				694	{
				695	dlen = 1;
				696	#ifdef SUPPORT_UTF8
				697	if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
				698	#endif /* SUPPORT_UTF8 */
				699	d = code[coptable[codevalue]];
				700	if (codevalue >= OP_TYPESTAR)
				701	{
				702	switch(d)
				703	{
				704	case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
				705	case OP_NOTPROP:
				706	case OP_PROP: codevalue += OP_PROP_EXTRA; break;
				707	case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
				708	case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
				709	case OP_NOT_HSPACE:
				710	case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
				711	case OP_NOT_VSPACE:
				712	case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
				713	default: break;
				714	}
				715	}
				716	}
				717	else
				718	{
				719	dlen = 0; /* Not strictly necessary, but compilers moan */
				720	d = NOTACHAR; /* if these variables are not set. */
				721	}
				722
				723
				724	/* Now process the individual opcodes */
				725
				726	switch (codevalue)
				727	{
				728	/* ========================================================================== */
				729	/* These cases are never obeyed. This is a fudge that causes a compile-
				730	time error if the vectors coptable or poptable, which are indexed by
				731	opcode, are not the correct length. It seems to be the only way to do
				732	such a check at compile time, as the sizeof() operator does not work
				733	in the C preprocessor. */
				734
				735	case OP_TABLE_LENGTH:
				736	case OP_TABLE_LENGTH +
				737	((sizeof(coptable) == OP_TABLE_LENGTH) &&
				738	(sizeof(poptable) == OP_TABLE_LENGTH)):
				739	break;
				740
				741	/* ========================================================================== */
				742	/* Reached a closing bracket. If not at the end of the pattern, carry
				743	on with the next opcode. For repeating opcodes, also add the repeat
				744	state. Note that KETRPOS will always be encountered at the end of the
				745	subpattern, because the possessive subpattern repeats are always handled
				746	using recursive calls. Thus, it never adds any new states.
				747
				748	At the end of the (sub)pattern, unless we have an empty string and
				749	PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
				750	start of the subject, save the match data, shifting up all previous
				751	matches so we always have the longest first. */
				752
				753	case OP_KET:
				754	case OP_KETRMIN:
				755	case OP_KETRMAX:
				756	case OP_KETRPOS:
				757	if (code != end_code)
				758	{
				759	ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
				760	if (codevalue != OP_KET)
				761	{
				762	ADD_ACTIVE(state_offset - GET(code, 1), 0);
				763	}
				764	}
				765	else
				766	{
				767	if (ptr > current_subject \|\|
				768	((md->moptions & PCRE_NOTEMPTY) == 0 &&
				769	((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 \|\|
				770	current_subject > start_subject + md->start_offset)))
				771	{
				772	if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
				773	else if (match_count > 0 && ++match_count * 2 > offsetcount)
				774	match_count = 0;
				775	count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
				776	if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
				777	if (offsetcount >= 2)
				778	{
				779	offsets[0] = (int)(current_subject - start_subject);
				780	offsets[1] = (int)(ptr - start_subject);
				781	DPRINTF(("%.sSet matched string = \"%.s\"\n", rlevel*2-2, SP,
				782	offsets[1] - offsets[0], current_subject));
				783	}
				784	if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
				785	{
				786	DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
				787	"%.s---------------------\n\n", rlevel2-2, SP, rlevel,
				788	match_count, rlevel*2-2, SP));
				789	return match_count;
				790	}
				791	}
				792	}
				793	break;
				794
				795	/* ========================================================================== */
				796	/* These opcodes add to the current list of states without looking
				797	at the current character. */
				798
				799	/-----------------------------------------------------------------/
				800	case OP_ALT:
				801	do { code += GET(code, 1); } while (*code == OP_ALT);
				802	ADD_ACTIVE((int)(code - start_code), 0);
				803	break;
				804
				805	/-----------------------------------------------------------------/
				806	case OP_BRA:
				807	case OP_SBRA:
				808	do
				809	{
				810	ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
				811	code += GET(code, 1);
				812	}
				813	while (*code == OP_ALT);
				814	break;
				815
				816	/-----------------------------------------------------------------/
				817	case OP_CBRA:
				818	case OP_SCBRA:
				819	ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE), 0);
				820	code += GET(code, 1);
				821	while (*code == OP_ALT)
				822	{
				823	ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
				824	code += GET(code, 1);
				825	}
				826	break;
				827
				828	/-----------------------------------------------------------------/
				829	case OP_BRAZERO:
				830	case OP_BRAMINZERO:
				831	ADD_ACTIVE(state_offset + 1, 0);
				832	code += 1 + GET(code, 2);
				833	while (*code == OP_ALT) code += GET(code, 1);
				834	ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
				835	break;
				836
				837	/*-----------------------------------------------------------------*/
				838	case OP_SKIPZERO:
				839	code += 1 + GET(code, 2);
				840	while (*code == OP_ALT) code += GET(code, 1);
				841	ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
				842	break;
				843
				844	/*-----------------------------------------------------------------*/
				845	case OP_CIRC:
				846	if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
				847	{ ADD_ACTIVE(state_offset + 1, 0); }
				848	break;
				849
				850	/*-----------------------------------------------------------------*/
				851	case OP_CIRCM:
				852	if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) \|\|
				853	(ptr != end_subject && WAS_NEWLINE(ptr)))
				854	{ ADD_ACTIVE(state_offset + 1, 0); }
				855	break;
				856
				857	/*-----------------------------------------------------------------*/
				858	case OP_EOD:
				859	if (ptr >= end_subject)
				860	{
				861	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
				862	could_continue = TRUE;
				863	else { ADD_ACTIVE(state_offset + 1, 0); }
				864	}
				865	break;
				866
				867	/*-----------------------------------------------------------------*/
				868	case OP_SOD:
				869	if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
				870	break;
				871
				872	/*-----------------------------------------------------------------*/
				873	case OP_SOM:
				874	if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
				875	break;
				876
				877
				878	/* ========================================================================== */
				879	/* These opcodes inspect the next subject character, and sometimes
				880	the previous one as well, but do not have an argument. The variable
				881	clen contains the length of the current character and is zero if we are
				882	at the end of the subject. */
				883
				884	/*-----------------------------------------------------------------*/
				885	case OP_ANY:
				886	if (clen > 0 && !IS_NEWLINE(ptr))
				887	{ ADD_NEW(state_offset + 1, 0); }
				888	break;
				889
				890	/*-----------------------------------------------------------------*/
				891	case OP_ALLANY:
				892	if (clen > 0)
				893	{ ADD_NEW(state_offset + 1, 0); }
				894	break;
				895
				896	/*-----------------------------------------------------------------*/
				897	case OP_EODN:
				898	if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
				899	could_continue = TRUE;
				900	else if (clen == 0 \|\| (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
				901	{ ADD_ACTIVE(state_offset + 1, 0); }
				902	break;
				903
				904	/*-----------------------------------------------------------------*/
				905	case OP_DOLL:
				906	if ((md->moptions & PCRE_NOTEOL) == 0)
				907	{
				908	if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
				909	could_continue = TRUE;
				910	else if (clen == 0 \|\|
				911	((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
				912	(ptr == end_subject - md->nllen)
				913	))
				914	{ ADD_ACTIVE(state_offset + 1, 0); }
				915	}
				916	break;
				917
				918	/*-----------------------------------------------------------------*/
				919	case OP_DOLLM:
				920	if ((md->moptions & PCRE_NOTEOL) == 0)
				921	{
				922	if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
				923	could_continue = TRUE;
				924	else if (clen == 0 \|\|
				925	((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
				926	{ ADD_ACTIVE(state_offset + 1, 0); }
				927	}
				928	else if (IS_NEWLINE(ptr))
				929	{ ADD_ACTIVE(state_offset + 1, 0); }
				930	break;
				931
				932	/*-----------------------------------------------------------------*/
				933
				934	case OP_DIGIT:
				935	case OP_WHITESPACE:
				936	case OP_WORDCHAR:
				937	if (clen > 0 && c < 256 &&
				938	((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
				939	{ ADD_NEW(state_offset + 1, 0); }
				940	break;
				941
				942	/*-----------------------------------------------------------------*/
				943	case OP_NOT_DIGIT:
				944	case OP_NOT_WHITESPACE:
				945	case OP_NOT_WORDCHAR:
				946	if (clen > 0 && (c >= 256 \|\|
				947	((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
				948	{ ADD_NEW(state_offset + 1, 0); }
				949	break;
				950
				951	/*-----------------------------------------------------------------*/
				952	case OP_WORD_BOUNDARY:
				953	case OP_NOT_WORD_BOUNDARY:
				954	{
				955	int left_word, right_word;
				956
				957	if (ptr > start_subject)
				958	{
				959	const uschar *temp = ptr - 1;
				960	if (temp < md->start_used_ptr) md->start_used_ptr = temp;
				961	#ifdef SUPPORT_UTF8
				962	if (utf8) BACKCHAR(temp);
				963	#endif
				964	GETCHARTEST(d, temp);
				965	#ifdef SUPPORT_UCP
				966	if ((md->poptions & PCRE_UCP) != 0)
				967	{
				968	if (d == '_') left_word = TRUE; else
				969	{
				970	int cat = UCD_CATEGORY(d);
				971	left_word = (cat == ucp_L \|\| cat == ucp_N);
				972	}
				973	}
				974	else
				975	#endif
				976	left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
				977	}
				978	else left_word = FALSE;
				979
				980	if (clen > 0)
				981	{
				982	#ifdef SUPPORT_UCP
				983	if ((md->poptions & PCRE_UCP) != 0)
				984	{
				985	if (c == '_') right_word = TRUE; else
				986	{
				987	int cat = UCD_CATEGORY(c);
				988	right_word = (cat == ucp_L \|\| cat == ucp_N);
				989	}
				990	}
				991	else
				992	#endif
				993	right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
				994	}
				995	else right_word = FALSE;
				996
				997	if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
				998	{ ADD_ACTIVE(state_offset + 1, 0); }
				999	}
				1000	break;
				1001
				1002
				1003	/*-----------------------------------------------------------------*/
				1004	/* Check the next character by Unicode property. We will get here only
				1005	if the support is in the binary; otherwise a compile-time error occurs.
				1006	*/
				1007
				1008	#ifdef SUPPORT_UCP
				1009	case OP_PROP:
				1010	case OP_NOTPROP:
				1011	if (clen > 0)
				1012	{
				1013	BOOL OK;
				1014	const ucd_record * prop = GET_UCD(c);
				1015	switch(code[1])
				1016	{
				1017	case PT_ANY:
				1018	OK = TRUE;
				1019	break;
				1020
				1021	case PT_LAMP:
				1022	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
				1023	prop->chartype == ucp_Lt;
				1024	break;
				1025
				1026	case PT_GC:
				1027	OK = _pcre_ucp_gentype[prop->chartype] == code[2];
				1028	break;
				1029
				1030	case PT_PC:
				1031	OK = prop->chartype == code[2];
				1032	break;
				1033
				1034	case PT_SC:
				1035	OK = prop->script == code[2];
				1036	break;
				1037
				1038	/* These are specials for combination cases. */
				1039
				1040	case PT_ALNUM:
				1041	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1042	_pcre_ucp_gentype[prop->chartype] == ucp_N;
				1043	break;
				1044
				1045	case PT_SPACE: /* Perl space */
				1046	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1047	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_FF \|\| c == CHAR_CR;
				1048	break;
				1049
				1050	case PT_PXSPACE: /* POSIX space */
				1051	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1052	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_VT \|\|
				1053	c == CHAR_FF \|\| c == CHAR_CR;
				1054	break;
				1055
				1056	case PT_WORD:
				1057	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1058	_pcre_ucp_gentype[prop->chartype] == ucp_N \|\|
				1059	c == CHAR_UNDERSCORE;
				1060	break;
				1061
				1062	/* Should never occur, but keep compilers from grumbling. */
				1063
				1064	default:
				1065	OK = codevalue != OP_PROP;
				1066	break;
				1067	}
				1068
				1069	if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
				1070	}
				1071	break;
				1072	#endif
				1073
				1074
				1075
				1076	/* ========================================================================== */
				1077	/* These opcodes likewise inspect the subject character, but have an
				1078	argument that is not a data character. It is one of these opcodes:
				1079	OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
				1080	OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
				1081
				1082	case OP_TYPEPLUS:
				1083	case OP_TYPEMINPLUS:
				1084	case OP_TYPEPOSPLUS:
				1085	count = current_state->count; /* Already matched */
				1086	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
				1087	if (clen > 0)
				1088	{
				1089	if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
				1090	(c < 256 &&
				1091	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
				1092	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
				1093	{
				1094	if (count > 0 && codevalue == OP_TYPEPOSPLUS)
				1095	{
				1096	active_count--; /* Remove non-match possibility */
				1097	next_active_state--;
				1098	}
				1099	count++;
				1100	ADD_NEW(state_offset, count);
				1101	}
				1102	}
				1103	break;
				1104
				1105	/*-----------------------------------------------------------------*/
				1106	case OP_TYPEQUERY:
				1107	case OP_TYPEMINQUERY:
				1108	case OP_TYPEPOSQUERY:
				1109	ADD_ACTIVE(state_offset + 2, 0);
				1110	if (clen > 0)
				1111	{
				1112	if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
				1113	(c < 256 &&
				1114	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
				1115	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
				1116	{
				1117	if (codevalue == OP_TYPEPOSQUERY)
				1118	{
				1119	active_count--; /* Remove non-match possibility */
				1120	next_active_state--;
				1121	}
				1122	ADD_NEW(state_offset + 2, 0);
				1123	}
				1124	}
				1125	break;
				1126
				1127	/*-----------------------------------------------------------------*/
				1128	case OP_TYPESTAR:
				1129	case OP_TYPEMINSTAR:
				1130	case OP_TYPEPOSSTAR:
				1131	ADD_ACTIVE(state_offset + 2, 0);
				1132	if (clen > 0)
				1133	{
				1134	if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
				1135	(c < 256 &&
				1136	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
				1137	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
				1138	{
				1139	if (codevalue == OP_TYPEPOSSTAR)
				1140	{
				1141	active_count--; /* Remove non-match possibility */
				1142	next_active_state--;
				1143	}
				1144	ADD_NEW(state_offset, 0);
				1145	}
				1146	}
				1147	break;
				1148
				1149	/*-----------------------------------------------------------------*/
				1150	case OP_TYPEEXACT:
				1151	count = current_state->count; /* Number already matched */
				1152	if (clen > 0)
				1153	{
				1154	if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
				1155	(c < 256 &&
				1156	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
				1157	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
				1158	{
				1159	if (++count >= GET2(code, 1))
				1160	{ ADD_NEW(state_offset + 4, 0); }
				1161	else
				1162	{ ADD_NEW(state_offset, count); }
				1163	}
				1164	}
				1165	break;
				1166
				1167	/*-----------------------------------------------------------------*/
				1168	case OP_TYPEUPTO:
				1169	case OP_TYPEMINUPTO:
				1170	case OP_TYPEPOSUPTO:
				1171	ADD_ACTIVE(state_offset + 4, 0);
				1172	count = current_state->count; /* Number already matched */
				1173	if (clen > 0)
				1174	{
				1175	if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
				1176	(c < 256 &&
				1177	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
				1178	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
				1179	{
				1180	if (codevalue == OP_TYPEPOSUPTO)
				1181	{
				1182	active_count--; /* Remove non-match possibility */
				1183	next_active_state--;
				1184	}
				1185	if (++count >= GET2(code, 1))
				1186	{ ADD_NEW(state_offset + 4, 0); }
				1187	else
				1188	{ ADD_NEW(state_offset, count); }
				1189	}
				1190	}
				1191	break;
				1192
				1193	/* ========================================================================== */
				1194	/* These are virtual opcodes that are used when something like
				1195	OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, OP_VSPACE or
				1196	OP_HSPACE (or a negated form of the last two) as its argument. This keeps
				1197	the code above fast for the other cases. The argument is in the d variable. */
				1198
				1199	#ifdef SUPPORT_UCP
				1200	case OP_PROP_EXTRA + OP_TYPEPLUS:
				1201	case OP_PROP_EXTRA + OP_TYPEMINPLUS:
				1202	case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
				1203	count = current_state->count; /* Already matched */
				1204	if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
				1205	if (clen > 0)
				1206	{
				1207	BOOL OK;
				1208	const ucd_record * prop = GET_UCD(c);
				1209	switch(code[2])
				1210	{
				1211	case PT_ANY:
				1212	OK = TRUE;
				1213	break;
				1214
				1215	case PT_LAMP:
				1216	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
				1217	prop->chartype == ucp_Lt;
				1218	break;
				1219
				1220	case PT_GC:
				1221	OK = _pcre_ucp_gentype[prop->chartype] == code[3];
				1222	break;
				1223
				1224	case PT_PC:
				1225	OK = prop->chartype == code[3];
				1226	break;
				1227
				1228	case PT_SC:
				1229	OK = prop->script == code[3];
				1230	break;
				1231
				1232	/* These are specials for combination cases. */
				1233
				1234	case PT_ALNUM:
				1235	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1236	_pcre_ucp_gentype[prop->chartype] == ucp_N;
				1237	break;
				1238
				1239	case PT_SPACE: /* Perl space */
				1240	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1241	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_FF \|\| c == CHAR_CR;
				1242	break;
				1243
				1244	case PT_PXSPACE: /* POSIX space */
				1245	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1246	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_VT \|\|
				1247	c == CHAR_FF \|\| c == CHAR_CR;
				1248	break;
				1249
				1250	case PT_WORD:
				1251	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1252	_pcre_ucp_gentype[prop->chartype] == ucp_N \|\|
				1253	c == CHAR_UNDERSCORE;
				1254	break;
				1255
				1256	/* Should never occur, but keep compilers from grumbling. */
				1257
				1258	default:
				1259	OK = codevalue != OP_PROP;
				1260	break;
				1261	}
				1262
				1263	if (OK == (d == OP_PROP))
				1264	{
				1265	if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
				1266	{
				1267	active_count--; /* Remove non-match possibility */
				1268	next_active_state--;
				1269	}
				1270	count++;
				1271	ADD_NEW(state_offset, count);
				1272	}
				1273	}
				1274	break;
				1275
				1276	/*-----------------------------------------------------------------*/
				1277	case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
				1278	case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
				1279	case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
				1280	count = current_state->count; /* Already matched */
				1281	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
				1282	if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
				1283	{
				1284	const uschar *nptr = ptr + clen;
				1285	int ncount = 0;
				1286	if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
				1287	{
				1288	active_count--; /* Remove non-match possibility */
				1289	next_active_state--;
				1290	}
				1291	while (nptr < end_subject)
				1292	{
				1293	int nd;
				1294	int ndlen = 1;
				1295	GETCHARLEN(nd, nptr, ndlen);
				1296	if (UCD_CATEGORY(nd) != ucp_M) break;
				1297	ncount++;
				1298	nptr += ndlen;
				1299	}
				1300	count++;
				1301	ADD_NEW_DATA(-state_offset, count, ncount);
				1302	}
				1303	break;
				1304	#endif
				1305
				1306	/*-----------------------------------------------------------------*/
				1307	case OP_ANYNL_EXTRA + OP_TYPEPLUS:
				1308	case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
				1309	case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
				1310	count = current_state->count; /* Already matched */
				1311	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
				1312	if (clen > 0)
				1313	{
				1314	int ncount = 0;
				1315	switch (c)
				1316	{
				1317	case 0x000b:
				1318	case 0x000c:
				1319	case 0x0085:
				1320	case 0x2028:
				1321	case 0x2029:
				1322	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
				1323	goto ANYNL01;
				1324
				1325	case 0x000d:
				1326	if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
				1327	/* Fall through */
				1328
				1329	ANYNL01:
				1330	case 0x000a:
				1331	if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
				1332	{
				1333	active_count--; /* Remove non-match possibility */
				1334	next_active_state--;
				1335	}
				1336	count++;
				1337	ADD_NEW_DATA(-state_offset, count, ncount);
				1338	break;
				1339
				1340	default:
				1341	break;
				1342	}
				1343	}
				1344	break;
				1345
				1346	/*-----------------------------------------------------------------*/
				1347	case OP_VSPACE_EXTRA + OP_TYPEPLUS:
				1348	case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
				1349	case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
				1350	count = current_state->count; /* Already matched */
				1351	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
				1352	if (clen > 0)
				1353	{
				1354	BOOL OK;
				1355	switch (c)
				1356	{
				1357	case 0x000a:
				1358	case 0x000b:
				1359	case 0x000c:
				1360	case 0x000d:
				1361	case 0x0085:
				1362	case 0x2028:
				1363	case 0x2029:
				1364	OK = TRUE;
				1365	break;
				1366
				1367	default:
				1368	OK = FALSE;
				1369	break;
				1370	}
				1371
				1372	if (OK == (d == OP_VSPACE))
				1373	{
				1374	if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
				1375	{
				1376	active_count--; /* Remove non-match possibility */
				1377	next_active_state--;
				1378	}
				1379	count++;
				1380	ADD_NEW_DATA(-state_offset, count, 0);
				1381	}
				1382	}
				1383	break;
				1384
				1385	/*-----------------------------------------------------------------*/
				1386	case OP_HSPACE_EXTRA + OP_TYPEPLUS:
				1387	case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
				1388	case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
				1389	count = current_state->count; /* Already matched */
				1390	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
				1391	if (clen > 0)
				1392	{
				1393	BOOL OK;
				1394	switch (c)
				1395	{
				1396	case 0x09: /* HT */
				1397	case 0x20: /* SPACE */
				1398	case 0xa0: /* NBSP */
				1399	case 0x1680: /* OGHAM SPACE MARK */
				1400	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
				1401	case 0x2000: /* EN QUAD */
				1402	case 0x2001: /* EM QUAD */
				1403	case 0x2002: /* EN SPACE */
				1404	case 0x2003: /* EM SPACE */
				1405	case 0x2004: /* THREE-PER-EM SPACE */
				1406	case 0x2005: /* FOUR-PER-EM SPACE */
				1407	case 0x2006: /* SIX-PER-EM SPACE */
				1408	case 0x2007: /* FIGURE SPACE */
				1409	case 0x2008: /* PUNCTUATION SPACE */
				1410	case 0x2009: /* THIN SPACE */
				1411	case 0x200A: /* HAIR SPACE */
				1412	case 0x202f: /* NARROW NO-BREAK SPACE */
				1413	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
				1414	case 0x3000: /* IDEOGRAPHIC SPACE */
				1415	OK = TRUE;
				1416	break;
				1417
				1418	default:
				1419	OK = FALSE;
				1420	break;
				1421	}
				1422
				1423	if (OK == (d == OP_HSPACE))
				1424	{
				1425	if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
				1426	{
				1427	active_count--; /* Remove non-match possibility */
				1428	next_active_state--;
				1429	}
				1430	count++;
				1431	ADD_NEW_DATA(-state_offset, count, 0);
				1432	}
				1433	}
				1434	break;
				1435
				1436	/*-----------------------------------------------------------------*/
				1437	#ifdef SUPPORT_UCP
				1438	case OP_PROP_EXTRA + OP_TYPEQUERY:
				1439	case OP_PROP_EXTRA + OP_TYPEMINQUERY:
				1440	case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
				1441	count = 4;
				1442	goto QS1;
				1443
				1444	case OP_PROP_EXTRA + OP_TYPESTAR:
				1445	case OP_PROP_EXTRA + OP_TYPEMINSTAR:
				1446	case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
				1447	count = 0;
				1448
				1449	QS1:
				1450
				1451	ADD_ACTIVE(state_offset + 4, 0);
				1452	if (clen > 0)
				1453	{
				1454	BOOL OK;
				1455	const ucd_record * prop = GET_UCD(c);
				1456	switch(code[2])
				1457	{
				1458	case PT_ANY:
				1459	OK = TRUE;
				1460	break;
				1461
				1462	case PT_LAMP:
				1463	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
				1464	prop->chartype == ucp_Lt;
				1465	break;
				1466
				1467	case PT_GC:
				1468	OK = _pcre_ucp_gentype[prop->chartype] == code[3];
				1469	break;
				1470
				1471	case PT_PC:
				1472	OK = prop->chartype == code[3];
				1473	break;
				1474
				1475	case PT_SC:
				1476	OK = prop->script == code[3];
				1477	break;
				1478
				1479	/* These are specials for combination cases. */
				1480
				1481	case PT_ALNUM:
				1482	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1483	_pcre_ucp_gentype[prop->chartype] == ucp_N;
				1484	break;
				1485
				1486	case PT_SPACE: /* Perl space */
				1487	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1488	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_FF \|\| c == CHAR_CR;
				1489	break;
				1490
				1491	case PT_PXSPACE: /* POSIX space */
				1492	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1493	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_VT \|\|
				1494	c == CHAR_FF \|\| c == CHAR_CR;
				1495	break;
				1496
				1497	case PT_WORD:
				1498	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1499	_pcre_ucp_gentype[prop->chartype] == ucp_N \|\|
				1500	c == CHAR_UNDERSCORE;
				1501	break;
				1502
				1503	/* Should never occur, but keep compilers from grumbling. */
				1504
				1505	default:
				1506	OK = codevalue != OP_PROP;
				1507	break;
				1508	}
				1509
				1510	if (OK == (d == OP_PROP))
				1511	{
				1512	if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR \|\|
				1513	codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
				1514	{
				1515	active_count--; /* Remove non-match possibility */
				1516	next_active_state--;
				1517	}
				1518	ADD_NEW(state_offset + count, 0);
				1519	}
				1520	}
				1521	break;
				1522
				1523	/*-----------------------------------------------------------------*/
				1524	case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
				1525	case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
				1526	case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
				1527	count = 2;
				1528	goto QS2;
				1529
				1530	case OP_EXTUNI_EXTRA + OP_TYPESTAR:
				1531	case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
				1532	case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
				1533	count = 0;
				1534
				1535	QS2:
				1536
				1537	ADD_ACTIVE(state_offset + 2, 0);
				1538	if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
				1539	{
				1540	const uschar *nptr = ptr + clen;
				1541	int ncount = 0;
				1542	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR \|\|
				1543	codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
				1544	{
				1545	active_count--; /* Remove non-match possibility */
				1546	next_active_state--;
				1547	}
				1548	while (nptr < end_subject)
				1549	{
				1550	int nd;
				1551	int ndlen = 1;
				1552	GETCHARLEN(nd, nptr, ndlen);
				1553	if (UCD_CATEGORY(nd) != ucp_M) break;
				1554	ncount++;
				1555	nptr += ndlen;
				1556	}
				1557	ADD_NEW_DATA(-(state_offset + count), 0, ncount);
				1558	}
				1559	break;
				1560	#endif
				1561
				1562	/*-----------------------------------------------------------------*/
				1563	case OP_ANYNL_EXTRA + OP_TYPEQUERY:
				1564	case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
				1565	case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
				1566	count = 2;
				1567	goto QS3;
				1568
				1569	case OP_ANYNL_EXTRA + OP_TYPESTAR:
				1570	case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
				1571	case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
				1572	count = 0;
				1573
				1574	QS3:
				1575	ADD_ACTIVE(state_offset + 2, 0);
				1576	if (clen > 0)
				1577	{
				1578	int ncount = 0;
				1579	switch (c)
				1580	{
				1581	case 0x000b:
				1582	case 0x000c:
				1583	case 0x0085:
				1584	case 0x2028:
				1585	case 0x2029:
				1586	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
				1587	goto ANYNL02;
				1588
				1589	case 0x000d:
				1590	if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
				1591	/* Fall through */
				1592
				1593	ANYNL02:
				1594	case 0x000a:
				1595	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR \|\|
				1596	codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
				1597	{
				1598	active_count--; /* Remove non-match possibility */
				1599	next_active_state--;
				1600	}
				1601	ADD_NEW_DATA(-(state_offset + count), 0, ncount);
				1602	break;
				1603
				1604	default:
				1605	break;
				1606	}
				1607	}
				1608	break;
				1609
				1610	/*-----------------------------------------------------------------*/
				1611	case OP_VSPACE_EXTRA + OP_TYPEQUERY:
				1612	case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
				1613	case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
				1614	count = 2;
				1615	goto QS4;
				1616
				1617	case OP_VSPACE_EXTRA + OP_TYPESTAR:
				1618	case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
				1619	case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
				1620	count = 0;
				1621
				1622	QS4:
				1623	ADD_ACTIVE(state_offset + 2, 0);
				1624	if (clen > 0)
				1625	{
				1626	BOOL OK;
				1627	switch (c)
				1628	{
				1629	case 0x000a:
				1630	case 0x000b:
				1631	case 0x000c:
				1632	case 0x000d:
				1633	case 0x0085:
				1634	case 0x2028:
				1635	case 0x2029:
				1636	OK = TRUE;
				1637	break;
				1638
				1639	default:
				1640	OK = FALSE;
				1641	break;
				1642	}
				1643	if (OK == (d == OP_VSPACE))
				1644	{
				1645	if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR \|\|
				1646	codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
				1647	{
				1648	active_count--; /* Remove non-match possibility */
				1649	next_active_state--;
				1650	}
				1651	ADD_NEW_DATA(-(state_offset + count), 0, 0);
				1652	}
				1653	}
				1654	break;
				1655
				1656	/*-----------------------------------------------------------------*/
				1657	case OP_HSPACE_EXTRA + OP_TYPEQUERY:
				1658	case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
				1659	case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
				1660	count = 2;
				1661	goto QS5;
				1662
				1663	case OP_HSPACE_EXTRA + OP_TYPESTAR:
				1664	case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
				1665	case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
				1666	count = 0;
				1667
				1668	QS5:
				1669	ADD_ACTIVE(state_offset + 2, 0);
				1670	if (clen > 0)
				1671	{
				1672	BOOL OK;
				1673	switch (c)
				1674	{
				1675	case 0x09: /* HT */
				1676	case 0x20: /* SPACE */
				1677	case 0xa0: /* NBSP */
				1678	case 0x1680: /* OGHAM SPACE MARK */
				1679	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
				1680	case 0x2000: /* EN QUAD */
				1681	case 0x2001: /* EM QUAD */
				1682	case 0x2002: /* EN SPACE */
				1683	case 0x2003: /* EM SPACE */
				1684	case 0x2004: /* THREE-PER-EM SPACE */
				1685	case 0x2005: /* FOUR-PER-EM SPACE */
				1686	case 0x2006: /* SIX-PER-EM SPACE */
				1687	case 0x2007: /* FIGURE SPACE */
				1688	case 0x2008: /* PUNCTUATION SPACE */
				1689	case 0x2009: /* THIN SPACE */
				1690	case 0x200A: /* HAIR SPACE */
				1691	case 0x202f: /* NARROW NO-BREAK SPACE */
				1692	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
				1693	case 0x3000: /* IDEOGRAPHIC SPACE */
				1694	OK = TRUE;
				1695	break;
				1696
				1697	default:
				1698	OK = FALSE;
				1699	break;
				1700	}
				1701
				1702	if (OK == (d == OP_HSPACE))
				1703	{
				1704	if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR \|\|
				1705	codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
				1706	{
				1707	active_count--; /* Remove non-match possibility */
				1708	next_active_state--;
				1709	}
				1710	ADD_NEW_DATA(-(state_offset + count), 0, 0);
				1711	}
				1712	}
				1713	break;
				1714
				1715	/*-----------------------------------------------------------------*/
				1716	#ifdef SUPPORT_UCP
				1717	case OP_PROP_EXTRA + OP_TYPEEXACT:
				1718	case OP_PROP_EXTRA + OP_TYPEUPTO:
				1719	case OP_PROP_EXTRA + OP_TYPEMINUPTO:
				1720	case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
				1721	if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
				1722	{ ADD_ACTIVE(state_offset + 6, 0); }
				1723	count = current_state->count; /* Number already matched */
				1724	if (clen > 0)
				1725	{
				1726	BOOL OK;
				1727	const ucd_record * prop = GET_UCD(c);
				1728	switch(code[4])
				1729	{
				1730	case PT_ANY:
				1731	OK = TRUE;
				1732	break;
				1733
				1734	case PT_LAMP:
				1735	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
				1736	prop->chartype == ucp_Lt;
				1737	break;
				1738
				1739	case PT_GC:
				1740	OK = _pcre_ucp_gentype[prop->chartype] == code[5];
				1741	break;
				1742
				1743	case PT_PC:
				1744	OK = prop->chartype == code[5];
				1745	break;
				1746
				1747	case PT_SC:
				1748	OK = prop->script == code[5];
				1749	break;
				1750
				1751	/* These are specials for combination cases. */
				1752
				1753	case PT_ALNUM:
				1754	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1755	_pcre_ucp_gentype[prop->chartype] == ucp_N;
				1756	break;
				1757
				1758	case PT_SPACE: /* Perl space */
				1759	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1760	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_FF \|\| c == CHAR_CR;
				1761	break;
				1762
				1763	case PT_PXSPACE: /* POSIX space */
				1764	OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z \|\|
				1765	c == CHAR_HT \|\| c == CHAR_NL \|\| c == CHAR_VT \|\|
				1766	c == CHAR_FF \|\| c == CHAR_CR;
				1767	break;
				1768
				1769	case PT_WORD:
				1770	OK = _pcre_ucp_gentype[prop->chartype] == ucp_L \|\|
				1771	_pcre_ucp_gentype[prop->chartype] == ucp_N \|\|
				1772	c == CHAR_UNDERSCORE;
				1773	break;
				1774
				1775	/* Should never occur, but keep compilers from grumbling. */
				1776
				1777	default:
				1778	OK = codevalue != OP_PROP;
				1779	break;
				1780	}
				1781
				1782	if (OK == (d == OP_PROP))
				1783	{
				1784	if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
				1785	{
				1786	active_count--; /* Remove non-match possibility */
				1787	next_active_state--;
				1788	}
				1789	if (++count >= GET2(code, 1))
				1790	{ ADD_NEW(state_offset + 6, 0); }
				1791	else
				1792	{ ADD_NEW(state_offset, count); }
				1793	}
				1794	}
				1795	break;
				1796
				1797	/*-----------------------------------------------------------------*/
				1798	case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
				1799	case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
				1800	case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
				1801	case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
				1802	if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
				1803	{ ADD_ACTIVE(state_offset + 4, 0); }
				1804	count = current_state->count; /* Number already matched */
				1805	if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
				1806	{
				1807	const uschar *nptr = ptr + clen;
				1808	int ncount = 0;
				1809	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
				1810	{
				1811	active_count--; /* Remove non-match possibility */
				1812	next_active_state--;
				1813	}
				1814	while (nptr < end_subject)
				1815	{
				1816	int nd;
				1817	int ndlen = 1;
				1818	GETCHARLEN(nd, nptr, ndlen);
				1819	if (UCD_CATEGORY(nd) != ucp_M) break;
				1820	ncount++;
				1821	nptr += ndlen;
				1822	}
				1823	if (++count >= GET2(code, 1))
				1824	{ ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
				1825	else
				1826	{ ADD_NEW_DATA(-state_offset, count, ncount); }
				1827	}
				1828	break;
				1829	#endif
				1830
				1831	/*-----------------------------------------------------------------*/
				1832	case OP_ANYNL_EXTRA + OP_TYPEEXACT:
				1833	case OP_ANYNL_EXTRA + OP_TYPEUPTO:
				1834	case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
				1835	case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
				1836	if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
				1837	{ ADD_ACTIVE(state_offset + 4, 0); }
				1838	count = current_state->count; /* Number already matched */
				1839	if (clen > 0)
				1840	{
				1841	int ncount = 0;
				1842	switch (c)
				1843	{
				1844	case 0x000b:
				1845	case 0x000c:
				1846	case 0x0085:
				1847	case 0x2028:
				1848	case 0x2029:
				1849	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
				1850	goto ANYNL03;
				1851
				1852	case 0x000d:
				1853	if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
				1854	/* Fall through */
				1855
				1856	ANYNL03:
				1857	case 0x000a:
				1858	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
				1859	{
				1860	active_count--; /* Remove non-match possibility */
				1861	next_active_state--;
				1862	}
				1863	if (++count >= GET2(code, 1))
				1864	{ ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
				1865	else
				1866	{ ADD_NEW_DATA(-state_offset, count, ncount); }
				1867	break;
				1868
				1869	default:
				1870	break;
				1871	}
				1872	}
				1873	break;
				1874
				1875	/*-----------------------------------------------------------------*/
				1876	case OP_VSPACE_EXTRA + OP_TYPEEXACT:
				1877	case OP_VSPACE_EXTRA + OP_TYPEUPTO:
				1878	case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
				1879	case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
				1880	if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
				1881	{ ADD_ACTIVE(state_offset + 4, 0); }
				1882	count = current_state->count; /* Number already matched */
				1883	if (clen > 0)
				1884	{
				1885	BOOL OK;
				1886	switch (c)
				1887	{
				1888	case 0x000a:
				1889	case 0x000b:
				1890	case 0x000c:
				1891	case 0x000d:
				1892	case 0x0085:
				1893	case 0x2028:
				1894	case 0x2029:
				1895	OK = TRUE;
				1896	break;
				1897
				1898	default:
				1899	OK = FALSE;
				1900	}
				1901
				1902	if (OK == (d == OP_VSPACE))
				1903	{
				1904	if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
				1905	{
				1906	active_count--; /* Remove non-match possibility */
				1907	next_active_state--;
				1908	}
				1909	if (++count >= GET2(code, 1))
				1910	{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
				1911	else
				1912	{ ADD_NEW_DATA(-state_offset, count, 0); }
				1913	}
				1914	}
				1915	break;
				1916
				1917	/*-----------------------------------------------------------------*/
				1918	case OP_HSPACE_EXTRA + OP_TYPEEXACT:
				1919	case OP_HSPACE_EXTRA + OP_TYPEUPTO:
				1920	case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
				1921	case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
				1922	if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
				1923	{ ADD_ACTIVE(state_offset + 4, 0); }
				1924	count = current_state->count; /* Number already matched */
				1925	if (clen > 0)
				1926	{
				1927	BOOL OK;
				1928	switch (c)
				1929	{
				1930	case 0x09: /* HT */
				1931	case 0x20: /* SPACE */
				1932	case 0xa0: /* NBSP */
				1933	case 0x1680: /* OGHAM SPACE MARK */
				1934	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
				1935	case 0x2000: /* EN QUAD */
				1936	case 0x2001: /* EM QUAD */
				1937	case 0x2002: /* EN SPACE */
				1938	case 0x2003: /* EM SPACE */
				1939	case 0x2004: /* THREE-PER-EM SPACE */
				1940	case 0x2005: /* FOUR-PER-EM SPACE */
				1941	case 0x2006: /* SIX-PER-EM SPACE */
				1942	case 0x2007: /* FIGURE SPACE */
				1943	case 0x2008: /* PUNCTUATION SPACE */
				1944	case 0x2009: /* THIN SPACE */
				1945	case 0x200A: /* HAIR SPACE */
				1946	case 0x202f: /* NARROW NO-BREAK SPACE */
				1947	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
				1948	case 0x3000: /* IDEOGRAPHIC SPACE */
				1949	OK = TRUE;
				1950	break;
				1951
				1952	default:
				1953	OK = FALSE;
				1954	break;
				1955	}
				1956
				1957	if (OK == (d == OP_HSPACE))
				1958	{
				1959	if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
				1960	{
				1961	active_count--; /* Remove non-match possibility */
				1962	next_active_state--;
				1963	}
				1964	if (++count >= GET2(code, 1))
				1965	{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
				1966	else
				1967	{ ADD_NEW_DATA(-state_offset, count, 0); }
				1968	}
				1969	}
				1970	break;
				1971
				1972	/* ========================================================================== */
				1973	/* These opcodes are followed by a character that is usually compared
				1974	to the current subject character; it is loaded into d. We still get
				1975	here even if there is no subject character, because in some cases zero
				1976	repetitions are permitted. */
				1977
				1978	/*-----------------------------------------------------------------*/
				1979	case OP_CHAR:
				1980	if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
				1981	break;
				1982
				1983	/*-----------------------------------------------------------------*/
				1984	case OP_CHARI:
				1985	if (clen == 0) break;
				1986
				1987	#ifdef SUPPORT_UTF8
				1988	if (utf8)
				1989	{
				1990	if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
				1991	{
				1992	unsigned int othercase;
				1993	if (c < 128) othercase = fcc[c]; else
				1994
				1995	/* If we have Unicode property support, we can use it to test the
				1996	other case of the character. */
				1997
				1998	#ifdef SUPPORT_UCP
				1999	othercase = UCD_OTHERCASE(c);
				2000	#else
				2001	othercase = NOTACHAR;
				2002	#endif
				2003
				2004	if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
				2005	}
				2006	}
				2007	else
				2008	#endif /* SUPPORT_UTF8 */
				2009
				2010	/* Non-UTF-8 mode */
				2011	{
				2012	if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
				2013	}
				2014	break;
				2015
				2016
				2017	#ifdef SUPPORT_UCP
				2018	/*-----------------------------------------------------------------*/
				2019	/* This is a tricky one because it can match more than one character.
				2020	Find out how many characters to skip, and then set up a negative state
				2021	to wait for them to pass before continuing. */
				2022
				2023	case OP_EXTUNI:
				2024	if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
				2025	{
				2026	const uschar *nptr = ptr + clen;
				2027	int ncount = 0;
				2028	while (nptr < end_subject)
				2029	{
				2030	int nclen = 1;
				2031	GETCHARLEN(c, nptr, nclen);
				2032	if (UCD_CATEGORY(c) != ucp_M) break;
				2033	ncount++;
				2034	nptr += nclen;
				2035	}
				2036	ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
				2037	}
				2038	break;
				2039	#endif
				2040
				2041	/*-----------------------------------------------------------------*/
				2042	/* This is tricky, like EXTUNI, because it too can match more than one
				2043	character (when CR is followed by LF). In this case, set up a negative
				2044	state to wait for one character to pass before continuing. */
				2045
				2046	case OP_ANYNL:
				2047	if (clen > 0) switch(c)
				2048	{
				2049	case 0x000b:
				2050	case 0x000c:
				2051	case 0x0085:
				2052	case 0x2028:
				2053	case 0x2029:
				2054	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
				2055
				2056	case 0x000a:
				2057	ADD_NEW(state_offset + 1, 0);
				2058	break;
				2059
				2060	case 0x000d:
				2061	if (ptr + 1 < end_subject && ptr[1] == 0x0a)
				2062	{
				2063	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
				2064	}
				2065	else
				2066	{
				2067	ADD_NEW(state_offset + 1, 0);
				2068	}
				2069	break;
				2070	}
				2071	break;
				2072
				2073	/*-----------------------------------------------------------------*/
				2074	case OP_NOT_VSPACE:
				2075	if (clen > 0) switch(c)
				2076	{
				2077	case 0x000a:
				2078	case 0x000b:
				2079	case 0x000c:
				2080	case 0x000d:
				2081	case 0x0085:
				2082	case 0x2028:
				2083	case 0x2029:
				2084	break;
				2085
				2086	default:
				2087	ADD_NEW(state_offset + 1, 0);
				2088	break;
				2089	}
				2090	break;
				2091
				2092	/*-----------------------------------------------------------------*/
				2093	case OP_VSPACE:
				2094	if (clen > 0) switch(c)
				2095	{
				2096	case 0x000a:
				2097	case 0x000b:
				2098	case 0x000c:
				2099	case 0x000d:
				2100	case 0x0085:
				2101	case 0x2028:
				2102	case 0x2029:
				2103	ADD_NEW(state_offset + 1, 0);
				2104	break;
				2105
				2106	default: break;
				2107	}
				2108	break;
				2109
				2110	/*-----------------------------------------------------------------*/
				2111	case OP_NOT_HSPACE:
				2112	if (clen > 0) switch(c)
				2113	{
				2114	case 0x09: /* HT */
				2115	case 0x20: /* SPACE */
				2116	case 0xa0: /* NBSP */
				2117	case 0x1680: /* OGHAM SPACE MARK */
				2118	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
				2119	case 0x2000: /* EN QUAD */
				2120	case 0x2001: /* EM QUAD */
				2121	case 0x2002: /* EN SPACE */
				2122	case 0x2003: /* EM SPACE */
				2123	case 0x2004: /* THREE-PER-EM SPACE */
				2124	case 0x2005: /* FOUR-PER-EM SPACE */
				2125	case 0x2006: /* SIX-PER-EM SPACE */
				2126	case 0x2007: /* FIGURE SPACE */
				2127	case 0x2008: /* PUNCTUATION SPACE */
				2128	case 0x2009: /* THIN SPACE */
				2129	case 0x200A: /* HAIR SPACE */
				2130	case 0x202f: /* NARROW NO-BREAK SPACE */
				2131	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
				2132	case 0x3000: /* IDEOGRAPHIC SPACE */
				2133	break;
				2134
				2135	default:
				2136	ADD_NEW(state_offset + 1, 0);
				2137	break;
				2138	}
				2139	break;
				2140
				2141	/*-----------------------------------------------------------------*/
				2142	case OP_HSPACE:
				2143	if (clen > 0) switch(c)
				2144	{
				2145	case 0x09: /* HT */
				2146	case 0x20: /* SPACE */
				2147	case 0xa0: /* NBSP */
				2148	case 0x1680: /* OGHAM SPACE MARK */
				2149	case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
				2150	case 0x2000: /* EN QUAD */
				2151	case 0x2001: /* EM QUAD */
				2152	case 0x2002: /* EN SPACE */
				2153	case 0x2003: /* EM SPACE */
				2154	case 0x2004: /* THREE-PER-EM SPACE */
				2155	case 0x2005: /* FOUR-PER-EM SPACE */
				2156	case 0x2006: /* SIX-PER-EM SPACE */
				2157	case 0x2007: /* FIGURE SPACE */
				2158	case 0x2008: /* PUNCTUATION SPACE */
				2159	case 0x2009: /* THIN SPACE */
				2160	case 0x200A: /* HAIR SPACE */
				2161	case 0x202f: /* NARROW NO-BREAK SPACE */
				2162	case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
				2163	case 0x3000: /* IDEOGRAPHIC SPACE */
				2164	ADD_NEW(state_offset + 1, 0);
				2165	break;
				2166	}
				2167	break;
				2168
				2169	/*-----------------------------------------------------------------*/
				2170	/* Match a negated single character casefully. This is only used for
				2171	one-byte characters, that is, we know that d < 256. The character we are
				2172	checking (c) can be multibyte. */
				2173
				2174	case OP_NOT:
				2175	if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
				2176	break;
				2177
				2178	/*-----------------------------------------------------------------*/
				2179	/* Match a negated single character caselessly. This is only used for
				2180	one-byte characters, that is, we know that d < 256. The character we are
				2181	checking (c) can be multibyte. */
				2182
				2183	case OP_NOTI:
				2184	if (clen > 0 && c != d && c != fcc[d])
				2185	{ ADD_NEW(state_offset + dlen + 1, 0); }
				2186	break;
				2187
				2188	/*-----------------------------------------------------------------*/
				2189	case OP_PLUSI:
				2190	case OP_MINPLUSI:
				2191	case OP_POSPLUSI:
				2192	case OP_NOTPLUSI:
				2193	case OP_NOTMINPLUSI:
				2194	case OP_NOTPOSPLUSI:
				2195	caseless = TRUE;
				2196	codevalue -= OP_STARI - OP_STAR;
				2197
				2198	/* Fall through */
				2199	case OP_PLUS:
				2200	case OP_MINPLUS:
				2201	case OP_POSPLUS:
				2202	case OP_NOTPLUS:
				2203	case OP_NOTMINPLUS:
				2204	case OP_NOTPOSPLUS:
				2205	count = current_state->count; /* Already matched */
				2206	if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
				2207	if (clen > 0)
				2208	{
				2209	unsigned int otherd = NOTACHAR;
				2210	if (caseless)
				2211	{
				2212	#ifdef SUPPORT_UTF8
				2213	if (utf8 && d >= 128)
				2214	{
				2215	#ifdef SUPPORT_UCP
				2216	otherd = UCD_OTHERCASE(d);
				2217	#endif /* SUPPORT_UCP */
				2218	}
				2219	else
				2220	#endif /* SUPPORT_UTF8 */
				2221	otherd = fcc[d];
				2222	}
				2223	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
				2224	{
				2225	if (count > 0 &&
				2226	(codevalue == OP_POSPLUS \|\| codevalue == OP_NOTPOSPLUS))
				2227	{
				2228	active_count--; /* Remove non-match possibility */
				2229	next_active_state--;
				2230	}
				2231	count++;
				2232	ADD_NEW(state_offset, count);
				2233	}
				2234	}
				2235	break;
				2236
				2237	/*-----------------------------------------------------------------*/
				2238	case OP_QUERYI:
				2239	case OP_MINQUERYI:
				2240	case OP_POSQUERYI:
				2241	case OP_NOTQUERYI:
				2242	case OP_NOTMINQUERYI:
				2243	case OP_NOTPOSQUERYI:
				2244	caseless = TRUE;
				2245	codevalue -= OP_STARI - OP_STAR;
				2246	/* Fall through */
				2247	case OP_QUERY:
				2248	case OP_MINQUERY:
				2249	case OP_POSQUERY:
				2250	case OP_NOTQUERY:
				2251	case OP_NOTMINQUERY:
				2252	case OP_NOTPOSQUERY:
				2253	ADD_ACTIVE(state_offset + dlen + 1, 0);
				2254	if (clen > 0)
				2255	{
				2256	unsigned int otherd = NOTACHAR;
				2257	if (caseless)
				2258	{
				2259	#ifdef SUPPORT_UTF8
				2260	if (utf8 && d >= 128)
				2261	{
				2262	#ifdef SUPPORT_UCP
				2263	otherd = UCD_OTHERCASE(d);
				2264	#endif /* SUPPORT_UCP */
				2265	}
				2266	else
				2267	#endif /* SUPPORT_UTF8 */
				2268	otherd = fcc[d];
				2269	}
				2270	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
				2271	{
				2272	if (codevalue == OP_POSQUERY \|\| codevalue == OP_NOTPOSQUERY)
				2273	{
				2274	active_count--; /* Remove non-match possibility */
				2275	next_active_state--;
				2276	}
				2277	ADD_NEW(state_offset + dlen + 1, 0);
				2278	}
				2279	}
				2280	break;
				2281
				2282	/*-----------------------------------------------------------------*/
				2283	case OP_STARI:
				2284	case OP_MINSTARI:
				2285	case OP_POSSTARI:
				2286	case OP_NOTSTARI:
				2287	case OP_NOTMINSTARI:
				2288	case OP_NOTPOSSTARI:
				2289	caseless = TRUE;
				2290	codevalue -= OP_STARI - OP_STAR;
				2291	/* Fall through */
				2292	case OP_STAR:
				2293	case OP_MINSTAR:
				2294	case OP_POSSTAR:
				2295	case OP_NOTSTAR:
				2296	case OP_NOTMINSTAR:
				2297	case OP_NOTPOSSTAR:
				2298	ADD_ACTIVE(state_offset + dlen + 1, 0);
				2299	if (clen > 0)
				2300	{
				2301	unsigned int otherd = NOTACHAR;
				2302	if (caseless)
				2303	{
				2304	#ifdef SUPPORT_UTF8
				2305	if (utf8 && d >= 128)
				2306	{
				2307	#ifdef SUPPORT_UCP
				2308	otherd = UCD_OTHERCASE(d);
				2309	#endif /* SUPPORT_UCP */
				2310	}
				2311	else
				2312	#endif /* SUPPORT_UTF8 */
				2313	otherd = fcc[d];
				2314	}
				2315	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
				2316	{
				2317	if (codevalue == OP_POSSTAR \|\| codevalue == OP_NOTPOSSTAR)
				2318	{
				2319	active_count--; /* Remove non-match possibility */
				2320	next_active_state--;
				2321	}
				2322	ADD_NEW(state_offset, 0);
				2323	}
				2324	}
				2325	break;
				2326
				2327	/*-----------------------------------------------------------------*/
				2328	case OP_EXACTI:
				2329	case OP_NOTEXACTI:
				2330	caseless = TRUE;
				2331	codevalue -= OP_STARI - OP_STAR;
				2332	/* Fall through */
				2333	case OP_EXACT:
				2334	case OP_NOTEXACT:
				2335	count = current_state->count; /* Number already matched */
				2336	if (clen > 0)
				2337	{
				2338	unsigned int otherd = NOTACHAR;
				2339	if (caseless)
				2340	{
				2341	#ifdef SUPPORT_UTF8
				2342	if (utf8 && d >= 128)
				2343	{
				2344	#ifdef SUPPORT_UCP
				2345	otherd = UCD_OTHERCASE(d);
				2346	#endif /* SUPPORT_UCP */
				2347	}
				2348	else
				2349	#endif /* SUPPORT_UTF8 */
				2350	otherd = fcc[d];
				2351	}
				2352	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
				2353	{
				2354	if (++count >= GET2(code, 1))
				2355	{ ADD_NEW(state_offset + dlen + 3, 0); }
				2356	else
				2357	{ ADD_NEW(state_offset, count); }
				2358	}
				2359	}
				2360	break;
				2361
				2362	/*-----------------------------------------------------------------*/
				2363	case OP_UPTOI:
				2364	case OP_MINUPTOI:
				2365	case OP_POSUPTOI:
				2366	case OP_NOTUPTOI:
				2367	case OP_NOTMINUPTOI:
				2368	case OP_NOTPOSUPTOI:
				2369	caseless = TRUE;
				2370	codevalue -= OP_STARI - OP_STAR;
				2371	/* Fall through */
				2372	case OP_UPTO:
				2373	case OP_MINUPTO:
				2374	case OP_POSUPTO:
				2375	case OP_NOTUPTO:
				2376	case OP_NOTMINUPTO:
				2377	case OP_NOTPOSUPTO:
				2378	ADD_ACTIVE(state_offset + dlen + 3, 0);
				2379	count = current_state->count; /* Number already matched */
				2380	if (clen > 0)
				2381	{
				2382	unsigned int otherd = NOTACHAR;
				2383	if (caseless)
				2384	{
				2385	#ifdef SUPPORT_UTF8
				2386	if (utf8 && d >= 128)
				2387	{
				2388	#ifdef SUPPORT_UCP
				2389	otherd = UCD_OTHERCASE(d);
				2390	#endif /* SUPPORT_UCP */
				2391	}
				2392	else
				2393	#endif /* SUPPORT_UTF8 */
				2394	otherd = fcc[d];
				2395	}
				2396	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
				2397	{
				2398	if (codevalue == OP_POSUPTO \|\| codevalue == OP_NOTPOSUPTO)
				2399	{
				2400	active_count--; /* Remove non-match possibility */
				2401	next_active_state--;
				2402	}
				2403	if (++count >= GET2(code, 1))
				2404	{ ADD_NEW(state_offset + dlen + 3, 0); }
				2405	else
				2406	{ ADD_NEW(state_offset, count); }
				2407	}
				2408	}
				2409	break;
				2410
				2411
				2412	/* ========================================================================== */
				2413	/* These are the class-handling opcodes */
				2414
				2415	case OP_CLASS:
				2416	case OP_NCLASS:
				2417	case OP_XCLASS:
				2418	{
				2419	BOOL isinclass = FALSE;
				2420	int next_state_offset;
				2421	const uschar *ecode;
				2422
				2423	/* For a simple class, there is always just a 32-byte table, and we
				2424	can set isinclass from it. */
				2425
				2426	if (codevalue != OP_XCLASS)
				2427	{
				2428	ecode = code + 33;
				2429	if (clen > 0)
				2430	{
				2431	isinclass = (c > 255)? (codevalue == OP_NCLASS) :
				2432	((code[1 + c/8] & (1 << (c&7))) != 0);
				2433	}
				2434	}
				2435
				2436	/* An extended class may have a table or a list of single characters,
				2437	ranges, or both, and it may be positive or negative. There's a
				2438	function that sorts all this out. */
				2439
				2440	else
				2441	{
				2442	ecode = code + GET(code, 1);
				2443	if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
				2444	}
				2445
				2446	/* At this point, isinclass is set for all kinds of class, and ecode
				2447	points to the byte after the end of the class. If there is a
				2448	quantifier, this is where it will be. */
				2449
				2450	next_state_offset = (int)(ecode - start_code);
				2451
				2452	switch (*ecode)
				2453	{
				2454	case OP_CRSTAR:
				2455	case OP_CRMINSTAR:
				2456	ADD_ACTIVE(next_state_offset + 1, 0);
				2457	if (isinclass) { ADD_NEW(state_offset, 0); }
				2458	break;
				2459
				2460	case OP_CRPLUS:
				2461	case OP_CRMINPLUS:
				2462	count = current_state->count; /* Already matched */
				2463	if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
				2464	if (isinclass) { count++; ADD_NEW(state_offset, count); }
				2465	break;
				2466
				2467	case OP_CRQUERY:
				2468	case OP_CRMINQUERY:
				2469	ADD_ACTIVE(next_state_offset + 1, 0);
				2470	if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
				2471	break;
				2472
				2473	case OP_CRRANGE:
				2474	case OP_CRMINRANGE:
				2475	count = current_state->count; /* Already matched */
				2476	if (count >= GET2(ecode, 1))
				2477	{ ADD_ACTIVE(next_state_offset + 5, 0); }
				2478	if (isinclass)
				2479	{
				2480	int max = GET2(ecode, 3);
				2481	if (++count >= max && max != 0) /* Max 0 => no limit */
				2482	{ ADD_NEW(next_state_offset + 5, 0); }
				2483	else
				2484	{ ADD_NEW(state_offset, count); }
				2485	}
				2486	break;
				2487
				2488	default:
				2489	if (isinclass) { ADD_NEW(next_state_offset, 0); }
				2490	break;
				2491	}
				2492	}
				2493	break;
				2494
				2495	/* ========================================================================== */
				2496	/* These are the opcodes for fancy brackets of various kinds. We have
				2497	to use recursion in order to handle them. The "always failing" assertion
				2498	(?!) is optimised to OP_FAIL when compiling, so we have to support that,
				2499	though the other "backtracking verbs" are not supported. */
				2500
				2501	case OP_FAIL:
				2502	forced_fail++; /* Count FAILs for multiple states */
				2503	break;
				2504
				2505	case OP_ASSERT:
				2506	case OP_ASSERT_NOT:
				2507	case OP_ASSERTBACK:
				2508	case OP_ASSERTBACK_NOT:
				2509	{
				2510	int rc;
				2511	int local_offsets[2];
				2512	int local_workspace[1000];
				2513	const uschar *endasscode = code + GET(code, 1);
				2514
				2515	while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
				2516
				2517	rc = internal_dfa_exec(
				2518	md, /* static match data */
				2519	code, /* this subexpression's code */
				2520	ptr, /* where we currently are */
				2521	(int)(ptr - start_subject), /* start offset */
				2522	local_offsets, /* offset vector */
				2523	sizeof(local_offsets)/sizeof(int), /* size of same */
				2524	local_workspace, /* workspace vector */
				2525	sizeof(local_workspace)/sizeof(int), /* size of same */
				2526	rlevel); /* function recursion level */
				2527
				2528	if (rc == PCRE_ERROR_DFA_UITEM) return rc;
				2529	if ((rc >= 0) == (codevalue == OP_ASSERT \|\| codevalue == OP_ASSERTBACK))
				2530	{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
				2531	}
				2532	break;
				2533
				2534	/*-----------------------------------------------------------------*/
				2535	case OP_COND:
				2536	case OP_SCOND:
				2537	{
				2538	int local_offsets[1000];
				2539	int local_workspace[1000];
				2540	int codelink = GET(code, 1);
				2541	int condcode;
				2542
				2543	/* Because of the way auto-callout works during compile, a callout item
				2544	is inserted between OP_COND and an assertion condition. This does not
				2545	happen for the other conditions. */
				2546
				2547	if (code[LINK_SIZE+1] == OP_CALLOUT)
				2548	{
				2549	rrc = 0;
				2550	if (pcre_callout != NULL)
				2551	{
				2552	pcre_callout_block cb;
				2553	cb.version = 1; /* Version 1 of the callout block */
				2554	cb.callout_number = code[LINK_SIZE+2];
				2555	cb.offset_vector = offsets;
				2556	cb.subject = (PCRE_SPTR)start_subject;
				2557	cb.subject_length = (int)(end_subject - start_subject);
				2558	cb.start_match = (int)(current_subject - start_subject);
				2559	cb.current_position = (int)(ptr - start_subject);
				2560	cb.pattern_position = GET(code, LINK_SIZE + 3);
				2561	cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
				2562	cb.capture_top = 1;
				2563	cb.capture_last = -1;
				2564	cb.callout_data = md->callout_data;
				2565	cb.mark = NULL; /* No (MARK) support */
				2566	if ((rrc = (pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
				2567	}
				2568	if (rrc > 0) break; /* Fail this thread */
				2569	code += _pcre_OP_lengths[OP_CALLOUT]; /* Skip callout data */
				2570	}
				2571
				2572	condcode = code[LINK_SIZE+1];
				2573
				2574	/* Back reference conditions are not supported */
				2575
				2576	if (condcode == OP_CREF \|\| condcode == OP_NCREF)
				2577	return PCRE_ERROR_DFA_UCOND;
				2578
				2579	/* The DEFINE condition is always false */
				2580
				2581	if (condcode == OP_DEF)
				2582	{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
				2583
				2584	/* The only supported version of OP_RREF is for the value RREF_ANY,
				2585	which means "test if in any recursion". We can't test for specifically
				2586	recursed groups. */
				2587
				2588	else if (condcode == OP_RREF \|\| condcode == OP_NRREF)
				2589	{
				2590	int value = GET2(code, LINK_SIZE+2);
				2591	if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
				2592	if (md->recursive != NULL)
				2593	{ ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
				2594	else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
				2595	}
				2596
				2597	/* Otherwise, the condition is an assertion */
				2598
				2599	else
				2600	{
				2601	int rc;
				2602	const uschar *asscode = code + LINK_SIZE + 1;
				2603	const uschar *endasscode = asscode + GET(asscode, 1);
				2604
				2605	while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
				2606
				2607	rc = internal_dfa_exec(
				2608	md, /* fixed match data */
				2609	asscode, /* this subexpression's code */
				2610	ptr, /* where we currently are */
				2611	(int)(ptr - start_subject), /* start offset */
				2612	local_offsets, /* offset vector */
				2613	sizeof(local_offsets)/sizeof(int), /* size of same */
				2614	local_workspace, /* workspace vector */
				2615	sizeof(local_workspace)/sizeof(int), /* size of same */
				2616	rlevel); /* function recursion level */
				2617
				2618	if (rc == PCRE_ERROR_DFA_UITEM) return rc;
				2619	if ((rc >= 0) ==
				2620	(condcode == OP_ASSERT \|\| condcode == OP_ASSERTBACK))
				2621	{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
				2622	else
				2623	{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
				2624	}
				2625	}
				2626	break;
				2627
				2628	/*-----------------------------------------------------------------*/
				2629	case OP_RECURSE:
				2630	{
				2631	dfa_recursion_info *ri;
				2632	int local_offsets[1000];
				2633	int local_workspace[1000];
				2634	const uschar *callpat = start_code + GET(code, 1);
				2635	int recno = (callpat == md->start_code)? 0 :
				2636	GET2(callpat, 1 + LINK_SIZE);
				2637	int rc;
				2638
				2639	DPRINTF(("%.sStarting regex recursion\n", rlevel2-2, SP));
				2640
				2641	/* Check for repeating a recursion without advancing the subject
				2642	pointer. This should catch convoluted mutual recursions. (Some simple
				2643	cases are caught at compile time.) */
				2644
				2645	for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
				2646	if (recno == ri->group_num && ptr == ri->subject_position)
				2647	return PCRE_ERROR_RECURSELOOP;
				2648
				2649	/* Remember this recursion and where we started it so as to
				2650	catch infinite loops. */
				2651
				2652	new_recursive.group_num = recno;
				2653	new_recursive.subject_position = ptr;
				2654	new_recursive.prevrec = md->recursive;
				2655	md->recursive = &new_recursive;
				2656
				2657	rc = internal_dfa_exec(
				2658	md, /* fixed match data */
				2659	callpat, /* this subexpression's code */
				2660	ptr, /* where we currently are */
				2661	(int)(ptr - start_subject), /* start offset */
				2662	local_offsets, /* offset vector */
				2663	sizeof(local_offsets)/sizeof(int), /* size of same */
				2664	local_workspace, /* workspace vector */
				2665	sizeof(local_workspace)/sizeof(int), /* size of same */
				2666	rlevel); /* function recursion level */
				2667
				2668	md->recursive = new_recursive.prevrec; /* Done this recursion */
				2669
				2670	DPRINTF(("%.sReturn from regex recursion: rc=%d\n", rlevel2-2, SP,
				2671	rc));
				2672
				2673	/* Ran out of internal offsets */
				2674
				2675	if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
				2676
				2677	/* For each successful matched substring, set up the next state with a
				2678	count of characters to skip before trying it. Note that the count is in
				2679	characters, not bytes. */
				2680
				2681	if (rc > 0)
				2682	{
				2683	for (rc = rc*2 - 2; rc >= 0; rc -= 2)
				2684	{
				2685	const uschar *p = start_subject + local_offsets[rc];
				2686	const uschar *pp = start_subject + local_offsets[rc+1];
				2687	int charcount = local_offsets[rc+1] - local_offsets[rc];
				2688	while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
				2689	if (charcount > 0)
				2690	{
				2691	ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
				2692	}
				2693	else
				2694	{
				2695	ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
				2696	}
				2697	}
				2698	}
				2699	else if (rc != PCRE_ERROR_NOMATCH) return rc;
				2700	}
				2701	break;
				2702
				2703	/*-----------------------------------------------------------------*/
				2704	case OP_BRAPOS:
				2705	case OP_SBRAPOS:
				2706	case OP_CBRAPOS:
				2707	case OP_SCBRAPOS:
				2708	case OP_BRAPOSZERO:
				2709	{
				2710	int charcount, matched_count;
				2711	const uschar *local_ptr = ptr;
				2712	BOOL allow_zero;
				2713
				2714	if (codevalue == OP_BRAPOSZERO)
				2715	{
				2716	allow_zero = TRUE;
				2717	codevalue = (++code); /* Codevalue will be one of above BRAs */
				2718	}
				2719	else allow_zero = FALSE;
				2720
				2721	/* Loop to match the subpattern as many times as possible as if it were
				2722	a complete pattern. */
				2723
				2724	for (matched_count = 0;; matched_count++)
				2725	{
				2726	int local_offsets[2];
				2727	int local_workspace[1000];
				2728
				2729	int rc = internal_dfa_exec(
				2730	md, /* fixed match data */
				2731	code, /* this subexpression's code */
				2732	local_ptr, /* where we currently are */
				2733	(int)(ptr - start_subject), /* start offset */
				2734	local_offsets, /* offset vector */
				2735	sizeof(local_offsets)/sizeof(int), /* size of same */
				2736	local_workspace, /* workspace vector */
				2737	sizeof(local_workspace)/sizeof(int), /* size of same */
				2738	rlevel); /* function recursion level */
				2739
				2740	/* Failed to match */
				2741
				2742	if (rc < 0)
				2743	{
				2744	if (rc != PCRE_ERROR_NOMATCH) return rc;
				2745	break;
				2746	}
				2747
				2748	/* Matched: break the loop if zero characters matched. */
				2749
				2750	charcount = local_offsets[1] - local_offsets[0];
				2751	if (charcount == 0) break;
				2752	local_ptr += charcount; /* Advance temporary position ptr */
				2753	}
				2754
				2755	/* At this point we have matched the subpattern matched_count
				2756	times, and local_ptr is pointing to the character after the end of the
				2757	last match. */
				2758
				2759	if (matched_count > 0 \|\| allow_zero)
				2760	{
				2761	const uschar *end_subpattern = code;
				2762	int next_state_offset;
				2763
				2764	do { end_subpattern += GET(end_subpattern, 1); }
				2765	while (*end_subpattern == OP_ALT);
				2766	next_state_offset =
				2767	(int)(end_subpattern - start_code + LINK_SIZE + 1);
				2768
				2769	/* Optimization: if there are no more active states, and there
				2770	are no new states yet set up, then skip over the subject string
				2771	right here, to save looping. Otherwise, set up the new state to swing
				2772	into action when the end of the matched substring is reached. */
				2773
				2774	if (i + 1 >= active_count && new_count == 0)
				2775	{
				2776	ptr = local_ptr;
				2777	clen = 0;
				2778	ADD_NEW(next_state_offset, 0);
				2779	}
				2780	else
				2781	{
				2782	const uschar *p = ptr;
				2783	const uschar *pp = local_ptr;
				2784	charcount = (int)(pp - p);
				2785	while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
				2786	ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
				2787	}
				2788	}
				2789	}
				2790	break;
				2791
				2792	/*-----------------------------------------------------------------*/
				2793	case OP_ONCE:
				2794	case OP_ONCE_NC:
				2795	{
				2796	int local_offsets[2];
				2797	int local_workspace[1000];
				2798
				2799	int rc = internal_dfa_exec(
				2800	md, /* fixed match data */
				2801	code, /* this subexpression's code */
				2802	ptr, /* where we currently are */
				2803	(int)(ptr - start_subject), /* start offset */
				2804	local_offsets, /* offset vector */
				2805	sizeof(local_offsets)/sizeof(int), /* size of same */
				2806	local_workspace, /* workspace vector */
				2807	sizeof(local_workspace)/sizeof(int), /* size of same */
				2808	rlevel); /* function recursion level */
				2809
				2810	if (rc >= 0)
				2811	{
				2812	const uschar *end_subpattern = code;
				2813	int charcount = local_offsets[1] - local_offsets[0];
				2814	int next_state_offset, repeat_state_offset;
				2815
				2816	do { end_subpattern += GET(end_subpattern, 1); }
				2817	while (*end_subpattern == OP_ALT);
				2818	next_state_offset =
				2819	(int)(end_subpattern - start_code + LINK_SIZE + 1);
				2820
				2821	/* If the end of this subpattern is KETRMAX or KETRMIN, we must
				2822	arrange for the repeat state also to be added to the relevant list.
				2823	Calculate the offset, or set -1 for no repeat. */
				2824
				2825	repeat_state_offset = (*end_subpattern == OP_KETRMAX \|\|
				2826	*end_subpattern == OP_KETRMIN)?
				2827	(int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
				2828
				2829	/* If we have matched an empty string, add the next state at the
				2830	current character pointer. This is important so that the duplicate
				2831	checking kicks in, which is what breaks infinite loops that match an
				2832	empty string. */
				2833
				2834	if (charcount == 0)
				2835	{
				2836	ADD_ACTIVE(next_state_offset, 0);
				2837	}
				2838
				2839	/* Optimization: if there are no more active states, and there
				2840	are no new states yet set up, then skip over the subject string
				2841	right here, to save looping. Otherwise, set up the new state to swing
				2842	into action when the end of the matched substring is reached. */
				2843
				2844	else if (i + 1 >= active_count && new_count == 0)
				2845	{
				2846	ptr += charcount;
				2847	clen = 0;
				2848	ADD_NEW(next_state_offset, 0);
				2849
				2850	/* If we are adding a repeat state at the new character position,
				2851	we must fudge things so that it is the only current state.
				2852	Otherwise, it might be a duplicate of one we processed before, and
				2853	that would cause it to be skipped. */
				2854
				2855	if (repeat_state_offset >= 0)
				2856	{
				2857	next_active_state = active_states;
				2858	active_count = 0;
				2859	i = -1;
				2860	ADD_ACTIVE(repeat_state_offset, 0);
				2861	}
				2862	}
				2863	else
				2864	{
				2865	const uschar *p = start_subject + local_offsets[0];
				2866	const uschar *pp = start_subject + local_offsets[1];
				2867	while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
				2868	ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
				2869	if (repeat_state_offset >= 0)
				2870	{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
				2871	}
				2872	}
				2873	else if (rc != PCRE_ERROR_NOMATCH) return rc;
				2874	}
				2875	break;
				2876
				2877
				2878	/* ========================================================================== */
				2879	/* Handle callouts */
				2880
				2881	case OP_CALLOUT:
				2882	rrc = 0;
				2883	if (pcre_callout != NULL)
				2884	{
				2885	pcre_callout_block cb;
				2886	cb.version = 1; /* Version 1 of the callout block */
				2887	cb.callout_number = code[1];
				2888	cb.offset_vector = offsets;
				2889	cb.subject = (PCRE_SPTR)start_subject;
				2890	cb.subject_length = (int)(end_subject - start_subject);
				2891	cb.start_match = (int)(current_subject - start_subject);
				2892	cb.current_position = (int)(ptr - start_subject);
				2893	cb.pattern_position = GET(code, 2);
				2894	cb.next_item_length = GET(code, 2 + LINK_SIZE);
				2895	cb.capture_top = 1;
				2896	cb.capture_last = -1;
				2897	cb.callout_data = md->callout_data;
				2898	cb.mark = NULL; /* No (MARK) support /
				2899	if ((rrc = (pcre_callout)(&cb)) < 0) return rrc; / Abandon */
				2900	}
				2901	if (rrc == 0)
				2902	{ ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
				2903	break;
				2904
				2905
				2906	/* ========================================================================== */
				2907	default: /* Unsupported opcode */
				2908	return PCRE_ERROR_DFA_UITEM;
				2909	}
				2910
				2911	NEXT_ACTIVE_STATE: continue;
				2912
				2913	} /* End of loop scanning active states */
				2914
				2915	/* We have finished the processing at the current subject character. If no
				2916	new states have been set for the next character, we have found all the
				2917	matches that we are going to find. If we are at the top level and partial
				2918	matching has been requested, check for appropriate conditions.
				2919
				2920	The "forced_fail" variable counts the number of (*F) encountered for the
				2921	character. If it is equal to the original active_count (saved in
				2922	workspace[1]) it means that (*F) was found on every active state. In this
				2923	case we don't want to give a partial match.
				2924
				2925	The "could_continue" variable is true if a state could have continued but
				2926	for the fact that the end of the subject was reached. */
				2927
				2928	if (new_count <= 0)
				2929	{
				2930	if (rlevel == 1 && /* Top level, and */
				2931	could_continue && /* Some could go on */
				2932	forced_fail != workspace[1] && /* Not all forced fail & */
				2933	( /* either... */
				2934	(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
				2935	\|\| /* or... */
				2936	((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
				2937	match_count < 0) /* no matches */
				2938	) && /* And... */
				2939	ptr >= end_subject && /* Reached end of subject */
				2940	ptr > md->start_used_ptr) /* Inspected non-empty string */
				2941	{
				2942	if (offsetcount >= 2)
				2943	{
				2944	offsets[0] = (int)(md->start_used_ptr - start_subject);
				2945	offsets[1] = (int)(end_subject - start_subject);
				2946	}
				2947	match_count = PCRE_ERROR_PARTIAL;
				2948	}
				2949
				2950	DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
				2951	"%.s---------------------\n\n", rlevel2-2, SP, rlevel, match_count,
				2952	rlevel*2-2, SP));
				2953	break; /* In effect, "return", but see the comment below */
				2954	}
				2955
				2956	/* One or more states are active for the next character. */
				2957
				2958	ptr += clen; /* Advance to next subject character */
				2959	} /* Loop to move along the subject string */
				2960
				2961	/* Control gets here from "break" a few lines above. We do it this way because
				2962	if we use "return" above, we have compiler trouble. Some compilers warn if
				2963	there's nothing here because they think the function doesn't return a value. On
				2964	the other hand, if we put a dummy statement here, some more clever compilers
				2965	complain that it can't be reached. Sigh. */
				2966
				2967	return match_count;
				2968	}
				2969
				2970
				2971
				2972
				2973	/*************************************************
				2974	* Execute a Regular Expression - DFA engine *
				2975	*************************************************/
				2976
				2977	/* This external function applies a compiled re to a subject string using a DFA
				2978	engine. This function calls the internal function multiple times if the pattern
				2979	is not anchored.
				2980
				2981	Arguments:
				2982	argument_re points to the compiled expression
				2983	extra_data points to extra data or is NULL
				2984	subject points to the subject string
				2985	length length of subject string (may contain binary zeros)
				2986	start_offset where to start in the subject string
				2987	options option bits
				2988	offsets vector of match offsets
				2989	offsetcount size of same
				2990	workspace workspace vector
				2991	wscount size of same
				2992
				2993	Returns: > 0 => number of match offset pairs placed in offsets
				2994	= 0 => offsets overflowed; longest matches are present
				2995	-1 => failed to match
				2996	< -1 => some kind of unexpected problem
				2997	*/
				2998
				2999	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
				3000	pcre_dfa_exec(const pcre argument_re, const pcre_extra extra_data,
				3001	const char subject, int length, int start_offset, int options, int offsets,
				3002	int offsetcount, int *workspace, int wscount)
				3003	{
				3004	real_pcre re = (real_pcre )argument_re;
				3005	dfa_match_data match_block;
				3006	dfa_match_data *md = &match_block;
				3007	BOOL utf8, anchored, startline, firstline;
				3008	const uschar current_subject, end_subject, *lcc;
				3009
				3010	pcre_study_data internal_study;
				3011	const pcre_study_data *study = NULL;
				3012	real_pcre internal_re;
				3013
				3014	const uschar *req_byte_ptr;
				3015	const uschar *start_bits = NULL;
				3016	BOOL first_byte_caseless = FALSE;
				3017	BOOL req_byte_caseless = FALSE;
				3018	int first_byte = -1;
				3019	int req_byte = -1;
				3020	int req_byte2 = -1;
				3021	int newline;
				3022
				3023	/* Plausibility checks */
				3024
				3025	if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
				3026	if (re == NULL \|\| subject == NULL \|\| workspace == NULL \|\|
				3027	(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
				3028	if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
				3029	if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
				3030	if (start_offset < 0 \|\| start_offset > length) return PCRE_ERROR_BADOFFSET;
				3031
				3032	/* We need to find the pointer to any study data before we test for byte
				3033	flipping, so we scan the extra_data block first. This may set two fields in the
				3034	match block, so we must initialize them beforehand. However, the other fields
				3035	in the match block must not be set until after the byte flipping. */
				3036
				3037	md->tables = re->tables;
				3038	md->callout_data = NULL;
				3039
				3040	if (extra_data != NULL)
				3041	{
				3042	unsigned int flags = extra_data->flags;
				3043	if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
				3044	study = (const pcre_study_data *)extra_data->study_data;
				3045	if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
				3046	if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
				3047	return PCRE_ERROR_DFA_UMLIMIT;
				3048	if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
				3049	md->callout_data = extra_data->callout_data;
				3050	if ((flags & PCRE_EXTRA_TABLES) != 0)
				3051	md->tables = extra_data->tables;
				3052	}
				3053
				3054	/* Check that the first field in the block is the magic number. If it is not,
				3055	test for a regex that was compiled on a host of opposite endianness. If this is
				3056	the case, flipped values are put in internal_re and internal_study if there was
				3057	study data too. */
				3058
				3059	if (re->magic_number != MAGIC_NUMBER)
				3060	{
				3061	re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
				3062	if (re == NULL) return PCRE_ERROR_BADMAGIC;
				3063	if (study != NULL) study = &internal_study;
				3064	}
				3065
				3066	/* Set some local values */
				3067
				3068	current_subject = (const unsigned char *)subject + start_offset;
				3069	end_subject = (const unsigned char *)subject + length;
				3070	req_byte_ptr = current_subject - 1;
				3071
				3072	#ifdef SUPPORT_UTF8
				3073	utf8 = (re->options & PCRE_UTF8) != 0;
				3074	#else
				3075	utf8 = FALSE;
				3076	#endif
				3077
				3078	anchored = (options & (PCRE_ANCHORED\|PCRE_DFA_RESTART)) != 0 \|\|
				3079	(re->options & PCRE_ANCHORED) != 0;
				3080
				3081	/* The remaining fixed data for passing around. */
				3082
				3083	md->start_code = (const uschar *)argument_re +
				3084	re->name_table_offset + re->name_count * re->name_entry_size;
				3085	md->start_subject = (const unsigned char *)subject;
				3086	md->end_subject = end_subject;
				3087	md->start_offset = start_offset;
				3088	md->moptions = options;
				3089	md->poptions = re->options;
				3090
				3091	/* If the BSR option is not set at match time, copy what was set
				3092	at compile time. */
				3093
				3094	if ((md->moptions & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE)) == 0)
				3095	{
				3096	if ((re->options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE)) != 0)
				3097	md->moptions \|= re->options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE);
				3098	#ifdef BSR_ANYCRLF
				3099	else md->moptions \|= PCRE_BSR_ANYCRLF;
				3100	#endif
				3101	}
				3102
				3103	/* Handle different types of newline. The three bits give eight cases. If
				3104	nothing is set at run time, whatever was used at compile time applies. */
				3105
				3106	switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
				3107	PCRE_NEWLINE_BITS)
				3108	{
				3109	case 0: newline = NEWLINE; break; /* Compile-time default */
				3110	case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
				3111	case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
				3112	case PCRE_NEWLINE_CR+
				3113	PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) \| CHAR_NL; break;
				3114	case PCRE_NEWLINE_ANY: newline = -1; break;
				3115	case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
				3116	default: return PCRE_ERROR_BADNEWLINE;
				3117	}
				3118
				3119	if (newline == -2)
				3120	{
				3121	md->nltype = NLTYPE_ANYCRLF;
				3122	}
				3123	else if (newline < 0)
				3124	{
				3125	md->nltype = NLTYPE_ANY;
				3126	}
				3127	else
				3128	{
				3129	md->nltype = NLTYPE_FIXED;
				3130	if (newline > 255)
				3131	{
				3132	md->nllen = 2;
				3133	md->nl[0] = (newline >> 8) & 255;
				3134	md->nl[1] = newline & 255;
				3135	}
				3136	else
				3137	{
				3138	md->nllen = 1;
				3139	md->nl[0] = newline;
				3140	}
				3141	}
				3142
				3143	/* Check a UTF-8 string if required. Unfortunately there's no way of passing
				3144	back the character offset. */
				3145
				3146	#ifdef SUPPORT_UTF8
				3147	if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
				3148	{
				3149	int erroroffset;
				3150	int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);
				3151	if (errorcode != 0)
				3152	{
				3153	if (offsetcount >= 2)
				3154	{
				3155	offsets[0] = erroroffset;
				3156	offsets[1] = errorcode;
				3157	}
				3158	return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
				3159	PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
				3160	}
				3161	if (start_offset > 0 && start_offset < length &&
				3162	(((USPTR)subject)[start_offset] & 0xc0) == 0x80)
				3163	return PCRE_ERROR_BADUTF8_OFFSET;
				3164	}
				3165	#endif
				3166
				3167	/* If the exec call supplied NULL for tables, use the inbuilt ones. This
				3168	is a feature that makes it possible to save compiled regex and re-use them
				3169	in other programs later. */
				3170
				3171	if (md->tables == NULL) md->tables = _pcre_default_tables;
				3172
				3173	/* The lower casing table and the "must be at the start of a line" flag are
				3174	used in a loop when finding where to start. */
				3175
				3176	lcc = md->tables + lcc_offset;
				3177	startline = (re->flags & PCRE_STARTLINE) != 0;
				3178	firstline = (re->options & PCRE_FIRSTLINE) != 0;
				3179
				3180	/* Set up the first character to match, if available. The first_byte value is
				3181	never set for an anchored regular expression, but the anchoring may be forced
				3182	at run time, so we have to test for anchoring. The first char may be unset for
				3183	an unanchored pattern, of course. If there's no first char and the pattern was
				3184	studied, there may be a bitmap of possible first characters. */
				3185
				3186	if (!anchored)
				3187	{
				3188	if ((re->flags & PCRE_FIRSTSET) != 0)
				3189	{
				3190	first_byte = re->first_byte & 255;
				3191	if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
				3192	first_byte = lcc[first_byte];
				3193	}
				3194	else
				3195	{
				3196	if (!startline && study != NULL &&
				3197	(study->flags & PCRE_STUDY_MAPPED) != 0)
				3198	start_bits = study->start_bits;
				3199	}
				3200	}
				3201
				3202	/* For anchored or unanchored matches, there may be a "last known required
				3203	character" set. */
				3204
				3205	if ((re->flags & PCRE_REQCHSET) != 0)
				3206	{
				3207	req_byte = re->req_byte & 255;
				3208	req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
				3209	req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
				3210	}
				3211
				3212	/* Call the main matching function, looping for a non-anchored regex after a
				3213	failed match. If not restarting, perform certain optimizations at the start of
				3214	a match. */
				3215
				3216	for (;;)
				3217	{
				3218	int rc;
				3219
				3220	if ((options & PCRE_DFA_RESTART) == 0)
				3221	{
				3222	const uschar *save_end_subject = end_subject;
				3223
				3224	/* If firstline is TRUE, the start of the match is constrained to the first
				3225	line of a multiline string. Implement this by temporarily adjusting
				3226	end_subject so that we stop scanning at a newline. If the match fails at
				3227	the newline, later code breaks this loop. */
				3228
				3229	if (firstline)
				3230	{
				3231	USPTR t = current_subject;
				3232	#ifdef SUPPORT_UTF8
				3233	if (utf8)
				3234	{
				3235	while (t < md->end_subject && !IS_NEWLINE(t))
				3236	{
				3237	t++;
				3238	while (t < end_subject && (*t & 0xc0) == 0x80) t++;
				3239	}
				3240	}
				3241	else
				3242	#endif
				3243	while (t < md->end_subject && !IS_NEWLINE(t)) t++;
				3244	end_subject = t;
				3245	}
				3246
				3247	/* There are some optimizations that avoid running the match if a known
				3248	starting point is not found. However, there is an option that disables
				3249	these, for testing and for ensuring that all callouts do actually occur.
				3250	The option can be set in the regex by (*NO_START_OPT) or passed in
				3251	match-time options. */
				3252
				3253	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == 0)
				3254	{
				3255	/* Advance to a known first byte. */
				3256
				3257	if (first_byte >= 0)
				3258	{
				3259	if (first_byte_caseless)
				3260	while (current_subject < end_subject &&
				3261	lcc[*current_subject] != first_byte)
				3262	current_subject++;
				3263	else
				3264	while (current_subject < end_subject &&
				3265	*current_subject != first_byte)
				3266	current_subject++;
				3267	}
				3268
				3269	/* Or to just after a linebreak for a multiline match if possible */
				3270
				3271	else if (startline)
				3272	{
				3273	if (current_subject > md->start_subject + start_offset)
				3274	{
				3275	#ifdef SUPPORT_UTF8
				3276	if (utf8)
				3277	{
				3278	while (current_subject < end_subject &&
				3279	!WAS_NEWLINE(current_subject))
				3280	{
				3281	current_subject++;
				3282	while(current_subject < end_subject &&
				3283	(*current_subject & 0xc0) == 0x80)
				3284	current_subject++;
				3285	}
				3286	}
				3287	else
				3288	#endif
				3289	while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
				3290	current_subject++;
				3291
				3292	/* If we have just passed a CR and the newline option is ANY or
				3293	ANYCRLF, and we are now at a LF, advance the match position by one
				3294	more character. */
				3295
				3296	if (current_subject[-1] == CHAR_CR &&
				3297	(md->nltype == NLTYPE_ANY \|\| md->nltype == NLTYPE_ANYCRLF) &&
				3298	current_subject < end_subject &&
				3299	*current_subject == CHAR_NL)
				3300	current_subject++;
				3301	}
				3302	}
				3303
				3304	/* Or to a non-unique first char after study */
				3305
				3306	else if (start_bits != NULL)
				3307	{
				3308	while (current_subject < end_subject)
				3309	{
				3310	register unsigned int c = *current_subject;
				3311	if ((start_bits[c/8] & (1 << (c&7))) == 0)
				3312	{
				3313	current_subject++;
				3314	#ifdef SUPPORT_UTF8
				3315	if (utf8)
				3316	while(current_subject < end_subject &&
				3317	(*current_subject & 0xc0) == 0x80) current_subject++;
				3318	#endif
				3319	}
				3320	else break;
				3321	}
				3322	}
				3323	}
				3324
				3325	/* Restore fudged end_subject */
				3326
				3327	end_subject = save_end_subject;
				3328
				3329	/* The following two optimizations are disabled for partial matching or if
				3330	disabling is explicitly requested (and of course, by the test above, this
				3331	code is not obeyed when restarting after a partial match). */
				3332
				3333	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
				3334	(options & (PCRE_PARTIAL_HARD\|PCRE_PARTIAL_SOFT)) == 0)
				3335	{
				3336	/* If the pattern was studied, a minimum subject length may be set. This
				3337	is a lower bound; no actual string of that length may actually match the
				3338	pattern. Although the value is, strictly, in characters, we treat it as
				3339	bytes to avoid spending too much time in this optimization. */
				3340
				3341	if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
				3342	(pcre_uint32)(end_subject - current_subject) < study->minlength)
				3343	return PCRE_ERROR_NOMATCH;
				3344
				3345	/* If req_byte is set, we know that that character must appear in the
				3346	subject for the match to succeed. If the first character is set, req_byte
				3347	must be later in the subject; otherwise the test starts at the match
				3348	point. This optimization can save a huge amount of work in patterns with
				3349	nested unlimited repeats that aren't going to match. Writing separate
				3350	code for cased/caseless versions makes it go faster, as does using an
				3351	autoincrement and backing off on a match.
				3352
				3353	HOWEVER: when the subject string is very, very long, searching to its end
				3354	can take a long time, and give bad performance on quite ordinary
				3355	patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
				3356	string... so we don't do this when the string is sufficiently long. */
				3357
				3358	if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
				3359	{
				3360	register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
				3361
				3362	/* We don't need to repeat the search if we haven't yet reached the
				3363	place we found it at last time. */
				3364
				3365	if (p > req_byte_ptr)
				3366	{
				3367	if (req_byte_caseless)
				3368	{
				3369	while (p < end_subject)
				3370	{
				3371	register int pp = *p++;
				3372	if (pp == req_byte \|\| pp == req_byte2) { p--; break; }
				3373	}
				3374	}
				3375	else
				3376	{
				3377	while (p < end_subject)
				3378	{
				3379	if (*p++ == req_byte) { p--; break; }
				3380	}
				3381	}
				3382
				3383	/* If we can't find the required character, break the matching loop,
				3384	which will cause a return or PCRE_ERROR_NOMATCH. */
				3385
				3386	if (p >= end_subject) break;
				3387
				3388	/* If we have found the required character, save the point where we
				3389	found it, so that we don't search again next time round the loop if
				3390	the start hasn't passed this character yet. */
				3391
				3392	req_byte_ptr = p;
				3393	}
				3394	}
				3395	}
				3396	} /* End of optimizations that are done when not restarting */
				3397
				3398	/* OK, now we can do the business */
				3399
				3400	md->start_used_ptr = current_subject;
				3401	md->recursive = NULL;
				3402
				3403	rc = internal_dfa_exec(
				3404	md, /* fixed match data */
				3405	md->start_code, /* this subexpression's code */
				3406	current_subject, /* where we currently are */
				3407	start_offset, /* start offset in subject */
				3408	offsets, /* offset vector */
				3409	offsetcount, /* size of same */
				3410	workspace, /* workspace vector */
				3411	wscount, /* size of same */
				3412	0); /* function recurse level */
				3413
				3414	/* Anything other than "no match" means we are done, always; otherwise, carry
				3415	on only if not anchored. */
				3416
				3417	if (rc != PCRE_ERROR_NOMATCH \|\| anchored) return rc;
				3418
				3419	/* Advance to the next subject character unless we are at the end of a line
				3420	and firstline is set. */
				3421
				3422	if (firstline && IS_NEWLINE(current_subject)) break;
				3423	current_subject++;
				3424	if (utf8)
				3425	{
				3426	while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
				3427	current_subject++;
				3428	}
				3429	if (current_subject > end_subject) break;
				3430
				3431	/* If we have just passed a CR and we are now at a LF, and the pattern does
				3432	not contain any explicit matches for \r or \n, and the newline option is CRLF
				3433	or ANY or ANYCRLF, advance the match position by one more character. */
				3434
				3435	if (current_subject[-1] == CHAR_CR &&
				3436	current_subject < end_subject &&
				3437	*current_subject == CHAR_NL &&
				3438	(re->flags & PCRE_HASCRORLF) == 0 &&
				3439	(md->nltype == NLTYPE_ANY \|\|
				3440	md->nltype == NLTYPE_ANYCRLF \|\|
				3441	md->nllen == 2))
				3442	current_subject++;
				3443
				3444	} /* "Bumpalong" loop */
				3445
				3446	return PCRE_ERROR_NOMATCH;
				3447	}
				3448
				3449	/* End of pcre_dfa_exec.c */