Blame - jni/libpcre/sources/pcre_study.c - jami-client-android

blob: cb2f23e9bf03ae27cebc16306cc597c033092d4a [file] [log] [blame]

Tristan Matthews	0461646	2013-11-14 16:09:34 -0500	[diff] [blame]	1	/*************************************************
				2	* Perl-Compatible Regular Expressions *
				3	*************************************************/
				4
				5	/* PCRE is a library of functions to support regular expressions whose syntax
				6	and semantics are as close as possible to those of the Perl 5 language.
				7
				8	Written by Philip Hazel
				9	Copyright (c) 1997-2010 University of Cambridge
				10
				11	-----------------------------------------------------------------------------
				12	Redistribution and use in source and binary forms, with or without
				13	modification, are permitted provided that the following conditions are met:
				14
				15	* Redistributions of source code must retain the above copyright notice,
				16	this list of conditions and the following disclaimer.
				17
				18	* Redistributions in binary form must reproduce the above copyright
				19	notice, this list of conditions and the following disclaimer in the
				20	documentation and/or other materials provided with the distribution.
				21
				22	* Neither the name of the University of Cambridge nor the names of its
				23	contributors may be used to endorse or promote products derived from
				24	this software without specific prior written permission.
				25
				26	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				27	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				28	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				29	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				30	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				31	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				32	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				33	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				34	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				35	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				36	POSSIBILITY OF SUCH DAMAGE.
				37	-----------------------------------------------------------------------------
				38	*/
				39
				40
				41	/* This module contains the external function pcre_study(), along with local
				42	supporting functions. */
				43
				44
				45	#ifdef HAVE_CONFIG_H
				46	#include "config.h"
				47	#endif
				48
				49	#include "pcre_internal.h"
				50
				51	#define SET_BIT(c) start_bits[c/8] \|= (1 << (c&7))
				52
				53	/* Returns from set_start_bits() */
				54
				55	enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
				56
				57
				58
				59	/*************************************************
				60	* Find the minimum subject length for a group *
				61	*************************************************/
				62
				63	/* Scan a parenthesized group and compute the minimum length of subject that
				64	is needed to match it. This is a lower bound; it does not mean there is a
				65	string of that length that matches. In UTF8 mode, the result is in characters
				66	rather than bytes.
				67
				68	Arguments:
				69	code pointer to start of group (the bracket)
				70	startcode pointer to start of the whole pattern
				71	options the compiling options
				72	int RECURSE depth
				73
				74	Returns: the minimum length
				75	-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
				76	-2 internal error (missing capturing bracket)
				77	-3 internal error (opcode not listed)
				78	*/
				79
				80	static int
				81	find_minlength(const uschar code, const uschar startcode, int options,
				82	int recurse_depth)
				83	{
				84	int length = -1;
				85	BOOL utf8 = (options & PCRE_UTF8) != 0;
				86	BOOL had_recurse = FALSE;
				87	register int branchlength = 0;
				88	register uschar cc = (uschar )code + 1 + LINK_SIZE;
				89
				90	if (code == OP_CBRA \|\| code == OP_SCBRA \|\|
				91	code == OP_CBRAPOS \|\| code == OP_SCBRAPOS) cc += 2;
				92
				93	/* Scan along the opcodes for this branch. If we get to the end of the
				94	branch, check the length against that of the other branches. */
				95
				96	for (;;)
				97	{
				98	int d, min;
				99	uschar cs, ce;
				100	register int op = *cc;
				101
				102	switch (op)
				103	{
				104	case OP_COND:
				105	case OP_SCOND:
				106
				107	/* If there is only one branch in a condition, the implied branch has zero
				108	length, so we don't add anything. This covers the DEFINE "condition"
				109	automatically. */
				110
				111	cs = cc + GET(cc, 1);
				112	if (*cs != OP_ALT)
				113	{
				114	cc = cs + 1 + LINK_SIZE;
				115	break;
				116	}
				117
				118	/* Otherwise we can fall through and treat it the same as any other
				119	subpattern. */
				120
				121	case OP_CBRA:
				122	case OP_SCBRA:
				123	case OP_BRA:
				124	case OP_SBRA:
				125	case OP_CBRAPOS:
				126	case OP_SCBRAPOS:
				127	case OP_BRAPOS:
				128	case OP_SBRAPOS:
				129	case OP_ONCE:
				130	case OP_ONCE_NC:
				131	d = find_minlength(cc, startcode, options, recurse_depth);
				132	if (d < 0) return d;
				133	branchlength += d;
				134	do cc += GET(cc, 1); while (*cc == OP_ALT);
				135	cc += 1 + LINK_SIZE;
				136	break;
				137
				138	/* ACCEPT makes things far too complicated; we have to give up. */
				139
				140	case OP_ACCEPT:
				141	case OP_ASSERT_ACCEPT:
				142	return -1;
				143
				144	/* Reached end of a branch; if it's a ket it is the end of a nested
				145	call. If it's ALT it is an alternation in a nested call. If it is END it's
				146	the end of the outer call. All can be handled by the same code. If an
				147	ACCEPT was previously encountered, use the length that was in force at that
				148	time, and pass back the shortest ACCEPT length. */
				149
				150	case OP_ALT:
				151	case OP_KET:
				152	case OP_KETRMAX:
				153	case OP_KETRMIN:
				154	case OP_KETRPOS:
				155	case OP_END:
				156	if (length < 0 \|\| (!had_recurse && branchlength < length))
				157	length = branchlength;
				158	if (op != OP_ALT) return length;
				159	cc += 1 + LINK_SIZE;
				160	branchlength = 0;
				161	had_recurse = FALSE;
				162	break;
				163
				164	/* Skip over assertive subpatterns */
				165
				166	case OP_ASSERT:
				167	case OP_ASSERT_NOT:
				168	case OP_ASSERTBACK:
				169	case OP_ASSERTBACK_NOT:
				170	do cc += GET(cc, 1); while (*cc == OP_ALT);
				171	/* Fall through */
				172
				173	/* Skip over things that don't match chars */
				174
				175	case OP_REVERSE:
				176	case OP_CREF:
				177	case OP_NCREF:
				178	case OP_RREF:
				179	case OP_NRREF:
				180	case OP_DEF:
				181	case OP_CALLOUT:
				182	case OP_SOD:
				183	case OP_SOM:
				184	case OP_EOD:
				185	case OP_EODN:
				186	case OP_CIRC:
				187	case OP_CIRCM:
				188	case OP_DOLL:
				189	case OP_DOLLM:
				190	case OP_NOT_WORD_BOUNDARY:
				191	case OP_WORD_BOUNDARY:
				192	cc += _pcre_OP_lengths[*cc];
				193	break;
				194
				195	/* Skip over a subpattern that has a {0} or {0,x} quantifier */
				196
				197	case OP_BRAZERO:
				198	case OP_BRAMINZERO:
				199	case OP_BRAPOSZERO:
				200	case OP_SKIPZERO:
				201	cc += _pcre_OP_lengths[*cc];
				202	do cc += GET(cc, 1); while (*cc == OP_ALT);
				203	cc += 1 + LINK_SIZE;
				204	break;
				205
				206	/* Handle literal characters and + repetitions */
				207
				208	case OP_CHAR:
				209	case OP_CHARI:
				210	case OP_NOT:
				211	case OP_NOTI:
				212	case OP_PLUS:
				213	case OP_PLUSI:
				214	case OP_MINPLUS:
				215	case OP_MINPLUSI:
				216	case OP_POSPLUS:
				217	case OP_POSPLUSI:
				218	case OP_NOTPLUS:
				219	case OP_NOTPLUSI:
				220	case OP_NOTMINPLUS:
				221	case OP_NOTMINPLUSI:
				222	case OP_NOTPOSPLUS:
				223	case OP_NOTPOSPLUSI:
				224	branchlength++;
				225	cc += 2;
				226	#ifdef SUPPORT_UTF8
				227	if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
				228	#endif
				229	break;
				230
				231	case OP_TYPEPLUS:
				232	case OP_TYPEMINPLUS:
				233	case OP_TYPEPOSPLUS:
				234	branchlength++;
				235	cc += (cc[1] == OP_PROP \|\| cc[1] == OP_NOTPROP)? 4 : 2;
				236	break;
				237
				238	/* Handle exact repetitions. The count is already in characters, but we
				239	need to skip over a multibyte character in UTF8 mode. */
				240
				241	case OP_EXACT:
				242	case OP_EXACTI:
				243	case OP_NOTEXACT:
				244	case OP_NOTEXACTI:
				245	branchlength += GET2(cc,1);
				246	cc += 4;
				247	#ifdef SUPPORT_UTF8
				248	if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
				249	#endif
				250	break;
				251
				252	case OP_TYPEEXACT:
				253	branchlength += GET2(cc,1);
				254	cc += (cc[3] == OP_PROP \|\| cc[3] == OP_NOTPROP)? 6 : 4;
				255	break;
				256
				257	/* Handle single-char non-literal matchers */
				258
				259	case OP_PROP:
				260	case OP_NOTPROP:
				261	cc += 2;
				262	/* Fall through */
				263
				264	case OP_NOT_DIGIT:
				265	case OP_DIGIT:
				266	case OP_NOT_WHITESPACE:
				267	case OP_WHITESPACE:
				268	case OP_NOT_WORDCHAR:
				269	case OP_WORDCHAR:
				270	case OP_ANY:
				271	case OP_ALLANY:
				272	case OP_EXTUNI:
				273	case OP_HSPACE:
				274	case OP_NOT_HSPACE:
				275	case OP_VSPACE:
				276	case OP_NOT_VSPACE:
				277	branchlength++;
				278	cc++;
				279	break;
				280
				281	/* "Any newline" might match two characters, but it also might match just
				282	one. */
				283
				284	case OP_ANYNL:
				285	branchlength += 1;
				286	cc++;
				287	break;
				288
				289	/* The single-byte matcher means we can't proceed in UTF-8 mode. (In
				290	non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever
				291	appear, but leave the code, just in case.) */
				292
				293	case OP_ANYBYTE:
				294	#ifdef SUPPORT_UTF8
				295	if (utf8) return -1;
				296	#endif
				297	branchlength++;
				298	cc++;
				299	break;
				300
				301	/* For repeated character types, we have to test for \p and \P, which have
				302	an extra two bytes of parameters. */
				303
				304	case OP_TYPESTAR:
				305	case OP_TYPEMINSTAR:
				306	case OP_TYPEQUERY:
				307	case OP_TYPEMINQUERY:
				308	case OP_TYPEPOSSTAR:
				309	case OP_TYPEPOSQUERY:
				310	if (cc[1] == OP_PROP \|\| cc[1] == OP_NOTPROP) cc += 2;
				311	cc += _pcre_OP_lengths[op];
				312	break;
				313
				314	case OP_TYPEUPTO:
				315	case OP_TYPEMINUPTO:
				316	case OP_TYPEPOSUPTO:
				317	if (cc[3] == OP_PROP \|\| cc[3] == OP_NOTPROP) cc += 2;
				318	cc += _pcre_OP_lengths[op];
				319	break;
				320
				321	/* Check a class for variable quantification */
				322
				323	#ifdef SUPPORT_UTF8
				324	case OP_XCLASS:
				325	cc += GET(cc, 1) - 33;
				326	/* Fall through */
				327	#endif
				328
				329	case OP_CLASS:
				330	case OP_NCLASS:
				331	cc += 33;
				332
				333	switch (*cc)
				334	{
				335	case OP_CRPLUS:
				336	case OP_CRMINPLUS:
				337	branchlength++;
				338	/* Fall through */
				339
				340	case OP_CRSTAR:
				341	case OP_CRMINSTAR:
				342	case OP_CRQUERY:
				343	case OP_CRMINQUERY:
				344	cc++;
				345	break;
				346
				347	case OP_CRRANGE:
				348	case OP_CRMINRANGE:
				349	branchlength += GET2(cc,1);
				350	cc += 5;
				351	break;
				352
				353	default:
				354	branchlength++;
				355	break;
				356	}
				357	break;
				358
				359	/* Backreferences and subroutine calls are treated in the same way: we find
				360	the minimum length for the subpattern. A recursion, however, causes an
				361	a flag to be set that causes the length of this branch to be ignored. The
				362	logic is that a recursion can only make sense if there is another
				363	alternation that stops the recursing. That will provide the minimum length
				364	(when no recursion happens). A backreference within the group that it is
				365	referencing behaves in the same way.
				366
				367	If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
				368	matches an empty string (by default it causes a matching failure), so in
				369	that case we must set the minimum length to zero. */
				370
				371	case OP_REF:
				372	case OP_REFI:
				373	if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
				374	{
				375	ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
				376	if (cs == NULL) return -2;
				377	do ce += GET(ce, 1); while (*ce == OP_ALT);
				378	if (cc > cs && cc < ce)
				379	{
				380	d = 0;
				381	had_recurse = TRUE;
				382	}
				383	else
				384	{
				385	d = find_minlength(cs, startcode, options, recurse_depth);
				386	}
				387	}
				388	else d = 0;
				389	cc += 3;
				390
				391	/* Handle repeated back references */
				392
				393	switch (*cc)
				394	{
				395	case OP_CRSTAR:
				396	case OP_CRMINSTAR:
				397	case OP_CRQUERY:
				398	case OP_CRMINQUERY:
				399	min = 0;
				400	cc++;
				401	break;
				402
				403	case OP_CRPLUS:
				404	case OP_CRMINPLUS:
				405	min = 1;
				406	cc++;
				407	break;
				408
				409	case OP_CRRANGE:
				410	case OP_CRMINRANGE:
				411	min = GET2(cc, 1);
				412	cc += 5;
				413	break;
				414
				415	default:
				416	min = 1;
				417	break;
				418	}
				419
				420	branchlength += min * d;
				421	break;
				422
				423	/* We can easily detect direct recursion, but not mutual recursion. This is
				424	caught by a recursion depth count. */
				425
				426	case OP_RECURSE:
				427	cs = ce = (uschar *)startcode + GET(cc, 1);
				428	do ce += GET(ce, 1); while (*ce == OP_ALT);
				429	if ((cc > cs && cc < ce) \|\| recurse_depth > 10)
				430	had_recurse = TRUE;
				431	else
				432	{
				433	branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
				434	}
				435	cc += 1 + LINK_SIZE;
				436	break;
				437
				438	/* Anything else does not or need not match a character. We can get the
				439	item's length from the table, but for those that can match zero occurrences
				440	of a character, we must take special action for UTF-8 characters. As it
				441	happens, the "NOT" versions of these opcodes are used at present only for
				442	ASCII characters, so they could be omitted from this list. However, in
				443	future that may change, so we include them here so as not to leave a
				444	gotcha for a future maintainer. */
				445
				446	case OP_UPTO:
				447	case OP_UPTOI:
				448	case OP_NOTUPTO:
				449	case OP_NOTUPTOI:
				450	case OP_MINUPTO:
				451	case OP_MINUPTOI:
				452	case OP_NOTMINUPTO:
				453	case OP_NOTMINUPTOI:
				454	case OP_POSUPTO:
				455	case OP_POSUPTOI:
				456	case OP_NOTPOSUPTO:
				457	case OP_NOTPOSUPTOI:
				458
				459	case OP_STAR:
				460	case OP_STARI:
				461	case OP_NOTSTAR:
				462	case OP_NOTSTARI:
				463	case OP_MINSTAR:
				464	case OP_MINSTARI:
				465	case OP_NOTMINSTAR:
				466	case OP_NOTMINSTARI:
				467	case OP_POSSTAR:
				468	case OP_POSSTARI:
				469	case OP_NOTPOSSTAR:
				470	case OP_NOTPOSSTARI:
				471
				472	case OP_QUERY:
				473	case OP_QUERYI:
				474	case OP_NOTQUERY:
				475	case OP_NOTQUERYI:
				476	case OP_MINQUERY:
				477	case OP_MINQUERYI:
				478	case OP_NOTMINQUERY:
				479	case OP_NOTMINQUERYI:
				480	case OP_POSQUERY:
				481	case OP_POSQUERYI:
				482	case OP_NOTPOSQUERY:
				483	case OP_NOTPOSQUERYI:
				484
				485	cc += _pcre_OP_lengths[op];
				486	#ifdef SUPPORT_UTF8
				487	if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
				488	#endif
				489	break;
				490
				491	/* Skip these, but we need to add in the name length. */
				492
				493	case OP_MARK:
				494	case OP_PRUNE_ARG:
				495	case OP_SKIP_ARG:
				496	case OP_THEN_ARG:
				497	cc += _pcre_OP_lengths[op] + cc[1];
				498	break;
				499
				500	/* The remaining opcodes are just skipped over. */
				501
				502	case OP_CLOSE:
				503	case OP_COMMIT:
				504	case OP_FAIL:
				505	case OP_PRUNE:
				506	case OP_SET_SOM:
				507	case OP_SKIP:
				508	case OP_THEN:
				509	cc += _pcre_OP_lengths[op];
				510	break;
				511
				512	/* This should not occur: we list all opcodes explicitly so that when
				513	new ones get added they are properly considered. */
				514
				515	default:
				516	return -3;
				517	}
				518	}
				519	/* Control never gets here */
				520	}
				521
				522
				523
				524	/*************************************************
				525	* Set a bit and maybe its alternate case *
				526	*************************************************/
				527
				528	/* Given a character, set its first byte's bit in the table, and also the
				529	corresponding bit for the other version of a letter if we are caseless. In
				530	UTF-8 mode, for characters greater than 127, we can only do the caseless thing
				531	when Unicode property support is available.
				532
				533	Arguments:
				534	start_bits points to the bit map
				535	p points to the character
				536	caseless the caseless flag
				537	cd the block with char table pointers
				538	utf8 TRUE for UTF-8 mode
				539
				540	Returns: pointer after the character
				541	*/
				542
				543	static const uschar *
				544	set_table_bit(uschar start_bits, const uschar p, BOOL caseless,
				545	compile_data *cd, BOOL utf8)
				546	{
				547	unsigned int c = *p;
				548
				549	SET_BIT(c);
				550
				551	#ifdef SUPPORT_UTF8
				552	if (utf8 && c > 127)
				553	{
				554	GETCHARINC(c, p);
				555	#ifdef SUPPORT_UCP
				556	if (caseless)
				557	{
				558	uschar buff[8];
				559	c = UCD_OTHERCASE(c);
				560	(void)_pcre_ord2utf8(c, buff);
				561	SET_BIT(buff[0]);
				562	}
				563	#endif
				564	return p;
				565	}
				566	#endif
				567
				568	/* Not UTF-8 mode, or character is less than 127. */
				569
				570	if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
				571	return p + 1;
				572	}
				573
				574
				575
				576	/*************************************************
				577	* Set bits for a positive character type *
				578	*************************************************/
				579
				580	/* This function sets starting bits for a character type. In UTF-8 mode, we can
				581	only do a direct setting for bytes less than 128, as otherwise there can be
				582	confusion with bytes in the middle of UTF-8 characters. In a "traditional"
				583	environment, the tables will only recognize ASCII characters anyway, but in at
				584	least one Windows environment, some higher bytes bits were set in the tables.
				585	So we deal with that case by considering the UTF-8 encoding.
				586
				587	Arguments:
				588	start_bits the starting bitmap
				589	cbit type the type of character wanted
				590	table_limit 32 for non-UTF-8; 16 for UTF-8
				591	cd the block with char table pointers
				592
				593	Returns: nothing
				594	*/
				595
				596	static void
				597	set_type_bits(uschar *start_bits, int cbit_type, int table_limit,
				598	compile_data *cd)
				599	{
				600	register int c;
				601	for (c = 0; c < table_limit; c++) start_bits[c] \|= cd->cbits[c+cbit_type];
				602	if (table_limit == 32) return;
				603	for (c = 128; c < 256; c++)
				604	{
				605	if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
				606	{
				607	uschar buff[8];
				608	(void)_pcre_ord2utf8(c, buff);
				609	SET_BIT(buff[0]);
				610	}
				611	}
				612	}
				613
				614
				615	/*************************************************
				616	* Set bits for a negative character type *
				617	*************************************************/
				618
				619	/* This function sets starting bits for a negative character type such as \D.
				620	In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
				621	otherwise there can be confusion with bytes in the middle of UTF-8 characters.
				622	Unlike in the positive case, where we can set appropriate starting bits for
				623	specific high-valued UTF-8 characters, in this case we have to set the bits for
				624	all high-valued characters. The lowest is 0xc2, but we overkill by starting at
				625	0xc0 (192) for simplicity.
				626
				627	Arguments:
				628	start_bits the starting bitmap
				629	cbit type the type of character wanted
				630	table_limit 32 for non-UTF-8; 16 for UTF-8
				631	cd the block with char table pointers
				632
				633	Returns: nothing
				634	*/
				635
				636	static void
				637	set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,
				638	compile_data *cd)
				639	{
				640	register int c;
				641	for (c = 0; c < table_limit; c++) start_bits[c] \|= ~cd->cbits[c+cbit_type];
				642	if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
				643	}
				644
				645
				646
				647	/*************************************************
				648	* Create bitmap of starting bytes *
				649	*************************************************/
				650
				651	/* This function scans a compiled unanchored expression recursively and
				652	attempts to build a bitmap of the set of possible starting bytes. As time goes
				653	by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
				654	useful for parenthesized groups in patterns such as (a*)b where the group
				655	provides some optional starting bytes but scanning must continue at the outer
				656	level to find at least one mandatory byte. At the outermost level, this
				657	function fails unless the result is SSB_DONE.
				658
				659	Arguments:
				660	code points to an expression
				661	start_bits points to a 32-byte table, initialized to 0
				662	utf8 TRUE if in UTF-8 mode
				663	cd the block with char table pointers
				664
				665	Returns: SSB_FAIL => Failed to find any starting bytes
				666	SSB_DONE => Found mandatory starting bytes
				667	SSB_CONTINUE => Found optional starting bytes
				668	SSB_UNKNOWN => Hit an unrecognized opcode
				669	*/
				670
				671	static int
				672	set_start_bits(const uschar code, uschar start_bits, BOOL utf8,
				673	compile_data *cd)
				674	{
				675	register int c;
				676	int yield = SSB_DONE;
				677	int table_limit = utf8? 16:32;
				678
				679	#if 0
				680	/* ========================================================================= */
				681	/* The following comment and code was inserted in January 1999. In May 2006,
				682	when it was observed to cause compiler warnings about unused values, I took it
				683	out again. If anybody is still using OS/2, they will have to put it back
				684	manually. */
				685
				686	/* This next statement and the later reference to dummy are here in order to
				687	trick the optimizer of the IBM C compiler for OS/2 into generating correct
				688	code. Apparently IBM isn't going to fix the problem, and we would rather not
				689	disable optimization (in this module it actually makes a big difference, and
				690	the pcre module can use all the optimization it can get). */
				691
				692	volatile int dummy;
				693	/* ========================================================================= */
				694	#endif
				695
				696	do
				697	{
				698	BOOL try_next = TRUE;
				699	const uschar *tcode = code + 1 + LINK_SIZE;
				700
				701	if (code == OP_CBRA \|\| code == OP_SCBRA \|\|
				702	code == OP_CBRAPOS \|\| code == OP_SCBRAPOS) tcode += 2;
				703
				704	while (try_next) /* Loop for items in this branch */
				705	{
				706	int rc;
				707
				708	switch(*tcode)
				709	{
				710	/* If we reach something we don't understand, it means a new opcode has
				711	been created that hasn't been added to this code. Hopefully this problem
				712	will be discovered during testing. */
				713
				714	default:
				715	return SSB_UNKNOWN;
				716
				717	/* Fail for a valid opcode that implies no starting bits. */
				718
				719	case OP_ACCEPT:
				720	case OP_ASSERT_ACCEPT:
				721	case OP_ALLANY:
				722	case OP_ANY:
				723	case OP_ANYBYTE:
				724	case OP_CIRC:
				725	case OP_CIRCM:
				726	case OP_CLOSE:
				727	case OP_COMMIT:
				728	case OP_COND:
				729	case OP_CREF:
				730	case OP_DEF:
				731	case OP_DOLL:
				732	case OP_DOLLM:
				733	case OP_END:
				734	case OP_EOD:
				735	case OP_EODN:
				736	case OP_EXTUNI:
				737	case OP_FAIL:
				738	case OP_MARK:
				739	case OP_NCREF:
				740	case OP_NOT:
				741	case OP_NOTEXACT:
				742	case OP_NOTEXACTI:
				743	case OP_NOTI:
				744	case OP_NOTMINPLUS:
				745	case OP_NOTMINPLUSI:
				746	case OP_NOTMINQUERY:
				747	case OP_NOTMINQUERYI:
				748	case OP_NOTMINSTAR:
				749	case OP_NOTMINSTARI:
				750	case OP_NOTMINUPTO:
				751	case OP_NOTMINUPTOI:
				752	case OP_NOTPLUS:
				753	case OP_NOTPLUSI:
				754	case OP_NOTPOSPLUS:
				755	case OP_NOTPOSPLUSI:
				756	case OP_NOTPOSQUERY:
				757	case OP_NOTPOSQUERYI:
				758	case OP_NOTPOSSTAR:
				759	case OP_NOTPOSSTARI:
				760	case OP_NOTPOSUPTO:
				761	case OP_NOTPOSUPTOI:
				762	case OP_NOTPROP:
				763	case OP_NOTQUERY:
				764	case OP_NOTQUERYI:
				765	case OP_NOTSTAR:
				766	case OP_NOTSTARI:
				767	case OP_NOTUPTO:
				768	case OP_NOTUPTOI:
				769	case OP_NOT_HSPACE:
				770	case OP_NOT_VSPACE:
				771	case OP_NRREF:
				772	case OP_PROP:
				773	case OP_PRUNE:
				774	case OP_PRUNE_ARG:
				775	case OP_RECURSE:
				776	case OP_REF:
				777	case OP_REFI:
				778	case OP_REVERSE:
				779	case OP_RREF:
				780	case OP_SCOND:
				781	case OP_SET_SOM:
				782	case OP_SKIP:
				783	case OP_SKIP_ARG:
				784	case OP_SOD:
				785	case OP_SOM:
				786	case OP_THEN:
				787	case OP_THEN_ARG:
				788	case OP_XCLASS:
				789	return SSB_FAIL;
				790
				791	/* We can ignore word boundary tests. */
				792
				793	case OP_WORD_BOUNDARY:
				794	case OP_NOT_WORD_BOUNDARY:
				795	tcode++;
				796	break;
				797
				798	/* If we hit a bracket or a positive lookahead assertion, recurse to set
				799	bits from within the subpattern. If it can't find anything, we have to
				800	give up. If it finds some mandatory character(s), we are done for this
				801	branch. Otherwise, carry on scanning after the subpattern. */
				802
				803	case OP_BRA:
				804	case OP_SBRA:
				805	case OP_CBRA:
				806	case OP_SCBRA:
				807	case OP_BRAPOS:
				808	case OP_SBRAPOS:
				809	case OP_CBRAPOS:
				810	case OP_SCBRAPOS:
				811	case OP_ONCE:
				812	case OP_ONCE_NC:
				813	case OP_ASSERT:
				814	rc = set_start_bits(tcode, start_bits, utf8, cd);
				815	if (rc == SSB_FAIL \|\| rc == SSB_UNKNOWN) return rc;
				816	if (rc == SSB_DONE) try_next = FALSE; else
				817	{
				818	do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
				819	tcode += 1 + LINK_SIZE;
				820	}
				821	break;
				822
				823	/* If we hit ALT or KET, it means we haven't found anything mandatory in
				824	this branch, though we might have found something optional. For ALT, we
				825	continue with the next alternative, but we have to arrange that the final
				826	result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
				827	return SSB_CONTINUE: if this is the top level, that indicates failure,
				828	but after a nested subpattern, it causes scanning to continue. */
				829
				830	case OP_ALT:
				831	yield = SSB_CONTINUE;
				832	try_next = FALSE;
				833	break;
				834
				835	case OP_KET:
				836	case OP_KETRMAX:
				837	case OP_KETRMIN:
				838	case OP_KETRPOS:
				839	return SSB_CONTINUE;
				840
				841	/* Skip over callout */
				842
				843	case OP_CALLOUT:
				844	tcode += 2 + 2*LINK_SIZE;
				845	break;
				846
				847	/* Skip over lookbehind and negative lookahead assertions */
				848
				849	case OP_ASSERT_NOT:
				850	case OP_ASSERTBACK:
				851	case OP_ASSERTBACK_NOT:
				852	do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
				853	tcode += 1 + LINK_SIZE;
				854	break;
				855
				856	/* BRAZERO does the bracket, but carries on. */
				857
				858	case OP_BRAZERO:
				859	case OP_BRAMINZERO:
				860	case OP_BRAPOSZERO:
				861	rc = set_start_bits(++tcode, start_bits, utf8, cd);
				862	if (rc == SSB_FAIL \|\| rc == SSB_UNKNOWN) return rc;
				863	/* =========================================================================
				864	See the comment at the head of this function concerning the next line,
				865	which was an old fudge for the benefit of OS/2.
				866	dummy = 1;
				867	========================================================================= */
				868	do tcode += GET(tcode,1); while (*tcode == OP_ALT);
				869	tcode += 1 + LINK_SIZE;
				870	break;
				871
				872	/* SKIPZERO skips the bracket. */
				873
				874	case OP_SKIPZERO:
				875	tcode++;
				876	do tcode += GET(tcode,1); while (*tcode == OP_ALT);
				877	tcode += 1 + LINK_SIZE;
				878	break;
				879
				880	/* Single-char * or ? sets the bit and tries the next item */
				881
				882	case OP_STAR:
				883	case OP_MINSTAR:
				884	case OP_POSSTAR:
				885	case OP_QUERY:
				886	case OP_MINQUERY:
				887	case OP_POSQUERY:
				888	tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
				889	break;
				890
				891	case OP_STARI:
				892	case OP_MINSTARI:
				893	case OP_POSSTARI:
				894	case OP_QUERYI:
				895	case OP_MINQUERYI:
				896	case OP_POSQUERYI:
				897	tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
				898	break;
				899
				900	/* Single-char upto sets the bit and tries the next */
				901
				902	case OP_UPTO:
				903	case OP_MINUPTO:
				904	case OP_POSUPTO:
				905	tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);
				906	break;
				907
				908	case OP_UPTOI:
				909	case OP_MINUPTOI:
				910	case OP_POSUPTOI:
				911	tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);
				912	break;
				913
				914	/* At least one single char sets the bit and stops */
				915
				916	case OP_EXACT:
				917	tcode += 2;
				918	/* Fall through */
				919	case OP_CHAR:
				920	case OP_PLUS:
				921	case OP_MINPLUS:
				922	case OP_POSPLUS:
				923	(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
				924	try_next = FALSE;
				925	break;
				926
				927	case OP_EXACTI:
				928	tcode += 2;
				929	/* Fall through */
				930	case OP_CHARI:
				931	case OP_PLUSI:
				932	case OP_MINPLUSI:
				933	case OP_POSPLUSI:
				934	(void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);
				935	try_next = FALSE;
				936	break;
				937
				938	/* Special spacing and line-terminating items. These recognize specific
				939	lists of characters. The difference between VSPACE and ANYNL is that the
				940	latter can match the two-character CRLF sequence, but that is not
				941	relevant for finding the first character, so their code here is
				942	identical. */
				943
				944	case OP_HSPACE:
				945	SET_BIT(0x09);
				946	SET_BIT(0x20);
				947	if (utf8)
				948	{
				949	SET_BIT(0xC2); /* For U+00A0 */
				950	SET_BIT(0xE1); /* For U+1680, U+180E */
				951	SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
				952	SET_BIT(0xE3); /* For U+3000 */
				953	}
				954	else SET_BIT(0xA0);
				955	try_next = FALSE;
				956	break;
				957
				958	case OP_ANYNL:
				959	case OP_VSPACE:
				960	SET_BIT(0x0A);
				961	SET_BIT(0x0B);
				962	SET_BIT(0x0C);
				963	SET_BIT(0x0D);
				964	if (utf8)
				965	{
				966	SET_BIT(0xC2); /* For U+0085 */
				967	SET_BIT(0xE2); /* For U+2028, U+2029 */
				968	}
				969	else SET_BIT(0x85);
				970	try_next = FALSE;
				971	break;
				972
				973	/* Single character types set the bits and stop. Note that if PCRE_UCP
				974	is set, we do not see these op codes because \d etc are converted to
				975	properties. Therefore, these apply in the case when only characters less
				976	than 256 are recognized to match the types. */
				977
				978	case OP_NOT_DIGIT:
				979	set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
				980	try_next = FALSE;
				981	break;
				982
				983	case OP_DIGIT:
				984	set_type_bits(start_bits, cbit_digit, table_limit, cd);
				985	try_next = FALSE;
				986	break;
				987
				988	/* The cbit_space table has vertical tab as whitespace; we have to
				989	ensure it is set as not whitespace. */
				990
				991	case OP_NOT_WHITESPACE:
				992	set_nottype_bits(start_bits, cbit_space, table_limit, cd);
				993	start_bits[1] \|= 0x08;
				994	try_next = FALSE;
				995	break;
				996
				997	/* The cbit_space table has vertical tab as whitespace; we have to
				998	not set it from the table. */
				999
				1000	case OP_WHITESPACE:
				1001	c = start_bits[1]; /* Save in case it was already set */
				1002	set_type_bits(start_bits, cbit_space, table_limit, cd);
				1003	start_bits[1] = (start_bits[1] & ~0x08) \| c;
				1004	try_next = FALSE;
				1005	break;
				1006
				1007	case OP_NOT_WORDCHAR:
				1008	set_nottype_bits(start_bits, cbit_word, table_limit, cd);
				1009	try_next = FALSE;
				1010	break;
				1011
				1012	case OP_WORDCHAR:
				1013	set_type_bits(start_bits, cbit_word, table_limit, cd);
				1014	try_next = FALSE;
				1015	break;
				1016
				1017	/* One or more character type fudges the pointer and restarts, knowing
				1018	it will hit a single character type and stop there. */
				1019
				1020	case OP_TYPEPLUS:
				1021	case OP_TYPEMINPLUS:
				1022	case OP_TYPEPOSPLUS:
				1023	tcode++;
				1024	break;
				1025
				1026	case OP_TYPEEXACT:
				1027	tcode += 3;
				1028	break;
				1029
				1030	/* Zero or more repeats of character types set the bits and then
				1031	try again. */
				1032
				1033	case OP_TYPEUPTO:
				1034	case OP_TYPEMINUPTO:
				1035	case OP_TYPEPOSUPTO:
				1036	tcode += 2; /* Fall through */
				1037
				1038	case OP_TYPESTAR:
				1039	case OP_TYPEMINSTAR:
				1040	case OP_TYPEPOSSTAR:
				1041	case OP_TYPEQUERY:
				1042	case OP_TYPEMINQUERY:
				1043	case OP_TYPEPOSQUERY:
				1044	switch(tcode[1])
				1045	{
				1046	default:
				1047	case OP_ANY:
				1048	case OP_ALLANY:
				1049	return SSB_FAIL;
				1050
				1051	case OP_HSPACE:
				1052	SET_BIT(0x09);
				1053	SET_BIT(0x20);
				1054	if (utf8)
				1055	{
				1056	SET_BIT(0xC2); /* For U+00A0 */
				1057	SET_BIT(0xE1); /* For U+1680, U+180E */
				1058	SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */
				1059	SET_BIT(0xE3); /* For U+3000 */
				1060	}
				1061	else SET_BIT(0xA0);
				1062	break;
				1063
				1064	case OP_ANYNL:
				1065	case OP_VSPACE:
				1066	SET_BIT(0x0A);
				1067	SET_BIT(0x0B);
				1068	SET_BIT(0x0C);
				1069	SET_BIT(0x0D);
				1070	if (utf8)
				1071	{
				1072	SET_BIT(0xC2); /* For U+0085 */
				1073	SET_BIT(0xE2); /* For U+2028, U+2029 */
				1074	}
				1075	else SET_BIT(0x85);
				1076	break;
				1077
				1078	case OP_NOT_DIGIT:
				1079	set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
				1080	break;
				1081
				1082	case OP_DIGIT:
				1083	set_type_bits(start_bits, cbit_digit, table_limit, cd);
				1084	break;
				1085
				1086	/* The cbit_space table has vertical tab as whitespace; we have to
				1087	ensure it gets set as not whitespace. */
				1088
				1089	case OP_NOT_WHITESPACE:
				1090	set_nottype_bits(start_bits, cbit_space, table_limit, cd);
				1091	start_bits[1] \|= 0x08;
				1092	break;
				1093
				1094	/* The cbit_space table has vertical tab as whitespace; we have to
				1095	avoid setting it. */
				1096
				1097	case OP_WHITESPACE:
				1098	c = start_bits[1]; /* Save in case it was already set */
				1099	set_type_bits(start_bits, cbit_space, table_limit, cd);
				1100	start_bits[1] = (start_bits[1] & ~0x08) \| c;
				1101	break;
				1102
				1103	case OP_NOT_WORDCHAR:
				1104	set_nottype_bits(start_bits, cbit_word, table_limit, cd);
				1105	break;
				1106
				1107	case OP_WORDCHAR:
				1108	set_type_bits(start_bits, cbit_word, table_limit, cd);
				1109	break;
				1110	}
				1111
				1112	tcode += 2;
				1113	break;
				1114
				1115	/* Character class where all the information is in a bit map: set the
				1116	bits and either carry on or not, according to the repeat count. If it was
				1117	a negative class, and we are operating with UTF-8 characters, any byte
				1118	with a value >= 0xc4 is a potentially valid starter because it starts a
				1119	character with a value > 255. */
				1120
				1121	case OP_NCLASS:
				1122	#ifdef SUPPORT_UTF8
				1123	if (utf8)
				1124	{
				1125	start_bits[24] \|= 0xf0; /* Bits for 0xc4 - 0xc8 */
				1126	memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
				1127	}
				1128	#endif
				1129	/* Fall through */
				1130
				1131	case OP_CLASS:
				1132	{
				1133	tcode++;
				1134
				1135	/* In UTF-8 mode, the bits in a bit map correspond to character
				1136	values, not to byte values. However, the bit map we are constructing is
				1137	for byte values. So we have to do a conversion for characters whose
				1138	value is > 127. In fact, there are only two possible starting bytes for
				1139	characters in the range 128 - 255. */
				1140
				1141	#ifdef SUPPORT_UTF8
				1142	if (utf8)
				1143	{
				1144	for (c = 0; c < 16; c++) start_bits[c] \|= tcode[c];
				1145	for (c = 128; c < 256; c++)
				1146	{
				1147	if ((tcode[c/8] && (1 << (c&7))) != 0)
				1148	{
				1149	int d = (c >> 6) \| 0xc0; /* Set bit for this starter */
				1150	start_bits[d/8] \|= (1 << (d&7)); /* and then skip on to the */
				1151	c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
				1152	}
				1153	}
				1154	}
				1155
				1156	/* In non-UTF-8 mode, the two bit maps are completely compatible. */
				1157
				1158	else
				1159	#endif
				1160	{
				1161	for (c = 0; c < 32; c++) start_bits[c] \|= tcode[c];
				1162	}
				1163
				1164	/* Advance past the bit map, and act on what follows. For a zero
				1165	minimum repeat, continue; otherwise stop processing. */
				1166
				1167	tcode += 32;
				1168	switch (*tcode)
				1169	{
				1170	case OP_CRSTAR:
				1171	case OP_CRMINSTAR:
				1172	case OP_CRQUERY:
				1173	case OP_CRMINQUERY:
				1174	tcode++;
				1175	break;
				1176
				1177	case OP_CRRANGE:
				1178	case OP_CRMINRANGE:
				1179	if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
				1180	else try_next = FALSE;
				1181	break;
				1182
				1183	default:
				1184	try_next = FALSE;
				1185	break;
				1186	}
				1187	}
				1188	break; /* End of bitmap class handling */
				1189
				1190	} /* End of switch */
				1191	} /* End of try_next loop */
				1192
				1193	code += GET(code, 1); /* Advance to next branch */
				1194	}
				1195	while (*code == OP_ALT);
				1196	return yield;
				1197	}
				1198
				1199
				1200
				1201
				1202
				1203	/*************************************************
				1204	* Study a compiled expression *
				1205	*************************************************/
				1206
				1207	/* This function is handed a compiled expression that it must study to produce
				1208	information that will speed up the matching. It returns a pcre_extra block
				1209	which then gets handed back to pcre_exec().
				1210
				1211	Arguments:
				1212	re points to the compiled expression
				1213	options contains option bits
				1214	errorptr points to where to place error messages;
				1215	set NULL unless error
				1216
				1217	Returns: pointer to a pcre_extra block, with study_data filled in and the
				1218	appropriate flags set;
				1219	NULL on error or if no optimization possible
				1220	*/
				1221
				1222	PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
				1223	pcre_study(const pcre external_re, int options, const char *errorptr)
				1224	{
				1225	int min;
				1226	BOOL bits_set = FALSE;
				1227	uschar start_bits[32];
				1228	pcre_extra *extra = NULL;
				1229	pcre_study_data *study;
				1230	const uschar *tables;
				1231	uschar *code;
				1232	compile_data compile_block;
				1233	const real_pcre re = (const real_pcre )external_re;
				1234
				1235	*errorptr = NULL;
				1236
				1237	if (re == NULL \|\| re->magic_number != MAGIC_NUMBER)
				1238	{
				1239	*errorptr = "argument is not a compiled regular expression";
				1240	return NULL;
				1241	}
				1242
				1243	if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
				1244	{
				1245	*errorptr = "unknown or incorrect option bit(s) set";
				1246	return NULL;
				1247	}
				1248
				1249	code = (uschar *)re + re->name_table_offset +
				1250	(re->name_count * re->name_entry_size);
				1251
				1252	/* For an anchored pattern, or an unanchored pattern that has a first char, or
				1253	a multiline pattern that matches only at "line starts", there is no point in
				1254	seeking a list of starting bytes. */
				1255
				1256	if ((re->options & PCRE_ANCHORED) == 0 &&
				1257	(re->flags & (PCRE_FIRSTSET\|PCRE_STARTLINE)) == 0)
				1258	{
				1259	int rc;
				1260
				1261	/* Set the character tables in the block that is passed around */
				1262
				1263	tables = re->tables;
				1264	if (tables == NULL)
				1265	(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
				1266	(void *)(&tables));
				1267
				1268	compile_block.lcc = tables + lcc_offset;
				1269	compile_block.fcc = tables + fcc_offset;
				1270	compile_block.cbits = tables + cbits_offset;
				1271	compile_block.ctypes = tables + ctypes_offset;
				1272
				1273	/* See if we can find a fixed set of initial characters for the pattern. */
				1274
				1275	memset(start_bits, 0, 32 * sizeof(uschar));
				1276	rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
				1277	&compile_block);
				1278	bits_set = rc == SSB_DONE;
				1279	if (rc == SSB_UNKNOWN)
				1280	{
				1281	*errorptr = "internal error: opcode not recognized";
				1282	return NULL;
				1283	}
				1284	}
				1285
				1286	/* Find the minimum length of subject string. */
				1287
				1288	switch(min = find_minlength(code, code, re->options, 0))
				1289	{
				1290	case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
				1291	case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
				1292	default: break;
				1293	}
				1294
				1295	/* If a set of starting bytes has been identified, or if the minimum length is
				1296	greater than zero, or if JIT optimization has been requested, get a pcre_extra
				1297	block and a pcre_study_data block. The study data is put in the latter, which
				1298	is pointed to by the former, which may also get additional data set later by
				1299	the calling program. At the moment, the size of pcre_study_data is fixed. We
				1300	nevertheless save it in a field for returning via the pcre_fullinfo() function
				1301	so that if it becomes variable in the future, we don't have to change that
				1302	code. */
				1303
				1304	if (bits_set \|\| min > 0
				1305	#ifdef SUPPORT_JIT
				1306	\|\| (options & PCRE_STUDY_JIT_COMPILE) != 0
				1307	#endif
				1308	)
				1309	{
				1310	extra = (pcre_extra *)(pcre_malloc)
				1311	(sizeof(pcre_extra) + sizeof(pcre_study_data));
				1312	if (extra == NULL)
				1313	{
				1314	*errorptr = "failed to get memory";
				1315	return NULL;
				1316	}
				1317
				1318	study = (pcre_study_data )((char )extra + sizeof(pcre_extra));
				1319	extra->flags = PCRE_EXTRA_STUDY_DATA;
				1320	extra->study_data = study;
				1321
				1322	study->size = sizeof(pcre_study_data);
				1323	study->flags = 0;
				1324
				1325	/* Set the start bits always, to avoid unset memory errors if the
				1326	study data is written to a file, but set the flag only if any of the bits
				1327	are set, to save time looking when none are. */
				1328
				1329	if (bits_set)
				1330	{
				1331	study->flags \|= PCRE_STUDY_MAPPED;
				1332	memcpy(study->start_bits, start_bits, sizeof(start_bits));
				1333	}
				1334	else memset(study->start_bits, 0, 32 * sizeof(uschar));
				1335
				1336	/* Always set the minlength value in the block, because the JIT compiler
				1337	makes use of it. However, don't set the bit unless the length is greater than
				1338	zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
				1339	checking the zero case. */
				1340
				1341	if (min > 0)
				1342	{
				1343	study->flags \|= PCRE_STUDY_MINLEN;
				1344	study->minlength = min;
				1345	}
				1346	else study->minlength = 0;
				1347
				1348	/* If JIT support was compiled and requested, attempt the JIT compilation.
				1349	If no starting bytes were found, and the minimum length is zero, and JIT
				1350	compilation fails, abandon the extra block and return NULL. */
				1351
				1352	#ifdef SUPPORT_JIT
				1353	extra->executable_jit = NULL;
				1354	if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra);
				1355	if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0)
				1356	{
				1357	pcre_free_study(extra);
				1358	extra = NULL;
				1359	}
				1360	#endif
				1361	}
				1362
				1363	return extra;
				1364	}
				1365
				1366
				1367	/*************************************************
				1368	* Free the study data *
				1369	*************************************************/
				1370
				1371	/* This function frees the memory that was obtained by pcre_study().
				1372
				1373	Argument: a pointer to the pcre_extra block
				1374	Returns: nothing
				1375	*/
				1376
				1377	PCRE_EXP_DEFN void
				1378	pcre_free_study(pcre_extra *extra)
				1379	{
				1380	#ifdef SUPPORT_JIT
				1381	if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
				1382	extra->executable_jit != NULL)
				1383	_pcre_jit_free(extra->executable_jit);
				1384	#endif
				1385	pcre_free(extra);
				1386	}
				1387
				1388	/* End of pcre_study.c */