/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

 Main Library written by Philip Hazel
 Copyright (c) 1997-2011 University of Cambridge

 This JIT compiler regression test program was written by Zoltan Herczeg
 Copyright (c) 2010-2011

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice,
 this list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.

 * Neither the name of the University of Cambridge nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
42
43#ifdef HAVE_CONFIG_H
44#include "config.h"
45#endif
46
47#include <stdio.h>
48#include <string.h>
49#include "pcre.h"
50
/* Test-local marker bit, not passed to PCRE: it is stripped from the options
   before pcre_compile() is called, and a test case carrying it is executed
   but its JIT and interpreter results are not compared. */
#define PCRE_BUG 0x80000000
52
/*
 Hungarian utf8 characters
 \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
 \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
 \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
 \xc2\x85 = 0x85 (Next Line = NEL)
 \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
 \xe2\x80\xa8 = 0x2028 (Line Separator)
 \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
 \xcc\x8d = 781 (Something with Mark property)
*/
64
65static void setstack(pcre_extra *extra);
66static int regression_tests(void);
67
68int main(void)
69{
70 int jit = 0;
71 pcre_config(PCRE_CONFIG_JIT, &jit);
72 if (!jit) {
73 printf("JIT must be enabled to run pcre_jit_test\n");
74 return 1;
75 }
76 return regression_tests();
77}
78
79static pcre_jit_stack* callback(void *arg)
80{
81 return (pcre_jit_stack *)arg;
82}
83
84static void setstack(pcre_extra *extra)
85{
86 static pcre_jit_stack *stack;
87 if (stack) pcre_jit_stack_free(stack);
88 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
89 pcre_assign_jit_stack(extra, callback, stack);
90}
91
92/* --------------------------------------------------------------------------------------- */
93
/* Shorthand option masks used by the test table.
   Letters: C = caseless, M = multiline, U = UTF-8, A = ANYCRLF newlines,
   P = Unicode properties (UCP). */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(PCRE_CASELESS | MUAP)
101
/* One entry of the regression table. */
struct regression_test_case {
	/* PCRE option bits for this case, optionally or-ed with PCRE_BUG. */
	int flags;
	/* Offset in `input` handed to pcre_exec() as the start offset. */
	int start_offset;
	/* Regular expression to compile; NULL marks the end of the table. */
	const char *pattern;
	/* Subject string the pattern is matched against. */
	const char *input;
};
108
109static struct regression_test_case regression_test_cases[] = {
110 /* Constant strings. */
111 { MUA, 0, "AbC", "AbAbC" },
112 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
113 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
114 { MA, 0, "[^a]", "aAbB" },
115 { CMA, 0, "[^m]", "mMnN" },
116 { MA, 0, "a[^b][^#]", "abacd" },
117 { CMA, 0, "A[^B][^E]", "abacd" },
118 { CMUA, 0, "[^x][^#]", "XxBll" },
119 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
120 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
121 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
122 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
123 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
124 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
125 { MUA, 0, "[axd]", "sAXd" },
126 { CMUA, 0, "[axd]", "sAXd" },
127 { CMUA, 0, "[^axd]", "DxA" },
128 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
129 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
130 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
131 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
132 { MUA, 0, "[^a]", "\xc2\x80[]" },
133 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
134 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
135 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
136 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
137 { PCRE_CASELESS, 0, "a1", "Aa1" },
138 { MA, 0, "\\Ca", "cda" },
139 { CMA, 0, "\\Ca", "CDA" },
140 { MA, 0, "\\Cx", "cda" },
141 { CMA, 0, "\\Cx", "CDA" },
142
143 /* Assertions. */
144 { MUA, 0, "\\b[^A]", "A_B#" },
145 { MA, 0, "\\b\\W", "\n*" },
146 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
147 { MAP, 0, "\\B", "_\xa1" },
148 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
149 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
150 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
151 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
152 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
153 { MUA, 0, "\\b.", "\xcd\xbe" },
154 { MA, 0, "\\R^", "\n" },
155 { MA, 1, "^", "\n" },
156 { 0, 0, "^ab", "ab" },
157 { 0, 0, "^ab", "aab" },
158 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
159 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
160 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
161 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
162 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
163 { 0, 0, "ab$", "ab" },
164 { 0, 0, "ab$", "ab\r\n" },
165 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
166 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
167 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
168 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
169 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
170 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
171 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
172 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
173 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
174 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
175 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
176 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
177 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
178 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
179 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
180 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
181 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
182 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
183 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
184 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
185 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
186 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
187 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
188 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
189 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
190 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
191 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
192 { MA, 0, "\\Aa", "aaa" },
193 { MA, 1, "\\Aa", "aaa" },
194 { MA, 1, "\\Ga", "aaa" },
195 { MA, 1, "\\Ga", "aba" },
196 { MA, 0, "a\\z", "aaa" },
197 { MA, 0, "a\\z", "aab" },
198
199 /* Brackets. */
200 { MUA, 0, "(ab|bb|cd)", "bacde" },
201 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
202 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
203 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
204 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
205 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
206
207 /* Greedy and non-greedy ? operators. */
208 { MUA, 0, "(?:a)?a", "laab" },
209 { CMUA, 0, "(A)?A", "llaab" },
210 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
211 { MUA, 0, "(a)?a", "manm" },
212 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
213 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
214 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
215
216 /* Greedy and non-greedy + operators */
217 { MUA, 0, "(aa)+aa", "aaaaaaa" },
218 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
219 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
220 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
221 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
222 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
223 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
224
225 /* Greedy and non-greedy * operators */
226 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
227 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
228 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
229 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
230 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
231 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
232 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
233 { MA, 0, "((?:a|)*){0}a", "a" },
234
235 /* Combining ? + * operators */
236 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
237 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
238 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
239 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
240 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
241
242 /* Single character iterators. */
243 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
244 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
245 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
246 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
247 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
248 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
249 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
250 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
251 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
252 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
253 { MUA, 0, "(a?+[^b])+", "babaacacb" },
254 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
255 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
256 { CMUA, 0, "[c-f]+k", "DemmFke" },
257 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
258 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
259 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
260 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
261 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
262 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
263 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
264 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
265 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
266 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
267 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
268 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
269 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
270
271 /* Basic character sets. */
272 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
273 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
274 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
275 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
276 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
277 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
278
279 /* Unicode properties. */
280 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
281 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
282 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
283 { MUAP, 0, "[\\P{Any}]", "abc" },
284 { MUAP, 0, "[^\\p{Any}]", "abc" },
285 { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
286 { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
287 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
288 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
289 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
290 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
291 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
292 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
293 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
294 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
295 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
296 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
297 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
298 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
299 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
300
301 /* Possible empty brackets. */
302 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
303 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
304 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
305 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
306 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
307 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
308 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
309 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
310 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
311 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
312
313 /* Start offset. */
314 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
315 { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
316 { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
317 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
318
319 /* Newline. */
320 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
321 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
322 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
323
324 /* Any character except newline or any newline. */
325 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
326 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
327 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
328 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
329 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
330 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
331 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
332 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
333 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
334 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
335 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
336 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
337 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
338 { MUA, 0, "\\R+", "ab" },
339 { MUA, 0, "\\R+", "ab\r\n\r" },
340 { MUA, 0, "\\R*", "ab\r\n\r" },
341 { MUA, 0, "\\R*", "\r\n\r" },
342 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
343 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
344 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
345 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
346 { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
347 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
348 { MUA, 0, "\\R*\\R\\R", "\n\r" },
349 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
350 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
351
352 /* Atomic groups (no fallback from "next" direction). */
353 { MUA, 0, "(?>ab)ab", "bab" },
354 { MUA, 0, "(?>(ab))ab", "bab" },
355 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
356 "bababcdedefgheijijklmlmnop" },
357 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
358 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
359 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
360 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
361 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
362 { MUA, 0, "(?>x|)*$", "aaa" },
363 { MUA, 0, "(?>(x)|)*$", "aaa" },
364 { MUA, 0, "(?>x|())*$", "aaa" },
365 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
366 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
367 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
368 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
369 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
370 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
371 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
372 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
373 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
374 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
375 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
376 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
377 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
378 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
379 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
380 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
381 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
382 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
383 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
384 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
385 { MUA, 0, "\\X{2,4}", "abcdef" },
386 { MUA, 0, "\\X{2,4}?", "abcdef" },
387 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
388 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
389 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
390 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
391
392 /* Possessive quantifiers. */
393 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
394 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
395 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
396 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
397 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
398 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
399 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
400 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
401 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
402 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
403 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
404 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
405 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
406 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
407 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
408 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
409 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
410 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
411 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
412 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
413 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
414 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
415 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
416 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
417 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
418 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
419 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
420 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
421 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
422 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
423 { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
424 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
425 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
426 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
427 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
428
429 /* Back references. */
430 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
431 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
432 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
433 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
434 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
435 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
436 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
437 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
438 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
439 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
440 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
441 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
442 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
443 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
444 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
445 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
446 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
447 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
448 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
449 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
450 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
451 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
452
453 /* Assertions. */
454 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
455 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
456 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
457 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
458 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
459 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
460 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
461 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
462 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
463 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
464 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
465 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
466 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
467 { MUA, 0, "(?=(?>(a))m)amk", "a k" },
468 { MUA, 0, "(?!(?>(a))m)amk", "a k" },
469 { MUA, 0, "(?>(?=(a))am)amk", "a k" },
470 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
471 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
472 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
473 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
474 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
475 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
476 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
477 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
478
479 /* Not empty, ACCEPT, FAIL */
480 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
481 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
482 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
483 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
484 { MUA, 0, "a(*ACCEPT)b", "ab" },
485 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
486 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
487 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
488 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
489 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
490 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
491 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
492 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
493 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
494 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
495 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
496 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
497 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
498 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
499
500 /* Conditional blocks. */
501 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
502 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
503 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
504 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
505 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
506 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
507 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
508 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
509 { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
510 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
511 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
512 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
513 { MUA, 0, "(?(?=a)ab)", "a" },
514 { MUA, 0, "(?(?<!b)c)", "b" },
515 { MUA, 0, "(?(DEFINE)a(b))", "a" },
516 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
517 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
518 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
519 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
520 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
521 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
522 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
523 { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
524 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
525 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
526 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
527 { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
528 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
529 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
530 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
531 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
532 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
533 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
534 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
535
536 /* Set start of match. */
537 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
538 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
539 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
540 { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
541 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
542
543 /* First line. */
544 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
545 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
546 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
547 { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
548 { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
549 { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
550 { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
551 { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
552 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
553 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
554 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
555 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
556 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
557 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
558 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
559 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
560 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
561
562 /* Recurse. */
563 { MUA, 0, "(a)(?1)", "aa" },
564 { MUA, 0, "((a))(?1)", "aa" },
565 { MUA, 0, "(b|a)(?1)", "aa" },
566 { MUA, 0, "(b|(a))(?1)", "aa" },
567 { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
568 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
569 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
570 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
571 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
572 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
573 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
574 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
575 { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
576 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
577 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
578 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
579 { MUA, 0, "b|<(?R)*>", "<<b>" },
580 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
581 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
582 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
583 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
584 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
585 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
586 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
587 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
588
589 /* Deep recursion. */
590 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
591 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
592 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
593
594 /* Deep recursion: Stack limit reached. */
595 { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
596 { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
597 { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
598 { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
599 { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
600
601 { 0, 0, NULL, NULL }
602};
603
604static int regression_tests(void)
605{
606 pcre *re;
607 struct regression_test_case *current = regression_test_cases;
608 const char *error;
609 pcre_extra *extra;
610 int utf8 = 0, ucp = 0;
611 int ovector1[32];
612 int ovector2[32];
613 int return_value1, return_value2;
614 int i, err_offs;
615 int total = 0, succesful = 0;
616 int counter = 0;
617 int disabled_flags = PCRE_BUG;
618
619 /* This test compares the behaviour of interpreter and JIT. Although disabling
620 utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
621 still considered successful from pcre_jit_test point of view. */
622
623 pcre_config(PCRE_CONFIG_UTF8, &utf8);
624 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
625 if (!utf8)
626 disabled_flags |= PCRE_UTF8;
627 if (!ucp)
628 disabled_flags |= PCRE_UCP;
629
630 printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
631 while (current->pattern) {
632 /* printf("\nPattern: %s :\n", current->pattern); */
633 total++;
634
635 error = NULL;
636 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
637
638 if (!re) {
639 if (utf8 && ucp)
640 printf("\nCannot compile pattern: %s\n", current->pattern);
641 else {
642 /* Some patterns cannot be compiled when either of utf8
643 or ucp is disabled. We just skip them. */
644 printf(".");
645 succesful++;
646 }
647 current++;
648 continue;
649 }
650
651 error = NULL;
652 extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
653 if (!extra) {
654 printf("\nCannot study pattern: %s\n", current->pattern);
655 current++;
656 continue;
657 }
658
659 if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
660 printf("\nJIT compiler does not support: %s\n", current->pattern);
661 current++;
662 continue;
663 }
664
665 counter++;
666 if ((counter & 0x3) != 0)
667 setstack(extra);
668
669 for (i = 0; i < 32; ++i)
670 ovector1[i] = -2;
671 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
672
673 for (i = 0; i < 32; ++i)
674 ovector2[i] = -2;
675 return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
676
677 /* If PCRE_BUG is set, just run the test, but do not compare the results.
678 Segfaults can still be captured. */
679 if (!(current->flags & PCRE_BUG)) {
680 if (return_value1 != return_value2) {
681 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
682 current++;
683 continue;
684 }
685
686 if (return_value1 >= 0) {
687 return_value1 *= 2;
688 err_offs = 0;
689 for (i = 0; i < return_value1; ++i)
690 if (ovector1[i] != ovector2[i]) {
691 printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
692 err_offs = 1;
693 }
694 if (err_offs) {
695 current++;
696 continue;
697 }
698 }
699 }
700
701 pcre_free_study(extra);
702 pcre_free(re);
703
704 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
705 printf(".");
706 fflush(stdout);
707 current++;
708 succesful++;
709 }
710
711 if (total == succesful) {
712 printf("\nAll JIT regression tests are successfully passed.\n");
713 return 0;
714 } else {
715 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
716 return 1;
717 }
718}
719
720/* End of pcre_jit_test.c */