// -*- coding: utf-8 -*-
//
// Copyright (c) 2005 - 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// TODO: Test extractions for PartialMatch/Consume
35
36#ifdef HAVE_CONFIG_H
37#include "config.h"
38#endif
39
40#include <stdio.h>
41#include <string.h> /* for memset and strcmp */
42#include <cassert>
43#include <vector>
44#include "pcrecpp.h"
45
46using pcrecpp::StringPiece;
47using pcrecpp::RE;
48using pcrecpp::RE_Options;
49using pcrecpp::Hex;
50using pcrecpp::Octal;
51using pcrecpp::CRadix;
52
// When true, the option tests in this file also print a short description
// of each case they run.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure, the file name, line number and the text of the condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                                   \
  if (!(condition)) {                                           \
    fprintf(stderr, "%s:%d: Check failed: %s\n",                \
            __FILE__, __LINE__, #condition);                    \
    exit(1);                                                    \
  }                                                             \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so
// that arguments containing operators of lower precedence than == (such as
// &, |, ^, && or ?:) are compared as whole expressions instead of being
// re-associated around the ==.
#define CHECK_EQ(a, b) CHECK((a) == (b))
68
69static void Timing1(int num_iters) {
70 // Same pattern lots of times
71 RE pattern("ruby:\\d+");
72 StringPiece p("ruby:1234");
73 for (int j = num_iters; j > 0; j--) {
74 CHECK(pattern.FullMatch(p));
75 }
76}
77
78static void Timing2(int num_iters) {
79 // Same pattern lots of times
80 RE pattern("ruby:(\\d+)");
81 int i;
82 for (int j = num_iters; j > 0; j--) {
83 CHECK(pattern.FullMatch("ruby:1234", &i));
84 CHECK_EQ(i, 1234);
85 }
86}
87
88static void Timing3(int num_iters) {
89 string text_string;
90 for (int j = num_iters; j > 0; j--) {
91 text_string += "this is another line\n";
92 }
93
94 RE line_matcher(".*\n");
95 string line;
96 StringPiece text(text_string);
97 int counter = 0;
98 while (line_matcher.Consume(&text)) {
99 counter++;
100 }
101 printf("Matched %d lines\n", counter);
102}
103
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Check for memory leaks: constructs 100000 RE objects, each from a distinct
// pattern, and compares the process size reported by VirtualProcessSize()
// half-way through with the size at the end.  The test fails if the size
// grew by 2% or more over the second half of the loop.
// NOTE(review): VirtualProcessSize() is not defined in this file; it is
// assumed to return an unsigned 64-bit byte count -- confirm when enabling.
static void LeakTest() {
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size, and the type that "%llu" expects.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  const double growth = double(final_size - initial_size) / final_size;
  printf("Growth: %0.2f%%", growth * 100);
  CHECK(growth < 0.02);  // Allow < 2% growth
}

#endif
126
127static void RadixTests() {
128 printf("Testing hex\n");
129
130#define CHECK_HEX(type, value) \
131 do { \
132 type v; \
133 CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \
134 CHECK_EQ(v, 0x ## value); \
135 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \
136 CHECK_EQ(v, 0x ## value); \
137 } while(0)
138
139 CHECK_HEX(short, 2bad);
140 CHECK_HEX(unsigned short, 2badU);
141 CHECK_HEX(int, dead);
142 CHECK_HEX(unsigned int, deadU);
143 CHECK_HEX(long, 7eadbeefL);
144 CHECK_HEX(unsigned long, deadbeefUL);
145#ifdef HAVE_LONG_LONG
146 CHECK_HEX(long long, 12345678deadbeefLL);
147#endif
148#ifdef HAVE_UNSIGNED_LONG_LONG
149 CHECK_HEX(unsigned long long, cafebabedeadbeefULL);
150#endif
151
152#undef CHECK_HEX
153
154 printf("Testing octal\n");
155
156#define CHECK_OCTAL(type, value) \
157 do { \
158 type v; \
159 CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \
160 CHECK_EQ(v, 0 ## value); \
161 CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \
162 CHECK_EQ(v, 0 ## value); \
163 } while(0)
164
165 CHECK_OCTAL(short, 77777);
166 CHECK_OCTAL(unsigned short, 177777U);
167 CHECK_OCTAL(int, 17777777777);
168 CHECK_OCTAL(unsigned int, 37777777777U);
169 CHECK_OCTAL(long, 17777777777L);
170 CHECK_OCTAL(unsigned long, 37777777777UL);
171#ifdef HAVE_LONG_LONG
172 CHECK_OCTAL(long long, 777777777777777777777LL);
173#endif
174#ifdef HAVE_UNSIGNED_LONG_LONG
175 CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL);
176#endif
177
178#undef CHECK_OCTAL
179
180 printf("Testing decimal\n");
181
182#define CHECK_DECIMAL(type, value) \
183 do { \
184 type v; \
185 CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \
186 CHECK_EQ(v, value); \
187 CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \
188 CHECK_EQ(v, value); \
189 } while(0)
190
191 CHECK_DECIMAL(short, -1);
192 CHECK_DECIMAL(unsigned short, 9999);
193 CHECK_DECIMAL(int, -1000);
194 CHECK_DECIMAL(unsigned int, 12345U);
195 CHECK_DECIMAL(long, -10000000L);
196 CHECK_DECIMAL(unsigned long, 3083324652U);
197#ifdef HAVE_LONG_LONG
198 CHECK_DECIMAL(long long, -100000000000000LL);
199#endif
200#ifdef HAVE_UNSIGNED_LONG_LONG
201 CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL);
202#endif
203
204#undef CHECK_DECIMAL
205
206}
207
208static void TestReplace() {
209 printf("Testing Replace\n");
210
211 struct ReplaceTest {
212 const char *regexp;
213 const char *rewrite;
214 const char *original;
215 const char *single;
216 const char *global;
217 int global_count; // the expected return value from ReplaceAll
218 };
219 static const ReplaceTest tests[] = {
220 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
221 "\\2\\1ay",
222 "the quick brown fox jumps over the lazy dogs.",
223 "ethay quick brown fox jumps over the lazy dogs.",
224 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
225 9 },
226 { "\\w+",
227 "\\0-NOSPAM",
228 "paul.haahr@google.com",
229 "paul-NOSPAM.haahr@google.com",
230 "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM",
231 4 },
232 { "^",
233 "(START)",
234 "foo",
235 "(START)foo",
236 "(START)foo",
237 1 },
238 { "^",
239 "(START)",
240 "",
241 "(START)",
242 "(START)",
243 1 },
244 { "$",
245 "(END)",
246 "",
247 "(END)",
248 "(END)",
249 1 },
250 { "b",
251 "bb",
252 "ababababab",
253 "abbabababab",
254 "abbabbabbabbabb",
255 5 },
256 { "b",
257 "bb",
258 "bbbbbb",
259 "bbbbbbb",
260 "bbbbbbbbbbbb",
261 6 },
262 { "b+",
263 "bb",
264 "bbbbbb",
265 "bb",
266 "bb",
267 1 },
268 { "b*",
269 "bb",
270 "bbbbbb",
271 "bb",
272 "bbbb",
273 2 },
274 { "b*",
275 "bb",
276 "aaaaa",
277 "bbaaaaa",
278 "bbabbabbabbabbabb",
279 6 },
280 { "b*",
281 "bb",
282 "aa\naa\n",
283 "bbaa\naa\n",
284 "bbabbabb\nbbabbabb\nbb",
285 7 },
286 { "b*",
287 "bb",
288 "aa\raa\r",
289 "bbaa\raa\r",
290 "bbabbabb\rbbabbabb\rbb",
291 7 },
292 { "b*",
293 "bb",
294 "aa\r\naa\r\n",
295 "bbaa\r\naa\r\n",
296 "bbabbabb\r\nbbabbabb\r\nbb",
297 7 },
298 // Check empty-string matching (it's tricky!)
299 { "aa|b*",
300 "@",
301 "aa",
302 "@",
303 "@@",
304 2 },
305 { "b*|aa",
306 "@",
307 "aa",
308 "@aa",
309 "@@@",
310 3 },
311#ifdef SUPPORT_UTF8
312 { "b*",
313 "bb",
314 "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8
315 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8",
316 "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb",
317 5 },
318 { "b*",
319 "bb",
320 "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8
321 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n",
322 ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0"
323 "bb\nbb""\xE3\x81\xB8""bb\r\nbb"),
324 9 },
325#endif
326 { "", NULL, NULL, NULL, NULL, 0 }
327 };
328
329#ifdef SUPPORT_UTF8
330 const bool support_utf8 = true;
331#else
332 const bool support_utf8 = false;
333#endif
334
335 for (const ReplaceTest *t = tests; t->original != NULL; ++t) {
336 RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8));
337 assert(re.error().empty());
338 string one(t->original);
339 CHECK(re.Replace(t->rewrite, &one));
340 CHECK_EQ(one, t->single);
341 string all(t->original);
342 const int replace_count = re.GlobalReplace(t->rewrite, &all);
343 CHECK_EQ(all, t->global);
344 CHECK_EQ(replace_count, t->global_count);
345 }
346
347 // One final test: test \r\n replacement when we're not in CRLF mode
348 {
349 RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8));
350 assert(re.error().empty());
351 string all("aa\r\naa\r\n");
352 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
353 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
354 }
355 {
356 RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8));
357 assert(re.error().empty());
358 string all("aa\r\naa\r\n");
359 CHECK_EQ(re.GlobalReplace("bb", &all), 9);
360 CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb"));
361 }
362 // TODO: test what happens when no PCRE_NEWLINE_* flag is set.
363 // Alas, the answer depends on how pcre was compiled.
364}
365
366static void TestExtract() {
367 printf("Testing Extract\n");
368
369 string s;
370
371 CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s));
372 CHECK_EQ(s, "kremvax!boris");
373
374 // check the RE interface as well
375 CHECK(RE(".*").Extract("'\\0'", "foo", &s));
376 CHECK_EQ(s, "'foo'");
377 CHECK(!RE("bar").Extract("'\\0'", "baz", &s));
378 CHECK_EQ(s, "'foo'");
379}
380
381static void TestConsume() {
382 printf("Testing Consume\n");
383
384 string word;
385
386 string s(" aaa b!@#$@#$cccc");
387 StringPiece input(s);
388
389 RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
390 CHECK(r.Consume(&input, &word));
391 CHECK_EQ(word, "aaa");
392 CHECK(r.Consume(&input, &word));
393 CHECK_EQ(word, "b");
394 CHECK(! r.Consume(&input, &word));
395}
396
397static void TestFindAndConsume() {
398 printf("Testing FindAndConsume\n");
399
400 string word;
401
402 string s(" aaa b!@#$@#$cccc");
403 StringPiece input(s);
404
405 RE r("(\\w+)"); // matches a word
406 CHECK(r.FindAndConsume(&input, &word));
407 CHECK_EQ(word, "aaa");
408 CHECK(r.FindAndConsume(&input, &word));
409 CHECK_EQ(word, "b");
410 CHECK(r.FindAndConsume(&input, &word));
411 CHECK_EQ(word, "cccc");
412 CHECK(! r.FindAndConsume(&input, &word));
413}
414
415static void TestMatchNumberPeculiarity() {
416 printf("Testing match-number peculiarity\n");
417
418 string word1;
419 string word2;
420 string word3;
421
422 RE r("(foo)|(bar)|(baz)");
423 CHECK(r.PartialMatch("foo", &word1, &word2, &word3));
424 CHECK_EQ(word1, "foo");
425 CHECK_EQ(word2, "");
426 CHECK_EQ(word3, "");
427 CHECK(r.PartialMatch("bar", &word1, &word2, &word3));
428 CHECK_EQ(word1, "");
429 CHECK_EQ(word2, "bar");
430 CHECK_EQ(word3, "");
431 CHECK(r.PartialMatch("baz", &word1, &word2, &word3));
432 CHECK_EQ(word1, "");
433 CHECK_EQ(word2, "");
434 CHECK_EQ(word3, "baz");
435 CHECK(!r.PartialMatch("f", &word1, &word2, &word3));
436
437 string a;
438 CHECK(RE("(foo)|hello").FullMatch("hello", &a));
439 CHECK_EQ(a, "");
440}
441
442static void TestRecursion() {
443 printf("Testing recursion\n");
444
445 // Get one string that passes (sometimes), one that never does.
446 string text_good("abcdefghijk");
447 string text_bad("acdefghijkl");
448
449 // According to pcretest, matching text_good against (\w+)*b
450 // requires match_limit of at least 8192, and match_recursion_limit
451 // of at least 37.
452
453 RE_Options options_ml;
454 options_ml.set_match_limit(8192);
455 RE re("(\\w+)*b", options_ml);
456 CHECK(re.PartialMatch(text_good) == true);
457 CHECK(re.PartialMatch(text_bad) == false);
458 CHECK(re.FullMatch(text_good) == false);
459 CHECK(re.FullMatch(text_bad) == false);
460
461 options_ml.set_match_limit(1024);
462 RE re2("(\\w+)*b", options_ml);
463 CHECK(re2.PartialMatch(text_good) == false); // because of match_limit
464 CHECK(re2.PartialMatch(text_bad) == false);
465 CHECK(re2.FullMatch(text_good) == false);
466 CHECK(re2.FullMatch(text_bad) == false);
467
468 RE_Options options_mlr;
469 options_mlr.set_match_limit_recursion(50);
470 RE re3("(\\w+)*b", options_mlr);
471 CHECK(re3.PartialMatch(text_good) == true);
472 CHECK(re3.PartialMatch(text_bad) == false);
473 CHECK(re3.FullMatch(text_good) == false);
474 CHECK(re3.FullMatch(text_bad) == false);
475
476 options_mlr.set_match_limit_recursion(10);
477 RE re4("(\\w+)*b", options_mlr);
478 CHECK(re4.PartialMatch(text_good) == false);
479 CHECK(re4.PartialMatch(text_bad) == false);
480 CHECK(re4.FullMatch(text_good) == false);
481 CHECK(re4.FullMatch(text_bad) == false);
482}
483
484// A meta-quoted string, interpreted as a pattern, should always match
485// the original unquoted string.
486static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) {
487 string quoted = RE::QuoteMeta(unquoted);
488 RE re(quoted, options);
489 CHECK(re.FullMatch(unquoted));
490}
491
492// A string containing meaningful regexp characters, which is then meta-
493// quoted, should not generally match a string the unquoted string does.
494static void NegativeTestQuoteMeta(string unquoted, string should_not_match,
495 RE_Options options = RE_Options()) {
496 string quoted = RE::QuoteMeta(unquoted);
497 RE re(quoted, options);
498 CHECK(!re.FullMatch(should_not_match));
499}
500
501// Tests that quoted meta characters match their original strings,
502// and that a few things that shouldn't match indeed do not.
503static void TestQuotaMetaSimple() {
504 TestQuoteMeta("foo");
505 TestQuoteMeta("foo.bar");
506 TestQuoteMeta("foo\\.bar");
507 TestQuoteMeta("[1-9]");
508 TestQuoteMeta("1.5-2.0?");
509 TestQuoteMeta("\\d");
510 TestQuoteMeta("Who doesn't like ice cream?");
511 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
512 TestQuoteMeta("((?!)xxx).*yyy");
513 TestQuoteMeta("([");
514 TestQuoteMeta(string("foo\0bar", 7));
515}
516
517static void TestQuoteMetaSimpleNegative() {
518 NegativeTestQuoteMeta("foo", "bar");
519 NegativeTestQuoteMeta("...", "bar");
520 NegativeTestQuoteMeta("\\.", ".");
521 NegativeTestQuoteMeta("\\.", "..");
522 NegativeTestQuoteMeta("(a)", "a");
523 NegativeTestQuoteMeta("(a|b)", "a");
524 NegativeTestQuoteMeta("(a|b)", "(a)");
525 NegativeTestQuoteMeta("(a|b)", "a|b");
526 NegativeTestQuoteMeta("[0-9]", "0");
527 NegativeTestQuoteMeta("[0-9]", "0-9");
528 NegativeTestQuoteMeta("[0-9]", "[9]");
529 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
530}
531
532static void TestQuoteMetaLatin1() {
533 TestQuoteMeta("3\xb2 = 9");
534}
535
// QuoteMeta checks on UTF-8 input.  The whole body is compiled only when
// SUPPORT_UTF8 is defined; otherwise this function does nothing.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  const RE_Options utf8 = pcrecpp::UTF8();

  TestQuoteMeta("Pl\xc3\xa1\x63ido Domingo", utf8);
  TestQuoteMeta("xyz", utf8);                // No fancy utf8
  TestQuoteMeta("\xc2\xb0", utf8);           // 2-byte utf8 (degree symbol)
  TestQuoteMeta("27\xc2\xb0 degrees", utf8); // As a middle character
  TestQuoteMeta("\xe2\x80\xb3", utf8);       // 3-byte utf8 (double prime)
  TestQuoteMeta("\xf0\x9d\x85\x9f", utf8);   // 4-byte utf8 (music note)

  // Same bytes with default (non-UTF-8) options.
  TestQuoteMeta("27\xc2\xb0");  // Interpreted as Latin-1, but should still work

  // The quoted form must not match text in which each byte of the 2-byte
  // sequence carries its own backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",        // 2-byte utf (degree symbol)
                        "27\\\xc2\\\xb0",
                        utf8);
#endif
}
550
551static void TestQuoteMetaAll() {
552 printf("Testing QuoteMeta\n");
553 TestQuotaMetaSimple();
554 TestQuoteMetaSimpleNegative();
555 TestQuoteMetaLatin1();
556 TestQuoteMetaUtf8();
557}
558
559//
560// Options tests contributed by
561// Giuseppe Maxia, CTO, Stardata s.r.l.
562// July 2005
563//
564static void GetOneOptionResult(
565 const char *option_name,
566 const char *regex,
567 const char *str,
568 RE_Options options,
569 bool full,
570 string expected) {
571
572 printf("Testing Option <%s>\n", option_name);
573 if(VERBOSE_TEST)
574 printf("/%s/ finds \"%s\" within \"%s\" \n",
575 regex,
576 expected.c_str(),
577 str);
578 string captured("");
579 if (full)
580 RE(regex,options).FullMatch(str, &captured);
581 else
582 RE(regex,options).PartialMatch(str, &captured);
583 CHECK_EQ(captured, expected);
584}
585
586static void TestOneOption(
587 const char *option_name,
588 const char *regex,
589 const char *str,
590 RE_Options options,
591 bool full,
592 bool assertive = true) {
593
594 printf("Testing Option <%s>\n", option_name);
595 if (VERBOSE_TEST)
596 printf("'%s' %s /%s/ \n",
597 str,
598 (assertive? "matches" : "doesn't match"),
599 regex);
600 if (assertive) {
601 if (full)
602 CHECK(RE(regex,options).FullMatch(str));
603 else
604 CHECK(RE(regex,options).PartialMatch(str));
605 } else {
606 if (full)
607 CHECK(!RE(regex,options).FullMatch(str));
608 else
609 CHECK(!RE(regex,options).PartialMatch(str));
610 }
611}
612
613static void Test_CASELESS() {
614 RE_Options options;
615 RE_Options options2;
616
617 options.set_caseless(true);
618 TestOneOption("CASELESS (class)", "HELLO", "hello", options, false);
619 TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false);
620 TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false);
621
622 TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false);
623 TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false);
624 options.set_caseless(false);
625 TestOneOption("no CASELESS", "HELLO", "hello", options, false, false);
626}
627
628static void Test_MULTILINE() {
629 RE_Options options;
630 RE_Options options2;
631 const char *str = "HELLO\n" "cruel\n" "world\n";
632
633 options.set_multiline(true);
634 TestOneOption("MULTILINE (class)", "^cruel$", str, options, false);
635 TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false);
636 TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false);
637 options.set_multiline(false);
638 TestOneOption("no MULTILINE", "^cruel$", str, options, false, false);
639}
640
641static void Test_DOTALL() {
642 RE_Options options;
643 RE_Options options2;
644 const char *str = "HELLO\n" "cruel\n" "world";
645
646 options.set_dotall(true);
647 TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true);
648 TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true);
649 TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true);
650 options.set_dotall(false);
651 TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false);
652}
653
654static void Test_DOLLAR_ENDONLY() {
655 RE_Options options;
656 RE_Options options2;
657 const char *str = "HELLO world\n";
658
659 TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false);
660 options.set_dollar_endonly(true);
661 TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false);
662 TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false);
663}
664
665static void Test_EXTRA() {
666 RE_Options options;
667 const char *str = "HELLO";
668
669 options.set_extra(true);
670 TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false );
671 TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false );
672 options.set_extra(false);
673 TestOneOption("no EXTRA", "\\HELL\\O", str, options, true );
674}
675
676static void Test_EXTENDED() {
677 RE_Options options;
678 RE_Options options2;
679 const char *str = "HELLO world";
680
681 options.set_extended(true);
682 TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false);
683 TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false);
684 TestOneOption("EXTENDED (class)",
685 "^ HE L{2} O "
686 "\\s+ "
687 "\\w+ $ ",
688 str,
689 options,
690 false);
691
692 TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false);
693 TestOneOption("EXTENDED (function)",
694 "^ HE L{2} O "
695 "\\s+ "
696 "\\w+ $ ",
697 str,
698 pcrecpp::EXTENDED(),
699 false);
700
701 options.set_extended(false);
702 TestOneOption("no EXTENDED", "HELLO world", str, options, false);
703}
704
705static void Test_NO_AUTO_CAPTURE() {
706 RE_Options options;
707 const char *str = "HELLO world";
708 string captured;
709
710 printf("Testing Option <no NO_AUTO_CAPTURE>\n");
711 if (VERBOSE_TEST)
712 printf("parentheses capture text\n");
713 RE re("(world|universe)$", options);
714 CHECK(re.Extract("\\1", str , &captured));
715 CHECK_EQ(captured, "world");
716 options.set_no_auto_capture(true);
717 printf("testing Option <NO_AUTO_CAPTURE>\n");
718 if (VERBOSE_TEST)
719 printf("parentheses do not capture text\n");
720 re.Extract("\\1",str, &captured );
721 CHECK_EQ(captured, "world");
722}
723
724static void Test_UNGREEDY() {
725 RE_Options options;
726 const char *str = "HELLO, 'this' is the 'world'";
727
728 options.set_ungreedy(true);
729 GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" );
730 GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" );
731 GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" );
732
733 options.set_ungreedy(false);
734 GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" );
735 GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" );
736}
737
738static void Test_all_options() {
739 const char *str = "HELLO\n" "cruel\n" "world";
740 RE_Options options;
741 options.set_all_options(PCRE_CASELESS | PCRE_DOTALL);
742
743 TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false);
744 options.set_all_options(0);
745 TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false);
746 options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED);
747
748 TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false);
749 TestOneOption("all_options (MULTILINE|EXTENDED) with constructor",
750 " ^ c r u e l $ ",
751 str,
752 RE_Options(PCRE_MULTILINE | PCRE_EXTENDED),
753 false);
754
755 TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation",
756 " ^ c r u e l $ ",
757 str,
758 RE_Options()
759 .set_multiline(true)
760 .set_extended(true),
761 false);
762
763 options.set_all_options(0);
764 TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false);
765
766}
767
768static void TestOptions() {
769 printf("Testing Options\n");
770 Test_CASELESS();
771 Test_MULTILINE();
772 Test_DOTALL();
773 Test_DOLLAR_ENDONLY();
774 Test_EXTENDED();
775 Test_NO_AUTO_CAPTURE();
776 Test_UNGREEDY();
777 Test_EXTRA();
778 Test_all_options();
779}
780
781static void TestConstructors() {
782 printf("Testing constructors\n");
783
784 RE_Options options;
785 options.set_dotall(true);
786 const char *str = "HELLO\n" "cruel\n" "world";
787
788 RE orig("HELLO.*world", options);
789 CHECK(orig.FullMatch(str));
790
791 RE copy1(orig);
792 CHECK(copy1.FullMatch(str));
793
794 RE copy2("not a match");
795 CHECK(!copy2.FullMatch(str));
796 copy2 = copy1;
797 CHECK(copy2.FullMatch(str));
798 copy2 = orig;
799 CHECK(copy2.FullMatch(str));
800
801 // Make sure when we assign to ourselves, nothing bad happens
802 orig = orig;
803 copy1 = copy1;
804 copy2 = copy2;
805 CHECK(orig.FullMatch(str));
806 CHECK(copy1.FullMatch(str));
807 CHECK(copy2.FullMatch(str));
808}
809
810int main(int argc, char** argv) {
811 // Treat any flag as --help
812 if (argc > 1 && argv[1][0] == '-') {
813 printf("Usage: %s [timing1|timing2|timing3 num-iters]\n"
814 " If 'timingX ###' is specified, run the given timing test\n"
815 " with the given number of iterations, rather than running\n"
816 " the default corectness test.\n", argv[0]);
817 return 0;
818 }
819
820 if (argc > 1) {
821 if ( argc == 2 || atoi(argv[2]) == 0) {
822 printf("timing mode needs a num-iters argument\n");
823 return 1;
824 }
825 if (!strcmp(argv[1], "timing1"))
826 Timing1(atoi(argv[2]));
827 else if (!strcmp(argv[1], "timing2"))
828 Timing2(atoi(argv[2]));
829 else if (!strcmp(argv[1], "timing3"))
830 Timing3(atoi(argv[2]));
831 else
832 printf("Unknown argument '%s'\n", argv[1]);
833 return 0;
834 }
835
836 printf("PCRE C++ wrapper tests\n");
837 printf("Testing FullMatch\n");
838
839 int i;
840 string s;
841
842 /***** FullMatch with no args *****/
843
844 CHECK(RE("h.*o").FullMatch("hello"));
845 CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front
846 CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end
847 CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op
848 CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op
849 CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops
850
851 /***** FullMatch with args *****/
852
853 // Zero-arg
854 CHECK(RE("\\d+").FullMatch("1001"));
855
856 // Single-arg
857 CHECK(RE("(\\d+)").FullMatch("1001", &i));
858 CHECK_EQ(i, 1001);
859 CHECK(RE("(-?\\d+)").FullMatch("-123", &i));
860 CHECK_EQ(i, -123);
861 CHECK(!RE("()\\d+").FullMatch("10", &i));
862 CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890",
863 &i));
864
865 // Digits surrounding integer-arg
866 CHECK(RE("1(\\d*)4").FullMatch("1234", &i));
867 CHECK_EQ(i, 23);
868 CHECK(RE("(\\d)\\d+").FullMatch("1234", &i));
869 CHECK_EQ(i, 1);
870 CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i));
871 CHECK_EQ(i, -1);
872 CHECK(RE("(\\d)").PartialMatch("1234", &i));
873 CHECK_EQ(i, 1);
874 CHECK(RE("(-\\d)").PartialMatch("-1234", &i));
875 CHECK_EQ(i, -1);
876
877 // String-arg
878 CHECK(RE("h(.*)o").FullMatch("hello", &s));
879 CHECK_EQ(s, string("ell"));
880
881 // StringPiece-arg
882 StringPiece sp;
883 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i));
884 CHECK_EQ(sp.size(), 4);
885 CHECK(memcmp(sp.data(), "ruby", 4) == 0);
886 CHECK_EQ(i, 1234);
887
888 // Multi-arg
889 CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i));
890 CHECK_EQ(s, string("ruby"));
891 CHECK_EQ(i, 1234);
892
893 // Ignore non-void* NULL arg
894 CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL));
895 CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL));
896 CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL));
897 CHECK(RE("(.*)").FullMatch("1234", (int*)NULL));
898#ifdef HAVE_LONG_LONG
899 CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL));
900#endif
901 CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL));
902 CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL));
903
904 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
905 CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL));
906 CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL));
907 CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL));
908 CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL));
909 CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL));
910
911 // Ignored arg
912 CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i));
913 CHECK_EQ(s, string("ruby"));
914 CHECK_EQ(i, 1234);
915
916 // Type tests
917 {
918 char c;
919 CHECK(RE("(H)ello").FullMatch("Hello", &c));
920 CHECK_EQ(c, 'H');
921 }
922 {
923 unsigned char c;
924 CHECK(RE("(H)ello").FullMatch("Hello", &c));
925 CHECK_EQ(c, static_cast<unsigned char>('H'));
926 }
927 {
928 short v;
929 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
930 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
931 CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
932 CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768);
933 CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v));
934 CHECK(!RE("(-?\\d+)").FullMatch("32768", &v));
935 }
936 {
937 unsigned short v;
938 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
939 CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767);
940 CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535);
941 CHECK(!RE("(\\d+)").FullMatch("65536", &v));
942 }
943 {
944 int v;
945 static const int max_value = 0x7fffffff;
946 static const int min_value = -max_value - 1;
947 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
948 CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100);
949 CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value);
950 CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value);
951 CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v));
952 CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v));
953 }
954 {
955 unsigned int v;
956 static const unsigned int max_value = 0xfffffffful;
957 CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
958 CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value);
959 CHECK(!RE("(\\d+)").FullMatch("4294967296", &v));
960 }
961#ifdef HAVE_LONG_LONG
962# if defined(__MINGW__) || defined(__MINGW32__)
963# define LLD "%I64d"
964# define LLU "%I64u"
965# else
966# define LLD "%lld"
967# define LLU "%llu"
968# endif
969 {
970 long long v;
971 static const long long max_value = 0x7fffffffffffffffLL;
972 static const long long min_value = -max_value - 1;
973 char buf[32]; // definitely big enough for a long long
974
975 CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100);
976 CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100);
977
978 sprintf(buf, LLD, max_value);
979 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
980
981 sprintf(buf, LLD, min_value);
982 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value);
983
984 sprintf(buf, LLD, max_value);
985 assert(buf[strlen(buf)-1] != '9');
986 buf[strlen(buf)-1]++;
987 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
988
989 sprintf(buf, LLD, min_value);
990 assert(buf[strlen(buf)-1] != '9');
991 buf[strlen(buf)-1]++;
992 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
993 }
994#endif
995#if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG
996 {
997 unsigned long long v;
998 long long v2;
999 static const unsigned long long max_value = 0xffffffffffffffffULL;
1000 char buf[32]; // definitely big enough for a unsigned long long
1001
1002 CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100);
1003 CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100);
1004
1005 sprintf(buf, LLU, max_value);
1006 CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value);
1007
1008 assert(buf[strlen(buf)-1] != '9');
1009 buf[strlen(buf)-1]++;
1010 CHECK(!RE("(-?\\d+)").FullMatch(buf, &v));
1011 }
1012#endif
1013 {
1014 float v;
1015 CHECK(RE("(.*)").FullMatch("100", &v));
1016 CHECK(RE("(.*)").FullMatch("-100.", &v));
1017 CHECK(RE("(.*)").FullMatch("1e23", &v));
1018 }
1019 {
1020 double v;
1021 CHECK(RE("(.*)").FullMatch("100", &v));
1022 CHECK(RE("(.*)").FullMatch("-100.", &v));
1023 CHECK(RE("(.*)").FullMatch("1e23", &v));
1024 }
1025
1026 // Check that matching is fully anchored
1027 CHECK(!RE("(\\d+)").FullMatch("x1001", &i));
1028 CHECK(!RE("(\\d+)").FullMatch("1001x", &i));
1029 CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001);
1030 CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001);
1031
1032 // Braces
1033 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd"));
1034 CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde"));
1035 CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc"));
1036
1037 // Complicated RE
1038 CHECK(RE("foo|bar|[A-Z]").FullMatch("foo"));
1039 CHECK(RE("foo|bar|[A-Z]").FullMatch("bar"));
1040 CHECK(RE("foo|bar|[A-Z]").FullMatch("X"));
1041 CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY"));
1042
1043 // Check full-match handling (needs '$' tacked on internally)
1044 CHECK(RE("fo|foo").FullMatch("fo"));
1045 CHECK(RE("fo|foo").FullMatch("foo"));
1046 CHECK(RE("fo|foo$").FullMatch("fo"));
1047 CHECK(RE("fo|foo$").FullMatch("foo"));
1048 CHECK(RE("foo$").FullMatch("foo"));
1049 CHECK(!RE("foo\\$").FullMatch("foo$bar"));
1050 CHECK(!RE("fo|bar").FullMatch("fox"));
1051
1052 // Uncomment the following if we change the handling of '$' to
1053 // prevent it from matching a trailing newline
1054 if (false) {
1055 // Check that we don't get bitten by pcre's special handling of a
1056 // '\n' at the end of the string matching '$'
1057 CHECK(!RE("foo$").PartialMatch("foo\n"));
1058 }
1059
1060 // Number of args
1061 int a[16];
1062 CHECK(RE("").FullMatch(""));
1063
1064 memset(a, 0, sizeof(0));
1065 CHECK(RE("(\\d){1}").FullMatch("1",
1066 &a[0]));
1067 CHECK_EQ(a[0], 1);
1068
1069 memset(a, 0, sizeof(0));
1070 CHECK(RE("(\\d)(\\d)").FullMatch("12",
1071 &a[0], &a[1]));
1072 CHECK_EQ(a[0], 1);
1073 CHECK_EQ(a[1], 2);
1074
1075 memset(a, 0, sizeof(0));
1076 CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123",
1077 &a[0], &a[1], &a[2]));
1078 CHECK_EQ(a[0], 1);
1079 CHECK_EQ(a[1], 2);
1080 CHECK_EQ(a[2], 3);
1081
1082 memset(a, 0, sizeof(0));
1083 CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234",
1084 &a[0], &a[1], &a[2], &a[3]));
1085 CHECK_EQ(a[0], 1);
1086 CHECK_EQ(a[1], 2);
1087 CHECK_EQ(a[2], 3);
1088 CHECK_EQ(a[3], 4);
1089
1090 memset(a, 0, sizeof(0));
1091 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345",
1092 &a[0], &a[1], &a[2],
1093 &a[3], &a[4]));
1094 CHECK_EQ(a[0], 1);
1095 CHECK_EQ(a[1], 2);
1096 CHECK_EQ(a[2], 3);
1097 CHECK_EQ(a[3], 4);
1098 CHECK_EQ(a[4], 5);
1099
1100 memset(a, 0, sizeof(0));
1101 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456",
1102 &a[0], &a[1], &a[2],
1103 &a[3], &a[4], &a[5]));
1104 CHECK_EQ(a[0], 1);
1105 CHECK_EQ(a[1], 2);
1106 CHECK_EQ(a[2], 3);
1107 CHECK_EQ(a[3], 4);
1108 CHECK_EQ(a[4], 5);
1109 CHECK_EQ(a[5], 6);
1110
1111 memset(a, 0, sizeof(0));
1112 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567",
1113 &a[0], &a[1], &a[2], &a[3],
1114 &a[4], &a[5], &a[6]));
1115 CHECK_EQ(a[0], 1);
1116 CHECK_EQ(a[1], 2);
1117 CHECK_EQ(a[2], 3);
1118 CHECK_EQ(a[3], 4);
1119 CHECK_EQ(a[4], 5);
1120 CHECK_EQ(a[5], 6);
1121 CHECK_EQ(a[6], 7);
1122
1123 memset(a, 0, sizeof(0));
1124 CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1125 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch(
1126 "1234567890123456",
1127 &a[0], &a[1], &a[2], &a[3],
1128 &a[4], &a[5], &a[6], &a[7],
1129 &a[8], &a[9], &a[10], &a[11],
1130 &a[12], &a[13], &a[14], &a[15]));
1131 CHECK_EQ(a[0], 1);
1132 CHECK_EQ(a[1], 2);
1133 CHECK_EQ(a[2], 3);
1134 CHECK_EQ(a[3], 4);
1135 CHECK_EQ(a[4], 5);
1136 CHECK_EQ(a[5], 6);
1137 CHECK_EQ(a[6], 7);
1138 CHECK_EQ(a[7], 8);
1139 CHECK_EQ(a[8], 9);
1140 CHECK_EQ(a[9], 0);
1141 CHECK_EQ(a[10], 1);
1142 CHECK_EQ(a[11], 2);
1143 CHECK_EQ(a[12], 3);
1144 CHECK_EQ(a[13], 4);
1145 CHECK_EQ(a[14], 5);
1146 CHECK_EQ(a[15], 6);
1147
1148 /***** PartialMatch *****/
1149
1150 printf("Testing PartialMatch\n");
1151
1152 CHECK(RE("h.*o").PartialMatch("hello"));
1153 CHECK(RE("h.*o").PartialMatch("othello"));
1154 CHECK(RE("h.*o").PartialMatch("hello!"));
1155 CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x"));
1156
1157 /***** other tests *****/
1158
1159 RadixTests();
1160 TestReplace();
1161 TestExtract();
1162 TestConsume();
1163 TestFindAndConsume();
1164 TestQuoteMetaAll();
1165 TestMatchNumberPeculiarity();
1166
1167 // Check the pattern() accessor
1168 {
1169 const string kPattern = "http://([^/]+)/.*";
1170 const RE re(kPattern);
1171 CHECK_EQ(kPattern, re.pattern());
1172 }
1173
1174 // Check RE error field.
1175 {
1176 RE re("foo");
1177 CHECK(re.error().empty()); // Must have no error
1178 }
1179
1180#ifdef SUPPORT_UTF8
1181 // Check UTF-8 handling
1182 {
1183 printf("Testing UTF-8 handling\n");
1184
1185 // Three Japanese characters (nihongo)
1186 const unsigned char utf8_string[] = {
1187 0xe6, 0x97, 0xa5, // 65e5
1188 0xe6, 0x9c, 0xac, // 672c
1189 0xe8, 0xaa, 0x9e, // 8a9e
1190 0
1191 };
1192 const unsigned char utf8_pattern[] = {
1193 '.',
1194 0xe6, 0x9c, 0xac, // 672c
1195 '.',
1196 0
1197 };
1198
1199 // Both should match in either mode, bytes or UTF-8
1200 RE re_test1(".........");
1201 CHECK(re_test1.FullMatch(utf8_string));
1202 RE re_test2("...", pcrecpp::UTF8());
1203 CHECK(re_test2.FullMatch(utf8_string));
1204
1205 // Check that '.' matches one byte or UTF-8 character
1206 // according to the mode.
1207 string ss;
1208 RE re_test3("(.)");
1209 CHECK(re_test3.PartialMatch(utf8_string, &ss));
1210 CHECK_EQ(ss, string("\xe6"));
1211 RE re_test4("(.)", pcrecpp::UTF8());
1212 CHECK(re_test4.PartialMatch(utf8_string, &ss));
1213 CHECK_EQ(ss, string("\xe6\x97\xa5"));
1214
1215 // Check that string matches itself in either mode
1216 RE re_test5(utf8_string);
1217 CHECK(re_test5.FullMatch(utf8_string));
1218 RE re_test6(utf8_string, pcrecpp::UTF8());
1219 CHECK(re_test6.FullMatch(utf8_string));
1220
1221 // Check that pattern matches string only in UTF8 mode
1222 RE re_test7(utf8_pattern);
1223 CHECK(!re_test7.FullMatch(utf8_string));
1224 RE re_test8(utf8_pattern, pcrecpp::UTF8());
1225 CHECK(re_test8.FullMatch(utf8_string));
1226 }
1227
1228 // Check that ungreedy, UTF8 regular expressions don't match when they
1229 // oughtn't -- see bug 82246.
1230 {
1231 // This code always worked.
1232 const char* pattern = "\\w+X";
1233 const string target = "a aX";
1234 RE match_sentence(pattern);
1235 RE match_sentence_re(pattern, pcrecpp::UTF8());
1236
1237 CHECK(!match_sentence.FullMatch(target));
1238 CHECK(!match_sentence_re.FullMatch(target));
1239 }
1240
1241 {
1242 const char* pattern = "(?U)\\w+X";
1243 const string target = "a aX";
1244 RE match_sentence(pattern);
1245 RE match_sentence_re(pattern, pcrecpp::UTF8());
1246
1247 CHECK(!match_sentence.FullMatch(target));
1248 CHECK(!match_sentence_re.FullMatch(target));
1249 }
1250#endif /* def SUPPORT_UTF8 */
1251
1252 printf("Testing error reporting\n");
1253
1254 { RE re("a\\1"); CHECK(!re.error().empty()); }
1255 {
1256 RE re("a[x");
1257 CHECK(!re.error().empty());
1258 }
1259 {
1260 RE re("a[z-a]");
1261 CHECK(!re.error().empty());
1262 }
1263 {
1264 RE re("a[[:foobar:]]");
1265 CHECK(!re.error().empty());
1266 }
1267 {
1268 RE re("a(b");
1269 CHECK(!re.error().empty());
1270 }
1271 {
1272 RE re("a\\");
1273 CHECK(!re.error().empty());
1274 }
1275
1276 // Test that recursion is stopped
1277 TestRecursion();
1278
1279 // Test Options
1280 if (getenv("VERBOSE_TEST") != NULL)
1281 VERBOSE_TEST = true;
1282 TestOptions();
1283
1284 // Test the constructors
1285 TestConstructors();
1286
1287 // Done
1288 printf("OK\n");
1289
1290 return 0;
1291}