Tristan Matthews | 0461646 | 2013-11-14 16:09:34 -0500 | [diff] [blame] | 1 | // -*- coding: utf-8 -*- |
| 2 | // |
| 3 | // Copyright (c) 2005 - 2010, Google Inc. |
| 4 | // All rights reserved. |
| 5 | // |
| 6 | // Redistribution and use in source and binary forms, with or without |
| 7 | // modification, are permitted provided that the following conditions are |
| 8 | // met: |
| 9 | // |
| 10 | // * Redistributions of source code must retain the above copyright |
| 11 | // notice, this list of conditions and the following disclaimer. |
| 12 | // * Redistributions in binary form must reproduce the above |
| 13 | // copyright notice, this list of conditions and the following disclaimer |
| 14 | // in the documentation and/or other materials provided with the |
| 15 | // distribution. |
| 16 | // * Neither the name of Google Inc. nor the names of its |
| 17 | // contributors may be used to endorse or promote products derived from |
| 18 | // this software without specific prior written permission. |
| 19 | // |
| 20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | // |
| 32 | // Author: Sanjay Ghemawat |
| 33 | // |
| 34 | // TODO: Test extractions for PartialMatch/Consume |
| 35 | |
| 36 | #ifdef HAVE_CONFIG_H |
| 37 | #include "config.h" |
| 38 | #endif |
| 39 | |
| 40 | #include <stdio.h> |
| 41 | #include <string.h> /* for memset and strcmp */ |
| 42 | #include <cassert> |
| 43 | #include <vector> |
| 44 | #include "pcrecpp.h" |
| 45 | |
| 46 | using pcrecpp::StringPiece; |
| 47 | using pcrecpp::RE; |
| 48 | using pcrecpp::RE_Options; |
| 49 | using pcrecpp::Hex; |
| 50 | using pcrecpp::Octal; |
| 51 | using pcrecpp::CRadix; |
| 52 | |
// When true, the option tests additionally print a human-readable description
// of each regex/subject pair before checking it.
static bool VERBOSE_TEST = false;

// CHECK dies with a fatal error if condition is not true.  It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode.  Therefore, it is safe to do things like:
//    CHECK_EQ(fp->Write(x), 4)
// On failure the file name, line number and the stringified condition are
// written to stderr and the process exits with status 1.
#define CHECK(condition) do {                           \
  if (!(condition)) {                                   \
    fprintf(stderr, "%s:%d: Check failed: %s\n",        \
            __FILE__, __LINE__, #condition);            \
    exit(1);                                            \
  }                                                     \
} while (0)

// CHECK_EQ(a, b) dies unless a == b.  Both operands are parenthesized so an
// argument containing an operator that binds looser than == (for example
// `x & 1`) is compared as a whole instead of being re-associated.
#define CHECK_EQ(a, b) CHECK((a) == (b))
| 68 | |
| 69 | static void Timing1(int num_iters) { |
| 70 | // Same pattern lots of times |
| 71 | RE pattern("ruby:\\d+"); |
| 72 | StringPiece p("ruby:1234"); |
| 73 | for (int j = num_iters; j > 0; j--) { |
| 74 | CHECK(pattern.FullMatch(p)); |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | static void Timing2(int num_iters) { |
| 79 | // Same pattern lots of times |
| 80 | RE pattern("ruby:(\\d+)"); |
| 81 | int i; |
| 82 | for (int j = num_iters; j > 0; j--) { |
| 83 | CHECK(pattern.FullMatch("ruby:1234", &i)); |
| 84 | CHECK_EQ(i, 1234); |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | static void Timing3(int num_iters) { |
| 89 | string text_string; |
| 90 | for (int j = num_iters; j > 0; j--) { |
| 91 | text_string += "this is another line\n"; |
| 92 | } |
| 93 | |
| 94 | RE line_matcher(".*\n"); |
| 95 | string line; |
| 96 | StringPiece text(text_string); |
| 97 | int counter = 0; |
| 98 | while (line_matcher.Consume(&text)) { |
| 99 | counter++; |
| 100 | } |
| 101 | printf("Matched %d lines\n", counter); |
| 102 | } |
| 103 | |
#if 0  // change to "#if 1" if you have a way of defining VirtualProcessSize()

// Compiles 100000 distinct patterns and checks that the process size reported
// by VirtualProcessSize() grows by less than 2% over the second half of the
// run (the first 50000 iterations are treated as warm-up).
static void LeakTest() {
  // Check for memory leaks
  unsigned long long initial_size = 0;
  for (int i = 0; i < 100000; i++) {
    if (i == 50000) {
      initial_size = VirtualProcessSize();
      printf("Size after 50000: %llu\n", initial_size);
    }
    char buf[100];  // definitely big enough
    sprintf(buf, "pat%09d", i);
    RE newre(buf);
  }
  // Same type as initial_size so that it matches the %llu conversion below.
  const unsigned long long final_size = VirtualProcessSize();
  printf("Size after 100000: %llu\n", final_size);
  // Subtract in floating point: an unsigned subtraction would wrap around to
  // a huge value (and fail the check) if the process happened to shrink.
  const double growth =
      (double(final_size) - double(initial_size)) / double(final_size);
  printf("Growth: %0.2f%%\n", growth * 100);
  CHECK(growth < 0.02);    // Allow < 2% growth
}

#endif
| 126 | |
| 127 | static void RadixTests() { |
| 128 | printf("Testing hex\n"); |
| 129 | |
| 130 | #define CHECK_HEX(type, value) \ |
| 131 | do { \ |
| 132 | type v; \ |
| 133 | CHECK(RE("([0-9a-fA-F]+)[uUlL]*").FullMatch(#value, Hex(&v))); \ |
| 134 | CHECK_EQ(v, 0x ## value); \ |
| 135 | CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0x" #value, CRadix(&v))); \ |
| 136 | CHECK_EQ(v, 0x ## value); \ |
| 137 | } while(0) |
| 138 | |
| 139 | CHECK_HEX(short, 2bad); |
| 140 | CHECK_HEX(unsigned short, 2badU); |
| 141 | CHECK_HEX(int, dead); |
| 142 | CHECK_HEX(unsigned int, deadU); |
| 143 | CHECK_HEX(long, 7eadbeefL); |
| 144 | CHECK_HEX(unsigned long, deadbeefUL); |
| 145 | #ifdef HAVE_LONG_LONG |
| 146 | CHECK_HEX(long long, 12345678deadbeefLL); |
| 147 | #endif |
| 148 | #ifdef HAVE_UNSIGNED_LONG_LONG |
| 149 | CHECK_HEX(unsigned long long, cafebabedeadbeefULL); |
| 150 | #endif |
| 151 | |
| 152 | #undef CHECK_HEX |
| 153 | |
| 154 | printf("Testing octal\n"); |
| 155 | |
| 156 | #define CHECK_OCTAL(type, value) \ |
| 157 | do { \ |
| 158 | type v; \ |
| 159 | CHECK(RE("([0-7]+)[uUlL]*").FullMatch(#value, Octal(&v))); \ |
| 160 | CHECK_EQ(v, 0 ## value); \ |
| 161 | CHECK(RE("([0-9a-fA-FxX]+)[uUlL]*").FullMatch("0" #value, CRadix(&v))); \ |
| 162 | CHECK_EQ(v, 0 ## value); \ |
| 163 | } while(0) |
| 164 | |
| 165 | CHECK_OCTAL(short, 77777); |
| 166 | CHECK_OCTAL(unsigned short, 177777U); |
| 167 | CHECK_OCTAL(int, 17777777777); |
| 168 | CHECK_OCTAL(unsigned int, 37777777777U); |
| 169 | CHECK_OCTAL(long, 17777777777L); |
| 170 | CHECK_OCTAL(unsigned long, 37777777777UL); |
| 171 | #ifdef HAVE_LONG_LONG |
| 172 | CHECK_OCTAL(long long, 777777777777777777777LL); |
| 173 | #endif |
| 174 | #ifdef HAVE_UNSIGNED_LONG_LONG |
| 175 | CHECK_OCTAL(unsigned long long, 1777777777777777777777ULL); |
| 176 | #endif |
| 177 | |
| 178 | #undef CHECK_OCTAL |
| 179 | |
| 180 | printf("Testing decimal\n"); |
| 181 | |
| 182 | #define CHECK_DECIMAL(type, value) \ |
| 183 | do { \ |
| 184 | type v; \ |
| 185 | CHECK(RE("(-?[0-9]+)[uUlL]*").FullMatch(#value, &v)); \ |
| 186 | CHECK_EQ(v, value); \ |
| 187 | CHECK(RE("(-?[0-9a-fA-FxX]+)[uUlL]*").FullMatch(#value, CRadix(&v))); \ |
| 188 | CHECK_EQ(v, value); \ |
| 189 | } while(0) |
| 190 | |
| 191 | CHECK_DECIMAL(short, -1); |
| 192 | CHECK_DECIMAL(unsigned short, 9999); |
| 193 | CHECK_DECIMAL(int, -1000); |
| 194 | CHECK_DECIMAL(unsigned int, 12345U); |
| 195 | CHECK_DECIMAL(long, -10000000L); |
| 196 | CHECK_DECIMAL(unsigned long, 3083324652U); |
| 197 | #ifdef HAVE_LONG_LONG |
| 198 | CHECK_DECIMAL(long long, -100000000000000LL); |
| 199 | #endif |
| 200 | #ifdef HAVE_UNSIGNED_LONG_LONG |
| 201 | CHECK_DECIMAL(unsigned long long, 1234567890987654321ULL); |
| 202 | #endif |
| 203 | |
| 204 | #undef CHECK_DECIMAL |
| 205 | |
| 206 | } |
| 207 | |
| 208 | static void TestReplace() { |
| 209 | printf("Testing Replace\n"); |
| 210 | |
| 211 | struct ReplaceTest { |
| 212 | const char *regexp; |
| 213 | const char *rewrite; |
| 214 | const char *original; |
| 215 | const char *single; |
| 216 | const char *global; |
| 217 | int global_count; // the expected return value from ReplaceAll |
| 218 | }; |
| 219 | static const ReplaceTest tests[] = { |
| 220 | { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", |
| 221 | "\\2\\1ay", |
| 222 | "the quick brown fox jumps over the lazy dogs.", |
| 223 | "ethay quick brown fox jumps over the lazy dogs.", |
| 224 | "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", |
| 225 | 9 }, |
| 226 | { "\\w+", |
| 227 | "\\0-NOSPAM", |
| 228 | "paul.haahr@google.com", |
| 229 | "paul-NOSPAM.haahr@google.com", |
| 230 | "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", |
| 231 | 4 }, |
| 232 | { "^", |
| 233 | "(START)", |
| 234 | "foo", |
| 235 | "(START)foo", |
| 236 | "(START)foo", |
| 237 | 1 }, |
| 238 | { "^", |
| 239 | "(START)", |
| 240 | "", |
| 241 | "(START)", |
| 242 | "(START)", |
| 243 | 1 }, |
| 244 | { "$", |
| 245 | "(END)", |
| 246 | "", |
| 247 | "(END)", |
| 248 | "(END)", |
| 249 | 1 }, |
| 250 | { "b", |
| 251 | "bb", |
| 252 | "ababababab", |
| 253 | "abbabababab", |
| 254 | "abbabbabbabbabb", |
| 255 | 5 }, |
| 256 | { "b", |
| 257 | "bb", |
| 258 | "bbbbbb", |
| 259 | "bbbbbbb", |
| 260 | "bbbbbbbbbbbb", |
| 261 | 6 }, |
| 262 | { "b+", |
| 263 | "bb", |
| 264 | "bbbbbb", |
| 265 | "bb", |
| 266 | "bb", |
| 267 | 1 }, |
| 268 | { "b*", |
| 269 | "bb", |
| 270 | "bbbbbb", |
| 271 | "bb", |
| 272 | "bbbb", |
| 273 | 2 }, |
| 274 | { "b*", |
| 275 | "bb", |
| 276 | "aaaaa", |
| 277 | "bbaaaaa", |
| 278 | "bbabbabbabbabbabb", |
| 279 | 6 }, |
| 280 | { "b*", |
| 281 | "bb", |
| 282 | "aa\naa\n", |
| 283 | "bbaa\naa\n", |
| 284 | "bbabbabb\nbbabbabb\nbb", |
| 285 | 7 }, |
| 286 | { "b*", |
| 287 | "bb", |
| 288 | "aa\raa\r", |
| 289 | "bbaa\raa\r", |
| 290 | "bbabbabb\rbbabbabb\rbb", |
| 291 | 7 }, |
| 292 | { "b*", |
| 293 | "bb", |
| 294 | "aa\r\naa\r\n", |
| 295 | "bbaa\r\naa\r\n", |
| 296 | "bbabbabb\r\nbbabbabb\r\nbb", |
| 297 | 7 }, |
| 298 | // Check empty-string matching (it's tricky!) |
| 299 | { "aa|b*", |
| 300 | "@", |
| 301 | "aa", |
| 302 | "@", |
| 303 | "@@", |
| 304 | 2 }, |
| 305 | { "b*|aa", |
| 306 | "@", |
| 307 | "aa", |
| 308 | "@aa", |
| 309 | "@@@", |
| 310 | 3 }, |
| 311 | #ifdef SUPPORT_UTF8 |
| 312 | { "b*", |
| 313 | "bb", |
| 314 | "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 |
| 315 | "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", |
| 316 | "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", |
| 317 | 5 }, |
| 318 | { "b*", |
| 319 | "bb", |
| 320 | "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 |
| 321 | "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", |
| 322 | ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" |
| 323 | "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), |
| 324 | 9 }, |
| 325 | #endif |
| 326 | { "", NULL, NULL, NULL, NULL, 0 } |
| 327 | }; |
| 328 | |
| 329 | #ifdef SUPPORT_UTF8 |
| 330 | const bool support_utf8 = true; |
| 331 | #else |
| 332 | const bool support_utf8 = false; |
| 333 | #endif |
| 334 | |
| 335 | for (const ReplaceTest *t = tests; t->original != NULL; ++t) { |
| 336 | RE re(t->regexp, RE_Options(PCRE_NEWLINE_CRLF).set_utf8(support_utf8)); |
| 337 | assert(re.error().empty()); |
| 338 | string one(t->original); |
| 339 | CHECK(re.Replace(t->rewrite, &one)); |
| 340 | CHECK_EQ(one, t->single); |
| 341 | string all(t->original); |
| 342 | const int replace_count = re.GlobalReplace(t->rewrite, &all); |
| 343 | CHECK_EQ(all, t->global); |
| 344 | CHECK_EQ(replace_count, t->global_count); |
| 345 | } |
| 346 | |
| 347 | // One final test: test \r\n replacement when we're not in CRLF mode |
| 348 | { |
| 349 | RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); |
| 350 | assert(re.error().empty()); |
| 351 | string all("aa\r\naa\r\n"); |
| 352 | CHECK_EQ(re.GlobalReplace("bb", &all), 9); |
| 353 | CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 354 | } |
| 355 | { |
| 356 | RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); |
| 357 | assert(re.error().empty()); |
| 358 | string all("aa\r\naa\r\n"); |
| 359 | CHECK_EQ(re.GlobalReplace("bb", &all), 9); |
| 360 | CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); |
| 361 | } |
| 362 | // TODO: test what happens when no PCRE_NEWLINE_* flag is set. |
| 363 | // Alas, the answer depends on how pcre was compiled. |
| 364 | } |
| 365 | |
| 366 | static void TestExtract() { |
| 367 | printf("Testing Extract\n"); |
| 368 | |
| 369 | string s; |
| 370 | |
| 371 | CHECK(RE("(.*)@([^.]*)").Extract("\\2!\\1", "boris@kremvax.ru", &s)); |
| 372 | CHECK_EQ(s, "kremvax!boris"); |
| 373 | |
| 374 | // check the RE interface as well |
| 375 | CHECK(RE(".*").Extract("'\\0'", "foo", &s)); |
| 376 | CHECK_EQ(s, "'foo'"); |
| 377 | CHECK(!RE("bar").Extract("'\\0'", "baz", &s)); |
| 378 | CHECK_EQ(s, "'foo'"); |
| 379 | } |
| 380 | |
| 381 | static void TestConsume() { |
| 382 | printf("Testing Consume\n"); |
| 383 | |
| 384 | string word; |
| 385 | |
| 386 | string s(" aaa b!@#$@#$cccc"); |
| 387 | StringPiece input(s); |
| 388 | |
| 389 | RE r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace |
| 390 | CHECK(r.Consume(&input, &word)); |
| 391 | CHECK_EQ(word, "aaa"); |
| 392 | CHECK(r.Consume(&input, &word)); |
| 393 | CHECK_EQ(word, "b"); |
| 394 | CHECK(! r.Consume(&input, &word)); |
| 395 | } |
| 396 | |
| 397 | static void TestFindAndConsume() { |
| 398 | printf("Testing FindAndConsume\n"); |
| 399 | |
| 400 | string word; |
| 401 | |
| 402 | string s(" aaa b!@#$@#$cccc"); |
| 403 | StringPiece input(s); |
| 404 | |
| 405 | RE r("(\\w+)"); // matches a word |
| 406 | CHECK(r.FindAndConsume(&input, &word)); |
| 407 | CHECK_EQ(word, "aaa"); |
| 408 | CHECK(r.FindAndConsume(&input, &word)); |
| 409 | CHECK_EQ(word, "b"); |
| 410 | CHECK(r.FindAndConsume(&input, &word)); |
| 411 | CHECK_EQ(word, "cccc"); |
| 412 | CHECK(! r.FindAndConsume(&input, &word)); |
| 413 | } |
| 414 | |
| 415 | static void TestMatchNumberPeculiarity() { |
| 416 | printf("Testing match-number peculiarity\n"); |
| 417 | |
| 418 | string word1; |
| 419 | string word2; |
| 420 | string word3; |
| 421 | |
| 422 | RE r("(foo)|(bar)|(baz)"); |
| 423 | CHECK(r.PartialMatch("foo", &word1, &word2, &word3)); |
| 424 | CHECK_EQ(word1, "foo"); |
| 425 | CHECK_EQ(word2, ""); |
| 426 | CHECK_EQ(word3, ""); |
| 427 | CHECK(r.PartialMatch("bar", &word1, &word2, &word3)); |
| 428 | CHECK_EQ(word1, ""); |
| 429 | CHECK_EQ(word2, "bar"); |
| 430 | CHECK_EQ(word3, ""); |
| 431 | CHECK(r.PartialMatch("baz", &word1, &word2, &word3)); |
| 432 | CHECK_EQ(word1, ""); |
| 433 | CHECK_EQ(word2, ""); |
| 434 | CHECK_EQ(word3, "baz"); |
| 435 | CHECK(!r.PartialMatch("f", &word1, &word2, &word3)); |
| 436 | |
| 437 | string a; |
| 438 | CHECK(RE("(foo)|hello").FullMatch("hello", &a)); |
| 439 | CHECK_EQ(a, ""); |
| 440 | } |
| 441 | |
| 442 | static void TestRecursion() { |
| 443 | printf("Testing recursion\n"); |
| 444 | |
| 445 | // Get one string that passes (sometimes), one that never does. |
| 446 | string text_good("abcdefghijk"); |
| 447 | string text_bad("acdefghijkl"); |
| 448 | |
| 449 | // According to pcretest, matching text_good against (\w+)*b |
| 450 | // requires match_limit of at least 8192, and match_recursion_limit |
| 451 | // of at least 37. |
| 452 | |
| 453 | RE_Options options_ml; |
| 454 | options_ml.set_match_limit(8192); |
| 455 | RE re("(\\w+)*b", options_ml); |
| 456 | CHECK(re.PartialMatch(text_good) == true); |
| 457 | CHECK(re.PartialMatch(text_bad) == false); |
| 458 | CHECK(re.FullMatch(text_good) == false); |
| 459 | CHECK(re.FullMatch(text_bad) == false); |
| 460 | |
| 461 | options_ml.set_match_limit(1024); |
| 462 | RE re2("(\\w+)*b", options_ml); |
| 463 | CHECK(re2.PartialMatch(text_good) == false); // because of match_limit |
| 464 | CHECK(re2.PartialMatch(text_bad) == false); |
| 465 | CHECK(re2.FullMatch(text_good) == false); |
| 466 | CHECK(re2.FullMatch(text_bad) == false); |
| 467 | |
| 468 | RE_Options options_mlr; |
| 469 | options_mlr.set_match_limit_recursion(50); |
| 470 | RE re3("(\\w+)*b", options_mlr); |
| 471 | CHECK(re3.PartialMatch(text_good) == true); |
| 472 | CHECK(re3.PartialMatch(text_bad) == false); |
| 473 | CHECK(re3.FullMatch(text_good) == false); |
| 474 | CHECK(re3.FullMatch(text_bad) == false); |
| 475 | |
| 476 | options_mlr.set_match_limit_recursion(10); |
| 477 | RE re4("(\\w+)*b", options_mlr); |
| 478 | CHECK(re4.PartialMatch(text_good) == false); |
| 479 | CHECK(re4.PartialMatch(text_bad) == false); |
| 480 | CHECK(re4.FullMatch(text_good) == false); |
| 481 | CHECK(re4.FullMatch(text_bad) == false); |
| 482 | } |
| 483 | |
| 484 | // A meta-quoted string, interpreted as a pattern, should always match |
| 485 | // the original unquoted string. |
| 486 | static void TestQuoteMeta(string unquoted, RE_Options options = RE_Options()) { |
| 487 | string quoted = RE::QuoteMeta(unquoted); |
| 488 | RE re(quoted, options); |
| 489 | CHECK(re.FullMatch(unquoted)); |
| 490 | } |
| 491 | |
| 492 | // A string containing meaningful regexp characters, which is then meta- |
| 493 | // quoted, should not generally match a string the unquoted string does. |
| 494 | static void NegativeTestQuoteMeta(string unquoted, string should_not_match, |
| 495 | RE_Options options = RE_Options()) { |
| 496 | string quoted = RE::QuoteMeta(unquoted); |
| 497 | RE re(quoted, options); |
| 498 | CHECK(!re.FullMatch(should_not_match)); |
| 499 | } |
| 500 | |
| 501 | // Tests that quoted meta characters match their original strings, |
| 502 | // and that a few things that shouldn't match indeed do not. |
| 503 | static void TestQuotaMetaSimple() { |
| 504 | TestQuoteMeta("foo"); |
| 505 | TestQuoteMeta("foo.bar"); |
| 506 | TestQuoteMeta("foo\\.bar"); |
| 507 | TestQuoteMeta("[1-9]"); |
| 508 | TestQuoteMeta("1.5-2.0?"); |
| 509 | TestQuoteMeta("\\d"); |
| 510 | TestQuoteMeta("Who doesn't like ice cream?"); |
| 511 | TestQuoteMeta("((a|b)c?d*e+[f-h]i)"); |
| 512 | TestQuoteMeta("((?!)xxx).*yyy"); |
| 513 | TestQuoteMeta("(["); |
| 514 | TestQuoteMeta(string("foo\0bar", 7)); |
| 515 | } |
| 516 | |
| 517 | static void TestQuoteMetaSimpleNegative() { |
| 518 | NegativeTestQuoteMeta("foo", "bar"); |
| 519 | NegativeTestQuoteMeta("...", "bar"); |
| 520 | NegativeTestQuoteMeta("\\.", "."); |
| 521 | NegativeTestQuoteMeta("\\.", ".."); |
| 522 | NegativeTestQuoteMeta("(a)", "a"); |
| 523 | NegativeTestQuoteMeta("(a|b)", "a"); |
| 524 | NegativeTestQuoteMeta("(a|b)", "(a)"); |
| 525 | NegativeTestQuoteMeta("(a|b)", "a|b"); |
| 526 | NegativeTestQuoteMeta("[0-9]", "0"); |
| 527 | NegativeTestQuoteMeta("[0-9]", "0-9"); |
| 528 | NegativeTestQuoteMeta("[0-9]", "[9]"); |
| 529 | NegativeTestQuoteMeta("((?!)xxx)", "xxx"); |
| 530 | } |
| 531 | |
| 532 | static void TestQuoteMetaLatin1() { |
| 533 | TestQuoteMeta("3\xb2 = 9"); |
| 534 | } |
| 535 | |
// QuoteMeta checks on multi-byte UTF-8 input.  Compiled to an empty function
// unless SUPPORT_UTF8 is defined.
static void TestQuoteMetaUtf8() {
#ifdef SUPPORT_UTF8
  // Strings that must round-trip when the RE is built with pcrecpp::UTF8().
  static const char* const kUtf8Cases[] = {
    "Pl\xc3\xa1\x63ido Domingo",
    "xyz",                          // No fancy utf8
    "\xc2\xb0",                     // 2-byte utf8 (degree symbol)
    "27\xc2\xb0 degrees",           // As a middle character
    "\xe2\x80\xb3",                 // 3-byte utf8 (double prime)
    "\xf0\x9d\x85\x9f",             // 4-byte utf8 (music note)
  };
  const size_t case_count = sizeof(kUtf8Cases) / sizeof(kUtf8Cases[0]);
  for (size_t idx = 0; idx < case_count; ++idx) {
    TestQuoteMeta(kUtf8Cases[idx], pcrecpp::UTF8());
  }

  // Interpreted as Latin-1, but should still work
  TestQuoteMeta("27\xc2\xb0");

  // 2-byte utf (degree symbol): the quoted pattern must not match a subject
  // in which each of the two bytes is preceded by a backslash.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0",
                        pcrecpp::UTF8());
#endif
}
| 550 | |
| 551 | static void TestQuoteMetaAll() { |
| 552 | printf("Testing QuoteMeta\n"); |
| 553 | TestQuotaMetaSimple(); |
| 554 | TestQuoteMetaSimpleNegative(); |
| 555 | TestQuoteMetaLatin1(); |
| 556 | TestQuoteMetaUtf8(); |
| 557 | } |
| 558 | |
| 559 | // |
| 560 | // Options tests contributed by |
| 561 | // Giuseppe Maxia, CTO, Stardata s.r.l. |
| 562 | // July 2005 |
| 563 | // |
| 564 | static void GetOneOptionResult( |
| 565 | const char *option_name, |
| 566 | const char *regex, |
| 567 | const char *str, |
| 568 | RE_Options options, |
| 569 | bool full, |
| 570 | string expected) { |
| 571 | |
| 572 | printf("Testing Option <%s>\n", option_name); |
| 573 | if(VERBOSE_TEST) |
| 574 | printf("/%s/ finds \"%s\" within \"%s\" \n", |
| 575 | regex, |
| 576 | expected.c_str(), |
| 577 | str); |
| 578 | string captured(""); |
| 579 | if (full) |
| 580 | RE(regex,options).FullMatch(str, &captured); |
| 581 | else |
| 582 | RE(regex,options).PartialMatch(str, &captured); |
| 583 | CHECK_EQ(captured, expected); |
| 584 | } |
| 585 | |
| 586 | static void TestOneOption( |
| 587 | const char *option_name, |
| 588 | const char *regex, |
| 589 | const char *str, |
| 590 | RE_Options options, |
| 591 | bool full, |
| 592 | bool assertive = true) { |
| 593 | |
| 594 | printf("Testing Option <%s>\n", option_name); |
| 595 | if (VERBOSE_TEST) |
| 596 | printf("'%s' %s /%s/ \n", |
| 597 | str, |
| 598 | (assertive? "matches" : "doesn't match"), |
| 599 | regex); |
| 600 | if (assertive) { |
| 601 | if (full) |
| 602 | CHECK(RE(regex,options).FullMatch(str)); |
| 603 | else |
| 604 | CHECK(RE(regex,options).PartialMatch(str)); |
| 605 | } else { |
| 606 | if (full) |
| 607 | CHECK(!RE(regex,options).FullMatch(str)); |
| 608 | else |
| 609 | CHECK(!RE(regex,options).PartialMatch(str)); |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | static void Test_CASELESS() { |
| 614 | RE_Options options; |
| 615 | RE_Options options2; |
| 616 | |
| 617 | options.set_caseless(true); |
| 618 | TestOneOption("CASELESS (class)", "HELLO", "hello", options, false); |
| 619 | TestOneOption("CASELESS (class2)", "HELLO", "hello", options2.set_caseless(true), false); |
| 620 | TestOneOption("CASELESS (class)", "^[A-Z]+$", "Hello", options, false); |
| 621 | |
| 622 | TestOneOption("CASELESS (function)", "HELLO", "hello", pcrecpp::CASELESS(), false); |
| 623 | TestOneOption("CASELESS (function)", "^[A-Z]+$", "Hello", pcrecpp::CASELESS(), false); |
| 624 | options.set_caseless(false); |
| 625 | TestOneOption("no CASELESS", "HELLO", "hello", options, false, false); |
| 626 | } |
| 627 | |
| 628 | static void Test_MULTILINE() { |
| 629 | RE_Options options; |
| 630 | RE_Options options2; |
| 631 | const char *str = "HELLO\n" "cruel\n" "world\n"; |
| 632 | |
| 633 | options.set_multiline(true); |
| 634 | TestOneOption("MULTILINE (class)", "^cruel$", str, options, false); |
| 635 | TestOneOption("MULTILINE (class2)", "^cruel$", str, options2.set_multiline(true), false); |
| 636 | TestOneOption("MULTILINE (function)", "^cruel$", str, pcrecpp::MULTILINE(), false); |
| 637 | options.set_multiline(false); |
| 638 | TestOneOption("no MULTILINE", "^cruel$", str, options, false, false); |
| 639 | } |
| 640 | |
| 641 | static void Test_DOTALL() { |
| 642 | RE_Options options; |
| 643 | RE_Options options2; |
| 644 | const char *str = "HELLO\n" "cruel\n" "world"; |
| 645 | |
| 646 | options.set_dotall(true); |
| 647 | TestOneOption("DOTALL (class)", "HELLO.*world", str, options, true); |
| 648 | TestOneOption("DOTALL (class2)", "HELLO.*world", str, options2.set_dotall(true), true); |
| 649 | TestOneOption("DOTALL (function)", "HELLO.*world", str, pcrecpp::DOTALL(), true); |
| 650 | options.set_dotall(false); |
| 651 | TestOneOption("no DOTALL", "HELLO.*world", str, options, true, false); |
| 652 | } |
| 653 | |
| 654 | static void Test_DOLLAR_ENDONLY() { |
| 655 | RE_Options options; |
| 656 | RE_Options options2; |
| 657 | const char *str = "HELLO world\n"; |
| 658 | |
| 659 | TestOneOption("no DOLLAR_ENDONLY", "world$", str, options, false); |
| 660 | options.set_dollar_endonly(true); |
| 661 | TestOneOption("DOLLAR_ENDONLY 1", "world$", str, options, false, false); |
| 662 | TestOneOption("DOLLAR_ENDONLY 2", "world$", str, options2.set_dollar_endonly(true), false, false); |
| 663 | } |
| 664 | |
| 665 | static void Test_EXTRA() { |
| 666 | RE_Options options; |
| 667 | const char *str = "HELLO"; |
| 668 | |
| 669 | options.set_extra(true); |
| 670 | TestOneOption("EXTRA 1", "\\HELL\\O", str, options, true, false ); |
| 671 | TestOneOption("EXTRA 2", "\\HELL\\O", str, RE_Options().set_extra(true), true, false ); |
| 672 | options.set_extra(false); |
| 673 | TestOneOption("no EXTRA", "\\HELL\\O", str, options, true ); |
| 674 | } |
| 675 | |
| 676 | static void Test_EXTENDED() { |
| 677 | RE_Options options; |
| 678 | RE_Options options2; |
| 679 | const char *str = "HELLO world"; |
| 680 | |
| 681 | options.set_extended(true); |
| 682 | TestOneOption("EXTENDED (class)", "HELLO world", str, options, false, false); |
| 683 | TestOneOption("EXTENDED (class2)", "HELLO world", str, options2.set_extended(true), false, false); |
| 684 | TestOneOption("EXTENDED (class)", |
| 685 | "^ HE L{2} O " |
| 686 | "\\s+ " |
| 687 | "\\w+ $ ", |
| 688 | str, |
| 689 | options, |
| 690 | false); |
| 691 | |
| 692 | TestOneOption("EXTENDED (function)", "HELLO world", str, pcrecpp::EXTENDED(), false, false); |
| 693 | TestOneOption("EXTENDED (function)", |
| 694 | "^ HE L{2} O " |
| 695 | "\\s+ " |
| 696 | "\\w+ $ ", |
| 697 | str, |
| 698 | pcrecpp::EXTENDED(), |
| 699 | false); |
| 700 | |
| 701 | options.set_extended(false); |
| 702 | TestOneOption("no EXTENDED", "HELLO world", str, options, false); |
| 703 | } |
| 704 | |
| 705 | static void Test_NO_AUTO_CAPTURE() { |
| 706 | RE_Options options; |
| 707 | const char *str = "HELLO world"; |
| 708 | string captured; |
| 709 | |
| 710 | printf("Testing Option <no NO_AUTO_CAPTURE>\n"); |
| 711 | if (VERBOSE_TEST) |
| 712 | printf("parentheses capture text\n"); |
| 713 | RE re("(world|universe)$", options); |
| 714 | CHECK(re.Extract("\\1", str , &captured)); |
| 715 | CHECK_EQ(captured, "world"); |
| 716 | options.set_no_auto_capture(true); |
| 717 | printf("testing Option <NO_AUTO_CAPTURE>\n"); |
| 718 | if (VERBOSE_TEST) |
| 719 | printf("parentheses do not capture text\n"); |
| 720 | re.Extract("\\1",str, &captured ); |
| 721 | CHECK_EQ(captured, "world"); |
| 722 | } |
| 723 | |
| 724 | static void Test_UNGREEDY() { |
| 725 | RE_Options options; |
| 726 | const char *str = "HELLO, 'this' is the 'world'"; |
| 727 | |
| 728 | options.set_ungreedy(true); |
| 729 | GetOneOptionResult("UNGREEDY 1", "('.*')", str, options, false, "'this'" ); |
| 730 | GetOneOptionResult("UNGREEDY 2", "('.*')", str, RE_Options().set_ungreedy(true), false, "'this'" ); |
| 731 | GetOneOptionResult("UNGREEDY", "('.*?')", str, options, false, "'this' is the 'world'" ); |
| 732 | |
| 733 | options.set_ungreedy(false); |
| 734 | GetOneOptionResult("no UNGREEDY", "('.*')", str, options, false, "'this' is the 'world'" ); |
| 735 | GetOneOptionResult("no UNGREEDY", "('.*?')", str, options, false, "'this'" ); |
| 736 | } |
| 737 | |
| 738 | static void Test_all_options() { |
| 739 | const char *str = "HELLO\n" "cruel\n" "world"; |
| 740 | RE_Options options; |
| 741 | options.set_all_options(PCRE_CASELESS | PCRE_DOTALL); |
| 742 | |
| 743 | TestOneOption("all_options (CASELESS|DOTALL)", "^hello.*WORLD", str , options, false); |
| 744 | options.set_all_options(0); |
| 745 | TestOneOption("all_options (0)", "^hello.*WORLD", str , options, false, false); |
| 746 | options.set_all_options(PCRE_MULTILINE | PCRE_EXTENDED); |
| 747 | |
| 748 | TestOneOption("all_options (MULTILINE|EXTENDED)", " ^ c r u e l $ ", str, options, false); |
| 749 | TestOneOption("all_options (MULTILINE|EXTENDED) with constructor", |
| 750 | " ^ c r u e l $ ", |
| 751 | str, |
| 752 | RE_Options(PCRE_MULTILINE | PCRE_EXTENDED), |
| 753 | false); |
| 754 | |
| 755 | TestOneOption("all_options (MULTILINE|EXTENDED) with concatenation", |
| 756 | " ^ c r u e l $ ", |
| 757 | str, |
| 758 | RE_Options() |
| 759 | .set_multiline(true) |
| 760 | .set_extended(true), |
| 761 | false); |
| 762 | |
| 763 | options.set_all_options(0); |
| 764 | TestOneOption("all_options (0)", "^ c r u e l $", str, options, false, false); |
| 765 | |
| 766 | } |
| 767 | |
| 768 | static void TestOptions() { |
| 769 | printf("Testing Options\n"); |
| 770 | Test_CASELESS(); |
| 771 | Test_MULTILINE(); |
| 772 | Test_DOTALL(); |
| 773 | Test_DOLLAR_ENDONLY(); |
| 774 | Test_EXTENDED(); |
| 775 | Test_NO_AUTO_CAPTURE(); |
| 776 | Test_UNGREEDY(); |
| 777 | Test_EXTRA(); |
| 778 | Test_all_options(); |
| 779 | } |
| 780 | |
| 781 | static void TestConstructors() { |
| 782 | printf("Testing constructors\n"); |
| 783 | |
| 784 | RE_Options options; |
| 785 | options.set_dotall(true); |
| 786 | const char *str = "HELLO\n" "cruel\n" "world"; |
| 787 | |
| 788 | RE orig("HELLO.*world", options); |
| 789 | CHECK(orig.FullMatch(str)); |
| 790 | |
| 791 | RE copy1(orig); |
| 792 | CHECK(copy1.FullMatch(str)); |
| 793 | |
| 794 | RE copy2("not a match"); |
| 795 | CHECK(!copy2.FullMatch(str)); |
| 796 | copy2 = copy1; |
| 797 | CHECK(copy2.FullMatch(str)); |
| 798 | copy2 = orig; |
| 799 | CHECK(copy2.FullMatch(str)); |
| 800 | |
| 801 | // Make sure when we assign to ourselves, nothing bad happens |
| 802 | orig = orig; |
| 803 | copy1 = copy1; |
| 804 | copy2 = copy2; |
| 805 | CHECK(orig.FullMatch(str)); |
| 806 | CHECK(copy1.FullMatch(str)); |
| 807 | CHECK(copy2.FullMatch(str)); |
| 808 | } |
| 809 | |
| 810 | int main(int argc, char** argv) { |
| 811 | // Treat any flag as --help |
| 812 | if (argc > 1 && argv[1][0] == '-') { |
| 813 | printf("Usage: %s [timing1|timing2|timing3 num-iters]\n" |
| 814 | " If 'timingX ###' is specified, run the given timing test\n" |
| 815 | " with the given number of iterations, rather than running\n" |
| 816 | " the default corectness test.\n", argv[0]); |
| 817 | return 0; |
| 818 | } |
| 819 | |
| 820 | if (argc > 1) { |
| 821 | if ( argc == 2 || atoi(argv[2]) == 0) { |
| 822 | printf("timing mode needs a num-iters argument\n"); |
| 823 | return 1; |
| 824 | } |
| 825 | if (!strcmp(argv[1], "timing1")) |
| 826 | Timing1(atoi(argv[2])); |
| 827 | else if (!strcmp(argv[1], "timing2")) |
| 828 | Timing2(atoi(argv[2])); |
| 829 | else if (!strcmp(argv[1], "timing3")) |
| 830 | Timing3(atoi(argv[2])); |
| 831 | else |
| 832 | printf("Unknown argument '%s'\n", argv[1]); |
| 833 | return 0; |
| 834 | } |
| 835 | |
| 836 | printf("PCRE C++ wrapper tests\n"); |
| 837 | printf("Testing FullMatch\n"); |
| 838 | |
| 839 | int i; |
| 840 | string s; |
| 841 | |
| 842 | /***** FullMatch with no args *****/ |
| 843 | |
| 844 | CHECK(RE("h.*o").FullMatch("hello")); |
| 845 | CHECK(!RE("h.*o").FullMatch("othello")); // Must be anchored at front |
| 846 | CHECK(!RE("h.*o").FullMatch("hello!")); // Must be anchored at end |
| 847 | CHECK(RE("a*").FullMatch("aaaa")); // Fullmatch with normal op |
| 848 | CHECK(RE("a*?").FullMatch("aaaa")); // Fullmatch with nongreedy op |
| 849 | CHECK(RE("a*?\\z").FullMatch("aaaa")); // Two unusual ops |
| 850 | |
| 851 | /***** FullMatch with args *****/ |
| 852 | |
| 853 | // Zero-arg |
| 854 | CHECK(RE("\\d+").FullMatch("1001")); |
| 855 | |
| 856 | // Single-arg |
| 857 | CHECK(RE("(\\d+)").FullMatch("1001", &i)); |
| 858 | CHECK_EQ(i, 1001); |
| 859 | CHECK(RE("(-?\\d+)").FullMatch("-123", &i)); |
| 860 | CHECK_EQ(i, -123); |
| 861 | CHECK(!RE("()\\d+").FullMatch("10", &i)); |
| 862 | CHECK(!RE("(\\d+)").FullMatch("1234567890123456789012345678901234567890", |
| 863 | &i)); |
| 864 | |
| 865 | // Digits surrounding integer-arg |
| 866 | CHECK(RE("1(\\d*)4").FullMatch("1234", &i)); |
| 867 | CHECK_EQ(i, 23); |
| 868 | CHECK(RE("(\\d)\\d+").FullMatch("1234", &i)); |
| 869 | CHECK_EQ(i, 1); |
| 870 | CHECK(RE("(-\\d)\\d+").FullMatch("-1234", &i)); |
| 871 | CHECK_EQ(i, -1); |
| 872 | CHECK(RE("(\\d)").PartialMatch("1234", &i)); |
| 873 | CHECK_EQ(i, 1); |
| 874 | CHECK(RE("(-\\d)").PartialMatch("-1234", &i)); |
| 875 | CHECK_EQ(i, -1); |
| 876 | |
| 877 | // String-arg |
| 878 | CHECK(RE("h(.*)o").FullMatch("hello", &s)); |
| 879 | CHECK_EQ(s, string("ell")); |
| 880 | |
| 881 | // StringPiece-arg |
| 882 | StringPiece sp; |
| 883 | CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &sp, &i)); |
| 884 | CHECK_EQ(sp.size(), 4); |
| 885 | CHECK(memcmp(sp.data(), "ruby", 4) == 0); |
| 886 | CHECK_EQ(i, 1234); |
| 887 | |
| 888 | // Multi-arg |
| 889 | CHECK(RE("(\\w+):(\\d+)").FullMatch("ruby:1234", &s, &i)); |
| 890 | CHECK_EQ(s, string("ruby")); |
| 891 | CHECK_EQ(i, 1234); |
| 892 | |
| 893 | // Ignore non-void* NULL arg |
| 894 | CHECK(RE("he(.*)lo").FullMatch("hello", (char*)NULL)); |
| 895 | CHECK(RE("h(.*)o").FullMatch("hello", (string*)NULL)); |
| 896 | CHECK(RE("h(.*)o").FullMatch("hello", (StringPiece*)NULL)); |
| 897 | CHECK(RE("(.*)").FullMatch("1234", (int*)NULL)); |
| 898 | #ifdef HAVE_LONG_LONG |
| 899 | CHECK(RE("(.*)").FullMatch("1234567890123456", (long long*)NULL)); |
| 900 | #endif |
| 901 | CHECK(RE("(.*)").FullMatch("123.4567890123456", (double*)NULL)); |
| 902 | CHECK(RE("(.*)").FullMatch("123.4567890123456", (float*)NULL)); |
| 903 | |
| 904 | // Fail on non-void* NULL arg if the match doesn't parse for the given type. |
| 905 | CHECK(!RE("h(.*)lo").FullMatch("hello", &s, (char*)NULL)); |
| 906 | CHECK(!RE("(.*)").FullMatch("hello", (int*)NULL)); |
| 907 | CHECK(!RE("(.*)").FullMatch("1234567890123456", (int*)NULL)); |
| 908 | CHECK(!RE("(.*)").FullMatch("hello", (double*)NULL)); |
| 909 | CHECK(!RE("(.*)").FullMatch("hello", (float*)NULL)); |
| 910 | |
| 911 | // Ignored arg |
| 912 | CHECK(RE("(\\w+)(:)(\\d+)").FullMatch("ruby:1234", &s, (void*)NULL, &i)); |
| 913 | CHECK_EQ(s, string("ruby")); |
| 914 | CHECK_EQ(i, 1234); |
| 915 | |
| 916 | // Type tests |
| 917 | { |
| 918 | char c; |
| 919 | CHECK(RE("(H)ello").FullMatch("Hello", &c)); |
| 920 | CHECK_EQ(c, 'H'); |
| 921 | } |
| 922 | { |
| 923 | unsigned char c; |
| 924 | CHECK(RE("(H)ello").FullMatch("Hello", &c)); |
| 925 | CHECK_EQ(c, static_cast<unsigned char>('H')); |
| 926 | } |
| 927 | { |
| 928 | short v; |
| 929 | CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 930 | CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); |
| 931 | CHECK(RE("(-?\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); |
| 932 | CHECK(RE("(-?\\d+)").FullMatch("-32768", &v)); CHECK_EQ(v, -32768); |
| 933 | CHECK(!RE("(-?\\d+)").FullMatch("-32769", &v)); |
| 934 | CHECK(!RE("(-?\\d+)").FullMatch("32768", &v)); |
| 935 | } |
| 936 | { |
| 937 | unsigned short v; |
| 938 | CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 939 | CHECK(RE("(\\d+)").FullMatch("32767", &v)); CHECK_EQ(v, 32767); |
| 940 | CHECK(RE("(\\d+)").FullMatch("65535", &v)); CHECK_EQ(v, 65535); |
| 941 | CHECK(!RE("(\\d+)").FullMatch("65536", &v)); |
| 942 | } |
| 943 | { |
| 944 | int v; |
| 945 | static const int max_value = 0x7fffffff; |
| 946 | static const int min_value = -max_value - 1; |
| 947 | CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 948 | CHECK(RE("(-?\\d+)").FullMatch("-100", &v)); CHECK_EQ(v, -100); |
| 949 | CHECK(RE("(-?\\d+)").FullMatch("2147483647", &v)); CHECK_EQ(v, max_value); |
| 950 | CHECK(RE("(-?\\d+)").FullMatch("-2147483648", &v)); CHECK_EQ(v, min_value); |
| 951 | CHECK(!RE("(-?\\d+)").FullMatch("-2147483649", &v)); |
| 952 | CHECK(!RE("(-?\\d+)").FullMatch("2147483648", &v)); |
| 953 | } |
| 954 | { |
| 955 | unsigned int v; |
| 956 | static const unsigned int max_value = 0xfffffffful; |
| 957 | CHECK(RE("(\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 958 | CHECK(RE("(\\d+)").FullMatch("4294967295", &v)); CHECK_EQ(v, max_value); |
| 959 | CHECK(!RE("(\\d+)").FullMatch("4294967296", &v)); |
| 960 | } |
| 961 | #ifdef HAVE_LONG_LONG |
| 962 | # if defined(__MINGW__) || defined(__MINGW32__) |
| 963 | # define LLD "%I64d" |
| 964 | # define LLU "%I64u" |
| 965 | # else |
| 966 | # define LLD "%lld" |
| 967 | # define LLU "%llu" |
| 968 | # endif |
| 969 | { |
| 970 | long long v; |
| 971 | static const long long max_value = 0x7fffffffffffffffLL; |
| 972 | static const long long min_value = -max_value - 1; |
| 973 | char buf[32]; // definitely big enough for a long long |
| 974 | |
| 975 | CHECK(RE("(-?\\d+)").FullMatch("100", &v)); CHECK_EQ(v, 100); |
| 976 | CHECK(RE("(-?\\d+)").FullMatch("-100",&v)); CHECK_EQ(v, -100); |
| 977 | |
| 978 | sprintf(buf, LLD, max_value); |
| 979 | CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
| 980 | |
| 981 | sprintf(buf, LLD, min_value); |
| 982 | CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, min_value); |
| 983 | |
| 984 | sprintf(buf, LLD, max_value); |
| 985 | assert(buf[strlen(buf)-1] != '9'); |
| 986 | buf[strlen(buf)-1]++; |
| 987 | CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 988 | |
| 989 | sprintf(buf, LLD, min_value); |
| 990 | assert(buf[strlen(buf)-1] != '9'); |
| 991 | buf[strlen(buf)-1]++; |
| 992 | CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 993 | } |
| 994 | #endif |
| 995 | #if defined HAVE_UNSIGNED_LONG_LONG && defined HAVE_LONG_LONG |
| 996 | { |
| 997 | unsigned long long v; |
| 998 | long long v2; |
| 999 | static const unsigned long long max_value = 0xffffffffffffffffULL; |
| 1000 | char buf[32]; // definitely big enough for an unsigned long long |
| 1001 | |
| 1002 | CHECK(RE("(-?\\d+)").FullMatch("100",&v)); CHECK_EQ(v, 100); |
| 1003 | CHECK(RE("(-?\\d+)").FullMatch("-100",&v2)); CHECK_EQ(v2, -100); |
| 1004 | |
| 1005 | sprintf(buf, LLU, max_value); |
| 1006 | CHECK(RE("(-?\\d+)").FullMatch(buf,&v)); CHECK_EQ(v, max_value); |
| 1007 | |
| 1008 | assert(buf[strlen(buf)-1] != '9'); |
| 1009 | buf[strlen(buf)-1]++; |
| 1010 | CHECK(!RE("(-?\\d+)").FullMatch(buf, &v)); |
| 1011 | } |
| 1012 | #endif |
| 1013 | { |
| 1014 | float v; |
| 1015 | CHECK(RE("(.*)").FullMatch("100", &v)); |
| 1016 | CHECK(RE("(.*)").FullMatch("-100.", &v)); |
| 1017 | CHECK(RE("(.*)").FullMatch("1e23", &v)); |
| 1018 | } |
| 1019 | { |
| 1020 | double v; |
| 1021 | CHECK(RE("(.*)").FullMatch("100", &v)); |
| 1022 | CHECK(RE("(.*)").FullMatch("-100.", &v)); |
| 1023 | CHECK(RE("(.*)").FullMatch("1e23", &v)); |
| 1024 | } |
| 1025 | |
| 1026 | // Check that matching is fully anchored |
| 1027 | CHECK(!RE("(\\d+)").FullMatch("x1001", &i)); |
| 1028 | CHECK(!RE("(\\d+)").FullMatch("1001x", &i)); |
| 1029 | CHECK(RE("x(\\d+)").FullMatch("x1001", &i)); CHECK_EQ(i, 1001); |
| 1030 | CHECK(RE("(\\d+)x").FullMatch("1001x", &i)); CHECK_EQ(i, 1001); |
| 1031 | |
| 1032 | // Braces |
| 1033 | CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcd")); |
| 1034 | CHECK(RE("[0-9a-f+.-]{5,}").FullMatch("0abcde")); |
| 1035 | CHECK(!RE("[0-9a-f+.-]{5,}").FullMatch("0abc")); |
| 1036 | |
| 1037 | // Complicated RE |
| 1038 | CHECK(RE("foo|bar|[A-Z]").FullMatch("foo")); |
| 1039 | CHECK(RE("foo|bar|[A-Z]").FullMatch("bar")); |
| 1040 | CHECK(RE("foo|bar|[A-Z]").FullMatch("X")); |
| 1041 | CHECK(!RE("foo|bar|[A-Z]").FullMatch("XY")); |
| 1042 | |
| 1043 | // Check full-match handling (needs '$' tacked on internally) |
| 1044 | CHECK(RE("fo|foo").FullMatch("fo")); |
| 1045 | CHECK(RE("fo|foo").FullMatch("foo")); |
| 1046 | CHECK(RE("fo|foo$").FullMatch("fo")); |
| 1047 | CHECK(RE("fo|foo$").FullMatch("foo")); |
| 1048 | CHECK(RE("foo$").FullMatch("foo")); |
| 1049 | CHECK(!RE("foo\\$").FullMatch("foo$bar")); |
| 1050 | CHECK(!RE("fo|bar").FullMatch("fox")); |
| 1051 | |
| 1052 | // Enable the following (change `if (false)` to `if (true)`) if we change the |
| 1053 | // handling of '$' to prevent it from matching a trailing newline |
| 1054 | if (false) { |
| 1055 | // Check that we don't get bitten by pcre's special handling of a |
| 1056 | // '\n' at the end of the string matching '$' |
| 1057 | CHECK(!RE("foo$").PartialMatch("foo\n")); |
| 1058 | } |
| 1059 | |
| 1060 | // Number of args |
| 1061 | int a[16]; |
| 1062 | CHECK(RE("").FullMatch("")); |
| 1063 | |
| 1064 | memset(a, 0, sizeof(0)); |
| 1065 | CHECK(RE("(\\d){1}").FullMatch("1", |
| 1066 | &a[0])); |
| 1067 | CHECK_EQ(a[0], 1); |
| 1068 | |
| 1069 | memset(a, 0, sizeof(0)); |
| 1070 | CHECK(RE("(\\d)(\\d)").FullMatch("12", |
| 1071 | &a[0], &a[1])); |
| 1072 | CHECK_EQ(a[0], 1); |
| 1073 | CHECK_EQ(a[1], 2); |
| 1074 | |
| 1075 | memset(a, 0, sizeof(0)); |
| 1076 | CHECK(RE("(\\d)(\\d)(\\d)").FullMatch("123", |
| 1077 | &a[0], &a[1], &a[2])); |
| 1078 | CHECK_EQ(a[0], 1); |
| 1079 | CHECK_EQ(a[1], 2); |
| 1080 | CHECK_EQ(a[2], 3); |
| 1081 | |
| 1082 | memset(a, 0, sizeof(0)); |
| 1083 | CHECK(RE("(\\d)(\\d)(\\d)(\\d)").FullMatch("1234", |
| 1084 | &a[0], &a[1], &a[2], &a[3])); |
| 1085 | CHECK_EQ(a[0], 1); |
| 1086 | CHECK_EQ(a[1], 2); |
| 1087 | CHECK_EQ(a[2], 3); |
| 1088 | CHECK_EQ(a[3], 4); |
| 1089 | |
| 1090 | memset(a, 0, sizeof(0)); |
| 1091 | CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("12345", |
| 1092 | &a[0], &a[1], &a[2], |
| 1093 | &a[3], &a[4])); |
| 1094 | CHECK_EQ(a[0], 1); |
| 1095 | CHECK_EQ(a[1], 2); |
| 1096 | CHECK_EQ(a[2], 3); |
| 1097 | CHECK_EQ(a[3], 4); |
| 1098 | CHECK_EQ(a[4], 5); |
| 1099 | |
| 1100 | memset(a, 0, sizeof(0)); |
| 1101 | CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("123456", |
| 1102 | &a[0], &a[1], &a[2], |
| 1103 | &a[3], &a[4], &a[5])); |
| 1104 | CHECK_EQ(a[0], 1); |
| 1105 | CHECK_EQ(a[1], 2); |
| 1106 | CHECK_EQ(a[2], 3); |
| 1107 | CHECK_EQ(a[3], 4); |
| 1108 | CHECK_EQ(a[4], 5); |
| 1109 | CHECK_EQ(a[5], 6); |
| 1110 | |
| 1111 | memset(a, 0, sizeof(0)); |
| 1112 | CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch("1234567", |
| 1113 | &a[0], &a[1], &a[2], &a[3], |
| 1114 | &a[4], &a[5], &a[6])); |
| 1115 | CHECK_EQ(a[0], 1); |
| 1116 | CHECK_EQ(a[1], 2); |
| 1117 | CHECK_EQ(a[2], 3); |
| 1118 | CHECK_EQ(a[3], 4); |
| 1119 | CHECK_EQ(a[4], 5); |
| 1120 | CHECK_EQ(a[5], 6); |
| 1121 | CHECK_EQ(a[6], 7); |
| 1122 | |
| 1123 | memset(a, 0, sizeof(0)); |
| 1124 | CHECK(RE("(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)" |
| 1125 | "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)").FullMatch( |
| 1126 | "1234567890123456", |
| 1127 | &a[0], &a[1], &a[2], &a[3], |
| 1128 | &a[4], &a[5], &a[6], &a[7], |
| 1129 | &a[8], &a[9], &a[10], &a[11], |
| 1130 | &a[12], &a[13], &a[14], &a[15])); |
| 1131 | CHECK_EQ(a[0], 1); |
| 1132 | CHECK_EQ(a[1], 2); |
| 1133 | CHECK_EQ(a[2], 3); |
| 1134 | CHECK_EQ(a[3], 4); |
| 1135 | CHECK_EQ(a[4], 5); |
| 1136 | CHECK_EQ(a[5], 6); |
| 1137 | CHECK_EQ(a[6], 7); |
| 1138 | CHECK_EQ(a[7], 8); |
| 1139 | CHECK_EQ(a[8], 9); |
| 1140 | CHECK_EQ(a[9], 0); |
| 1141 | CHECK_EQ(a[10], 1); |
| 1142 | CHECK_EQ(a[11], 2); |
| 1143 | CHECK_EQ(a[12], 3); |
| 1144 | CHECK_EQ(a[13], 4); |
| 1145 | CHECK_EQ(a[14], 5); |
| 1146 | CHECK_EQ(a[15], 6); |
| 1147 | |
| 1148 | /***** PartialMatch *****/ |
| 1149 | |
| 1150 | printf("Testing PartialMatch\n"); |
| 1151 | |
| 1152 | CHECK(RE("h.*o").PartialMatch("hello")); |
| 1153 | CHECK(RE("h.*o").PartialMatch("othello")); |
| 1154 | CHECK(RE("h.*o").PartialMatch("hello!")); |
| 1155 | CHECK(RE("((((((((((((((((((((x))))))))))))))))))))").PartialMatch("x")); |
| 1156 | |
| 1157 | /***** other tests *****/ |
| 1158 | |
| 1159 | RadixTests(); |
| 1160 | TestReplace(); |
| 1161 | TestExtract(); |
| 1162 | TestConsume(); |
| 1163 | TestFindAndConsume(); |
| 1164 | TestQuoteMetaAll(); |
| 1165 | TestMatchNumberPeculiarity(); |
| 1166 | |
| 1167 | // Check the pattern() accessor |
| 1168 | { |
| 1169 | const string kPattern = "http://([^/]+)/.*"; |
| 1170 | const RE re(kPattern); |
| 1171 | CHECK_EQ(kPattern, re.pattern()); |
| 1172 | } |
| 1173 | |
| 1174 | // Check RE error field. |
| 1175 | { |
| 1176 | RE re("foo"); |
| 1177 | CHECK(re.error().empty()); // Must have no error |
| 1178 | } |
| 1179 | |
| 1180 | #ifdef SUPPORT_UTF8 |
| 1181 | // Check UTF-8 handling |
| 1182 | { |
| 1183 | printf("Testing UTF-8 handling\n"); |
| 1184 | |
| 1185 | // Three Japanese characters (nihongo) |
| 1186 | const unsigned char utf8_string[] = { |
| 1187 | 0xe6, 0x97, 0xa5, // 65e5 |
| 1188 | 0xe6, 0x9c, 0xac, // 672c |
| 1189 | 0xe8, 0xaa, 0x9e, // 8a9e |
| 1190 | 0 |
| 1191 | }; |
| 1192 | const unsigned char utf8_pattern[] = { |
| 1193 | '.', |
| 1194 | 0xe6, 0x9c, 0xac, // 672c |
| 1195 | '.', |
| 1196 | 0 |
| 1197 | }; |
| 1198 | |
| 1199 | // Both should match in either mode, bytes or UTF-8 |
| 1200 | RE re_test1("........."); |
| 1201 | CHECK(re_test1.FullMatch(utf8_string)); |
| 1202 | RE re_test2("...", pcrecpp::UTF8()); |
| 1203 | CHECK(re_test2.FullMatch(utf8_string)); |
| 1204 | |
| 1205 | // Check that '.' matches one byte or UTF-8 character |
| 1206 | // according to the mode. |
| 1207 | string ss; |
| 1208 | RE re_test3("(.)"); |
| 1209 | CHECK(re_test3.PartialMatch(utf8_string, &ss)); |
| 1210 | CHECK_EQ(ss, string("\xe6")); |
| 1211 | RE re_test4("(.)", pcrecpp::UTF8()); |
| 1212 | CHECK(re_test4.PartialMatch(utf8_string, &ss)); |
| 1213 | CHECK_EQ(ss, string("\xe6\x97\xa5")); |
| 1214 | |
| 1215 | // Check that string matches itself in either mode |
| 1216 | RE re_test5(utf8_string); |
| 1217 | CHECK(re_test5.FullMatch(utf8_string)); |
| 1218 | RE re_test6(utf8_string, pcrecpp::UTF8()); |
| 1219 | CHECK(re_test6.FullMatch(utf8_string)); |
| 1220 | |
| 1221 | // Check that pattern matches string only in UTF8 mode |
| 1222 | RE re_test7(utf8_pattern); |
| 1223 | CHECK(!re_test7.FullMatch(utf8_string)); |
| 1224 | RE re_test8(utf8_pattern, pcrecpp::UTF8()); |
| 1225 | CHECK(re_test8.FullMatch(utf8_string)); |
| 1226 | } |
| 1227 | |
| 1228 | // Check that ungreedy, UTF8 regular expressions don't match when they |
| 1229 | // oughtn't -- see bug 82246. |
| 1230 | { |
| 1231 | // This code always worked. |
| 1232 | const char* pattern = "\\w+X"; |
| 1233 | const string target = "a aX"; |
| 1234 | RE match_sentence(pattern); |
| 1235 | RE match_sentence_re(pattern, pcrecpp::UTF8()); |
| 1236 | |
| 1237 | CHECK(!match_sentence.FullMatch(target)); |
| 1238 | CHECK(!match_sentence_re.FullMatch(target)); |
| 1239 | } |
| 1240 | |
| 1241 | { |
| 1242 | const char* pattern = "(?U)\\w+X"; |
| 1243 | const string target = "a aX"; |
| 1244 | RE match_sentence(pattern); |
| 1245 | RE match_sentence_re(pattern, pcrecpp::UTF8()); |
| 1246 | |
| 1247 | CHECK(!match_sentence.FullMatch(target)); |
| 1248 | CHECK(!match_sentence_re.FullMatch(target)); |
| 1249 | } |
| 1250 | #endif /* def SUPPORT_UTF8 */ |
| 1251 | |
| 1252 | printf("Testing error reporting\n"); |
| 1253 | |
| 1254 | { RE re("a\\1"); CHECK(!re.error().empty()); } |
| 1255 | { |
| 1256 | RE re("a[x"); |
| 1257 | CHECK(!re.error().empty()); |
| 1258 | } |
| 1259 | { |
| 1260 | RE re("a[z-a]"); |
| 1261 | CHECK(!re.error().empty()); |
| 1262 | } |
| 1263 | { |
| 1264 | RE re("a[[:foobar:]]"); |
| 1265 | CHECK(!re.error().empty()); |
| 1266 | } |
| 1267 | { |
| 1268 | RE re("a(b"); |
| 1269 | CHECK(!re.error().empty()); |
| 1270 | } |
| 1271 | { |
| 1272 | RE re("a\\"); |
| 1273 | CHECK(!re.error().empty()); |
| 1274 | } |
| 1275 | |
| 1276 | // Test that recursion is stopped |
| 1277 | TestRecursion(); |
| 1278 | |
| 1279 | // Test Options |
| 1280 | if (getenv("VERBOSE_TEST") != NULL) |
| 1281 | VERBOSE_TEST = true; |
| 1282 | TestOptions(); |
| 1283 | |
| 1284 | // Test the constructors |
| 1285 | TestConstructors(); |
| 1286 | |
| 1287 | // Done |
| 1288 | printf("OK\n"); |
| 1289 | |
| 1290 | return 0; |
| 1291 | } |