Tristan Matthews | 0461646 | 2013-11-14 16:09:34 -0500 | [diff] [blame] | 1 | /-- These tests for Unicode property support test PCRE's API and show some of |
| 2 | the compiled code. They are not Perl-compatible. --/ |
| 3 | |
| 4 | /[\p{L}]/DZ |
| 5 | |
| 6 | /[\p{^L}]/DZ |
| 7 | |
| 8 | /[\P{L}]/DZ |
| 9 | |
| 10 | /[\P{^L}]/DZ |
| 11 | |
| 12 | /[abc\p{L}\x{0660}]/8DZ |
| 13 | |
| 14 | /[\p{Nd}]/8DZ |
| 15 | 1234 |
| 16 | |
| 17 | /[\p{Nd}+-]+/8DZ |
| 18 | 1234 |
| 19 | 12-34 |
| 20 | 12+\x{661}-34 |
| 21 | ** Failers |
| 22 | abcd |
| 23 | |
| 24 | /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8iDZ |
| 25 | |
| 26 | /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/8DZ |
| 27 | |
| 28 | /AB\x{1fb0}/8DZ |
| 29 | |
| 30 | /AB\x{1fb0}/8DZi |
| 31 | |
| 32 | /[\x{105}-\x{109}]/8iDZ |
| 33 | \x{104} |
| 34 | \x{105} |
| 35 | \x{109} |
| 36 | ** Failers |
| 37 | \x{100} |
| 38 | \x{10a} |
| 39 | |
| 40 | /[z-\x{100}]/8iDZ |
| 41 | Z |
| 42 | z |
| 43 | \x{39c} |
| 44 | \x{178} |
| 45 | | |
| 46 | \x{80} |
| 47 | \x{ff} |
| 48 | \x{100} |
| 49 | \x{101} |
| 50 | ** Failers |
| 51 | \x{102} |
| 52 | Y |
| 53 | y |
| 54 | |
| 55 | /[z-\x{100}]/8DZi |
| 56 | |
| 57 | /(?:[\PPa*]*){8,}/ |
| 58 | |
| 59 | /[\P{Any}]/BZ |
| 60 | |
| 61 | /[\P{Any}\E]/BZ |
| 62 | |
| 63 | /(\P{Yi}+\277)/ |
| 64 | |
| 65 | /(\P{Yi}+\277)?/ |
| 66 | |
| 67 | /(?<=\P{Yi}{3}A)X/ |
| 68 | |
| 69 | /\p{Yi}+(\P{Yi}+)(?1)/ |
| 70 | |
| 71 | /(\P{Yi}{2}\277)?/ |
| 72 | |
| 73 | /[\P{Yi}A]/ |
| 74 | |
| 75 | /[\P{Yi}\P{Yi}\P{Yi}A]/ |
| 76 | |
| 77 | /[^\P{Yi}A]/ |
| 78 | |
| 79 | /[^\P{Yi}\P{Yi}\P{Yi}A]/ |
| 80 | |
| 81 | /(\P{Yi}*\277)*/ |
| 82 | |
| 83 | /(\P{Yi}*?\277)*/ |
| 84 | |
| 85 | /(\p{Yi}*+\277)*/ |
| 86 | |
| 87 | /(\P{Yi}?\277)*/ |
| 88 | |
| 89 | /(\P{Yi}??\277)*/ |
| 90 | |
| 91 | /(\p{Yi}?+\277)*/ |
| 92 | |
| 93 | /(\P{Yi}{0,3}\277)*/ |
| 94 | |
| 95 | /(\P{Yi}{0,3}?\277)*/ |
| 96 | |
| 97 | /(\p{Yi}{0,3}+\277)*/ |
| 98 | |
| 99 | /\p{Zl}{2,3}+/8BZ |
| 100 | \xe2\x80\xa8\xe2\x80\xa8 |
| 101 | \x{2028}\x{2028}\x{2028} |
| 102 | |
| 103 | /\p{Zl}/8BZ |
| 104 | |
| 105 | /\p{Lu}{3}+/8BZ |
| 106 | |
| 107 | /\pL{2}+/8BZ |
| 108 | |
| 109 | /\p{Cc}{2}+/8BZ |
| 110 | |
| 111 | /^\p{Cs}/8 |
| 112 | \?\x{dfff} |
| 113 | ** Failers |
| 114 | \x{09f} |
| 115 | |
| 116 | /^\p{Sc}+/8 |
| 117 | $\x{a2}\x{a3}\x{a4}\x{a5}\x{a6} |
| 118 | \x{9f2} |
| 119 | ** Failers |
| 120 | X |
| 121 | \x{2c2} |
| 122 | |
| 123 | /^\p{Zs}/8 |
| 124 | \ \ |
| 125 | \x{a0} |
| 126 | \x{1680} |
| 127 | \x{180e} |
| 128 | \x{2000} |
| 129 | \x{2001} |
| 130 | ** Failers |
| 131 | \x{2028} |
| 132 | \x{200d} |
| 133 | |
| 134 | /-- These four are here rather than in test 6 because Perl has problems with |
| 135 | the negative versions of the properties. --/ |
| 136 | |
| 137 | /\p{^Lu}/8i |
| 138 | 1234 |
| 139 | ** Failers |
| 140 | ABC |
| 141 | |
| 142 | /\P{Lu}/8i |
| 143 | 1234 |
| 144 | ** Failers |
| 145 | ABC |
| 146 | |
| 147 | /\p{Ll}/8i |
| 148 | a |
| 149 | Az |
| 150 | ** Failers |
| 151 | ABC |
| 152 | |
| 153 | /\p{Lu}/8i |
| 154 | A |
| 155 | a\x{10a0}B |
| 156 | ** Failers |
| 157 | a |
| 158 | \x{1d00} |
| 159 | |
| 160 | /[\x{c0}\x{391}]/8i |
| 161 | \x{c0} |
| 162 | \x{e0} |
| 163 | |
| 164 | /-- The next two are special cases where the lengths of the different cases of |
| 165 | the same character differ. The first went wrong with heap frame storage; the |
| 166 | second was broken in all cases. --/ |
| 167 | |
| 168 | /^\x{023a}+?(\x{0130}+)/8i |
| 169 | \x{023a}\x{2c65}\x{0130} |
| 170 | |
| 171 | /^\x{023a}+([^X])/8i |
| 172 | \x{023a}\x{2c65}X |
| 173 | |
| 174 | /\x{c0}+\x{116}+/8i |
| 175 | \x{c0}\x{e0}\x{116}\x{117} |
| 176 | |
| 177 | /[\x{c0}\x{116}]+/8i |
| 178 | \x{c0}\x{e0}\x{116}\x{117} |
| 179 | |
| 180 | /(\x{de})\1/8i |
| 181 | \x{de}\x{de} |
| 182 | \x{de}\x{fe} |
| 183 | \x{fe}\x{fe} |
| 184 | \x{fe}\x{de} |
| 185 | |
| 186 | /^\x{c0}$/8i |
| 187 | \x{c0} |
| 188 | \x{e0} |
| 189 | |
| 190 | /^\x{e0}$/8i |
| 191 | \x{c0} |
| 192 | \x{e0} |
| 193 | |
| 194 | /-- The next two should be Perl-compatible, but it fails to match \x{e0}. PCRE |
| 195 | will match it only with UCP support, because without that it has no notion |
| 196 | of case for anything other than the ASCII letters. --/ |
| 197 | |
| 198 | /((?i)[\x{c0}])/8 |
| 199 | \x{c0} |
| 200 | \x{e0} |
| 201 | |
| 202 | /(?i:[\x{c0}])/8 |
| 203 | \x{c0} |
| 204 | \x{e0} |
| 205 | |
| 206 | /-- This should be Perl-compatible but Perl 5.11 gets \x{300} wrong. --/8 |
| 207 | |
| 208 | /^\X/8 |
| 209 | A |
| 210 | A\x{300}BC |
| 211 | A\x{300}\x{301}\x{302}BC |
| 212 | *** Failers |
| 213 | \x{300} |
| 214 | |
| 215 | /-- These are PCRE's extra properties to help with Unicodizing \d etc. --/ |
| 216 | |
| 217 | /^\p{Xan}/8 |
| 218 | ABCD |
| 219 | 1234 |
| 220 | \x{6ca} |
| 221 | \x{a6c} |
| 222 | \x{10a7} |
| 223 | ** Failers |
| 224 | _ABC |
| 225 | |
| 226 | /^\p{Xan}+/8 |
| 227 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 228 | ** Failers |
| 229 | _ABC |
| 230 | |
| 231 | /^\p{Xan}+?/8 |
| 232 | \x{6ca}\x{a6c}\x{10a7}_ |
| 233 | |
| 234 | /^\p{Xan}*/8 |
| 235 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 236 | |
| 237 | /^\p{Xan}{2,9}/8 |
| 238 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 239 | |
| 240 | /^\p{Xan}{2,9}?/8 |
| 241 | \x{6ca}\x{a6c}\x{10a7}_ |
| 242 | |
| 243 | /^[\p{Xan}]/8 |
| 244 | ABCD1234_ |
| 245 | 1234abcd_ |
| 246 | \x{6ca} |
| 247 | \x{a6c} |
| 248 | \x{10a7} |
| 249 | ** Failers |
| 250 | _ABC |
| 251 | |
| 252 | /^[\p{Xan}]+/8 |
| 253 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 254 | ** Failers |
| 255 | _ABC |
| 256 | |
| 257 | /^>\p{Xsp}/8 |
| 258 | >\x{1680}\x{2028}\x{0b} |
| 259 | >\x{a0} |
| 260 | ** Failers |
| 261 | \x{0b} |
| 262 | |
| 263 | /^>\p{Xsp}+/8 |
| 264 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 265 | |
| 266 | /^>\p{Xsp}+?/8 |
| 267 | >\x{1680}\x{2028}\x{0b} |
| 268 | |
| 269 | /^>\p{Xsp}*/8 |
| 270 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 271 | |
| 272 | /^>\p{Xsp}{2,9}/8 |
| 273 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 274 | |
| 275 | /^>\p{Xsp}{2,9}?/8 |
| 276 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 277 | |
| 278 | /^>[\p{Xsp}]/8 |
| 279 | >\x{2028}\x{0b} |
| 280 | |
| 281 | /^>[\p{Xsp}]+/8 |
| 282 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 283 | |
| 284 | /^>\p{Xps}/8 |
| 285 | >\x{1680}\x{2028}\x{0b} |
| 286 | >\x{a0} |
| 287 | ** Failers |
| 288 | \x{0b} |
| 289 | |
| 290 | /^>\p{Xps}+/8 |
| 291 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 292 | |
| 293 | /^>\p{Xps}+?/8 |
| 294 | >\x{1680}\x{2028}\x{0b} |
| 295 | |
| 296 | /^>\p{Xps}*/8 |
| 297 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 298 | |
| 299 | /^>\p{Xps}{2,9}/8 |
| 300 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 301 | |
| 302 | /^>\p{Xps}{2,9}?/8 |
| 303 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 304 | |
| 305 | /^>[\p{Xps}]/8 |
| 306 | >\x{2028}\x{0b} |
| 307 | |
| 308 | /^>[\p{Xps}]+/8 |
| 309 | > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} |
| 310 | |
| 311 | /^\p{Xwd}/8 |
| 312 | ABCD |
| 313 | 1234 |
| 314 | \x{6ca} |
| 315 | \x{a6c} |
| 316 | \x{10a7} |
| 317 | _ABC |
| 318 | ** Failers |
| 319 | [] |
| 320 | |
| 321 | /^\p{Xwd}+/8 |
| 322 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 323 | |
| 324 | /^\p{Xwd}+?/8 |
| 325 | \x{6ca}\x{a6c}\x{10a7}_ |
| 326 | |
| 327 | /^\p{Xwd}*/8 |
| 328 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 329 | |
| 330 | /^\p{Xwd}{2,9}/8 |
| 331 | A_B12\x{6ca}\x{a6c}\x{10a7} |
| 332 | |
| 333 | /^\p{Xwd}{2,9}?/8 |
| 334 | \x{6ca}\x{a6c}\x{10a7}_ |
| 335 | |
| 336 | /^[\p{Xwd}]/8 |
| 337 | ABCD1234_ |
| 338 | 1234abcd_ |
| 339 | \x{6ca} |
| 340 | \x{a6c} |
| 341 | \x{10a7} |
| 342 | _ABC |
| 343 | ** Failers |
| 344 | [] |
| 345 | |
| 346 | /^[\p{Xwd}]+/8 |
| 347 | ABCD1234\x{6ca}\x{a6c}\x{10a7}_ |
| 348 | |
| 349 | /-- A check not in UTF-8 mode --/ |
| 350 | |
| 351 | /^[\p{Xwd}]+/ |
| 352 | ABCD1234_ |
| 353 | |
| 354 | /-- Some negative checks --/ |
| 355 | |
| 356 | /^[\P{Xwd}]+/8 |
| 357 | !.+\x{019}\x{35a}AB |
| 358 | |
| 359 | /^[\p{^Xwd}]+/8 |
| 360 | !.+\x{019}\x{35a}AB |
| 361 | |
| 362 | /[\D]/WBZ8 |
| 363 | 1\x{3c8}2 |
| 364 | |
| 365 | /[\d]/WBZ8 |
| 366 | >\x{6f4}< |
| 367 | |
| 368 | /[\S]/WBZ8 |
| 369 | \x{1680}\x{6f4}\x{1680} |
| 370 | |
| 371 | /[\s]/WBZ8 |
| 372 | >\x{1680}< |
| 373 | |
| 374 | /[\W]/WBZ8 |
| 375 | A\x{1712}B |
| 376 | |
| 377 | /[\w]/WBZ8 |
| 378 | >\x{1723}< |
| 379 | |
| 380 | /\D/WBZ8 |
| 381 | 1\x{3c8}2 |
| 382 | |
| 383 | /\d/WBZ8 |
| 384 | >\x{6f4}< |
| 385 | |
| 386 | /\S/WBZ8 |
| 387 | \x{1680}\x{6f4}\x{1680} |
| 388 | |
| 389 | /\s/WBZ8 |
| 390 | >\x{1680}> |
| 391 | |
| 392 | /\W/WBZ8 |
| 393 | A\x{1712}B |
| 394 | |
| 395 | /\w/WBZ8 |
| 396 | >\x{1723}< |
| 397 | |
| 398 | /[[:alpha:]]/WBZ |
| 399 | |
| 400 | /[[:lower:]]/WBZ |
| 401 | |
| 402 | /[[:upper:]]/WBZ |
| 403 | |
| 404 | /[[:alnum:]]/WBZ |
| 405 | |
| 406 | /[[:ascii:]]/WBZ |
| 407 | |
| 408 | /[[:blank:]]/WBZ |
| 409 | |
| 410 | /[[:cntrl:]]/WBZ |
| 411 | |
| 412 | /[[:digit:]]/WBZ |
| 413 | |
| 414 | /[[:graph:]]/WBZ |
| 415 | |
| 416 | /[[:print:]]/WBZ |
| 417 | |
| 418 | /[[:punct:]]/WBZ |
| 419 | |
| 420 | /[[:space:]]/WBZ |
| 421 | |
| 422 | /[[:word:]]/WBZ |
| 423 | |
| 424 | /[[:xdigit:]]/WBZ |
| 425 | |
| 426 | /-- Unicode properties for \b abd \B --/ |
| 427 | |
| 428 | /\b...\B/8W |
| 429 | abc_ |
| 430 | \x{37e}abc\x{376} |
| 431 | \x{37e}\x{376}\x{371}\x{393}\x{394} |
| 432 | !\x{c0}++\x{c1}\x{c2} |
| 433 | !\x{c0}+++++ |
| 434 | |
| 435 | /-- Without PCRE_UCP, non-ASCII always fail, even if < 256 --/ |
| 436 | |
| 437 | /\b...\B/8 |
| 438 | abc_ |
| 439 | ** Failers |
| 440 | \x{37e}abc\x{376} |
| 441 | \x{37e}\x{376}\x{371}\x{393}\x{394} |
| 442 | !\x{c0}++\x{c1}\x{c2} |
| 443 | !\x{c0}+++++ |
| 444 | |
| 445 | /-- With PCRE_UCP, non-UTF8 chars that are < 256 still check properties --/ |
| 446 | |
| 447 | /\b...\B/W |
| 448 | abc_ |
| 449 | !\x{c0}++\x{c1}\x{c2} |
| 450 | !\x{c0}+++++ |
| 451 | |
| 452 | /-- POSIX interface --/ |
| 453 | |
| 454 | /\w/P |
| 455 | +++\x{c2} |
| 456 | |
| 457 | /\w/WP |
| 458 | +++\x{c2} |
| 459 | |
| 460 | /-- Some of these are silly, but they check various combinations --/ |
| 461 | |
| 462 | /[[:^alpha:][:^cntrl:]]+/8WBZ |
| 463 | 123 |
| 464 | abc |
| 465 | |
| 466 | /[[:^cntrl:][:^alpha:]]+/8WBZ |
| 467 | 123 |
| 468 | abc |
| 469 | |
| 470 | /[[:alpha:]]+/8WBZ |
| 471 | abc |
| 472 | |
| 473 | /[[:^alpha:]\S]+/8WBZ |
| 474 | 123 |
| 475 | abc |
| 476 | |
| 477 | /[^\d]+/8WBZ |
| 478 | abc123 |
| 479 | abc\x{123} |
| 480 | \x{660}abc |
| 481 | |
| 482 | /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/8iSI |
| 483 | \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f} |
| 484 | \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f} |
| 485 | |
| 486 | /\p{Lu}+9\p{Lu}+B\p{Lu}+b/BZ |
| 487 | |
| 488 | /\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/BZ |
| 489 | |
| 490 | /\P{Lu}+9\P{Lu}+B\P{Lu}+b/BZ |
| 491 | |
| 492 | /\p{Han}+X\p{Greek}+\x{370}/BZ8 |
| 493 | |
| 494 | /\p{Xan}+!\p{Xan}+A/BZ |
| 495 | |
| 496 | /\p{Xsp}+!\p{Xsp}\t/BZ |
| 497 | |
| 498 | /\p{Xps}+!\p{Xps}\t/BZ |
| 499 | |
| 500 | /\p{Xwd}+!\p{Xwd}_/BZ |
| 501 | |
| 502 | /A+\p{N}A+\dB+\p{N}*B+\d*/WBZ |
| 503 | |
| 504 | /-- These behaved oddly in Perl, so they are kept in this test --/ |
| 505 | |
| 506 | /(\x{23a}\x{23a}\x{23a})?\1/8i |
| 507 | \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} |
| 508 | |
| 509 | /(ȺȺȺ)?\1/8i |
| 510 | ȺȺȺⱥⱥ |
| 511 | |
| 512 | /(\x{23a}\x{23a}\x{23a})?\1/8i |
| 513 | \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} |
| 514 | |
| 515 | /(ȺȺȺ)?\1/8i |
| 516 | ȺȺȺⱥⱥⱥ |
| 517 | |
| 518 | /(\x{23a}\x{23a}\x{23a})\1/8i |
| 519 | \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} |
| 520 | |
| 521 | /(ȺȺȺ)\1/8i |
| 522 | ȺȺȺⱥⱥ |
| 523 | |
| 524 | /(\x{23a}\x{23a}\x{23a})\1/8i |
| 525 | \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} |
| 526 | |
| 527 | /(ȺȺȺ)\1/8i |
| 528 | ȺȺȺⱥⱥⱥ |
| 529 | |
| 530 | /(\x{2c65}\x{2c65})\1/8i |
| 531 | \x{2c65}\x{2c65}\x{23a}\x{23a} |
| 532 | |
| 533 | /(ⱥⱥ)\1/8i |
| 534 | ⱥⱥȺȺ |
| 535 | |
| 536 | /(\x{23a}\x{23a}\x{23a})\1Y/8i |
| 537 | X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ |
| 538 | |
| 539 | /(\x{2c65}\x{2c65})\1Y/8i |
| 540 | X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ |
| 541 | |
| 542 | /-- --/ |
| 543 | |
| 544 | /-- These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE --/ |
| 545 | |
| 546 | /^[\p{Batak}]/8 |
| 547 | \x{1bc0} |
| 548 | \x{1bff} |
| 549 | ** Failers |
| 550 | \x{1bf4} |
| 551 | |
| 552 | /^[\p{Brahmi}]/8 |
| 553 | \x{11000} |
| 554 | \x{1106f} |
| 555 | ** Failers |
| 556 | \x{1104e} |
| 557 | |
| 558 | /^[\p{Mandaic}]/8 |
| 559 | \x{840} |
| 560 | \x{85e} |
| 561 | ** Failers |
| 562 | \x{85c} |
| 563 | \x{85d} |
| 564 | |
| 565 | /-- --/ |
| 566 | |
| 567 | /(\X*)(.)/s8 |
| 568 | A\x{300} |
| 569 | |
| 570 | /^S(\X*)e(\X*)$/8 |
| 571 | Stéréo |
| 572 | |
| 573 | /^\X/8 |
| 574 | ́réo |
| 575 | |
| 576 | /^a\X41z/<JS> |
| 577 | aX41z |
| 578 | *** Failers |
| 579 | aAz |
| 580 | |
| 581 | /(?<=ab\Cde)X/8 |
| 582 | |
| 583 | /-- End of testinput13 --/ |