blob: ca7eb54f3f3d5e0533bc0dd4fbfb61fa3de0e2c6 [file] [log] [blame]
/-- This set of tests checks the API, internals, and non-Perl stuff for UTF-8
support, excluding Unicode properties. --/
/\x{100}/8DZ
/\x{1000}/8DZ
/\x{10000}/8DZ
/\x{100000}/8DZ
/\x{1000000}/8DZ
/\x{4000000}/8DZ
/\x{7fffFFFF}/8DZ
/[\x{ff}]/8DZ
/[\x{100}]/8DZ
/\x{ffffffff}/8
/\x{100000000}/8
/^\x{100}a\x{1234}/8
\x{100}a\x{1234}bcd
/\x80/8DZ
/\xff/8DZ
/\x{0041}\x{2262}\x{0391}\x{002e}/DZ8
\x{0041}\x{2262}\x{0391}\x{002e}
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
\x{D55c}\x{ad6d}\x{C5B4}
/\x{65e5}\x{672c}\x{8a9e}/DZ8
\x{65e5}\x{672c}\x{8a9e}
/\x{80}/DZ8
/\x{084}/DZ8
/\x{104}/DZ8
/\x{861}/DZ8
/\x{212ab}/DZ8
/.{3,5}X/DZ8
\x{212ab}\x{212ab}\x{212ab}\x{861}X
/.{3,5}?/DZ8
\x{212ab}\x{212ab}\x{212ab}\x{861}
/(?<=\C)X/8
Should produce an error diagnostic
/-- This one is here not because it's different to Perl, but because the way
the captured single-byte is displayed. (In Perl it becomes a character, and you
can't tell the difference.) --/
/X(\C)(.*)/8
X\x{1234}
X\nabc
/-- This one is here because Perl gives out a grumbly error message (quite
correctly, but that messes up comparisons). --/
/a\Cb/8
*** Failers
a\x{100}b
/^[ab]/8DZ
bar
*** Failers
c
\x{ff}
\x{100}
/^[^ab]/8DZ
c
\x{ff}
\x{100}
*** Failers
aaa
/[^ab\xC0-\xF0]/8SDZ
\x{f1}
\x{bf}
\x{100}
\x{1000}
*** Failers
\x{c0}
\x{f0}
/Ā{3,4}/8SDZ
\x{100}\x{100}\x{100}\x{100\x{100}
/(\x{100}+|x)/8SDZ
/(\x{100}*a|x)/8SDZ
/(\x{100}{0,2}a|x)/8SDZ
/(\x{100}{1,2}a|x)/8SDZ
/\x{100}*(\d+|"(?1)")/8
1234
"1234"
\x{100}1234
"\x{100}1234"
\x{100}\x{100}12ab
\x{100}\x{100}"12"
*** Failers
\x{100}\x{100}abcd
/\x{100}/8DZ
/\x{100}*/8DZ
/a\x{100}*/8DZ
/ab\x{100}*/8DZ
/a\x{100}\x{101}*/8DZ
/a\x{100}\x{101}+/8DZ
/\x{100}*A/8DZ
A
/\x{100}*\d(?R)/8DZ
/[^\x{c4}]/DZ
/[^\x{c4}]/8DZ
/[\x{100}]/8DZ
\x{100}
Z\x{100}
\x{100}Z
*** Failers
/[Z\x{100}]/8DZ
Z\x{100}
\x{100}
\x{100}Z
*** Failers
/[\x{200}-\x{100}]/8
/[Ā-Ą]/8
\x{100}
\x{104}
*** Failers
\x{105}
\x{ff}
/[z-\x{100}]/8DZ
/[z\Qa-d]Ā\E]/8DZ
\x{100}
Ā
/[\xFF]/DZ
>\xff<
/[\xff]/DZ8
>\x{ff}<
/[^\xFF]/DZ
/[^\xff]/8DZ
/[Ä-Ü]/8
Ö # Matches without Study
\x{d6}
/[Ä-Ü]/8S
Ö <-- Same with Study
\x{d6}
/[\x{c4}-\x{dc}]/8
Ö # Matches without Study
\x{d6}
/[\x{c4}-\x{dc}]/8S
Ö <-- Same with Study
\x{d6}
/[Ã]/8
/Ã/8
/ÃÃÃxxx/8
/ÃÃÃxxx/8?DZSS
/abc/8
Ã]
Ã
ÃÃÃ
ÃÃÃ\?
\xe1\x88
\P\xe1\x88
\P\P\xe1\x88
XX\xea
\O0XX\xea
\O1XX\xea
\O2XX\xea
XX\xf1
XX\xf8
XX\xfc
ZZ\xea\xaf\x20YY
ZZ\xfd\xbf\xbf\x2f\xbf\xbfYY
ZZ\xfd\xbf\xbf\xbf\x2f\xbfYY
ZZ\xfd\xbf\xbf\xbf\xbf\x2fYY
ZZ\xffYY
ZZ\xfeYY
/anything/8
\xc0\x80
\xc1\x8f
\xe0\x9f\x80
\xf0\x8f\x80\x80
\xf8\x87\x80\x80\x80
\xfc\x83\x80\x80\x80\x80
\xfe\x80\x80\x80\x80\x80
\xff\x80\x80\x80\x80\x80
\xc3\x8f
\xe0\xaf\x80
\xe1\x80\x80
\xf0\x9f\x80\x80
\xf1\x8f\x80\x80
\xf8\x88\x80\x80\x80
\xf9\x87\x80\x80\x80
\xfc\x84\x80\x80\x80\x80
\xfd\x83\x80\x80\x80\x80
\?\xf8\x88\x80\x80\x80
\?\xf9\x87\x80\x80\x80
\?\xfc\x84\x80\x80\x80\x80
\?\xfd\x83\x80\x80\x80\x80
/\x{100}abc(xyz(?1))/8DZ
/[^\x{100}]abc(xyz(?1))/8DZ
/[ab\x{100}]abc(xyz(?1))/8DZ
/(\x{100}(b(?2)c))?/DZ8
/(\x{100}(b(?2)c)){0,2}/DZ8
/(\x{100}(b(?1)c))?/DZ8
/(\x{100}(b(?1)c)){0,2}/DZ8
/\W/8
A.B
A\x{100}B
/\w/8
\x{100}X
/a\x{1234}b/P8
a\x{1234}b
/^ˆ´/8DZ
/\777/I
/\777/8I
\x{1ff}
\777
/\x{100}*\d/8DZ
/\x{100}*\s/8DZ
/\x{100}*\w/8DZ
/\x{100}*\D/8DZ
/\x{100}*\S/8DZ
/\x{100}*\W/8DZ
/\x{100}+\x{200}/8DZ
/\x{100}+X/8DZ
/X+\x{200}/8DZ
/()()()()()()()()()()
()()()()()()()()()()
()()()()()()()()()()
()()()()()()()()()()
A (x) (?41) B/8x
AxxB
/^[\x{100}\E-\Q\E\x{150}]/BZ8
/^[\QĀ\E-\QŐ\E]/BZ8
/^[\QĀ\E-\QŐ\E/BZ8
/^abc./mgx8<any>
abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
/abc.$/mgx8<any>
abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
/^a\Rb/8<bsr_unicode>
a\nb
a\rb
a\r\nb
a\x0bb
a\x0cb
a\x{85}b
a\x{2028}b
a\x{2029}b
** Failers
a\n\rb
/^a\R*b/8<bsr_unicode>
ab
a\nb
a\rb
a\r\nb
a\x0bb
a\x0c\x{2028}\x{2029}b
a\x{85}b
a\n\rb
a\n\r\x{85}\x0cb
/^a\R+b/8<bsr_unicode>
a\nb
a\rb
a\r\nb
a\x0bb
a\x0c\x{2028}\x{2029}b
a\x{85}b
a\n\rb
a\n\r\x{85}\x0cb
** Failers
ab
/^a\R{1,3}b/8<bsr_unicode>
a\nb
a\n\rb
a\n\r\x{85}b
a\r\n\r\nb
a\r\n\r\n\r\nb
a\n\r\n\rb
a\n\n\r\nb
** Failers
a\n\n\n\rb
a\r
/\H\h\V\v/8
X X\x0a
X\x09X\x0b
** Failers
\x{a0} X\x0a
/\H*\h+\V?\v{3,4}/8
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c
** Failers
\x09\x20\x{a0}\x0a\x0b
/\H\h\V\v/8
\x{3001}\x{3000}\x{2030}\x{2028}
X\x{180e}X\x{85}
** Failers
\x{2009} X\x0a
/\H*\h+\V?\v{3,4}/8
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
\x09\x20\x{202f}\x0a\x0b\x0c
** Failers
\x09\x{200a}\x{a0}\x{2028}\x0b
/[\h]/8BZ
>\x{1680}
/[\h]{3,}/8BZ
>\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
/[\v]/8BZ
/[\H]/8BZ
/[\V]/8BZ
/.*$/8<any>
\x{1ec5}
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
/X/8
\x{0}\x{d7ff}\x{e000}\x{10ffff}
\x{d800}
\x{d800}\?
\x{da00}
\x{da00}\?
\x{dfff}
\x{dfff}\?
\x{110000}
\x{110000}\?
\x{2000000}
\x{2000000}\?
\x{7fffffff}
\x{7fffffff}\?
/a\Rb/I8<bsr_anycrlf>
a\rb
a\nb
a\r\nb
** Failers
a\x{85}b
a\x0bb
/a\Rb/I8<bsr_unicode>
a\rb
a\nb
a\r\nb
a\x{85}b
a\x0bb
** Failers
a\x{85}b\<bsr_anycrlf>
a\x0bb\<bsr_anycrlf>
/a\R?b/I8<bsr_anycrlf>
a\rb
a\nb
a\r\nb
** Failers
a\x{85}b
a\x0bb
/a\R?b/I8<bsr_unicode>
a\rb
a\nb
a\r\nb
a\x{85}b
a\x0bb
** Failers
a\x{85}b\<bsr_anycrlf>
a\x0bb\<bsr_anycrlf>
/.*a.*=.b.*/8<ANY>
QQQ\x{2029}ABCaXYZ=!bPQR
** Failers
a\x{2029}b
\x61\xe2\x80\xa9\x62
/[[:a\x{100}b:]]/8
/a[^]b/<JS>8
a\x{1234}b
a\nb
** Failers
ab
/a[^]+b/<JS>8
aXb
a\nX\nX\x{1234}b
** Failers
ab
/(\x{de})\1/
\x{de}\x{de}
\x{123}
/X/8f<any>
A\x{1ec5}ABCXYZ
/(*UTF8)\x{1234}/
abcd\x{1234}pqr
/(*CRLF)(*UTF8)(*BSR_UNICODE)a\Rb/I
/Xa{2,4}b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/Xa{2,4}?b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/Xa{2,4}+b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X\x{123}{2,4}b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X\x{123}{2,4}?b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X\x{123}{2,4}+b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X\x{123}{2,4}b/8
Xx\P
X\x{123}x\P
X\x{123}\x{123}x\P
X\x{123}\x{123}\x{123}x\P
X\x{123}\x{123}\x{123}\x{123}x\P
/X\x{123}{2,4}?b/8
Xx\P
X\x{123}x\P
X\x{123}\x{123}x\P
X\x{123}\x{123}\x{123}x\P
X\x{123}\x{123}\x{123}\x{123}x\P
/X\x{123}{2,4}+b/8
Xx\P
X\x{123}x\P
X\x{123}\x{123}x\P
X\x{123}\x{123}\x{123}x\P
X\x{123}\x{123}\x{123}\x{123}x\P
/X\d{2,4}b/8
X\P
X3\P
X33\P
X333\P
X3333\P
/X\d{2,4}?b/8
X\P
X3\P
X33\P
X333\P
X3333\P
/X\d{2,4}+b/8
X\P
X3\P
X33\P
X333\P
X3333\P
/X\D{2,4}b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X\D{2,4}?b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X\D{2,4}+b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X\D{2,4}b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X\D{2,4}?b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X\D{2,4}+b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X[abc]{2,4}b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X[abc]{2,4}?b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X[abc]{2,4}+b/8
X\P
Xa\P
Xaa\P
Xaaa\P
Xaaaa\P
/X[abc\x{123}]{2,4}b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X[abc\x{123}]{2,4}?b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X[abc\x{123}]{2,4}+b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X[^a]{2,4}b/8
X\P
Xz\P
Xzz\P
Xzzz\P
Xzzzz\P
/X[^a]{2,4}?b/8
X\P
Xz\P
Xzz\P
Xzzz\P
Xzzzz\P
/X[^a]{2,4}+b/8
X\P
Xz\P
Xzz\P
Xzzz\P
Xzzzz\P
/X[^a]{2,4}b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X[^a]{2,4}?b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/X[^a]{2,4}+b/8
X\P
X\x{123}\P
X\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\P
X\x{123}\x{123}\x{123}\x{123}\P
/(Y)X\1{2,4}b/8
YX\P
YXY\P
YXYY\P
YXYYY\P
YXYYYY\P
/(Y)X\1{2,4}?b/8
YX\P
YXY\P
YXYY\P
YXYYY\P
YXYYYY\P
/(Y)X\1{2,4}+b/8
YX\P
YXY\P
YXYY\P
YXYYY\P
YXYYYY\P
/(\x{123})X\1{2,4}b/8
\x{123}X\P
\x{123}X\x{123}\P
\x{123}X\x{123}\x{123}\P
\x{123}X\x{123}\x{123}\x{123}\P
\x{123}X\x{123}\x{123}\x{123}\x{123}\P
/(\x{123})X\1{2,4}?b/8
\x{123}X\P
\x{123}X\x{123}\P
\x{123}X\x{123}\x{123}\P
\x{123}X\x{123}\x{123}\x{123}\P
\x{123}X\x{123}\x{123}\x{123}\x{123}\P
/(\x{123})X\1{2,4}+b/8
\x{123}X\P
\x{123}X\x{123}\P
\x{123}X\x{123}\x{123}\P
\x{123}X\x{123}\x{123}\x{123}\P
\x{123}X\x{123}\x{123}\x{123}\x{123}\P
/\bthe cat\b/8
the cat\P
the cat\P\P
/abcd*/8
xxxxabcd\P
xxxxabcd\P\P
/abcd*/i8
xxxxabcd\P
xxxxabcd\P\P
XXXXABCD\P
XXXXABCD\P\P
/abc\d*/8
xxxxabc1\P
xxxxabc1\P\P
/(a)bc\1*/8
xxxxabca\P
xxxxabca\P\P
/abc[de]*/8
xxxxabcde\P
xxxxabcde\P\P
/X\W{3}X/8
\PX
/\h/SI
/\h/SI8
ABC\x{09}
ABC\x{20}
ABC\x{a0}
ABC\x{1680}
ABC\x{180e}
ABC\x{2000}
ABC\x{202f}
ABC\x{205f}
ABC\x{3000}
/\v/SI
/\v/SI8
ABC\x{0a}
ABC\x{0b}
ABC\x{0c}
ABC\x{0d}
ABC\x{85}
ABC\x{2028}
/\R/SI
/\R/SI8
/\h*A/SI8
CDBABC
/\v+A/SI8
/\s?xxx\s/8SI
/\sxxx\s/8T1
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\sxxx\s/I8ST1
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/8T1
\x{a2} \x{84}
/\S \S/I8ST1
\x{a2} \x{84}
A Z
'A#хц'8x<any>BZ
'A#хц
PQ'8x<any>BZ
/a+#хaa
z#XX?/8x<any>BZ
/a+#хaa
z#х?/8x<any>BZ
/\g{A}xxx#bXX(?'A'123) (?'A'456)/8x<any>BZ
/\g{A}xxx#bх(?'A'123) (?'A'456)/8x<any>BZ
/a+/8
a\x{123}aa\>1
a\x{123}aa\>2
a\x{123}aa\>3
a\x{123}aa\>4
a\x{123}aa\>5
a\x{123}aa\>6
/^\cģ/8
/(\R*)(.)/s8
\r\n
\r\r\n\n\r
\r\r\n\n\r\n
/(\R)*(.)/s8
\r\n
\r\r\n\n\r
\r\r\n\n\r\n
/\x{1234}+/iS8I
/\x{1234}+?/iS8I
/\x{1234}++/iS8I
/\x{1234}{2}/iS8I
/[^\x{1234}]+/iS8I
/[^\x{1234}]+?/iS8I
/[^\x{1234}]++/iS8I
/[^\x{1234}]{2}/iS8I
//<bsr_anycrlf><bsr_unicode>
/f.*/
\P\Pfor
/f.*/s
\P\Pfor
/f.*/8
\P\Pfor
/f.*/8s
\P\Pfor
/-- End of testinput5 --/