OLD | NEW |
1 # Copyright (c) 2001-2014 International Business Machines | 1 # Copyright (c) 2001-2015 International Business Machines |
2 # Corporation and others. All Rights Reserved. | 2 # Corporation and others. All Rights Reserved. |
3 # | 3 # |
4 # file: | 4 # file: |
5 # | 5 # |
6 # ICU regular expression test cases. | 6 # ICU regular expression test cases. |
7 # | 7 # |
8 # format: one test case per line, | 8 # format: one test case per line, |
9 # <test case> = <pattern> <flags> <match string> [# commen
t] | 9 # <test case> = <pattern> <flags> <match string> [# commen
t] |
10 # <pattern> = "<regular expression pattern>" | 10 # <pattern> = "<regular expression pattern>" |
11 # <match string> = "<tagged string>" | 11 # <match string> = "<tagged string>" |
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
506 # Back references to a zero-length match. They always match successfully. | 506 # Back references to a zero-length match. They always match successfully. |
507 "ab(x?)cd(\1)ef" "<0>ab<1></1>cd<2></2>ef</0>" | 507 "ab(x?)cd(\1)ef" "<0>ab<1></1>cd<2></2>ef</0>" |
508 "ab(x?)cd(\1)ef" i "<0>ab<1></1>cd<2></2>ef</0>" | 508 "ab(x?)cd(\1)ef" i "<0>ab<1></1>cd<2></2>ef</0>" |
509 | 509 |
510 # Back refs to capture groups that didn't participate in the match. | 510 # Back refs to capture groups that didn't participate in the match. |
511 "ab(?:(c)|(d))\1" "abde" | 511 "ab(?:(c)|(d))\1" "abde" |
512 "ab(?:(c)|(d))\1" "<0>ab<1>c</1>c</0>e" | 512 "ab(?:(c)|(d))\1" "<0>ab<1>c</1>c</0>e" |
513 "ab(?:(c)|(d))\1" i "abde" | 513 "ab(?:(c)|(d))\1" i "abde" |
514 "ab(?:(c)|(d))\1" i "<0>ab<1>c</1>c</0>e" | 514 "ab(?:(c)|(d))\1" i "<0>ab<1>c</1>c</0>e" |
515 | 515 |
| 516 # Named back references |
| 517 "(?<one>abcd)\k<one>" "<0><1>abcd</1>abcd</0>" |
| 518 "(no)?(?<one>abcd)\k<one>" "<0><2>abcd</2>abcd</0>" |
| 519 |
| 520 "(?<a_1>...)" E " " # backref names are ascii letters & number
s only |
| 521 "(?<1a>...)" E " " # backref names must begin with a letter |
| 522 "(?<a>.)(?<a>.)" E " " # Repeated names are illegal. |
| 523 |
| 524 |
516 # Case Insensitive | 525 # Case Insensitive |
517 "aBc" i "<0>ABC</0>" | 526 "aBc" i "<0>ABC</0>" |
518 "a[^bc]d" i "ABD" | 527 "a[^bc]d" i "ABD" |
519 '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8
></7></6></5></4></3></2></1>A</0>" | 528 '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8
></7></6></5></4></3></2></1>A</0>" |
520 | 529 |
521 "(?:(?i)a)b" "<0>Ab</0>" | 530 "(?:(?i)a)b" "<0>Ab</0>" |
522 "ab(?i)cd" "<0>abCd</0>" | 531 "ab(?i)cd" "<0>abCd</0>" |
523 "ab$cd" "abcd" | 532 "ab$cd" "abcd" |
524 | 533 |
525 "ssl" i "abc<0>ßl</0>xyz" | 534 "ssl" i "abc<0>ßl</0>xyz" |
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
678 "abc" 2Z "aaabc <0>abc</0>ab" | 687 "abc" 2Z "aaabc <0>abc</0>ab" |
679 "abc" 3z "aa>abc abcab" | 688 "abc" 3z "aa>abc abcab" |
680 | 689 |
681 # | 690 # |
682 # \ escaping | 691 # \ escaping |
683 # | 692 # |
684 "abc\jkl" "<0>abcjkl</0>" # escape of a non-special l
etter is just itself. | 693 "abc\jkl" "<0>abcjkl</0>" # escape of a non-special l
etter is just itself. |
685 "abc[ \j]kl" "<0>abcjkl</0>" | 694 "abc[ \j]kl" "<0>abcjkl</0>" |
686 | 695 |
687 # | 696 # |
| 697 # \R all newline sequences. |
| 698 # |
| 699 "abc\Rxyz" "<0>abc\u000axyz</0>gh" |
| 700 "abc\Rxyz" "<0>abc\u000bxyz</0>gh" |
| 701 "abc\Rxyz" "<0>abc\u000cxyz</0>gh" |
| 702 "abc\Rxyz" "<0>abc\u000dxyz</0>gh" |
| 703 "abc\Rxyz" "<0>abc\u0085xyz</0>gh" |
| 704 "abc\Rxyz" "<0>abc\u2028xyz</0>gh" |
| 705 "abc\Rxyz" "<0>abc\u2029xyz</0>gh" |
| 706 "abc\Rxyz" "<0>abc\u000d\u000axyz</0>gh" |
| 707 |
| 708 "abc\R\nxyz" "abc\u000d\u000axyzgh" # \R cannot ma
tch only the CR from a CR/LF sequence. |
| 709 "abc\r\nxyz" "<0>abc\u000d\u000axyz</0>gh" |
| 710 |
| 711 "abc\Rxyz" "abc\u0009xyz" # Assorted non
-matches. |
| 712 "abc\Rxyz" "abc\u000exyz" |
| 713 "abc\Rxyz" "abc\u202axyz" |
| 714 |
| 715 # \v \V single character new line sequences. |
| 716 |
| 717 "abc\vxyz" "<0>abc\u000axyz</0>gh" |
| 718 "abc\vxyz" "<0>abc\u000bxyz</0>gh" |
| 719 "abc\vxyz" "<0>abc\u000cxyz</0>gh" |
| 720 "abc\vxyz" "<0>abc\u000dxyz</0>gh" |
| 721 "abc\vxyz" "<0>abc\u0085xyz</0>gh" |
| 722 "abc\vxyz" "<0>abc\u2028xyz</0>gh" |
| 723 "abc\vxyz" "<0>abc\u2029xyz</0>gh" |
| 724 "abc\vxyz" "abc\u000d\u000axyzgh" |
| 725 "abc\vxyz" "abc?xyzgh" |
| 726 |
| 727 "abc[\v]xyz" "<0>abc\u000axyz</0>gh" |
| 728 "abc[\v]xyz" "<0>abc\u000bxyz</0>gh" |
| 729 "abc[\v]xyz" "<0>abc\u000cxyz</0>gh" |
| 730 "abc[\v]xyz" "<0>abc\u000dxyz</0>gh" |
| 731 "abc[\v]xyz" "<0>abc\u0085xyz</0>gh" |
| 732 "abc[\v]xyz" "<0>abc\u2028xyz</0>gh" |
| 733 "abc[\v]xyz" "<0>abc\u2029xyz</0>gh" |
| 734 "abc[\v]xyz" "abc\u000d\u000axyzgh" |
| 735 "abc[\v]xyz" "abc?xyzgh" |
| 736 |
| 737 "abc\Vxyz" "abc\u000axyzgh" |
| 738 "abc\Vxyz" "abc\u000bxyzgh" |
| 739 "abc\Vxyz" "abc\u000cxyzgh" |
| 740 "abc\Vxyz" "abc\u000dxyzgh" |
| 741 "abc\Vxyz" "abc\u0085xyzgh" |
| 742 "abc\Vxyz" "abc\u2028xyzgh" |
| 743 "abc\Vxyz" "abc\u2029xyzgh" |
| 744 "abc\Vxyz" "abc\u000d\u000axyzgh" |
| 745 "abc\Vxyz" "<0>abc?xyz</0>gh" |
| 746 |
| 747 # \h \H horizontal white space. Defined as gc=space_separator plus ascii tab |
| 748 |
| 749 "abc\hxyz" "<0>abc xyz</0>gh" |
| 750 "abc\Hxyz" "abc xyzgh" |
| 751 "abc\hxyz" "<0>abc\u2003xyz</0>gh" |
| 752 "abc\Hxyz" "abc\u2003xyzgh" |
| 753 "abc\hxyz" "<0>abc\u0009xyz</0>gh" |
| 754 "abc\Hxyz" "abc\u0009xyzgh" |
| 755 "abc\hxyz" "abc?xyzgh" |
| 756 "abc\Hxyz" "<0>abc?xyz</0>gh" |
| 757 |
| 758 "abc[\h]xyz" "<0>abc xyz</0>gh" |
| 759 "abc[\H]xyz" "abc xyzgh" |
| 760 "abc[\h]xyz" "<0>abc\u2003xyz</0>gh" |
| 761 "abc[\H]xyz" "abc\u2003xyzgh" |
| 762 "abc[\h]xyz" "<0>abc\u0009xyz</0>gh" |
| 763 "abc[\H]xyz" "abc\u0009xyzgh" |
| 764 "abc[\h]xyz" "abc?xyzgh" |
| 765 "abc[\H]xyz" "<0>abc?xyz</0>gh" |
| 766 |
| 767 |
| 768 # |
688 # Bug xxxx | 769 # Bug xxxx |
689 # | 770 # |
690 "(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\
d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61.
789+71:81</0>" | 771 "(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\
d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61.
789+71:81</0>" |
691 | 772 |
692 | 773 |
693 # | 774 # |
694 # A random, complex, meaningless pattern that should at least compile | 775 # A random, complex, meaningless pattern that should at least compile |
695 # | 776 # |
696 "(?![^\<C\f\0146\0270\}&&[|\02-\x3E\}|X-\|]]{7,}+)[|\\\x98\<\?\u4FCFr\,\0025\}\0
04|\0025-\0521]|(?<![|\01-\u829E])|(?<!\p{Alpha})|^|(?-s:[^\x15\\\x24F\a\,\a\u97
D8[\x38\a[\0224-\0306[^\0020-\u6A57]]]]??)(?xix:[^|\{\[\0367\t\e\x8C\{\[\074c\]V
[|b\fu\r\0175\<\07f\066s[^D-\x5D]]])(?xx:^{5,}+)(?d)(?=^\D)|(?!\G)(?>\G)(?![^|\]
\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\03
7\uECB3\u3D9A\x31\|\<b\0206\uF2EC\01m\,\ak\a\03&&\p{Punct}]]]])(?-dxs:[|\06-\07|
\e-\x63&&[|Tp\u18A3\00\|\xE4\05\061\015\0116C|\r\{\}\006\xEA\0367\xC4\01\0042\02
67\xBB\01T\}\0100\?[|\[-\u459B|\x23\x91\rF\0376[|\?-\x94\0113-\\\s]]]]{6}?)(?<=[
^\t-\x42H\04\f\03\0172\?i\u97B6\e\f\uDAC2])(?=\B)(?>[^\016\r\{\,\uA29D\034\02[\0
2-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g
|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\011
4\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{
Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0></0>abc" | 777 "(?![^\<C\f\0146\0270\}&&[|\02-\x3E\}|X-\|]]{7,}+)[|\\\x98\<\?\u4FCFr\,\0025\}\0
04|\0025-\0521]|(?<![|\01-\u829E])|(?<!\p{Alpha})|^|(?-s:[^\x15\\\x24F\a\,\a\u97
D8[\x38\a[\0224-\0306[^\0020-\u6A57]]]]??)(?xix:[^|\{\[\0367\t\e\x8C\{\[\074c\]V
[|b\fu\r\0175\<\07f\066s[^D-\x5D]]])(?xx:^{5,}+)(?d)(?=^\D)|(?!\G)(?>\G)(?![^|\]
\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\03
7\uECB3\u3D9A\x31\|\<b\0206\uF2EC\01m\,\ak\a\03&&\p{Punct}]]]])(?-dxs:[|\06-\07|
\e-\x63&&[|Tp\u18A3\00\|\xE4\05\061\015\0116C|\r\{\}\006\xEA\0367\xC4\01\0042\02
67\xBB\01T\}\0100\?[|\[-\u459B|\x23\x91\rF\0376[|\?-\x94\0113-\\\s]]]]{6}?)(?<=[
^\t-\x42H\04\f\03\0172\?i\u97B6\e\f\uDAC2])(?=\B)(?>[^\016\r\{\,\uA29D\034\02[\0
2-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g
|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\011
4\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{
Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0></0>abc" |
697 | 778 |
(...skipping 513 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1211 "((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" | 1292 "((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" |
1212 | 1293 |
1213 # Bug 11370 | 1294 # Bug 11370 |
1214 # Max match length computation of look-behind expression gives result that is
too big to fit in the | 1295 # Max match length computation of look-behind expression gives result that is
too big to fit in the |
1215 # 24 bit operand portion of the compiled code. Expressions should fail
to compile | 1296 # 24 bit operand portion of the compiled code. Expressions should fail
to compile |
1216 # (Look-behind match length must be bounded. This case is treated as unbounded
, an error.) | 1297 # (Look-behind match length must be bounded. This case is treated as unbounded
, an error.) |
1217 | 1298 |
1218 "(?<!(0123456789a){10000000})x" E "no match" | 1299 "(?<!(0123456789a){10000000})x" E "no match" |
1219 "(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" | 1300 "(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" |
1220 | 1301 |
| 1302 # Bug 11374 Bad integer overflow check in number conversion. |
| 1303 # 4294967300 converts to 4 with 32 bit overflow. |
| 1304 |
| 1305 "x{4294967300}" E "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" |
| 1306 "x{0,4294967300}" E "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" |
| 1307 |
| 1308 # Bug 11373 |
| 1309 # |
| 1310 # Overflow checking in max match length computation for loops. |
| 1311 # Value here is 10 * 100000 * 3000 = 3E9, overflowing a 32 bit signed value. |
| 1312 # Before fixing, this case gave an assertion failure. |
| 1313 |
| 1314 "(?<=((0123456789){100000}){3000})abc" E "abc" |
| 1315 |
| 1316 |
1221 | 1317 |
1222 # Random debugging, Temporary | 1318 # Random debugging, Temporary |
1223 # | 1319 # |
1224 | 1320 |
1225 "This is a string with (?:one |two |three )endings" "<0>This is a string with
two endings</0>" | 1321 "This is a string with (?:one |two |three )endings" "<0>This is a string with
two endings</0>" |
1226 | 1322 |
1227 | 1323 |
1228 # | 1324 # |
1229 # Regexps from http://www.regexlib.com | 1325 # Regexps from http://www.regexlib.com |
1230 # | 1326 # |
(...skipping 1272 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2503 "(\w+)\s+\1" "may day" | 2599 "(\w+)\s+\1" "may day" |
2504 "(\w+)\s+\1" "gogo" | 2600 "(\w+)\s+\1" "gogo" |
2505 "(\w+)\s+\1" "1212" | 2601 "(\w+)\s+\1" "1212" |
2506 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareB
and.com</0>" | 2602 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareB
and.com</0>" |
2507 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net<
/0>" | 2603 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net<
/0>" |
2508 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil
</0>" | 2604 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil
</0>" |
2509 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand
.com" | 2605 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand
.com" |
2510 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net
" | 2606 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net
" |
2511 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.milita
ry" | 2607 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.milita
ry" |
2512 | 2608 |
OLD | NEW |