OLD | NEW |
1 # Copyright (c) 2001-2013 International Business Machines | 1 # Copyright (c) 2001-2014 International Business Machines |
2 # Corporation and others. All Rights Reserved. | 2 # Corporation and others. All Rights Reserved. |
3 # | 3 # |
4 # file: | 4 # file: |
5 # | 5 # |
6 # ICU regular expression test cases. | 6 # ICU regular expression test cases. |
7 # | 7 # |
8 # format: one test case per line, | 8 # format: one test case per line, |
9 # <test case> = <pattern> <flags> <match string> [# commen
t] | 9 # <test case> = <pattern> <flags> <match string> [# commen
t] |
10 # <pattern> = "<regular expression pattern>" | 10 # <pattern> = "<regular expression pattern>" |
11 # <match string> = "<tagged string>" | 11 # <match string> = "<tagged string>" |
(...skipping 500 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
512 "ab(?:(c)|(d))\1" "<0>ab<1>c</1>c</0>e" | 512 "ab(?:(c)|(d))\1" "<0>ab<1>c</1>c</0>e" |
513 "ab(?:(c)|(d))\1" i "abde" | 513 "ab(?:(c)|(d))\1" i "abde" |
514 "ab(?:(c)|(d))\1" i "<0>ab<1>c</1>c</0>e" | 514 "ab(?:(c)|(d))\1" i "<0>ab<1>c</1>c</0>e" |
515 | 515 |
516 # Case Insensitive | 516 # Case Insensitive |
517 "aBc" i "<0>ABC</0>" | 517 "aBc" i "<0>ABC</0>" |
518 "a[^bc]d" i "ABD" | 518 "a[^bc]d" i "ABD" |
519 '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8
></7></6></5></4></3></2></1>A</0>" | 519 '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8
></7></6></5></4></3></2></1>A</0>" |
520 | 520 |
521 "(?:(?i)a)b" "<0>Ab</0>" | 521 "(?:(?i)a)b" "<0>Ab</0>" |
522 "ab(?i)cd"» "<0>abCd</0>" | 522 "ab(?i)cd"» "<0>abCd</0>" |
523 "ab$cd" "abcd" | 523 "ab$cd" "abcd" |
524 | 524 |
| 525 "ssl" i "abc<0>ßl</0>xyz" |
| 526 "ssl" i "abc<0>ẞl</0>xyz" |
| 527 "FIND" i "can <0>ﬁnd</0> ?" # fi ligature, \ufb01 |
| 528 "find" i "can <0>FIND</0> ?" |
| 529 "ῧ" i "xxx<0>ῧ</0>xxx" # Composed char (match str
ing) decomposes when case-folded (pattern) |
| 530 |
525 # White space handling | 531 # White space handling |
526 "a b" "ab" | 532 "a b" "ab" |
527 "abc " "abc" | 533 "abc " "abc" |
528 "abc " "<0>abc </0>" | 534 "abc " "<0>abc </0>" |
529 "ab[cd e]z" "<0>ab z</0>" | 535 "ab[cd e]z" "<0>ab z</0>" |
530 "ab\ c" "<0>ab c</0> " | 536 "ab\ c" "<0>ab c</0> " |
531 "ab c" "<0>ab c</0> " | 537 "ab c" "<0>ab c</0> " |
532 "ab c" x "ab c " | 538 "ab c" x "ab c " |
533 "ab\ c" x "<0>ab c</0> " | 539 "ab\ c" x "<0>ab c</0> " |
534 | 540 |
(...skipping 630 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1165 | 1171 |
1166 "(?<=a{1,5})bc" "aaaa<0>bc</0>def" | 1172 "(?<=a{1,5})bc" "aaaa<0>bc</0>def" |
1167 "(?<=(?:aa){3,20})bc" "aaaaaa<0>bc</0>def" | 1173 "(?<=(?:aa){3,20})bc" "aaaaaa<0>bc</0>def" |
1168 "(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl" "def jkl" | 1174 "(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl" "def jkl" |
1169 "(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl" "rst <0>jkl</0>" | 1175 "(?<!abc {1,100}|def {1,100}|ghi {1,100})jkl" "rst <0>jkl</0>" |
1170 "(?<=a{11})bc" "aaaaaaaaaaa<0>bc</0>" | 1176 "(?<=a{11})bc" "aaaaaaaaaaa<0>bc</0>" |
1171 "(?<=a{11})bc" "aaaaaaaaaabc" | 1177 "(?<=a{11})bc" "aaaaaaaaaabc" |
1172 "(?<=a{1,})bc" E "aaaa<0>bc</0>def" # U_REGEX_LOOK_BEHIND_LIMI
T error. | 1178 "(?<=a{1,})bc" E "aaaa<0>bc</0>def" # U_REGEX_LOOK_BEHIND_LIMI
T error. |
1173 "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. | 1179 "(?<=(?:){11})bc" "<0>bc</0>" # Empty (?:) expression. |
1174 | 1180 |
| 1181 # Bug 10835 |
| 1182 # Match Start Set not being correctly computed for case insensitive patterns. |
| 1183 # (Test here is to dump the compiled pattern & manually check the start set.) |
1175 | 1184 |
1176 # Bug 11369 | 1185 "(private|secret|confidential|classified|restricted)" i "hmm, <0><1>Classifie
d</1></0> stuff" |
1177 # Incorrect optimization of patterns with a zero length quantifier {0} | 1186 "(private|secret|confidential|classified|restricted)" "hmm, Classified stuf
f" |
1178 | 1187 |
1179 "(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" | 1188 # Bug 10844 |
1180 "(|b)ab(c)" "<0><1></1>ab<2>c</2></0>" | |
1181 "(|b){0}a{3}(D*)" "<0>aaa<2></2></0>" | |
1182 "(|b){0,1}a{3}(D*)" "<0><1></1>aaa<2></2></0>" | |
1183 "((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" | |
1184 | 1189 |
1185 # Bug 11370 | 1190 "^([\w\d:]+)$" "<0><1>DiesIst1Beispiel:text</1></0>" |
1186 # Max match length computation of look-behind expression gives result that is
too big to fit in the | 1191 "^([\w\d:]+)$" i "<0><1>DiesIst1Beispiel:text</1></0>" |
1187 # 24 bit operand portion of the compiled code. Expressions should fail
to compile | 1192 "^(\w+\d\w+:\w+)$" "<0><1>DiesIst1Beispiel:text</1></0>" |
1188 # (Look-behind match length must be bounded. This case is treated as unbounded
, an error.) | 1193 "^(\w+\d\w+:\w+)$" i "<0><1>DiesIst1Beispiel:text</1></0>" |
1189 | 1194 |
1190 "(?<!(0123456789a){10000000})x" E "no match" | 1195 # Bug 11049 |
1191 "(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" | 1196 # Edge cases in find() when pattern match begins with a set of code points |
| 1197 # and the match begins at the end of the string. |
1192 | 1198 |
| 1199 "A|B|C" "hello <0>A</0>" |
| 1200 "A|B|C" "hello \U00011234" |
| 1201 "A|B|\U00012345" "hello <0>\U00012345</0>" |
| 1202 "A|B|\U00010000" "hello \ud800" |
1193 | 1203 |
1194 # Random debugging, Temporary | 1204 # Random debugging, Temporary |
1195 # | 1205 # |
1196 #"^(?:a?b?)*$" "a--" | |
1197 | 1206 |
1198 "This is a string with (?:one |two |three )endings" "<0>This is a string with
two endings</0>" | 1207 "This is a string with (?:one |two |three )endings" "<0>This is a string with
two endings</0>" |
1199 "((?:a|b|c)whoop-dee-do) | [jkl]|zed" "x" | |
1200 "astring|another[bcd]|alpha|a|[a]" "x" | |
1201 | 1208 |
1202 | 1209 |
1203 # | 1210 # |
1204 # Regexps from http://www.regexlib.com | 1211 # Regexps from http://www.regexlib.com |
1205 # | 1212 # |
1206 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>G1 1AA</0>" | 1213 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>G1 1AA</0>" |
1207 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>EH10 2QQ</0
>" | 1214 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>EH10 2QQ</0
>" |
1208 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>SW1 1ZZ</0>
" | 1215 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>SW1 1ZZ</0>
" |
1209 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "G111 1AA" | 1216 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "G111 1AA" |
1210 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "X10 WW" | 1217 "^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "X10 WW" |
(...skipping 1267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2478 "(\w+)\s+\1" "may day" | 2485 "(\w+)\s+\1" "may day" |
2479 "(\w+)\s+\1" "gogo" | 2486 "(\w+)\s+\1" "gogo" |
2480 "(\w+)\s+\1" "1212" | 2487 "(\w+)\s+\1" "1212" |
2481 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareB
and.com</0>" | 2488 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareB
and.com</0>" |
2482 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net<
/0>" | 2489 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net<
/0>" |
2483 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil
</0>" | 2490 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil
</0>" |
2484 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand
.com" | 2491 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand
.com" |
2485 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net
" | 2492 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net
" |
2486 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.milita
ry" | 2493 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.milita
ry" |
2487 | 2494 |
OLD | NEW |