Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(150)

Side by Side Diff: source/test/testdata/regextst.txt

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/testdata/rbbitst.txt ('k') | source/test/testdata/root.txt » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright (c) 2001-2014 International Business Machines 1 # Copyright (c) 2001-2015 International Business Machines
2 # Corporation and others. All Rights Reserved. 2 # Corporation and others. All Rights Reserved.
3 # 3 #
4 # file: 4 # file:
5 # 5 #
6 # ICU regular expression test cases. 6 # ICU regular expression test cases.
7 # 7 #
8 # format: one test case per line, 8 # format: one test case per line,
9 # <test case> = <pattern> <flags> <match string> [# comment] 9 # <test case> = <pattern> <flags> <match string> [# comment]
10 # <pattern> = "<regular expression pattern>" 10 # <pattern> = "<regular expression pattern>"
11 # <match string> = "<tagged string>" 11 # <match string> = "<tagged string>"
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after
506 # Back reference to a zero-length match. They are always a successful match. 506 # Back reference to a zero-length match. They are always a successful match.
507 "ab(x?)cd(\1)ef" "<0>ab<1></1>cd<2></2>ef</0>" 507 "ab(x?)cd(\1)ef" "<0>ab<1></1>cd<2></2>ef</0>"
508 "ab(x?)cd(\1)ef" i "<0>ab<1></1>cd<2></2>ef</0>" 508 "ab(x?)cd(\1)ef" i "<0>ab<1></1>cd<2></2>ef</0>"
509 509
510 # Back refs to capture groups that didn't participate in the match. 510 # Back refs to capture groups that didn't participate in the match.
511 "ab(?:(c)|(d))\1" "abde" 511 "ab(?:(c)|(d))\1" "abde"
512 "ab(?:(c)|(d))\1" "<0>ab<1>c</1>c</0>e" 512 "ab(?:(c)|(d))\1" "<0>ab<1>c</1>c</0>e"
513 "ab(?:(c)|(d))\1" i "abde" 513 "ab(?:(c)|(d))\1" i "abde"
514 "ab(?:(c)|(d))\1" i "<0>ab<1>c</1>c</0>e" 514 "ab(?:(c)|(d))\1" i "<0>ab<1>c</1>c</0>e"
515 515
516 # Named back references
517 "(?<one>abcd)\k<one>" "<0><1>abcd</1>abcd</0>"
518 "(no)?(?<one>abcd)\k<one>" "<0><2>abcd</2>abcd</0>"
519
520 "(?<a_1>...)" E " " # backref names are ascii letters & numbers only"
521 "(?<1a>...)" E " " # backref names must begin with a letter"
522 "(?<a>.)(?<a>.)" E " " # Repeated names are illegal.
523
524
516 # Case Insensitive 525 # Case Insensitive
517 "aBc" i "<0>ABC</0>" 526 "aBc" i "<0>ABC</0>"
518 "a[^bc]d" i "ABD" 527 "a[^bc]d" i "ABD"
519 '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8 ></7></6></5></4></3></2></1>A</0>" 528 '((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>A</10></9></8 ></7></6></5></4></3></2></1>A</0>"
520 529
521 "(?:(?i)a)b" "<0>Ab</0>" 530 "(?:(?i)a)b" "<0>Ab</0>"
522 "ab(?i)cd" "<0>abCd</0>" 531 "ab(?i)cd" "<0>abCd</0>"
523 "ab$cd" "abcd" 532 "ab$cd" "abcd"
524 533
525 "ssl" i "abc<0>ßl</0>xyz" 534 "ssl" i "abc<0>ßl</0>xyz"
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 "abc" 2Z "aaabc <0>abc</0>ab" 687 "abc" 2Z "aaabc <0>abc</0>ab"
679 "abc" 3z "aa>abc abcab" 688 "abc" 3z "aa>abc abcab"
680 689
681 # 690 #
682 # \ escaping 691 # \ escaping
683 # 692 #
684 "abc\jkl" "<0>abcjkl</0>" # escape of a non-special letter is just itself. 693 "abc\jkl" "<0>abcjkl</0>" # escape of a non-special letter is just itself.
685 "abc[ \j]kl" "<0>abcjkl</0>" 694 "abc[ \j]kl" "<0>abcjkl</0>"
686 695
687 # 696 #
697 # \R all newline sequences.
698 #
699 "abc\Rxyz" "<0>abc\u000axyz</0>gh"
700 "abc\Rxyz" "<0>abc\u000bxyz</0>gh"
701 "abc\Rxyz" "<0>abc\u000cxyz</0>gh"
702 "abc\Rxyz" "<0>abc\u000dxyz</0>gh"
703 "abc\Rxyz" "<0>abc\u0085xyz</0>gh"
704 "abc\Rxyz" "<0>abc\u2028xyz</0>gh"
705 "abc\Rxyz" "<0>abc\u2029xyz</0>gh"
706 "abc\Rxyz" "<0>abc\u000d\u000axyz</0>gh"
707
708 "abc\R\nxyz" "abc\u000d\u000axyzgh" # \R cannot match only the CR from a CR/LF sequence.
709 "abc\r\nxyz" "<0>abc\u000d\u000axyz</0>gh"
710
711 "abc\Rxyz" "abc\u0009xyz" # Assorted non-matches.
712 "abc\Rxyz" "abc\u000exyz"
713 "abc\Rxyz" "abc\u202axyz"
714
715 # \v \V single character new line sequences.
716
717 "abc\vxyz" "<0>abc\u000axyz</0>gh"
718 "abc\vxyz" "<0>abc\u000bxyz</0>gh"
719 "abc\vxyz" "<0>abc\u000cxyz</0>gh"
720 "abc\vxyz" "<0>abc\u000dxyz</0>gh"
721 "abc\vxyz" "<0>abc\u0085xyz</0>gh"
722 "abc\vxyz" "<0>abc\u2028xyz</0>gh"
723 "abc\vxyz" "<0>abc\u2029xyz</0>gh"
724 "abc\vxyz" "abc\u000d\u000axyzgh"
725 "abc\vxyz" "abc?xyzgh"
726
727 "abc[\v]xyz" "<0>abc\u000axyz</0>gh"
728 "abc[\v]xyz" "<0>abc\u000bxyz</0>gh"
729 "abc[\v]xyz" "<0>abc\u000cxyz</0>gh"
730 "abc[\v]xyz" "<0>abc\u000dxyz</0>gh"
731 "abc[\v]xyz" "<0>abc\u0085xyz</0>gh"
732 "abc[\v]xyz" "<0>abc\u2028xyz</0>gh"
733 "abc[\v]xyz" "<0>abc\u2029xyz</0>gh"
734 "abc[\v]xyz" "abc\u000d\u000axyzgh"
735 "abc[\v]xyz" "abc?xyzgh"
736
737 "abc\Vxyz" "abc\u000axyzgh"
738 "abc\Vxyz" "abc\u000bxyzgh"
739 "abc\Vxyz" "abc\u000cxyzgh"
740 "abc\Vxyz" "abc\u000dxyzgh"
741 "abc\Vxyz" "abc\u0085xyzgh"
742 "abc\Vxyz" "abc\u2028xyzgh"
743 "abc\Vxyz" "abc\u2029xyzgh"
744 "abc\Vxyz" "abc\u000d\u000axyzgh"
745 "abc\Vxyz" "<0>abc?xyz</0>gh"
746
747 # \h \H horizontal white space. Defined as gc=space_separator plus ascii tab
748
749 "abc\hxyz" "<0>abc xyz</0>gh"
750 "abc\Hxyz" "abc xyzgh"
751 "abc\hxyz" "<0>abc\u2003xyz</0>gh"
752 "abc\Hxyz" "abc\u2003xyzgh"
753 "abc\hxyz" "<0>abc\u0009xyz</0>gh"
754 "abc\Hxyz" "abc\u0009xyzgh"
755 "abc\hxyz" "abc?xyzgh"
756 "abc\Hxyz" "<0>abc?xyz</0>gh"
757
758 "abc[\h]xyz" "<0>abc xyz</0>gh"
759 "abc[\H]xyz" "abc xyzgh"
760 "abc[\h]xyz" "<0>abc\u2003xyz</0>gh"
761 "abc[\H]xyz" "abc\u2003xyzgh"
762 "abc[\h]xyz" "<0>abc\u0009xyz</0>gh"
763 "abc[\H]xyz" "abc\u0009xyzgh"
764 "abc[\h]xyz" "abc?xyzgh"
765 "abc[\H]xyz" "<0>abc?xyz</0>gh"
766
767
768 #
688 # Bug xxxx 769 # Bug xxxx
689 # 770 #
690 "(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\ d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61. 789+71:81</0>" 771 "(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\ d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" MG "<0>-1234-21-31T41:51:61. 789+71:81</0>"
691 772
692 773
693 # 774 #
694 # A random, complex, meaningless pattern that should at least compile 775 # A random, complex, meaningless pattern that should at least compile
695 # 776 #
696 "(?![^\<C\f\0146\0270\}&&[|\02-\x3E\}|X-\|]]{7,}+)[|\\\x98\<\?\u4FCFr\,\0025\}\0 04|\0025-\0521]|(?<![|\01-\u829E])|(?<!\p{Alpha})|^|(?-s:[^\x15\\\x24F\a\,\a\u97 D8[\x38\a[\0224-\0306[^\0020-\u6A57]]]]??)(?xix:[^|\{\[\0367\t\e\x8C\{\[\074c\]V [|b\fu\r\0175\<\07f\066s[^D-\x5D]]])(?xx:^{5,}+)(?d)(?=^\D)|(?!\G)(?>\G)(?![^|\] \070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\03 7\uECB3\u3D9A\x31\|\<b\0206\uF2EC\01m\,\ak\a\03&&\p{Punct}]]]])(?-dxs:[|\06-\07| \e-\x63&&[|Tp\u18A3\00\|\xE4\05\061\015\0116C|\r\{\}\006\xEA\0367\xC4\01\0042\02 67\xBB\01T\}\0100\?[|\[-\u459B|\x23\x91\rF\0376[|\?-\x94\0113-\\\s]]]]{6}?)(?<=[ ^\t-\x42H\04\f\03\0172\?i\u97B6\e\f\uDAC2])(?=\B)(?>[^\016\r\{\,\uA29D\034\02[\0 2-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g |\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\011 4\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{ Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0></0>abc" 777 "(?![^\<C\f\0146\0270\}&&[|\02-\x3E\}|X-\|]]{7,}+)[|\\\x98\<\?\u4FCFr\,\0025\}\0 04|\0025-\0521]|(?<![|\01-\u829E])|(?<!\p{Alpha})|^|(?-s:[^\x15\\\x24F\a\,\a\u97 D8[\x38\a[\0224-\0306[^\0020-\u6A57]]]]??)(?xix:[^|\{\[\0367\t\e\x8C\{\[\074c\]V [|b\fu\r\0175\<\07f\066s[^D-\x5D]]])(?xx:^{5,}+)(?d)(?=^\D)|(?!\G)(?>\G)(?![^|\] \070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\03 7\uECB3\u3D9A\x31\|\<b\0206\uF2EC\01m\,\ak\a\03&&\p{Punct}]]]])(?-dxs:[|\06-\07| \e-\x63&&[|Tp\u18A3\00\|\xE4\05\061\015\0116C|\r\{\}\006\xEA\0367\xC4\01\0042\02 67\xBB\01T\}\0100\?[|\[-\u459B|\x23\x91\rF\0376[|\?-\x94\0113-\\\s]]]]{6}?)(?<=[ ^\t-\x42H\04\f\03\0172\?i\u97B6\e\f\uDAC2])(?=\B)(?>[^\016\r\{\,\uA29D\034\02[\0 2-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g |\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\011 
4\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{ Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0></0>abc"
697 778
(...skipping 513 matching lines...) Expand 10 before | Expand all | Expand 10 after
1211 "((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>" 1292 "((|b){0})a{3}(D*)" "<0><1></1>aaa<3></3></0>"
1212 1293
1213 # Bug 11370 1294 # Bug 11370
1214 # Max match length computation of look-behind expression gives result that is too big to fit in the 1295 # Max match length computation of look-behind expression gives result that is too big to fit in the
1215 # 24 bit operand portion of the compiled code. Expressions should fail to compile 1296 # 24 bit operand portion of the compiled code. Expressions should fail to compile
1216 # (Look-behind match length must be bounded. This case is treated as unbounded, an error.) 1297 # (Look-behind match length must be bounded. This case is treated as unbounded, an error.)
1217 1298
1218 "(?<!(0123456789a){10000000})x" E "no match" 1299 "(?<!(0123456789a){10000000})x" E "no match"
1219 "(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match" 1300 "(?<!\\ubeaf(\\ubeaf{11000}){11000})" E "no match"
1220 1301
1302 # Bug 11374 Bad integer overflow check in number conversion.
1303 # 4294967300 converts to 4 with 32 bit overflow.
1304
1305 "x{4294967300}" E "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1306 "x{0,4294967300}" E "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
1307
1308 # Bug 11373
1309 #
1310 # Overflow checking in max match length computation for loops.
1311 # Value here is 10 * 100000 * 3000 = 3E9, overflowing a 32 bit signed value.
1312 # Before fixing, this case gave an assertion failure.
1313
1314 "(?<=((0123456789){100000}){3000})abc" E "abc"
1315
1316
1221 1317
1222 # Random debugging, Temporary 1318 # Random debugging, Temporary
1223 # 1319 #
1224 1320
1225 "This is a string with (?:one |two |three )endings" "<0>This is a string with two endings</0>" 1321 "This is a string with (?:one |two |three )endings" "<0>This is a string with two endings</0>"
1226 1322
1227 1323
1228 # 1324 #
1229 # Regexps from http://www.regexlib.com 1325 # Regexps from http://www.regexlib.com
1230 # 1326 #
(...skipping 1272 matching lines...) Expand 10 before | Expand all | Expand 10 after
2503 "(\w+)\s+\1" "may day" 2599 "(\w+)\s+\1" "may day"
2504 "(\w+)\s+\1" "gogo" 2600 "(\w+)\s+\1" "gogo"
2505 "(\w+)\s+\1" "1212" 2601 "(\w+)\s+\1" "1212"
2506 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareB and.com</0>" 2602 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareB and.com</0>"
2507 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net< /0>" 2603 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>asp.net< /0>"
2508 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil </0>" 2604 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil </0>"
2509 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand .com" 2605 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand .com"
2510 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net " 2606 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net "
2511 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.milita ry" 2607 "^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.milita ry"
2512 2608
OLDNEW
« no previous file with comments | « source/test/testdata/rbbitst.txt ('k') | source/test/testdata/root.txt » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698