| OLD | NEW |
| 1 /******************************************************************** | 1 /******************************************************************** |
| 2 * COPYRIGHT: | 2 * COPYRIGHT: |
| 3 * Copyright (c) 2002-2014, International Business Machines Corporation and | 3 * Copyright (c) 2002-2015, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ | 5 ********************************************************************/ |
| 6 | 6 |
| 7 // | 7 // |
| 8 // regextst.cpp | 8 // regextst.cpp |
| 9 // | 9 // |
| 10 // ICU Regular Expressions test, part of intltest. | 10 // ICU Regular Expressions test, part of intltest. |
| 11 // | 11 // |
| 12 | 12 |
| 13 /* | 13 /* |
| 14 NOTE!! | 14 NOTE!! |
| 15 | 15 |
| 16 PLEASE be careful about ASCII assumptions in this test. | 16 PLEASE be careful about ASCII assumptions in this test. |
| 17 This test is one of the worst repeat offenders. | 17 This test is one of the worst repeat offenders. |
| 18 If you have questions, contact someone on the ICU PMC | 18 If you have questions, contact someone on the ICU PMC |
| 19 who has access to an EBCDIC system. | 19 who has access to an EBCDIC system. |
| 20 | 20 |
| 21 */ | 21 */ |
| 22 | 22 |
| 23 #include "intltest.h" | 23 #include "intltest.h" |
| 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 25 | 25 |
| 26 #include <stdlib.h> |
| 27 #include <stdio.h> |
| 28 #include <string.h> |
| 29 |
| 26 #include "unicode/localpointer.h" | 30 #include "unicode/localpointer.h" |
| 27 #include "unicode/regex.h" | 31 #include "unicode/regex.h" |
| 28 #include "unicode/uchar.h" | 32 #include "unicode/uchar.h" |
| 29 #include "unicode/ucnv.h" | 33 #include "unicode/ucnv.h" |
| 30 #include "unicode/uniset.h" | 34 #include "unicode/uniset.h" |
| 31 #include "unicode/uregex.h" | 35 #include "unicode/uregex.h" |
| 32 #include "unicode/usetiter.h" | 36 #include "unicode/usetiter.h" |
| 33 #include "unicode/ustring.h" | 37 #include "unicode/ustring.h" |
| 38 #include "unicode/utext.h" |
| 39 |
| 34 #include "regextst.h" | 40 #include "regextst.h" |
| 35 #include "regexcmp.h" | 41 #include "regexcmp.h" |
| 36 #include "uvector.h" | 42 #include "uvector.h" |
| 37 #include "util.h" | 43 #include "util.h" |
| 38 #include <stdlib.h> | 44 #include "cmemory.h" |
| 39 #include <string.h> | |
| 40 #include <stdio.h> | |
| 41 #include "cstring.h" | 45 #include "cstring.h" |
| 42 #include "uinvchar.h" | 46 #include "uinvchar.h" |
| 43 | 47 |
| 44 #define SUPPORT_MUTATING_INPUT_STRING 0 | 48 #define SUPPORT_MUTATING_INPUT_STRING 0 |
| 45 | 49 |
| 46 //--------------------------------------------------------------------------- | 50 //--------------------------------------------------------------------------- |
| 47 // | 51 // |
| 48 // Test class boilerplate | 52 // Test class boilerplate |
| 49 // | 53 // |
| 50 //--------------------------------------------------------------------------- | 54 //--------------------------------------------------------------------------- |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 140 break; | 144 break; |
| 141 case 23: name = "TestCaseInsensitiveStarters"; | 145 case 23: name = "TestCaseInsensitiveStarters"; |
| 142 if (exec) TestCaseInsensitiveStarters(); | 146 if (exec) TestCaseInsensitiveStarters(); |
| 143 break; | 147 break; |
| 144 case 24: name = "TestBug11049"; | 148 case 24: name = "TestBug11049"; |
| 145 if (exec) TestBug11049(); | 149 if (exec) TestBug11049(); |
| 146 break; | 150 break; |
| 147 case 25: name = "TestBug11371"; | 151 case 25: name = "TestBug11371"; |
| 148 if (exec) TestBug11371(); | 152 if (exec) TestBug11371(); |
| 149 break; | 153 break; |
| 154 case 26: name = "TestBug11480"; |
| 155 if (exec) TestBug11480(); |
| 156 break; |
| 157 case 27: name = "NamedCapture"; |
| 158 if (exec) NamedCapture(); |
| 159 break; |
| 160 case 28: name = "NamedCaptureLimits"; |
| 161 if (exec) NamedCaptureLimits(); |
| 162 break; |
| 150 default: name = ""; | 163 default: name = ""; |
| 151 break; //needed to end loop | 164 break; //needed to end loop |
| 152 } | 165 } |
| 153 } | 166 } |
| 154 | 167 |
| 155 | 168 |
| 156 | 169 |
| 157 /** | 170 /** |
| 158 * Calls utext_openUTF8 after, potentially, converting invariant text from the c
ompilation codepage | 171 * Calls utext_openUTF8 after, potentially, converting invariant text from the c
ompilation codepage |
| 159 * into ASCII. | 172 * into ASCII. |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 232 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr)
;\ | 245 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr)
;\ |
| 233 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
%s, got %s", \ | 246 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
%s, got %s", \ |
| 234 __LINE__, u_errorName(errcode), u_errorName(status));};} | 247 __LINE__, u_errorName(errcode), u_errorName(status));};} |
| 235 | 248 |
| 236 #define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \ | 249 #define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \ |
| 237 "RegexTest failure at line %d, from %d. status=%d\n",__LINE__, (line), stat
us); }} | 250 "RegexTest failure at line %d, from %d. status=%d\n",__LINE__, (line), stat
us); }} |
| 238 | 251 |
| 239 #define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \ | 252 #define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \ |
| 240 errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}} | 253 errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}} |
| 241 | 254 |
| 242 #define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTes
t failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToA
ssertBuf(ustr),inv);};} | 255 // expected: const char * , restricted to invariant characters. |
| 256 // actual: const UnicodeString & |
| 257 #define REGEX_ASSERT_UNISTR(expected, actual) { \ |
| 258 if (UnicodeString(expected, -1, US_INV) != (actual)) { \ |
| 259 errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s, %s) failed \n",
\ |
| 260 __FILE__, __LINE__, expected, extractToAssertBuf(actual));};} |
| 243 | 261 |
| 244 | 262 |
| 245 static UBool testUTextEqual(UText *uta, UText *utb) { | 263 static UBool testUTextEqual(UText *uta, UText *utb) { |
| 246 UChar32 ca = 0; | 264 UChar32 ca = 0; |
| 247 UChar32 cb = 0; | 265 UChar32 cb = 0; |
| 248 utext_setNativeIndex(uta, 0); | 266 utext_setNativeIndex(uta, 0); |
| 249 utext_setNativeIndex(utb, 0); | 267 utext_setNativeIndex(utb, 0); |
| 250 do { | 268 do { |
| 251 ca = utext_next32(uta); | 269 ca = utext_next32(uta); |
| 252 cb = utext_next32(utb); | 270 cb = utext_next32(utb); |
| (...skipping 1163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1416 REGEX_CHECK_STATUS; | 1434 REGEX_CHECK_STATUS; |
| 1417 dest = matcher2->replaceFirst("$1$1", status); | 1435 dest = matcher2->replaceFirst("$1$1", status); |
| 1418 REGEX_CHECK_STATUS; | 1436 REGEX_CHECK_STATUS; |
| 1419 REGEX_ASSERT(dest == "bcbcdefg"); | 1437 REGEX_ASSERT(dest == "bcbcdefg"); |
| 1420 | 1438 |
| 1421 dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1
."), status); | 1439 dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1
."), status); |
| 1422 REGEX_CHECK_STATUS; | 1440 REGEX_CHECK_STATUS; |
| 1423 REGEX_ASSERT(dest == "The value of $1 is bc.defg"); | 1441 REGEX_ASSERT(dest == "The value of $1 is bc.defg"); |
| 1424 | 1442 |
| 1425 dest = matcher2->replaceFirst("$ by itself, no group number $$$", status); | 1443 dest = matcher2->replaceFirst("$ by itself, no group number $$$", status); |
| 1426 REGEX_CHECK_STATUS; | 1444 REGEX_ASSERT(U_FAILURE(status)); |
| 1427 REGEX_ASSERT(dest == "$ by itself, no group number $$$defg"); | 1445 status = U_ZERO_ERROR; |
| 1428 | 1446 |
| 1429 UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U
0001D7CF."); | 1447 UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U
0001D7CF."); |
| 1430 replacement = replacement.unescape(); | 1448 replacement = replacement.unescape(); |
| 1431 dest = matcher2->replaceFirst(replacement, status); | 1449 dest = matcher2->replaceFirst(replacement, status); |
| 1432 REGEX_CHECK_STATUS; | 1450 REGEX_CHECK_STATUS; |
| 1433 REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg"); | 1451 REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg"); |
| 1434 | 1452 |
| 1435 REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",st
atus), U_INDEX_OUTOFBOUNDS_ERROR); | 1453 REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",st
atus), U_INDEX_OUTOFBOUNDS_ERROR); |
| 1436 | 1454 |
| 1437 | 1455 |
| (...skipping 605 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2043 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2061 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2044 utext_close(result); | 2062 utext_close(result); |
| 2045 result = matcher->group(0, &destText, group_len, status); | 2063 result = matcher->group(0, &destText, group_len, status); |
| 2046 REGEX_CHECK_STATUS; | 2064 REGEX_CHECK_STATUS; |
| 2047 REGEX_ASSERT(result == &destText); | 2065 REGEX_ASSERT(result == &destText); |
| 2048 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2066 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2049 // destText is now immutable, reopen it | 2067 // destText is now immutable, reopen it |
| 2050 utext_close(&destText); | 2068 utext_close(&destText); |
| 2051 utext_openUnicodeString(&destText, &dest, &status); | 2069 utext_openUnicodeString(&destText, &dest, &status); |
| 2052 | 2070 |
| 2053 result = matcher->group(0, NULL, status); | 2071 int64_t length; |
| 2072 result = matcher->group(0, NULL, length, status); |
| 2054 REGEX_CHECK_STATUS; | 2073 REGEX_CHECK_STATUS; |
| 2055 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2074 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2056 utext_close(result); | 2075 utext_close(result); |
| 2057 result = matcher->group(0, &destText, status); | 2076 result = matcher->group(0, &destText, length, status); |
| 2058 REGEX_CHECK_STATUS; | 2077 REGEX_CHECK_STATUS; |
| 2059 REGEX_ASSERT(result == &destText); | 2078 REGEX_ASSERT(result == &destText); |
| 2060 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2079 REGEX_ASSERT(utext_getNativeIndex(result) == 0); |
| 2080 REGEX_ASSERT(length == 10); |
| 2081 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2061 | 2082 |
| 2062 result = matcher->group(1, NULL, status); | 2083 // Capture Group 1 == "234567" |
| 2084 result = matcher->group(1, NULL, length, status); |
| 2063 REGEX_CHECK_STATUS; | 2085 REGEX_CHECK_STATUS; |
| 2064 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 };
/* 234567 */ | 2086 REGEX_ASSERT(utext_getNativeIndex(result) == 2); |
| 2065 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); | 2087 REGEX_ASSERT(length == 6); |
| 2088 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2066 utext_close(result); | 2089 utext_close(result); |
| 2067 result = matcher->group(1, &destText, status); | 2090 |
| 2091 result = matcher->group(1, &destText, length, status); |
| 2068 REGEX_CHECK_STATUS; | 2092 REGEX_CHECK_STATUS; |
| 2069 REGEX_ASSERT(result == &destText); | 2093 REGEX_ASSERT(result == &destText); |
| 2070 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); | 2094 REGEX_ASSERT(utext_getNativeIndex(result) == 2); |
| 2095 REGEX_ASSERT(length == 6); |
| 2096 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2097 utext_close(result); |
| 2071 | 2098 |
| 2072 result = matcher->group(2, NULL, status); | 2099 // Capture Group 2 == "45" |
| 2100 result = matcher->group(2, NULL, length, status); |
| 2073 REGEX_CHECK_STATUS; | 2101 REGEX_CHECK_STATUS; |
| 2074 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ | 2102 REGEX_ASSERT(utext_getNativeIndex(result) == 4); |
| 2075 REGEX_ASSERT_UTEXT_UTF8(str_45, result); | 2103 REGEX_ASSERT(length == 2); |
| 2104 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2076 utext_close(result); | 2105 utext_close(result); |
| 2077 result = matcher->group(2, &destText, status); | 2106 |
| 2107 result = matcher->group(2, &destText, length, status); |
| 2078 REGEX_CHECK_STATUS; | 2108 REGEX_CHECK_STATUS; |
| 2079 REGEX_ASSERT(result == &destText); | 2109 REGEX_ASSERT(result == &destText); |
| 2080 REGEX_ASSERT_UTEXT_UTF8(str_45, result); | 2110 REGEX_ASSERT(utext_getNativeIndex(result) == 4); |
| 2111 REGEX_ASSERT(length == 2); |
| 2112 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2113 utext_close(result); |
| 2081 | 2114 |
| 2082 result = matcher->group(3, NULL, status); | 2115 // Capture Group 3 == "89" |
| 2116 result = matcher->group(3, NULL, length, status); |
| 2083 REGEX_CHECK_STATUS; | 2117 REGEX_CHECK_STATUS; |
| 2084 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ | 2118 REGEX_ASSERT(utext_getNativeIndex(result) == 8); |
| 2085 REGEX_ASSERT_UTEXT_UTF8(str_89, result); | 2119 REGEX_ASSERT(length == 2); |
| 2120 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2086 utext_close(result); | 2121 utext_close(result); |
| 2087 result = matcher->group(3, &destText, status); | 2122 |
| 2123 result = matcher->group(3, &destText, length, status); |
| 2088 REGEX_CHECK_STATUS; | 2124 REGEX_CHECK_STATUS; |
| 2089 REGEX_ASSERT(result == &destText); | 2125 REGEX_ASSERT(result == &destText); |
| 2090 REGEX_ASSERT_UTEXT_UTF8(str_89, result); | 2126 REGEX_ASSERT(utext_getNativeIndex(result) == 8); |
| 2127 REGEX_ASSERT(length == 2); |
| 2128 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result); |
| 2129 utext_close(result); |
| 2091 | 2130 |
| 2131 // Capture Group number out of range. |
| 2132 status = U_ZERO_ERROR; |
| 2092 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2133 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
| 2134 status = U_ZERO_ERROR; |
| 2093 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2135 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
| 2136 status = U_ZERO_ERROR; |
| 2094 matcher->reset(); | 2137 matcher->reset(); |
| 2095 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); | 2138 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); |
| 2096 | 2139 |
| 2097 delete matcher; | 2140 delete matcher; |
| 2098 delete pat; | 2141 delete pat; |
| 2099 | 2142 |
| 2100 utext_close(&destText); | 2143 utext_close(&destText); |
| 2101 utext_close(&input); | 2144 utext_close(&input); |
| 2102 utext_close(&re); | 2145 utext_close(&re); |
| 2103 } | 2146 } |
| (...skipping 491 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2595 REGEX_CHECK_STATUS; | 2638 REGEX_CHECK_STATUS; |
| 2596 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61,
0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0
x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg *
/ | 2639 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61,
0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0
x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg *
/ |
| 2597 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); | 2640 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); |
| 2598 utext_close(result); | 2641 utext_close(result); |
| 2599 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2642 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2600 result = matcher2->replaceFirst(&replText, &destText, status); | 2643 result = matcher2->replaceFirst(&replText, &destText, status); |
| 2601 REGEX_CHECK_STATUS; | 2644 REGEX_CHECK_STATUS; |
| 2602 REGEX_ASSERT(result == &destText); | 2645 REGEX_ASSERT(result == &destText); |
| 2603 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); | 2646 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); |
| 2604 | 2647 |
| 2605 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6
9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f,
0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0
x00 }; /* $ by itself, no group number $$$ */ | 2648 const char str_byitselfnogroupnumber[] = { 0x5c, 0x24, 0x20, 0x62, 0x79, 0x2
0, 0x69, 0x74, 0x73, 0x65, 0x6c, |
| 2649 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70,
0x20, 0x6e, 0x75, 0x6d, 0x62, |
| 2650 0x65, 0x72, 0x20, 0x5c, 0x24, 0x5c, 0x24, 0x5c, 0x24, 0x00 }; /*
\$ by itself, no group number \$\$\$ */ |
| 2606 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); | 2651 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); |
| 2607 result = matcher2->replaceFirst(&replText, NULL, status); | 2652 result = matcher2->replaceFirst(&replText, NULL, status); |
| 2608 REGEX_CHECK_STATUS; | 2653 REGEX_CHECK_STATUS; |
| 2609 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20,
0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0
x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2
4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ | 2654 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20,
0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0
x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2
4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ |
| 2610 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); | 2655 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); |
| 2611 utext_close(result); | 2656 utext_close(result); |
| 2612 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2657 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2613 result = matcher2->replaceFirst(&replText, &destText, status); | 2658 result = matcher2->replaceFirst(&replText, &destText, status); |
| 2614 REGEX_CHECK_STATUS; | 2659 REGEX_CHECK_STATUS; |
| 2615 REGEX_ASSERT(result == &destText); | 2660 REGEX_ASSERT(result == &destText); |
| (...skipping 446 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3062 REGEX_ASSERT(n==5); | 3107 REGEX_ASSERT(n==5); |
| 3063 REGEX_ASSERT(fields[0]=="1"); | 3108 REGEX_ASSERT(fields[0]=="1"); |
| 3064 REGEX_ASSERT(fields[1]=="-"); | 3109 REGEX_ASSERT(fields[1]=="-"); |
| 3065 REGEX_ASSERT(fields[2]=="10"); | 3110 REGEX_ASSERT(fields[2]=="10"); |
| 3066 REGEX_ASSERT(fields[3]==","); | 3111 REGEX_ASSERT(fields[3]==","); |
| 3067 REGEX_ASSERT(fields[4]=="20"); | 3112 REGEX_ASSERT(fields[4]=="20"); |
| 3068 delete pat1; | 3113 delete pat1; |
| 3069 | 3114 |
| 3070 | 3115 |
| 3071 // | 3116 // |
| 3117 // split of a UText based string, with library allocating output UTexts. |
| 3118 // |
| 3119 { |
| 3120 status = U_ZERO_ERROR; |
| 3121 RegexMatcher matcher(UnicodeString("(:)"), 0, status); |
| 3122 UnicodeString stringToSplit("first:second:third"); |
| 3123 UText *textToSplit = utext_openUnicodeString(NULL, &stringToSplit, &stat
us); |
| 3124 REGEX_CHECK_STATUS; |
| 3125 |
| 3126 UText *splits[10] = {NULL}; |
| 3127 int32_t numFields = matcher.split(textToSplit, splits, UPRV_LENGTHOF(spl
its), status); |
| 3128 REGEX_CHECK_STATUS; |
| 3129 REGEX_ASSERT(numFields == 5); |
| 3130 REGEX_ASSERT_UTEXT_INVARIANT("first", splits[0]); |
| 3131 REGEX_ASSERT_UTEXT_INVARIANT(":", splits[1]); |
| 3132 REGEX_ASSERT_UTEXT_INVARIANT("second", splits[2]); |
| 3133 REGEX_ASSERT_UTEXT_INVARIANT(":", splits[3]); |
| 3134 REGEX_ASSERT_UTEXT_INVARIANT("third", splits[4]); |
| 3135 REGEX_ASSERT(splits[5] == NULL); |
| 3136 |
| 3137 for (int i=0; i<UPRV_LENGTHOF(splits); i++) { |
| 3138 if (splits[i]) { |
| 3139 utext_close(splits[i]); |
| 3140 splits[i] = NULL; |
| 3141 } |
| 3142 } |
| 3143 utext_close(textToSplit); |
| 3144 } |
| 3145 |
| 3146 |
| 3147 // |
| 3072 // RegexPattern::pattern() and patternText() | 3148 // RegexPattern::pattern() and patternText() |
| 3073 // | 3149 // |
| 3074 pat1 = new RegexPattern(); | 3150 pat1 = new RegexPattern(); |
| 3075 REGEX_ASSERT(pat1->pattern() == ""); | 3151 REGEX_ASSERT(pat1->pattern() == ""); |
| 3076 REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status)); | 3152 REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status)); |
| 3077 delete pat1; | 3153 delete pat1; |
| 3078 const char *helloWorldInvariant = "(Hello, world)*"; | 3154 const char *helloWorldInvariant = "(Hello, world)*"; |
| 3079 regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status); | 3155 regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status); |
| 3080 pat1 = RegexPattern::compile(&re1, pe, status); | 3156 pat1 = RegexPattern::compile(&re1, pe, status); |
| 3081 REGEX_CHECK_STATUS; | 3157 REGEX_CHECK_STATUS; |
| 3082 REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*"); | 3158 REGEX_ASSERT_UNISTR("(Hello, world)*", pat1->pattern()); |
| 3083 REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status)); | 3159 REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status)); |
| 3084 delete pat1; | 3160 delete pat1; |
| 3085 | 3161 |
| 3086 utext_close(&re1); | 3162 utext_close(&re1); |
| 3087 } | 3163 } |
| 3088 | 3164 |
| 3089 | 3165 |
| 3090 //--------------------------------------------------------------------------- | 3166 //--------------------------------------------------------------------------- |
| 3091 // | 3167 // |
| 3092 // Extended A more thorough check for features of regex patterns | 3168 // Extended A more thorough check for features of regex patterns |
| (...skipping 684 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3777 REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX); | 3853 REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX); |
| 3778 REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX); | 3854 REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX); |
| 3779 | 3855 |
| 3780 // Mal-formed {min,max} quantifiers | 3856 // Mal-formed {min,max} quantifiers |
| 3781 REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL); | 3857 REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL); |
| 3782 REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN); | 3858 REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN); |
| 3783 REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL); | 3859 REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL); |
| 3784 REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL); | 3860 REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL); |
| 3785 REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL); | 3861 REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL); |
| 3786 REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG); | 3862 REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG); |
| 3787 REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG); // Ov
erflows int during scan | 3863 REGEX_ERR("abc{5,50000000000}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Ov
erflows int during scan |
| 3788 REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Ov
erflows regex binary format | 3864 REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Ov
erflows regex binary format |
| 3789 REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG); | 3865 REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG); |
| 3790 | 3866 |
| 3791 // Ticket 5389 | 3867 // Ticket 5389 |
| 3792 REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); | 3868 REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); |
| 3793 | 3869 |
| 3794 // Invalid Back Reference \0 | 3870 // Invalid Back Reference \0 |
| 3795 // For ICU 3.8 and earlier | 3871 // For ICU 3.8 and earlier |
| 3796 // For ICU versions newer than 3.8, \0 introduces an octal escape. | 3872 // For ICU versions newer than 3.8, \0 introduces an octal escape. |
| 3797 // | 3873 // |
| (...skipping 1001 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4799 REGEX_ASSERT(cbInfo.numCalls > 0); | 4875 REGEX_ASSERT(cbInfo.numCalls > 0); |
| 4800 | 4876 |
| 4801 // A longer running match that the callback function will abort. | 4877 // A longer running match that the callback function will abort. |
| 4802 status = U_ZERO_ERROR; | 4878 status = U_ZERO_ERROR; |
| 4803 cbInfo.reset(4); | 4879 cbInfo.reset(4); |
| 4804 s = "aaaaaaaaaaaaaaaaaaaaaaab"; | 4880 s = "aaaaaaaaaaaaaaaaaaaaaaab"; |
| 4805 matcher.reset(s); | 4881 matcher.reset(s); |
| 4806 REGEX_ASSERT(matcher.matches(status)==FALSE); | 4882 REGEX_ASSERT(matcher.matches(status)==FALSE); |
| 4807 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); | 4883 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
| 4808 REGEX_ASSERT(cbInfo.numCalls == 4); | 4884 REGEX_ASSERT(cbInfo.numCalls == 4); |
| 4885 |
| 4886 // A longer running find that the callback function will abort. |
| 4887 status = U_ZERO_ERROR; |
| 4888 cbInfo.reset(4); |
| 4889 s = "aaaaaaaaaaaaaaaaaaaaaaab"; |
| 4890 matcher.reset(s); |
| 4891 REGEX_ASSERT(matcher.find(status)==FALSE); |
| 4892 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
| 4893 REGEX_ASSERT(cbInfo.numCalls == 4); |
| 4809 } | 4894 } |
| 4810 | 4895 |
| 4811 | 4896 |
| 4812 } | 4897 } |
| 4813 | 4898 |
| 4814 | 4899 |
| 4815 // | 4900 // |
| 4816 // FindProgressCallbacks() Test the find "progress" callback function. | 4901 // FindProgressCallbacks() Test the find "progress" callback function. |
| 4817 // When set, the find progress callback will be invoked during
a find operations | 4902 // When set, the find progress callback will be invoked during
a find operations |
| 4818 // after each return from a match attempt, giving the applicati
on the opportunity | 4903 // after each return from a match attempt, giving the applicati
on the opportunity |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4988 utext_close(&text2); | 5073 utext_close(&text2); |
| 4989 } | 5074 } |
| 4990 | 5075 |
| 4991 /* | 5076 /* |
| 4992 * group() | 5077 * group() |
| 4993 */ | 5078 */ |
| 4994 { | 5079 { |
| 4995 UChar text1[80]; | 5080 UChar text1[80]; |
| 4996 UText *actual; | 5081 UText *actual; |
| 4997 UBool result; | 5082 UBool result; |
| 4998 u_uastrncpy(text1, "noise abc interior def, and this is off the end", s
izeof(text1)/2); | 5083 int64_t length = 0; |
| 5084 |
| 5085 u_uastrncpy(text1, "noise abc interior def, and this is off the end", U
PRV_LENGTHOF(text1)); |
| 5086 // 012345678901234567890123456789012345678901234567 |
| 5087 // 0 1 2 3 4 |
| 4999 | 5088 |
| 5000 status = U_ZERO_ERROR; | 5089 status = U_ZERO_ERROR; |
| 5001 re = uregex_openC("abc(.*?)def", 0, NULL, &status); | 5090 re = uregex_openC("abc(.*?)def", 0, NULL, &status); |
| 5002 REGEX_CHECK_STATUS; | 5091 REGEX_CHECK_STATUS; |
| 5003 | 5092 |
| 5004 uregex_setText(re, text1, -1, &status); | 5093 uregex_setText(re, text1, -1, &status); |
| 5005 result = uregex_find(re, 0, &status); | 5094 result = uregex_find(re, 0, &status); |
| 5006 REGEX_ASSERT(result==TRUE); | 5095 REGEX_ASSERT(result==TRUE); |
| 5007 | 5096 |
| 5008 /* Capture Group 0, the full match. Should succeed. */ | 5097 /* Capture Group 0, the full match. Should succeed. "abc interior def"
*/ |
| 5009 status = U_ZERO_ERROR; | 5098 status = U_ZERO_ERROR; |
| 5010 actual = uregex_groupUTextDeep(re, 0, &bufferText, &status); | 5099 actual = uregex_groupUText(re, 0, &bufferText, &length, &status); |
| 5011 REGEX_CHECK_STATUS; | 5100 REGEX_CHECK_STATUS; |
| 5012 REGEX_ASSERT(actual == &bufferText); | 5101 REGEX_ASSERT(actual == &bufferText); |
| 5013 REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual); | 5102 REGEX_ASSERT(utext_getNativeIndex(actual) == 6); |
| 5103 REGEX_ASSERT(length == 16); |
| 5104 REGEX_ASSERT(utext_nativeLength(actual) == 47); |
| 5014 | 5105 |
| 5015 /* Capture group #1. Should succeed. */ | 5106 /* Capture group #1. Should succeed, matching " interior ". */ |
| 5016 status = U_ZERO_ERROR; | 5107 status = U_ZERO_ERROR; |
| 5017 actual = uregex_groupUTextDeep(re, 1, &bufferText, &status); | 5108 actual = uregex_groupUText(re, 1, &bufferText, &length, &status); |
| 5018 REGEX_CHECK_STATUS; | 5109 REGEX_CHECK_STATUS; |
| 5019 REGEX_ASSERT(actual == &bufferText); | 5110 REGEX_ASSERT(actual == &bufferText); |
| 5020 REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual); | 5111 REGEX_ASSERT(utext_getNativeIndex(actual) == 9); // position of " inte
rior " |
| 5112 REGEX_ASSERT(length == 10); |
| 5113 REGEX_ASSERT(utext_nativeLength(actual) == 47); |
| 5021 | 5114 |
| 5022 /* Capture group out of range. Error. */ | 5115 /* Capture group out of range. Error. */ |
| 5023 status = U_ZERO_ERROR; | 5116 status = U_ZERO_ERROR; |
| 5024 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); | 5117 actual = uregex_groupUText(re, 2, &bufferText, &length, &status); |
| 5025 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 5118 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 5026 REGEX_ASSERT(actual == &bufferText); | 5119 REGEX_ASSERT(actual == &bufferText); |
| 5027 | |
| 5028 uregex_close(re); | 5120 uregex_close(re); |
| 5029 | 5121 |
| 5030 } | 5122 } |
| 5031 | 5123 |
| 5032 /* | 5124 /* |
| 5033 * replaceFirst() | 5125 * replaceFirst() |
| 5034 */ | 5126 */ |
| 5035 { | 5127 { |
| 5036 UChar text1[80]; | 5128 UChar text1[80]; |
| 5037 UChar text2[80]; | 5129 UChar text2[80]; |
| 5038 UText replText = UTEXT_INITIALIZER; | 5130 UText replText = UTEXT_INITIALIZER; |
| 5039 UText *result; | 5131 UText *result; |
| 5132 status = U_ZERO_ERROR; |
| 5133 utext_openUnicodeString(&bufferText, &buffer, &status); |
| 5040 | 5134 |
| 5041 status = U_ZERO_ERROR; | 5135 status = U_ZERO_ERROR; |
| 5042 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); | 5136 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); |
| 5043 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); | 5137 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)/2); |
| 5044 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); | 5138 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); |
| 5045 | 5139 |
| 5046 re = uregex_openC("x(.*?)x", 0, NULL, &status); | 5140 re = uregex_openC("x(.*?)x", 0, NULL, &status); |
| 5047 REGEX_CHECK_STATUS; | 5141 REGEX_CHECK_STATUS; |
| 5048 | 5142 |
| 5049 /* Normal case, with match */ | 5143 /* Normal case, with match */ |
| 5050 uregex_setText(re, text1, -1, &status); | 5144 uregex_setText(re, text1, -1, &status); |
| 5145 REGEX_CHECK_STATUS; |
| 5051 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5146 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
| 5147 REGEX_CHECK_STATUS; |
| 5052 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5148 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
| 5053 REGEX_CHECK_STATUS; | 5149 REGEX_CHECK_STATUS; |
| 5054 REGEX_ASSERT(result == &bufferText); | 5150 REGEX_ASSERT(result == &bufferText); |
| 5055 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); | 5151 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); |
| 5056 | 5152 |
| 5057 /* No match. Text should copy to output with no changes. */ | 5153 /* No match. Text should copy to output with no changes. */ |
| 5058 uregex_setText(re, text2, -1, &status); | 5154 uregex_setText(re, text2, -1, &status); |
| 5059 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5155 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
| 5060 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5156 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
| 5061 REGEX_CHECK_STATUS; | 5157 REGEX_CHECK_STATUS; |
| 5062 REGEX_ASSERT(result == &bufferText); | 5158 REGEX_ASSERT(result == &bufferText); |
| 5063 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); | 5159 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); |
| 5064 | 5160 |
| 5065 /* Unicode escapes */ | 5161 /* Unicode escapes */ |
| 5066 uregex_setText(re, text1, -1, &status); | 5162 uregex_setText(re, text1, -1, &status); |
| 5067 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a"
, -1, &status); | 5163 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042\\$\\
a", -1, &status); |
| 5068 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5164 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
| 5069 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5165 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
| 5070 REGEX_CHECK_STATUS; | 5166 REGEX_CHECK_STATUS; |
| 5071 REGEX_ASSERT(result == &bufferText); | 5167 REGEX_ASSERT(result == &bufferText); |
| 5072 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); | 5168 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); |
| 5073 | 5169 |
| 5074 uregex_close(re); | 5170 uregex_close(re); |
| 5075 utext_close(&replText); | 5171 utext_close(&replText); |
| 5076 } | 5172 } |
| 5077 | 5173 |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5116 | 5212 |
| 5117 /* | 5213 /* |
| 5118 * splitUText() uses the C++ API directly, and the UnicodeString version us
es mutable UTexts, | 5214 * splitUText() uses the C++ API directly, and the UnicodeString version us
es mutable UTexts, |
| 5119 * so we don't need to test it here. | 5215 * so we don't need to test it here. |
| 5120 */ | 5216 */ |
| 5121 | 5217 |
| 5122 utext_close(&bufferText); | 5218 utext_close(&bufferText); |
| 5123 utext_close(&patternText); | 5219 utext_close(&patternText); |
| 5124 } | 5220 } |
| 5125 | 5221 |
| 5222 |
| 5223 //-------------------------------------------------------------- |
| 5224 // |
| 5225 // NamedCapture Check basic named capture group functionality |
| 5226 // |
| 5227 //-------------------------------------------------------------- |
| 5228 void RegexTest::NamedCapture() { |
| 5229 UErrorCode status = U_ZERO_ERROR; |
| 5230 RegexPattern *pat = RegexPattern::compile(UnicodeString( |
| 5231 "abc()()(?<three>xyz)(de)(?<five>hmm)(?<six>oh)f\\k<five>"), 0, stat
us); |
| 5232 REGEX_CHECK_STATUS; |
| 5233 int32_t group = pat->groupNumberFromName("five", -1, status); |
| 5234 REGEX_CHECK_STATUS; |
| 5235 REGEX_ASSERT(5 == group); |
| 5236 group = pat->groupNumberFromName("three", -1, status); |
| 5237 REGEX_CHECK_STATUS; |
| 5238 REGEX_ASSERT(3 == group); |
| 5239 |
| 5240 status = U_ZERO_ERROR; |
| 5241 group = pat->groupNumberFromName(UnicodeString("six"), status); |
| 5242 REGEX_CHECK_STATUS; |
| 5243 REGEX_ASSERT(6 == group); |
| 5244 |
| 5245 status = U_ZERO_ERROR; |
| 5246 group = pat->groupNumberFromName(UnicodeString("nosuch"), status); |
| 5247 U_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5248 |
| 5249 status = U_ZERO_ERROR; |
| 5250 |
| 5251 // After copying a pattern, named capture should still work in the copy. |
| 5252 RegexPattern *copiedPat = new RegexPattern(*pat); |
| 5253 REGEX_ASSERT(*copiedPat == *pat); |
| 5254 delete pat; pat = NULL; // Delete original, copy should have no references
back to it. |
| 5255 |
| 5256 group = copiedPat->groupNumberFromName("five", -1, status); |
| 5257 REGEX_CHECK_STATUS; |
| 5258 REGEX_ASSERT(5 == group); |
| 5259 group = copiedPat->groupNumberFromName("three", -1, status); |
| 5260 REGEX_CHECK_STATUS; |
| 5261 REGEX_ASSERT(3 == group); |
| 5262 delete copiedPat; |
| 5263 |
| 5264 // ReplaceAll with named capture group. |
| 5265 status = U_ZERO_ERROR; |
| 5266 UnicodeString text("Substitution of <<quotes>> for <<double brackets>>"); |
| 5267 RegexMatcher *m = new RegexMatcher(UnicodeString("<<(?<mid>.+?)>>"), text, 0
, status); |
| 5268 REGEX_CHECK_STATUS; |
| 5269 // m.pattern().dumpPattern(); |
| 5270 UnicodeString replacedText = m->replaceAll("'${mid}'", status); |
| 5271 REGEX_CHECK_STATUS; |
| 5272 REGEX_ASSERT(UnicodeString("Substitution of 'quotes' for 'double brackets'")
== replacedText); |
| 5273 delete m; |
| 5274 |
| 5275 // ReplaceAll, allowed capture group numbers. |
| 5276 text = UnicodeString("abcmxyz"); |
| 5277 m = new RegexMatcher(UnicodeString("..(?<one>m)(.)(.)"), text, 0, status); |
| 5278 REGEX_CHECK_STATUS; |
| 5279 |
| 5280 status = U_ZERO_ERROR; |
| 5281 replacedText = m->replaceAll(UnicodeString("<$0>"), status); // group 0,
full match, is allowed. |
| 5282 REGEX_CHECK_STATUS; |
| 5283 REGEX_ASSERT(UnicodeString("a<bcmxy>z") == replacedText); |
| 5284 |
| 5285 status = U_ZERO_ERROR; |
| 5286 replacedText = m->replaceAll(UnicodeString("<$1>"), status); // group
1 by number. |
| 5287 REGEX_CHECK_STATUS; |
| 5288 REGEX_ASSERT(UnicodeString("a<m>z") == replacedText); |
| 5289 |
| 5290 status = U_ZERO_ERROR; |
| 5291 replacedText = m->replaceAll(UnicodeString("<${one}>"), status); // group
1 by name. |
| 5292 REGEX_CHECK_STATUS; |
| 5293 REGEX_ASSERT(UnicodeString("a<m>z") == replacedText); |
| 5294 |
| 5295 status = U_ZERO_ERROR; |
| 5296 replacedText = m->replaceAll(UnicodeString("<$2>"), status); // group 2. |
| 5297 REGEX_CHECK_STATUS; |
| 5298 REGEX_ASSERT(UnicodeString("a<x>z") == replacedText); |
| 5299 |
| 5300 status = U_ZERO_ERROR; |
| 5301 replacedText = m->replaceAll(UnicodeString("<$3>"), status); |
| 5302 REGEX_CHECK_STATUS; |
| 5303 REGEX_ASSERT(UnicodeString("a<y>z") == replacedText); |
| 5304 |
| 5305 status = U_ZERO_ERROR; |
| 5306 replacedText = m->replaceAll(UnicodeString("<$4>"), status); |
| 5307 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 5308 |
| 5309 status = U_ZERO_ERROR; |
| 5310 replacedText = m->replaceAll(UnicodeString("<$04>"), status); // group
0, leading 0, |
| 5311 REGEX_CHECK_STATUS; // tr
ailing out-of-range 4 passes through. |
| 5312 REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == replacedText); |
| 5313 |
| 5314 status = U_ZERO_ERROR; |
| 5315 replacedText = m->replaceAll(UnicodeString("<$000016>"), status); // Consu
me leading zeroes. Don't consume digits |
| 5316 REGEX_CHECK_STATUS; // tha
t push group num out of range. |
| 5317 REGEX_ASSERT(UnicodeString("a<m6>z") == replacedText); // Thi
s is group 1. |
| 5318 |
| 5319 status = U_ZERO_ERROR; |
| 5320 replacedText = m->replaceAll(UnicodeString("<$3$2$1${one}>"), status); |
| 5321 REGEX_CHECK_STATUS; |
| 5322 REGEX_ASSERT(UnicodeString("a<yxmm>z") == replacedText); |
| 5323 |
| 5324 status = U_ZERO_ERROR; |
| 5325 replacedText = m->replaceAll(UnicodeString("$3$2$1${one}"), status); |
| 5326 REGEX_CHECK_STATUS; |
| 5327 REGEX_ASSERT(UnicodeString("ayxmmz") == replacedText); |
| 5328 |
| 5329 status = U_ZERO_ERROR; |
| 5330 replacedText = m->replaceAll(UnicodeString("<${noSuchName}>"), status); |
| 5331 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5332 |
| 5333 status = U_ZERO_ERROR; |
| 5334 replacedText = m->replaceAll(UnicodeString("<${invalid-name}>"), status); |
| 5335 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5336 |
| 5337 status = U_ZERO_ERROR; |
| 5338 replacedText = m->replaceAll(UnicodeString("<${one"), status); |
| 5339 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5340 |
| 5341 status = U_ZERO_ERROR; |
| 5342 replacedText = m->replaceAll(UnicodeString("$not a capture group"), status)
; |
| 5343 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5344 |
| 5345 delete m; |
| 5346 |
| 5347 // Repeat the above replaceAll() tests using the plain C API, which |
| 5348 // has a separate implementation internally. |
| 5349 // TODO: factor out the test data. |
| 5350 |
| 5351 status = U_ZERO_ERROR; |
| 5352 URegularExpression *re = uregex_openC("..(?<one>m)(.)(.)", 0, NULL, &status)
; |
| 5353 REGEX_CHECK_STATUS; |
| 5354 text = UnicodeString("abcmxyz"); |
| 5355 uregex_setText(re, text.getBuffer(), text.length(), &status); |
| 5356 REGEX_CHECK_STATUS; |
| 5357 |
| 5358 UChar resultBuf[100]; |
| 5359 int32_t resultLength; |
| 5360 UnicodeString repl; |
| 5361 |
| 5362 status = U_ZERO_ERROR; |
| 5363 repl = UnicodeString("<$0>"); |
| 5364 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5365 REGEX_CHECK_STATUS; |
| 5366 REGEX_ASSERT(UnicodeString("a<bcmxy>z") == UnicodeString(resultBuf, resultLe
ngth)); |
| 5367 |
| 5368 status = U_ZERO_ERROR; |
| 5369 repl = UnicodeString("<$1>"); |
| 5370 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5371 REGEX_CHECK_STATUS; |
| 5372 REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength
)); |
| 5373 |
| 5374 status = U_ZERO_ERROR; |
| 5375 repl = UnicodeString("<${one}>"); |
| 5376 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5377 REGEX_CHECK_STATUS; |
| 5378 REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength
)); |
| 5379 |
| 5380 status = U_ZERO_ERROR; |
| 5381 repl = UnicodeString("<$2>"); |
| 5382 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5383 REGEX_CHECK_STATUS; |
| 5384 REGEX_ASSERT(UnicodeString("a<x>z") == UnicodeString(resultBuf, resultLength
)); |
| 5385 |
| 5386 status = U_ZERO_ERROR; |
| 5387 repl = UnicodeString("<$3>"); |
| 5388 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5389 REGEX_CHECK_STATUS; |
| 5390 REGEX_ASSERT(UnicodeString("a<y>z") == UnicodeString(resultBuf, resultLength
)); |
| 5391 |
| 5392 status = U_ZERO_ERROR; |
| 5393 repl = UnicodeString("<$4>"); |
| 5394 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5395 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 5396 |
| 5397 status = U_ZERO_ERROR; |
| 5398 repl = UnicodeString("<$04>"); |
| 5399 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5400 REGEX_CHECK_STATUS; |
| 5401 REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == UnicodeString(resultBuf, resultL
ength)); |
| 5402 |
| 5403 status = U_ZERO_ERROR; |
| 5404 repl = UnicodeString("<$000016>"); |
| 5405 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5406 REGEX_CHECK_STATUS; |
| 5407 REGEX_ASSERT(UnicodeString("a<m6>z") == UnicodeString(resultBuf, resultLengt
h)); |
| 5408 |
| 5409 status = U_ZERO_ERROR; |
| 5410 repl = UnicodeString("<$3$2$1${one}>"); |
| 5411 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5412 REGEX_CHECK_STATUS; |
| 5413 REGEX_ASSERT(UnicodeString("a<yxmm>z") == UnicodeString(resultBuf, resultLen
gth)); |
| 5414 |
| 5415 status = U_ZERO_ERROR; |
| 5416 repl = UnicodeString("$3$2$1${one}"); |
| 5417 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5418 REGEX_CHECK_STATUS; |
| 5419 REGEX_ASSERT(UnicodeString("ayxmmz") == UnicodeString(resultBuf, resultLengt
h)); |
| 5420 |
| 5421 status = U_ZERO_ERROR; |
| 5422 repl = UnicodeString("<${noSuchName}>"); |
| 5423 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5424 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5425 |
| 5426 status = U_ZERO_ERROR; |
| 5427 repl = UnicodeString("<${invalid-name}>"); |
| 5428 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5429 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5430 |
| 5431 status = U_ZERO_ERROR; |
| 5432 repl = UnicodeString("<${one"); |
| 5433 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5434 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5435 |
| 5436 status = U_ZERO_ERROR; |
| 5437 repl = UnicodeString("$not a capture group"); |
| 5438 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), result
Buf, UPRV_LENGTHOF(resultBuf), &status); |
| 5439 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME); |
| 5440 |
| 5441 uregex_close(re); |
| 5442 } |
| 5443 |
| 5444 //-------------------------------------------------------------- |
| 5445 // |
| 5446 // NamedCaptureLimits Patterns with huge numbers of named capture groups. |
| 5447 // The point is not so much what the exact limit is, |
| 5448 // but that a largish number doesn't hit bad non-linear pe
rformance, |
| 5449 // and that exceeding the limit fails cleanly. |
| 5450 // |
| 5451 //-------------------------------------------------------------- |
| 5452 void RegexTest::NamedCaptureLimits() { |
| 5453 if (quick) { |
| 5454 logln("Skipping test. Runs in exhuastive mode only."); |
| 5455 return; |
| 5456 } |
| 5457 const int32_t goodLimit = 1000000; // Pattern w this many groups builds
successfully. |
| 5458 const int32_t failLimit = 10000000; // Pattern exceeds internal limits, f
ails to compile. |
| 5459 char nnbuf[100]; |
| 5460 UnicodeString pattern; |
| 5461 int32_t nn; |
| 5462 |
| 5463 for (nn=1; nn<goodLimit; nn++) { |
| 5464 sprintf(nnbuf, "(?<nn%d>)", nn); |
| 5465 pattern.append(UnicodeString(nnbuf, -1, US_INV)); |
| 5466 } |
| 5467 UErrorCode status = U_ZERO_ERROR; |
| 5468 RegexPattern *pat = RegexPattern::compile(pattern, 0, status); |
| 5469 REGEX_CHECK_STATUS; |
| 5470 for (nn=1; nn<goodLimit; nn++) { |
| 5471 sprintf(nnbuf, "nn%d", nn); |
| 5472 int32_t groupNum = pat->groupNumberFromName(nnbuf, -1, status); |
| 5473 REGEX_ASSERT(nn == groupNum); |
| 5474 if (nn != groupNum) { |
| 5475 break; |
| 5476 } |
| 5477 } |
| 5478 delete pat; |
| 5479 |
| 5480 pattern.remove(); |
| 5481 for (nn=1; nn<failLimit; nn++) { |
| 5482 sprintf(nnbuf, "(?<nn%d>)", nn); |
| 5483 pattern.append(UnicodeString(nnbuf, -1, US_INV)); |
| 5484 } |
| 5485 status = U_ZERO_ERROR; |
| 5486 pat = RegexPattern::compile(pattern, 0, status); |
| 5487 REGEX_ASSERT(status == U_REGEX_PATTERN_TOO_BIG); |
| 5488 delete pat; |
| 5489 } |
| 5490 |
| 5491 |
| 5126 //-------------------------------------------------------------- | 5492 //-------------------------------------------------------------- |
| 5127 // | 5493 // |
| 5128 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher
. | 5494 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher
. |
| 5129 // | 5495 // |
| 5130 //--------------------------------------------------------------- | 5496 //--------------------------------------------------------------- |
| 5131 void RegexTest::Bug7651() { | 5497 void RegexTest::Bug7651() { |
| 5132 UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\
u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z
0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\
uFFFF])|\\$[A-Za-z]+)"); | 5498 UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\
u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z
0-9_]+(?:\\/[\\w-]+)?|(https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\
uFFFF])|\\$[A-Za-z]+)"); |
| 5133 // The following should exceed the default operator stack depth in the matc
her, i.e. force the matcher to malloc instead of using fSmallData. | 5499 // The following should exceed the default operator stack depth in the matc
her, i.e. force the matcher to malloc instead of using fSmallData. |
| 5134 // It will cause a segfault if RegexMatcher tries to use fSmallData instead
of malloc'ing the memory needed (see init2) for the pattern operator stack allo
cation. | 5500 // It will cause a segfault if RegexMatcher tries to use fSmallData instead
of malloc'ing the memory needed (see init2) for the pattern operator stack allo
cation. |
| 5135 UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u
0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![
A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u0
0f8-\\u00ff]*|\\$[A-Za-z]+)"); | 5501 UnicodeString pattern2("((https?\\:\\/\\/|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u
0080-\\uFFFF])|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?|(?<![
A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u0
0f8-\\u00ff]*|\\$[A-Za-z]+)"); |
| (...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5407 patternString.append(UnicodeString("stuff and things dont you know, thes
e are a few of my favorite strings\n")); | 5773 patternString.append(UnicodeString("stuff and things dont you know, thes
e are a few of my favorite strings\n")); |
| 5408 } | 5774 } |
| 5409 patternString.append(UnicodeString("X? trailing string")); | 5775 patternString.append(UnicodeString("X? trailing string")); |
| 5410 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString,
0, status)); | 5776 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString,
0, status)); |
| 5411 if (status != U_REGEX_PATTERN_TOO_BIG) { | 5777 if (status != U_REGEX_PATTERN_TOO_BIG) { |
| 5412 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | 5778 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", |
| 5413 __FILE__, __LINE__, u_errorName(status)); | 5779 __FILE__, __LINE__, u_errorName(status)); |
| 5414 } | 5780 } |
| 5415 } | 5781 } |
| 5416 | 5782 |
| 5783 void RegexTest::TestBug11480() { |
| 5784 // C API, get capture group of a group that does not participate in the matc
h. |
| 5785 // (Returns a zero length string, with nul termination, |
| 5786 // indistinguishable from a group with a zero length match.) |
| 5787 |
| 5788 UErrorCode status = U_ZERO_ERROR; |
| 5789 URegularExpression *re = uregex_openC("(A)|(B)", 0, NULL, &status); |
| 5790 REGEX_CHECK_STATUS; |
| 5791 UnicodeString text = UNICODE_STRING_SIMPLE("A"); |
| 5792 uregex_setText(re, text.getBuffer(), text.length(), &status); |
| 5793 REGEX_CHECK_STATUS; |
| 5794 REGEX_ASSERT(uregex_lookingAt(re, 0, &status)); |
| 5795 UChar buf[10] = {(UChar)13, (UChar)13, (UChar)13, (UChar)13}; |
| 5796 int32_t length = uregex_group(re, 2, buf+1, UPRV_LENGTHOF(buf)-1, &status); |
| 5797 REGEX_ASSERT(length == 0); |
| 5798 REGEX_ASSERT(buf[0] == 13); |
| 5799 REGEX_ASSERT(buf[1] == 0); |
| 5800 REGEX_ASSERT(buf[2] == 13); |
| 5801 uregex_close(re); |
| 5802 |
| 5803 // UText C++ API, length of match is 0 for non-participating matches. |
| 5804 UText ut = UTEXT_INITIALIZER; |
| 5805 utext_openUnicodeString(&ut, &text, &status); |
| 5806 RegexMatcher matcher(UnicodeString("(A)|(B)"), 0, status); |
| 5807 REGEX_CHECK_STATUS; |
| 5808 matcher.reset(&ut); |
| 5809 REGEX_ASSERT(matcher.lookingAt(0, status)); |
| 5810 |
| 5811 // UText C++ API, Capture group 1 matches "A", position 0, length 1. |
| 5812 int64_t groupLen = -666; |
| 5813 UText group = UTEXT_INITIALIZER; |
| 5814 matcher.group(1, &group, groupLen, status); |
| 5815 REGEX_CHECK_STATUS; |
| 5816 REGEX_ASSERT(groupLen == 1); |
| 5817 REGEX_ASSERT(utext_getNativeIndex(&group) == 0); |
| 5818 |
| 5819 // Capture group 2, the (B), does not participate in the match. |
| 5820 matcher.group(2, &group, groupLen, status); |
| 5821 REGEX_CHECK_STATUS; |
| 5822 REGEX_ASSERT(groupLen == 0); |
| 5823 REGEX_ASSERT(matcher.start(2, status) == -1); |
| 5824 REGEX_CHECK_STATUS; |
| 5825 } |
| 5826 |
| 5827 |
| 5417 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ | 5828 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
| 5418 | |
| OLD | NEW |