source/test/intltest/regextst.cpp - Issue 1621843002: ICU 56 update step 1

Side by Side Diff: source/test/intltest/regextst.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /********************************************************************	1 /********************************************************************

2 * COPYRIGHT:	2 * COPYRIGHT:

3 * Copyright (c) 2002-2014, International Business Machines Corporation and	3 * Copyright (c) 2002-2015, International Business Machines Corporation and

4 * others. All Rights Reserved.	4 * others. All Rights Reserved.

5 ********************************************************************/	5 ********************************************************************/

6	6

7 //	7 //

8 // regextst.cpp	8 // regextst.cpp

9 //	9 //

10 // ICU Regular Expressions test, part of intltest.	10 // ICU Regular Expressions test, part of intltest.

11 //	11 //

12	12

13 /*	13 /*

14 NOTE!!	14 NOTE!!

15	15

16 PLEASE be careful about ASCII assumptions in this test.	16 PLEASE be careful about ASCII assumptions in this test.

17 This test is one of the worst repeat offenders.	17 This test is one of the worst repeat offenders.

18 If you have questions, contact someone on the ICU PMC	18 If you have questions, contact someone on the ICU PMC

19 who has access to an EBCDIC system.	19 who has access to an EBCDIC system.

20	20

21 */	21 */

22	22

23 #include "intltest.h"	23 #include "intltest.h"

24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

25	25

	26 #include <stdlib.h>

	27 #include <stdio.h>

	28 #include <string.h>

	29

26 #include "unicode/localpointer.h"	30 #include "unicode/localpointer.h"

27 #include "unicode/regex.h"	31 #include "unicode/regex.h"

28 #include "unicode/uchar.h"	32 #include "unicode/uchar.h"

29 #include "unicode/ucnv.h"	33 #include "unicode/ucnv.h"

30 #include "unicode/uniset.h"	34 #include "unicode/uniset.h"

31 #include "unicode/uregex.h"	35 #include "unicode/uregex.h"

32 #include "unicode/usetiter.h"	36 #include "unicode/usetiter.h"

33 #include "unicode/ustring.h"	37 #include "unicode/ustring.h"

	38 #include "unicode/utext.h"

	39

34 #include "regextst.h"	40 #include "regextst.h"

35 #include "regexcmp.h"	41 #include "regexcmp.h"

36 #include "uvector.h"	42 #include "uvector.h"

37 #include "util.h"	43 #include "util.h"

38 #include <stdlib.h>	44 #include "cmemory.h"

39 #include <string.h>

40 #include <stdio.h>

41 #include "cstring.h"	45 #include "cstring.h"

42 #include "uinvchar.h"	46 #include "uinvchar.h"

43	47

44 #define SUPPORT_MUTATING_INPUT_STRING 0	48 #define SUPPORT_MUTATING_INPUT_STRING 0

45	49

46 //---------------------------------------------------------------------------	50 //---------------------------------------------------------------------------

47 //	51 //

48 // Test class boilerplate	52 // Test class boilerplate

49 //	53 //

50 //---------------------------------------------------------------------------	54 //---------------------------------------------------------------------------

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
140 break;	144 break;

141 case 23: name = "TestCaseInsensitiveStarters";	145 case 23: name = "TestCaseInsensitiveStarters";

142 if (exec) TestCaseInsensitiveStarters();	146 if (exec) TestCaseInsensitiveStarters();

143 break;	147 break;

144 case 24: name = "TestBug11049";	148 case 24: name = "TestBug11049";

145 if (exec) TestBug11049();	149 if (exec) TestBug11049();

146 break;	150 break;

147 case 25: name = "TestBug11371";	151 case 25: name = "TestBug11371";

148 if (exec) TestBug11371();	152 if (exec) TestBug11371();

149 break;	153 break;

	154 case 26: name = "TestBug11480";

	155 if (exec) TestBug11480();

	156 break;

	157 case 27: name = "NamedCapture";

	158 if (exec) NamedCapture();

	159 break;

	160 case 28: name = "NamedCaptureLimits";

	161 if (exec) NamedCaptureLimits();

	162 break;

150 default: name = "";	163 default: name = "";

151 break; //needed to end loop	164 break; //needed to end loop

152 }	165 }

153 }	166 }

154	167

155	168

156	169

157 /**	170 /**

158 * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage	171 * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage

159 * into ASCII.	172 * into ASCII.

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
232 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr) ;\	245 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr) ;\

233 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status= %s, got %s", \	246 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status= %s, got %s", \

234 __LINE__, u_errorName(errcode), u_errorName(status));};}	247 __LINE__, u_errorName(errcode), u_errorName(status));};}

235	248

236 #define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \	249 #define REGEX_CHECK_STATUS_L(line) {if (U_FAILURE(status)) {errln( \

237 "RegexTest failure at line %d, from %d. status=%d\n",__LINE__, (line), stat us); }}	250 "RegexTest failure at line %d, from %d. status=%d\n",__LINE__, (line), stat us); }}

238	251

239 #define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \	252 #define REGEX_ASSERT_L(expr, line) {if ((expr)==FALSE) { \

240 errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}	253 errln("RegexTest failure at line %d, from %d.", __LINE__, (line)); return;}}

241	254

242 #define REGEX_ASSERT_UNISTR(ustr,inv) {if (!(ustr==inv)) {errln("%s:%d: RegexTes t failure: REGEX_ASSERT_UNISTR(%s,%s) failed \n", __FILE__, __LINE__, extractToA ssertBuf(ustr),inv);};}	255 // expected: const char * , restricted to invariant characters.

	256 // actual: const UnicodeString &

	257 #define REGEX_ASSERT_UNISTR(expected, actual) { \

	258 if (UnicodeString(expected, -1, US_INV) != (actual)) { \

	259 errln("%s:%d: RegexTest failure: REGEX_ASSERT_UNISTR(%s, %s) failed \n", \

	260 __FILE__, __LINE__, expected, extractToAssertBuf(actual));};}

243	261

244	262

245 static UBool testUTextEqual(UText uta, UText utb) {	263 static UBool testUTextEqual(UText uta, UText utb) {

246 UChar32 ca = 0;	264 UChar32 ca = 0;

247 UChar32 cb = 0;	265 UChar32 cb = 0;

248 utext_setNativeIndex(uta, 0);	266 utext_setNativeIndex(uta, 0);

249 utext_setNativeIndex(utb, 0);	267 utext_setNativeIndex(utb, 0);

250 do {	268 do {

251 ca = utext_next32(uta);	269 ca = utext_next32(uta);

252 cb = utext_next32(utb);	270 cb = utext_next32(utb);

(...skipping 1163 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1416 REGEX_CHECK_STATUS;	1434 REGEX_CHECK_STATUS;

1417 dest = matcher2->replaceFirst("$1$1", status);	1435 dest = matcher2->replaceFirst("$1$1", status);

1418 REGEX_CHECK_STATUS;	1436 REGEX_CHECK_STATUS;

1419 REGEX_ASSERT(dest == "bcbcdefg");	1437 REGEX_ASSERT(dest == "bcbcdefg");

1420	1438

1421 dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1 ."), status);	1439 dest = matcher2->replaceFirst(UNICODE_STRING_SIMPLE("The value of \\$1 is $1 ."), status);

1422 REGEX_CHECK_STATUS;	1440 REGEX_CHECK_STATUS;

1423 REGEX_ASSERT(dest == "The value of $1 is bc.defg");	1441 REGEX_ASSERT(dest == "The value of $1 is bc.defg");

1424	1442

1425 dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);	1443 dest = matcher2->replaceFirst("$ by itself, no group number $$$", status);

1426 REGEX_CHECK_STATUS;	1444 REGEX_ASSERT(U_FAILURE(status));

1427 REGEX_ASSERT(dest == "$ by itself, no group number $$$defg");	1445 status = U_ZERO_ERROR;

1428	1446

1429 UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U 0001D7CF.");	1447 UnicodeString replacement = UNICODE_STRING_SIMPLE("Supplemental Digit 1 $\\U 0001D7CF.");

1430 replacement = replacement.unescape();	1448 replacement = replacement.unescape();

1431 dest = matcher2->replaceFirst(replacement, status);	1449 dest = matcher2->replaceFirst(replacement, status);

1432 REGEX_CHECK_STATUS;	1450 REGEX_CHECK_STATUS;

1433 REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg");	1451 REGEX_ASSERT(dest == "Supplemental Digit 1 bc.defg");

1434	1452

1435 REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",st atus), U_INDEX_OUTOFBOUNDS_ERROR);	1453 REGEX_ASSERT_FAIL(matcher2->replaceFirst("bad capture group number $5...",st atus), U_INDEX_OUTOFBOUNDS_ERROR);

1436	1454

1437	1455

(...skipping 605 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2043 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2061 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2044 utext_close(result);	2062 utext_close(result);

2045 result = matcher->group(0, &destText, group_len, status);	2063 result = matcher->group(0, &destText, group_len, status);

2046 REGEX_CHECK_STATUS;	2064 REGEX_CHECK_STATUS;

2047 REGEX_ASSERT(result == &destText);	2065 REGEX_ASSERT(result == &destText);

2048 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2066 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2049 // destText is now immutable, reopen it	2067 // destText is now immutable, reopen it

2050 utext_close(&destText);	2068 utext_close(&destText);

2051 utext_openUnicodeString(&destText, &dest, &status);	2069 utext_openUnicodeString(&destText, &dest, &status);

2052	2070

2053 result = matcher->group(0, NULL, status);	2071 int64_t length;

	2072 result = matcher->group(0, NULL, length, status);

2054 REGEX_CHECK_STATUS;	2073 REGEX_CHECK_STATUS;

2055 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2074 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2056 utext_close(result);	2075 utext_close(result);

2057 result = matcher->group(0, &destText, status);	2076 result = matcher->group(0, &destText, length, status);

2058 REGEX_CHECK_STATUS;	2077 REGEX_CHECK_STATUS;

2059 REGEX_ASSERT(result == &destText);	2078 REGEX_ASSERT(result == &destText);

2060 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2079 REGEX_ASSERT(utext_getNativeIndex(result) == 0);

	2080 REGEX_ASSERT(length == 10);

	2081 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

2061	2082

2062 result = matcher->group(1, NULL, status);	2083 // Capture Group 1 == "234567"

	2084 result = matcher->group(1, NULL, length, status);

2063 REGEX_CHECK_STATUS;	2085 REGEX_CHECK_STATUS;

2064 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */	2086 REGEX_ASSERT(utext_getNativeIndex(result) == 2);

2065 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);	2087 REGEX_ASSERT(length == 6);

	2088 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

2066 utext_close(result);	2089 utext_close(result);

2067 result = matcher->group(1, &destText, status);	2090

	2091 result = matcher->group(1, &destText, length, status);

2068 REGEX_CHECK_STATUS;	2092 REGEX_CHECK_STATUS;

2069 REGEX_ASSERT(result == &destText);	2093 REGEX_ASSERT(result == &destText);

2070 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);	2094 REGEX_ASSERT(utext_getNativeIndex(result) == 2);

	2095 REGEX_ASSERT(length == 6);

	2096 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

	2097 utext_close(result);

2071	2098

2072 result = matcher->group(2, NULL, status);	2099 // Capture Group 2 == "45"

	2100 result = matcher->group(2, NULL, length, status);

2073 REGEX_CHECK_STATUS;	2101 REGEX_CHECK_STATUS;

2074 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */	2102 REGEX_ASSERT(utext_getNativeIndex(result) == 4);

2075 REGEX_ASSERT_UTEXT_UTF8(str_45, result);	2103 REGEX_ASSERT(length == 2);

	2104 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

2076 utext_close(result);	2105 utext_close(result);

2077 result = matcher->group(2, &destText, status);	2106

	2107 result = matcher->group(2, &destText, length, status);

2078 REGEX_CHECK_STATUS;	2108 REGEX_CHECK_STATUS;

2079 REGEX_ASSERT(result == &destText);	2109 REGEX_ASSERT(result == &destText);

2080 REGEX_ASSERT_UTEXT_UTF8(str_45, result);	2110 REGEX_ASSERT(utext_getNativeIndex(result) == 4);

	2111 REGEX_ASSERT(length == 2);

	2112 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

	2113 utext_close(result);

2081	2114

2082 result = matcher->group(3, NULL, status);	2115 // Capture Group 3 == "89"

	2116 result = matcher->group(3, NULL, length, status);

2083 REGEX_CHECK_STATUS;	2117 REGEX_CHECK_STATUS;

2084 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */	2118 REGEX_ASSERT(utext_getNativeIndex(result) == 8);

2085 REGEX_ASSERT_UTEXT_UTF8(str_89, result);	2119 REGEX_ASSERT(length == 2);

	2120 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

2086 utext_close(result);	2121 utext_close(result);

2087 result = matcher->group(3, &destText, status);	2122

	2123 result = matcher->group(3, &destText, length, status);

2088 REGEX_CHECK_STATUS;	2124 REGEX_CHECK_STATUS;

2089 REGEX_ASSERT(result == &destText);	2125 REGEX_ASSERT(result == &destText);

2090 REGEX_ASSERT_UTEXT_UTF8(str_89, result);	2126 REGEX_ASSERT(utext_getNativeIndex(result) == 8);

	2127 REGEX_ASSERT(length == 2);

	2128 REGEX_ASSERT_UTEXT_INVARIANT("0123456789", result);

	2129 utext_close(result);

2091	2130

	2131 // Capture Group number out of range.

	2132 status = U_ZERO_ERROR;

2092 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR) ;	2133 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR) ;

	2134 status = U_ZERO_ERROR;

2093 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR) ;	2135 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR) ;

	2136 status = U_ZERO_ERROR;

2094 matcher->reset();	2137 matcher->reset();

2095 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);	2138 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);

2096	2139

2097 delete matcher;	2140 delete matcher;

2098 delete pat;	2141 delete pat;

2099	2142

2100 utext_close(&destText);	2143 utext_close(&destText);

2101 utext_close(&input);	2144 utext_close(&input);

2102 utext_close(&re);	2145 utext_close(&re);

2103 }	2146 }

(...skipping 491 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2595 REGEX_CHECK_STATUS;	2638 REGEX_CHECK_STATUS;

2596 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0 x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg * /	2639 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0 x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg * /

2597 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);	2640 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);

2598 utext_close(result);	2641 utext_close(result);

2599 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2642 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2600 result = matcher2->replaceFirst(&replText, &destText, status);	2643 result = matcher2->replaceFirst(&replText, &destText, status);

2601 REGEX_CHECK_STATUS;	2644 REGEX_CHECK_STATUS;

2602 REGEX_ASSERT(result == &destText);	2645 REGEX_ASSERT(result == &destText);

2603 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);	2646 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);

2604	2647

2605 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6 9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0 x00 }; /* $ by itself, no group number $$$ */	2648 const char str_byitselfnogroupnumber[] = { 0x5c, 0x24, 0x20, 0x62, 0x79, 0x2 0, 0x69, 0x74, 0x73, 0x65, 0x6c,

	2649 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62,

	2650 0x65, 0x72, 0x20, 0x5c, 0x24, 0x5c, 0x24, 0x5c, 0x24, 0x00 }; /* \$ by itself, no group number \$\$\$ */

2606 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);	2651 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);

2607 result = matcher2->replaceFirst(&replText, NULL, status);	2652 result = matcher2->replaceFirst(&replText, NULL, status);

2608 REGEX_CHECK_STATUS;	2653 REGEX_CHECK_STATUS;

2609 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0 x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2 4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */	2654 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0 x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2 4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */

2610 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);	2655 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);

2611 utext_close(result);	2656 utext_close(result);

2612 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2657 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2613 result = matcher2->replaceFirst(&replText, &destText, status);	2658 result = matcher2->replaceFirst(&replText, &destText, status);

2614 REGEX_CHECK_STATUS;	2659 REGEX_CHECK_STATUS;

2615 REGEX_ASSERT(result == &destText);	2660 REGEX_ASSERT(result == &destText);

(...skipping 446 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3062 REGEX_ASSERT(n==5);	3107 REGEX_ASSERT(n==5);

3063 REGEX_ASSERT(fields[0]=="1");	3108 REGEX_ASSERT(fields[0]=="1");

3064 REGEX_ASSERT(fields[1]=="-");	3109 REGEX_ASSERT(fields[1]=="-");

3065 REGEX_ASSERT(fields[2]=="10");	3110 REGEX_ASSERT(fields[2]=="10");

3066 REGEX_ASSERT(fields[3]==",");	3111 REGEX_ASSERT(fields[3]==",");

3067 REGEX_ASSERT(fields[4]=="20");	3112 REGEX_ASSERT(fields[4]=="20");

3068 delete pat1;	3113 delete pat1;

3069	3114

3070	3115

3071 //	3116 //

	3117 // split of a UText based string, with library allocating output UTexts.

	3118 //

	3119 {

	3120 status = U_ZERO_ERROR;

	3121 RegexMatcher matcher(UnicodeString("(:)"), 0, status);

	3122 UnicodeString stringToSplit("first:second:third");

	3123 UText *textToSplit = utext_openUnicodeString(NULL, &stringToSplit, &stat us);

	3124 REGEX_CHECK_STATUS;

	3125

	3126 UText *splits[10] = {NULL};

	3127 int32_t numFields = matcher.split(textToSplit, splits, UPRV_LENGTHOF(spl its), status);

	3128 REGEX_CHECK_STATUS;

	3129 REGEX_ASSERT(numFields == 5);

	3130 REGEX_ASSERT_UTEXT_INVARIANT("first", splits[0]);

	3131 REGEX_ASSERT_UTEXT_INVARIANT(":", splits[1]);

	3132 REGEX_ASSERT_UTEXT_INVARIANT("second", splits[2]);

	3133 REGEX_ASSERT_UTEXT_INVARIANT(":", splits[3]);

	3134 REGEX_ASSERT_UTEXT_INVARIANT("third", splits[4]);

	3135 REGEX_ASSERT(splits[5] == NULL);

	3136

	3137 for (int i=0; i<UPRV_LENGTHOF(splits); i++) {

	3138 if (splits[i]) {

	3139 utext_close(splits[i]);

	3140 splits[i] = NULL;

	3141 }

	3142 }

	3143 utext_close(textToSplit);

	3144 }

	3145

	3146

	3147 //

3072 // RegexPattern::pattern() and patternText()	3148 // RegexPattern::pattern() and patternText()

3073 //	3149 //

3074 pat1 = new RegexPattern();	3150 pat1 = new RegexPattern();

3075 REGEX_ASSERT(pat1->pattern() == "");	3151 REGEX_ASSERT(pat1->pattern() == "");

3076 REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status));	3152 REGEX_ASSERT_UTEXT_UTF8("", pat1->patternText(status));

3077 delete pat1;	3153 delete pat1;

3078 const char helloWorldInvariant = "(Hello, world)";	3154 const char helloWorldInvariant = "(Hello, world)";

3079 regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);	3155 regextst_openUTF8FromInvariant(&re1, helloWorldInvariant, -1, &status);

3080 pat1 = RegexPattern::compile(&re1, pe, status);	3156 pat1 = RegexPattern::compile(&re1, pe, status);

3081 REGEX_CHECK_STATUS;	3157 REGEX_CHECK_STATUS;

3082 REGEX_ASSERT_UNISTR(pat1->pattern(),"(Hello, world)*");	3158 REGEX_ASSERT_UNISTR("(Hello, world)*", pat1->pattern());

3083 REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));	3159 REGEX_ASSERT_UTEXT_INVARIANT("(Hello, world)*", pat1->patternText(status));

3084 delete pat1;	3160 delete pat1;

3085	3161

3086 utext_close(&re1);	3162 utext_close(&re1);

3087 }	3163 }

3088	3164

3089	3165

3090 //---------------------------------------------------------------------------	3166 //---------------------------------------------------------------------------

3091 //	3167 //

3092 // Extended A more thorough check for features of regex patterns	3168 // Extended A more thorough check for features of regex patterns

(...skipping 684 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3777 REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX);	3853 REGEX_ERR("abc\ndef(*2)", 2, 5, U_REGEX_RULE_SYNTAX);

3778 REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX);	3854 REGEX_ERR("abc**", 1, 5, U_REGEX_RULE_SYNTAX);

3779	3855

3780 // Mal-formed {min,max} quantifiers	3856 // Mal-formed {min,max} quantifiers

3781 REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL);	3857 REGEX_ERR("abc{a,2}",1,5, U_REGEX_BAD_INTERVAL);

3782 REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN);	3858 REGEX_ERR("abc{4,2}",1,8, U_REGEX_MAX_LT_MIN);

3783 REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL);	3859 REGEX_ERR("abc{1,b}",1,7, U_REGEX_BAD_INTERVAL);

3784 REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL);	3860 REGEX_ERR("abc{1,,2}",1,7, U_REGEX_BAD_INTERVAL);

3785 REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL);	3861 REGEX_ERR("abc{1,2a}",1,8, U_REGEX_BAD_INTERVAL);

3786 REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG);	3862 REGEX_ERR("abc{222222222222222222222}",1,14, U_REGEX_NUMBER_TOO_BIG);

3787 REGEX_ERR("abc{5,50000000000}", 1, 17, U_REGEX_NUMBER_TOO_BIG); // Ov erflows int during scan	3863 REGEX_ERR("abc{5,50000000000}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Ov erflows int during scan

3788 REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Ov erflows regex binary format	3864 REGEX_ERR("abc{5,687865858}", 1, 16, U_REGEX_NUMBER_TOO_BIG); // Ov erflows regex binary format

3789 REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG);	3865 REGEX_ERR("abc{687865858,687865859}", 1, 24, U_REGEX_NUMBER_TOO_BIG);

3790	3866

3791 // Ticket 5389	3867 // Ticket 5389

3792 REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);	3868 REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX);

3793	3869

3794 // Invalid Back Reference \0	3870 // Invalid Back Reference \0

3795 // For ICU 3.8 and earlier	3871 // For ICU 3.8 and earlier

3796 // For ICU versions newer than 3.8, \0 introduces an octal escape.	3872 // For ICU versions newer than 3.8, \0 introduces an octal escape.

3797 //	3873 //

(...skipping 1001 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4799 REGEX_ASSERT(cbInfo.numCalls > 0);	4875 REGEX_ASSERT(cbInfo.numCalls > 0);

4800	4876

4801 // A longer running match that the callback function will abort.	4877 // A longer running match that the callback function will abort.

4802 status = U_ZERO_ERROR;	4878 status = U_ZERO_ERROR;

4803 cbInfo.reset(4);	4879 cbInfo.reset(4);

4804 s = "aaaaaaaaaaaaaaaaaaaaaaab";	4880 s = "aaaaaaaaaaaaaaaaaaaaaaab";

4805 matcher.reset(s);	4881 matcher.reset(s);

4806 REGEX_ASSERT(matcher.matches(status)==FALSE);	4882 REGEX_ASSERT(matcher.matches(status)==FALSE);

4807 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);	4883 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

4808 REGEX_ASSERT(cbInfo.numCalls == 4);	4884 REGEX_ASSERT(cbInfo.numCalls == 4);

	4885

	4886 // A longer running find that the callback function will abort.

	4887 status = U_ZERO_ERROR;

	4888 cbInfo.reset(4);

	4889 s = "aaaaaaaaaaaaaaaaaaaaaaab";

	4890 matcher.reset(s);

	4891 REGEX_ASSERT(matcher.find(status)==FALSE);

	4892 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

	4893 REGEX_ASSERT(cbInfo.numCalls == 4);

4809 }	4894 }

4810	4895

4811	4896

4812 }	4897 }

4813	4898

4814	4899

4815 //	4900 //

4816 // FindProgressCallbacks() Test the find "progress" callback function.	4901 // FindProgressCallbacks() Test the find "progress" callback function.

4817 // When set, the find progress callback will be invoked during a find operations	4902 // When set, the find progress callback will be invoked during a find operations

4818 // after each return from a match attempt, giving the application the opportunity	4903 // after each return from a match attempt, giving the application the opportunity

(...skipping 169 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4988 utext_close(&text2);	5073 utext_close(&text2);

4989 }	5074 }

4990	5075

4991 /*	5076 /*

4992 * group()	5077 * group()

4993 */	5078 */

4994 {	5079 {

4995 UChar text1[80];	5080 UChar text1[80];

4996 UText *actual;	5081 UText *actual;

4997 UBool result;	5082 UBool result;

4998 u_uastrncpy(text1, "noise abc interior def, and this is off the end", s izeof(text1)/2);	5083 int64_t length = 0;

	5084

	5085 u_uastrncpy(text1, "noise abc interior def, and this is off the end", U PRV_LENGTHOF(text1));

	5086 // 012345678901234567890123456789012345678901234567

	5087 // 0 1 2 3 4

4999	5088

5000 status = U_ZERO_ERROR;	5089 status = U_ZERO_ERROR;

5001 re = uregex_openC("abc(.*?)def", 0, NULL, &status);	5090 re = uregex_openC("abc(.*?)def", 0, NULL, &status);

5002 REGEX_CHECK_STATUS;	5091 REGEX_CHECK_STATUS;

5003	5092

5004 uregex_setText(re, text1, -1, &status);	5093 uregex_setText(re, text1, -1, &status);

5005 result = uregex_find(re, 0, &status);	5094 result = uregex_find(re, 0, &status);

5006 REGEX_ASSERT(result==TRUE);	5095 REGEX_ASSERT(result==TRUE);

5007	5096

5008 /* Capture Group 0, the full match. Should succeed. */	5097 /* Capture Group 0, the full match. Should succeed. "abc interior def" */

5009 status = U_ZERO_ERROR;	5098 status = U_ZERO_ERROR;

5010 actual = uregex_groupUTextDeep(re, 0, &bufferText, &status);	5099 actual = uregex_groupUText(re, 0, &bufferText, &length, &status);

5011 REGEX_CHECK_STATUS;	5100 REGEX_CHECK_STATUS;

5012 REGEX_ASSERT(actual == &bufferText);	5101 REGEX_ASSERT(actual == &bufferText);

5013 REGEX_ASSERT_UTEXT_INVARIANT("abc interior def", actual);	5102 REGEX_ASSERT(utext_getNativeIndex(actual) == 6);

	5103 REGEX_ASSERT(length == 16);

	5104 REGEX_ASSERT(utext_nativeLength(actual) == 47);

5014	5105

5015 /* Capture group #1. Should succeed. */	5106 /* Capture group #1. Should succeed, matching " interior ". */

5016 status = U_ZERO_ERROR;	5107 status = U_ZERO_ERROR;

5017 actual = uregex_groupUTextDeep(re, 1, &bufferText, &status);	5108 actual = uregex_groupUText(re, 1, &bufferText, &length, &status);

5018 REGEX_CHECK_STATUS;	5109 REGEX_CHECK_STATUS;

5019 REGEX_ASSERT(actual == &bufferText);	5110 REGEX_ASSERT(actual == &bufferText);

5020 REGEX_ASSERT_UTEXT_INVARIANT(" interior ", actual);	5111 REGEX_ASSERT(utext_getNativeIndex(actual) == 9); // position of " inte rior "

	5112 REGEX_ASSERT(length == 10);

	5113 REGEX_ASSERT(utext_nativeLength(actual) == 47);

5021	5114

5022 /* Capture group out of range. Error. */	5115 /* Capture group out of range. Error. */

5023 status = U_ZERO_ERROR;	5116 status = U_ZERO_ERROR;

5024 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);	5117 actual = uregex_groupUText(re, 2, &bufferText, &length, &status);

5025 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);	5118 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

5026 REGEX_ASSERT(actual == &bufferText);	5119 REGEX_ASSERT(actual == &bufferText);

5027

5028 uregex_close(re);	5120 uregex_close(re);

5029	5121

5030 }	5122 }

5031	5123

5032 /*	5124 /*

5033 * replaceFirst()	5125 * replaceFirst()

5034 */	5126 */

5035 {	5127 {

5036 UChar text1[80];	5128 UChar text1[80];

5037 UChar text2[80];	5129 UChar text2[80];

5038 UText replText = UTEXT_INITIALIZER;	5130 UText replText = UTEXT_INITIALIZER;

5039 UText *result;	5131 UText *result;

	5132 status = U_ZERO_ERROR;

	5133 utext_openUnicodeString(&bufferText, &buffer, &status);

5040	5134

5041 status = U_ZERO_ERROR;	5135 status = U_ZERO_ERROR;

5042 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);	5136 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));

5043 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);	5137 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)/2);

5044 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);	5138 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);

5045	5139

5046 re = uregex_openC("x(.*?)x", 0, NULL, &status);	5140 re = uregex_openC("x(.*?)x", 0, NULL, &status);

5047 REGEX_CHECK_STATUS;	5141 REGEX_CHECK_STATUS;

5048	5142

5049 /* Normal case, with match */	5143 /* Normal case, with match */

5050 uregex_setText(re, text1, -1, &status);	5144 uregex_setText(re, text1, -1, &status);

	5145 REGEX_CHECK_STATUS;

5051 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);	5146 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);

	5147 REGEX_CHECK_STATUS;

5052 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);	5148 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);

5053 REGEX_CHECK_STATUS;	5149 REGEX_CHECK_STATUS;

5054 REGEX_ASSERT(result == &bufferText);	5150 REGEX_ASSERT(result == &bufferText);

5055 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);	5151 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);

5056	5152

5057 /* No match. Text should copy to output with no changes. */	5153 /* No match. Text should copy to output with no changes. */

5058 uregex_setText(re, text2, -1, &status);	5154 uregex_setText(re, text2, -1, &status);

5059 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);	5155 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);

5060 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);	5156 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);

5061 REGEX_CHECK_STATUS;	5157 REGEX_CHECK_STATUS;

5062 REGEX_ASSERT(result == &bufferText);	5158 REGEX_ASSERT(result == &bufferText);

5063 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);	5159 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);

5064	5160

5065 /* Unicode escapes */	5161 /* Unicode escapes */

5066 uregex_setText(re, text1, -1, &status);	5162 uregex_setText(re, text1, -1, &status);

5067 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a" , -1, &status);	5163 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042\\$\\ a", -1, &status);

5068 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);	5164 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);

5069 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);	5165 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);

5070 REGEX_CHECK_STATUS;	5166 REGEX_CHECK_STATUS;

5071 REGEX_ASSERT(result == &bufferText);	5167 REGEX_ASSERT(result == &bufferText);

5072 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);	5168 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);

5073	5169

5074 uregex_close(re);	5170 uregex_close(re);

5075 utext_close(&replText);	5171 utext_close(&replText);

5076 }	5172 }

5077	5173

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5116	5212

5117 /*	5213 /*

5118 * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,	5214 * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,

5119 * so we don't need to test it here.	5215 * so we don't need to test it here.

5120 */	5216 */

5121	5217

5122 utext_close(&bufferText);	5218 utext_close(&bufferText);

5123 utext_close(&patternText);	5219 utext_close(&patternText);

5124 }	5220 }

5125	5221

	5222

	5223 //--------------------------------------------------------------

	5224 //

	5225 // NamedCapture Check basic named capture group functionality

	5226 //

	5227 //--------------------------------------------------------------

	5228 void RegexTest::NamedCapture() {

	5229 UErrorCode status = U_ZERO_ERROR;

	5230 RegexPattern *pat = RegexPattern::compile(UnicodeString(

	5231 "abc()()(?<three>xyz)(de)(?<five>hmm)(?<six>oh)f\\k<five>"), 0, status);

	5232 REGEX_CHECK_STATUS;

	5233 int32_t group = pat->groupNumberFromName("five", -1, status);

	5234 REGEX_CHECK_STATUS;

	5235 REGEX_ASSERT(5 == group);

	5236 group = pat->groupNumberFromName("three", -1, status);

	5237 REGEX_CHECK_STATUS;

	5238 REGEX_ASSERT(3 == group);

	5239

	5240 status = U_ZERO_ERROR;

	5241 group = pat->groupNumberFromName(UnicodeString("six"), status);

	5242 REGEX_CHECK_STATUS;

	5243 REGEX_ASSERT(6 == group);

	5244

	5245 status = U_ZERO_ERROR;

	5246 group = pat->groupNumberFromName(UnicodeString("nosuch"), status);

	5247 U_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5248

	5249 status = U_ZERO_ERROR;

	5250

	5251 // After copying a pattern, named capture should still work in the copy.

	5252 RegexPattern *copiedPat = new RegexPattern(*pat);

	5253 REGEX_ASSERT(*copiedPat == *pat);

	5254 delete pat; pat = NULL; // Delete original, copy should have no references back to it.

	5255

	5256 group = copiedPat->groupNumberFromName("five", -1, status);

	5257 REGEX_CHECK_STATUS;

	5258 REGEX_ASSERT(5 == group);

	5259 group = copiedPat->groupNumberFromName("three", -1, status);

	5260 REGEX_CHECK_STATUS;

	5261 REGEX_ASSERT(3 == group);

	5262 delete copiedPat;

	5263

	5264 // ReplaceAll with named capture group.

	5265 status = U_ZERO_ERROR;

	5266 UnicodeString text("Substitution of <<quotes>> for <<double brackets>>");

	5267 RegexMatcher *m = new RegexMatcher(UnicodeString("<<(?<mid>.+?)>>"), text, 0, status);

	5268 REGEX_CHECK_STATUS;

	5269 // m.pattern().dumpPattern();

	5270 UnicodeString replacedText = m->replaceAll("'${mid}'", status);

	5271 REGEX_CHECK_STATUS;

	5272 REGEX_ASSERT(UnicodeString("Substitution of 'quotes' for 'double brackets'") == replacedText);

	5273 delete m;

	5274

	5275 // ReplaceAll, allowed capture group numbers.

	5276 text = UnicodeString("abcmxyz");

	5277 m = new RegexMatcher(UnicodeString("..(?<one>m)(.)(.)"), text, 0, status);

	5278 REGEX_CHECK_STATUS;

	5279

	5280 status = U_ZERO_ERROR;

	5281 replacedText = m->replaceAll(UnicodeString("<$0>"), status); // group 0, full match, is allowed.

	5282 REGEX_CHECK_STATUS;

	5283 REGEX_ASSERT(UnicodeString("a<bcmxy>z") == replacedText);

	5284

	5285 status = U_ZERO_ERROR;

	5286 replacedText = m->replaceAll(UnicodeString("<$1>"), status); // group 1 by number.

	5287 REGEX_CHECK_STATUS;

	5288 REGEX_ASSERT(UnicodeString("a<m>z") == replacedText);

	5289

	5290 status = U_ZERO_ERROR;

	5291 replacedText = m->replaceAll(UnicodeString("<${one}>"), status); // group 1 by name.

	5292 REGEX_CHECK_STATUS;

	5293 REGEX_ASSERT(UnicodeString("a<m>z") == replacedText);

	5294

	5295 status = U_ZERO_ERROR;

	5296 replacedText = m->replaceAll(UnicodeString("<$2>"), status); // group 2.

	5297 REGEX_CHECK_STATUS;

	5298 REGEX_ASSERT(UnicodeString("a<x>z") == replacedText);

	5299

	5300 status = U_ZERO_ERROR;

	5301 replacedText = m->replaceAll(UnicodeString("<$3>"), status);

	5302 REGEX_CHECK_STATUS;

	5303 REGEX_ASSERT(UnicodeString("a<y>z") == replacedText);

	5304

	5305 status = U_ZERO_ERROR;

	5306 replacedText = m->replaceAll(UnicodeString("<$4>"), status);

	5307 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

	5308

	5309 status = U_ZERO_ERROR;

	5310 replacedText = m->replaceAll(UnicodeString("<$04>"), status); // group 0, leading 0,

	5311 REGEX_CHECK_STATUS; // trailing out-of-range 4 passes through.

	5312 REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == replacedText);

	5313

	5314 status = U_ZERO_ERROR;

	5315 replacedText = m->replaceAll(UnicodeString("<$000016>"), status); // Consume leading zeroes. Don't consume digits

	5316 REGEX_CHECK_STATUS; // that push group num out of range.

	5317 REGEX_ASSERT(UnicodeString("a<m6>z") == replacedText); // This is group 1.

	5318

	5319 status = U_ZERO_ERROR;

	5320 replacedText = m->replaceAll(UnicodeString("<$3$2$1${one}>"), status);

	5321 REGEX_CHECK_STATUS;

	5322 REGEX_ASSERT(UnicodeString("a<yxmm>z") == replacedText);

	5323

	5324 status = U_ZERO_ERROR;

	5325 replacedText = m->replaceAll(UnicodeString("$3$2$1${one}"), status);

	5326 REGEX_CHECK_STATUS;

	5327 REGEX_ASSERT(UnicodeString("ayxmmz") == replacedText);

	5328

	5329 status = U_ZERO_ERROR;

	5330 replacedText = m->replaceAll(UnicodeString("<${noSuchName}>"), status);

	5331 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5332

	5333 status = U_ZERO_ERROR;

	5334 replacedText = m->replaceAll(UnicodeString("<${invalid-name}>"), status);

	5335 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5336

	5337 status = U_ZERO_ERROR;

	5338 replacedText = m->replaceAll(UnicodeString("<${one"), status);

	5339 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5340

	5341 status = U_ZERO_ERROR;

	5342 replacedText = m->replaceAll(UnicodeString("$not a capture group"), status);

	5343 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5344

	5345 delete m;

	5346

	5347 // Repeat the above replaceAll() tests using the plain C API, which

	5348 // has a separate implementation internally.

	5349 // TODO: factor out the test data.

	5350

	5351 status = U_ZERO_ERROR;

	5352 URegularExpression *re = uregex_openC("..(?<one>m)(.)(.)", 0, NULL, &status);

	5353 REGEX_CHECK_STATUS;

	5354 text = UnicodeString("abcmxyz");

	5355 uregex_setText(re, text.getBuffer(), text.length(), &status);

	5356 REGEX_CHECK_STATUS;

	5357

	5358 UChar resultBuf[100];

	5359 int32_t resultLength;

	5360 UnicodeString repl;

	5361

	5362 status = U_ZERO_ERROR;

	5363 repl = UnicodeString("<$0>");

	5364 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5365 REGEX_CHECK_STATUS;

	5366 REGEX_ASSERT(UnicodeString("a<bcmxy>z") == UnicodeString(resultBuf, resultLength));

	5367

	5368 status = U_ZERO_ERROR;

	5369 repl = UnicodeString("<$1>");

	5370 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5371 REGEX_CHECK_STATUS;

	5372 REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength));

	5373

	5374 status = U_ZERO_ERROR;

	5375 repl = UnicodeString("<${one}>");

	5376 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5377 REGEX_CHECK_STATUS;

	5378 REGEX_ASSERT(UnicodeString("a<m>z") == UnicodeString(resultBuf, resultLength));

	5379

	5380 status = U_ZERO_ERROR;

	5381 repl = UnicodeString("<$2>");

	5382 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5383 REGEX_CHECK_STATUS;

	5384 REGEX_ASSERT(UnicodeString("a<x>z") == UnicodeString(resultBuf, resultLength));

	5385

	5386 status = U_ZERO_ERROR;

	5387 repl = UnicodeString("<$3>");

	5388 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5389 REGEX_CHECK_STATUS;

	5390 REGEX_ASSERT(UnicodeString("a<y>z") == UnicodeString(resultBuf, resultLength));

	5391

	5392 status = U_ZERO_ERROR;

	5393 repl = UnicodeString("<$4>");

	5394 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5395 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

	5396

	5397 status = U_ZERO_ERROR;

	5398 repl = UnicodeString("<$04>");

	5399 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5400 REGEX_CHECK_STATUS;

	5401 REGEX_ASSERT(UnicodeString("a<bcmxy4>z") == UnicodeString(resultBuf, resultLength));

	5402

	5403 status = U_ZERO_ERROR;

	5404 repl = UnicodeString("<$000016>");

	5405 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5406 REGEX_CHECK_STATUS;

	5407 REGEX_ASSERT(UnicodeString("a<m6>z") == UnicodeString(resultBuf, resultLength));

	5408

	5409 status = U_ZERO_ERROR;

	5410 repl = UnicodeString("<$3$2$1${one}>");

	5411 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5412 REGEX_CHECK_STATUS;

	5413 REGEX_ASSERT(UnicodeString("a<yxmm>z") == UnicodeString(resultBuf, resultLength));

	5414

	5415 status = U_ZERO_ERROR;

	5416 repl = UnicodeString("$3$2$1${one}");

	5417 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5418 REGEX_CHECK_STATUS;

	5419 REGEX_ASSERT(UnicodeString("ayxmmz") == UnicodeString(resultBuf, resultLength));

	5420

	5421 status = U_ZERO_ERROR;

	5422 repl = UnicodeString("<${noSuchName}>");

	5423 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5424 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5425

	5426 status = U_ZERO_ERROR;

	5427 repl = UnicodeString("<${invalid-name}>");

	5428 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5429 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5430

	5431 status = U_ZERO_ERROR;

	5432 repl = UnicodeString("<${one");

	5433 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5434 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5435

	5436 status = U_ZERO_ERROR;

	5437 repl = UnicodeString("$not a capture group");

	5438 resultLength = uregex_replaceAll(re, repl.getBuffer(), repl.length(), resultBuf, UPRV_LENGTHOF(resultBuf), &status);

	5439 REGEX_ASSERT(status == U_REGEX_INVALID_CAPTURE_GROUP_NAME);

	5440

	5441 uregex_close(re);

	5442 }

	5443

	5444 //--------------------------------------------------------------

	5445 //

	5446 // NamedCaptureLimits Patterns with huge numbers of named capture groups.

	5447 // The point is not so much what the exact limit is,

	5448 // but that a largish number doesn't hit bad non-linear performance,

	5449 // and that exceeding the limit fails cleanly.

	5450 //

	5451 //--------------------------------------------------------------

	5452 void RegexTest::NamedCaptureLimits() {

	5453 if (quick) {

	5454 logln("Skipping test. Runs in exhuastive mode only.");

	5455 return;

	5456 }

	5457 const int32_t goodLimit = 1000000; // Pattern w this many groups builds successfully.

	5458 const int32_t failLimit = 10000000; // Pattern exceeds internal limits, fails to compile.

	5459 char nnbuf[100];

	5460 UnicodeString pattern;

	5461 int32_t nn;

	5462

	5463 for (nn=1; nn<goodLimit; nn++) {

	5464 sprintf(nnbuf, "(?<nn%d>)", nn);

	5465 pattern.append(UnicodeString(nnbuf, -1, US_INV));

	5466 }

	5467 UErrorCode status = U_ZERO_ERROR;

	5468 RegexPattern *pat = RegexPattern::compile(pattern, 0, status);

	5469 REGEX_CHECK_STATUS;

	5470 for (nn=1; nn<goodLimit; nn++) {

	5471 sprintf(nnbuf, "nn%d", nn);

	5472 int32_t groupNum = pat->groupNumberFromName(nnbuf, -1, status);

	5473 REGEX_ASSERT(nn == groupNum);

	5474 if (nn != groupNum) {

	5475 break;

	5476 }

	5477 }

	5478 delete pat;

	5479

	5480 pattern.remove();

	5481 for (nn=1; nn<failLimit; nn++) {

	5482 sprintf(nnbuf, "(?<nn%d>)", nn);

	5483 pattern.append(UnicodeString(nnbuf, -1, US_INV));

	5484 }

	5485 status = U_ZERO_ERROR;

	5486 pat = RegexPattern::compile(pattern, 0, status);

	5487 REGEX_ASSERT(status == U_REGEX_PATTERN_TOO_BIG);

	5488 delete pat;

	5489 }

	5490

	5491

5126 //--------------------------------------------------------------	5492 //--------------------------------------------------------------

5127 //	5493 //

5128 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher.	5494 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher.

5129 //	5495 //

5130 //---------------------------------------------------------------	5496 //---------------------------------------------------------------

5131 void RegexTest::Bug7651() {	5497 void RegexTest::Bug7651() {

5132 UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\ u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*\|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z 0-9_]+(?:\\/[\\w-]+)?\|(https?\\:\\/\\/\|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\ uFFFF])\|\\$[A-Za-z]+)");	5498 UnicodeString pattern1("((?<![A-Za-z0-9])[#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\ u00c0-\\u00d6\\u00c8-\\u00f6\\u00f8-\\u00ff]*\|(?<![A-Za-z0-9_])[@\\uff20][A-Za-z 0-9_]+(?:\\/[\\w-]+)?\|(https?\\:\\/\\/\|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u0080-\\ uFFFF])\|\\$[A-Za-z]+)");

5133 // The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData.	5499 // The following should exceed the default operator stack depth in the matcher, i.e. force the matcher to malloc instead of using fSmallData.

5134 // It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation.	5500 // It will cause a segfault if RegexMatcher tries to use fSmallData instead of malloc'ing the memory needed (see init2) for the pattern operator stack allocation.

5135 UnicodeString pattern2("((https?\\:\\/\\/\|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u 0080-\\uFFFF])\|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?\|(?<![ A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u0 0f8-\\u00ff]*\|\\$[A-Za-z]+)");	5501 UnicodeString pattern2("((https?\\:\\/\\/\|www\\.)\\S+(?<![\\!\\),\\.:;\\]\\u 0080-\\uFFFF])\|(?<![A-Za-z0-9_])[\\@\\uff20][A-Za-z0-9_]+(?:\\/[\\w\\-]+)?\|(?<![ A-Za-z0-9])[\\#\\uff03][A-Za-z0-9_][A-Za-z0-9_\\u00c0-\\u00d6\\u00c8-\\u00f6\\u0 0f8-\\u00ff]*\|\\$[A-Za-z]+)");

(...skipping 271 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5407 patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));	5773 patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));

5408 }	5774 }

5409 patternString.append(UnicodeString("X? trailing string"));	5775 patternString.append(UnicodeString("X? trailing string"));

5410 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));	5776 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));

5411 if (status != U_REGEX_PATTERN_TOO_BIG) {	5777 if (status != U_REGEX_PATTERN_TOO_BIG) {

5412 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",	5778 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",

5413 __FILE__, __LINE__, u_errorName(status));	5779 __FILE__, __LINE__, u_errorName(status));

5414 }	5780 }

5415 }	5781 }

5416	5782

	5783 void RegexTest::TestBug11480() {

	5784 // C API, get capture group of a group that does not participate in the match.

	5785 // (Returns a zero length string, with nul termination,

	5786 // indistinguishable from a group with a zero length match.)

	5787

	5788 UErrorCode status = U_ZERO_ERROR;

	5789 URegularExpression *re = uregex_openC("(A)\|(B)", 0, NULL, &status);

	5790 REGEX_CHECK_STATUS;

	5791 UnicodeString text = UNICODE_STRING_SIMPLE("A");

	5792 uregex_setText(re, text.getBuffer(), text.length(), &status);

	5793 REGEX_CHECK_STATUS;

	5794 REGEX_ASSERT(uregex_lookingAt(re, 0, &status));

	5795 UChar buf[10] = {(UChar)13, (UChar)13, (UChar)13, (UChar)13};

	5796 int32_t length = uregex_group(re, 2, buf+1, UPRV_LENGTHOF(buf)-1, &status);

	5797 REGEX_ASSERT(length == 0);

	5798 REGEX_ASSERT(buf[0] == 13);

	5799 REGEX_ASSERT(buf[1] == 0);

	5800 REGEX_ASSERT(buf[2] == 13);

	5801 uregex_close(re);

	5802

	5803 // UText C++ API, length of match is 0 for non-participating matches.

	5804 UText ut = UTEXT_INITIALIZER;

	5805 utext_openUnicodeString(&ut, &text, &status);

	5806 RegexMatcher matcher(UnicodeString("(A)\|(B)"), 0, status);

	5807 REGEX_CHECK_STATUS;

	5808 matcher.reset(&ut);

	5809 REGEX_ASSERT(matcher.lookingAt(0, status));

	5810

	5811 // UText C++ API, Capture group 1 matches "A", position 0, length 1.

	5812 int64_t groupLen = -666;

	5813 UText group = UTEXT_INITIALIZER;

	5814 matcher.group(1, &group, groupLen, status);

	5815 REGEX_CHECK_STATUS;

	5816 REGEX_ASSERT(groupLen == 1);

	5817 REGEX_ASSERT(utext_getNativeIndex(&group) == 0);

	5818

	5819 // Capture group 2, the (B), does not participate in the match.

	5820 matcher.group(2, &group, groupLen, status);

	5821 REGEX_CHECK_STATUS;

	5822 REGEX_ASSERT(groupLen == 0);

	5823 REGEX_ASSERT(matcher.start(2, status) == -1);

	5824 REGEX_CHECK_STATUS;

	5825 }

	5826

	5827

5417 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */	5828 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

5418

OLD	NEW

« no previous file with comments | « source/test/intltest/regextst.h ('k') | source/test/intltest/regiontst.cpp » ('j') | no next file with comments »