source/test/intltest/regextst.cpp - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/test/intltest/regextst.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unusued directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/test/intltest/regextst.cpp

diff --git a/source/test/intltest/regextst.cpp b/source/test/intltest/regextst.cpp

index 9fd9f43fd8c6b23ea21daebc3cf5a40face1f9f3..ca2fd21481a47d52695c9a36bc20a838b1f0f4ab 100644

--- a/source/test/intltest/regextst.cpp

+++ b/source/test/intltest/regextst.cpp

@@ -1,6 +1,6 @@

/********************************************************************

* COPYRIGHT:

********************************************************************/

@@ -23,12 +23,16 @@

#include "intltest.h"

#if !UCONFIG_NO_REGULAR_EXPRESSIONS

+#include "unicode/localpointer.h"

#include "unicode/regex.h"

#include "unicode/uchar.h"

#include "unicode/ucnv.h"

#include "unicode/uniset.h"

+#include "unicode/uregex.h"

+#include "unicode/usetiter.h"

#include "unicode/ustring.h"

#include "regextst.h"

+#include "regexcmp.h"

#include "uvector.h"

#include "util.h"

#include <stdlib.h>

@@ -131,10 +135,15 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch

case 21: name = "Bug 9283";

if (exec) Bug9283();

break;

- case 22: name = "TestBug11371";

- if (exec) TestBug11371();

+ case 22: name = "Bug10459";

+ if (exec) Bug10459();

+ break;

+ case 23: name = "TestCaseInsensitiveStarters";

+ if (exec) TestCaseInsensitiveStarters();

+ break;

+ case 24: name = "TestBug11049";

+ if (exec) TestBug11049();

break;

default: name = "";

break; //needed to end loop

}

@@ -144,7 +153,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch

/**

* Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage

- * into ASCII.

+ * into ASCII.

* @see utext_openUTF8

static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);

@@ -210,7 +219,6 @@ const char* RegexTest::extractToAssertBuf(const UnicodeString& message) {

return ASSERT_BUF;

}

#define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}

#define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \

@@ -296,11 +304,11 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const

}

/**

- * Assumes utf-8 input

+ * Assumes utf-8 input

#define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__)

/**

- * Assumes Invariant input

+ * Assumes Invariant input

#define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__)

@@ -308,7 +316,7 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const

* This buffer ( inv_buf ) is used to hold the UTF-8 strings

* passed into utext_openUTF8. An error will be given if

* INV_BUFSIZ is too small. It's only used on EBCDIC systems.

- */

+ */

#define INV_BUFSIZ 2048 /* increase this if too small */

@@ -376,7 +384,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,

line, u_errorName(status));

return FALSE;

}

- if (line==376) { RegexPatternDump(REPattern);}

+ if (line==376) { REPattern->dumpPattern();}

UnicodeString inputString(inputText);

UnicodeString unEscapedInput = inputString.unescape();

@@ -412,7 +420,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,

}

if (retVal == FALSE) {

- RegexPatternDump(REPattern);

+ REPattern->dumpPattern();

}

delete REPattern;

@@ -439,12 +447,12 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look

line, u_errorName(status));

return FALSE;

}

UnicodeString inputString(text, -1, US_INV);

UnicodeString unEscapedInput = inputString.unescape();

LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));

ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);

inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);

if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {

// UTF-8 does not allow unpaired surrogates, so this could actually happen

@@ -455,7 +463,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look

textChars = new char[inputUTF8Length+1];

unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);

utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);

REMatcher = &REPattern->matcher(status)->reset(&inputText);

if (U_FAILURE(status)) {

errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Status = %s\n",

@@ -488,7 +496,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look

}

if (retVal == FALSE) {

- RegexPatternDump(REPattern);

+ REPattern->dumpPattern();

}

delete REPattern;

@@ -554,7 +562,7 @@ void RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,

}

delete callerPattern;

utext_close(&patternText);

}

@@ -581,7 +589,7 @@ void RegexTest::Basic() {

UErrorCode status = U_ZERO_ERROR;

RegexPattern *pattern;

pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);

- RegexPatternDump(pattern);

+ pattern->dumpPattern();

RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);

UBool result = m->find();

printf("result = %d\n", result);

@@ -729,18 +737,18 @@ void RegexTest::UTextBasic() {

utext_openUTF8(&pattern, str_abc, -1, &status);

RegexMatcher matcher(&pattern, 0, status);

REGEX_CHECK_STATUS;

UText input = UTEXT_INITIALIZER;

utext_openUTF8(&input, str_abc, -1, &status);

REGEX_CHECK_STATUS;

matcher.reset(&input);

REGEX_CHECK_STATUS;

REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());

matcher.reset(matcher.inputText());

REGEX_CHECK_STATUS;

REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());

utext_close(&pattern);

utext_close(&input);

}

@@ -1117,7 +1125,7 @@ void RegexTest::API_Match() {

delete m;

delete p;

}

// Regions

@@ -1130,34 +1138,34 @@ void RegexTest::API_Match() {

REGEX_ASSERT(m.regionEnd() == testString.length());

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

m.region(2,4, status);

REGEX_CHECK_STATUS;

REGEX_ASSERT(m.matches(status));

REGEX_ASSERT(m.start(status)==2);

REGEX_ASSERT(m.end(status)==4);

REGEX_CHECK_STATUS;

m.reset();

REGEX_ASSERT(m.regionStart() == 0);

REGEX_ASSERT(m.regionEnd() == testString.length());

UnicodeString shorterString("short");

m.reset(shorterString);

REGEX_ASSERT(m.regionStart() == 0);

REGEX_ASSERT(m.regionEnd() == shorterString.length());

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));

REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

REGEX_ASSERT(&m == &m.reset());

REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

REGEX_ASSERT(&m == &m.reset());

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));

REGEX_ASSERT(m.hasTransparentBounds() == TRUE);

@@ -1168,9 +1176,9 @@ void RegexTest::API_Match() {

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

REGEX_ASSERT(&m == &m.reset());

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

}

// hitEnd() and requireEnd()

@@ -1182,7 +1190,7 @@ void RegexTest::API_Match() {

REGEX_ASSERT(m1.hitEnd() == TRUE);

REGEX_ASSERT(m1.requireEnd() == FALSE);

REGEX_CHECK_STATUS;

status = U_ZERO_ERROR;

RegexMatcher m2("a*", testString, 0, status);

REGEX_ASSERT(m2.lookingAt(status) == TRUE);

@@ -1220,7 +1228,7 @@ void RegexTest::API_Match() {

#endif

- // Time Outs.

+ // Time Outs.

// Note: These tests will need to be changed when the regexp engine is

// able to detect and cut short the exponential time behavior on

// this type of match.

@@ -1248,22 +1256,22 @@ void RegexTest::API_Match() {

REGEX_ASSERT(matcher.lookingAt(status) == FALSE);

REGEX_CHECK_STATUS;

}

// Stack Limits

{

UErrorCode status = U_ZERO_ERROR;

UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A'

// Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations

// of the '+', and makes the stack frames larger.

RegexMatcher matcher("(A)+A$", testString, 0, status);

// With the default stack, this match should fail to run

REGEX_ASSERT(matcher.lookingAt(status) == FALSE);

REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);

// With unlimited stack, it should run

status = U_ZERO_ERROR;

matcher.setStackLimit(0, status);

@@ -1279,7 +1287,7 @@ void RegexTest::API_Match() {

REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);

REGEX_ASSERT(matcher.getStackLimit() == 10000);

}

// A pattern that doesn't save state should work with

// a minimal sized stack

{

@@ -1292,7 +1300,7 @@ void RegexTest::API_Match() {

REGEX_ASSERT(matcher.matches(status) == TRUE);

REGEX_CHECK_STATUS;

REGEX_ASSERT(matcher.getStackLimit() == 30);

// Negative stack sizes should fail

status = U_ZERO_ERROR;

matcher.setStackLimit(1000, status);

@@ -1301,7 +1309,7 @@ void RegexTest::API_Match() {

REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

REGEX_ASSERT(matcher.getStackLimit() == 1000);

}

@@ -1850,7 +1858,7 @@ void RegexTest::API_Match_UTF8() {

regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);

REGEX_VERBOSE_TEXT(&input2);

utext_openUChars(&empty, NULL, 0, &status);

int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */

int32_t input2Len = strlen("not abc");

@@ -1960,7 +1968,7 @@ void RegexTest::API_Match_UTF8() {

delete m1;

delete pat2;

utext_close(&re);

utext_close(&input1);

utext_close(&input2);

@@ -1981,10 +1989,10 @@ void RegexTest::API_Match_UTF8() {

UText re=UTEXT_INITIALIZER;

const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */

utext_openUTF8(&re, str_01234567_pat, -1, &status);

RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);

REGEX_CHECK_STATUS;

UText input = UTEXT_INITIALIZER;

const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */

utext_openUTF8(&input, str_0123456789, -1, &status);

@@ -2019,13 +2027,13 @@ void RegexTest::API_Match_UTF8() {

REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);

matcher->lookingAt(status);

UnicodeString dest;

UText destText = UTEXT_INITIALIZER;

utext_openUnicodeString(&destText, &dest, &status);

UText *result;

//const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */

- // Test shallow-clone API

+ // Test shallow-clone API

int64_t group_len;

result = matcher->group((UText *)NULL, group_len, status);

REGEX_CHECK_STATUS;

@@ -2038,7 +2046,7 @@ void RegexTest::API_Match_UTF8() {

// destText is now immutable, reopen it

utext_close(&destText);

utext_openUnicodeString(&destText, &dest, &status);

result = matcher->group(0, NULL, status);

REGEX_CHECK_STATUS;

REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

@@ -2047,7 +2055,7 @@ void RegexTest::API_Match_UTF8() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(result == &destText);

REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

result = matcher->group(1, NULL, status);

REGEX_CHECK_STATUS;

const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */

@@ -2057,7 +2065,7 @@ void RegexTest::API_Match_UTF8() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(result == &destText);

REGEX_ASSERT_UTEXT_UTF8(str_234567, result);

result = matcher->group(2, NULL, status);

REGEX_CHECK_STATUS;

const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */

@@ -2067,7 +2075,7 @@ void RegexTest::API_Match_UTF8() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(result == &destText);

REGEX_ASSERT_UTEXT_UTF8(str_45, result);

result = matcher->group(3, NULL, status);

REGEX_CHECK_STATUS;

const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */

@@ -2085,7 +2093,7 @@ void RegexTest::API_Match_UTF8() {

delete matcher;

delete pat;

utext_close(&destText);

utext_close(&input);

utext_close(&re);

@@ -2146,7 +2154,7 @@ void RegexTest::API_Match_UTF8() {

delete matcher;

delete pat;

utext_close(&input);

utext_close(&re);

}

@@ -2164,7 +2172,7 @@ void RegexTest::API_Match_UTF8() {

utext_openUTF8(&re, str_Gabcabc, -1, &status);

RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);

REGEX_CHECK_STATUS;

UText input = UTEXT_INITIALIZER;

const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */

@@ -2186,7 +2194,7 @@ void RegexTest::API_Match_UTF8() {

delete matcher;

delete pat;

utext_close(&input);

utext_close(&re);

}

@@ -2226,7 +2234,7 @@ void RegexTest::API_Match_UTF8() {

REGEX_ASSERT(m.end(status) == i);

}

REGEX_ASSERT(i==20);

utext_close(&s);

}

{

@@ -2248,7 +2256,7 @@ void RegexTest::API_Match_UTF8() {

REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));

}

REGEX_ASSERT(i==5);

utext_close(&s);

}

@@ -2276,7 +2284,7 @@ void RegexTest::API_Match_UTF8() {

delete m;

delete p;

}

// Regions

@@ -2288,42 +2296,42 @@ void RegexTest::API_Match_UTF8() {

REGEX_VERBOSE_TEXT(&testPattern);

regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);

REGEX_VERBOSE_TEXT(&testText);

RegexMatcher m(&testPattern, &testText, 0, status);

REGEX_CHECK_STATUS;

REGEX_ASSERT(m.regionStart() == 0);

REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

m.region(2,4, status);

REGEX_CHECK_STATUS;

REGEX_ASSERT(m.matches(status));

REGEX_ASSERT(m.start(status)==2);

REGEX_ASSERT(m.end(status)==4);

REGEX_CHECK_STATUS;

m.reset();

REGEX_ASSERT(m.regionStart() == 0);

REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));

regextst_openUTF8FromInvariant(&testText, "short", -1, &status);

REGEX_VERBOSE_TEXT(&testText);

m.reset(&testText);

REGEX_ASSERT(m.regionStart() == 0);

REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));

REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

REGEX_ASSERT(&m == &m.reset());

REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

REGEX_ASSERT(&m == &m.reset());

REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));

REGEX_ASSERT(m.hasTransparentBounds() == TRUE);

@@ -2334,11 +2342,11 @@ void RegexTest::API_Match_UTF8() {

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

REGEX_ASSERT(&m == &m.reset());

REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

utext_close(&testText);

utext_close(&testPattern);

}

// hitEnd() and requireEnd()

@@ -2350,13 +2358,13 @@ void RegexTest::API_Match_UTF8() {

const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */

utext_openUTF8(&testPattern, str_, -1, &status);

utext_openUTF8(&testText, str_aabb, -1, &status);

RegexMatcher m1(&testPattern, &testText, 0, status);

REGEX_ASSERT(m1.lookingAt(status) == TRUE);

REGEX_ASSERT(m1.hitEnd() == TRUE);

REGEX_ASSERT(m1.requireEnd() == FALSE);

REGEX_CHECK_STATUS;

status = U_ZERO_ERROR;

const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */

utext_openUTF8(&testPattern, str_a, -1, &status);

@@ -2374,7 +2382,7 @@ void RegexTest::API_Match_UTF8() {

REGEX_ASSERT(m3.hitEnd() == TRUE);

REGEX_ASSERT(m3.requireEnd() == TRUE);

REGEX_CHECK_STATUS;

utext_close(&testText);

utext_close(&testPattern);

}

@@ -2400,7 +2408,7 @@ void RegexTest::API_Replace_UTF8() {

REGEX_VERBOSE_TEXT(&re);

RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);

REGEX_CHECK_STATUS;

char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */

// 012345678901234567

UText dataText = UTEXT_INITIALIZER;

@@ -2416,9 +2424,9 @@ void RegexTest::API_Replace_UTF8() {

UText destText = UTEXT_INITIALIZER;

utext_openUnicodeString(&destText, &dest, &status);

UText *result;

UText replText = UTEXT_INITIALIZER;

const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */

utext_openUTF8(&replText, str_yz, -1, &status);

REGEX_VERBOSE_TEXT(&replText);

@@ -2450,7 +2458,7 @@ void RegexTest::API_Replace_UTF8() {

const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */

utext_openUTF8(&dataText, str_abxabxabx, -1, &status);

matcher->reset(&dataText);

result = matcher->replaceFirst(&replText, NULL, status);

REGEX_CHECK_STATUS;

REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);

@@ -2475,7 +2483,7 @@ void RegexTest::API_Replace_UTF8() {

utext_openUTF8(&dataText, NULL, 0, &status);

matcher->reset(&dataText);

result = matcher->replaceFirst(&replText, NULL, status);

REGEX_CHECK_STATUS;

REGEX_ASSERT_UTEXT_UTF8("", result);

@@ -2499,7 +2507,7 @@ void RegexTest::API_Replace_UTF8() {

utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."

matcher->reset(&dataText);

utext_openUTF8(&replText, NULL, 0, &status);

result = matcher->replaceFirst(&replText, NULL, status);

REGEX_CHECK_STATUS;

@@ -2563,7 +2571,7 @@ void RegexTest::API_Replace_UTF8() {

utext_openUTF8(&dataText, str_abcdefg, -1, &status);

RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);

REGEX_CHECK_STATUS;

const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */

utext_openUTF8(&replText, str_11, -1, &status);

result = matcher2->replaceFirst(&replText, NULL, status);

@@ -2576,8 +2584,8 @@ void RegexTest::API_Replace_UTF8() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(result == &destText);

REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);

- const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */

+ const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */

utext_openUTF8(&replText, str_v, -1, &status);

REGEX_VERBOSE_TEXT(&replText);

result = matcher2->replaceFirst(&replText, NULL, status);

@@ -2590,7 +2598,7 @@ void RegexTest::API_Replace_UTF8() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(result == &destText);

REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);

const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */

utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);

result = matcher2->replaceFirst(&replText, NULL, status);

@@ -2612,7 +2620,7 @@ void RegexTest::API_Replace_UTF8() {

supplDigitChars[24] = 0x9F;

supplDigitChars[25] = 0x8F;

utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);

result = matcher2->replaceFirst(&replText, NULL, status);

REGEX_CHECK_STATUS;

const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */

@@ -2642,7 +2650,7 @@ void RegexTest::API_Replace_UTF8() {

utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);

utext_openUTF8(&replText, str_u0043, -1, &status);

matcher->reset(&dataText);

result = matcher->replaceAll(&replText, NULL, status);

REGEX_CHECK_STATUS;

const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */

@@ -2662,7 +2670,7 @@ void RegexTest::API_Replace_UTF8() {

matcher->reset(&dataText);

unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"

- // 0123456789

+ // 0123456789

expected[2] = 0xF0;

expected[3] = 0x90;

expected[4] = 0x80;

@@ -2690,10 +2698,10 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */

utext_openUTF8(&re, str_ssee, -1, &status);

utext_openUTF8(&dataText, str_blah, -1, &status);

utext_openUTF8(&replText, str_ooh, -1, &status);

RegexMatcher m(&re, 0, status);

REGEX_CHECK_STATUS;

UnicodeString result;

UText resultText = UTEXT_INITIALIZER;

utext_openUnicodeString(&resultText, &result, &status);

@@ -2734,7 +2742,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */

m.appendTail(&resultText, status);

const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */

REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);

utext_close(&resultText);

}

@@ -2742,7 +2750,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */

delete pat2;

delete matcher;

delete pat;

utext_close(&dataText);

utext_close(&replText);

utext_close(&destText);

@@ -2767,7 +2775,7 @@ void RegexTest::API_Pattern_UTF8() {

UText re2 = UTEXT_INITIALIZER;

UErrorCode status = U_ZERO_ERROR;

UParseError pe;

const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */

const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */

utext_openUTF8(&re1, str_abcalmz, -1, &status);

@@ -2816,7 +2824,7 @@ void RegexTest::API_Pattern_UTF8() {

delete pat1a;

delete pat1;

delete pat2;

utext_close(&re1);

utext_close(&re2);

@@ -2830,13 +2838,13 @@ void RegexTest::API_Pattern_UTF8() {

UText pattern = UTEXT_INITIALIZER;

const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */

utext_openUTF8(&pattern, str_pL, -1, &status);

RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status);

RegexPattern *pClone = pSource->clone();

delete pSource;

RegexMatcher *mFromClone = pClone->matcher(status);

REGEX_CHECK_STATUS;

UText input = UTEXT_INITIALIZER;

const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */

utext_openUTF8(&input, str_HelloWorld, -1, &status);

@@ -2848,7 +2856,7 @@ void RegexTest::API_Pattern_UTF8() {

REGEX_ASSERT(mFromClone->find() == FALSE);

delete mFromClone;

delete pClone;

utext_close(&input);

utext_close(&pattern);

}

@@ -2860,7 +2868,7 @@ void RegexTest::API_Pattern_UTF8() {

UErrorCode status = U_ZERO_ERROR;

UText pattern = UTEXT_INITIALIZER;

UText input = UTEXT_INITIALIZER;

const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */

utext_openUTF8(&input, str_randominput, -1, &status);

@@ -2868,17 +2876,17 @@ void RegexTest::API_Pattern_UTF8() {

utext_openUTF8(&pattern, str_dotstar, -1, &status);

REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);

REGEX_CHECK_STATUS;

const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */

utext_openUTF8(&pattern, str_abc, -1, &status);

REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);

REGEX_CHECK_STATUS;

const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */

utext_openUTF8(&pattern, str_nput, -1, &status);

REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);

REGEX_CHECK_STATUS;

utext_openUTF8(&pattern, str_randominput, -1, &status);

REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);

REGEX_CHECK_STATUS;

@@ -2887,13 +2895,13 @@ void RegexTest::API_Pattern_UTF8() {

utext_openUTF8(&pattern, str_u, -1, &status);

REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);

REGEX_CHECK_STATUS;

utext_openUTF8(&input, str_abc, -1, &status);

utext_openUTF8(&pattern, str_abc, -1, &status);

status = U_INDEX_OUTOFBOUNDS_ERROR;

REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);

REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

utext_close(&input);

utext_close(&pattern);

}

@@ -3284,7 +3292,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

int32_t line) {

UnicodeString unEscapedInput;

UnicodeString deTaggedInput;

int32_t patternUTF8Length, inputUTF8Length;

char *patternChars = NULL, *inputChars = NULL;

UText patternText = UTEXT_INITIALIZER;

@@ -3311,7 +3319,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

int32_t regionEnd = -1;

int32_t regionStartUTF8 = -1;

int32_t regionEndUTF8 = -1;

// Compile the caller's pattern

@@ -3329,7 +3337,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag

bflags |= UREGEX_MULTILINE;

}

if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag

bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;

}

@@ -3365,16 +3373,16 @@ void RegexTest::regex_find(const UnicodeString &pattern,

UTF8Converter = ucnv_open("UTF8", &status);

ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);

patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);

status = U_ZERO_ERROR; // buffer overflow

patternChars = new char[patternUTF8Length+1];

pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);

utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);

if (status == U_ZERO_ERROR) {

UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);

if (status != U_ZERO_ERROR) {

#if UCONFIG_NO_BREAK_ITERATION==1

// 'v' test flag means that the test pattern should not compile if ICU was configured

@@ -3396,7 +3404,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

}

if (UTF8Pattern == NULL) {

// UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine

logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);

@@ -3404,7 +3412,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

}

if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag

- RegexPatternDump(callerPattern);

+ callerPattern->dumpPattern();

}

if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag

@@ -3426,7 +3434,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

numFinds = i;

}

// 'M' flag. Use matches() instead of find()

if (flags.indexOf((UChar)0x4d) >= 0) {

useMatchesFunc = TRUE;

@@ -3481,7 +3489,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag

matcher->setTrace(TRUE);

}

if (UTF8Pattern != NULL) {

inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);

status = U_ZERO_ERROR; // buffer overflow

@@ -3493,7 +3501,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);

REGEX_CHECK_STATUS_L(line);

}

if (UTF8Matcher == NULL) {

// UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine

logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);

@@ -3507,7 +3515,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

if (UTF8Matcher != NULL) {

if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);

if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);

// Fill out the native index UVector info.

// Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()

for (i=0; i<groupStarts.size(); i++) {

@@ -3522,7 +3530,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

}

setInt(groupStartsUTF8, startUTF8, i);

}

int32_t end = groupEnds.elementAti(i);

// -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting

if (end >= 0) {

@@ -3557,8 +3565,8 @@ void RegexTest::regex_find(const UnicodeString &pattern,

UTF8Matcher->useTransparentBounds(TRUE);

}

// Do a find on the de-tagged input using the caller's pattern

@@ -3633,7 +3641,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

failed = TRUE;

goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.

}

int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));

int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));

if (matcher->end(i, status) != expectedEnd) {

@@ -3670,7 +3678,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UTF8)", line);

failed = TRUE;

}

if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == true

matcher->requireEnd() == FALSE) {

errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", line);

@@ -3680,7 +3688,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UTF8)", line);

failed = TRUE;

}

if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false

matcher->hitEnd() == TRUE) {

errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line);

@@ -3690,7 +3698,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,

errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)", line);

failed = TRUE;

}

if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true

matcher->hitEnd() == FALSE) {

errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line);

@@ -3714,7 +3722,7 @@ cleanupAndReturn:

delete UTF8Pattern;

delete matcher;

delete callerPattern;

utext_close(&inputText);

delete[] inputChars;

utext_close(&patternText);

@@ -3790,7 +3798,7 @@ void RegexTest::Errors() {

//-------------------------------------------------------------------------------

-//

+//

// Read a text data file, convert it to UChars, and return the data

// in one big UChar * buffer, which the caller must delete.

@@ -4133,7 +4141,7 @@ void RegexTest::PerlTests() {

lineNum, expected?"":"no ", found?"":"no " );

continue;

}

// Don't try to check expected results if there is no match.

// (Some have stuff in the expected fields)

if (!found) {

@@ -4431,7 +4439,7 @@ void RegexTest::PerlTestsUTF8() {

if (flagStr.indexOf(UChar_x) != -1) {

flags |= UREGEX_COMMENTS;

}

// Put the pattern in a UTF-8 UText

@@ -4528,7 +4536,7 @@ void RegexTest::PerlTestsUTF8() {

lineNum, expected?"":"no ", found?"":"no " );

continue;

}

// Don't try to check expected results if there is no match.

// (Some have stuff in the expected fields)

if (!found) {

@@ -4671,10 +4679,10 @@ void RegexTest::PerlTestsUTF8() {

delete fieldPat;

delete [] testData;

utext_close(&patternText);

utext_close(&inputText);

delete [] patternChars;

delete [] inputChars;

@@ -4738,12 +4746,12 @@ U_CDECL_END

void RegexTest::Callbacks() {

{

// Getter returns NULLs if no callback has been set

// The variables that the getter will fill in.

// Init to non-null values so that the action of the getter can be seen.

const void *returnedContext = &returnedContext;

URegexMatchCallback *returnedFn = &testCallBackFn;

UErrorCode status = U_ZERO_ERROR;

RegexMatcher matcher("x", 0, status);

REGEX_CHECK_STATUS;

@@ -4752,7 +4760,7 @@ void RegexTest::Callbacks() {

REGEX_ASSERT(returnedFn == NULL);

REGEX_ASSERT(returnedContext == NULL);

}

{

// Set and Get work

callBackContext cbInfo = {this, 0, 0, 0};

@@ -4767,7 +4775,7 @@ void RegexTest::Callbacks() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(returnedFn == testCallBackFn);

REGEX_ASSERT(returnedContext == &cbInfo);

// A short-running match shouldn't invoke the callback

status = U_ZERO_ERROR;

cbInfo.reset(1);

@@ -4776,7 +4784,7 @@ void RegexTest::Callbacks() {

REGEX_ASSERT(matcher.matches(status));

REGEX_CHECK_STATUS;

REGEX_ASSERT(cbInfo.numCalls == 0);

// A medium-length match that runs long enough to invoke the

// callback, but not so long that the callback aborts it.

status = U_ZERO_ERROR;

@@ -4786,7 +4794,7 @@ void RegexTest::Callbacks() {

REGEX_ASSERT(matcher.matches(status)==FALSE);

REGEX_CHECK_STATUS;

REGEX_ASSERT(cbInfo.numCalls > 0);

// A longer running match that the callback function will abort.

status = U_ZERO_ERROR;

cbInfo.reset(4);

@@ -4796,7 +4804,7 @@ void RegexTest::Callbacks() {

REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

REGEX_ASSERT(cbInfo.numCalls == 4);

}

@@ -4816,6 +4824,9 @@ struct progressCallBackContext {

void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};

};

+// call-back function for find().

+// Return TRUE to continue the find().

+// Return FALSE to stop the find().

U_CDECL_BEGIN

static UBool U_CALLCONV

testProgressCallBackFn(const void *context, int64_t matchIndex) {

@@ -4830,12 +4841,12 @@ U_CDECL_END

void RegexTest::FindProgressCallbacks() {

{

// Getter returns NULLs if no callback has been set

// The variables that the getter will fill in.

// Init to non-null values so that the action of the getter can be seen.

const void *returnedContext = &returnedContext;

URegexFindProgressCallback *returnedFn = &testProgressCallBackFn;

UErrorCode status = U_ZERO_ERROR;

RegexMatcher matcher("x", 0, status);

REGEX_CHECK_STATUS;

@@ -4844,14 +4855,14 @@ void RegexTest::FindProgressCallbacks() {

REGEX_ASSERT(returnedFn == NULL);

REGEX_ASSERT(returnedContext == NULL);

}

{

// Set and Get work

progressCallBackContext cbInfo = {this, 0, 0, 0};

const void *returnedContext;

URegexFindProgressCallback *returnedFn;

UErrorCode status = U_ZERO_ERROR;

- RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.

+ RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);

REGEX_CHECK_STATUS;

matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);

REGEX_CHECK_STATUS;

@@ -4859,11 +4870,11 @@ void RegexTest::FindProgressCallbacks() {

REGEX_CHECK_STATUS;

REGEX_ASSERT(returnedFn == testProgressCallBackFn);

REGEX_ASSERT(returnedContext == &cbInfo);

- // A short-running match should NOT invoke the callback.

+ // A find that matches on the initial position does NOT invoke the callback.

status = U_ZERO_ERROR;

cbInfo.reset(100);

- UnicodeString s = "abxxx";

+ UnicodeString s = "aaxxx";

matcher.reset(s);

#if 0

matcher.setTrace(TRUE);

@@ -4871,8 +4882,9 @@ void RegexTest::FindProgressCallbacks() {

REGEX_ASSERT(matcher.find(0, status));

REGEX_CHECK_STATUS;

REGEX_ASSERT(cbInfo.numCalls == 0);

- // A medium running match that causes matcher.find() to invoke our callback for each index.

+ // A medium running find() that causes matcher.find() to invoke our callback for each index,

+ // but not so many times that we interrupt the operation.

status = U_ZERO_ERROR;

s = "aaaaaaaaaaaaaaaaaaab";

cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string

@@ -4880,31 +4892,30 @@ void RegexTest::FindProgressCallbacks() {

REGEX_ASSERT(matcher.find(0, status)==FALSE);

REGEX_CHECK_STATUS;

REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);

// A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.

status = U_ZERO_ERROR;

UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";

cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string

matcher.reset(s1);

REGEX_ASSERT(matcher.find(0, status)==FALSE);

- REGEX_CHECK_STATUS;

+ REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);

-#if 0

// Now a match that will succeed, but after an interruption

status = U_ZERO_ERROR;

UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";

cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string

matcher.reset(s2);

REGEX_ASSERT(matcher.find(0, status)==FALSE);

- REGEX_CHECK_STATUS;

+ REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

// Now retry the match from where left off

cbInfo.maxCalls = 100; // No callback limit

+ status = U_ZERO_ERROR;

REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));

REGEX_CHECK_STATUS;

-#endif

}

@@ -4923,7 +4934,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

UText patternText = UTEXT_INITIALIZER;

UnicodeString buffer;

UText bufferText = UTEXT_INITIALIZER;

utext_openUnicodeString(&bufferText, &buffer, &status);

@@ -4940,7 +4951,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);

u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);

utext_openUChars(&text2, text2Chars, -1, &status);

regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);

re = uregex_openUText(&patternText, 0, NULL, &status);

@@ -4952,7 +4963,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

utext_setNativeIndex(resultText, 0);

utext_setNativeIndex(&text1, 0);

REGEX_ASSERT(testUTextEqual(resultText, &text1));

resultText = uregex_getUText(re, &bufferText, &status);

REGEX_CHECK_STATUS;

REGEX_ASSERT(resultText == &bufferText);

@@ -4968,7 +4979,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

utext_setNativeIndex(resultText, 0);

utext_setNativeIndex(&text2, 0);

REGEX_ASSERT(testUTextEqual(resultText, &text2));

uregex_close(re);

utext_close(&text1);

utext_close(&text2);

@@ -5014,7 +5025,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

uregex_close(re);

}

* replaceFirst()

@@ -5023,7 +5034,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

UChar text2[80];

UText replText = UTEXT_INITIALIZER;

UText *result;

status = U_ZERO_ERROR;

u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);

u_uastrncpy(text2, "No match here.", sizeof(text2)/2);

@@ -5047,7 +5058,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

REGEX_CHECK_STATUS;

REGEX_ASSERT(result == &bufferText);

REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);

/* Unicode escapes */

uregex_setText(re, text1, -1, &status);

regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);

@@ -5104,7 +5115,7 @@ void RegexTest::PreAllocatedUTextCAPI () {

* splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,

* so we don't need to test it here.

utext_close(&bufferText);

utext_close(&patternText);

}

@@ -5179,7 +5190,7 @@ void RegexTest::Bug8479() {

delete pMatcher;

}

// Bug 7029

void RegexTest::Bug7029() {

@@ -5197,16 +5208,17 @@ void RegexTest::Bug7029() {

// Bug 9283

// This test is checking for the existance of any supplemental characters that case-fold

-// to a bmp character.

+// to a bmp character.

-// At the time of this writing there are none. If any should appear in a subsequent release

-// of Unicode, the code in regular expressions compilation that determines the longest

-// posssible match for a literal string will need to be enhanced.

+// At the time of this writing there are none. If any should appear in a subsequent release

+// of Unicode, the code in regular expressions compilation that determines the longest

+// posssible match for a literal string will need to be enhanced.

// See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()

// for details on what to do in case of a failure of this test.

void RegexTest::Bug9283() {

+#if !UCONFIG_NO_NORMALIZATION

UErrorCode status = U_ZERO_ERROR;

UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status);

REGEX_CHECK_STATUS;

@@ -5220,6 +5232,7 @@ void RegexTest::Bug9283() {

UnicodeString cf = UnicodeString(c).foldCase();

REGEX_ASSERT(cf.length() >= 2);

}

+#endif /* #if !UCONFIG_NO_NORMALIZATION */

}

@@ -5232,47 +5245,128 @@ void RegexTest::CheckInvBufSize() {

}

-void RegexTest::TestBug11371() {

+void RegexTest::Bug10459() {

UErrorCode status = U_ZERO_ERROR;

- UnicodeString patternString;

+ UnicodeString patternString("(txt)");

+ UnicodeString txtString("txt");

+ UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status);

+ REGEX_CHECK_STATUS;

+ UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status);

+ REGEX_CHECK_STATUS;

+ URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status);

+ REGEX_CHECK_STATUS;

- for (int i=0; i<8000000; i++) {

- patternString.append(UnicodeString("()"));

+ uregex_setUText(icu_re, utext_txt, &status);

+ REGEX_CHECK_STATUS;

+ // The bug was that calling uregex_group() before doing a matching operation

+ // was causing a segfault. Only for Regular Expressions created from UText.

+ // It should set an U_REGEX_INVALID_STATE.

+ UChar buf[100];

+ int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status);

+ REGEX_ASSERT(status == U_REGEX_INVALID_STATE);

+ REGEX_ASSERT(len == 0);

+ uregex_close(icu_re);

+ utext_close(utext_pat);

+ utext_close(utext_txt);

+void RegexTest::TestCaseInsensitiveStarters() {

+ // Test that the data used by RegexCompile::findCaseInsensitiveStarters() hasn't

+ // become stale because of new Unicode characters.

+ // If it is stale, rerun the generation tool

+ // svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing

+ // and replace the embedded data in i18n/regexcmp.cpp

+ for (UChar32 cp=0; cp<=0x10ffff; cp++) {

+ if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) {

+ continue;

+ }

+ UnicodeSet s(cp, cp);

+ s.closeOver(USET_CASE_INSENSITIVE);

+ UnicodeSetIterator setIter(s);

+ while (setIter.next()) {

+ if (!setIter.isString()) {

+ continue;

+ }

+ const UnicodeString &str = setIter.getString();

+ UChar32 firstChar = str.char32At(0);

+ UnicodeSet starters;

+ RegexCompile::findCaseInsensitiveStarters(firstChar, &starters);

+ if (!starters.contains(cp)) {

+ errln("CaseInsensitiveStarters for \\u%x is missing character \\u%x.", cp, firstChar);

+ return;

+ }

}

+void RegexTest::TestBug11049() {

+ // Original bug report: pattern with match start consisting of one of several individual characters,

+ // and the text being matched ending with a supplementary character. find() would read past the

+ // end of the input text when searching for potential match starting points.

+ // To see the problem, the text must exactly fill an allocated buffer, so that valgrind will

+ // detect the bad read.

+ TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__);

+ TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__);

+ // Test again with a pattern starting with a single character,

+ // which takes a different code path than starting with an OR expression,

+ // but with similar logic.

+ TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);

+ TestCase11049("C", "string matches at end C", TRUE, __LINE__);

+// Run a single test case from TestBug11049(). Internal function.

+void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber) {

+ UErrorCode status = U_ZERO_ERROR;

+ UnicodeString patternString = UnicodeString(pattern).unescape();

LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));

- if (status != U_REGEX_PATTERN_TOO_BIG) {

- errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",

- __FILE__, __LINE__, u_errorName(status));

- }

- status = U_ZERO_ERROR;

- patternString = "(";

- for (int i=0; i<20000000; i++) {

- patternString.append(UnicodeString("A++"));

- }

- patternString.append(UnicodeString("){0}B++"));

- LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));

- if (status != U_REGEX_PATTERN_TOO_BIG) {

- errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",

- __FILE__, __LINE__, u_errorName(status));

- }

+ UnicodeString dataString = UnicodeString(data).unescape();

+ UChar *exactBuffer = new UChar[dataString.length()];

+ dataString.extract(exactBuffer, dataString.length(), status);

+ UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status);

- // Pattern with too much string data, such that string indexes overflow operand data.

- status = U_ZERO_ERROR;

- patternString = "";

- while (patternString.length() < 0x00ffffff) {

- patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));

- }

- patternString.append(UnicodeString("X? trailing string"));

- LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));

- compiledPat3->dumpPattern();

- if (status != U_REGEX_PATTERN_TOO_BIG) {

- errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",

- __FILE__, __LINE__, u_errorName(status));

+ LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status));

+ REGEX_CHECK_STATUS;

+ matcher->reset(ut);

+ UBool result = matcher->find();

+ if (result != expectMatch) {

+ errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \"%s\"",

+ __FILE__, lineNumber, expectMatch, result, pattern, data);

+ }

+ // Rerun test with UTF-8 input text. Won't see buffer overreads, but could see

+ // off-by-one on find() with match at the last code point.

+ // Size of the original char * data (invariant charset) will be <= than the equivalent UTF-8

+ // because string.unescape() will only shrink it.

+ char * utf8Buffer = new char[uprv_strlen(data)+1];

+ u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), dataString.length(), &status);

+ REGEX_CHECK_STATUS;

+ ut = utext_openUTF8(ut, utf8Buffer, -1, &status);

+ REGEX_CHECK_STATUS;

+ matcher->reset(ut);

+ result = matcher->find();

+ if (result != expectMatch) {

+ errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \"%s\", text = \"%s\"",

+ __FILE__, lineNumber, expectMatch, result, pattern, data);

}

+ delete [] utf8Buffer;

+ utext_close(ut);

+ delete [] exactBuffer;

#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

« no previous file with comments | « source/test/intltest/regextst.h ('k') | source/test/intltest/regiontst.cpp » ('j') | no next file with comments »