| Index: source/test/intltest/regextst.cpp
|
| diff --git a/source/test/intltest/regextst.cpp b/source/test/intltest/regextst.cpp
|
| index 9fd9f43fd8c6b23ea21daebc3cf5a40face1f9f3..ca2fd21481a47d52695c9a36bc20a838b1f0f4ab 100644
|
| --- a/source/test/intltest/regextst.cpp
|
| +++ b/source/test/intltest/regextst.cpp
|
| @@ -1,6 +1,6 @@
|
| /********************************************************************
|
| * COPYRIGHT:
|
| - * Copyright (c) 2002-2013, International Business Machines Corporation and
|
| + * Copyright (c) 2002-2014, International Business Machines Corporation and
|
| * others. All Rights Reserved.
|
| ********************************************************************/
|
|
|
| @@ -23,12 +23,16 @@
|
| #include "intltest.h"
|
| #if !UCONFIG_NO_REGULAR_EXPRESSIONS
|
|
|
| +#include "unicode/localpointer.h"
|
| #include "unicode/regex.h"
|
| #include "unicode/uchar.h"
|
| #include "unicode/ucnv.h"
|
| #include "unicode/uniset.h"
|
| +#include "unicode/uregex.h"
|
| +#include "unicode/usetiter.h"
|
| #include "unicode/ustring.h"
|
| #include "regextst.h"
|
| +#include "regexcmp.h"
|
| #include "uvector.h"
|
| #include "util.h"
|
| #include <stdlib.h>
|
| @@ -131,10 +135,15 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
| case 21: name = "Bug 9283";
|
| if (exec) Bug9283();
|
| break;
|
| - case 22: name = "TestBug11371";
|
| - if (exec) TestBug11371();
|
| + case 22: name = "Bug10459";
|
| + if (exec) Bug10459();
|
| + break;
|
| + case 23: name = "TestCaseInsensitiveStarters";
|
| + if (exec) TestCaseInsensitiveStarters();
|
| + break;
|
| + case 24: name = "TestBug11049";
|
| + if (exec) TestBug11049();
|
| break;
|
| -
|
| default: name = "";
|
| break; //needed to end loop
|
| }
|
| @@ -144,7 +153,7 @@ void RegexTest::runIndexedTest( int32_t index, UBool exec, const char* &name, ch
|
|
|
| /**
|
| * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
|
| - * into ASCII.
|
| + * into ASCII.
|
| * @see utext_openUTF8
|
| */
|
| static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
|
| @@ -210,7 +219,6 @@ const char* RegexTest::extractToAssertBuf(const UnicodeString& message) {
|
| return ASSERT_BUF;
|
| }
|
|
|
| -
|
| #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}
|
|
|
| #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \
|
| @@ -296,11 +304,11 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
|
| }
|
|
|
| /**
|
| - * Assumes utf-8 input
|
| + * Assumes utf-8 input
|
| */
|
| #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actual), __FILE__, __LINE__)
|
| /**
|
| - * Assumes Invariant input
|
| + * Assumes Invariant input
|
| */
|
| #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((expected), (actual), __FILE__, __LINE__)
|
|
|
| @@ -308,7 +316,7 @@ void RegexTest::assertUTextInvariant(const char *expected, UText *actual, const
|
| * This buffer ( inv_buf ) is used to hold the UTF-8 strings
|
| * passed into utext_openUTF8. An error will be given if
|
| * INV_BUFSIZ is too small. It's only used on EBCDIC systems.
|
| - */
|
| + */
|
|
|
| #define INV_BUFSIZ 2048 /* increase this if too small */
|
|
|
| @@ -376,7 +384,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
|
| line, u_errorName(status));
|
| return FALSE;
|
| }
|
| - if (line==376) { RegexPatternDump(REPattern);}
|
| + if (line==376) { REPattern->dumpPattern();}
|
|
|
| UnicodeString inputString(inputText);
|
| UnicodeString unEscapedInput = inputString.unescape();
|
| @@ -412,7 +420,7 @@ UBool RegexTest::doRegexLMTest(const char *pat, const char *text, UBool looking,
|
| }
|
|
|
| if (retVal == FALSE) {
|
| - RegexPatternDump(REPattern);
|
| + REPattern->dumpPattern();
|
| }
|
|
|
| delete REPattern;
|
| @@ -439,12 +447,12 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
|
| line, u_errorName(status));
|
| return FALSE;
|
| }
|
| -
|
| +
|
| UnicodeString inputString(text, -1, US_INV);
|
| UnicodeString unEscapedInput = inputString.unescape();
|
| LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
|
| ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
|
| -
|
| +
|
| inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
|
| if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
|
| // UTF-8 does not allow unpaired surrogates, so this could actually happen
|
| @@ -455,7 +463,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
|
| textChars = new char[inputUTF8Length+1];
|
| unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(), status);
|
| utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
|
| -
|
| +
|
| REMatcher = &REPattern->matcher(status)->reset(&inputText);
|
| if (U_FAILURE(status)) {
|
| errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Status = %s\n",
|
| @@ -488,7 +496,7 @@ UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
|
| }
|
|
|
| if (retVal == FALSE) {
|
| - RegexPatternDump(REPattern);
|
| + REPattern->dumpPattern();
|
| }
|
|
|
| delete REPattern;
|
| @@ -554,7 +562,7 @@ void RegexTest::regex_err(const char *pat, int32_t errLine, int32_t errCol,
|
| }
|
| }
|
| }
|
| -
|
| +
|
| delete callerPattern;
|
| utext_close(&patternText);
|
| }
|
| @@ -581,7 +589,7 @@ void RegexTest::Basic() {
|
| UErrorCode status = U_ZERO_ERROR;
|
| RegexPattern *pattern;
|
| pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unescape(), UREGEX_CASE_INSENSITIVE, pe, status);
|
| - RegexPatternDump(pattern);
|
| + pattern->dumpPattern();
|
| RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz").unescape(), status);
|
| UBool result = m->find();
|
| printf("result = %d\n", result);
|
| @@ -729,18 +737,18 @@ void RegexTest::UTextBasic() {
|
| utext_openUTF8(&pattern, str_abc, -1, &status);
|
| RegexMatcher matcher(&pattern, 0, status);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| UText input = UTEXT_INITIALIZER;
|
| utext_openUTF8(&input, str_abc, -1, &status);
|
| REGEX_CHECK_STATUS;
|
| matcher.reset(&input);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
|
| -
|
| +
|
| matcher.reset(matcher.inputText());
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
|
| -
|
| +
|
| utext_close(&pattern);
|
| utext_close(&input);
|
| }
|
| @@ -1117,7 +1125,7 @@ void RegexTest::API_Match() {
|
| delete m;
|
| delete p;
|
| }
|
| -
|
| +
|
| //
|
| // Regions
|
| //
|
| @@ -1130,34 +1138,34 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(m.regionEnd() == testString.length());
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| -
|
| +
|
| m.region(2,4, status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(m.matches(status));
|
| REGEX_ASSERT(m.start(status)==2);
|
| REGEX_ASSERT(m.end(status)==4);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| m.reset();
|
| REGEX_ASSERT(m.regionStart() == 0);
|
| REGEX_ASSERT(m.regionEnd() == testString.length());
|
| -
|
| +
|
| UnicodeString shorterString("short");
|
| m.reset(shorterString);
|
| REGEX_ASSERT(m.regionStart() == 0);
|
| REGEX_ASSERT(m.regionEnd() == shorterString.length());
|
| -
|
| +
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
|
| REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
|
| REGEX_ASSERT(&m == &m.reset());
|
| REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
|
| -
|
| +
|
| REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| REGEX_ASSERT(&m == &m.reset());
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| -
|
| +
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
|
| REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
|
| @@ -1168,9 +1176,9 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| REGEX_ASSERT(&m == &m.reset());
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| -
|
| +
|
| }
|
| -
|
| +
|
| //
|
| // hitEnd() and requireEnd()
|
| //
|
| @@ -1182,7 +1190,7 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(m1.hitEnd() == TRUE);
|
| REGEX_ASSERT(m1.requireEnd() == FALSE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| status = U_ZERO_ERROR;
|
| RegexMatcher m2("a*", testString, 0, status);
|
| REGEX_ASSERT(m2.lookingAt(status) == TRUE);
|
| @@ -1220,7 +1228,7 @@ void RegexTest::API_Match() {
|
| #endif
|
|
|
| //
|
| - // Time Outs.
|
| + // Time Outs.
|
| // Note: These tests will need to be changed when the regexp engine is
|
| // able to detect and cut short the exponential time behavior on
|
| // this type of match.
|
| @@ -1248,22 +1256,22 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
|
| REGEX_CHECK_STATUS;
|
| }
|
| -
|
| +
|
| //
|
| // Stack Limits
|
| //
|
| {
|
| UErrorCode status = U_ZERO_ERROR;
|
| UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A'
|
| -
|
| +
|
| // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
|
| // of the '+', and makes the stack frames larger.
|
| RegexMatcher matcher("(A)+A$", testString, 0, status);
|
| -
|
| +
|
| // With the default stack, this match should fail to run
|
| REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
|
| REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
|
| -
|
| +
|
| // With unlimited stack, it should run
|
| status = U_ZERO_ERROR;
|
| matcher.setStackLimit(0, status);
|
| @@ -1279,7 +1287,7 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
|
| REGEX_ASSERT(matcher.getStackLimit() == 10000);
|
| }
|
| -
|
| +
|
| // A pattern that doesn't save state should work with
|
| // a minimal sized stack
|
| {
|
| @@ -1292,7 +1300,7 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(matcher.matches(status) == TRUE);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(matcher.getStackLimit() == 30);
|
| -
|
| +
|
| // Negative stack sizes should fail
|
| status = U_ZERO_ERROR;
|
| matcher.setStackLimit(1000, status);
|
| @@ -1301,7 +1309,7 @@ void RegexTest::API_Match() {
|
| REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
|
| REGEX_ASSERT(matcher.getStackLimit() == 1000);
|
| }
|
| -
|
| +
|
|
|
| }
|
|
|
| @@ -1850,7 +1858,7 @@ void RegexTest::API_Match_UTF8() {
|
| regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
|
| REGEX_VERBOSE_TEXT(&input2);
|
| utext_openUChars(&empty, NULL, 0, &status);
|
| -
|
| +
|
| int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */
|
| int32_t input2Len = strlen("not abc");
|
|
|
| @@ -1960,7 +1968,7 @@ void RegexTest::API_Match_UTF8() {
|
|
|
| delete m1;
|
| delete pat2;
|
| -
|
| +
|
| utext_close(&re);
|
| utext_close(&input1);
|
| utext_close(&input2);
|
| @@ -1981,10 +1989,10 @@ void RegexTest::API_Match_UTF8() {
|
| UText re=UTEXT_INITIALIZER;
|
| const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */
|
| utext_openUTF8(&re, str_01234567_pat, -1, &status);
|
| -
|
| +
|
| RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| UText input = UTEXT_INITIALIZER;
|
| const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
|
| utext_openUTF8(&input, str_0123456789, -1, &status);
|
| @@ -2019,13 +2027,13 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
|
|
|
| matcher->lookingAt(status);
|
| -
|
| +
|
| UnicodeString dest;
|
| UText destText = UTEXT_INITIALIZER;
|
| utext_openUnicodeString(&destText, &dest, &status);
|
| UText *result;
|
| //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
|
| - // Test shallow-clone API
|
| + // Test shallow-clone API
|
| int64_t group_len;
|
| result = matcher->group((UText *)NULL, group_len, status);
|
| REGEX_CHECK_STATUS;
|
| @@ -2038,7 +2046,7 @@ void RegexTest::API_Match_UTF8() {
|
| // destText is now immutable, reopen it
|
| utext_close(&destText);
|
| utext_openUnicodeString(&destText, &dest, &status);
|
| -
|
| +
|
| result = matcher->group(0, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
|
| @@ -2047,7 +2055,7 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(result == &destText);
|
| REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
|
| -
|
| +
|
| result = matcher->group(1, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
|
| @@ -2057,7 +2065,7 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(result == &destText);
|
| REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
|
| -
|
| +
|
| result = matcher->group(2, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
|
| @@ -2067,7 +2075,7 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(result == &destText);
|
| REGEX_ASSERT_UTEXT_UTF8(str_45, result);
|
| -
|
| +
|
| result = matcher->group(3, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
|
| @@ -2085,7 +2093,7 @@ void RegexTest::API_Match_UTF8() {
|
|
|
| delete matcher;
|
| delete pat;
|
| -
|
| +
|
| utext_close(&destText);
|
| utext_close(&input);
|
| utext_close(&re);
|
| @@ -2146,7 +2154,7 @@ void RegexTest::API_Match_UTF8() {
|
|
|
| delete matcher;
|
| delete pat;
|
| -
|
| +
|
| utext_close(&input);
|
| utext_close(&re);
|
| }
|
| @@ -2164,7 +2172,7 @@ void RegexTest::API_Match_UTF8() {
|
| utext_openUTF8(&re, str_Gabcabc, -1, &status);
|
|
|
| RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
|
| -
|
| +
|
| REGEX_CHECK_STATUS;
|
| UText input = UTEXT_INITIALIZER;
|
| const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */
|
| @@ -2186,7 +2194,7 @@ void RegexTest::API_Match_UTF8() {
|
|
|
| delete matcher;
|
| delete pat;
|
| -
|
| +
|
| utext_close(&input);
|
| utext_close(&re);
|
| }
|
| @@ -2226,7 +2234,7 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_ASSERT(m.end(status) == i);
|
| }
|
| REGEX_ASSERT(i==20);
|
| -
|
| +
|
| utext_close(&s);
|
| }
|
| {
|
| @@ -2248,7 +2256,7 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
|
| }
|
| REGEX_ASSERT(i==5);
|
| -
|
| +
|
| utext_close(&s);
|
| }
|
|
|
| @@ -2276,7 +2284,7 @@ void RegexTest::API_Match_UTF8() {
|
| delete m;
|
| delete p;
|
| }
|
| -
|
| +
|
| //
|
| // Regions
|
| //
|
| @@ -2288,42 +2296,42 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_VERBOSE_TEXT(&testPattern);
|
| regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);
|
| REGEX_VERBOSE_TEXT(&testText);
|
| -
|
| +
|
| RegexMatcher m(&testPattern, &testText, 0, status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(m.regionStart() == 0);
|
| REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| -
|
| +
|
| m.region(2,4, status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(m.matches(status));
|
| REGEX_ASSERT(m.start(status)==2);
|
| REGEX_ASSERT(m.end(status)==4);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| m.reset();
|
| REGEX_ASSERT(m.regionStart() == 0);
|
| REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
|
| -
|
| +
|
| regextst_openUTF8FromInvariant(&testText, "short", -1, &status);
|
| REGEX_VERBOSE_TEXT(&testText);
|
| m.reset(&testText);
|
| REGEX_ASSERT(m.regionStart() == 0);
|
| REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
|
| -
|
| +
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
|
| REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
|
| REGEX_ASSERT(&m == &m.reset());
|
| REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
|
| -
|
| +
|
| REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| REGEX_ASSERT(&m == &m.reset());
|
| REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
|
| -
|
| +
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
|
| REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
|
| @@ -2334,11 +2342,11 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| REGEX_ASSERT(&m == &m.reset());
|
| REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
|
| -
|
| +
|
| utext_close(&testText);
|
| utext_close(&testPattern);
|
| }
|
| -
|
| +
|
| //
|
| // hitEnd() and requireEnd()
|
| //
|
| @@ -2350,13 +2358,13 @@ void RegexTest::API_Match_UTF8() {
|
| const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
|
| utext_openUTF8(&testPattern, str_, -1, &status);
|
| utext_openUTF8(&testText, str_aabb, -1, &status);
|
| -
|
| +
|
| RegexMatcher m1(&testPattern, &testText, 0, status);
|
| REGEX_ASSERT(m1.lookingAt(status) == TRUE);
|
| REGEX_ASSERT(m1.hitEnd() == TRUE);
|
| REGEX_ASSERT(m1.requireEnd() == FALSE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| status = U_ZERO_ERROR;
|
| const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
|
| utext_openUTF8(&testPattern, str_a, -1, &status);
|
| @@ -2374,7 +2382,7 @@ void RegexTest::API_Match_UTF8() {
|
| REGEX_ASSERT(m3.hitEnd() == TRUE);
|
| REGEX_ASSERT(m3.requireEnd() == TRUE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| utext_close(&testText);
|
| utext_close(&testPattern);
|
| }
|
| @@ -2400,7 +2408,7 @@ void RegexTest::API_Replace_UTF8() {
|
| REGEX_VERBOSE_TEXT(&re);
|
| RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
|
| // 012345678901234567
|
| UText dataText = UTEXT_INITIALIZER;
|
| @@ -2416,9 +2424,9 @@ void RegexTest::API_Replace_UTF8() {
|
| UText destText = UTEXT_INITIALIZER;
|
| utext_openUnicodeString(&destText, &dest, &status);
|
| UText *result;
|
| -
|
| +
|
| UText replText = UTEXT_INITIALIZER;
|
| -
|
| +
|
| const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
|
| utext_openUTF8(&replText, str_yz, -1, &status);
|
| REGEX_VERBOSE_TEXT(&replText);
|
| @@ -2450,7 +2458,7 @@ void RegexTest::API_Replace_UTF8() {
|
| const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */
|
| utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
|
| matcher->reset(&dataText);
|
| -
|
| +
|
| result = matcher->replaceFirst(&replText, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
|
| @@ -2475,7 +2483,7 @@ void RegexTest::API_Replace_UTF8() {
|
| //
|
| utext_openUTF8(&dataText, NULL, 0, &status);
|
| matcher->reset(&dataText);
|
| -
|
| +
|
| result = matcher->replaceFirst(&replText, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT_UTEXT_UTF8("", result);
|
| @@ -2499,7 +2507,7 @@ void RegexTest::API_Replace_UTF8() {
|
| //
|
| utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
|
| matcher->reset(&dataText);
|
| -
|
| +
|
| utext_openUTF8(&replText, NULL, 0, &status);
|
| result = matcher->replaceFirst(&replText, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| @@ -2563,7 +2571,7 @@ void RegexTest::API_Replace_UTF8() {
|
| utext_openUTF8(&dataText, str_abcdefg, -1, &status);
|
| RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
|
| utext_openUTF8(&replText, str_11, -1, &status);
|
| result = matcher2->replaceFirst(&replText, NULL, status);
|
| @@ -2576,8 +2584,8 @@ void RegexTest::API_Replace_UTF8() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(result == &destText);
|
| REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
|
| -
|
| - const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
|
| +
|
| + const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
|
| utext_openUTF8(&replText, str_v, -1, &status);
|
| REGEX_VERBOSE_TEXT(&replText);
|
| result = matcher2->replaceFirst(&replText, NULL, status);
|
| @@ -2590,7 +2598,7 @@ void RegexTest::API_Replace_UTF8() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(result == &destText);
|
| REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
|
| -
|
| +
|
| const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */
|
| utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
|
| result = matcher2->replaceFirst(&replText, NULL, status);
|
| @@ -2612,7 +2620,7 @@ void RegexTest::API_Replace_UTF8() {
|
| supplDigitChars[24] = 0x9F;
|
| supplDigitChars[25] = 0x8F;
|
| utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
|
| -
|
| +
|
| result = matcher2->replaceFirst(&replText, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */
|
| @@ -2642,7 +2650,7 @@ void RegexTest::API_Replace_UTF8() {
|
| utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
|
| utext_openUTF8(&replText, str_u0043, -1, &status);
|
| matcher->reset(&dataText);
|
| -
|
| +
|
| result = matcher->replaceAll(&replText, NULL, status);
|
| REGEX_CHECK_STATUS;
|
| const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
|
| @@ -2662,7 +2670,7 @@ void RegexTest::API_Replace_UTF8() {
|
| matcher->reset(&dataText);
|
|
|
| unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
|
| - // 0123456789
|
| + // 0123456789
|
| expected[2] = 0xF0;
|
| expected[3] = 0x90;
|
| expected[4] = 0x80;
|
| @@ -2690,10 +2698,10 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
|
| utext_openUTF8(&re, str_ssee, -1, &status);
|
| utext_openUTF8(&dataText, str_blah, -1, &status);
|
| utext_openUTF8(&replText, str_ooh, -1, &status);
|
| -
|
| +
|
| RegexMatcher m(&re, 0, status);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| UnicodeString result;
|
| UText resultText = UTEXT_INITIALIZER;
|
| utext_openUnicodeString(&resultText, &result, &status);
|
| @@ -2734,7 +2742,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
|
| m.appendTail(&resultText, status);
|
| const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
|
| REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
|
| -
|
| +
|
| utext_close(&resultText);
|
| }
|
|
|
| @@ -2742,7 +2750,7 @@ const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
|
| delete pat2;
|
| delete matcher;
|
| delete pat;
|
| -
|
| +
|
| utext_close(&dataText);
|
| utext_close(&replText);
|
| utext_close(&destText);
|
| @@ -2767,7 +2775,7 @@ void RegexTest::API_Pattern_UTF8() {
|
| UText re2 = UTEXT_INITIALIZER;
|
| UErrorCode status = U_ZERO_ERROR;
|
| UParseError pe;
|
| -
|
| +
|
| const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
|
| const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
|
| utext_openUTF8(&re1, str_abcalmz, -1, &status);
|
| @@ -2816,7 +2824,7 @@ void RegexTest::API_Pattern_UTF8() {
|
| delete pat1a;
|
| delete pat1;
|
| delete pat2;
|
| -
|
| +
|
| utext_close(&re1);
|
| utext_close(&re2);
|
|
|
| @@ -2830,13 +2838,13 @@ void RegexTest::API_Pattern_UTF8() {
|
| UText pattern = UTEXT_INITIALIZER;
|
| const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \p{L}+ */
|
| utext_openUTF8(&pattern, str_pL, -1, &status);
|
| -
|
| +
|
| RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status);
|
| RegexPattern *pClone = pSource->clone();
|
| delete pSource;
|
| RegexMatcher *mFromClone = pClone->matcher(status);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| UText input = UTEXT_INITIALIZER;
|
| const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
|
| utext_openUTF8(&input, str_HelloWorld, -1, &status);
|
| @@ -2848,7 +2856,7 @@ void RegexTest::API_Pattern_UTF8() {
|
| REGEX_ASSERT(mFromClone->find() == FALSE);
|
| delete mFromClone;
|
| delete pClone;
|
| -
|
| +
|
| utext_close(&input);
|
| utext_close(&pattern);
|
| }
|
| @@ -2860,7 +2868,7 @@ void RegexTest::API_Pattern_UTF8() {
|
| UErrorCode status = U_ZERO_ERROR;
|
| UText pattern = UTEXT_INITIALIZER;
|
| UText input = UTEXT_INITIALIZER;
|
| -
|
| +
|
| const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
|
| utext_openUTF8(&input, str_randominput, -1, &status);
|
|
|
| @@ -2868,17 +2876,17 @@ void RegexTest::API_Pattern_UTF8() {
|
| utext_openUTF8(&pattern, str_dotstar, -1, &status);
|
| REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
|
| utext_openUTF8(&pattern, str_abc, -1, &status);
|
| REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
|
| utext_openUTF8(&pattern, str_nput, -1, &status);
|
| REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| utext_openUTF8(&pattern, str_randominput, -1, &status);
|
| REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);
|
| REGEX_CHECK_STATUS;
|
| @@ -2887,13 +2895,13 @@ void RegexTest::API_Pattern_UTF8() {
|
| utext_openUTF8(&pattern, str_u, -1, &status);
|
| REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
|
| REGEX_CHECK_STATUS;
|
| -
|
| +
|
| utext_openUTF8(&input, str_abc, -1, &status);
|
| utext_openUTF8(&pattern, str_abc, -1, &status);
|
| status = U_INDEX_OUTOFBOUNDS_ERROR;
|
| REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
|
| REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
|
| -
|
| +
|
| utext_close(&input);
|
| utext_close(&pattern);
|
| }
|
| @@ -3284,7 +3292,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| int32_t line) {
|
| UnicodeString unEscapedInput;
|
| UnicodeString deTaggedInput;
|
| -
|
| +
|
| int32_t patternUTF8Length, inputUTF8Length;
|
| char *patternChars = NULL, *inputChars = NULL;
|
| UText patternText = UTEXT_INITIALIZER;
|
| @@ -3311,7 +3319,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| int32_t regionEnd = -1;
|
| int32_t regionStartUTF8 = -1;
|
| int32_t regionEndUTF8 = -1;
|
| -
|
| +
|
|
|
| //
|
| // Compile the caller's pattern
|
| @@ -3329,7 +3337,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag
|
| bflags |= UREGEX_MULTILINE;
|
| }
|
| -
|
| +
|
| if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
|
| bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
|
| }
|
| @@ -3365,16 +3373,16 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
|
|
| UTF8Converter = ucnv_open("UTF8", &status);
|
| ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
|
| -
|
| +
|
| patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
|
| status = U_ZERO_ERROR; // buffer overflow
|
| patternChars = new char[patternUTF8Length+1];
|
| pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
|
| utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
|
| -
|
| +
|
| if (status == U_ZERO_ERROR) {
|
| UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
|
| -
|
| +
|
| if (status != U_ZERO_ERROR) {
|
| #if UCONFIG_NO_BREAK_ITERATION==1
|
| // 'v' test flag means that the test pattern should not compile if ICU was configured
|
| @@ -3396,7 +3404,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| }
|
| }
|
| }
|
| -
|
| +
|
| if (UTF8Pattern == NULL) {
|
| // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
|
| logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
|
| @@ -3404,7 +3412,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| }
|
|
|
| if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag
|
| - RegexPatternDump(callerPattern);
|
| + callerPattern->dumpPattern();
|
| }
|
|
|
| if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag
|
| @@ -3426,7 +3434,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| numFinds = i;
|
| }
|
| }
|
| -
|
| +
|
| // 'M' flag. Use matches() instead of find()
|
| if (flags.indexOf((UChar)0x4d) >= 0) {
|
| useMatchesFunc = TRUE;
|
| @@ -3481,7 +3489,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag
|
| matcher->setTrace(TRUE);
|
| }
|
| -
|
| +
|
| if (UTF8Pattern != NULL) {
|
| inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
|
| status = U_ZERO_ERROR; // buffer overflow
|
| @@ -3493,7 +3501,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
|
| REGEX_CHECK_STATUS_L(line);
|
| }
|
| -
|
| +
|
| if (UTF8Matcher == NULL) {
|
| // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
|
| logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d", srcPath, line);
|
| @@ -3507,7 +3515,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| if (UTF8Matcher != NULL) {
|
| if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);
|
| if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
|
| -
|
| +
|
| // Fill out the native index UVector info.
|
| // Only need 1 loop, from above we know groupStarts.size() = groupEnds.size()
|
| for (i=0; i<groupStarts.size(); i++) {
|
| @@ -3522,7 +3530,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| }
|
| setInt(groupStartsUTF8, startUTF8, i);
|
| }
|
| -
|
| +
|
| int32_t end = groupEnds.elementAti(i);
|
| // -1 means there was no UVector slot and we won't be requesting that capture group for this test, don't bother inserting
|
| if (end >= 0) {
|
| @@ -3557,8 +3565,8 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| UTF8Matcher->useTransparentBounds(TRUE);
|
| }
|
| }
|
| -
|
| -
|
| +
|
| +
|
|
|
| //
|
| // Do a find on the de-tagged input using the caller's pattern
|
| @@ -3633,7 +3641,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| failed = TRUE;
|
| goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
|
| }
|
| -
|
| +
|
| int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti(i));
|
| int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));
|
| if (matcher->end(i, status) != expectedEnd) {
|
| @@ -3670,7 +3678,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UTF8)", line);
|
| failed = TRUE;
|
| }
|
| -
|
| +
|
| if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == true
|
| matcher->requireEnd() == FALSE) {
|
| errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", line);
|
| @@ -3680,7 +3688,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UTF8)", line);
|
| failed = TRUE;
|
| }
|
| -
|
| +
|
| if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false
|
| matcher->hitEnd() == TRUE) {
|
| errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line);
|
| @@ -3690,7 +3698,7 @@ void RegexTest::regex_find(const UnicodeString &pattern,
|
| errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)", line);
|
| failed = TRUE;
|
| }
|
| -
|
| +
|
| if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true
|
| matcher->hitEnd() == FALSE) {
|
| errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line);
|
| @@ -3714,7 +3722,7 @@ cleanupAndReturn:
|
| delete UTF8Pattern;
|
| delete matcher;
|
| delete callerPattern;
|
| -
|
| +
|
| utext_close(&inputText);
|
| delete[] inputChars;
|
| utext_close(&patternText);
|
| @@ -3790,7 +3798,7 @@ void RegexTest::Errors() {
|
|
|
|
|
| //-------------------------------------------------------------------------------
|
| -//
|
| +//
|
| // Read a text data file, convert it to UChars, and return the data
|
| // in one big UChar * buffer, which the caller must delete.
|
| //
|
| @@ -4133,7 +4141,7 @@ void RegexTest::PerlTests() {
|
| lineNum, expected?"":"no ", found?"":"no " );
|
| continue;
|
| }
|
| -
|
| +
|
| // Don't try to check expected results if there is no match.
|
| // (Some have stuff in the expected fields)
|
| if (!found) {
|
| @@ -4431,7 +4439,7 @@ void RegexTest::PerlTestsUTF8() {
|
| if (flagStr.indexOf(UChar_x) != -1) {
|
| flags |= UREGEX_COMMENTS;
|
| }
|
| -
|
| +
|
| //
|
| // Put the pattern in a UTF-8 UText
|
| //
|
| @@ -4528,7 +4536,7 @@ void RegexTest::PerlTestsUTF8() {
|
| lineNum, expected?"":"no ", found?"":"no " );
|
| continue;
|
| }
|
| -
|
| +
|
| // Don't try to check expected results if there is no match.
|
| // (Some have stuff in the expected fields)
|
| if (!found) {
|
| @@ -4671,10 +4679,10 @@ void RegexTest::PerlTestsUTF8() {
|
|
|
| delete fieldPat;
|
| delete [] testData;
|
| -
|
| +
|
| utext_close(&patternText);
|
| utext_close(&inputText);
|
| -
|
| +
|
| delete [] patternChars;
|
| delete [] inputChars;
|
|
|
| @@ -4738,12 +4746,12 @@ U_CDECL_END
|
| void RegexTest::Callbacks() {
|
| {
|
| // Getter returns NULLs if no callback has been set
|
| -
|
| +
|
| // The variables that the getter will fill in.
|
| // Init to non-null values so that the action of the getter can be seen.
|
| const void *returnedContext = &returnedContext;
|
| URegexMatchCallback *returnedFn = &testCallBackFn;
|
| -
|
| +
|
| UErrorCode status = U_ZERO_ERROR;
|
| RegexMatcher matcher("x", 0, status);
|
| REGEX_CHECK_STATUS;
|
| @@ -4752,7 +4760,7 @@ void RegexTest::Callbacks() {
|
| REGEX_ASSERT(returnedFn == NULL);
|
| REGEX_ASSERT(returnedContext == NULL);
|
| }
|
| -
|
| +
|
| {
|
| // Set and Get work
|
| callBackContext cbInfo = {this, 0, 0, 0};
|
| @@ -4767,7 +4775,7 @@ void RegexTest::Callbacks() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(returnedFn == testCallBackFn);
|
| REGEX_ASSERT(returnedContext == &cbInfo);
|
| -
|
| +
|
| // A short-running match shouldn't invoke the callback
|
| status = U_ZERO_ERROR;
|
| cbInfo.reset(1);
|
| @@ -4776,7 +4784,7 @@ void RegexTest::Callbacks() {
|
| REGEX_ASSERT(matcher.matches(status));
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(cbInfo.numCalls == 0);
|
| -
|
| +
|
| // A medium-length match that runs long enough to invoke the
|
| // callback, but not so long that the callback aborts it.
|
| status = U_ZERO_ERROR;
|
| @@ -4786,7 +4794,7 @@ void RegexTest::Callbacks() {
|
| REGEX_ASSERT(matcher.matches(status)==FALSE);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(cbInfo.numCalls > 0);
|
| -
|
| +
|
| // A longer running match that the callback function will abort.
|
| status = U_ZERO_ERROR;
|
| cbInfo.reset(4);
|
| @@ -4796,7 +4804,7 @@ void RegexTest::Callbacks() {
|
| REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
|
| REGEX_ASSERT(cbInfo.numCalls == 4);
|
| }
|
| -
|
| +
|
|
|
| }
|
|
|
| @@ -4816,6 +4824,9 @@ struct progressCallBackContext {
|
| void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
|
| };
|
|
|
| +// call-back function for find().
|
| +// Return TRUE to continue the find().
|
| +// Return FALSE to stop the find().
|
| U_CDECL_BEGIN
|
| static UBool U_CALLCONV
|
| testProgressCallBackFn(const void *context, int64_t matchIndex) {
|
| @@ -4830,12 +4841,12 @@ U_CDECL_END
|
| void RegexTest::FindProgressCallbacks() {
|
| {
|
| // Getter returns NULLs if no callback has been set
|
| -
|
| +
|
| // The variables that the getter will fill in.
|
| // Init to non-null values so that the action of the getter can be seen.
|
| const void *returnedContext = &returnedContext;
|
| URegexFindProgressCallback *returnedFn = &testProgressCallBackFn;
|
| -
|
| +
|
| UErrorCode status = U_ZERO_ERROR;
|
| RegexMatcher matcher("x", 0, status);
|
| REGEX_CHECK_STATUS;
|
| @@ -4844,14 +4855,14 @@ void RegexTest::FindProgressCallbacks() {
|
| REGEX_ASSERT(returnedFn == NULL);
|
| REGEX_ASSERT(returnedContext == NULL);
|
| }
|
| -
|
| +
|
| {
|
| // Set and Get work
|
| progressCallBackContext cbInfo = {this, 0, 0, 0};
|
| const void *returnedContext;
|
| URegexFindProgressCallback *returnedFn;
|
| UErrorCode status = U_ZERO_ERROR;
|
| - RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.
|
| + RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);
|
| REGEX_CHECK_STATUS;
|
| matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);
|
| REGEX_CHECK_STATUS;
|
| @@ -4859,11 +4870,11 @@ void RegexTest::FindProgressCallbacks() {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(returnedFn == testProgressCallBackFn);
|
| REGEX_ASSERT(returnedContext == &cbInfo);
|
| -
|
| - // A short-running match should NOT invoke the callback.
|
| +
|
| + // A find that matches on the initial position does NOT invoke the callback.
|
| status = U_ZERO_ERROR;
|
| cbInfo.reset(100);
|
| - UnicodeString s = "abxxx";
|
| + UnicodeString s = "aaxxx";
|
| matcher.reset(s);
|
| #if 0
|
| matcher.setTrace(TRUE);
|
| @@ -4871,8 +4882,9 @@ void RegexTest::FindProgressCallbacks() {
|
| REGEX_ASSERT(matcher.find(0, status));
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(cbInfo.numCalls == 0);
|
| -
|
| - // A medium running match that causes matcher.find() to invoke our callback for each index.
|
| +
|
| + // A medium running find() that causes matcher.find() to invoke our callback for each index,
|
| + // but not so many times that we interrupt the operation.
|
| status = U_ZERO_ERROR;
|
| s = "aaaaaaaaaaaaaaaaaaab";
|
| cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string
|
| @@ -4880,31 +4892,30 @@ void RegexTest::FindProgressCallbacks() {
|
| REGEX_ASSERT(matcher.find(0, status)==FALSE);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
|
| -
|
| +
|
| // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
|
| status = U_ZERO_ERROR;
|
| UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
|
| cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string
|
| matcher.reset(s1);
|
| REGEX_ASSERT(matcher.find(0, status)==FALSE);
|
| - REGEX_CHECK_STATUS;
|
| + REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
|
| REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
|
|
|
| -#if 0
|
| // Now a match that will succeed, but after an interruption
|
| status = U_ZERO_ERROR;
|
| UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
|
| cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string
|
| matcher.reset(s2);
|
| REGEX_ASSERT(matcher.find(0, status)==FALSE);
|
| - REGEX_CHECK_STATUS;
|
| + REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
|
| // Now retry the match from where left off
|
| cbInfo.maxCalls = 100; // No callback limit
|
| + status = U_ZERO_ERROR;
|
| REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
|
| REGEX_CHECK_STATUS;
|
| -#endif
|
| }
|
| -
|
| +
|
|
|
| }
|
|
|
| @@ -4923,7 +4934,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| UText patternText = UTEXT_INITIALIZER;
|
| UnicodeString buffer;
|
| UText bufferText = UTEXT_INITIALIZER;
|
| -
|
| +
|
| utext_openUnicodeString(&bufferText, &buffer, &status);
|
|
|
| /*
|
| @@ -4940,7 +4951,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
|
| u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
|
| utext_openUChars(&text2, text2Chars, -1, &status);
|
| -
|
| +
|
| regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
|
| re = uregex_openUText(&patternText, 0, NULL, &status);
|
|
|
| @@ -4952,7 +4963,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| utext_setNativeIndex(resultText, 0);
|
| utext_setNativeIndex(&text1, 0);
|
| REGEX_ASSERT(testUTextEqual(resultText, &text1));
|
| -
|
| +
|
| resultText = uregex_getUText(re, &bufferText, &status);
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(resultText == &bufferText);
|
| @@ -4968,7 +4979,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| utext_setNativeIndex(resultText, 0);
|
| utext_setNativeIndex(&text2, 0);
|
| REGEX_ASSERT(testUTextEqual(resultText, &text2));
|
| -
|
| +
|
| uregex_close(re);
|
| utext_close(&text1);
|
| utext_close(&text2);
|
| @@ -5014,7 +5025,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| uregex_close(re);
|
|
|
| }
|
| -
|
| +
|
| /*
|
| * replaceFirst()
|
| */
|
| @@ -5023,7 +5034,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| UChar text2[80];
|
| UText replText = UTEXT_INITIALIZER;
|
| UText *result;
|
| -
|
| +
|
| status = U_ZERO_ERROR;
|
| u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
|
| u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
|
| @@ -5047,7 +5058,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| REGEX_CHECK_STATUS;
|
| REGEX_ASSERT(result == &bufferText);
|
| REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
|
| -
|
| +
|
| /* Unicode escapes */
|
| uregex_setText(re, text1, -1, &status);
|
| regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);
|
| @@ -5104,7 +5115,7 @@ void RegexTest::PreAllocatedUTextCAPI () {
|
| * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
|
| * so we don't need to test it here.
|
| */
|
| -
|
| +
|
| utext_close(&bufferText);
|
| utext_close(&patternText);
|
| }
|
| @@ -5179,7 +5190,7 @@ void RegexTest::Bug8479() {
|
| delete pMatcher;
|
| }
|
| }
|
| -
|
| +
|
|
|
| // Bug 7029
|
| void RegexTest::Bug7029() {
|
| @@ -5197,16 +5208,17 @@ void RegexTest::Bug7029() {
|
|
|
| // Bug 9283
|
| // This test is checking for the existance of any supplemental characters that case-fold
|
| -// to a bmp character.
|
| +// to a bmp character.
|
| //
|
| -// At the time of this writing there are none. If any should appear in a subsequent release
|
| -// of Unicode, the code in regular expressions compilation that determines the longest
|
| -// posssible match for a literal string will need to be enhanced.
|
| +// At the time of this writing there are none. If any should appear in a subsequent release
|
| +// of Unicode, the code in regular expressions compilation that determines the longest
|
| +// posssible match for a literal string will need to be enhanced.
|
| //
|
| // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
|
| // for details on what to do in case of a failure of this test.
|
| //
|
| void RegexTest::Bug9283() {
|
| +#if !UCONFIG_NO_NORMALIZATION
|
| UErrorCode status = U_ZERO_ERROR;
|
| UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]]", status);
|
| REGEX_CHECK_STATUS;
|
| @@ -5220,6 +5232,7 @@ void RegexTest::Bug9283() {
|
| UnicodeString cf = UnicodeString(c).foldCase();
|
| REGEX_ASSERT(cf.length() >= 2);
|
| }
|
| +#endif /* #if !UCONFIG_NO_NORMALIZATION */
|
| }
|
|
|
|
|
| @@ -5232,47 +5245,128 @@ void RegexTest::CheckInvBufSize() {
|
| }
|
| }
|
|
|
| -void RegexTest::TestBug11371() {
|
| +
|
| +void RegexTest::Bug10459() {
|
| UErrorCode status = U_ZERO_ERROR;
|
| - UnicodeString patternString;
|
| + UnicodeString patternString("(txt)");
|
| + UnicodeString txtString("txt");
|
| +
|
| + UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status);
|
| + REGEX_CHECK_STATUS;
|
| + UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status);
|
| + REGEX_CHECK_STATUS;
|
| +
|
| + URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status);
|
| + REGEX_CHECK_STATUS;
|
|
|
| - for (int i=0; i<8000000; i++) {
|
| - patternString.append(UnicodeString("()"));
|
| + uregex_setUText(icu_re, utext_txt, &status);
|
| + REGEX_CHECK_STATUS;
|
| +
|
| + // The bug was that calling uregex_group() before doing a matching operation
|
| + // was causing a segfault. Only for Regular Expressions created from UText.
|
| + // It should set an U_REGEX_INVALID_STATE.
|
| +
|
| + UChar buf[100];
|
| + int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status);
|
| + REGEX_ASSERT(status == U_REGEX_INVALID_STATE);
|
| + REGEX_ASSERT(len == 0);
|
| +
|
| + uregex_close(icu_re);
|
| + utext_close(utext_pat);
|
| + utext_close(utext_txt);
|
| +}
|
| +
|
| +void RegexTest::TestCaseInsensitiveStarters() {
|
| + // Test that the data used by RegexCompile::findCaseInsensitiveStarters() hasn't
|
| + // become stale because of new Unicode characters.
|
| + // If it is stale, rerun the generation tool
|
| + // svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing
|
| + // and replace the embedded data in i18n/regexcmp.cpp
|
| +
|
| + for (UChar32 cp=0; cp<=0x10ffff; cp++) {
|
| + if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) {
|
| + continue;
|
| + }
|
| + UnicodeSet s(cp, cp);
|
| + s.closeOver(USET_CASE_INSENSITIVE);
|
| + UnicodeSetIterator setIter(s);
|
| + while (setIter.next()) {
|
| + if (!setIter.isString()) {
|
| + continue;
|
| + }
|
| + const UnicodeString &str = setIter.getString();
|
| + UChar32 firstChar = str.char32At(0);
|
| + UnicodeSet starters;
|
| + RegexCompile::findCaseInsensitiveStarters(firstChar, &starters);
|
| + if (!starters.contains(cp)) {
|
| + errln("CaseInsensitiveStarters for \\u%x is missing character \\u%x.", cp, firstChar);
|
| + return;
|
| + }
|
| + }
|
| }
|
| +}
|
| +
|
| +
|
| +void RegexTest::TestBug11049() {
|
| + // Original bug report: pattern with match start consisting of one of several individual characters,
|
| + // and the text being matched ending with a supplementary character. find() would read past the
|
| + // end of the input text when searching for potential match starting points.
|
| +
|
| + // To see the problem, the text must exactly fill an allocated buffer, so that valgrind will
|
| + // detect the bad read.
|
| +
|
| + TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__);
|
| + TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__);
|
| +
|
| + // Test again with a pattern starting with a single character,
|
| + // which takes a different code path than starting with an OR expression,
|
| + // but with similar logic.
|
| + TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);
|
| + TestCase11049("C", "string matches at end C", TRUE, __LINE__);
|
| +}
|
| +
|
| +// Run a single test case from TestBug11049(). Internal function.
|
| +void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber) {
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + UnicodeString patternString = UnicodeString(pattern).unescape();
|
| LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
|
| - if (status != U_REGEX_PATTERN_TOO_BIG) {
|
| - errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
|
| - __FILE__, __LINE__, u_errorName(status));
|
| - }
|
|
|
| - status = U_ZERO_ERROR;
|
| - patternString = "(";
|
| - for (int i=0; i<20000000; i++) {
|
| - patternString.append(UnicodeString("A++"));
|
| - }
|
| - patternString.append(UnicodeString("){0}B++"));
|
| - LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
|
| - if (status != U_REGEX_PATTERN_TOO_BIG) {
|
| - errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
|
| - __FILE__, __LINE__, u_errorName(status));
|
| - }
|
| + UnicodeString dataString = UnicodeString(data).unescape();
|
| + UChar *exactBuffer = new UChar[dataString.length()];
|
| + dataString.extract(exactBuffer, dataString.length(), status);
|
| + UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status);
|
|
|
| - // Pattern with too much string data, such that string indexes overflow operand data.
|
| - status = U_ZERO_ERROR;
|
| - patternString = "";
|
| - while (patternString.length() < 0x00ffffff) {
|
| - patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));
|
| - }
|
| - patternString.append(UnicodeString("X? trailing string"));
|
| - LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
|
| - compiledPat3->dumpPattern();
|
| - if (status != U_REGEX_PATTERN_TOO_BIG) {
|
| - errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.",
|
| - __FILE__, __LINE__, u_errorName(status));
|
| + LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status));
|
| + REGEX_CHECK_STATUS;
|
| + matcher->reset(ut);
|
| + UBool result = matcher->find();
|
| + if (result != expectMatch) {
|
| + errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \"%s\"",
|
| + __FILE__, lineNumber, expectMatch, result, pattern, data);
|
| + }
|
| +
|
| + // Rerun test with UTF-8 input text. Won't see buffer overreads, but could see
|
| + // off-by-one on find() with match at the last code point.
|
| + // Size of the original char * data (invariant charset) will be <= than the equivalent UTF-8
|
| + // because string.unescape() will only shrink it.
|
| + char * utf8Buffer = new char[uprv_strlen(data)+1];
|
| + u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), dataString.length(), &status);
|
| + REGEX_CHECK_STATUS;
|
| + ut = utext_openUTF8(ut, utf8Buffer, -1, &status);
|
| + REGEX_CHECK_STATUS;
|
| + matcher->reset(ut);
|
| + result = matcher->find();
|
| + if (result != expectMatch) {
|
| + errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \"%s\", text = \"%s\"",
|
| + __FILE__, lineNumber, expectMatch, result, pattern, data);
|
| }
|
| + delete [] utf8Buffer;
|
|
|
| + utext_close(ut);
|
| + delete [] exactBuffer;
|
| +}
|
|
|
|
|
| -}
|
|
|
| #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
|
| +
|
|
|