source/test/intltest/regextst.cpp - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/test/intltest/regextst.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /********************************************************************	1 /********************************************************************

2 * COPYRIGHT:	2 * COPYRIGHT:

3 * Copyright (c) 2002-2013, International Business Machines Corporation and	3 * Copyright (c) 2002-2014, International Business Machines Corporation and

4 * others. All Rights Reserved.	4 * others. All Rights Reserved.

5 ********************************************************************/	5 ********************************************************************/

6	6

7 //	7 //

8 // regextst.cpp	8 // regextst.cpp

9 //	9 //

10 // ICU Regular Expressions test, part of intltest.	10 // ICU Regular Expressions test, part of intltest.

11 //	11 //

12	12

13 /*	13 /*

14 NOTE!!	14 NOTE!!

15	15

16 PLEASE be careful about ASCII assumptions in this test.	16 PLEASE be careful about ASCII assumptions in this test.

17 This test is one of the worst repeat offenders.	17 This test is one of the worst repeat offenders.

18 If you have questions, contact someone on the ICU PMC	18 If you have questions, contact someone on the ICU PMC

19 who has access to an EBCDIC system.	19 who has access to an EBCDIC system.

20	20

21 */	21 */

22	22

23 #include "intltest.h"	23 #include "intltest.h"

24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS	24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

25	25

	26 #include "unicode/localpointer.h"

26 #include "unicode/regex.h"	27 #include "unicode/regex.h"

27 #include "unicode/uchar.h"	28 #include "unicode/uchar.h"

28 #include "unicode/ucnv.h"	29 #include "unicode/ucnv.h"

29 #include "unicode/uniset.h"	30 #include "unicode/uniset.h"

	31 #include "unicode/uregex.h"

	32 #include "unicode/usetiter.h"

30 #include "unicode/ustring.h"	33 #include "unicode/ustring.h"

31 #include "regextst.h"	34 #include "regextst.h"

	35 #include "regexcmp.h"

32 #include "uvector.h"	36 #include "uvector.h"

33 #include "util.h"	37 #include "util.h"

34 #include <stdlib.h>	38 #include <stdlib.h>

35 #include <string.h>	39 #include <string.h>

36 #include <stdio.h>	40 #include <stdio.h>

37 #include "cstring.h"	41 #include "cstring.h"

38 #include "uinvchar.h"	42 #include "uinvchar.h"

39	43

40 #define SUPPORT_MUTATING_INPUT_STRING 0	44 #define SUPPORT_MUTATING_INPUT_STRING 0

41	45

(...skipping 82 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
124 break;	128 break;

125 case 19: name = "Bug 7029";	129 case 19: name = "Bug 7029";

126 if (exec) Bug7029();	130 if (exec) Bug7029();

127 break;	131 break;

128 case 20: name = "CheckInvBufSize";	132 case 20: name = "CheckInvBufSize";

129 if (exec) CheckInvBufSize();	133 if (exec) CheckInvBufSize();

130 break;	134 break;

131 case 21: name = "Bug 9283";	135 case 21: name = "Bug 9283";

132 if (exec) Bug9283();	136 if (exec) Bug9283();

133 break;	137 break;

134 case 22: name = "TestBug11371";	138 case 22: name = "Bug10459";

135 if (exec) TestBug11371();	139 if (exec) Bug10459();

136 break;	140 break;

137	141 case 23: name = "TestCaseInsensitiveStarters";

	142 if (exec) TestCaseInsensitiveStarters();

	143 break;

	144 case 24: name = "TestBug11049";

	145 if (exec) TestBug11049();

	146 break;

138 default: name = "";	147 default: name = "";

139 break; //needed to end loop	148 break; //needed to end loop

140 }	149 }

141 }	150 }

142	151

143	152

144	153

145 /**	154 /**

146 * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage	155 * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage

147 * into ASCII.	156 * into ASCII.

148 * @see utext_openUTF8	157 * @see utext_openUTF8

149 */	158 */

150 static UText* regextst_openUTF8FromInvariant(UText* ut, const char inv, int64_t length, UErrorCode status);	159 static UText* regextst_openUTF8FromInvariant(UText* ut, const char inv, int64_t length, UErrorCode status);

151	160

152 //---------------------------------------------------------------------------	161 //---------------------------------------------------------------------------

153 //	162 //

154 // Error Checking / Reporting macros used in all of the tests.	163 // Error Checking / Reporting macros used in all of the tests.

155 //	164 //

156 //---------------------------------------------------------------------------	165 //---------------------------------------------------------------------------

157	166

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
203 UChar ch = buf[i];	212 UChar ch = buf[i];

204 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch);	213 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch);

205 }	214 }

206 }	215 }

207 }	216 }

208 }	217 }

209 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0;	218 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0;

210 return ASSERT_BUF;	219 return ASSERT_BUF;

211 }	220 }

212	221

213

214 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf) /sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}	222 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf) /sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}

215	223

216 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \	224 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \

217 __FILE__, __LINE__ , u_errorName(status)); return;}}	225 __FILE__, __LINE__ , u_errorName(status)); return;}}

218	226

219 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};}	227 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};}

220	228

221 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr) ;\	229 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr) ;\

222 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status= %s, got %s", \	230 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status= %s, got %s", \

223 __LINE__, u_errorName(errcode), u_errorName(status));};}	231 __LINE__, u_errorName(errcode), u_errorName(status));};}

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
289 char buf[201 /21/];	297 char buf[201 /21/];

290 char expectedBuf[201];	298 char expectedBuf[201];

291 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);	299 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);

292 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]) , &expectedText);	300 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]) , &expectedText);

293 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars) , got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe ctedText), buf, (int)utext_nativeLength(actual));	301 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars) , got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe ctedText), buf, (int)utext_nativeLength(actual));

294 }	302 }

295 utext_close(&expectedText);	303 utext_close(&expectedText);

296 }	304 }

297	305

298 /**	306 /**

299 * Assumes utf-8 input	307 * Assumes utf-8 input

300 */	308 */

301 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua l), __FILE__, __LINE__)	309 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua l), __FILE__, __LINE__)

302 /**	310 /**

303 * Assumes Invariant input	311 * Assumes Invariant input

304 */	312 */

305 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp ected), (actual), __FILE__, __LINE__)	313 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp ected), (actual), __FILE__, __LINE__)

306	314

307 /**	315 /**

308 * This buffer ( inv_buf ) is used to hold the UTF-8 strings	316 * This buffer ( inv_buf ) is used to hold the UTF-8 strings

309 * passed into utext_openUTF8. An error will be given if	317 * passed into utext_openUTF8. An error will be given if

310 * INV_BUFSIZ is too small. It's only used on EBCDIC systems.	318 * INV_BUFSIZ is too small. It's only used on EBCDIC systems.

311 */	319 */

312	320

313 #define INV_BUFSIZ 2048 /* increase this if too small */	321 #define INV_BUFSIZ 2048 /* increase this if too small */

314	322

315 static int64_t inv_next=0;	323 static int64_t inv_next=0;

316	324

317 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY	325 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY

318 static char inv_buf[INV_BUFSIZ];	326 static char inv_buf[INV_BUFSIZ];

319 #endif	327 #endif

320	328

321 static UText* regextst_openUTF8FromInvariant(UText ut, const char inv, int64_t length, UErrorCode *status) {	329 static UText* regextst_openUTF8FromInvariant(UText ut, const char inv, int64_t length, UErrorCode *status) {

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
369 RegexMatcher *REMatcher = NULL;	377 RegexMatcher *REMatcher = NULL;

370 UBool retVal = TRUE;	378 UBool retVal = TRUE;

371	379

372 UnicodeString patString(pat, -1, US_INV);	380 UnicodeString patString(pat, -1, US_INV);

373 REPattern = RegexPattern::compile(patString, 0, pe, status);	381 REPattern = RegexPattern::compile(patString, 0, pe, status);

374 if (U_FAILURE(status)) {	382 if (U_FAILURE(status)) {

375 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta tus = %s",	383 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta tus = %s",

376 line, u_errorName(status));	384 line, u_errorName(status));

377 return FALSE;	385 return FALSE;

378 }	386 }

379 if (line==376) { RegexPatternDump(REPattern);}	387 if (line==376) { REPattern->dumpPattern();}

380	388

381 UnicodeString inputString(inputText);	389 UnicodeString inputString(inputText);

382 UnicodeString unEscapedInput = inputString.unescape();	390 UnicodeString unEscapedInput = inputString.unescape();

383 REMatcher = REPattern->matcher(unEscapedInput, status);	391 REMatcher = REPattern->matcher(unEscapedInput, status);

384 if (U_FAILURE(status)) {	392 if (U_FAILURE(status)) {

385 errln("RegexTest failure in REPattern::matcher() at line %d. Status = % s\n",	393 errln("RegexTest failure in REPattern::matcher() at line %d. Status = % s\n",

386 line, u_errorName(status));	394 line, u_errorName(status));

387 return FALSE;	395 return FALSE;

388 }	396 }

389	397

(...skipping 15 matching lines...) Expand all Loading...
405 errln("RegexTest failure in matches() at line %d. Status = %s\n",	413 errln("RegexTest failure in matches() at line %d. Status = %s\n",

406 line, u_errorName(status));	414 line, u_errorName(status));

407 retVal = FALSE;	415 retVal = FALSE;

408 }	416 }

409 if (actualmatch != match) {	417 if (actualmatch != match) {

410 errln("RegexTest: wrong return from matches() at line %d.\n", line);	418 errln("RegexTest: wrong return from matches() at line %d.\n", line);

411 retVal = FALSE;	419 retVal = FALSE;

412 }	420 }

413	421

414 if (retVal == FALSE) {	422 if (retVal == FALSE) {

415 RegexPatternDump(REPattern);	423 REPattern->dumpPattern();

416 }	424 }

417	425

418 delete REPattern;	426 delete REPattern;

419 delete REMatcher;	427 delete REMatcher;

420 return retVal;	428 return retVal;

421 }	429 }

422	430

423	431

424 UBool RegexTest::doRegexLMTestUTF8(const char pat, const char text, UBool look ing, UBool match, int32_t line) {	432 UBool RegexTest::doRegexLMTestUTF8(const char pat, const char text, UBool look ing, UBool match, int32_t line) {

425 UText pattern = UTEXT_INITIALIZER;	433 UText pattern = UTEXT_INITIALIZER;

426 int32_t inputUTF8Length;	434 int32_t inputUTF8Length;

427 char *textChars = NULL;	435 char *textChars = NULL;

428 UText inputText = UTEXT_INITIALIZER;	436 UText inputText = UTEXT_INITIALIZER;

429 UErrorCode status = U_ZERO_ERROR;	437 UErrorCode status = U_ZERO_ERROR;

430 UParseError pe;	438 UParseError pe;

431 RegexPattern *REPattern = NULL;	439 RegexPattern *REPattern = NULL;

432 RegexMatcher *REMatcher = NULL;	440 RegexMatcher *REMatcher = NULL;

433 UBool retVal = TRUE;	441 UBool retVal = TRUE;

434	442

435 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status);	443 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status);

436 REPattern = RegexPattern::compile(&pattern, 0, pe, status);	444 REPattern = RegexPattern::compile(&pattern, 0, pe, status);

437 if (U_FAILURE(status)) {	445 if (U_FAILURE(status)) {

438 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8 ). Status = %s\n",	446 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8 ). Status = %s\n",

439 line, u_errorName(status));	447 line, u_errorName(status));

440 return FALSE;	448 return FALSE;

441 }	449 }

442	450

443 UnicodeString inputString(text, -1, US_INV);	451 UnicodeString inputString(text, -1, US_INV);

444 UnicodeString unEscapedInput = inputString.unescape();	452 UnicodeString unEscapedInput = inputString.unescape();

445 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));	453 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));

446 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N ULL, NULL, NULL, &status);	454 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N ULL, NULL, NULL, &status);

447	455

448 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);	456 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);

449 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {	457 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {

450 // UTF-8 does not allow unpaired surrogates, so this could actually happen	458 // UTF-8 does not allow unpaired surrogates, so this could actually happen

451 logln("RegexTest unable to convert input to UTF8 at line %d. Status = % s\n", line, u_errorName(status));	459 logln("RegexTest unable to convert input to UTF8 at line %d. Status = % s\n", line, u_errorName(status));

452 return TRUE; // not a failure of the Regex engine	460 return TRUE; // not a failure of the Regex engine

453 }	461 }

454 status = U_ZERO_ERROR; // buffer overflow	462 status = U_ZERO_ERROR; // buffer overflow

455 textChars = new char[inputUTF8Length+1];	463 textChars = new char[inputUTF8Length+1];

456 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias( ), status);	464 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias( ), status);

457 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);	465 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);

458	466

459 REMatcher = &REPattern->matcher(status)->reset(&inputText);	467 REMatcher = &REPattern->matcher(status)->reset(&inputText);

460 if (U_FAILURE(status)) {	468 if (U_FAILURE(status)) {

461 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta tus = %s\n",	469 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta tus = %s\n",

462 line, u_errorName(status));	470 line, u_errorName(status));

463 return FALSE;	471 return FALSE;

464 }	472 }

465	473

466 UBool actualmatch;	474 UBool actualmatch;

467 actualmatch = REMatcher->lookingAt(status);	475 actualmatch = REMatcher->lookingAt(status);

468 if (U_FAILURE(status)) {	476 if (U_FAILURE(status)) {

(...skipping 12 matching lines...) Expand all Loading...
481 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n" ,	489 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n" ,

482 line, u_errorName(status));	490 line, u_errorName(status));

483 retVal = FALSE;	491 retVal = FALSE;

484 }	492 }

485 if (actualmatch != match) {	493 if (actualmatch != match) {

486 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin e);	494 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin e);

487 retVal = FALSE;	495 retVal = FALSE;

488 }	496 }

489	497

490 if (retVal == FALSE) {	498 if (retVal == FALSE) {

491 RegexPatternDump(REPattern);	499 REPattern->dumpPattern();

492 }	500 }

493	501

494 delete REPattern;	502 delete REPattern;

495 delete REMatcher;	503 delete REMatcher;

496 utext_close(&inputText);	504 utext_close(&inputText);

497 utext_close(&pattern);	505 utext_close(&pattern);

498 delete[] textChars;	506 delete[] textChars;

499 return retVal;	507 return retVal;

500 }	508 }

501	509

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
547 if (status != expectedStatus) {	555 if (status != expectedStatus) {

548 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err orName(status));	556 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err orName(status));

549 } else {	557 } else {

550 if (status != U_ZERO_ERROR) {	558 if (status != U_ZERO_ERROR) {

551 if (pe.line != errLine \|\| pe.offset != errCol) {	559 if (pe.line != errLine \|\| pe.offset != errCol) {

552 errln("Line %d: incorrect line/offset from UParseError. Expecte d %d/%d; got %d/%d.\n",	560 errln("Line %d: incorrect line/offset from UParseError. Expecte d %d/%d; got %d/%d.\n",

553 line, errLine, errCol, pe.line, pe.offset);	561 line, errLine, errCol, pe.line, pe.offset);

554 }	562 }

555 }	563 }

556 }	564 }

557	565

558 delete callerPattern;	566 delete callerPattern;

559 utext_close(&patternText);	567 utext_close(&patternText);

560 }	568 }

561	569

562	570

563	571

564 //---------------------------------------------------------------------------	572 //---------------------------------------------------------------------------

565 //	573 //

566 // Basic Check for basic functionality of regex pattern matching.	574 // Basic Check for basic functionality of regex pattern matching.

567 // Avoid the use of REGEX_FIND test macro, which has	575 // Avoid the use of REGEX_FIND test macro, which has

568 // substantial dependencies on basic Regex functionality.	576 // substantial dependencies on basic Regex functionality.

569 //	577 //

570 //---------------------------------------------------------------------------	578 //---------------------------------------------------------------------------

571 void RegexTest::Basic() {	579 void RegexTest::Basic() {

572	580

573	581

574 //	582 //

575 // Debug - slide failing test cases early	583 // Debug - slide failing test cases early

576 //	584 //

577 #if 0	585 #if 0

578 {	586 {

579 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);	587 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);

580 UParseError pe;	588 UParseError pe;

581 UErrorCode status = U_ZERO_ERROR;	589 UErrorCode status = U_ZERO_ERROR;

582 RegexPattern *pattern;	590 RegexPattern *pattern;

583 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc ape(), UREGEX_CASE_INSENSITIVE, pe, status);	591 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc ape(), UREGEX_CASE_INSENSITIVE, pe, status);

584 RegexPatternDump(pattern);	592 pattern->dumpPattern();

585 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz") .unescape(), status);	593 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz") .unescape(), status);

586 UBool result = m->find();	594 UBool result = m->find();

587 printf("result = %d\n", result);	595 printf("result = %d\n", result);

588 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd");	596 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd");

589 // REGEX_FIND("(X([abc=X]+)+X)\|(y[abc=]+)", "=XX====================");	597 // REGEX_FIND("(X([abc=X]+)+X)\|(y[abc=]+)", "=XX====================");

590 }	598 }

591 exit(1);	599 exit(1);

592 #endif	600 #endif

593	601

594	602

(...skipping 127 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
722 // implementation.	730 // implementation.

723 //	731 //

724 //---------------------------------------------------------------------------	732 //---------------------------------------------------------------------------

725 void RegexTest::UTextBasic() {	733 void RegexTest::UTextBasic() {

726 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */	734 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */

727 UErrorCode status = U_ZERO_ERROR;	735 UErrorCode status = U_ZERO_ERROR;

728 UText pattern = UTEXT_INITIALIZER;	736 UText pattern = UTEXT_INITIALIZER;

729 utext_openUTF8(&pattern, str_abc, -1, &status);	737 utext_openUTF8(&pattern, str_abc, -1, &status);

730 RegexMatcher matcher(&pattern, 0, status);	738 RegexMatcher matcher(&pattern, 0, status);

731 REGEX_CHECK_STATUS;	739 REGEX_CHECK_STATUS;

732	740

733 UText input = UTEXT_INITIALIZER;	741 UText input = UTEXT_INITIALIZER;

734 utext_openUTF8(&input, str_abc, -1, &status);	742 utext_openUTF8(&input, str_abc, -1, &status);

735 REGEX_CHECK_STATUS;	743 REGEX_CHECK_STATUS;

736 matcher.reset(&input);	744 matcher.reset(&input);

737 REGEX_CHECK_STATUS;	745 REGEX_CHECK_STATUS;

738 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());	746 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());

739	747

740 matcher.reset(matcher.inputText());	748 matcher.reset(matcher.inputText());

741 REGEX_CHECK_STATUS;	749 REGEX_CHECK_STATUS;

742 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());	750 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());

743	751

744 utext_close(&pattern);	752 utext_close(&pattern);

745 utext_close(&input);	753 utext_close(&input);

746 }	754 }

747	755

748	756

749 //---------------------------------------------------------------------------	757 //---------------------------------------------------------------------------

750 //	758 //

751 // API_Match Test that the API for class RegexMatcher	759 // API_Match Test that the API for class RegexMatcher

752 // is present and nominally working, but excluding functions	760 // is present and nominally working, but excluding functions

753 // implementing replace operations.	761 // implementing replace operations.

(...skipping 356 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1110 UErrorCode status = U_ZERO_ERROR;	1118 UErrorCode status = U_ZERO_ERROR;

1111 RegexPattern *p = RegexPattern::compile(".", 0, status);	1119 RegexPattern *p = RegexPattern::compile(".", 0, status);

1112 RegexMatcher *m = p->matcher(status);	1120 RegexMatcher *m = p->matcher(status);

1113 REGEX_CHECK_STATUS;	1121 REGEX_CHECK_STATUS;

1114	1122

1115 REGEX_ASSERT(m->find() == FALSE);	1123 REGEX_ASSERT(m->find() == FALSE);

1116 REGEX_ASSERT(m->input() == "");	1124 REGEX_ASSERT(m->input() == "");

1117 delete m;	1125 delete m;

1118 delete p;	1126 delete p;

1119 }	1127 }

1120	1128

1121 //	1129 //

1122 // Regions	1130 // Regions

1123 //	1131 //

1124 {	1132 {

1125 UErrorCode status = U_ZERO_ERROR;	1133 UErrorCode status = U_ZERO_ERROR;

1126 UnicodeString testString("This is test data");	1134 UnicodeString testString("This is test data");

1127 RegexMatcher m(".*", testString, 0, status);	1135 RegexMatcher m(".*", testString, 0, status);

1128 REGEX_CHECK_STATUS;	1136 REGEX_CHECK_STATUS;

1129 REGEX_ASSERT(m.regionStart() == 0);	1137 REGEX_ASSERT(m.regionStart() == 0);

1130 REGEX_ASSERT(m.regionEnd() == testString.length());	1138 REGEX_ASSERT(m.regionEnd() == testString.length());

1131 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	1139 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

1132 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	1140 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

1133	1141

1134 m.region(2,4, status);	1142 m.region(2,4, status);

1135 REGEX_CHECK_STATUS;	1143 REGEX_CHECK_STATUS;

1136 REGEX_ASSERT(m.matches(status));	1144 REGEX_ASSERT(m.matches(status));

1137 REGEX_ASSERT(m.start(status)==2);	1145 REGEX_ASSERT(m.start(status)==2);

1138 REGEX_ASSERT(m.end(status)==4);	1146 REGEX_ASSERT(m.end(status)==4);

1139 REGEX_CHECK_STATUS;	1147 REGEX_CHECK_STATUS;

1140	1148

1141 m.reset();	1149 m.reset();

1142 REGEX_ASSERT(m.regionStart() == 0);	1150 REGEX_ASSERT(m.regionStart() == 0);

1143 REGEX_ASSERT(m.regionEnd() == testString.length());	1151 REGEX_ASSERT(m.regionEnd() == testString.length());

1144	1152

1145 UnicodeString shorterString("short");	1153 UnicodeString shorterString("short");

1146 m.reset(shorterString);	1154 m.reset(shorterString);

1147 REGEX_ASSERT(m.regionStart() == 0);	1155 REGEX_ASSERT(m.regionStart() == 0);

1148 REGEX_ASSERT(m.regionEnd() == shorterString.length());	1156 REGEX_ASSERT(m.regionEnd() == shorterString.length());

1149	1157

1150 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	1158 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

1151 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));	1159 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));

1152 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);	1160 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

1153 REGEX_ASSERT(&m == &m.reset());	1161 REGEX_ASSERT(&m == &m.reset());

1154 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);	1162 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

1155	1163

1156 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));	1164 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));

1157 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	1165 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

1158 REGEX_ASSERT(&m == &m.reset());	1166 REGEX_ASSERT(&m == &m.reset());

1159 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	1167 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

1160	1168

1161 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	1169 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

1162 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));	1170 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));

1163 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);	1171 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);

1164 REGEX_ASSERT(&m == &m.reset());	1172 REGEX_ASSERT(&m == &m.reset());

1165 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);	1173 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);

1166	1174

1167 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));	1175 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));

1168 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	1176 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

1169 REGEX_ASSERT(&m == &m.reset());	1177 REGEX_ASSERT(&m == &m.reset());

1170 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	1178 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

1171	1179

1172 }	1180 }

1173	1181

1174 //	1182 //

1175 // hitEnd() and requireEnd()	1183 // hitEnd() and requireEnd()

1176 //	1184 //

1177 {	1185 {

1178 UErrorCode status = U_ZERO_ERROR;	1186 UErrorCode status = U_ZERO_ERROR;

1179 UnicodeString testString("aabb");	1187 UnicodeString testString("aabb");

1180 RegexMatcher m1(".*", testString, 0, status);	1188 RegexMatcher m1(".*", testString, 0, status);

1181 REGEX_ASSERT(m1.lookingAt(status) == TRUE);	1189 REGEX_ASSERT(m1.lookingAt(status) == TRUE);

1182 REGEX_ASSERT(m1.hitEnd() == TRUE);	1190 REGEX_ASSERT(m1.hitEnd() == TRUE);

1183 REGEX_ASSERT(m1.requireEnd() == FALSE);	1191 REGEX_ASSERT(m1.requireEnd() == FALSE);

1184 REGEX_CHECK_STATUS;	1192 REGEX_CHECK_STATUS;

1185	1193

1186 status = U_ZERO_ERROR;	1194 status = U_ZERO_ERROR;

1187 RegexMatcher m2("a*", testString, 0, status);	1195 RegexMatcher m2("a*", testString, 0, status);

1188 REGEX_ASSERT(m2.lookingAt(status) == TRUE);	1196 REGEX_ASSERT(m2.lookingAt(status) == TRUE);

1189 REGEX_ASSERT(m2.hitEnd() == FALSE);	1197 REGEX_ASSERT(m2.hitEnd() == FALSE);

1190 REGEX_ASSERT(m2.requireEnd() == FALSE);	1198 REGEX_ASSERT(m2.requireEnd() == FALSE);

1191 REGEX_CHECK_STATUS;	1199 REGEX_CHECK_STATUS;

1192	1200

1193 status = U_ZERO_ERROR;	1201 status = U_ZERO_ERROR;

1194 RegexMatcher m3(".*$", testString, 0, status);	1202 RegexMatcher m3(".*$", testString, 0, status);

1195 REGEX_ASSERT(m3.lookingAt(status) == TRUE);	1203 REGEX_ASSERT(m3.lookingAt(status) == TRUE);

(...skipping 17 matching lines...) Expand all Loading...
1213 m.reset(ucharString); // should not compile.	1221 m.reset(ucharString); // should not compile.

1214	1222

1215 RegexPattern *p = RegexPattern::compile(".", 0, status);	1223 RegexPattern *p = RegexPattern::compile(".", 0, status);

1216 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co mpile.	1224 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co mpile.

1217	1225

1218 RegexMatcher m3(".", ucharString, 0, status); // Should not compile	1226 RegexMatcher m3(".", ucharString, 0, status); // Should not compile

1219 }	1227 }

1220 #endif	1228 #endif

1221	1229

1222 //	1230 //

1223 // Time Outs.	1231 // Time Outs.

1224 // Note: These tests will need to be changed when the regexp engine is	1232 // Note: These tests will need to be changed when the regexp engine is

1225 // able to detect and cut short the exponential time behavior on	1233 // able to detect and cut short the exponential time behavior on

1226 // this type of match.	1234 // this type of match.

1227 //	1235 //

1228 {	1236 {

1229 UErrorCode status = U_ZERO_ERROR;	1237 UErrorCode status = U_ZERO_ERROR;

1230 // Enough 'a's in the string to cause the match to time out.	1238 // Enough 'a's in the string to cause the match to time out.

1231 // (Each additional 'a' doubles the time)	1239 // (Each additional 'a' doubles the time)

1232 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");	1240 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");

1233 RegexMatcher matcher("(a+)+b", testString, 0, status);	1241 RegexMatcher matcher("(a+)+b", testString, 0, status);

1234 REGEX_CHECK_STATUS;	1242 REGEX_CHECK_STATUS;

1235 REGEX_ASSERT(matcher.getTimeLimit() == 0);	1243 REGEX_ASSERT(matcher.getTimeLimit() == 0);

1236 matcher.setTimeLimit(100, status);	1244 matcher.setTimeLimit(100, status);

1237 REGEX_ASSERT(matcher.getTimeLimit() == 100);	1245 REGEX_ASSERT(matcher.getTimeLimit() == 100);

1238 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);	1246 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);

1239 REGEX_ASSERT(status == U_REGEX_TIME_OUT);	1247 REGEX_ASSERT(status == U_REGEX_TIME_OUT);

1240 }	1248 }

1241 {	1249 {

1242 UErrorCode status = U_ZERO_ERROR;	1250 UErrorCode status = U_ZERO_ERROR;

1243 // Few enough 'a's to slip in under the time limit.	1251 // Few enough 'a's to slip in under the time limit.

1244 UnicodeString testString("aaaaaaaaaaaaaaaaaa");	1252 UnicodeString testString("aaaaaaaaaaaaaaaaaa");

1245 RegexMatcher matcher("(a+)+b", testString, 0, status);	1253 RegexMatcher matcher("(a+)+b", testString, 0, status);

1246 REGEX_CHECK_STATUS;	1254 REGEX_CHECK_STATUS;

1247 matcher.setTimeLimit(100, status);	1255 matcher.setTimeLimit(100, status);

1248 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);	1256 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);

1249 REGEX_CHECK_STATUS;	1257 REGEX_CHECK_STATUS;

1250 }	1258 }

1251	1259

1252 //	1260 //

1253 // Stack Limits	1261 // Stack Limits

1254 //	1262 //

1255 {	1263 {

1256 UErrorCode status = U_ZERO_ERROR;	1264 UErrorCode status = U_ZERO_ERROR;

1257 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A'	1265 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A'

1258	1266

1259 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations	1267 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations

1260 // of the '+', and makes the stack frames larger.	1268 // of the '+', and makes the stack frames larger.

1261 RegexMatcher matcher("(A)+A$", testString, 0, status);	1269 RegexMatcher matcher("(A)+A$", testString, 0, status);

1262	1270

1263 // With the default stack, this match should fail to run	1271 // With the default stack, this match should fail to run

1264 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);	1272 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);

1265 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);	1273 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);

1266	1274

1267 // With unlimited stack, it should run	1275 // With unlimited stack, it should run

1268 status = U_ZERO_ERROR;	1276 status = U_ZERO_ERROR;

1269 matcher.setStackLimit(0, status);	1277 matcher.setStackLimit(0, status);

1270 REGEX_CHECK_STATUS;	1278 REGEX_CHECK_STATUS;

1271 REGEX_ASSERT(matcher.lookingAt(status) == TRUE);	1279 REGEX_ASSERT(matcher.lookingAt(status) == TRUE);

1272 REGEX_CHECK_STATUS;	1280 REGEX_CHECK_STATUS;

1273 REGEX_ASSERT(matcher.getStackLimit() == 0);	1281 REGEX_ASSERT(matcher.getStackLimit() == 0);

1274	1282

1275 // With a limited stack, the match should fail	1283 // With a limited stack, the match should fail

1276 status = U_ZERO_ERROR;	1284 status = U_ZERO_ERROR;

1277 matcher.setStackLimit(10000, status);	1285 matcher.setStackLimit(10000, status);

1278 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);	1286 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);

1279 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);	1287 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);

1280 REGEX_ASSERT(matcher.getStackLimit() == 10000);	1288 REGEX_ASSERT(matcher.getStackLimit() == 10000);

1281 }	1289 }

1282	1290

1283 // A pattern that doesn't save state should work with	1291 // A pattern that doesn't save state should work with

1284 // a minimal sized stack	1292 // a minimal sized stack

1285 {	1293 {

1286 UErrorCode status = U_ZERO_ERROR;	1294 UErrorCode status = U_ZERO_ERROR;

1287 UnicodeString testString = "abc";	1295 UnicodeString testString = "abc";

1288 RegexMatcher matcher("abc", testString, 0, status);	1296 RegexMatcher matcher("abc", testString, 0, status);

1289 REGEX_CHECK_STATUS;	1297 REGEX_CHECK_STATUS;

1290 matcher.setStackLimit(30, status);	1298 matcher.setStackLimit(30, status);

1291 REGEX_CHECK_STATUS;	1299 REGEX_CHECK_STATUS;

1292 REGEX_ASSERT(matcher.matches(status) == TRUE);	1300 REGEX_ASSERT(matcher.matches(status) == TRUE);

1293 REGEX_CHECK_STATUS;	1301 REGEX_CHECK_STATUS;

1294 REGEX_ASSERT(matcher.getStackLimit() == 30);	1302 REGEX_ASSERT(matcher.getStackLimit() == 30);

1295	1303

1296 // Negative stack sizes should fail	1304 // Negative stack sizes should fail

1297 status = U_ZERO_ERROR;	1305 status = U_ZERO_ERROR;

1298 matcher.setStackLimit(1000, status);	1306 matcher.setStackLimit(1000, status);

1299 REGEX_CHECK_STATUS;	1307 REGEX_CHECK_STATUS;

1300 matcher.setStackLimit(-1, status);	1308 matcher.setStackLimit(-1, status);

1301 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);	1309 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

1302 REGEX_ASSERT(matcher.getStackLimit() == 1000);	1310 REGEX_ASSERT(matcher.getStackLimit() == 1000);

1303 }	1311 }

1304	1312

1305	1313

1306 }	1314 }

1307	1315

1308	1316

1309	1317

1310	1318

1311	1319

1312	1320

1313 //---------------------------------------------------------------------------	1321 //---------------------------------------------------------------------------

1314 //	1322 //

(...skipping 528 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1843 REGEX_CHECK_STATUS;	1851 REGEX_CHECK_STATUS;

1844	1852

1845 UText input1 = UTEXT_INITIALIZER;	1853 UText input1 = UTEXT_INITIALIZER;

1846 UText input2 = UTEXT_INITIALIZER;	1854 UText input2 = UTEXT_INITIALIZER;

1847 UText empty = UTEXT_INITIALIZER;	1855 UText empty = UTEXT_INITIALIZER;

1848 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status);	1856 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &status);

1849 REGEX_VERBOSE_TEXT(&input1);	1857 REGEX_VERBOSE_TEXT(&input1);

1850 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);	1858 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);

1851 REGEX_VERBOSE_TEXT(&input2);	1859 REGEX_VERBOSE_TEXT(&input2);

1852 utext_openUChars(&empty, NULL, 0, &status);	1860 utext_openUChars(&empty, NULL, 0, &status);

1853	1861

1854 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */	1862 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not nativelen (input1) ? */

1855 int32_t input2Len = strlen("not abc");	1863 int32_t input2Len = strlen("not abc");

1856	1864

1857	1865

1858 //	1866 //

1859 // Matcher creation and reset.	1867 // Matcher creation and reset.

1860 //	1868 //

1861 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1);	1869 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1);

1862 REGEX_CHECK_STATUS;	1870 REGEX_CHECK_STATUS;

1863 REGEX_ASSERT(m1->lookingAt(status) == TRUE);	1871 REGEX_ASSERT(m1->lookingAt(status) == TRUE);

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1953 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);	1961 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);

1954 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);	1962 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

1955 status = U_ZERO_ERROR;	1963 status = U_ZERO_ERROR;

1956 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE);	1964 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE);

1957 REGEX_CHECK_STATUS;	1965 REGEX_CHECK_STATUS;

1958 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE);	1966 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE);

1959 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);	1967 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

1960	1968

1961 delete m1;	1969 delete m1;

1962 delete pat2;	1970 delete pat2;

1963	1971

1964 utext_close(&re);	1972 utext_close(&re);

1965 utext_close(&input1);	1973 utext_close(&input1);

1966 utext_close(&input2);	1974 utext_close(&input2);

1967 utext_close(&empty);	1975 utext_close(&empty);

1968 }	1976 }

1969	1977

1970	1978

1971 //	1979 //

1972 // Capture Group.	1980 // Capture Group.

1973 // RegexMatcher::start();	1981 // RegexMatcher::start();

1974 // RegexMatcher::end();	1982 // RegexMatcher::end();

1975 // RegexMatcher::groupCount();	1983 // RegexMatcher::groupCount();

1976 //	1984 //

1977 {	1985 {

1978 int32_t flags=0;	1986 int32_t flags=0;

1979 UParseError pe;	1987 UParseError pe;

1980 UErrorCode status=U_ZERO_ERROR;	1988 UErrorCode status=U_ZERO_ERROR;

1981 UText re=UTEXT_INITIALIZER;	1989 UText re=UTEXT_INITIALIZER;

1982 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */	1990 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67)(.*) */

1983 utext_openUTF8(&re, str_01234567_pat, -1, &status);	1991 utext_openUTF8(&re, str_01234567_pat, -1, &status);

1984	1992

1985 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);	1993 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);

1986 REGEX_CHECK_STATUS;	1994 REGEX_CHECK_STATUS;

1987	1995

1988 UText input = UTEXT_INITIALIZER;	1996 UText input = UTEXT_INITIALIZER;

1989 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */	1997 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */

1990 utext_openUTF8(&input, str_0123456789, -1, &status);	1998 utext_openUTF8(&input, str_0123456789, -1, &status);

1991	1999

1992 RegexMatcher *matcher = &pat->matcher(status)->reset(&input);	2000 RegexMatcher *matcher = &pat->matcher(status)->reset(&input);

1993 REGEX_CHECK_STATUS;	2001 REGEX_CHECK_STATUS;

1994 REGEX_ASSERT(matcher->lookingAt(status) == TRUE);	2002 REGEX_ASSERT(matcher->lookingAt(status) == TRUE);

1995 static const int32_t matchStarts[] = {0, 2, 4, 8};	2003 static const int32_t matchStarts[] = {0, 2, 4, 8};

1996 static const int32_t matchEnds[] = {10, 8, 6, 10};	2004 static const int32_t matchEnds[] = {10, 8, 6, 10};

1997 int32_t i;	2005 int32_t i;

(...skipping 14 matching lines...) Expand all Loading...
2012	2020

2013 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));	2021 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));

2014 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));	2022 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));

2015	2023

2016 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);	2024 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);

2017 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);	2025 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);

2018 matcher->reset();	2026 matcher->reset();

2019 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);	2027 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);

2020	2028

2021 matcher->lookingAt(status);	2029 matcher->lookingAt(status);

2022	2030

2023 UnicodeString dest;	2031 UnicodeString dest;

2024 UText destText = UTEXT_INITIALIZER;	2032 UText destText = UTEXT_INITIALIZER;

2025 utext_openUnicodeString(&destText, &dest, &status);	2033 utext_openUnicodeString(&destText, &dest, &status);

2026 UText *result;	2034 UText *result;

2027 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */	2035 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */

2028 //» Test shallow-clone API	2036 // Test shallow-clone API

2029 int64_t group_len;	2037 int64_t group_len;

2030 result = matcher->group((UText *)NULL, group_len, status);	2038 result = matcher->group((UText *)NULL, group_len, status);

2031 REGEX_CHECK_STATUS;	2039 REGEX_CHECK_STATUS;

2032 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2040 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2033 utext_close(result);	2041 utext_close(result);

2034 result = matcher->group(0, &destText, group_len, status);	2042 result = matcher->group(0, &destText, group_len, status);

2035 REGEX_CHECK_STATUS;	2043 REGEX_CHECK_STATUS;

2036 REGEX_ASSERT(result == &destText);	2044 REGEX_ASSERT(result == &destText);

2037 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2045 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2038 // destText is now immutable, reopen it	2046 // destText is now immutable, reopen it

2039 utext_close(&destText);	2047 utext_close(&destText);

2040 utext_openUnicodeString(&destText, &dest, &status);	2048 utext_openUnicodeString(&destText, &dest, &status);

2041	2049

2042 result = matcher->group(0, NULL, status);	2050 result = matcher->group(0, NULL, status);

2043 REGEX_CHECK_STATUS;	2051 REGEX_CHECK_STATUS;

2044 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2052 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2045 utext_close(result);	2053 utext_close(result);

2046 result = matcher->group(0, &destText, status);	2054 result = matcher->group(0, &destText, status);

2047 REGEX_CHECK_STATUS;	2055 REGEX_CHECK_STATUS;

2048 REGEX_ASSERT(result == &destText);	2056 REGEX_ASSERT(result == &destText);

2049 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);	2057 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);

2050	2058

2051 result = matcher->group(1, NULL, status);	2059 result = matcher->group(1, NULL, status);

2052 REGEX_CHECK_STATUS;	2060 REGEX_CHECK_STATUS;

2053 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */	2061 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */

2054 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);	2062 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);

2055 utext_close(result);	2063 utext_close(result);

2056 result = matcher->group(1, &destText, status);	2064 result = matcher->group(1, &destText, status);

2057 REGEX_CHECK_STATUS;	2065 REGEX_CHECK_STATUS;

2058 REGEX_ASSERT(result == &destText);	2066 REGEX_ASSERT(result == &destText);

2059 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);	2067 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);

2060	2068

2061 result = matcher->group(2, NULL, status);	2069 result = matcher->group(2, NULL, status);

2062 REGEX_CHECK_STATUS;	2070 REGEX_CHECK_STATUS;

2063 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */	2071 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */

2064 REGEX_ASSERT_UTEXT_UTF8(str_45, result);	2072 REGEX_ASSERT_UTEXT_UTF8(str_45, result);

2065 utext_close(result);	2073 utext_close(result);

2066 result = matcher->group(2, &destText, status);	2074 result = matcher->group(2, &destText, status);

2067 REGEX_CHECK_STATUS;	2075 REGEX_CHECK_STATUS;

2068 REGEX_ASSERT(result == &destText);	2076 REGEX_ASSERT(result == &destText);

2069 REGEX_ASSERT_UTEXT_UTF8(str_45, result);	2077 REGEX_ASSERT_UTEXT_UTF8(str_45, result);

2070	2078

2071 result = matcher->group(3, NULL, status);	2079 result = matcher->group(3, NULL, status);

2072 REGEX_CHECK_STATUS;	2080 REGEX_CHECK_STATUS;

2073 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */	2081 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */

2074 REGEX_ASSERT_UTEXT_UTF8(str_89, result);	2082 REGEX_ASSERT_UTEXT_UTF8(str_89, result);

2075 utext_close(result);	2083 utext_close(result);

2076 result = matcher->group(3, &destText, status);	2084 result = matcher->group(3, &destText, status);

2077 REGEX_CHECK_STATUS;	2085 REGEX_CHECK_STATUS;

2078 REGEX_ASSERT(result == &destText);	2086 REGEX_ASSERT(result == &destText);

2079 REGEX_ASSERT_UTEXT_UTF8(str_89, result);	2087 REGEX_ASSERT_UTEXT_UTF8(str_89, result);

2080	2088

2081 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);	2089 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);

2082 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);	2090 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR);

2083 matcher->reset();	2091 matcher->reset();

2084 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);	2092 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);

2085	2093

2086 delete matcher;	2094 delete matcher;

2087 delete pat;	2095 delete pat;

2088	2096

2089 utext_close(&destText);	2097 utext_close(&destText);

2090 utext_close(&input);	2098 utext_close(&input);

2091 utext_close(&re);	2099 utext_close(&re);

2092 }	2100 }

2093	2101

2094 //	2102 //

2095 // find	2103 // find

2096 //	2104 //

2097 {	2105 {

2098 int32_t flags=0;	2106 int32_t flags=0;

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2139	2147

2140 status = U_ZERO_ERROR;	2148 status = U_ZERO_ERROR;

2141 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);	2149 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);

2142 status = U_ZERO_ERROR;	2150 status = U_ZERO_ERROR;

2143 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);	2151 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);

2144	2152

2145 REGEX_ASSERT(matcher->groupCount() == 0);	2153 REGEX_ASSERT(matcher->groupCount() == 0);

2146	2154

2147 delete matcher;	2155 delete matcher;

2148 delete pat;	2156 delete pat;

2149	2157

2150 utext_close(&input);	2158 utext_close(&input);

2151 utext_close(&re);	2159 utext_close(&re);

2152 }	2160 }

2153	2161

2154	2162

2155 //	2163 //

2156 // find, with \G in pattern (true if at the end of a previous match).	2164 // find, with \G in pattern (true if at the end of a previous match).

2157 //	2165 //

2158 {	2166 {

2159 int32_t flags=0;	2167 int32_t flags=0;

2160 UParseError pe;	2168 UParseError pe;

2161 UErrorCode status=U_ZERO_ERROR;	2169 UErrorCode status=U_ZERO_ERROR;

2162 UText re=UTEXT_INITIALIZER;	2170 UText re=UTEXT_INITIALIZER;

2163 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */	2171 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x00 }; /* .*?(?:(\\Gabc)|(abc)) */

2164 utext_openUTF8(&re, str_Gabcabc, -1, &status);	2172 utext_openUTF8(&re, str_Gabcabc, -1, &status);

2165	2173

2166 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);	2174 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);

2167	2175

2168 REGEX_CHECK_STATUS;	2176 REGEX_CHECK_STATUS;

2169 UText input = UTEXT_INITIALIZER;	2177 UText input = UTEXT_INITIALIZER;

2170 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */	2178 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */

2171 utext_openUTF8(&input, str_abcabcabc, -1, &status);	2179 utext_openUTF8(&input, str_abcabcabc, -1, &status);

2172 // 012345678901234567	2180 // 012345678901234567

2173	2181

2174 RegexMatcher *matcher = &pat->matcher(status)->reset(&input);	2182 RegexMatcher *matcher = &pat->matcher(status)->reset(&input);

2175 REGEX_CHECK_STATUS;	2183 REGEX_CHECK_STATUS;

2176 REGEX_ASSERT(matcher->find());	2184 REGEX_ASSERT(matcher->find());

2177 REGEX_ASSERT(matcher->start(status) == 0);	2185 REGEX_ASSERT(matcher->start(status) == 0);

2178 REGEX_ASSERT(matcher->start(1, status) == -1);	2186 REGEX_ASSERT(matcher->start(1, status) == -1);

2179 REGEX_ASSERT(matcher->start(2, status) == 1);	2187 REGEX_ASSERT(matcher->start(2, status) == 1);

2180	2188

2181 REGEX_ASSERT(matcher->find());	2189 REGEX_ASSERT(matcher->find());

2182 REGEX_ASSERT(matcher->start(status) == 4);	2190 REGEX_ASSERT(matcher->start(status) == 4);

2183 REGEX_ASSERT(matcher->start(1, status) == 4);	2191 REGEX_ASSERT(matcher->start(1, status) == 4);

2184 REGEX_ASSERT(matcher->start(2, status) == -1);	2192 REGEX_ASSERT(matcher->start(2, status) == -1);

2185 REGEX_CHECK_STATUS;	2193 REGEX_CHECK_STATUS;

2186	2194

2187 delete matcher;	2195 delete matcher;

2188 delete pat;	2196 delete pat;

2189	2197

2190 utext_close(&input);	2198 utext_close(&input);

2191 utext_close(&re);	2199 utext_close(&re);

2192 }	2200 }

2193	2201

2194 //	2202 //

2195 // find with zero length matches, match position should bump ahead	2203 // find with zero length matches, match position should bump ahead

2196 // to prevent loops.	2204 // to prevent loops.

2197 //	2205 //

2198 {	2206 {

2199 int32_t i;	2207 int32_t i;

(...skipping 19 matching lines...) Expand all Loading...
2219 utext_openUTF8(&s, (char *)aboveBMP, -1, &status);	2227 utext_openUTF8(&s, (char *)aboveBMP, -1, &status);

2220 m.reset(&s);	2228 m.reset(&s);

2221 for (i=0; ; i+=4) {	2229 for (i=0; ; i+=4) {

2222 if (m.find() == FALSE) {	2230 if (m.find() == FALSE) {

2223 break;	2231 break;

2224 }	2232 }

2225 REGEX_ASSERT(m.start(status) == i);	2233 REGEX_ASSERT(m.start(status) == i);

2226 REGEX_ASSERT(m.end(status) == i);	2234 REGEX_ASSERT(m.end(status) == i);

2227 }	2235 }

2228 REGEX_ASSERT(i==20);	2236 REGEX_ASSERT(i==20);

2229	2237

2230 utext_close(&s);	2238 utext_close(&s);

2231 }	2239 }

2232 {	2240 {

2233 // find() loop breaking test.	2241 // find() loop breaking test.

2234 // with pattern of /.?/, should see a series of one char matches, then a single	2242 // with pattern of /.?/, should see a series of one char matches, then a single

2235 // match of zero length at the end of the input string.	2243 // match of zero length at the end of the input string.

2236 int32_t i;	2244 int32_t i;

2237 UErrorCode status=U_ZERO_ERROR;	2245 UErrorCode status=U_ZERO_ERROR;

2238 RegexMatcher m(".?", 0, status);	2246 RegexMatcher m(".?", 0, status);

2239 REGEX_CHECK_STATUS;	2247 REGEX_CHECK_STATUS;

2240 UText s = UTEXT_INITIALIZER;	2248 UText s = UTEXT_INITIALIZER;

2241 utext_openUTF8(&s, "    ", -1, &status);	2249 utext_openUTF8(&s, "    ", -1, &status);

2242 m.reset(&s);	2250 m.reset(&s);

2243 for (i=0; ; i++) {	2251 for (i=0; ; i++) {

2244 if (m.find() == FALSE) {	2252 if (m.find() == FALSE) {

2245 break;	2253 break;

2246 }	2254 }

2247 REGEX_ASSERT(m.start(status) == i);	2255 REGEX_ASSERT(m.start(status) == i);

2248 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));	2256 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));

2249 }	2257 }

2250 REGEX_ASSERT(i==5);	2258 REGEX_ASSERT(i==5);

2251	2259

2252 utext_close(&s);	2260 utext_close(&s);

2253 }	2261 }

2254	2262

2255	2263

2256 //	2264 //

2257 // Matchers with no input string behave as if they had an empty input string.	2265 // Matchers with no input string behave as if they had an empty input string.

2258 //	2266 //

2259	2267

2260 {	2268 {

2261 UErrorCode status = U_ZERO_ERROR;	2269 UErrorCode status = U_ZERO_ERROR;

2262 RegexMatcher m(".?", 0, status);	2270 RegexMatcher m(".?", 0, status);

2263 REGEX_CHECK_STATUS;	2271 REGEX_CHECK_STATUS;

2264 REGEX_ASSERT(m.find());	2272 REGEX_ASSERT(m.find());

2265 REGEX_ASSERT(m.start(status) == 0);	2273 REGEX_ASSERT(m.start(status) == 0);

2266 REGEX_ASSERT(m.input() == "");	2274 REGEX_ASSERT(m.input() == "");

2267 }	2275 }

2268 {	2276 {

2269 UErrorCode status = U_ZERO_ERROR;	2277 UErrorCode status = U_ZERO_ERROR;

2270 RegexPattern *p = RegexPattern::compile(".", 0, status);	2278 RegexPattern *p = RegexPattern::compile(".", 0, status);

2271 RegexMatcher *m = p->matcher(status);	2279 RegexMatcher *m = p->matcher(status);

2272 REGEX_CHECK_STATUS;	2280 REGEX_CHECK_STATUS;

2273	2281

2274 REGEX_ASSERT(m->find() == FALSE);	2282 REGEX_ASSERT(m->find() == FALSE);

2275 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0);	2283 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0);

2276 delete m;	2284 delete m;

2277 delete p;	2285 delete p;

2278 }	2286 }

2279	2287

2280 //	2288 //

2281 // Regions	2289 // Regions

2282 //	2290 //

2283 {	2291 {

2284 UErrorCode status = U_ZERO_ERROR;	2292 UErrorCode status = U_ZERO_ERROR;

2285 UText testPattern = UTEXT_INITIALIZER;	2293 UText testPattern = UTEXT_INITIALIZER;

2286 UText testText = UTEXT_INITIALIZER;	2294 UText testText = UTEXT_INITIALIZER;

2287 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status);	2295 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status);

2288 REGEX_VERBOSE_TEXT(&testPattern);	2296 REGEX_VERBOSE_TEXT(&testPattern);

2289 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);	2297 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &status);

2290 REGEX_VERBOSE_TEXT(&testText);	2298 REGEX_VERBOSE_TEXT(&testText);

2291	2299

2292 RegexMatcher m(&testPattern, &testText, 0, status);	2300 RegexMatcher m(&testPattern, &testText, 0, status);

2293 REGEX_CHECK_STATUS;	2301 REGEX_CHECK_STATUS;

2294 REGEX_ASSERT(m.regionStart() == 0);	2302 REGEX_ASSERT(m.regionStart() == 0);

2295 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));	2303 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));

2296 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	2304 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

2297 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	2305 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

2298	2306

2299 m.region(2,4, status);	2307 m.region(2,4, status);

2300 REGEX_CHECK_STATUS;	2308 REGEX_CHECK_STATUS;

2301 REGEX_ASSERT(m.matches(status));	2309 REGEX_ASSERT(m.matches(status));

2302 REGEX_ASSERT(m.start(status)==2);	2310 REGEX_ASSERT(m.start(status)==2);

2303 REGEX_ASSERT(m.end(status)==4);	2311 REGEX_ASSERT(m.end(status)==4);

2304 REGEX_CHECK_STATUS;	2312 REGEX_CHECK_STATUS;

2305	2313

2306 m.reset();	2314 m.reset();

2307 REGEX_ASSERT(m.regionStart() == 0);	2315 REGEX_ASSERT(m.regionStart() == 0);

2308 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));	2316 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));

2309	2317

2310 regextst_openUTF8FromInvariant(&testText, "short", -1, &status);	2318 regextst_openUTF8FromInvariant(&testText, "short", -1, &status);

2311 REGEX_VERBOSE_TEXT(&testText);	2319 REGEX_VERBOSE_TEXT(&testText);

2312 m.reset(&testText);	2320 m.reset(&testText);

2313 REGEX_ASSERT(m.regionStart() == 0);	2321 REGEX_ASSERT(m.regionStart() == 0);

2314 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));	2322 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));

2315	2323

2316 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	2324 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

2317 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));	2325 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));

2318 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);	2326 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

2319 REGEX_ASSERT(&m == &m.reset());	2327 REGEX_ASSERT(&m == &m.reset());

2320 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);	2328 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);

2321	2329

2322 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));	2330 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));

2323 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	2331 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

2324 REGEX_ASSERT(&m == &m.reset());	2332 REGEX_ASSERT(&m == &m.reset());

2325 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);	2333 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);

2326	2334

2327 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	2335 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

2328 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));	2336 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));

2329 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);	2337 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);

2330 REGEX_ASSERT(&m == &m.reset());	2338 REGEX_ASSERT(&m == &m.reset());

2331 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);	2339 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);

2332	2340

2333 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));	2341 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));

2334 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	2342 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

2335 REGEX_ASSERT(&m == &m.reset());	2343 REGEX_ASSERT(&m == &m.reset());

2336 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);	2344 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);

2337	2345

2338 utext_close(&testText);	2346 utext_close(&testText);

2339 utext_close(&testPattern);	2347 utext_close(&testPattern);

2340 }	2348 }

2341	2349

2342 //	2350 //

2343 // hitEnd() and requireEnd()	2351 // hitEnd() and requireEnd()

2344 //	2352 //

2345 {	2353 {

2346 UErrorCode status = U_ZERO_ERROR;	2354 UErrorCode status = U_ZERO_ERROR;

2347 UText testPattern = UTEXT_INITIALIZER;	2355 UText testPattern = UTEXT_INITIALIZER;

2348 UText testText = UTEXT_INITIALIZER;	2356 UText testText = UTEXT_INITIALIZER;

2349 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */	2357 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */

2350 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */	2358 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */

2351 utext_openUTF8(&testPattern, str_, -1, &status);	2359 utext_openUTF8(&testPattern, str_, -1, &status);

2352 utext_openUTF8(&testText, str_aabb, -1, &status);	2360 utext_openUTF8(&testText, str_aabb, -1, &status);

2353	2361

2354 RegexMatcher m1(&testPattern, &testText, 0, status);	2362 RegexMatcher m1(&testPattern, &testText, 0, status);

2355 REGEX_ASSERT(m1.lookingAt(status) == TRUE);	2363 REGEX_ASSERT(m1.lookingAt(status) == TRUE);

2356 REGEX_ASSERT(m1.hitEnd() == TRUE);	2364 REGEX_ASSERT(m1.hitEnd() == TRUE);

2357 REGEX_ASSERT(m1.requireEnd() == FALSE);	2365 REGEX_ASSERT(m1.requireEnd() == FALSE);

2358 REGEX_CHECK_STATUS;	2366 REGEX_CHECK_STATUS;

2359	2367

2360 status = U_ZERO_ERROR;	2368 status = U_ZERO_ERROR;

2361 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */	2369 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */

2362 utext_openUTF8(&testPattern, str_a, -1, &status);	2370 utext_openUTF8(&testPattern, str_a, -1, &status);

2363 RegexMatcher m2(&testPattern, &testText, 0, status);	2371 RegexMatcher m2(&testPattern, &testText, 0, status);

2364 REGEX_ASSERT(m2.lookingAt(status) == TRUE);	2372 REGEX_ASSERT(m2.lookingAt(status) == TRUE);

2365 REGEX_ASSERT(m2.hitEnd() == FALSE);	2373 REGEX_ASSERT(m2.hitEnd() == FALSE);

2366 REGEX_ASSERT(m2.requireEnd() == FALSE);	2374 REGEX_ASSERT(m2.requireEnd() == FALSE);

2367 REGEX_CHECK_STATUS;	2375 REGEX_CHECK_STATUS;

2368	2376

2369 status = U_ZERO_ERROR;	2377 status = U_ZERO_ERROR;

2370 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */	2378 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */

2371 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status);	2379 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status);

2372 RegexMatcher m3(&testPattern, &testText, 0, status);	2380 RegexMatcher m3(&testPattern, &testText, 0, status);

2373 REGEX_ASSERT(m3.lookingAt(status) == TRUE);	2381 REGEX_ASSERT(m3.lookingAt(status) == TRUE);

2374 REGEX_ASSERT(m3.hitEnd() == TRUE);	2382 REGEX_ASSERT(m3.hitEnd() == TRUE);

2375 REGEX_ASSERT(m3.requireEnd() == TRUE);	2383 REGEX_ASSERT(m3.requireEnd() == TRUE);

2376 REGEX_CHECK_STATUS;	2384 REGEX_CHECK_STATUS;

2377	2385

2378 utext_close(&testText);	2386 utext_close(&testText);

2379 utext_close(&testPattern);	2387 utext_close(&testPattern);

2380 }	2388 }

2381 }	2389 }

2382	2390

2383	2391

2384 //---------------------------------------------------------------------------	2392 //---------------------------------------------------------------------------

2385 //	2393 //

2386 // API_Replace_UTF8 API test for class RegexMatcher, testing the	2394 // API_Replace_UTF8 API test for class RegexMatcher, testing the

2387 // Replace family of functions.	2395 // Replace family of functions.

2388 //	2396 //

2389 //---------------------------------------------------------------------------	2397 //---------------------------------------------------------------------------

2390 void RegexTest::API_Replace_UTF8() {	2398 void RegexTest::API_Replace_UTF8() {

2391 //	2399 //

2392 // Replace	2400 // Replace

2393 //	2401 //

2394 int32_t flags=0;	2402 int32_t flags=0;

2395 UParseError pe;	2403 UParseError pe;

2396 UErrorCode status=U_ZERO_ERROR;	2404 UErrorCode status=U_ZERO_ERROR;

2397	2405

2398 UText re=UTEXT_INITIALIZER;	2406 UText re=UTEXT_INITIALIZER;

2399 regextst_openUTF8FromInvariant(&re, "abc", -1, &status);	2407 regextst_openUTF8FromInvariant(&re, "abc", -1, &status);

2400 REGEX_VERBOSE_TEXT(&re);	2408 REGEX_VERBOSE_TEXT(&re);

2401 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);	2409 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);

2402 REGEX_CHECK_STATUS;	2410 REGEX_CHECK_STATUS;

2403	2411

2404 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */	2412 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */

2405 // 012345678901234567	2413 // 012345678901234567

2406 UText dataText = UTEXT_INITIALIZER;	2414 UText dataText = UTEXT_INITIALIZER;

2407 utext_openUTF8(&dataText, data, -1, &status);	2415 utext_openUTF8(&dataText, data, -1, &status);

2408 REGEX_CHECK_STATUS;	2416 REGEX_CHECK_STATUS;

2409 REGEX_VERBOSE_TEXT(&dataText);	2417 REGEX_VERBOSE_TEXT(&dataText);

2410 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText);	2418 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText);

2411	2419

2412 //	2420 //

2413 // Plain vanilla matches.	2421 // Plain vanilla matches.

2414 //	2422 //

2415 UnicodeString dest;	2423 UnicodeString dest;

2416 UText destText = UTEXT_INITIALIZER;	2424 UText destText = UTEXT_INITIALIZER;

2417 utext_openUnicodeString(&destText, &dest, &status);	2425 utext_openUnicodeString(&destText, &dest, &status);

2418 UText *result;	2426 UText *result;

2419	2427

2420 UText replText = UTEXT_INITIALIZER;	2428 UText replText = UTEXT_INITIALIZER;

2421	2429

2422 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */	2430 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */

2423 utext_openUTF8(&replText, str_yz, -1, &status);	2431 utext_openUTF8(&replText, str_yz, -1, &status);

2424 REGEX_VERBOSE_TEXT(&replText);	2432 REGEX_VERBOSE_TEXT(&replText);

2425 result = matcher->replaceFirst(&replText, NULL, status);	2433 result = matcher->replaceFirst(&replText, NULL, status);

2426 REGEX_CHECK_STATUS;	2434 REGEX_CHECK_STATUS;

2427 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63 , 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */	2435 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63 , 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */

2428 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);	2436 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);

2429 utext_close(result);	2437 utext_close(result);

2430 result = matcher->replaceFirst(&replText, &destText, status);	2438 result = matcher->replaceFirst(&replText, &destText, status);

2431 REGEX_CHECK_STATUS;	2439 REGEX_CHECK_STATUS;

(...skipping 11 matching lines...) Expand all Loading...
2443 REGEX_CHECK_STATUS;	2451 REGEX_CHECK_STATUS;

2444 REGEX_ASSERT(result == &destText);	2452 REGEX_ASSERT(result == &destText);

2445 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);	2453 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);

2446	2454

2447 //	2455 //

2448 // Plain vanilla non-matches.	2456 // Plain vanilla non-matches.

2449 //	2457 //

2450 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */	2458 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...abx.. */

2451 utext_openUTF8(&dataText, str_abxabxabx, -1, &status);	2459 utext_openUTF8(&dataText, str_abxabxabx, -1, &status);

2452 matcher->reset(&dataText);	2460 matcher->reset(&dataText);

2453	2461

2454 result = matcher->replaceFirst(&replText, NULL, status);	2462 result = matcher->replaceFirst(&replText, NULL, status);

2455 REGEX_CHECK_STATUS;	2463 REGEX_CHECK_STATUS;

2456 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);	2464 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);

2457 utext_close(result);	2465 utext_close(result);

2458 result = matcher->replaceFirst(&replText, &destText, status);	2466 result = matcher->replaceFirst(&replText, &destText, status);

2459 REGEX_CHECK_STATUS;	2467 REGEX_CHECK_STATUS;

2460 REGEX_ASSERT(result == &destText);	2468 REGEX_ASSERT(result == &destText);

2461 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);	2469 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);

2462	2470

2463 result = matcher->replaceAll(&replText, NULL, status);	2471 result = matcher->replaceAll(&replText, NULL, status);

2464 REGEX_CHECK_STATUS;	2472 REGEX_CHECK_STATUS;

2465 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);	2473 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);

2466 utext_close(result);	2474 utext_close(result);

2467 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2475 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2468 result = matcher->replaceAll(&replText, &destText, status);	2476 result = matcher->replaceAll(&replText, &destText, status);

2469 REGEX_CHECK_STATUS;	2477 REGEX_CHECK_STATUS;

2470 REGEX_ASSERT(result == &destText);	2478 REGEX_ASSERT(result == &destText);

2471 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);	2479 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);

2472	2480

2473 //	2481 //

2474 // Empty source string	2482 // Empty source string

2475 //	2483 //

2476 utext_openUTF8(&dataText, NULL, 0, &status);	2484 utext_openUTF8(&dataText, NULL, 0, &status);

2477 matcher->reset(&dataText);	2485 matcher->reset(&dataText);

2478	2486

2479 result = matcher->replaceFirst(&replText, NULL, status);	2487 result = matcher->replaceFirst(&replText, NULL, status);

2480 REGEX_CHECK_STATUS;	2488 REGEX_CHECK_STATUS;

2481 REGEX_ASSERT_UTEXT_UTF8("", result);	2489 REGEX_ASSERT_UTEXT_UTF8("", result);

2482 utext_close(result);	2490 utext_close(result);

2483 result = matcher->replaceFirst(&replText, &destText, status);	2491 result = matcher->replaceFirst(&replText, &destText, status);

2484 REGEX_CHECK_STATUS;	2492 REGEX_CHECK_STATUS;

2485 REGEX_ASSERT(result == &destText);	2493 REGEX_ASSERT(result == &destText);

2486 REGEX_ASSERT_UTEXT_UTF8("", result);	2494 REGEX_ASSERT_UTEXT_UTF8("", result);

2487	2495

2488 result = matcher->replaceAll(&replText, NULL, status);	2496 result = matcher->replaceAll(&replText, NULL, status);

2489 REGEX_CHECK_STATUS;	2497 REGEX_CHECK_STATUS;

2490 REGEX_ASSERT_UTEXT_UTF8("", result);	2498 REGEX_ASSERT_UTEXT_UTF8("", result);

2491 utext_close(result);	2499 utext_close(result);

2492 result = matcher->replaceAll(&replText, &destText, status);	2500 result = matcher->replaceAll(&replText, &destText, status);

2493 REGEX_CHECK_STATUS;	2501 REGEX_CHECK_STATUS;

2494 REGEX_ASSERT(result == &destText);	2502 REGEX_ASSERT(result == &destText);

2495 REGEX_ASSERT_UTEXT_UTF8("", result);	2503 REGEX_ASSERT_UTEXT_UTF8("", result);

2496	2504

2497 //	2505 //

2498 // Empty substitution string	2506 // Empty substitution string

2499 //	2507 //

2500 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."	2508 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."

2501 matcher->reset(&dataText);	2509 matcher->reset(&dataText);

2502	2510

2503 utext_openUTF8(&replText, NULL, 0, &status);	2511 utext_openUTF8(&replText, NULL, 0, &status);

2504 result = matcher->replaceFirst(&replText, NULL, status);	2512 result = matcher->replaceFirst(&replText, NULL, status);

2505 REGEX_CHECK_STATUS;	2513 REGEX_CHECK_STATUS;

2506 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */	2514 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */

2507 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);	2515 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);

2508 utext_close(result);	2516 utext_close(result);

2509 result = matcher->replaceFirst(&replText, &destText, status);	2517 result = matcher->replaceFirst(&replText, &destText, status);

2510 REGEX_CHECK_STATUS;	2518 REGEX_CHECK_STATUS;

2511 REGEX_ASSERT(result == &destText);	2519 REGEX_ASSERT(result == &destText);

2512 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);	2520 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2556 //	2564 //

2557 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */	2565 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */

2558 utext_openUTF8(&re, str_add, -1, &status);	2566 utext_openUTF8(&re, str_add, -1, &status);

2559 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status);	2567 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status);

2560 REGEX_CHECK_STATUS;	2568 REGEX_CHECK_STATUS;

2561	2569

2562 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */	2570 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */

2563 utext_openUTF8(&dataText, str_abcdefg, -1, &status);	2571 utext_openUTF8(&dataText, str_abcdefg, -1, &status);

2564 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);	2572 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);

2565 REGEX_CHECK_STATUS;	2573 REGEX_CHECK_STATUS;

2566	2574

2567 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */	2575 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */

2568 utext_openUTF8(&replText, str_11, -1, &status);	2576 utext_openUTF8(&replText, str_11, -1, &status);

2569 result = matcher2->replaceFirst(&replText, NULL, status);	2577 result = matcher2->replaceFirst(&replText, NULL, status);

2570 REGEX_CHECK_STATUS;	2578 REGEX_CHECK_STATUS;

2571 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 , 0x00 }; /* bcbcdefg */	2579 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 , 0x00 }; /* bcbcdefg */

2572 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);	2580 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);

2573 utext_close(result);	2581 utext_close(result);

2574 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2582 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2575 result = matcher2->replaceFirst(&replText, &destText, status);	2583 result = matcher2->replaceFirst(&replText, &destText, status);

2576 REGEX_CHECK_STATUS;	2584 REGEX_CHECK_STATUS;

2577 REGEX_ASSERT(result == &destText);	2585 REGEX_ASSERT(result == &destText);

2578 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);	2586 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);

2579	2587

2580 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */	2588 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */

2581 utext_openUTF8(&replText, str_v, -1, &status);	2589 utext_openUTF8(&replText, str_v, -1, &status);

2582 REGEX_VERBOSE_TEXT(&replText);	2590 REGEX_VERBOSE_TEXT(&replText);

2583 result = matcher2->replaceFirst(&replText, NULL, status);	2591 result = matcher2->replaceFirst(&replText, NULL, status);

2584 REGEX_CHECK_STATUS;	2592 REGEX_CHECK_STATUS;

2585 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */	2593 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg */

2586 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);	2594 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);

2587 utext_close(result);	2595 utext_close(result);

2588 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2596 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2589 result = matcher2->replaceFirst(&replText, &destText, status);	2597 result = matcher2->replaceFirst(&replText, &destText, status);

2590 REGEX_CHECK_STATUS;	2598 REGEX_CHECK_STATUS;

2591 REGEX_ASSERT(result == &destText);	2599 REGEX_ASSERT(result == &destText);

2592 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);	2600 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);

2593	2601

2594 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */	2602 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x00 }; /* $ by itself, no group number $$$ */

2595 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);	2603 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);

2596 result = matcher2->replaceFirst(&replText, NULL, status);	2604 result = matcher2->replaceFirst(&replText, NULL, status);

2597 REGEX_CHECK_STATUS;	2605 REGEX_CHECK_STATUS;

2598 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */	2606 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */

2599 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);	2607 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);

2600 utext_close(result);	2608 utext_close(result);

2601 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2609 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2602 result = matcher2->replaceFirst(&replText, &destText, status);	2610 result = matcher2->replaceFirst(&replText, &destText, status);

2603 REGEX_CHECK_STATUS;	2611 REGEX_CHECK_STATUS;

2604 REGEX_ASSERT(result == &destText);	2612 REGEX_ASSERT(result == &destText);

2605 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);	2613 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);

2606	2614

2607 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d , 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */	2615 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d , 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */

2608 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE	2616 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001D7CF, MATHEMATICAL BOLD DIGIT ONE

2609 // 012345678901234567890123456	2617 // 012345678901234567890123456

2610 supplDigitChars[22] = 0xF0;	2618 supplDigitChars[22] = 0xF0;

2611 supplDigitChars[23] = 0x9D;	2619 supplDigitChars[23] = 0x9D;

2612 supplDigitChars[24] = 0x9F;	2620 supplDigitChars[24] = 0x9F;

2613 supplDigitChars[25] = 0x8F;	2621 supplDigitChars[25] = 0x8F;

2614 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);	2622 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);

2615	2623

2616 result = matcher2->replaceFirst(&replText, NULL, status);	2624 result = matcher2->replaceFirst(&replText, NULL, status);

2617 REGEX_CHECK_STATUS;	2625 REGEX_CHECK_STATUS;

2618 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */	2626 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplemental Digit 1 bc.defg */

2619 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);	2627 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);

2620 utext_close(result);	2628 utext_close(result);

2621 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2629 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2622 result = matcher2->replaceFirst(&replText, &destText, status);	2630 result = matcher2->replaceFirst(&replText, &destText, status);

2623 REGEX_CHECK_STATUS;	2631 REGEX_CHECK_STATUS;

2624 REGEX_ASSERT(result == &destText);	2632 REGEX_ASSERT(result == &destText);

2625 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);	2633 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);

2626 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */	2634 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */

2627 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status);	2635 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status);

2628 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)) , U_INDEX_OUTOFBOUNDS_ERROR);	2636 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)) , U_INDEX_OUTOFBOUNDS_ERROR);

2629 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);	2637 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);

2630 utext_close(result);	2638 utext_close(result);

2631 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;	2639 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;

2632 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR);	2640 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, status)), U_INDEX_OUTOFBOUNDS_ERROR);

2633 REGEX_ASSERT(result == &destText);	2641 REGEX_ASSERT(result == &destText);

2634 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);	2642 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);

2635	2643

2636 //	2644 //

2637 // Replacement String with \u hex escapes	2645 // Replacement String with \u hex escapes

2638 //	2646 //

2639 {	2647 {

2640 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */	2648 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 abc 2 abc 3 */

2641 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */	2649 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */

2642 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);	2650 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);

2643 utext_openUTF8(&replText, str_u0043, -1, &status);	2651 utext_openUTF8(&replText, str_u0043, -1, &status);

2644 matcher->reset(&dataText);	2652 matcher->reset(&dataText);

2645	2653

2646 result = matcher->replaceAll(&replText, NULL, status);	2654 result = matcher->replaceAll(&replText, NULL, status);

2647 REGEX_CHECK_STATUS;	2655 REGEX_CHECK_STATUS;

2648 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */	2656 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */

2649 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);	2657 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);

2650 utext_close(result);	2658 utext_close(result);

2651 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);	2659 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);

2652 result = matcher->replaceAll(&replText, &destText, status);	2660 result = matcher->replaceAll(&replText, &destText, status);

2653 REGEX_CHECK_STATUS;	2661 REGEX_CHECK_STATUS;

2654 REGEX_ASSERT(result == &destText);	2662 REGEX_ASSERT(result == &destText);

2655 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);	2663 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);

2656 }	2664 }

2657 {	2665 {

2658 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */	2666 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */

2659 utext_openUTF8(&dataText, str_abc, -1, &status);	2667 utext_openUTF8(&dataText, str_abc, -1, &status);

2660 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */	2668 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */

2661 utext_openUTF8(&replText, str_U00010000, -1, &status);	2669 utext_openUTF8(&replText, str_U00010000, -1, &status);

2662 matcher->reset(&dataText);	2670 matcher->reset(&dataText);

2663	2671

2664 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"	2672 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"

2665 // 0123456789	2673 // 0123456789

2666 expected[2] = 0xF0;	2674 expected[2] = 0xF0;

2667 expected[3] = 0x90;	2675 expected[3] = 0x90;

2668 expected[4] = 0x80;	2676 expected[4] = 0x80;

2669 expected[5] = 0x80;	2677 expected[5] = 0x80;

2670	2678

2671 result = matcher->replaceAll(&replText, NULL, status);	2679 result = matcher->replaceAll(&replText, NULL, status);

2672 REGEX_CHECK_STATUS;	2680 REGEX_CHECK_STATUS;

2673 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);	2681 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);

2674 utext_close(result);	2682 utext_close(result);

2675 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);	2683 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status);

2676 result = matcher->replaceAll(&replText, &destText, status);	2684 result = matcher->replaceAll(&replText, &destText, status);

2677 REGEX_CHECK_STATUS;	2685 REGEX_CHECK_STATUS;

2678 REGEX_ASSERT(result == &destText);	2686 REGEX_ASSERT(result == &destText);

2679 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);	2687 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);

2680 }	2688 }

2681 // TODO: need more thorough testing of capture substitutions.	2689 // TODO: need more thorough testing of capture substitutions.

2682	2690

2683 // Bug 4057	2691 // Bug 4057

2684 //	2692 //

2685 {	2693 {

2686 status = U_ZERO_ERROR;	2694 status = U_ZERO_ERROR;

2687 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */	2695 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */

2688 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */	2696 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ss stuff ee fin */

2689 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */	2697 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */

2690 utext_openUTF8(&re, str_ssee, -1, &status);	2698 utext_openUTF8(&re, str_ssee, -1, &status);

2691 utext_openUTF8(&dataText, str_blah, -1, &status);	2699 utext_openUTF8(&dataText, str_blah, -1, &status);

2692 utext_openUTF8(&replText, str_ooh, -1, &status);	2700 utext_openUTF8(&replText, str_ooh, -1, &status);

2693	2701

2694 RegexMatcher m(&re, 0, status);	2702 RegexMatcher m(&re, 0, status);

2695 REGEX_CHECK_STATUS;	2703 REGEX_CHECK_STATUS;

2696	2704

2697 UnicodeString result;	2705 UnicodeString result;

2698 UText resultText = UTEXT_INITIALIZER;	2706 UText resultText = UTEXT_INITIALIZER;

2699 utext_openUnicodeString(&resultText, &result, &status);	2707 utext_openUnicodeString(&resultText, &result, &status);

2700	2708

2701 // Multiple finds do NOT bump up the previous appendReplacement position.	2709 // Multiple finds do NOT bump up the previous appendReplacement position.

2702 m.reset(&dataText);	2710 m.reset(&dataText);

2703 m.find();	2711 m.find();

2704 m.find();	2712 m.find();

2705 m.appendReplacement(&resultText, &replText, status);	2713 m.appendReplacement(&resultText, &replText, status);

2706 REGEX_CHECK_STATUS;	2714 REGEX_CHECK_STATUS;

(...skipping 20 matching lines...) Expand all Loading...
2727 m.find(10, status);	2735 m.find(10, status);

2728 m.find();	2736 m.find();

2729 m.appendReplacement(&resultText, &replText, status);	2737 m.appendReplacement(&resultText, &replText, status);

2730 REGEX_CHECK_STATUS;	2738 REGEX_CHECK_STATUS;

2731 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */	2739 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */

2732 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);	2740 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);

2733	2741

2734 m.appendTail(&resultText, status);	2742 m.appendTail(&resultText, status);

2735 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */	2743 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */

2736 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);	2744 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);

2737	2745

2738 utext_close(&resultText);	2746 utext_close(&resultText);

2739 }	2747 }

2740	2748

2741 delete matcher2;	2749 delete matcher2;

2742 delete pat2;	2750 delete pat2;

2743 delete matcher;	2751 delete matcher;

2744 delete pat;	2752 delete pat;

2745	2753

2746 utext_close(&dataText);	2754 utext_close(&dataText);

2747 utext_close(&replText);	2755 utext_close(&replText);

2748 utext_close(&destText);	2756 utext_close(&destText);

2749 utext_close(&re);	2757 utext_close(&re);

2750 }	2758 }

2751	2759

2752	2760

2753 //---------------------------------------------------------------------------	2761 //---------------------------------------------------------------------------

2754 //	2762 //

2755 // API_Pattern_UTF8 Test that the API for class RegexPattern is	2763 // API_Pattern_UTF8 Test that the API for class RegexPattern is

2756 // present and nominally working.	2764 // present and nominally working.

2757 //	2765 //

2758 //---------------------------------------------------------------------------	2766 //---------------------------------------------------------------------------

2759 void RegexTest::API_Pattern_UTF8() {	2767 void RegexTest::API_Pattern_UTF8() {

2760 RegexPattern pata; // Test default constructor to not crash.	2768 RegexPattern pata; // Test default constructor to not crash.

2761 RegexPattern patb;	2769 RegexPattern patb;

2762	2770

2763 REGEX_ASSERT(pata == patb);	2771 REGEX_ASSERT(pata == patb);

2764 REGEX_ASSERT(pata == pata);	2772 REGEX_ASSERT(pata == pata);

2765	2773

2766 UText re1 = UTEXT_INITIALIZER;	2774 UText re1 = UTEXT_INITIALIZER;

2767 UText re2 = UTEXT_INITIALIZER;	2775 UText re2 = UTEXT_INITIALIZER;

2768 UErrorCode status = U_ZERO_ERROR;	2776 UErrorCode status = U_ZERO_ERROR;

2769 UParseError pe;	2777 UParseError pe;

2770	2778

2771 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */	2779 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */

2772 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */	2780 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */

2773 utext_openUTF8(&re1, str_abcalmz, -1, &status);	2781 utext_openUTF8(&re1, str_abcalmz, -1, &status);

2774 utext_openUTF8(&re2, str_def, -1, &status);	2782 utext_openUTF8(&re2, str_def, -1, &status);

2775	2783

2776 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status);	2784 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status);

2777 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status);	2785 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status);

2778 REGEX_CHECK_STATUS;	2786 REGEX_CHECK_STATUS;

2779 REGEX_ASSERT(pat1 == pat1);	2787 REGEX_ASSERT(pat1 == pat1);

2780 REGEX_ASSERT(*pat1 != pata);	2788 REGEX_ASSERT(*pat1 != pata);

(...skipping 28 matching lines...) Expand all Loading...
2809	2817

2810 // clone	2818 // clone

2811 RegexPattern *pat1c = pat1->clone();	2819 RegexPattern *pat1c = pat1->clone();

2812 REGEX_ASSERT(pat1c == pat1);	2820 REGEX_ASSERT(pat1c == pat1);

2813 REGEX_ASSERT(pat1c != pat2);	2821 REGEX_ASSERT(pat1c != pat2);

2814	2822

2815 delete pat1c;	2823 delete pat1c;

2816 delete pat1a;	2824 delete pat1a;

2817 delete pat1;	2825 delete pat1;

2818 delete pat2;	2826 delete pat2;

2819	2827

2820 utext_close(&re1);	2828 utext_close(&re1);

2821 utext_close(&re2);	2829 utext_close(&re2);

2822	2830

2823	2831

2824 //	2832 //

2825 // Verify that a matcher created from a cloned pattern works.	2833 // Verify that a matcher created from a cloned pattern works.

2826 // (Jitterbug 3423)	2834 // (Jitterbug 3423)

2827 //	2835 //

2828 {	2836 {

2829 UErrorCode status = U_ZERO_ERROR;	2837 UErrorCode status = U_ZERO_ERROR;

2830 UText pattern = UTEXT_INITIALIZER;	2838 UText pattern = UTEXT_INITIALIZER;

2831 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \ p{L}+ */	2839 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \ p{L}+ */

2832 utext_openUTF8(&pattern, str_pL, -1, &status);	2840 utext_openUTF8(&pattern, str_pL, -1, &status);

2833	2841

2834 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status);	2842 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status);

2835 RegexPattern *pClone = pSource->clone();	2843 RegexPattern *pClone = pSource->clone();

2836 delete pSource;	2844 delete pSource;

2837 RegexMatcher *mFromClone = pClone->matcher(status);	2845 RegexMatcher *mFromClone = pClone->matcher(status);

2838 REGEX_CHECK_STATUS;	2846 REGEX_CHECK_STATUS;

2839	2847

2840 UText input = UTEXT_INITIALIZER;	2848 UText input = UTEXT_INITIALIZER;

2841 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57 , 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */	2849 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57 , 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */

2842 utext_openUTF8(&input, str_HelloWorld, -1, &status);	2850 utext_openUTF8(&input, str_HelloWorld, -1, &status);

2843 mFromClone->reset(&input);	2851 mFromClone->reset(&input);

2844 REGEX_ASSERT(mFromClone->find() == TRUE);	2852 REGEX_ASSERT(mFromClone->find() == TRUE);

2845 REGEX_ASSERT(mFromClone->group(status) == "Hello");	2853 REGEX_ASSERT(mFromClone->group(status) == "Hello");

2846 REGEX_ASSERT(mFromClone->find() == TRUE);	2854 REGEX_ASSERT(mFromClone->find() == TRUE);

2847 REGEX_ASSERT(mFromClone->group(status) == "World");	2855 REGEX_ASSERT(mFromClone->group(status) == "World");

2848 REGEX_ASSERT(mFromClone->find() == FALSE);	2856 REGEX_ASSERT(mFromClone->find() == FALSE);

2849 delete mFromClone;	2857 delete mFromClone;

2850 delete pClone;	2858 delete pClone;

2851	2859

2852 utext_close(&input);	2860 utext_close(&input);

2853 utext_close(&pattern);	2861 utext_close(&pattern);

2854 }	2862 }

2855	2863

2856 //	2864 //

2857 // matches convenience API	2865 // matches convenience API

2858 //	2866 //

2859 {	2867 {

2860 UErrorCode status = U_ZERO_ERROR;	2868 UErrorCode status = U_ZERO_ERROR;

2861 UText pattern = UTEXT_INITIALIZER;	2869 UText pattern = UTEXT_INITIALIZER;

2862 UText input = UTEXT_INITIALIZER;	2870 UText input = UTEXT_INITIALIZER;

2863	2871

2864 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */	2872 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x20, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */

2865 utext_openUTF8(&input, str_randominput, -1, &status);	2873 utext_openUTF8(&input, str_randominput, -1, &status);

2866	2874

2867 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */	2875 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */

2868 utext_openUTF8(&pattern, str_dotstar, -1, &status);	2876 utext_openUTF8(&pattern, str_dotstar, -1, &status);

2869 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE );	2877 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE );

2870 REGEX_CHECK_STATUS;	2878 REGEX_CHECK_STATUS;

2871	2879

2872 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */	2880 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */

2873 utext_openUTF8(&pattern, str_abc, -1, &status);	2881 utext_openUTF8(&pattern, str_abc, -1, &status);

2874 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);	2882 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);

2875 REGEX_CHECK_STATUS;	2883 REGEX_CHECK_STATUS;

2876	2884

2877 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */	2885 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */

2878 utext_openUTF8(&pattern, str_nput, -1, &status);	2886 utext_openUTF8(&pattern, str_nput, -1, &status);

2879 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);	2887 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);

2880 REGEX_CHECK_STATUS;	2888 REGEX_CHECK_STATUS;

2881	2889

2882 utext_openUTF8(&pattern, str_randominput, -1, &status);	2890 utext_openUTF8(&pattern, str_randominput, -1, &status);

2883 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);	2891 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, status) == TRUE);

2884 REGEX_CHECK_STATUS;	2892 REGEX_CHECK_STATUS;

2885	2893

2886 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */	2894 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */

2887 utext_openUTF8(&pattern, str_u, -1, &status);	2895 utext_openUTF8(&pattern, str_u, -1, &status);

2888 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);	2896 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);

2889 REGEX_CHECK_STATUS;	2897 REGEX_CHECK_STATUS;

2890	2898

2891 utext_openUTF8(&input, str_abc, -1, &status);	2899 utext_openUTF8(&input, str_abc, -1, &status);

2892 utext_openUTF8(&pattern, str_abc, -1, &status);	2900 utext_openUTF8(&pattern, str_abc, -1, &status);

2893 status = U_INDEX_OUTOFBOUNDS_ERROR;	2901 status = U_INDEX_OUTOFBOUNDS_ERROR;

2894 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);	2902 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);

2895 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);	2903 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

2896	2904

2897 utext_close(&input);	2905 utext_close(&input);

2898 utext_close(&pattern);	2906 utext_close(&pattern);

2899 }	2907 }

2900	2908

2901	2909

2902 //	2910 //

2903 // Split()	2911 // Split()

2904 //	2912 //

2905 status = U_ZERO_ERROR;	2913 status = U_ZERO_ERROR;

2906 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */	2914 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */

(...skipping 370 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3277 }	3285 }

3278	3286

3279	3287

3280 void RegexTest::regex_find(const UnicodeString &pattern,	3288 void RegexTest::regex_find(const UnicodeString &pattern,

3281 const UnicodeString &flags,	3289 const UnicodeString &flags,

3282 const UnicodeString &inputString,	3290 const UnicodeString &inputString,

3283 const char *srcPath,	3291 const char *srcPath,

3284 int32_t line) {	3292 int32_t line) {

3285 UnicodeString unEscapedInput;	3293 UnicodeString unEscapedInput;

3286 UnicodeString deTaggedInput;	3294 UnicodeString deTaggedInput;

3287	3295

3288 int32_t patternUTF8Length, inputUTF8Length;	3296 int32_t patternUTF8Length, inputUTF8Length;

3289 char *patternChars = NULL, *inputChars = NULL;	3297 char *patternChars = NULL, *inputChars = NULL;

3290 UText patternText = UTEXT_INITIALIZER;	3298 UText patternText = UTEXT_INITIALIZER;

3291 UText inputText = UTEXT_INITIALIZER;	3299 UText inputText = UTEXT_INITIALIZER;

3292 UConverter *UTF8Converter = NULL;	3300 UConverter *UTF8Converter = NULL;

3293	3301

3294 UErrorCode status = U_ZERO_ERROR;	3302 UErrorCode status = U_ZERO_ERROR;

3295 UParseError pe;	3303 UParseError pe;

3296 RegexPattern *parsePat = NULL;	3304 RegexPattern *parsePat = NULL;

3297 RegexMatcher *parseMatcher = NULL;	3305 RegexMatcher *parseMatcher = NULL;

3298 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL;	3306 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL;

3299 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL;	3307 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL;

3300 UVector groupStarts(status);	3308 UVector groupStarts(status);

3301 UVector groupEnds(status);	3309 UVector groupEnds(status);

3302 UVector groupStartsUTF8(status);	3310 UVector groupStartsUTF8(status);

3303 UVector groupEndsUTF8(status);	3311 UVector groupEndsUTF8(status);

3304 UBool isMatch = FALSE, isUTF8Match = FALSE;	3312 UBool isMatch = FALSE, isUTF8Match = FALSE;

3305 UBool failed = FALSE;	3313 UBool failed = FALSE;

3306 int32_t numFinds;	3314 int32_t numFinds;

3307 int32_t i;	3315 int32_t i;

3308 UBool useMatchesFunc = FALSE;	3316 UBool useMatchesFunc = FALSE;

3309 UBool useLookingAtFunc = FALSE;	3317 UBool useLookingAtFunc = FALSE;

3310 int32_t regionStart = -1;	3318 int32_t regionStart = -1;

3311 int32_t regionEnd = -1;	3319 int32_t regionEnd = -1;

3312 int32_t regionStartUTF8 = -1;	3320 int32_t regionStartUTF8 = -1;

3313 int32_t regionEndUTF8 = -1;	3321 int32_t regionEndUTF8 = -1;

3314	3322

3315	3323

3316 //	3324 //

3317 // Compile the caller's pattern	3325 // Compile the caller's pattern

3318 //	3326 //

3319 uint32_t bflags = 0;	3327 uint32_t bflags = 0;

3320 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag	3328 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag

3321 bflags |= UREGEX_CASE_INSENSITIVE;	3329 bflags |= UREGEX_CASE_INSENSITIVE;

3322 }	3330 }

3323 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag	3331 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag

3324 bflags |= UREGEX_COMMENTS;	3332 bflags |= UREGEX_COMMENTS;

3325 }	3333 }

3326 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag	3334 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag

3327 bflags |= UREGEX_DOTALL;	3335 bflags |= UREGEX_DOTALL;

3328 }	3336 }

3329 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag	3337 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag

3330 bflags |= UREGEX_MULTILINE;	3338 bflags |= UREGEX_MULTILINE;

3331 }	3339 }

3332	3340

3333 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag	3341 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag

3334 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;	3342 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;

3335 }	3343 }

3336 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag	3344 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag

3337 bflags |= UREGEX_UNIX_LINES;	3345 bflags |= UREGEX_UNIX_LINES;

3338 }	3346 }

3339 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag	3347 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag

3340 bflags |= UREGEX_LITERAL;	3348 bflags |= UREGEX_LITERAL;

3341 }	3349 }

3342	3350

(...skipping 15 matching lines...) Expand all Loading...
3358 goto cleanupAndReturn;	3366 goto cleanupAndReturn;

3359 } else {	3367 } else {

3360 // Unexpected pattern compilation error.	3368 // Unexpected pattern compilation error.

3361 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName( status));	3369 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName( status));

3362 goto cleanupAndReturn;	3370 goto cleanupAndReturn;

3363 }	3371 }

3364 }	3372 }

3365	3373

3366 UTF8Converter = ucnv_open("UTF8", &status);	3374 UTF8Converter = ucnv_open("UTF8", &status);

3367 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);	3375 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);

3368	3376

3369 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);	3377 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);

3370 status = U_ZERO_ERROR; // buffer overflow	3378 status = U_ZERO_ERROR; // buffer overflow

3371 patternChars = new char[patternUTF8Length+1];	3379 patternChars = new char[patternUTF8Length+1];

3372 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);	3380 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);

3373 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);	3381 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);

3374	3382

3375 if (status == U_ZERO_ERROR) {	3383 if (status == U_ZERO_ERROR) {

3376 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);	3384 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);

3377	3385

3378 if (status != U_ZERO_ERROR) {	3386 if (status != U_ZERO_ERROR) {

3379 #if UCONFIG_NO_BREAK_ITERATION==1	3387 #if UCONFIG_NO_BREAK_ITERATION==1

3380 // 'v' test flag means that the test pattern should not compile if I CU was configured	3388 // 'v' test flag means that the test pattern should not compile if I CU was configured

3381 // to not include break iteration. RBBI is needed for Unicode w ord boundaries.	3389 // to not include break iteration. RBBI is needed for Unicode w ord boundaries.

3382 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {	3390 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORTED_ERROR) {

3383 goto cleanupAndReturn;	3391 goto cleanupAndReturn;

3384 }	3392 }

3385 #endif	3393 #endif

3386 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E'	3394 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E'

3387 // Expected pattern compilation error.	3395 // Expected pattern compilation error.

3388 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd'	3396 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd'

3389 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status));	3397 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(status));

3390 }	3398 }

3391 goto cleanupAndReturn;	3399 goto cleanupAndReturn;

3392 } else {	3400 } else {

3393 // Unexpected pattern compilation error.	3401 // Unexpected pattern compilation error.

3394 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_errorName(status));	3402 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_errorName(status));

3395 goto cleanupAndReturn;	3403 goto cleanupAndReturn;

3396 }	3404 }

3397 }	3405 }

3398 }	3406 }

3399	3407

3400 if (UTF8Pattern == NULL) {	3408 if (UTF8Pattern == NULL) {

3401 // UTF-8 does not allow unpaired surrogates, so this could actually happ en without being a failure of the engine	3409 // UTF-8 does not allow unpaired surrogates, so this could actually happ en without being a failure of the engine

3402 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);	3410 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);

3403 status = U_ZERO_ERROR;	3411 status = U_ZERO_ERROR;

3404 }	3412 }

3405	3413

3406 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag	3414 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag

3407 RegexPatternDump(callerPattern);	3415 callerPattern->dumpPattern();

3408 }	3416 }

3409	3417

3410 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag	3418 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag

3411 errln("%s, Line %d: Expected, but did not get, a pattern compilation err or.", srcPath, line);	3419 errln("%s, Line %d: Expected, but did not get, a pattern compilation err or.", srcPath, line);

3412 goto cleanupAndReturn;	3420 goto cleanupAndReturn;

3413 }	3421 }

3414	3422

3415	3423

3416 //	3424 //

3417 // Number of times find() should be called on the test string, default to 1	3425 // Number of times find() should be called on the test string, default to 1

3418 //	3426 //

3419 numFinds = 1;	3427 numFinds = 1;

3420 for (i=2; i<=9; i++) {	3428 for (i=2; i<=9; i++) {

3421 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag	3429 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag

3422 if (numFinds != 1) {	3430 if (numFinds != 1) {

3423 errln("Line %d: more than one digit flag. Scanning %d.", line, i);	3431 errln("Line %d: more than one digit flag. Scanning %d.", line, i);

3424 goto cleanupAndReturn;	3432 goto cleanupAndReturn;

3425 }	3433 }

3426 numFinds = i;	3434 numFinds = i;

3427 }	3435 }

3428 }	3436 }

3429	3437

3430 // 'M' flag. Use matches() instead of find()	3438 // 'M' flag. Use matches() instead of find()

3431 if (flags.indexOf((UChar)0x4d) >= 0) {	3439 if (flags.indexOf((UChar)0x4d) >= 0) {

3432 useMatchesFunc = TRUE;	3440 useMatchesFunc = TRUE;

3433 }	3441 }

3434 if (flags.indexOf((UChar)0x4c) >= 0) {	3442 if (flags.indexOf((UChar)0x4c) >= 0) {

3435 useLookingAtFunc = TRUE;	3443 useLookingAtFunc = TRUE;

3436 }	3444 }

3437	3445

3438 //	3446 //

3439 // Find the tags in the input data, remove them, and record the group bound ary	3447 // Find the tags in the input data, remove them, and record the group bound ary

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3474 }	3482 }

3475	3483

3476 //	3484 //

3477 // Configure the matcher according to the flags specified with this test.	3485 // Configure the matcher according to the flags specified with this test.

3478 //	3486 //

3479 matcher = callerPattern->matcher(deTaggedInput, status);	3487 matcher = callerPattern->matcher(deTaggedInput, status);

3480 REGEX_CHECK_STATUS_L(line);	3488 REGEX_CHECK_STATUS_L(line);

3481 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag	3489 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag

3482 matcher->setTrace(TRUE);	3490 matcher->setTrace(TRUE);

3483 }	3491 }

3484	3492

3485 if (UTF8Pattern != NULL) {	3493 if (UTF8Pattern != NULL) {

3486 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);	3494 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);

3487 status = U_ZERO_ERROR; // buffer overflow	3495 status = U_ZERO_ERROR; // buffer overflow

3488 inputChars = new char[inputUTF8Length+1];	3496 inputChars = new char[inputUTF8Length+1];

3489 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status);	3497 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, status);

3490 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status);	3498 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status);

3491	3499

3492 if (status == U_ZERO_ERROR) {	3500 if (status == U_ZERO_ERROR) {

3493 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);	3501 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);

3494 REGEX_CHECK_STATUS_L(line);	3502 REGEX_CHECK_STATUS_L(line);

3495 }	3503 }

3496	3504

3497 if (UTF8Matcher == NULL) {	3505 if (UTF8Matcher == NULL) {

3498 // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine	3506 // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine

3499 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d" , srcPath, line);	3507 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d" , srcPath, line);

3500 status = U_ZERO_ERROR;	3508 status = U_ZERO_ERROR;

3501 }	3509 }

3502 }	3510 }

3503	3511

3504 //	3512 //

3505 // Generate native indices for UTF8 versions of region and capture group in fo	3513 // Generate native indices for UTF8 versions of region and capture group in fo

3506 //	3514 //

3507 if (UTF8Matcher != NULL) {	3515 if (UTF8Matcher != NULL) {

3508 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);	3516 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStart, regionStartUTF8);

3509 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);	3517 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);

3510	3518

3511 // Fill out the native index UVector info.	3519 // Fill out the native index UVector info.

3512 // Only need 1 loop, from above we know groupStarts.size() = groupEnds. size()	3520 // Only need 1 loop, from above we know groupStarts.size() = groupEnds. size()

3513 for (i=0; i<groupStarts.size(); i++) {	3521 for (i=0; i<groupStarts.size(); i++) {

3514 int32_t start = groupStarts.elementAti(i);	3522 int32_t start = groupStarts.elementAti(i);

3515 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting	3523 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting

3516 if (start >= 0) {	3524 if (start >= 0) {

3517 int32_t startUTF8;	3525 int32_t startUTF8;

3518 if (!utextOffsetToNative(&inputText, start, startUTF8)) {	3526 if (!utextOffsetToNative(&inputText, start, startUTF8)) {

3519 errln("Error at line %d: could not find native index for gro up start %d. UTF16 index %d", line, i, start);	3527 errln("Error at line %d: could not find native index for gro up start %d. UTF16 index %d", line, i, start);

3520 failed = TRUE;	3528 failed = TRUE;

3521 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now.	3529 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now.

3522 }	3530 }

3523 setInt(groupStartsUTF8, startUTF8, i);	3531 setInt(groupStartsUTF8, startUTF8, i);

3524 }	3532 }

3525	3533

3526 int32_t end = groupEnds.elementAti(i);	3534 int32_t end = groupEnds.elementAti(i);

3527 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting	3535 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting

3528 if (end >= 0) {	3536 if (end >= 0) {

3529 int32_t endUTF8;	3537 int32_t endUTF8;

3530 if (!utextOffsetToNative(&inputText, end, endUTF8)) {	3538 if (!utextOffsetToNative(&inputText, end, endUTF8)) {

3531 errln("Error at line %d: could not find native index for gro up end %d. UTF16 index %d", line, i, end);	3539 errln("Error at line %d: could not find native index for gro up end %d. UTF16 index %d", line, i, end);

3532 failed = TRUE;	3540 failed = TRUE;

3533 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now.	3541 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now.

3534 }	3542 }

3535 setInt(groupEndsUTF8, endUTF8, i);	3543 setInt(groupEndsUTF8, endUTF8, i);

(...skipping 14 matching lines...) Expand all Loading...
3550 if (UTF8Matcher != NULL) {	3558 if (UTF8Matcher != NULL) {

3551 UTF8Matcher->useAnchoringBounds(FALSE);	3559 UTF8Matcher->useAnchoringBounds(FALSE);

3552 }	3560 }

3553 }	3561 }

3554 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag	3562 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag

3555 matcher->useTransparentBounds(TRUE);	3563 matcher->useTransparentBounds(TRUE);

3556 if (UTF8Matcher != NULL) {	3564 if (UTF8Matcher != NULL) {

3557 UTF8Matcher->useTransparentBounds(TRUE);	3565 UTF8Matcher->useTransparentBounds(TRUE);

3558 }	3566 }

3559 }	3567 }

3560	3568

3561	3569

3562	3570

3563 //	3571 //

3564 // Do a find on the de-tagged input using the caller's pattern	3572 // Do a find on the de-tagged input using the caller's pattern

3565 // TODO: error on count>1 and not find().	3573 // TODO: error on count>1 and not find().

3566 // error on both matches() and lookingAt().	3574 // error on both matches() and lookingAt().

3567 //	3575 //

3568 for (i=0; i<numFinds; i++) {	3576 for (i=0; i<numFinds; i++) {

3569 if (useMatchesFunc) {	3577 if (useMatchesFunc) {

3570 isMatch = matcher->matches(status);	3578 isMatch = matcher->matches(status);

3571 if (UTF8Matcher != NULL) {	3579 if (UTF8Matcher != NULL) {

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3626 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d",	3634 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d",

3627 line, i, expectedStart, matcher->start(i, status));	3635 line, i, expectedStart, matcher->start(i, status));

3628 failed = TRUE;	3636 failed = TRUE;

3629 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.	3637 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.

3630 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) {	3638 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expectedStartUTF8) {

3631 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d (UTF8)",	3639 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d (UTF8)",

3632 line, i, expectedStartUTF8, UTF8Matcher->start(i, status));	3640 line, i, expectedStartUTF8, UTF8Matcher->start(i, status));

3633 failed = TRUE;	3641 failed = TRUE;

3634 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.	3642 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.

3635 }	3643 }

3636	3644

3637 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti (i));	3645 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti (i));

3638 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));	3646 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF8.elementAti(i));

3639 if (matcher->end(i, status) != expectedEnd) {	3647 if (matcher->end(i, status) != expectedEnd) {

3640 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d",	3648 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d",

3641 line, i, expectedEnd, matcher->end(i, status));	3649 line, i, expectedEnd, matcher->end(i, status));

3642 failed = TRUE;	3650 failed = TRUE;

3643 // Error on end position; keep going; real error is probably yet to come as group	3651 // Error on end position; keep going; real error is probably yet to come as group

3644 // end positions work from end of the input data towards the front .	3652 // end positions work from end of the input data towards the front .

3645 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) {	3653 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expectedEndUTF8) {

3646 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d (UTF8)",	3654 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d (UTF8)",

(...skipping 16 matching lines...) Expand all Loading...
3663	3671

3664 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa lse	3672 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa lse

3665 matcher->requireEnd() == TRUE) {	3673 matcher->requireEnd() == TRUE) {

3666 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l ine);	3674 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l ine);

3667 failed = TRUE;	3675 failed = TRUE;

3668 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false	3676 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false

3669 UTF8Matcher->requireEnd() == TRUE) {	3677 UTF8Matcher->requireEnd() == TRUE) {

3670 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT F8)", line);	3678 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT F8)", line);

3671 failed = TRUE;	3679 failed = TRUE;

3672 }	3680 }

3673	3681

3674 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr ue	3682 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr ue

3675 matcher->requireEnd() == FALSE) {	3683 matcher->requireEnd() == FALSE) {

3676 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l ine);	3684 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l ine);

3677 failed = TRUE;	3685 failed = TRUE;

3678 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false	3686 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false

3679 UTF8Matcher->requireEnd() == FALSE) {	3687 UTF8Matcher->requireEnd() == FALSE) {

3680 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT F8)", line);	3688 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT F8)", line);

3681 failed = TRUE;	3689 failed = TRUE;

3682 }	3690 }

3683	3691

3684 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false	3692 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false

3685 matcher->hitEnd() == TRUE) {	3693 matcher->hitEnd() == TRUE) {

3686 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line) ;	3694 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line) ;

3687 failed = TRUE;	3695 failed = TRUE;

3688 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false	3696 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false

3689 UTF8Matcher->hitEnd() == TRUE) {	3697 UTF8Matcher->hitEnd() == TRUE) {

3690 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)" , line);	3698 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)" , line);

3691 failed = TRUE;	3699 failed = TRUE;

3692 }	3700 }

3693	3701

3694 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true	3702 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true

3695 matcher->hitEnd() == FALSE) {	3703 matcher->hitEnd() == FALSE) {

3696 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line) ;	3704 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line) ;

3697 failed = TRUE;	3705 failed = TRUE;

3698 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true	3706 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true

3699 UTF8Matcher->hitEnd() == FALSE) {	3707 UTF8Matcher->hitEnd() == FALSE) {

3700 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)" , line);	3708 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)" , line);

3701 failed = TRUE;	3709 failed = TRUE;

3702 }	3710 }

3703	3711

3704	3712

3705 cleanupAndReturn:	3713 cleanupAndReturn:

3706 if (failed) {	3714 if (failed) {

3707 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" "	3715 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" "

3708 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\"");	3716 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\"");

3709 // callerPattern->dump();	3717 // callerPattern->dump();

3710 }	3718 }

3711 delete parseMatcher;	3719 delete parseMatcher;

3712 delete parsePat;	3720 delete parsePat;

3713 delete UTF8Matcher;	3721 delete UTF8Matcher;

3714 delete UTF8Pattern;	3722 delete UTF8Pattern;

3715 delete matcher;	3723 delete matcher;

3716 delete callerPattern;	3724 delete callerPattern;

3717	3725

3718 utext_close(&inputText);	3726 utext_close(&inputText);

3719 delete[] inputChars;	3727 delete[] inputChars;

3720 utext_close(&patternText);	3728 utext_close(&patternText);

3721 delete[] patternChars;	3729 delete[] patternChars;

3722 ucnv_close(UTF8Converter);	3730 ucnv_close(UTF8Converter);

3723 }	3731 }

3724	3732

3725	3733

3726	3734

3727	3735

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3783 // Invalid Back Reference \0	3791 // Invalid Back Reference \0

3784 // For ICU 3.8 and earlier	3792 // For ICU 3.8 and earlier

3785 // For ICU versions newer than 3.8, \0 introduces an octal escape.	3793 // For ICU versions newer than 3.8, \0 introduces an octal escape.

3786 //	3794 //

3787 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE);	3795 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE);

3788	3796

3789 }	3797 }

3790	3798

3791	3799

3792 //------------------------------------------------------------------------------ -	3800 //------------------------------------------------------------------------------ -

3793 //	3801 //

3794 // Read a text data file, convert it to UChars, and return the data	3802 // Read a text data file, convert it to UChars, and return the data

3795 // in one big UChar * buffer, which the caller must delete.	3803 // in one big UChar * buffer, which the caller must delete.

3796 //	3804 //

3797 //------------------------------------------------------------------------------ --	3805 //------------------------------------------------------------------------------ --

3798 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,	3806 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,

3799 const char *defEncoding, UErrorCode &status ) {	3807 const char *defEncoding, UErrorCode &status ) {

3800 UChar *retPtr = NULL;	3808 UChar *retPtr = NULL;

3801 char *fileBuf = NULL;	3809 char *fileBuf = NULL;

3802 UConverter* conv = NULL;	3810 UConverter* conv = NULL;

3803 FILE *f = NULL;	3811 FILE *f = NULL;

(...skipping 322 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4126 UBool found = testMat->find();	4134 UBool found = testMat->find();

4127 UBool expected = FALSE;	4135 UBool expected = FALSE;

4128 if (fields[2].indexOf(UChar_y) >=0) {	4136 if (fields[2].indexOf(UChar_y) >=0) {

4129 expected = TRUE;	4137 expected = TRUE;

4130 }	4138 }

4131 if (expected != found) {	4139 if (expected != found) {

4132 errln("line %d: Expected %smatch, got %smatch",	4140 errln("line %d: Expected %smatch, got %smatch",

4133 lineNum, expected?"":"no ", found?"":"no " );	4141 lineNum, expected?"":"no ", found?"":"no " );

4134 continue;	4142 continue;

4135 }	4143 }

4136	4144

4137 // Don't try to check expected results if there is no match.	4145 // Don't try to check expected results if there is no match.

4138 // (Some have stuff in the expected fields)	4146 // (Some have stuff in the expected fields)

4139 if (!found) {	4147 if (!found) {

4140 delete testMat;	4148 delete testMat;

4141 delete testPat;	4149 delete testPat;

4142 continue;	4150 continue;

4143 }	4151 }

4144	4152

4145 //	4153 //

4146 // Interpret the Perl expression from the fourth field of the data file,	4154 // Interpret the Perl expression from the fourth field of the data file,

(...skipping 277 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4424 const UChar UChar_y = 0x79;	4432 const UChar UChar_y = 0x79;

4425 if (flagStr.indexOf(UChar_i) != -1) {	4433 if (flagStr.indexOf(UChar_i) != -1) {

4426 flags |= UREGEX_CASE_INSENSITIVE;	4434 flags |= UREGEX_CASE_INSENSITIVE;

4427 }	4435 }

4428 if (flagStr.indexOf(UChar_m) != -1) {	4436 if (flagStr.indexOf(UChar_m) != -1) {

4429 flags |= UREGEX_MULTILINE;	4437 flags |= UREGEX_MULTILINE;

4430 }	4438 }

4431 if (flagStr.indexOf(UChar_x) != -1) {	4439 if (flagStr.indexOf(UChar_x) != -1) {

4432 flags |= UREGEX_COMMENTS;	4440 flags |= UREGEX_COMMENTS;

4433 }	4441 }

4434	4442

4435 //	4443 //

4436 // Put the pattern in a UTF-8 UText	4444 // Put the pattern in a UTF-8 UText

4437 //	4445 //

4438 status = U_ZERO_ERROR;	4446 status = U_ZERO_ERROR;

4439 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);	4447 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Converter.getAlias(), status);

4440 if (status == U_BUFFER_OVERFLOW_ERROR) {	4448 if (status == U_BUFFER_OVERFLOW_ERROR) {

4441 status = U_ZERO_ERROR;	4449 status = U_ZERO_ERROR;

4442 delete[] patternChars;	4450 delete[] patternChars;

4443 patternCapacity = patternLength + 1;	4451 patternCapacity = patternLength + 1;

4444 patternChars = new char[patternCapacity];	4452 patternChars = new char[patternCapacity];

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4521 UBool found = testMat->find();	4529 UBool found = testMat->find();

4522 UBool expected = FALSE;	4530 UBool expected = FALSE;

4523 if (fields[2].indexOf(UChar_y) >=0) {	4531 if (fields[2].indexOf(UChar_y) >=0) {

4524 expected = TRUE;	4532 expected = TRUE;

4525 }	4533 }

4526 if (expected != found) {	4534 if (expected != found) {

4527 errln("line %d: Expected %smatch, got %smatch",	4535 errln("line %d: Expected %smatch, got %smatch",

4528 lineNum, expected?"":"no ", found?"":"no " );	4536 lineNum, expected?"":"no ", found?"":"no " );

4529 continue;	4537 continue;

4530 }	4538 }

4531	4539

4532 // Don't try to check expected results if there is no match.	4540 // Don't try to check expected results if there is no match.

4533 // (Some have stuff in the expected fields)	4541 // (Some have stuff in the expected fields)

4534 if (!found) {	4542 if (!found) {

4535 delete testMat;	4543 delete testMat;

4536 delete testPat;	4544 delete testPat;

4537 continue;	4545 continue;

4538 }	4546 }

4539	4547

4540 //	4548 //

4541 // Interpret the Perl expression from the fourth field of the data file,	4549 // Interpret the Perl expression from the fourth field of the data file,

(...skipping 122 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4664 delete groupsPat;	4672 delete groupsPat;

4665	4673

4666 delete flagMat;	4674 delete flagMat;

4667 delete flagPat;	4675 delete flagPat;

4668	4676

4669 delete lineMat;	4677 delete lineMat;

4670 delete linePat;	4678 delete linePat;

4671	4679

4672 delete fieldPat;	4680 delete fieldPat;

4673 delete [] testData;	4681 delete [] testData;

4674	4682

4675 utext_close(&patternText);	4683 utext_close(&patternText);

4676 utext_close(&inputText);	4684 utext_close(&inputText);

4677	4685

4678 delete [] patternChars;	4686 delete [] patternChars;

4679 delete [] inputChars;	4687 delete [] inputChars;

4680	4688

4681	4689

4682 logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);	4690 logln("%d tests skipped because of unimplemented regexp features.", skippedUnimplementedCount);

4683	4691

4684 }	4692 }

4685	4693

4686	4694

4687 //--------------------------------------------------------------	4695 //--------------------------------------------------------------

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4731 }	4739 }

4732 info->lastSteps = steps;	4740 info->lastSteps = steps;

4733 info->numCalls++;	4741 info->numCalls++;

4734 return (info->numCalls < info->maxCalls);	4742 return (info->numCalls < info->maxCalls);

4735 }	4743 }

4736 U_CDECL_END	4744 U_CDECL_END

4737	4745

4738 void RegexTest::Callbacks() {	4746 void RegexTest::Callbacks() {

4739 {	4747 {

4740 // Getter returns NULLs if no callback has been set	4748 // Getter returns NULLs if no callback has been set

4741	4749

4742 // The variables that the getter will fill in.	4750 // The variables that the getter will fill in.

4743 // Init to non-null values so that the action of the getter can be seen.	4751 // Init to non-null values so that the action of the getter can be seen.

4744 const void *returnedContext = &returnedContext;	4752 const void *returnedContext = &returnedContext;

4745 URegexMatchCallback *returnedFn = &testCallBackFn;	4753 URegexMatchCallback *returnedFn = &testCallBackFn;

4746	4754

4747 UErrorCode status = U_ZERO_ERROR;	4755 UErrorCode status = U_ZERO_ERROR;

4748 RegexMatcher matcher("x", 0, status);	4756 RegexMatcher matcher("x", 0, status);

4749 REGEX_CHECK_STATUS;	4757 REGEX_CHECK_STATUS;

4750 matcher.getMatchCallback(returnedFn, returnedContext, status);	4758 matcher.getMatchCallback(returnedFn, returnedContext, status);

4751 REGEX_CHECK_STATUS;	4759 REGEX_CHECK_STATUS;

4752 REGEX_ASSERT(returnedFn == NULL);	4760 REGEX_ASSERT(returnedFn == NULL);

4753 REGEX_ASSERT(returnedContext == NULL);	4761 REGEX_ASSERT(returnedContext == NULL);

4754 }	4762 }

4755	4763

4756 {	4764 {

4757 // Set and Get work	4765 // Set and Get work

4758 callBackContext cbInfo = {this, 0, 0, 0};	4766 callBackContext cbInfo = {this, 0, 0, 0};

4759 const void *returnedContext;	4767 const void *returnedContext;

4760 URegexMatchCallback *returnedFn;	4768 URegexMatchCallback *returnedFn;

4761 UErrorCode status = U_ZERO_ERROR;	4769 UErrorCode status = U_ZERO_ERROR;

4762 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.	4770 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.

4763 REGEX_CHECK_STATUS;	4771 REGEX_CHECK_STATUS;

4764 matcher.setMatchCallback(testCallBackFn, &cbInfo, status);	4772 matcher.setMatchCallback(testCallBackFn, &cbInfo, status);

4765 REGEX_CHECK_STATUS;	4773 REGEX_CHECK_STATUS;

4766 matcher.getMatchCallback(returnedFn, returnedContext, status);	4774 matcher.getMatchCallback(returnedFn, returnedContext, status);

4767 REGEX_CHECK_STATUS;	4775 REGEX_CHECK_STATUS;

4768 REGEX_ASSERT(returnedFn == testCallBackFn);	4776 REGEX_ASSERT(returnedFn == testCallBackFn);

4769 REGEX_ASSERT(returnedContext == &cbInfo);	4777 REGEX_ASSERT(returnedContext == &cbInfo);

4770	4778

4771 // A short-running match shouldn't invoke the callback	4779 // A short-running match shouldn't invoke the callback

4772 status = U_ZERO_ERROR;	4780 status = U_ZERO_ERROR;

4773 cbInfo.reset(1);	4781 cbInfo.reset(1);

4774 UnicodeString s = "xxx";	4782 UnicodeString s = "xxx";

4775 matcher.reset(s);	4783 matcher.reset(s);

4776 REGEX_ASSERT(matcher.matches(status));	4784 REGEX_ASSERT(matcher.matches(status));

4777 REGEX_CHECK_STATUS;	4785 REGEX_CHECK_STATUS;

4778 REGEX_ASSERT(cbInfo.numCalls == 0);	4786 REGEX_ASSERT(cbInfo.numCalls == 0);

4779	4787

4780 // A medium-length match that runs long enough to invoke the	4788 // A medium-length match that runs long enough to invoke the

4781 // callback, but not so long that the callback aborts it.	4789 // callback, but not so long that the callback aborts it.

4782 status = U_ZERO_ERROR;	4790 status = U_ZERO_ERROR;

4783 cbInfo.reset(4);	4791 cbInfo.reset(4);

4784 s = "aaaaaaaaaaaaaaaaaaab";	4792 s = "aaaaaaaaaaaaaaaaaaab";

4785 matcher.reset(s);	4793 matcher.reset(s);

4786 REGEX_ASSERT(matcher.matches(status)==FALSE);	4794 REGEX_ASSERT(matcher.matches(status)==FALSE);

4787 REGEX_CHECK_STATUS;	4795 REGEX_CHECK_STATUS;

4788 REGEX_ASSERT(cbInfo.numCalls > 0);	4796 REGEX_ASSERT(cbInfo.numCalls > 0);

4789	4797

4790 // A longer running match that the callback function will abort.	4798 // A longer running match that the callback function will abort.

4791 status = U_ZERO_ERROR;	4799 status = U_ZERO_ERROR;

4792 cbInfo.reset(4);	4800 cbInfo.reset(4);

4793 s = "aaaaaaaaaaaaaaaaaaaaaaab";	4801 s = "aaaaaaaaaaaaaaaaaaaaaaab";

4794 matcher.reset(s);	4802 matcher.reset(s);

4795 REGEX_ASSERT(matcher.matches(status)==FALSE);	4803 REGEX_ASSERT(matcher.matches(status)==FALSE);

4796 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);	4804 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

4797 REGEX_ASSERT(cbInfo.numCalls == 4);	4805 REGEX_ASSERT(cbInfo.numCalls == 4);

4798 }	4806 }

4799	4807

4800	4808

4801 }	4809 }

4802	4810

4803	4811

4804 //	4812 //

4805 // FindProgressCallbacks() Test the find "progress" callback function.	4813 // FindProgressCallbacks() Test the find "progress" callback function.

4806 // When set, the find progress callback will be invoked during a find operations	4814 // When set, the find progress callback will be invoked during a find operations

4807 // after each return from a match attempt, giving the application the opportunity	4815 // after each return from a match attempt, giving the application the opportunity

4808 // to terminate a long-running find operation before it's normal completion.	4816 // to terminate a long-running find operation before it's normal completion.

4809 //	4817 //

4810	4818

4811 struct progressCallBackContext {	4819 struct progressCallBackContext {

4812 RegexTest *test;	4820 RegexTest *test;

4813 int64_t lastIndex;	4821 int64_t lastIndex;

4814 int32_t maxCalls;	4822 int32_t maxCalls;

4815 int32_t numCalls;	4823 int32_t numCalls;

4816 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};	4824 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};

4817 };	4825 };

4818	4826

	4827 // call-back function for find().

	4828 // Return TRUE to continue the find().

	4829 // Return FALSE to stop the find().

4819 U_CDECL_BEGIN	4830 U_CDECL_BEGIN

4820 static UBool U_CALLCONV	4831 static UBool U_CALLCONV

4821 testProgressCallBackFn(const void *context, int64_t matchIndex) {	4832 testProgressCallBackFn(const void *context, int64_t matchIndex) {

4822 progressCallBackContext *info = (progressCallBackContext *)context;	4833 progressCallBackContext *info = (progressCallBackContext *)context;

4823 info->numCalls++;	4834 info->numCalls++;

4824 info->lastIndex = matchIndex;	4835 info->lastIndex = matchIndex;

4825 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls);	4836 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls);

4826 return (info->numCalls < info->maxCalls);	4837 return (info->numCalls < info->maxCalls);

4827 }	4838 }

4828 U_CDECL_END	4839 U_CDECL_END

4829	4840

4830 void RegexTest::FindProgressCallbacks() {	4841 void RegexTest::FindProgressCallbacks() {

4831 {	4842 {

4832 // Getter returns NULLs if no callback has been set	4843 // Getter returns NULLs if no callback has been set

4833	4844

4834 // The variables that the getter will fill in.	4845 // The variables that the getter will fill in.

4835 // Init to non-null values so that the action of the getter can be seen.	4846 // Init to non-null values so that the action of the getter can be seen.

4836 const void *returnedContext = &returnedContext;	4847 const void *returnedContext = &returnedContext;

4837 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn;	4848 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn;

4838	4849

4839 UErrorCode status = U_ZERO_ERROR;	4850 UErrorCode status = U_ZERO_ERROR;

4840 RegexMatcher matcher("x", 0, status);	4851 RegexMatcher matcher("x", 0, status);

4841 REGEX_CHECK_STATUS;	4852 REGEX_CHECK_STATUS;

4842 matcher.getFindProgressCallback(returnedFn, returnedContext, status);	4853 matcher.getFindProgressCallback(returnedFn, returnedContext, status);

4843 REGEX_CHECK_STATUS;	4854 REGEX_CHECK_STATUS;

4844 REGEX_ASSERT(returnedFn == NULL);	4855 REGEX_ASSERT(returnedFn == NULL);

4845 REGEX_ASSERT(returnedContext == NULL);	4856 REGEX_ASSERT(returnedContext == NULL);

4846 }	4857 }

4847	4858

4848 {	4859 {

4849 // Set and Get work	4860 // Set and Get work

4850 progressCallBackContext cbInfo = {this, 0, 0, 0};	4861 progressCallBackContext cbInfo = {this, 0, 0, 0};

4851 const void *returnedContext;	4862 const void *returnedContext;

4852 URegexFindProgressCallback *returnedFn;	4863 URegexFindProgressCallback *returnedFn;

4853 UErrorCode status = U_ZERO_ERROR;	4864 UErrorCode status = U_ZERO_ERROR;

4854 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.	4865 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);

4855 REGEX_CHECK_STATUS;	4866 REGEX_CHECK_STATUS;

4856 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);	4867 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status);

4857 REGEX_CHECK_STATUS;	4868 REGEX_CHECK_STATUS;

4858 matcher.getFindProgressCallback(returnedFn, returnedContext, status);	4869 matcher.getFindProgressCallback(returnedFn, returnedContext, status);

4859 REGEX_CHECK_STATUS;	4870 REGEX_CHECK_STATUS;

4860 REGEX_ASSERT(returnedFn == testProgressCallBackFn);	4871 REGEX_ASSERT(returnedFn == testProgressCallBackFn);

4861 REGEX_ASSERT(returnedContext == &cbInfo);	4872 REGEX_ASSERT(returnedContext == &cbInfo);

4862	4873

4863 // A short-running match should NOT invoke the callback.	4874 // A find that matches on the initial position does NOT invoke the callback.

4864 status = U_ZERO_ERROR;	4875 status = U_ZERO_ERROR;

4865 cbInfo.reset(100);	4876 cbInfo.reset(100);

4866 UnicodeString s = "abxxx";	4877 UnicodeString s = "aaxxx";

4867 matcher.reset(s);	4878 matcher.reset(s);

4868 #if 0	4879 #if 0

4869 matcher.setTrace(TRUE);	4880 matcher.setTrace(TRUE);

4870 #endif	4881 #endif

4871 REGEX_ASSERT(matcher.find(0, status));	4882 REGEX_ASSERT(matcher.find(0, status));

4872 REGEX_CHECK_STATUS;	4883 REGEX_CHECK_STATUS;

4873 REGEX_ASSERT(cbInfo.numCalls == 0);	4884 REGEX_ASSERT(cbInfo.numCalls == 0);

4874	4885

4875 // A medium running match that causes matcher.find() to invoke our callback for each index.	4886 // A medium running find() that causes matcher.find() to invoke our callback for each index,

	4887 // but not so many times that we interrupt the operation.

4876 status = U_ZERO_ERROR;	4888 status = U_ZERO_ERROR;

4877 s = "aaaaaaaaaaaaaaaaaaab";	4889 s = "aaaaaaaaaaaaaaaaaaab";

4878 cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string	4890 cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string

4879 matcher.reset(s);	4891 matcher.reset(s);

4880 REGEX_ASSERT(matcher.find(0, status)==FALSE);	4892 REGEX_ASSERT(matcher.find(0, status)==FALSE);

4881 REGEX_CHECK_STATUS;	4893 REGEX_CHECK_STATUS;

4882 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);	4894 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);

4883	4895

4884 // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.	4896 // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.

4885 status = U_ZERO_ERROR;	4897 status = U_ZERO_ERROR;

4886 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";	4898 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";

4887 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string	4899 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string

4888 matcher.reset(s1);	4900 matcher.reset(s1);

4889 REGEX_ASSERT(matcher.find(0, status)==FALSE);	4901 REGEX_ASSERT(matcher.find(0, status)==FALSE);

4890 REGEX_CHECK_STATUS;	4902 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

4891 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);	4903 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);

4892	4904

4893 #if 0

4894 // Now a match that will succeed, but after an interruption	4905 // Now a match that will succeed, but after an interruption

4895 status = U_ZERO_ERROR;	4906 status = U_ZERO_ERROR;

4896 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";	4907 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";

4897 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string	4908 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string

4898 matcher.reset(s2);	4909 matcher.reset(s2);

4899 REGEX_ASSERT(matcher.find(0, status)==FALSE);	4910 REGEX_ASSERT(matcher.find(0, status)==FALSE);

4900 REGEX_CHECK_STATUS;	4911 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

4901 // Now retry the match from where left off	4912 // Now retry the match from where left off

4902 cbInfo.maxCalls = 100; // No callback limit	4913 cbInfo.maxCalls = 100; // No callback limit

	4914 status = U_ZERO_ERROR;

4903 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));	4915 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));

4904 REGEX_CHECK_STATUS;	4916 REGEX_CHECK_STATUS;

4905 #endif

4906 }	4917 }

4907	4918

4908	4919

4909 }	4920 }

4910	4921

4911	4922

4912 //---------------------------------------------------------------------------	4923 //---------------------------------------------------------------------------

4913 //	4924 //

4914 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable	4925 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable

4915 // UTexts. The pure-C implementation of UText	4926 // UTexts. The pure-C implementation of UText

4916 // has no mutable backing stores, but we can	4927 // has no mutable backing stores, but we can

4917 // use UnicodeString here to test the functionality.	4928 // use UnicodeString here to test the functionality.

4918 //	4929 //

4919 //---------------------------------------------------------------------------	4930 //---------------------------------------------------------------------------

4920 void RegexTest::PreAllocatedUTextCAPI () {	4931 void RegexTest::PreAllocatedUTextCAPI () {

4921 UErrorCode status = U_ZERO_ERROR;	4932 UErrorCode status = U_ZERO_ERROR;

4922 URegularExpression *re;	4933 URegularExpression *re;

4923 UText patternText = UTEXT_INITIALIZER;	4934 UText patternText = UTEXT_INITIALIZER;

4924 UnicodeString buffer;	4935 UnicodeString buffer;

4925 UText bufferText = UTEXT_INITIALIZER;	4936 UText bufferText = UTEXT_INITIALIZER;

4926	4937

4927 utext_openUnicodeString(&bufferText, &buffer, &status);	4938 utext_openUnicodeString(&bufferText, &buffer, &status);

4928	4939

4929 /*	4940 /*

4930 * getText() and getUText()	4941 * getText() and getUText()

4931 */	4942 */

4932 {	4943 {

4933 UText text1 = UTEXT_INITIALIZER;	4944 UText text1 = UTEXT_INITIALIZER;

4934 UText text2 = UTEXT_INITIALIZER;	4945 UText text2 = UTEXT_INITIALIZER;

4935 UChar text2Chars[20];	4946 UChar text2Chars[20];

4936 UText *resultText;	4947 UText *resultText;

4937	4948

4938 status = U_ZERO_ERROR;	4949 status = U_ZERO_ERROR;

4939 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status);	4950 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status);

4940 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);	4951 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);

4941 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);	4952 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);

4942 utext_openUChars(&text2, text2Chars, -1, &status);	4953 utext_openUChars(&text2, text2Chars, -1, &status);

4943	4954

4944 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);	4955 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);

4945 re = uregex_openUText(&patternText, 0, NULL, &status);	4956 re = uregex_openUText(&patternText, 0, NULL, &status);

4946	4957

4947 /* First set a UText */	4958 /* First set a UText */

4948 uregex_setUText(re, &text1, &status);	4959 uregex_setUText(re, &text1, &status);

4949 resultText = uregex_getUText(re, &bufferText, &status);	4960 resultText = uregex_getUText(re, &bufferText, &status);

4950 REGEX_CHECK_STATUS;	4961 REGEX_CHECK_STATUS;

4951 REGEX_ASSERT(resultText == &bufferText);	4962 REGEX_ASSERT(resultText == &bufferText);

4952 utext_setNativeIndex(resultText, 0);	4963 utext_setNativeIndex(resultText, 0);

4953 utext_setNativeIndex(&text1, 0);	4964 utext_setNativeIndex(&text1, 0);

4954 REGEX_ASSERT(testUTextEqual(resultText, &text1));	4965 REGEX_ASSERT(testUTextEqual(resultText, &text1));

4955	4966

4956 resultText = uregex_getUText(re, &bufferText, &status);	4967 resultText = uregex_getUText(re, &bufferText, &status);

4957 REGEX_CHECK_STATUS;	4968 REGEX_CHECK_STATUS;

4958 REGEX_ASSERT(resultText == &bufferText);	4969 REGEX_ASSERT(resultText == &bufferText);

4959 utext_setNativeIndex(resultText, 0);	4970 utext_setNativeIndex(resultText, 0);

4960 utext_setNativeIndex(&text1, 0);	4971 utext_setNativeIndex(&text1, 0);

4961 REGEX_ASSERT(testUTextEqual(resultText, &text1));	4972 REGEX_ASSERT(testUTextEqual(resultText, &text1));

4962	4973

4963 /* Then set a UChar * */	4974 /* Then set a UChar * */

4964 uregex_setText(re, text2Chars, 7, &status);	4975 uregex_setText(re, text2Chars, 7, &status);

4965 resultText = uregex_getUText(re, &bufferText, &status);	4976 resultText = uregex_getUText(re, &bufferText, &status);

4966 REGEX_CHECK_STATUS;	4977 REGEX_CHECK_STATUS;

4967 REGEX_ASSERT(resultText == &bufferText);	4978 REGEX_ASSERT(resultText == &bufferText);

4968 utext_setNativeIndex(resultText, 0);	4979 utext_setNativeIndex(resultText, 0);

4969 utext_setNativeIndex(&text2, 0);	4980 utext_setNativeIndex(&text2, 0);

4970 REGEX_ASSERT(testUTextEqual(resultText, &text2));	4981 REGEX_ASSERT(testUTextEqual(resultText, &text2));

4971	4982

4972 uregex_close(re);	4983 uregex_close(re);

4973 utext_close(&text1);	4984 utext_close(&text1);

4974 utext_close(&text2);	4985 utext_close(&text2);

4975 }	4986 }

4976	4987

4977 /*	4988 /*

4978 * group()	4989 * group()

4979 */	4990 */

4980 {	4991 {

4981 UChar text1[80];	4992 UChar text1[80];

(...skipping 25 matching lines...) Expand all Loading...
5007	5018

5008 /* Capture group out of range. Error. */	5019 /* Capture group out of range. Error. */

5009 status = U_ZERO_ERROR;	5020 status = U_ZERO_ERROR;

5010 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);	5021 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);

5011 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);	5022 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

5012 REGEX_ASSERT(actual == &bufferText);	5023 REGEX_ASSERT(actual == &bufferText);

5013	5024

5014 uregex_close(re);	5025 uregex_close(re);

5015	5026

5016 }	5027 }

5017	5028

5018 /*	5029 /*

5019 * replaceFirst()	5030 * replaceFirst()

5020 */	5031 */

5021 {	5032 {

5022 UChar text1[80];	5033 UChar text1[80];

5023 UChar text2[80];	5034 UChar text2[80];

5024 UText replText = UTEXT_INITIALIZER;	5035 UText replText = UTEXT_INITIALIZER;

5025 UText *result;	5036 UText *result;

5026	5037

5027 status = U_ZERO_ERROR;	5038 status = U_ZERO_ERROR;

5028 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);	5039 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);

5029 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);	5040 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);

5030 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);	5041 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);

5031	5042

5032 re = uregex_openC("x(.*?)x", 0, NULL, &status);	5043 re = uregex_openC("x(.*?)x", 0, NULL, &status);

5033 REGEX_CHECK_STATUS;	5044 REGEX_CHECK_STATUS;

5034	5045

5035 /* Normal case, with match */	5046 /* Normal case, with match */

5036 uregex_setText(re, text1, -1, &status);	5047 uregex_setText(re, text1, -1, &status);

5037 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);	5048 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);

5038 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);	5049 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);

5039 REGEX_CHECK_STATUS;	5050 REGEX_CHECK_STATUS;

5040 REGEX_ASSERT(result == &bufferText);	5051 REGEX_ASSERT(result == &bufferText);

5041 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);	5052 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);

5042	5053

5043 /* No match. Text should copy to output with no changes. */	5054 /* No match. Text should copy to output with no changes. */

5044 uregex_setText(re, text2, -1, &status);	5055 uregex_setText(re, text2, -1, &status);

5045 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);	5056 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);

5046 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);	5057 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);

5047 REGEX_CHECK_STATUS;	5058 REGEX_CHECK_STATUS;

5048 REGEX_ASSERT(result == &bufferText);	5059 REGEX_ASSERT(result == &bufferText);

5049 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);	5060 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);

5050	5061

5051 /* Unicode escapes */	5062 /* Unicode escapes */

5052 uregex_setText(re, text1, -1, &status);	5063 uregex_setText(re, text1, -1, &status);

5053 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);	5064 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a", -1, &status);

5054 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);	5065 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);

5055 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);	5066 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);

5056 REGEX_CHECK_STATUS;	5067 REGEX_CHECK_STATUS;

5057 REGEX_ASSERT(result == &bufferText);	5068 REGEX_ASSERT(result == &bufferText);

5058 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);	5069 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);

5059	5070

5060 uregex_close(re);	5071 uregex_close(re);

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5097	5108

5098 uregex_close(re);	5109 uregex_close(re);

5099 utext_close(&replText);	5110 utext_close(&replText);

5100 }	5111 }

5101	5112

5102	5113

5103 /*	5114 /*

5104 * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,	5115 * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,

5105 * so we don't need to test it here.	5116 * so we don't need to test it here.

5106 */	5117 */

5107	5118

5108 utext_close(&bufferText);	5119 utext_close(&bufferText);

5109 utext_close(&patternText);	5120 utext_close(&patternText);

5110 }	5121 }

5111	5122

5112 //--------------------------------------------------------------	5123 //--------------------------------------------------------------

5113 //	5124 //

5114 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher.	5125 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher.

5115 //	5126 //

5116 //---------------------------------------------------------------	5127 //---------------------------------------------------------------

5117 void RegexTest::Bug7651() {	5128 void RegexTest::Bug7651() {

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5172 {	5183 {

5173 UnicodeString str;	5184 UnicodeString str;

5174 str.setToBogus();	5185 str.setToBogus();

5175 pMatcher->reset(str);	5186 pMatcher->reset(str);

5176 status = U_ZERO_ERROR;	5187 status = U_ZERO_ERROR;

5177 pMatcher->matches(status);	5188 pMatcher->matches(status);

5178 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);	5189 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

5179 delete pMatcher;	5190 delete pMatcher;

5180 }	5191 }

5181 }	5192 }

5182	5193

5183	5194

5184 // Bug 7029	5195 // Bug 7029

5185 void RegexTest::Bug7029() {	5196 void RegexTest::Bug7029() {

5186 UErrorCode status = U_ZERO_ERROR;	5197 UErrorCode status = U_ZERO_ERROR;

5187	5198

5188 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status);	5199 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status);

5189 UnicodeString text = "abc.def";	5200 UnicodeString text = "abc.def";

5190 UnicodeString splits[10];	5201 UnicodeString splits[10];

5191 REGEX_CHECK_STATUS;	5202 REGEX_CHECK_STATUS;

5192 int32_t numFields = pMatcher->split(text, splits, 10, status);	5203 int32_t numFields = pMatcher->split(text, splits, 10, status);

5193 REGEX_CHECK_STATUS;	5204 REGEX_CHECK_STATUS;

5194 REGEX_ASSERT(numFields == 8);	5205 REGEX_ASSERT(numFields == 8);

5195 delete pMatcher;	5206 delete pMatcher;

5196 }	5207 }

5197	5208

5198 // Bug 9283	5209 // Bug 9283

5199 // This test is checking for the existance of any supplemental characters that case-fold	5210 // This test is checking for the existance of any supplemental characters that case-fold

5200 // to a bmp character.	5211 // to a bmp character.

5201 //	5212 //

5202 // At the time of this writing there are none. If any should appear in a subsequent release	5213 // At the time of this writing there are none. If any should appear in a subsequent release

5203 // of Unicode, the code in regular expressions compilation that determines the longest	5214 // of Unicode, the code in regular expressions compilation that determines the longest

5204 // possible match for a literal string will need to be enhanced.	5215 // possible match for a literal string will need to be enhanced.

5205 //	5216 //

5206 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()	5217 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()

5207 // for details on what to do in case of a failure of this test.	5218 // for details on what to do in case of a failure of this test.

5208 //	5219 //

5209 void RegexTest::Bug9283() {	5220 void RegexTest::Bug9283() {

	5221 #if !UCONFIG_NO_NORMALIZATION

5210 UErrorCode status = U_ZERO_ERROR;	5222 UErrorCode status = U_ZERO_ERROR;

5211 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF] ]", status);	5223 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF] ]", status);

5212 REGEX_CHECK_STATUS;	5224 REGEX_CHECK_STATUS;

5213 int32_t index;	5225 int32_t index;

5214 UChar32 c;	5226 UChar32 c;

5215 for (index=0; ; index++) {	5227 for (index=0; ; index++) {

5216 c = supplementalsWithCaseFolding.charAt(index);	5228 c = supplementalsWithCaseFolding.charAt(index);

5217 if (c == -1) {	5229 if (c == -1) {

5218 break;	5230 break;

5219 }	5231 }

5220 UnicodeString cf = UnicodeString(c).foldCase();	5232 UnicodeString cf = UnicodeString(c).foldCase();

5221 REGEX_ASSERT(cf.length() >= 2);	5233 REGEX_ASSERT(cf.length() >= 2);

5222 }	5234 }

	5235 #endif /* #if !UCONFIG_NO_NORMALIZATION */

5223 }	5236 }

5224	5237

5225	5238

5226 void RegexTest::CheckInvBufSize() {	5239 void RegexTest::CheckInvBufSize() {

5227 if(inv_next>=INV_BUFSIZ) {	5240 if(inv_next>=INV_BUFSIZ) {

5228 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least % d )\n",	5241 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least % d )\n",

5229 __FILE__, INV_BUFSIZ, inv_next);	5242 __FILE__, INV_BUFSIZ, inv_next);

5230 } else {	5243 } else {

5231 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next);	5244 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next);

5232 }	5245 }

5233 }	5246 }

5234	5247

5235 void RegexTest::TestBug11371() {	5248

	5249 void RegexTest::Bug10459() {

5236 UErrorCode status = U_ZERO_ERROR;	5250 UErrorCode status = U_ZERO_ERROR;

5237 UnicodeString patternString;	5251 UnicodeString patternString("(txt)");

	5252 UnicodeString txtString("txt");

5238	5253

5239 for (int i=0; i<8000000; i++) {	5254 UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status);

5240 patternString.append(UnicodeString("()"));	5255 REGEX_CHECK_STATUS;

	5256 UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status);

	5257 REGEX_CHECK_STATUS;

	5258

	5259 URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status);

	5260 REGEX_CHECK_STATUS;

	5261

	5262 uregex_setUText(icu_re, utext_txt, &status);

	5263 REGEX_CHECK_STATUS;

	5264

	5265 // The bug was that calling uregex_group() before doing a matching operation

	5266 // was causing a segfault. Only for Regular Expressions created from UText.

	5267 // It should set an U_REGEX_INVALID_STATE.

	5268

	5269 UChar buf[100];

	5270 int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status);

	5271 REGEX_ASSERT(status == U_REGEX_INVALID_STATE);

	5272 REGEX_ASSERT(len == 0);

	5273

	5274 uregex_close(icu_re);

	5275 utext_close(utext_pat);

	5276 utext_close(utext_txt);

	5277 }

	5278

	5279 void RegexTest::TestCaseInsensitiveStarters() {

	5280 // Test that the data used by RegexCompile::findCaseInsensitiveStarters() hasn't

	5281 // become stale because of new Unicode characters.

	5282 // If it is stale, rerun the generation tool

	5283 // svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing

	5284 // and replace the embedded data in i18n/regexcmp.cpp

	5285

	5286 for (UChar32 cp=0; cp<=0x10ffff; cp++) {

	5287 if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) {

	5288 continue;

	5289 }

	5290 UnicodeSet s(cp, cp);

	5291 s.closeOver(USET_CASE_INSENSITIVE);

	5292 UnicodeSetIterator setIter(s);

	5293 while (setIter.next()) {

	5294 if (!setIter.isString()) {

	5295 continue;

	5296 }

	5297 const UnicodeString &str = setIter.getString();

	5298 UChar32 firstChar = str.char32At(0);

	5299 UnicodeSet starters;

	5300 RegexCompile::findCaseInsensitiveStarters(firstChar, &starters);

	5301 if (!starters.contains(cp)) {

	5302 errln("CaseInsensitiveStarters for \\u%x is missing character \\ u%x.", cp, firstChar);

	5303 return;

	5304 }

	5305 }

5241 }	5306 }

	5307 }

	5308

	5309

	5310 void RegexTest::TestBug11049() {

	5311 // Original bug report: pattern with match start consisting of one of several individual characters,

	5312 // and the text being matched ending with a supplementary character. find() would read past the

	5313 // end of the input text when searching for potential match starting points.

	5314

	5315 // To see the problem, the text must exactly fill an allocated buffer, so that valgrind will

	5316 // detect the bad read.

	5317

	5318 TestCase11049("A\|B\|C", "a string \\ud800\\udc00", FALSE, __LINE__);

	5319 TestCase11049("A\|B\|C", "string matches at end C", TRUE, __LINE__);

	5320

	5321 // Test again with a pattern starting with a single character,

	5322 // which takes a different code path than starting with an OR expression,

	5323 // but with similar logic.

	5324 TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);

	5325 TestCase11049("C", "string matches at end C", TRUE, __LINE__);

	5326 }

	5327

	5328 // Run a single test case from TestBug11049(). Internal function.

	5329 void RegexTest::TestCase11049(const char pattern, const char data, UBool expec tMatch, int32_t lineNumber) {

	5330 UErrorCode status = U_ZERO_ERROR;

	5331 UnicodeString patternString = UnicodeString(pattern).unescape();

5242 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));	5332 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));

5243 if (status != U_REGEX_PATTERN_TOO_BIG) {	5333

5244 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",	5334 UnicodeString dataString = UnicodeString(data).unescape();

5245 __FILE__, __LINE__, u_errorName(status));	5335 UChar *exactBuffer = new UChar[dataString.length()];

	5336 dataString.extract(exactBuffer, dataString.length(), status);

	5337 UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status );

	5338

	5339 LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status));

	5340 REGEX_CHECK_STATUS;

	5341 matcher->reset(ut);

	5342 UBool result = matcher->find();

	5343 if (result != expectMatch) {

	5344 errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \ "%s\"",

	5345 __FILE__, lineNumber, expectMatch, result, pattern, data);

5246 }	5346 }

5247	5347

5248 status = U_ZERO_ERROR;	5348 // Rerun test with UTF-8 input text. Won't see buffer overreads, but could see

5249 patternString = "(";	5349 // off-by-one on find() with match at the last code point.

5250 for (int i=0; i<20000000; i++) {	5350 // Size of the original char * data (invariant charset) will be <= than the equivalent UTF-8

5251 patternString.append(UnicodeString("A++"));	5351 // because string.unescape() will only shrink it.

	5352 char * utf8Buffer = new char[uprv_strlen(data)+1];

	5353 u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), d ataString.length(), &status);

	5354 REGEX_CHECK_STATUS;

	5355 ut = utext_openUTF8(ut, utf8Buffer, -1, &status);

	5356 REGEX_CHECK_STATUS;

	5357 matcher->reset(ut);

	5358 result = matcher->find();

	5359 if (result != expectMatch) {

	5360 errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \" %s\", text = \"%s\"",

	5361 __FILE__, lineNumber, expectMatch, result, pattern, data);

5252 }	5362 }

5253 patternString.append(UnicodeString("){0}B++"));	5363 delete [] utf8Buffer;

5254 LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));

5255 if (status != U_REGEX_PATTERN_TOO_BIG) {

5256 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",

5257 __FILE__, __LINE__, u_errorName(status));

5258 }

5259	5364

5260 // Pattern with too much string data, such that string indexes overflow operand data.	5365 utext_close(ut);

5261 status = U_ZERO_ERROR;	5366 delete [] exactBuffer;

5262 patternString = "";	5367 }

5263 while (patternString.length() < 0x00ffffff) {

5264 patternString.append(UnicodeString("stuff and things dont you know, thes e are a few of my favorite strings\n"));

5265 }

5266 patternString.append(UnicodeString("X? trailing string"));

5267 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));

5268 compiledPat3->dumpPattern();

5269 if (status != U_REGEX_PATTERN_TOO_BIG) {

5270 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",

5271 __FILE__, __LINE__, u_errorName(status));

5272 }

5273	5368

5274	5369

5275	5370

5276 }	5371 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

5277	5372

5278 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

OLD	NEW

« no previous file with comments | « source/test/intltest/regextst.h ('k') | source/test/intltest/regiontst.cpp » ('j') | no next file with comments »