Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(448)

Side by Side Diff: source/test/intltest/regextst.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unusued directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/intltest/regextst.h ('k') | source/test/intltest/regiontst.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /******************************************************************** 1 /********************************************************************
2 * COPYRIGHT: 2 * COPYRIGHT:
3 * Copyright (c) 2002-2013, International Business Machines Corporation and 3 * Copyright (c) 2002-2014, International Business Machines Corporation and
4 * others. All Rights Reserved. 4 * others. All Rights Reserved.
5 ********************************************************************/ 5 ********************************************************************/
6 6
7 // 7 //
8 // regextst.cpp 8 // regextst.cpp
9 // 9 //
10 // ICU Regular Expressions test, part of intltest. 10 // ICU Regular Expressions test, part of intltest.
11 // 11 //
12 12
13 /* 13 /*
14 NOTE!! 14 NOTE!!
15 15
16 PLEASE be careful about ASCII assumptions in this test. 16 PLEASE be careful about ASCII assumptions in this test.
17 This test is one of the worst repeat offenders. 17 This test is one of the worst repeat offenders.
18 If you have questions, contact someone on the ICU PMC 18 If you have questions, contact someone on the ICU PMC
19 who has access to an EBCDIC system. 19 who has access to an EBCDIC system.
20 20
21 */ 21 */
22 22
23 #include "intltest.h" 23 #include "intltest.h"
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25 25
26 #include "unicode/localpointer.h"
26 #include "unicode/regex.h" 27 #include "unicode/regex.h"
27 #include "unicode/uchar.h" 28 #include "unicode/uchar.h"
28 #include "unicode/ucnv.h" 29 #include "unicode/ucnv.h"
29 #include "unicode/uniset.h" 30 #include "unicode/uniset.h"
31 #include "unicode/uregex.h"
32 #include "unicode/usetiter.h"
30 #include "unicode/ustring.h" 33 #include "unicode/ustring.h"
31 #include "regextst.h" 34 #include "regextst.h"
35 #include "regexcmp.h"
32 #include "uvector.h" 36 #include "uvector.h"
33 #include "util.h" 37 #include "util.h"
34 #include <stdlib.h> 38 #include <stdlib.h>
35 #include <string.h> 39 #include <string.h>
36 #include <stdio.h> 40 #include <stdio.h>
37 #include "cstring.h" 41 #include "cstring.h"
38 #include "uinvchar.h" 42 #include "uinvchar.h"
39 43
40 #define SUPPORT_MUTATING_INPUT_STRING 0 44 #define SUPPORT_MUTATING_INPUT_STRING 0
41 45
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
124 break; 128 break;
125 case 19: name = "Bug 7029"; 129 case 19: name = "Bug 7029";
126 if (exec) Bug7029(); 130 if (exec) Bug7029();
127 break; 131 break;
128 case 20: name = "CheckInvBufSize"; 132 case 20: name = "CheckInvBufSize";
129 if (exec) CheckInvBufSize(); 133 if (exec) CheckInvBufSize();
130 break; 134 break;
131 case 21: name = "Bug 9283"; 135 case 21: name = "Bug 9283";
132 if (exec) Bug9283(); 136 if (exec) Bug9283();
133 break; 137 break;
134 case 22: name = "TestBug11371"; 138 case 22: name = "Bug10459";
135 if (exec) TestBug11371(); 139 if (exec) Bug10459();
136 break; 140 break;
137 141 case 23: name = "TestCaseInsensitiveStarters";
142 if (exec) TestCaseInsensitiveStarters();
143 break;
144 case 24: name = "TestBug11049";
145 if (exec) TestBug11049();
146 break;
138 default: name = ""; 147 default: name = "";
139 break; //needed to end loop 148 break; //needed to end loop
140 } 149 }
141 } 150 }
142 151
143 152
144 153
145 /** 154 /**
146 * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage 155 * Calls utext_openUTF8 after, potentially, converting invariant text from the compilation codepage
147 * into ASCII. 156 * into ASCII.
148 * @see utext_openUTF8 157 * @see utext_openUTF8
149 */ 158 */
150 static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status); 159 static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t length, UErrorCode *status);
151 160
152 //--------------------------------------------------------------------------- 161 //---------------------------------------------------------------------------
153 // 162 //
154 // Error Checking / Reporting macros used in all of the tests. 163 // Error Checking / Reporting macros used in all of the tests.
155 // 164 //
156 //--------------------------------------------------------------------------- 165 //---------------------------------------------------------------------------
157 166
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
203 UChar ch = buf[i]; 212 UChar ch = buf[i];
204 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); 213 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch);
205 } 214 }
206 } 215 }
207 } 216 }
208 } 217 }
209 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; 218 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0;
210 return ASSERT_BUF; 219 return ASSERT_BUF;
211 } 220 }
212 221
213
214 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf) /sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);} 222 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf) /sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text, buf);}
215 223
216 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \ 224 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest failure. status=%s", \
217 __FILE__, __LINE__ , u_errorName(status)); return;}} 225 __FILE__, __LINE__ , u_errorName(status)); return;}}
218 226
219 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} 227 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure: REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};}
220 228
221 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr) ;\ 229 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr) ;\
222 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status= %s, got %s", \ 230 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status= %s, got %s", \
223 __LINE__, u_errorName(errcode), u_errorName(status));};} 231 __LINE__, u_errorName(errcode), u_errorName(status));};}
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 char buf[201 /*21*/]; 297 char buf[201 /*21*/];
290 char expectedBuf[201]; 298 char expectedBuf[201];
291 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); 299 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual);
292 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]) , &expectedText); 300 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0]) , &expectedText);
293 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars) , got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe ctedText), buf, (int)utext_nativeLength(actual)); 301 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars) , got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe ctedText), buf, (int)utext_nativeLength(actual));
294 } 302 }
295 utext_close(&expectedText); 303 utext_close(&expectedText);
296 } 304 }
297 305
298 /** 306 /**
299 * Assumes utf-8 input 307 * Assumes utf-8 input
300 */ 308 */
301 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua l), __FILE__, __LINE__) 309 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua l), __FILE__, __LINE__)
302 /** 310 /**
303 * Assumes Invariant input 311 * Assumes Invariant input
304 */ 312 */
305 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp ected), (actual), __FILE__, __LINE__) 313 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp ected), (actual), __FILE__, __LINE__)
306 314
307 /** 315 /**
308 * This buffer ( inv_buf ) is used to hold the UTF-8 strings 316 * This buffer ( inv_buf ) is used to hold the UTF-8 strings
309 * passed into utext_openUTF8. An error will be given if 317 * passed into utext_openUTF8. An error will be given if
310 * INV_BUFSIZ is too small. It's only used on EBCDIC systems. 318 * INV_BUFSIZ is too small. It's only used on EBCDIC systems.
311 */ 319 */
312 320
313 #define INV_BUFSIZ 2048 /* increase this if too small */ 321 #define INV_BUFSIZ 2048 /* increase this if too small */
314 322
315 static int64_t inv_next=0; 323 static int64_t inv_next=0;
316 324
317 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY 325 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY
318 static char inv_buf[INV_BUFSIZ]; 326 static char inv_buf[INV_BUFSIZ];
319 #endif 327 #endif
320 328
321 static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) { 329 static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t length, UErrorCode *status) {
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
369 RegexMatcher *REMatcher = NULL; 377 RegexMatcher *REMatcher = NULL;
370 UBool retVal = TRUE; 378 UBool retVal = TRUE;
371 379
372 UnicodeString patString(pat, -1, US_INV); 380 UnicodeString patString(pat, -1, US_INV);
373 REPattern = RegexPattern::compile(patString, 0, pe, status); 381 REPattern = RegexPattern::compile(patString, 0, pe, status);
374 if (U_FAILURE(status)) { 382 if (U_FAILURE(status)) {
375 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta tus = %s", 383 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta tus = %s",
376 line, u_errorName(status)); 384 line, u_errorName(status));
377 return FALSE; 385 return FALSE;
378 } 386 }
379 if (line==376) { RegexPatternDump(REPattern);} 387 if (line==376) { REPattern->dumpPattern();}
380 388
381 UnicodeString inputString(inputText); 389 UnicodeString inputString(inputText);
382 UnicodeString unEscapedInput = inputString.unescape(); 390 UnicodeString unEscapedInput = inputString.unescape();
383 REMatcher = REPattern->matcher(unEscapedInput, status); 391 REMatcher = REPattern->matcher(unEscapedInput, status);
384 if (U_FAILURE(status)) { 392 if (U_FAILURE(status)) {
385 errln("RegexTest failure in REPattern::matcher() at line %d. Status = % s\n", 393 errln("RegexTest failure in REPattern::matcher() at line %d. Status = % s\n",
386 line, u_errorName(status)); 394 line, u_errorName(status));
387 return FALSE; 395 return FALSE;
388 } 396 }
389 397
(...skipping 15 matching lines...) Expand all
405 errln("RegexTest failure in matches() at line %d. Status = %s\n", 413 errln("RegexTest failure in matches() at line %d. Status = %s\n",
406 line, u_errorName(status)); 414 line, u_errorName(status));
407 retVal = FALSE; 415 retVal = FALSE;
408 } 416 }
409 if (actualmatch != match) { 417 if (actualmatch != match) {
410 errln("RegexTest: wrong return from matches() at line %d.\n", line); 418 errln("RegexTest: wrong return from matches() at line %d.\n", line);
411 retVal = FALSE; 419 retVal = FALSE;
412 } 420 }
413 421
414 if (retVal == FALSE) { 422 if (retVal == FALSE) {
415 RegexPatternDump(REPattern); 423 REPattern->dumpPattern();
416 } 424 }
417 425
418 delete REPattern; 426 delete REPattern;
419 delete REMatcher; 427 delete REMatcher;
420 return retVal; 428 return retVal;
421 } 429 }
422 430
423 431
424 UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look ing, UBool match, int32_t line) { 432 UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look ing, UBool match, int32_t line) {
425 UText pattern = UTEXT_INITIALIZER; 433 UText pattern = UTEXT_INITIALIZER;
426 int32_t inputUTF8Length; 434 int32_t inputUTF8Length;
427 char *textChars = NULL; 435 char *textChars = NULL;
428 UText inputText = UTEXT_INITIALIZER; 436 UText inputText = UTEXT_INITIALIZER;
429 UErrorCode status = U_ZERO_ERROR; 437 UErrorCode status = U_ZERO_ERROR;
430 UParseError pe; 438 UParseError pe;
431 RegexPattern *REPattern = NULL; 439 RegexPattern *REPattern = NULL;
432 RegexMatcher *REMatcher = NULL; 440 RegexMatcher *REMatcher = NULL;
433 UBool retVal = TRUE; 441 UBool retVal = TRUE;
434 442
435 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); 443 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status);
436 REPattern = RegexPattern::compile(&pattern, 0, pe, status); 444 REPattern = RegexPattern::compile(&pattern, 0, pe, status);
437 if (U_FAILURE(status)) { 445 if (U_FAILURE(status)) {
438 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8 ). Status = %s\n", 446 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8 ). Status = %s\n",
439 line, u_errorName(status)); 447 line, u_errorName(status));
440 return FALSE; 448 return FALSE;
441 } 449 }
442 450
443 UnicodeString inputString(text, -1, US_INV); 451 UnicodeString inputString(text, -1, US_INV);
444 UnicodeString unEscapedInput = inputString.unescape(); 452 UnicodeString unEscapedInput = inputString.unescape();
445 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); 453 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status));
446 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N ULL, NULL, NULL, &status); 454 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N ULL, NULL, NULL, &status);
447 455
448 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status); 456 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(), status);
449 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { 457 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
450 // UTF-8 does not allow unpaired surrogates, so this could actually happen 458 // UTF-8 does not allow unpaired surrogates, so this could actually happen
451 logln("RegexTest unable to convert input to UTF8 at line %d. Status = % s\n", line, u_errorName(status)); 459 logln("RegexTest unable to convert input to UTF8 at line %d. Status = % s\n", line, u_errorName(status));
452 return TRUE; // not a failure of the Regex engine 460 return TRUE; // not a failure of the Regex engine
453 } 461 }
454 status = U_ZERO_ERROR; // buffer overflow 462 status = U_ZERO_ERROR; // buffer overflow
455 textChars = new char[inputUTF8Length+1]; 463 textChars = new char[inputUTF8Length+1];
456 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias( ), status); 464 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias( ), status);
457 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); 465 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status);
458 466
459 REMatcher = &REPattern->matcher(status)->reset(&inputText); 467 REMatcher = &REPattern->matcher(status)->reset(&inputText);
460 if (U_FAILURE(status)) { 468 if (U_FAILURE(status)) {
461 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta tus = %s\n", 469 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta tus = %s\n",
462 line, u_errorName(status)); 470 line, u_errorName(status));
463 return FALSE; 471 return FALSE;
464 } 472 }
465 473
466 UBool actualmatch; 474 UBool actualmatch;
467 actualmatch = REMatcher->lookingAt(status); 475 actualmatch = REMatcher->lookingAt(status);
468 if (U_FAILURE(status)) { 476 if (U_FAILURE(status)) {
(...skipping 12 matching lines...) Expand all
481 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n" , 489 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n" ,
482 line, u_errorName(status)); 490 line, u_errorName(status));
483 retVal = FALSE; 491 retVal = FALSE;
484 } 492 }
485 if (actualmatch != match) { 493 if (actualmatch != match) {
486 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin e); 494 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin e);
487 retVal = FALSE; 495 retVal = FALSE;
488 } 496 }
489 497
490 if (retVal == FALSE) { 498 if (retVal == FALSE) {
491 RegexPatternDump(REPattern); 499 REPattern->dumpPattern();
492 } 500 }
493 501
494 delete REPattern; 502 delete REPattern;
495 delete REMatcher; 503 delete REMatcher;
496 utext_close(&inputText); 504 utext_close(&inputText);
497 utext_close(&pattern); 505 utext_close(&pattern);
498 delete[] textChars; 506 delete[] textChars;
499 return retVal; 507 return retVal;
500 } 508 }
501 509
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
547 if (status != expectedStatus) { 555 if (status != expectedStatus) {
548 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err orName(status)); 556 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err orName(status));
549 } else { 557 } else {
550 if (status != U_ZERO_ERROR) { 558 if (status != U_ZERO_ERROR) {
551 if (pe.line != errLine || pe.offset != errCol) { 559 if (pe.line != errLine || pe.offset != errCol) {
552 errln("Line %d: incorrect line/offset from UParseError. Expecte d %d/%d; got %d/%d.\n", 560 errln("Line %d: incorrect line/offset from UParseError. Expecte d %d/%d; got %d/%d.\n",
553 line, errLine, errCol, pe.line, pe.offset); 561 line, errLine, errCol, pe.line, pe.offset);
554 } 562 }
555 } 563 }
556 } 564 }
557 565
558 delete callerPattern; 566 delete callerPattern;
559 utext_close(&patternText); 567 utext_close(&patternText);
560 } 568 }
561 569
562 570
563 571
564 //--------------------------------------------------------------------------- 572 //---------------------------------------------------------------------------
565 // 573 //
566 // Basic Check for basic functionality of regex pattern matching. 574 // Basic Check for basic functionality of regex pattern matching.
567 // Avoid the use of REGEX_FIND test macro, which has 575 // Avoid the use of REGEX_FIND test macro, which has
568 // substantial dependencies on basic Regex functionality. 576 // substantial dependencies on basic Regex functionality.
569 // 577 //
570 //--------------------------------------------------------------------------- 578 //---------------------------------------------------------------------------
571 void RegexTest::Basic() { 579 void RegexTest::Basic() {
572 580
573 581
574 // 582 //
575 // Debug - slide failing test cases early 583 // Debug - slide failing test cases early
576 // 584 //
577 #if 0 585 #if 0
578 { 586 {
579 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); 587 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE);
580 UParseError pe; 588 UParseError pe;
581 UErrorCode status = U_ZERO_ERROR; 589 UErrorCode status = U_ZERO_ERROR;
582 RegexPattern *pattern; 590 RegexPattern *pattern;
583 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc ape(), UREGEX_CASE_INSENSITIVE, pe, status); 591 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc ape(), UREGEX_CASE_INSENSITIVE, pe, status);
584 RegexPatternDump(pattern); 592 pattern->dumpPattern();
585 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz") .unescape(), status); 593 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz") .unescape(), status);
586 UBool result = m->find(); 594 UBool result = m->find();
587 printf("result = %d\n", result); 595 printf("result = %d\n", result);
588 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); 596 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd");
589 // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX===================="); 597 // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX====================");
590 } 598 }
591 exit(1); 599 exit(1);
592 #endif 600 #endif
593 601
594 602
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
722 // implementation. 730 // implementation.
723 // 731 //
724 //--------------------------------------------------------------------------- 732 //---------------------------------------------------------------------------
725 void RegexTest::UTextBasic() { 733 void RegexTest::UTextBasic() {
726 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 734 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
727 UErrorCode status = U_ZERO_ERROR; 735 UErrorCode status = U_ZERO_ERROR;
728 UText pattern = UTEXT_INITIALIZER; 736 UText pattern = UTEXT_INITIALIZER;
729 utext_openUTF8(&pattern, str_abc, -1, &status); 737 utext_openUTF8(&pattern, str_abc, -1, &status);
730 RegexMatcher matcher(&pattern, 0, status); 738 RegexMatcher matcher(&pattern, 0, status);
731 REGEX_CHECK_STATUS; 739 REGEX_CHECK_STATUS;
732 740
733 UText input = UTEXT_INITIALIZER; 741 UText input = UTEXT_INITIALIZER;
734 utext_openUTF8(&input, str_abc, -1, &status); 742 utext_openUTF8(&input, str_abc, -1, &status);
735 REGEX_CHECK_STATUS; 743 REGEX_CHECK_STATUS;
736 matcher.reset(&input); 744 matcher.reset(&input);
737 REGEX_CHECK_STATUS; 745 REGEX_CHECK_STATUS;
738 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); 746 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
739 747
740 matcher.reset(matcher.inputText()); 748 matcher.reset(matcher.inputText());
741 REGEX_CHECK_STATUS; 749 REGEX_CHECK_STATUS;
742 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); 750 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText());
743 751
744 utext_close(&pattern); 752 utext_close(&pattern);
745 utext_close(&input); 753 utext_close(&input);
746 } 754 }
747 755
748 756
749 //--------------------------------------------------------------------------- 757 //---------------------------------------------------------------------------
750 // 758 //
751 // API_Match Test that the API for class RegexMatcher 759 // API_Match Test that the API for class RegexMatcher
752 // is present and nominally working, but excluding functions 760 // is present and nominally working, but excluding functions
753 // implementing replace operations. 761 // implementing replace operations.
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after
1110 UErrorCode status = U_ZERO_ERROR; 1118 UErrorCode status = U_ZERO_ERROR;
1111 RegexPattern *p = RegexPattern::compile(".", 0, status); 1119 RegexPattern *p = RegexPattern::compile(".", 0, status);
1112 RegexMatcher *m = p->matcher(status); 1120 RegexMatcher *m = p->matcher(status);
1113 REGEX_CHECK_STATUS; 1121 REGEX_CHECK_STATUS;
1114 1122
1115 REGEX_ASSERT(m->find() == FALSE); 1123 REGEX_ASSERT(m->find() == FALSE);
1116 REGEX_ASSERT(m->input() == ""); 1124 REGEX_ASSERT(m->input() == "");
1117 delete m; 1125 delete m;
1118 delete p; 1126 delete p;
1119 } 1127 }
1120 1128
1121 // 1129 //
1122 // Regions 1130 // Regions
1123 // 1131 //
1124 { 1132 {
1125 UErrorCode status = U_ZERO_ERROR; 1133 UErrorCode status = U_ZERO_ERROR;
1126 UnicodeString testString("This is test data"); 1134 UnicodeString testString("This is test data");
1127 RegexMatcher m(".*", testString, 0, status); 1135 RegexMatcher m(".*", testString, 0, status);
1128 REGEX_CHECK_STATUS; 1136 REGEX_CHECK_STATUS;
1129 REGEX_ASSERT(m.regionStart() == 0); 1137 REGEX_ASSERT(m.regionStart() == 0);
1130 REGEX_ASSERT(m.regionEnd() == testString.length()); 1138 REGEX_ASSERT(m.regionEnd() == testString.length());
1131 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1139 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1132 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1140 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1133 1141
1134 m.region(2,4, status); 1142 m.region(2,4, status);
1135 REGEX_CHECK_STATUS; 1143 REGEX_CHECK_STATUS;
1136 REGEX_ASSERT(m.matches(status)); 1144 REGEX_ASSERT(m.matches(status));
1137 REGEX_ASSERT(m.start(status)==2); 1145 REGEX_ASSERT(m.start(status)==2);
1138 REGEX_ASSERT(m.end(status)==4); 1146 REGEX_ASSERT(m.end(status)==4);
1139 REGEX_CHECK_STATUS; 1147 REGEX_CHECK_STATUS;
1140 1148
1141 m.reset(); 1149 m.reset();
1142 REGEX_ASSERT(m.regionStart() == 0); 1150 REGEX_ASSERT(m.regionStart() == 0);
1143 REGEX_ASSERT(m.regionEnd() == testString.length()); 1151 REGEX_ASSERT(m.regionEnd() == testString.length());
1144 1152
1145 UnicodeString shorterString("short"); 1153 UnicodeString shorterString("short");
1146 m.reset(shorterString); 1154 m.reset(shorterString);
1147 REGEX_ASSERT(m.regionStart() == 0); 1155 REGEX_ASSERT(m.regionStart() == 0);
1148 REGEX_ASSERT(m.regionEnd() == shorterString.length()); 1156 REGEX_ASSERT(m.regionEnd() == shorterString.length());
1149 1157
1150 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1158 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1151 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 1159 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
1152 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 1160 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
1153 REGEX_ASSERT(&m == &m.reset()); 1161 REGEX_ASSERT(&m == &m.reset());
1154 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 1162 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
1155 1163
1156 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 1164 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
1157 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1165 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1158 REGEX_ASSERT(&m == &m.reset()); 1166 REGEX_ASSERT(&m == &m.reset());
1159 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 1167 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
1160 1168
1161 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1169 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1162 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 1170 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
1163 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 1171 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
1164 REGEX_ASSERT(&m == &m.reset()); 1172 REGEX_ASSERT(&m == &m.reset());
1165 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 1173 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
1166 1174
1167 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 1175 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
1168 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1176 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1169 REGEX_ASSERT(&m == &m.reset()); 1177 REGEX_ASSERT(&m == &m.reset());
1170 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 1178 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
1171 1179
1172 } 1180 }
1173 1181
1174 // 1182 //
1175 // hitEnd() and requireEnd() 1183 // hitEnd() and requireEnd()
1176 // 1184 //
1177 { 1185 {
1178 UErrorCode status = U_ZERO_ERROR; 1186 UErrorCode status = U_ZERO_ERROR;
1179 UnicodeString testString("aabb"); 1187 UnicodeString testString("aabb");
1180 RegexMatcher m1(".*", testString, 0, status); 1188 RegexMatcher m1(".*", testString, 0, status);
1181 REGEX_ASSERT(m1.lookingAt(status) == TRUE); 1189 REGEX_ASSERT(m1.lookingAt(status) == TRUE);
1182 REGEX_ASSERT(m1.hitEnd() == TRUE); 1190 REGEX_ASSERT(m1.hitEnd() == TRUE);
1183 REGEX_ASSERT(m1.requireEnd() == FALSE); 1191 REGEX_ASSERT(m1.requireEnd() == FALSE);
1184 REGEX_CHECK_STATUS; 1192 REGEX_CHECK_STATUS;
1185 1193
1186 status = U_ZERO_ERROR; 1194 status = U_ZERO_ERROR;
1187 RegexMatcher m2("a*", testString, 0, status); 1195 RegexMatcher m2("a*", testString, 0, status);
1188 REGEX_ASSERT(m2.lookingAt(status) == TRUE); 1196 REGEX_ASSERT(m2.lookingAt(status) == TRUE);
1189 REGEX_ASSERT(m2.hitEnd() == FALSE); 1197 REGEX_ASSERT(m2.hitEnd() == FALSE);
1190 REGEX_ASSERT(m2.requireEnd() == FALSE); 1198 REGEX_ASSERT(m2.requireEnd() == FALSE);
1191 REGEX_CHECK_STATUS; 1199 REGEX_CHECK_STATUS;
1192 1200
1193 status = U_ZERO_ERROR; 1201 status = U_ZERO_ERROR;
1194 RegexMatcher m3(".*$", testString, 0, status); 1202 RegexMatcher m3(".*$", testString, 0, status);
1195 REGEX_ASSERT(m3.lookingAt(status) == TRUE); 1203 REGEX_ASSERT(m3.lookingAt(status) == TRUE);
(...skipping 17 matching lines...) Expand all
1213 m.reset(ucharString); // should not compile. 1221 m.reset(ucharString); // should not compile.
1214 1222
1215 RegexPattern *p = RegexPattern::compile(".", 0, status); 1223 RegexPattern *p = RegexPattern::compile(".", 0, status);
1216 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co mpile. 1224 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co mpile.
1217 1225
1218 RegexMatcher m3(".", ucharString, 0, status); // Should not compile 1226 RegexMatcher m3(".", ucharString, 0, status); // Should not compile
1219 } 1227 }
1220 #endif 1228 #endif
1221 1229
1222 // 1230 //
1223 // Time Outs. 1231 // Time Outs.
1224 // Note: These tests will need to be changed when the regexp engine is 1232 // Note: These tests will need to be changed when the regexp engine is
1225 // able to detect and cut short the exponential time behavior on 1233 // able to detect and cut short the exponential time behavior on
1226 // this type of match. 1234 // this type of match.
1227 // 1235 //
1228 { 1236 {
1229 UErrorCode status = U_ZERO_ERROR; 1237 UErrorCode status = U_ZERO_ERROR;
1230 // Enough 'a's in the string to cause the match to time out. 1238 // Enough 'a's in the string to cause the match to time out.
1231 // (Each additional 'a' doubles the time) 1239 // (Each additional 'a' doubles the time)
1232 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); 1240 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa");
1233 RegexMatcher matcher("(a+)+b", testString, 0, status); 1241 RegexMatcher matcher("(a+)+b", testString, 0, status);
1234 REGEX_CHECK_STATUS; 1242 REGEX_CHECK_STATUS;
1235 REGEX_ASSERT(matcher.getTimeLimit() == 0); 1243 REGEX_ASSERT(matcher.getTimeLimit() == 0);
1236 matcher.setTimeLimit(100, status); 1244 matcher.setTimeLimit(100, status);
1237 REGEX_ASSERT(matcher.getTimeLimit() == 100); 1245 REGEX_ASSERT(matcher.getTimeLimit() == 100);
1238 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1246 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1239 REGEX_ASSERT(status == U_REGEX_TIME_OUT); 1247 REGEX_ASSERT(status == U_REGEX_TIME_OUT);
1240 } 1248 }
1241 { 1249 {
1242 UErrorCode status = U_ZERO_ERROR; 1250 UErrorCode status = U_ZERO_ERROR;
1243 // Few enough 'a's to slip in under the time limit. 1251 // Few enough 'a's to slip in under the time limit.
1244 UnicodeString testString("aaaaaaaaaaaaaaaaaa"); 1252 UnicodeString testString("aaaaaaaaaaaaaaaaaa");
1245 RegexMatcher matcher("(a+)+b", testString, 0, status); 1253 RegexMatcher matcher("(a+)+b", testString, 0, status);
1246 REGEX_CHECK_STATUS; 1254 REGEX_CHECK_STATUS;
1247 matcher.setTimeLimit(100, status); 1255 matcher.setTimeLimit(100, status);
1248 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1256 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1249 REGEX_CHECK_STATUS; 1257 REGEX_CHECK_STATUS;
1250 } 1258 }
1251 1259
1252 // 1260 //
1253 // Stack Limits 1261 // Stack Limits
1254 // 1262 //
1255 { 1263 {
1256 UErrorCode status = U_ZERO_ERROR; 1264 UErrorCode status = U_ZERO_ERROR;
1257 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A' 1265 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000, filled with 'A'
1258 1266
1259 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations 1267 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits optimizations
1260 // of the '+', and makes the stack frames larger. 1268 // of the '+', and makes the stack frames larger.
1261 RegexMatcher matcher("(A)+A$", testString, 0, status); 1269 RegexMatcher matcher("(A)+A$", testString, 0, status);
1262 1270
1263 // With the default stack, this match should fail to run 1271 // With the default stack, this match should fail to run
1264 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1272 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1265 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 1273 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
1266 1274
1267 // With unlimited stack, it should run 1275 // With unlimited stack, it should run
1268 status = U_ZERO_ERROR; 1276 status = U_ZERO_ERROR;
1269 matcher.setStackLimit(0, status); 1277 matcher.setStackLimit(0, status);
1270 REGEX_CHECK_STATUS; 1278 REGEX_CHECK_STATUS;
1271 REGEX_ASSERT(matcher.lookingAt(status) == TRUE); 1279 REGEX_ASSERT(matcher.lookingAt(status) == TRUE);
1272 REGEX_CHECK_STATUS; 1280 REGEX_CHECK_STATUS;
1273 REGEX_ASSERT(matcher.getStackLimit() == 0); 1281 REGEX_ASSERT(matcher.getStackLimit() == 0);
1274 1282
1275 // With a limited stack, the match should fail 1283 // With a limited stack, the match should fail
1276 status = U_ZERO_ERROR; 1284 status = U_ZERO_ERROR;
1277 matcher.setStackLimit(10000, status); 1285 matcher.setStackLimit(10000, status);
1278 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); 1286 REGEX_ASSERT(matcher.lookingAt(status) == FALSE);
1279 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); 1287 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW);
1280 REGEX_ASSERT(matcher.getStackLimit() == 10000); 1288 REGEX_ASSERT(matcher.getStackLimit() == 10000);
1281 } 1289 }
1282 1290
1283 // A pattern that doesn't save state should work with 1291 // A pattern that doesn't save state should work with
1284 // a minimal sized stack 1292 // a minimal sized stack
1285 { 1293 {
1286 UErrorCode status = U_ZERO_ERROR; 1294 UErrorCode status = U_ZERO_ERROR;
1287 UnicodeString testString = "abc"; 1295 UnicodeString testString = "abc";
1288 RegexMatcher matcher("abc", testString, 0, status); 1296 RegexMatcher matcher("abc", testString, 0, status);
1289 REGEX_CHECK_STATUS; 1297 REGEX_CHECK_STATUS;
1290 matcher.setStackLimit(30, status); 1298 matcher.setStackLimit(30, status);
1291 REGEX_CHECK_STATUS; 1299 REGEX_CHECK_STATUS;
1292 REGEX_ASSERT(matcher.matches(status) == TRUE); 1300 REGEX_ASSERT(matcher.matches(status) == TRUE);
1293 REGEX_CHECK_STATUS; 1301 REGEX_CHECK_STATUS;
1294 REGEX_ASSERT(matcher.getStackLimit() == 30); 1302 REGEX_ASSERT(matcher.getStackLimit() == 30);
1295 1303
1296 // Negative stack sizes should fail 1304 // Negative stack sizes should fail
1297 status = U_ZERO_ERROR; 1305 status = U_ZERO_ERROR;
1298 matcher.setStackLimit(1000, status); 1306 matcher.setStackLimit(1000, status);
1299 REGEX_CHECK_STATUS; 1307 REGEX_CHECK_STATUS;
1300 matcher.setStackLimit(-1, status); 1308 matcher.setStackLimit(-1, status);
1301 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 1309 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1302 REGEX_ASSERT(matcher.getStackLimit() == 1000); 1310 REGEX_ASSERT(matcher.getStackLimit() == 1000);
1303 } 1311 }
1304 1312
1305 1313
1306 } 1314 }
1307 1315
1308 1316
1309 1317
1310 1318
1311 1319
1312 1320
1313 //--------------------------------------------------------------------------- 1321 //---------------------------------------------------------------------------
1314 // 1322 //
(...skipping 528 matching lines...) Expand 10 before | Expand all | Expand 10 after
1843 REGEX_CHECK_STATUS; 1851 REGEX_CHECK_STATUS;
1844 1852
1845 UText input1 = UTEXT_INITIALIZER; 1853 UText input1 = UTEXT_INITIALIZER;
1846 UText input2 = UTEXT_INITIALIZER; 1854 UText input2 = UTEXT_INITIALIZER;
1847 UText empty = UTEXT_INITIALIZER; 1855 UText empty = UTEXT_INITIALIZER;
1848 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &st atus); 1856 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &st atus);
1849 REGEX_VERBOSE_TEXT(&input1); 1857 REGEX_VERBOSE_TEXT(&input1);
1850 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); 1858 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status);
1851 REGEX_VERBOSE_TEXT(&input2); 1859 REGEX_VERBOSE_TEXT(&input2);
1852 utext_openUChars(&empty, NULL, 0, &status); 1860 utext_openUChars(&empty, NULL, 0, &status);
1853 1861
1854 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not na tivelen (input1) ? */ 1862 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not na tivelen (input1) ? */
1855 int32_t input2Len = strlen("not abc"); 1863 int32_t input2Len = strlen("not abc");
1856 1864
1857 1865
1858 // 1866 //
1859 // Matcher creation and reset. 1867 // Matcher creation and reset.
1860 // 1868 //
1861 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); 1869 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1);
1862 REGEX_CHECK_STATUS; 1870 REGEX_CHECK_STATUS;
1863 REGEX_ASSERT(m1->lookingAt(status) == TRUE); 1871 REGEX_ASSERT(m1->lookingAt(status) == TRUE);
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
1953 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); 1961 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE);
1954 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1962 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1955 status = U_ZERO_ERROR; 1963 status = U_ZERO_ERROR;
1956 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); 1964 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE);
1957 REGEX_CHECK_STATUS; 1965 REGEX_CHECK_STATUS;
1958 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); 1966 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE);
1959 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 1967 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1960 1968
1961 delete m1; 1969 delete m1;
1962 delete pat2; 1970 delete pat2;
1963 1971
1964 utext_close(&re); 1972 utext_close(&re);
1965 utext_close(&input1); 1973 utext_close(&input1);
1966 utext_close(&input2); 1974 utext_close(&input2);
1967 utext_close(&empty); 1975 utext_close(&empty);
1968 } 1976 }
1969 1977
1970 1978
1971 // 1979 //
1972 // Capture Group. 1980 // Capture Group.
1973 // RegexMatcher::start(); 1981 // RegexMatcher::start();
1974 // RegexMatcher::end(); 1982 // RegexMatcher::end();
1975 // RegexMatcher::groupCount(); 1983 // RegexMatcher::groupCount();
1976 // 1984 //
1977 { 1985 {
1978 int32_t flags=0; 1986 int32_t flags=0;
1979 UParseError pe; 1987 UParseError pe;
1980 UErrorCode status=U_ZERO_ERROR; 1988 UErrorCode status=U_ZERO_ERROR;
1981 UText re=UTEXT_INITIALIZER; 1989 UText re=UTEXT_INITIALIZER;
1982 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x 34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67 )(.*) */ 1990 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x 34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67 )(.*) */
1983 utext_openUTF8(&re, str_01234567_pat, -1, &status); 1991 utext_openUTF8(&re, str_01234567_pat, -1, &status);
1984 1992
1985 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 1993 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
1986 REGEX_CHECK_STATUS; 1994 REGEX_CHECK_STATUS;
1987 1995
1988 UText input = UTEXT_INITIALIZER; 1996 UText input = UTEXT_INITIALIZER;
1989 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36 , 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 1997 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36 , 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
1990 utext_openUTF8(&input, str_0123456789, -1, &status); 1998 utext_openUTF8(&input, str_0123456789, -1, &status);
1991 1999
1992 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 2000 RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
1993 REGEX_CHECK_STATUS; 2001 REGEX_CHECK_STATUS;
1994 REGEX_ASSERT(matcher->lookingAt(status) == TRUE); 2002 REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
1995 static const int32_t matchStarts[] = {0, 2, 4, 8}; 2003 static const int32_t matchStarts[] = {0, 2, 4, 8};
1996 static const int32_t matchEnds[] = {10, 8, 6, 10}; 2004 static const int32_t matchEnds[] = {10, 8, 6, 10};
1997 int32_t i; 2005 int32_t i;
(...skipping 14 matching lines...) Expand all
2012 2020
2013 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); 2021 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status));
2014 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); 2022 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status));
2015 2023
2016 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR) ; 2024 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR) ;
2017 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR) ; 2025 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR) ;
2018 matcher->reset(); 2026 matcher->reset();
2019 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); 2027 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE);
2020 2028
2021 matcher->lookingAt(status); 2029 matcher->lookingAt(status);
2022 2030
2023 UnicodeString dest; 2031 UnicodeString dest;
2024 UText destText = UTEXT_INITIALIZER; 2032 UText destText = UTEXT_INITIALIZER;
2025 utext_openUnicodeString(&destText, &dest, &status); 2033 utext_openUnicodeString(&destText, &dest, &status);
2026 UText *result; 2034 UText *result;
2027 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x 36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ 2035 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x 36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */
2028 //» Test shallow-clone API 2036 // Test shallow-clone API
2029 int64_t group_len; 2037 int64_t group_len;
2030 result = matcher->group((UText *)NULL, group_len, status); 2038 result = matcher->group((UText *)NULL, group_len, status);
2031 REGEX_CHECK_STATUS; 2039 REGEX_CHECK_STATUS;
2032 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 2040 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
2033 utext_close(result); 2041 utext_close(result);
2034 result = matcher->group(0, &destText, group_len, status); 2042 result = matcher->group(0, &destText, group_len, status);
2035 REGEX_CHECK_STATUS; 2043 REGEX_CHECK_STATUS;
2036 REGEX_ASSERT(result == &destText); 2044 REGEX_ASSERT(result == &destText);
2037 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 2045 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
2038 // destText is now immutable, reopen it 2046 // destText is now immutable, reopen it
2039 utext_close(&destText); 2047 utext_close(&destText);
2040 utext_openUnicodeString(&destText, &dest, &status); 2048 utext_openUnicodeString(&destText, &dest, &status);
2041 2049
2042 result = matcher->group(0, NULL, status); 2050 result = matcher->group(0, NULL, status);
2043 REGEX_CHECK_STATUS; 2051 REGEX_CHECK_STATUS;
2044 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 2052 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
2045 utext_close(result); 2053 utext_close(result);
2046 result = matcher->group(0, &destText, status); 2054 result = matcher->group(0, &destText, status);
2047 REGEX_CHECK_STATUS; 2055 REGEX_CHECK_STATUS;
2048 REGEX_ASSERT(result == &destText); 2056 REGEX_ASSERT(result == &destText);
2049 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); 2057 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result);
2050 2058
2051 result = matcher->group(1, NULL, status); 2059 result = matcher->group(1, NULL, status);
2052 REGEX_CHECK_STATUS; 2060 REGEX_CHECK_STATUS;
2053 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */ 2061 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 }; /* 234567 */
2054 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 2062 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
2055 utext_close(result); 2063 utext_close(result);
2056 result = matcher->group(1, &destText, status); 2064 result = matcher->group(1, &destText, status);
2057 REGEX_CHECK_STATUS; 2065 REGEX_CHECK_STATUS;
2058 REGEX_ASSERT(result == &destText); 2066 REGEX_ASSERT(result == &destText);
2059 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); 2067 REGEX_ASSERT_UTEXT_UTF8(str_234567, result);
2060 2068
2061 result = matcher->group(2, NULL, status); 2069 result = matcher->group(2, NULL, status);
2062 REGEX_CHECK_STATUS; 2070 REGEX_CHECK_STATUS;
2063 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ 2071 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */
2064 REGEX_ASSERT_UTEXT_UTF8(str_45, result); 2072 REGEX_ASSERT_UTEXT_UTF8(str_45, result);
2065 utext_close(result); 2073 utext_close(result);
2066 result = matcher->group(2, &destText, status); 2074 result = matcher->group(2, &destText, status);
2067 REGEX_CHECK_STATUS; 2075 REGEX_CHECK_STATUS;
2068 REGEX_ASSERT(result == &destText); 2076 REGEX_ASSERT(result == &destText);
2069 REGEX_ASSERT_UTEXT_UTF8(str_45, result); 2077 REGEX_ASSERT_UTEXT_UTF8(str_45, result);
2070 2078
2071 result = matcher->group(3, NULL, status); 2079 result = matcher->group(3, NULL, status);
2072 REGEX_CHECK_STATUS; 2080 REGEX_CHECK_STATUS;
2073 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ 2081 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */
2074 REGEX_ASSERT_UTEXT_UTF8(str_89, result); 2082 REGEX_ASSERT_UTEXT_UTF8(str_89, result);
2075 utext_close(result); 2083 utext_close(result);
2076 result = matcher->group(3, &destText, status); 2084 result = matcher->group(3, &destText, status);
2077 REGEX_CHECK_STATUS; 2085 REGEX_CHECK_STATUS;
2078 REGEX_ASSERT(result == &destText); 2086 REGEX_ASSERT(result == &destText);
2079 REGEX_ASSERT_UTEXT_UTF8(str_89, result); 2087 REGEX_ASSERT_UTEXT_UTF8(str_89, result);
2080 2088
2081 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR) ; 2089 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR) ;
2082 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR) ; 2090 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR) ;
2083 matcher->reset(); 2091 matcher->reset();
2084 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); 2092 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE);
2085 2093
2086 delete matcher; 2094 delete matcher;
2087 delete pat; 2095 delete pat;
2088 2096
2089 utext_close(&destText); 2097 utext_close(&destText);
2090 utext_close(&input); 2098 utext_close(&input);
2091 utext_close(&re); 2099 utext_close(&re);
2092 } 2100 }
2093 2101
2094 // 2102 //
2095 // find 2103 // find
2096 // 2104 //
2097 { 2105 {
2098 int32_t flags=0; 2106 int32_t flags=0;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
2139 2147
2140 status = U_ZERO_ERROR; 2148 status = U_ZERO_ERROR;
2141 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); 2149 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR);
2142 status = U_ZERO_ERROR; 2150 status = U_ZERO_ERROR;
2143 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); 2151 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR);
2144 2152
2145 REGEX_ASSERT(matcher->groupCount() == 0); 2153 REGEX_ASSERT(matcher->groupCount() == 0);
2146 2154
2147 delete matcher; 2155 delete matcher;
2148 delete pat; 2156 delete pat;
2149 2157
2150 utext_close(&input); 2158 utext_close(&input);
2151 utext_close(&re); 2159 utext_close(&re);
2152 } 2160 }
2153 2161
2154 2162
2155 // 2163 //
2156 // find, with \G in pattern (true if at the end of a previous match). 2164 // find, with \G in pattern (true if at the end of a previous match).
2157 // 2165 //
2158 { 2166 {
2159 int32_t flags=0; 2167 int32_t flags=0;
2160 UParseError pe; 2168 UParseError pe;
2161 UErrorCode status=U_ZERO_ERROR; 2169 UErrorCode status=U_ZERO_ERROR;
2162 UText re=UTEXT_INITIALIZER; 2170 UText re=UTEXT_INITIALIZER;
2163 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0 x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x0 0 }; /* .*?(?:(\\Gabc)|(abc)) */ 2171 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0 x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x0 0 }; /* .*?(?:(\\Gabc)|(abc)) */
2164 utext_openUTF8(&re, str_Gabcabc, -1, &status); 2172 utext_openUTF8(&re, str_Gabcabc, -1, &status);
2165 2173
2166 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 2174 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
2167 2175
2168 REGEX_CHECK_STATUS; 2176 REGEX_CHECK_STATUS;
2169 UText input = UTEXT_INITIALIZER; 2177 UText input = UTEXT_INITIALIZER;
2170 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ 2178 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */
2171 utext_openUTF8(&input, str_abcabcabc, -1, &status); 2179 utext_openUTF8(&input, str_abcabcabc, -1, &status);
2172 // 012345678901234567 2180 // 012345678901234567
2173 2181
2174 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); 2182 RegexMatcher *matcher = &pat->matcher(status)->reset(&input);
2175 REGEX_CHECK_STATUS; 2183 REGEX_CHECK_STATUS;
2176 REGEX_ASSERT(matcher->find()); 2184 REGEX_ASSERT(matcher->find());
2177 REGEX_ASSERT(matcher->start(status) == 0); 2185 REGEX_ASSERT(matcher->start(status) == 0);
2178 REGEX_ASSERT(matcher->start(1, status) == -1); 2186 REGEX_ASSERT(matcher->start(1, status) == -1);
2179 REGEX_ASSERT(matcher->start(2, status) == 1); 2187 REGEX_ASSERT(matcher->start(2, status) == 1);
2180 2188
2181 REGEX_ASSERT(matcher->find()); 2189 REGEX_ASSERT(matcher->find());
2182 REGEX_ASSERT(matcher->start(status) == 4); 2190 REGEX_ASSERT(matcher->start(status) == 4);
2183 REGEX_ASSERT(matcher->start(1, status) == 4); 2191 REGEX_ASSERT(matcher->start(1, status) == 4);
2184 REGEX_ASSERT(matcher->start(2, status) == -1); 2192 REGEX_ASSERT(matcher->start(2, status) == -1);
2185 REGEX_CHECK_STATUS; 2193 REGEX_CHECK_STATUS;
2186 2194
2187 delete matcher; 2195 delete matcher;
2188 delete pat; 2196 delete pat;
2189 2197
2190 utext_close(&input); 2198 utext_close(&input);
2191 utext_close(&re); 2199 utext_close(&re);
2192 } 2200 }
2193 2201
2194 // 2202 //
2195 // find with zero length matches, match position should bump ahead 2203 // find with zero length matches, match position should bump ahead
2196 // to prevent loops. 2204 // to prevent loops.
2197 // 2205 //
2198 { 2206 {
2199 int32_t i; 2207 int32_t i;
(...skipping 19 matching lines...) Expand all
2219 utext_openUTF8(&s, (char *)aboveBMP, -1, &status); 2227 utext_openUTF8(&s, (char *)aboveBMP, -1, &status);
2220 m.reset(&s); 2228 m.reset(&s);
2221 for (i=0; ; i+=4) { 2229 for (i=0; ; i+=4) {
2222 if (m.find() == FALSE) { 2230 if (m.find() == FALSE) {
2223 break; 2231 break;
2224 } 2232 }
2225 REGEX_ASSERT(m.start(status) == i); 2233 REGEX_ASSERT(m.start(status) == i);
2226 REGEX_ASSERT(m.end(status) == i); 2234 REGEX_ASSERT(m.end(status) == i);
2227 } 2235 }
2228 REGEX_ASSERT(i==20); 2236 REGEX_ASSERT(i==20);
2229 2237
2230 utext_close(&s); 2238 utext_close(&s);
2231 } 2239 }
2232 { 2240 {
2233 // find() loop breaking test. 2241 // find() loop breaking test.
2234 // with pattern of /.?/, should see a series of one char matches, then a single 2242 // with pattern of /.?/, should see a series of one char matches, then a single
2235 // match of zero length at the end of the input string. 2243 // match of zero length at the end of the input string.
2236 int32_t i; 2244 int32_t i;
2237 UErrorCode status=U_ZERO_ERROR; 2245 UErrorCode status=U_ZERO_ERROR;
2238 RegexMatcher m(".?", 0, status); 2246 RegexMatcher m(".?", 0, status);
2239 REGEX_CHECK_STATUS; 2247 REGEX_CHECK_STATUS;
2240 UText s = UTEXT_INITIALIZER; 2248 UText s = UTEXT_INITIALIZER;
2241 utext_openUTF8(&s, " ", -1, &status); 2249 utext_openUTF8(&s, " ", -1, &status);
2242 m.reset(&s); 2250 m.reset(&s);
2243 for (i=0; ; i++) { 2251 for (i=0; ; i++) {
2244 if (m.find() == FALSE) { 2252 if (m.find() == FALSE) {
2245 break; 2253 break;
2246 } 2254 }
2247 REGEX_ASSERT(m.start(status) == i); 2255 REGEX_ASSERT(m.start(status) == i);
2248 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); 2256 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i));
2249 } 2257 }
2250 REGEX_ASSERT(i==5); 2258 REGEX_ASSERT(i==5);
2251 2259
2252 utext_close(&s); 2260 utext_close(&s);
2253 } 2261 }
2254 2262
2255 2263
2256 // 2264 //
2257 // Matchers with no input string behave as if they had an empty input string. 2265 // Matchers with no input string behave as if they had an empty input string.
2258 // 2266 //
2259 2267
2260 { 2268 {
2261 UErrorCode status = U_ZERO_ERROR; 2269 UErrorCode status = U_ZERO_ERROR;
2262 RegexMatcher m(".?", 0, status); 2270 RegexMatcher m(".?", 0, status);
2263 REGEX_CHECK_STATUS; 2271 REGEX_CHECK_STATUS;
2264 REGEX_ASSERT(m.find()); 2272 REGEX_ASSERT(m.find());
2265 REGEX_ASSERT(m.start(status) == 0); 2273 REGEX_ASSERT(m.start(status) == 0);
2266 REGEX_ASSERT(m.input() == ""); 2274 REGEX_ASSERT(m.input() == "");
2267 } 2275 }
2268 { 2276 {
2269 UErrorCode status = U_ZERO_ERROR; 2277 UErrorCode status = U_ZERO_ERROR;
2270 RegexPattern *p = RegexPattern::compile(".", 0, status); 2278 RegexPattern *p = RegexPattern::compile(".", 0, status);
2271 RegexMatcher *m = p->matcher(status); 2279 RegexMatcher *m = p->matcher(status);
2272 REGEX_CHECK_STATUS; 2280 REGEX_CHECK_STATUS;
2273 2281
2274 REGEX_ASSERT(m->find() == FALSE); 2282 REGEX_ASSERT(m->find() == FALSE);
2275 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); 2283 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0);
2276 delete m; 2284 delete m;
2277 delete p; 2285 delete p;
2278 } 2286 }
2279 2287
2280 // 2288 //
2281 // Regions 2289 // Regions
2282 // 2290 //
2283 { 2291 {
2284 UErrorCode status = U_ZERO_ERROR; 2292 UErrorCode status = U_ZERO_ERROR;
2285 UText testPattern = UTEXT_INITIALIZER; 2293 UText testPattern = UTEXT_INITIALIZER;
2286 UText testText = UTEXT_INITIALIZER; 2294 UText testText = UTEXT_INITIALIZER;
2287 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); 2295 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status);
2288 REGEX_VERBOSE_TEXT(&testPattern); 2296 REGEX_VERBOSE_TEXT(&testPattern);
2289 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &stat us); 2297 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &stat us);
2290 REGEX_VERBOSE_TEXT(&testText); 2298 REGEX_VERBOSE_TEXT(&testText);
2291 2299
2292 RegexMatcher m(&testPattern, &testText, 0, status); 2300 RegexMatcher m(&testPattern, &testText, 0, status);
2293 REGEX_CHECK_STATUS; 2301 REGEX_CHECK_STATUS;
2294 REGEX_ASSERT(m.regionStart() == 0); 2302 REGEX_ASSERT(m.regionStart() == 0);
2295 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 2303 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
2296 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 2304 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
2297 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 2305 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
2298 2306
2299 m.region(2,4, status); 2307 m.region(2,4, status);
2300 REGEX_CHECK_STATUS; 2308 REGEX_CHECK_STATUS;
2301 REGEX_ASSERT(m.matches(status)); 2309 REGEX_ASSERT(m.matches(status));
2302 REGEX_ASSERT(m.start(status)==2); 2310 REGEX_ASSERT(m.start(status)==2);
2303 REGEX_ASSERT(m.end(status)==4); 2311 REGEX_ASSERT(m.end(status)==4);
2304 REGEX_CHECK_STATUS; 2312 REGEX_CHECK_STATUS;
2305 2313
2306 m.reset(); 2314 m.reset();
2307 REGEX_ASSERT(m.regionStart() == 0); 2315 REGEX_ASSERT(m.regionStart() == 0);
2308 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); 2316 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data"));
2309 2317
2310 regextst_openUTF8FromInvariant(&testText, "short", -1, &status); 2318 regextst_openUTF8FromInvariant(&testText, "short", -1, &status);
2311 REGEX_VERBOSE_TEXT(&testText); 2319 REGEX_VERBOSE_TEXT(&testText);
2312 m.reset(&testText); 2320 m.reset(&testText);
2313 REGEX_ASSERT(m.regionStart() == 0); 2321 REGEX_ASSERT(m.regionStart() == 0);
2314 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); 2322 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short"));
2315 2323
2316 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 2324 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
2317 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); 2325 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE));
2318 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 2326 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
2319 REGEX_ASSERT(&m == &m.reset()); 2327 REGEX_ASSERT(&m == &m.reset());
2320 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); 2328 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE);
2321 2329
2322 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); 2330 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE));
2323 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 2331 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
2324 REGEX_ASSERT(&m == &m.reset()); 2332 REGEX_ASSERT(&m == &m.reset());
2325 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); 2333 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE);
2326 2334
2327 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 2335 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
2328 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); 2336 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE));
2329 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 2337 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
2330 REGEX_ASSERT(&m == &m.reset()); 2338 REGEX_ASSERT(&m == &m.reset());
2331 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); 2339 REGEX_ASSERT(m.hasTransparentBounds() == TRUE);
2332 2340
2333 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); 2341 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE));
2334 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 2342 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
2335 REGEX_ASSERT(&m == &m.reset()); 2343 REGEX_ASSERT(&m == &m.reset());
2336 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); 2344 REGEX_ASSERT(m.hasTransparentBounds() == FALSE);
2337 2345
2338 utext_close(&testText); 2346 utext_close(&testText);
2339 utext_close(&testPattern); 2347 utext_close(&testPattern);
2340 } 2348 }
2341 2349
2342 // 2350 //
2343 // hitEnd() and requireEnd() 2351 // hitEnd() and requireEnd()
2344 // 2352 //
2345 { 2353 {
2346 UErrorCode status = U_ZERO_ERROR; 2354 UErrorCode status = U_ZERO_ERROR;
2347 UText testPattern = UTEXT_INITIALIZER; 2355 UText testPattern = UTEXT_INITIALIZER;
2348 UText testText = UTEXT_INITIALIZER; 2356 UText testText = UTEXT_INITIALIZER;
2349 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 2357 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */
2350 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ 2358 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */
2351 utext_openUTF8(&testPattern, str_, -1, &status); 2359 utext_openUTF8(&testPattern, str_, -1, &status);
2352 utext_openUTF8(&testText, str_aabb, -1, &status); 2360 utext_openUTF8(&testText, str_aabb, -1, &status);
2353 2361
2354 RegexMatcher m1(&testPattern, &testText, 0, status); 2362 RegexMatcher m1(&testPattern, &testText, 0, status);
2355 REGEX_ASSERT(m1.lookingAt(status) == TRUE); 2363 REGEX_ASSERT(m1.lookingAt(status) == TRUE);
2356 REGEX_ASSERT(m1.hitEnd() == TRUE); 2364 REGEX_ASSERT(m1.hitEnd() == TRUE);
2357 REGEX_ASSERT(m1.requireEnd() == FALSE); 2365 REGEX_ASSERT(m1.requireEnd() == FALSE);
2358 REGEX_CHECK_STATUS; 2366 REGEX_CHECK_STATUS;
2359 2367
2360 status = U_ZERO_ERROR; 2368 status = U_ZERO_ERROR;
2361 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ 2369 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */
2362 utext_openUTF8(&testPattern, str_a, -1, &status); 2370 utext_openUTF8(&testPattern, str_a, -1, &status);
2363 RegexMatcher m2(&testPattern, &testText, 0, status); 2371 RegexMatcher m2(&testPattern, &testText, 0, status);
2364 REGEX_ASSERT(m2.lookingAt(status) == TRUE); 2372 REGEX_ASSERT(m2.lookingAt(status) == TRUE);
2365 REGEX_ASSERT(m2.hitEnd() == FALSE); 2373 REGEX_ASSERT(m2.hitEnd() == FALSE);
2366 REGEX_ASSERT(m2.requireEnd() == FALSE); 2374 REGEX_ASSERT(m2.requireEnd() == FALSE);
2367 REGEX_CHECK_STATUS; 2375 REGEX_CHECK_STATUS;
2368 2376
2369 status = U_ZERO_ERROR; 2377 status = U_ZERO_ERROR;
2370 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ 2378 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */
2371 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); 2379 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status);
2372 RegexMatcher m3(&testPattern, &testText, 0, status); 2380 RegexMatcher m3(&testPattern, &testText, 0, status);
2373 REGEX_ASSERT(m3.lookingAt(status) == TRUE); 2381 REGEX_ASSERT(m3.lookingAt(status) == TRUE);
2374 REGEX_ASSERT(m3.hitEnd() == TRUE); 2382 REGEX_ASSERT(m3.hitEnd() == TRUE);
2375 REGEX_ASSERT(m3.requireEnd() == TRUE); 2383 REGEX_ASSERT(m3.requireEnd() == TRUE);
2376 REGEX_CHECK_STATUS; 2384 REGEX_CHECK_STATUS;
2377 2385
2378 utext_close(&testText); 2386 utext_close(&testText);
2379 utext_close(&testPattern); 2387 utext_close(&testPattern);
2380 } 2388 }
2381 } 2389 }
2382 2390
2383 2391
2384 //--------------------------------------------------------------------------- 2392 //---------------------------------------------------------------------------
2385 // 2393 //
2386 // API_Replace_UTF8 API test for class RegexMatcher, testing the 2394 // API_Replace_UTF8 API test for class RegexMatcher, testing the
2387 // Replace family of functions. 2395 // Replace family of functions.
2388 // 2396 //
2389 //--------------------------------------------------------------------------- 2397 //---------------------------------------------------------------------------
2390 void RegexTest::API_Replace_UTF8() { 2398 void RegexTest::API_Replace_UTF8() {
2391 // 2399 //
2392 // Replace 2400 // Replace
2393 // 2401 //
2394 int32_t flags=0; 2402 int32_t flags=0;
2395 UParseError pe; 2403 UParseError pe;
2396 UErrorCode status=U_ZERO_ERROR; 2404 UErrorCode status=U_ZERO_ERROR;
2397 2405
2398 UText re=UTEXT_INITIALIZER; 2406 UText re=UTEXT_INITIALIZER;
2399 regextst_openUTF8FromInvariant(&re, "abc", -1, &status); 2407 regextst_openUTF8FromInvariant(&re, "abc", -1, &status);
2400 REGEX_VERBOSE_TEXT(&re); 2408 REGEX_VERBOSE_TEXT(&re);
2401 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); 2409 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status);
2402 REGEX_CHECK_STATUS; 2410 REGEX_CHECK_STATUS;
2403 2411
2404 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ 2412 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */
2405 // 012345678901234567 2413 // 012345678901234567
2406 UText dataText = UTEXT_INITIALIZER; 2414 UText dataText = UTEXT_INITIALIZER;
2407 utext_openUTF8(&dataText, data, -1, &status); 2415 utext_openUTF8(&dataText, data, -1, &status);
2408 REGEX_CHECK_STATUS; 2416 REGEX_CHECK_STATUS;
2409 REGEX_VERBOSE_TEXT(&dataText); 2417 REGEX_VERBOSE_TEXT(&dataText);
2410 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); 2418 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText);
2411 2419
2412 // 2420 //
2413 // Plain vanilla matches. 2421 // Plain vanilla matches.
2414 // 2422 //
2415 UnicodeString dest; 2423 UnicodeString dest;
2416 UText destText = UTEXT_INITIALIZER; 2424 UText destText = UTEXT_INITIALIZER;
2417 utext_openUnicodeString(&destText, &dest, &status); 2425 utext_openUnicodeString(&destText, &dest, &status);
2418 UText *result; 2426 UText *result;
2419 2427
2420 UText replText = UTEXT_INITIALIZER; 2428 UText replText = UTEXT_INITIALIZER;
2421 2429
2422 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ 2430 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */
2423 utext_openUTF8(&replText, str_yz, -1, &status); 2431 utext_openUTF8(&replText, str_yz, -1, &status);
2424 REGEX_VERBOSE_TEXT(&replText); 2432 REGEX_VERBOSE_TEXT(&replText);
2425 result = matcher->replaceFirst(&replText, NULL, status); 2433 result = matcher->replaceFirst(&replText, NULL, status);
2426 REGEX_CHECK_STATUS; 2434 REGEX_CHECK_STATUS;
2427 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63 , 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ 2435 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63 , 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */
2428 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); 2436 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result);
2429 utext_close(result); 2437 utext_close(result);
2430 result = matcher->replaceFirst(&replText, &destText, status); 2438 result = matcher->replaceFirst(&replText, &destText, status);
2431 REGEX_CHECK_STATUS; 2439 REGEX_CHECK_STATUS;
(...skipping 11 matching lines...) Expand all
2443 REGEX_CHECK_STATUS; 2451 REGEX_CHECK_STATUS;
2444 REGEX_ASSERT(result == &destText); 2452 REGEX_ASSERT(result == &destText);
2445 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); 2453 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result);
2446 2454
2447 // 2455 //
2448 // Plain vanilla non-matches. 2456 // Plain vanilla non-matches.
2449 // 2457 //
2450 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x6 2, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx... abx.. */ 2458 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x6 2, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx... abx.. */
2451 utext_openUTF8(&dataText, str_abxabxabx, -1, &status); 2459 utext_openUTF8(&dataText, str_abxabxabx, -1, &status);
2452 matcher->reset(&dataText); 2460 matcher->reset(&dataText);
2453 2461
2454 result = matcher->replaceFirst(&replText, NULL, status); 2462 result = matcher->replaceFirst(&replText, NULL, status);
2455 REGEX_CHECK_STATUS; 2463 REGEX_CHECK_STATUS;
2456 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 2464 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
2457 utext_close(result); 2465 utext_close(result);
2458 result = matcher->replaceFirst(&replText, &destText, status); 2466 result = matcher->replaceFirst(&replText, &destText, status);
2459 REGEX_CHECK_STATUS; 2467 REGEX_CHECK_STATUS;
2460 REGEX_ASSERT(result == &destText); 2468 REGEX_ASSERT(result == &destText);
2461 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 2469 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
2462 2470
2463 result = matcher->replaceAll(&replText, NULL, status); 2471 result = matcher->replaceAll(&replText, NULL, status);
2464 REGEX_CHECK_STATUS; 2472 REGEX_CHECK_STATUS;
2465 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 2473 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
2466 utext_close(result); 2474 utext_close(result);
2467 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ; 2475 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;
2468 result = matcher->replaceAll(&replText, &destText, status); 2476 result = matcher->replaceAll(&replText, &destText, status);
2469 REGEX_CHECK_STATUS; 2477 REGEX_CHECK_STATUS;
2470 REGEX_ASSERT(result == &destText); 2478 REGEX_ASSERT(result == &destText);
2471 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); 2479 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result);
2472 2480
2473 // 2481 //
2474 // Empty source string 2482 // Empty source string
2475 // 2483 //
2476 utext_openUTF8(&dataText, NULL, 0, &status); 2484 utext_openUTF8(&dataText, NULL, 0, &status);
2477 matcher->reset(&dataText); 2485 matcher->reset(&dataText);
2478 2486
2479 result = matcher->replaceFirst(&replText, NULL, status); 2487 result = matcher->replaceFirst(&replText, NULL, status);
2480 REGEX_CHECK_STATUS; 2488 REGEX_CHECK_STATUS;
2481 REGEX_ASSERT_UTEXT_UTF8("", result); 2489 REGEX_ASSERT_UTEXT_UTF8("", result);
2482 utext_close(result); 2490 utext_close(result);
2483 result = matcher->replaceFirst(&replText, &destText, status); 2491 result = matcher->replaceFirst(&replText, &destText, status);
2484 REGEX_CHECK_STATUS; 2492 REGEX_CHECK_STATUS;
2485 REGEX_ASSERT(result == &destText); 2493 REGEX_ASSERT(result == &destText);
2486 REGEX_ASSERT_UTEXT_UTF8("", result); 2494 REGEX_ASSERT_UTEXT_UTF8("", result);
2487 2495
2488 result = matcher->replaceAll(&replText, NULL, status); 2496 result = matcher->replaceAll(&replText, NULL, status);
2489 REGEX_CHECK_STATUS; 2497 REGEX_CHECK_STATUS;
2490 REGEX_ASSERT_UTEXT_UTF8("", result); 2498 REGEX_ASSERT_UTEXT_UTF8("", result);
2491 utext_close(result); 2499 utext_close(result);
2492 result = matcher->replaceAll(&replText, &destText, status); 2500 result = matcher->replaceAll(&replText, &destText, status);
2493 REGEX_CHECK_STATUS; 2501 REGEX_CHECK_STATUS;
2494 REGEX_ASSERT(result == &destText); 2502 REGEX_ASSERT(result == &destText);
2495 REGEX_ASSERT_UTEXT_UTF8("", result); 2503 REGEX_ASSERT_UTEXT_UTF8("", result);
2496 2504
2497 // 2505 //
2498 // Empty substitution string 2506 // Empty substitution string
2499 // 2507 //
2500 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." 2508 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.."
2501 matcher->reset(&dataText); 2509 matcher->reset(&dataText);
2502 2510
2503 utext_openUTF8(&replText, NULL, 0, &status); 2511 utext_openUTF8(&replText, NULL, 0, &status);
2504 result = matcher->replaceFirst(&replText, NULL, status); 2512 result = matcher->replaceFirst(&replText, NULL, status);
2505 REGEX_CHECK_STATUS; 2513 REGEX_CHECK_STATUS;
2506 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */ 2514 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */
2507 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 2515 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
2508 utext_close(result); 2516 utext_close(result);
2509 result = matcher->replaceFirst(&replText, &destText, status); 2517 result = matcher->replaceFirst(&replText, &destText, status);
2510 REGEX_CHECK_STATUS; 2518 REGEX_CHECK_STATUS;
2511 REGEX_ASSERT(result == &destText); 2519 REGEX_ASSERT(result == &destText);
2512 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); 2520 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result);
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
2556 // 2564 //
2557 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ 2565 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */
2558 utext_openUTF8(&re, str_add, -1, &status); 2566 utext_openUTF8(&re, str_add, -1, &status);
2559 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); 2567 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status);
2560 REGEX_CHECK_STATUS; 2568 REGEX_CHECK_STATUS;
2561 2569
2562 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */ 2570 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* abcdefg */
2563 utext_openUTF8(&dataText, str_abcdefg, -1, &status); 2571 utext_openUTF8(&dataText, str_abcdefg, -1, &status);
2564 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); 2572 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText);
2565 REGEX_CHECK_STATUS; 2573 REGEX_CHECK_STATUS;
2566 2574
2567 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ 2575 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */
2568 utext_openUTF8(&replText, str_11, -1, &status); 2576 utext_openUTF8(&replText, str_11, -1, &status);
2569 result = matcher2->replaceFirst(&replText, NULL, status); 2577 result = matcher2->replaceFirst(&replText, NULL, status);
2570 REGEX_CHECK_STATUS; 2578 REGEX_CHECK_STATUS;
2571 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 , 0x00 }; /* bcbcdefg */ 2579 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67 , 0x00 }; /* bcbcdefg */
2572 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 2580 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
2573 utext_close(result); 2581 utext_close(result);
2574 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ; 2582 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;
2575 result = matcher2->replaceFirst(&replText, &destText, status); 2583 result = matcher2->replaceFirst(&replText, &destText, status);
2576 REGEX_CHECK_STATUS; 2584 REGEX_CHECK_STATUS;
2577 REGEX_ASSERT(result == &destText); 2585 REGEX_ASSERT(result == &destText);
2578 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); 2586 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result);
2579 2587
2580 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x6 5, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */ 2588 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x6 5, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31, 0x2e, 0x00 }; /* The value of \$1 is $1. */
2581 utext_openUTF8(&replText, str_v, -1, &status); 2589 utext_openUTF8(&replText, str_v, -1, &status);
2582 REGEX_VERBOSE_TEXT(&replText); 2590 REGEX_VERBOSE_TEXT(&replText);
2583 result = matcher2->replaceFirst(&replText, NULL, status); 2591 result = matcher2->replaceFirst(&replText, NULL, status);
2584 REGEX_CHECK_STATUS; 2592 REGEX_CHECK_STATUS;
2585 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0 x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg * / 2593 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0 x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg * /
2586 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 2594 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
2587 utext_close(result); 2595 utext_close(result);
2588 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ; 2596 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;
2589 result = matcher2->replaceFirst(&replText, &destText, status); 2597 result = matcher2->replaceFirst(&replText, &destText, status);
2590 REGEX_CHECK_STATUS; 2598 REGEX_CHECK_STATUS;
2591 REGEX_ASSERT(result == &destText); 2599 REGEX_ASSERT(result == &destText);
2592 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); 2600 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result);
2593 2601
2594 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6 9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0 x00 }; /* $ by itself, no group number $$$ */ 2602 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6 9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0 x00 }; /* $ by itself, no group number $$$ */
2595 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); 2603 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status);
2596 result = matcher2->replaceFirst(&replText, NULL, status); 2604 result = matcher2->replaceFirst(&replText, NULL, status);
2597 REGEX_CHECK_STATUS; 2605 REGEX_CHECK_STATUS;
2598 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0 x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2 4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ 2606 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0 x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2 4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */
2599 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 2607 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
2600 utext_close(result); 2608 utext_close(result);
2601 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ; 2609 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;
2602 result = matcher2->replaceFirst(&replText, &destText, status); 2610 result = matcher2->replaceFirst(&replText, &destText, status);
2603 REGEX_CHECK_STATUS; 2611 REGEX_CHECK_STATUS;
2604 REGEX_ASSERT(result == &destText); 2612 REGEX_ASSERT(result == &destText);
2605 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); 2613 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result);
2606 2614
2607 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d , 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */ 2615 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d , 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31, 0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx. */
2608 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001 D7CF, MATHEMATICAL BOLD DIGIT ONE 2616 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001 D7CF, MATHEMATICAL BOLD DIGIT ONE
2609 // 012345678901234567890123456 2617 // 012345678901234567890123456
2610 supplDigitChars[22] = 0xF0; 2618 supplDigitChars[22] = 0xF0;
2611 supplDigitChars[23] = 0x9D; 2619 supplDigitChars[23] = 0x9D;
2612 supplDigitChars[24] = 0x9F; 2620 supplDigitChars[24] = 0x9F;
2613 supplDigitChars[25] = 0x8F; 2621 supplDigitChars[25] = 0x8F;
2614 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); 2622 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status);
2615 2623
2616 result = matcher2->replaceFirst(&replText, NULL, status); 2624 result = matcher2->replaceFirst(&replText, NULL, status);
2617 REGEX_CHECK_STATUS; 2625 REGEX_CHECK_STATUS;
2618 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x 20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplementa l Digit 1 bc.defg */ 2626 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x 20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplementa l Digit 1 bc.defg */
2619 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 2627 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
2620 utext_close(result); 2628 utext_close(result);
2621 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ; 2629 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;
2622 result = matcher2->replaceFirst(&replText, &destText, status); 2630 result = matcher2->replaceFirst(&replText, &destText, status);
2623 REGEX_CHECK_STATUS; 2631 REGEX_CHECK_STATUS;
2624 REGEX_ASSERT(result == &destText); 2632 REGEX_ASSERT(result == &destText);
2625 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); 2633 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result);
2626 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x 61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e , 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */ 2634 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x 61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e , 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /* bad capture group number $5..." */
2627 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); 2635 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status);
2628 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)) , U_INDEX_OUTOFBOUNDS_ERROR); 2636 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status)) , U_INDEX_OUTOFBOUNDS_ERROR);
2629 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 2637 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
2630 utext_close(result); 2638 utext_close(result);
2631 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ; 2639 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status) ;
2632 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, sta tus)), U_INDEX_OUTOFBOUNDS_ERROR); 2640 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, sta tus)), U_INDEX_OUTOFBOUNDS_ERROR);
2633 REGEX_ASSERT(result == &destText); 2641 REGEX_ASSERT(result == &destText);
2634 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); 2642 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result);
2635 2643
2636 // 2644 //
2637 // Replacement String with \u hex escapes 2645 // Replacement String with \u hex escapes
2638 // 2646 //
2639 { 2647 {
2640 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61 , 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 a bc 2 abc 3 */ 2648 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61 , 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 a bc 2 abc 3 */
2641 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */ 2649 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33, 0x2d, 0x2d, 0x00 }; /* --\u0043-- */
2642 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); 2650 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status);
2643 utext_openUTF8(&replText, str_u0043, -1, &status); 2651 utext_openUTF8(&replText, str_u0043, -1, &status);
2644 matcher->reset(&dataText); 2652 matcher->reset(&dataText);
2645 2653
2646 result = matcher->replaceAll(&replText, NULL, status); 2654 result = matcher->replaceAll(&replText, NULL, status);
2647 REGEX_CHECK_STATUS; 2655 REGEX_CHECK_STATUS;
2648 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x 20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d , 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ 2656 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x 20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d , 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */
2649 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 2657 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
2650 utext_close(result); 2658 utext_close(result);
2651 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta tus); 2659 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta tus);
2652 result = matcher->replaceAll(&replText, &destText, status); 2660 result = matcher->replaceAll(&replText, &destText, status);
2653 REGEX_CHECK_STATUS; 2661 REGEX_CHECK_STATUS;
2654 REGEX_ASSERT(result == &destText); 2662 REGEX_ASSERT(result == &destText);
2655 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); 2663 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result);
2656 } 2664 }
2657 { 2665 {
2658 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ 2666 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */
2659 utext_openUTF8(&dataText, str_abc, -1, &status); 2667 utext_openUTF8(&dataText, str_abc, -1, &status);
2660 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ 2668 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */
2661 utext_openUTF8(&replText, str_U00010000, -1, &status); 2669 utext_openUTF8(&replText, str_U00010000, -1, &status);
2662 matcher->reset(&dataText); 2670 matcher->reset(&dataText);
2663 2671
2664 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0 x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A" 2672 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0 x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008 A"
2665 // 0123456789 2673 // 0123456789
2666 expected[2] = 0xF0; 2674 expected[2] = 0xF0;
2667 expected[3] = 0x90; 2675 expected[3] = 0x90;
2668 expected[4] = 0x80; 2676 expected[4] = 0x80;
2669 expected[5] = 0x80; 2677 expected[5] = 0x80;
2670 2678
2671 result = matcher->replaceAll(&replText, NULL, status); 2679 result = matcher->replaceAll(&replText, NULL, status);
2672 REGEX_CHECK_STATUS; 2680 REGEX_CHECK_STATUS;
2673 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 2681 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
2674 utext_close(result); 2682 utext_close(result);
2675 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta tus); 2683 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta tus);
2676 result = matcher->replaceAll(&replText, &destText, status); 2684 result = matcher->replaceAll(&replText, &destText, status);
2677 REGEX_CHECK_STATUS; 2685 REGEX_CHECK_STATUS;
2678 REGEX_ASSERT(result == &destText); 2686 REGEX_ASSERT(result == &destText);
2679 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); 2687 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result);
2680 } 2688 }
2681 // TODO: need more thorough testing of capture substitutions. 2689 // TODO: need more thorough testing of capture substitutions.
2682 2690
2683 // Bug 4057 2691 // Bug 4057
2684 // 2692 //
2685 { 2693 {
2686 status = U_ZERO_ERROR; 2694 status = U_ZERO_ERROR;
2687 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */ 2695 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65, 0x00 }; /* ss(.*?)ee */
2688 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x 20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69 , 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start wit h ss and end with ee ss stuff ee fin */ 2696 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x 20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69 , 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66, 0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start wit h ss and end with ee ss stuff ee fin */
2689 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ 2697 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */
2690 utext_openUTF8(&re, str_ssee, -1, &status); 2698 utext_openUTF8(&re, str_ssee, -1, &status);
2691 utext_openUTF8(&dataText, str_blah, -1, &status); 2699 utext_openUTF8(&dataText, str_blah, -1, &status);
2692 utext_openUTF8(&replText, str_ooh, -1, &status); 2700 utext_openUTF8(&replText, str_ooh, -1, &status);
2693 2701
2694 RegexMatcher m(&re, 0, status); 2702 RegexMatcher m(&re, 0, status);
2695 REGEX_CHECK_STATUS; 2703 REGEX_CHECK_STATUS;
2696 2704
2697 UnicodeString result; 2705 UnicodeString result;
2698 UText resultText = UTEXT_INITIALIZER; 2706 UText resultText = UTEXT_INITIALIZER;
2699 utext_openUnicodeString(&resultText, &result, &status); 2707 utext_openUnicodeString(&resultText, &result, &status);
2700 2708
2701 // Multiple finds do NOT bump up the previous appendReplacement position. 2709 // Multiple finds do NOT bump up the previous appendReplacement position.
2702 m.reset(&dataText); 2710 m.reset(&dataText);
2703 m.find(); 2711 m.find();
2704 m.find(); 2712 m.find();
2705 m.appendReplacement(&resultText, &replText, status); 2713 m.appendReplacement(&resultText, &replText, status);
2706 REGEX_CHECK_STATUS; 2714 REGEX_CHECK_STATUS;
(...skipping 20 matching lines...) Expand all
2727 m.find(10, status); 2735 m.find(10, status);
2728 m.find(); 2736 m.find();
2729 m.appendReplacement(&resultText, &replText, status); 2737 m.appendReplacement(&resultText, &replText, status);
2730 REGEX_CHECK_STATUS; 2738 REGEX_CHECK_STATUS;
2731 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6 3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0 x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */ 2739 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6 3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0 x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The matches start with ss and end with ee ooh */
2732 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); 2740 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText);
2733 2741
2734 m.appendTail(&resultText, status); 2742 m.appendTail(&resultText, status);
2735 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6 3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0 x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x6 9, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ 2743 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6 3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0 x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x6 9, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */
2736 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); 2744 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText);
2737 2745
2738 utext_close(&resultText); 2746 utext_close(&resultText);
2739 } 2747 }
2740 2748
2741 delete matcher2; 2749 delete matcher2;
2742 delete pat2; 2750 delete pat2;
2743 delete matcher; 2751 delete matcher;
2744 delete pat; 2752 delete pat;
2745 2753
2746 utext_close(&dataText); 2754 utext_close(&dataText);
2747 utext_close(&replText); 2755 utext_close(&replText);
2748 utext_close(&destText); 2756 utext_close(&destText);
2749 utext_close(&re); 2757 utext_close(&re);
2750 } 2758 }
2751 2759
2752 2760
2753 //--------------------------------------------------------------------------- 2761 //---------------------------------------------------------------------------
2754 // 2762 //
2755 // API_Pattern_UTF8 Test that the API for class RegexPattern is 2763 // API_Pattern_UTF8 Test that the API for class RegexPattern is
2756 // present and nominally working. 2764 // present and nominally working.
2757 // 2765 //
2758 //--------------------------------------------------------------------------- 2766 //---------------------------------------------------------------------------
2759 void RegexTest::API_Pattern_UTF8() { 2767 void RegexTest::API_Pattern_UTF8() {
2760 RegexPattern pata; // Test default constructor to not crash. 2768 RegexPattern pata; // Test default constructor to not crash.
2761 RegexPattern patb; 2769 RegexPattern patb;
2762 2770
2763 REGEX_ASSERT(pata == patb); 2771 REGEX_ASSERT(pata == patb);
2764 REGEX_ASSERT(pata == pata); 2772 REGEX_ASSERT(pata == pata);
2765 2773
2766 UText re1 = UTEXT_INITIALIZER; 2774 UText re1 = UTEXT_INITIALIZER;
2767 UText re2 = UTEXT_INITIALIZER; 2775 UText re2 = UTEXT_INITIALIZER;
2768 UErrorCode status = U_ZERO_ERROR; 2776 UErrorCode status = U_ZERO_ERROR;
2769 UParseError pe; 2777 UParseError pe;
2770 2778
2771 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ 2779 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d, 0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */
2772 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ 2780 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */
2773 utext_openUTF8(&re1, str_abcalmz, -1, &status); 2781 utext_openUTF8(&re1, str_abcalmz, -1, &status);
2774 utext_openUTF8(&re2, str_def, -1, &status); 2782 utext_openUTF8(&re2, str_def, -1, &status);
2775 2783
2776 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); 2784 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status);
2777 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); 2785 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status);
2778 REGEX_CHECK_STATUS; 2786 REGEX_CHECK_STATUS;
2779 REGEX_ASSERT(*pat1 == *pat1); 2787 REGEX_ASSERT(*pat1 == *pat1);
2780 REGEX_ASSERT(*pat1 != pata); 2788 REGEX_ASSERT(*pat1 != pata);
(...skipping 28 matching lines...) Expand all
2809 2817
2810 // clone 2818 // clone
2811 RegexPattern *pat1c = pat1->clone(); 2819 RegexPattern *pat1c = pat1->clone();
2812 REGEX_ASSERT(*pat1c == *pat1); 2820 REGEX_ASSERT(*pat1c == *pat1);
2813 REGEX_ASSERT(*pat1c != *pat2); 2821 REGEX_ASSERT(*pat1c != *pat2);
2814 2822
2815 delete pat1c; 2823 delete pat1c;
2816 delete pat1a; 2824 delete pat1a;
2817 delete pat1; 2825 delete pat1;
2818 delete pat2; 2826 delete pat2;
2819 2827
2820 utext_close(&re1); 2828 utext_close(&re1);
2821 utext_close(&re2); 2829 utext_close(&re2);
2822 2830
2823 2831
2824 // 2832 //
2825 // Verify that a matcher created from a cloned pattern works. 2833 // Verify that a matcher created from a cloned pattern works.
2826 // (Jitterbug 3423) 2834 // (Jitterbug 3423)
2827 // 2835 //
2828 { 2836 {
2829 UErrorCode status = U_ZERO_ERROR; 2837 UErrorCode status = U_ZERO_ERROR;
2830 UText pattern = UTEXT_INITIALIZER; 2838 UText pattern = UTEXT_INITIALIZER;
2831 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \ p{L}+ */ 2839 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \ p{L}+ */
2832 utext_openUTF8(&pattern, str_pL, -1, &status); 2840 utext_openUTF8(&pattern, str_pL, -1, &status);
2833 2841
2834 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); 2842 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status);
2835 RegexPattern *pClone = pSource->clone(); 2843 RegexPattern *pClone = pSource->clone();
2836 delete pSource; 2844 delete pSource;
2837 RegexMatcher *mFromClone = pClone->matcher(status); 2845 RegexMatcher *mFromClone = pClone->matcher(status);
2838 REGEX_CHECK_STATUS; 2846 REGEX_CHECK_STATUS;
2839 2847
2840 UText input = UTEXT_INITIALIZER; 2848 UText input = UTEXT_INITIALIZER;
2841 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57 , 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ 2849 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57 , 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */
2842 utext_openUTF8(&input, str_HelloWorld, -1, &status); 2850 utext_openUTF8(&input, str_HelloWorld, -1, &status);
2843 mFromClone->reset(&input); 2851 mFromClone->reset(&input);
2844 REGEX_ASSERT(mFromClone->find() == TRUE); 2852 REGEX_ASSERT(mFromClone->find() == TRUE);
2845 REGEX_ASSERT(mFromClone->group(status) == "Hello"); 2853 REGEX_ASSERT(mFromClone->group(status) == "Hello");
2846 REGEX_ASSERT(mFromClone->find() == TRUE); 2854 REGEX_ASSERT(mFromClone->find() == TRUE);
2847 REGEX_ASSERT(mFromClone->group(status) == "World"); 2855 REGEX_ASSERT(mFromClone->group(status) == "World");
2848 REGEX_ASSERT(mFromClone->find() == FALSE); 2856 REGEX_ASSERT(mFromClone->find() == FALSE);
2849 delete mFromClone; 2857 delete mFromClone;
2850 delete pClone; 2858 delete pClone;
2851 2859
2852 utext_close(&input); 2860 utext_close(&input);
2853 utext_close(&pattern); 2861 utext_close(&pattern);
2854 } 2862 }
2855 2863
2856 // 2864 //
2857 // matches convenience API 2865 // matches convenience API
2858 // 2866 //
2859 { 2867 {
2860 UErrorCode status = U_ZERO_ERROR; 2868 UErrorCode status = U_ZERO_ERROR;
2861 UText pattern = UTEXT_INITIALIZER; 2869 UText pattern = UTEXT_INITIALIZER;
2862 UText input = UTEXT_INITIALIZER; 2870 UText input = UTEXT_INITIALIZER;
2863 2871
2864 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x2 0, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ 2872 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x2 0, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */
2865 utext_openUTF8(&input, str_randominput, -1, &status); 2873 utext_openUTF8(&input, str_randominput, -1, &status);
2866 2874
2867 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ 2875 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */
2868 utext_openUTF8(&pattern, str_dotstar, -1, &status); 2876 utext_openUTF8(&pattern, str_dotstar, -1, &status);
2869 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE ); 2877 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE );
2870 REGEX_CHECK_STATUS; 2878 REGEX_CHECK_STATUS;
2871 2879
2872 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ 2880 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */
2873 utext_openUTF8(&pattern, str_abc, -1, &status); 2881 utext_openUTF8(&pattern, str_abc, -1, &status);
2874 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE); 2882 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) == FALSE);
2875 REGEX_CHECK_STATUS; 2883 REGEX_CHECK_STATUS;
2876 2884
2877 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */ 2885 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* .*nput */
2878 utext_openUTF8(&pattern, str_nput, -1, &status); 2886 utext_openUTF8(&pattern, str_nput, -1, &status);
2879 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE); 2887 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status) == TRUE);
2880 REGEX_CHECK_STATUS; 2888 REGEX_CHECK_STATUS;
2881 2889
2882 utext_openUTF8(&pattern, str_randominput, -1, &status); 2890 utext_openUTF8(&pattern, str_randominput, -1, &status);
2883 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, s tatus) == TRUE); 2891 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, s tatus) == TRUE);
2884 REGEX_CHECK_STATUS; 2892 REGEX_CHECK_STATUS;
2885 2893
2886 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ 2894 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */
2887 utext_openUTF8(&pattern, str_u, -1, &status); 2895 utext_openUTF8(&pattern, str_u, -1, &status);
2888 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE); 2896 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) == FALSE);
2889 REGEX_CHECK_STATUS; 2897 REGEX_CHECK_STATUS;
2890 2898
2891 utext_openUTF8(&input, str_abc, -1, &status); 2899 utext_openUTF8(&input, str_abc, -1, &status);
2892 utext_openUTF8(&pattern, str_abc, -1, &status); 2900 utext_openUTF8(&pattern, str_abc, -1, &status);
2893 status = U_INDEX_OUTOFBOUNDS_ERROR; 2901 status = U_INDEX_OUTOFBOUNDS_ERROR;
2894 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); 2902 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE);
2895 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 2903 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
2896 2904
2897 utext_close(&input); 2905 utext_close(&input);
2898 utext_close(&pattern); 2906 utext_close(&pattern);
2899 } 2907 }
2900 2908
2901 2909
2902 // 2910 //
2903 // Split() 2911 // Split()
2904 // 2912 //
2905 status = U_ZERO_ERROR; 2913 status = U_ZERO_ERROR;
2906 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ 2914 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */
(...skipping 370 matching lines...) Expand 10 before | Expand all | Expand 10 after
3277 } 3285 }
3278 3286
3279 3287
3280 void RegexTest::regex_find(const UnicodeString &pattern, 3288 void RegexTest::regex_find(const UnicodeString &pattern,
3281 const UnicodeString &flags, 3289 const UnicodeString &flags,
3282 const UnicodeString &inputString, 3290 const UnicodeString &inputString,
3283 const char *srcPath, 3291 const char *srcPath,
3284 int32_t line) { 3292 int32_t line) {
3285 UnicodeString unEscapedInput; 3293 UnicodeString unEscapedInput;
3286 UnicodeString deTaggedInput; 3294 UnicodeString deTaggedInput;
3287 3295
3288 int32_t patternUTF8Length, inputUTF8Length; 3296 int32_t patternUTF8Length, inputUTF8Length;
3289 char *patternChars = NULL, *inputChars = NULL; 3297 char *patternChars = NULL, *inputChars = NULL;
3290 UText patternText = UTEXT_INITIALIZER; 3298 UText patternText = UTEXT_INITIALIZER;
3291 UText inputText = UTEXT_INITIALIZER; 3299 UText inputText = UTEXT_INITIALIZER;
3292 UConverter *UTF8Converter = NULL; 3300 UConverter *UTF8Converter = NULL;
3293 3301
3294 UErrorCode status = U_ZERO_ERROR; 3302 UErrorCode status = U_ZERO_ERROR;
3295 UParseError pe; 3303 UParseError pe;
3296 RegexPattern *parsePat = NULL; 3304 RegexPattern *parsePat = NULL;
3297 RegexMatcher *parseMatcher = NULL; 3305 RegexMatcher *parseMatcher = NULL;
3298 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; 3306 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL;
3299 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; 3307 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL;
3300 UVector groupStarts(status); 3308 UVector groupStarts(status);
3301 UVector groupEnds(status); 3309 UVector groupEnds(status);
3302 UVector groupStartsUTF8(status); 3310 UVector groupStartsUTF8(status);
3303 UVector groupEndsUTF8(status); 3311 UVector groupEndsUTF8(status);
3304 UBool isMatch = FALSE, isUTF8Match = FALSE; 3312 UBool isMatch = FALSE, isUTF8Match = FALSE;
3305 UBool failed = FALSE; 3313 UBool failed = FALSE;
3306 int32_t numFinds; 3314 int32_t numFinds;
3307 int32_t i; 3315 int32_t i;
3308 UBool useMatchesFunc = FALSE; 3316 UBool useMatchesFunc = FALSE;
3309 UBool useLookingAtFunc = FALSE; 3317 UBool useLookingAtFunc = FALSE;
3310 int32_t regionStart = -1; 3318 int32_t regionStart = -1;
3311 int32_t regionEnd = -1; 3319 int32_t regionEnd = -1;
3312 int32_t regionStartUTF8 = -1; 3320 int32_t regionStartUTF8 = -1;
3313 int32_t regionEndUTF8 = -1; 3321 int32_t regionEndUTF8 = -1;
3314 3322
3315 3323
3316 // 3324 //
3317 // Compile the caller's pattern 3325 // Compile the caller's pattern
3318 // 3326 //
3319 uint32_t bflags = 0; 3327 uint32_t bflags = 0;
3320 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag 3328 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag
3321 bflags |= UREGEX_CASE_INSENSITIVE; 3329 bflags |= UREGEX_CASE_INSENSITIVE;
3322 } 3330 }
3323 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag 3331 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag
3324 bflags |= UREGEX_COMMENTS; 3332 bflags |= UREGEX_COMMENTS;
3325 } 3333 }
3326 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag 3334 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag
3327 bflags |= UREGEX_DOTALL; 3335 bflags |= UREGEX_DOTALL;
3328 } 3336 }
3329 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag 3337 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag
3330 bflags |= UREGEX_MULTILINE; 3338 bflags |= UREGEX_MULTILINE;
3331 } 3339 }
3332 3340
3333 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag 3341 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag
3334 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; 3342 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
3335 } 3343 }
3336 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag 3344 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag
3337 bflags |= UREGEX_UNIX_LINES; 3345 bflags |= UREGEX_UNIX_LINES;
3338 } 3346 }
3339 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag 3347 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag
3340 bflags |= UREGEX_LITERAL; 3348 bflags |= UREGEX_LITERAL;
3341 } 3349 }
3342 3350
(...skipping 15 matching lines...) Expand all
3358 goto cleanupAndReturn; 3366 goto cleanupAndReturn;
3359 } else { 3367 } else {
3360 // Unexpected pattern compilation error. 3368 // Unexpected pattern compilation error.
3361 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName( status)); 3369 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName( status));
3362 goto cleanupAndReturn; 3370 goto cleanupAndReturn;
3363 } 3371 }
3364 } 3372 }
3365 3373
3366 UTF8Converter = ucnv_open("UTF8", &status); 3374 UTF8Converter = ucnv_open("UTF8", &status);
3367 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status); 3375 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, &status);
3368 3376
3369 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); 3377 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status);
3370 status = U_ZERO_ERROR; // buffer overflow 3378 status = U_ZERO_ERROR; // buffer overflow
3371 patternChars = new char[patternUTF8Length+1]; 3379 patternChars = new char[patternUTF8Length+1];
3372 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); 3380 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status);
3373 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); 3381 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status);
3374 3382
3375 if (status == U_ZERO_ERROR) { 3383 if (status == U_ZERO_ERROR) {
3376 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); 3384 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status);
3377 3385
3378 if (status != U_ZERO_ERROR) { 3386 if (status != U_ZERO_ERROR) {
3379 #if UCONFIG_NO_BREAK_ITERATION==1 3387 #if UCONFIG_NO_BREAK_ITERATION==1
3380 // 'v' test flag means that the test pattern should not compile if I CU was configured 3388 // 'v' test flag means that the test pattern should not compile if I CU was configured
3381 // to not include break iteration. RBBI is needed for Unicode w ord boundaries. 3389 // to not include break iteration. RBBI is needed for Unicode w ord boundaries.
3382 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORT ED_ERROR) { 3390 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORT ED_ERROR) {
3383 goto cleanupAndReturn; 3391 goto cleanupAndReturn;
3384 } 3392 }
3385 #endif 3393 #endif
3386 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' 3394 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E'
3387 // Expected pattern compilation error. 3395 // Expected pattern compilation error.
3388 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' 3396 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd'
3389 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(s tatus)); 3397 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(s tatus));
3390 } 3398 }
3391 goto cleanupAndReturn; 3399 goto cleanupAndReturn;
3392 } else { 3400 } else {
3393 // Unexpected pattern compilation error. 3401 // Unexpected pattern compilation error.
3394 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_err orName(status)); 3402 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_err orName(status));
3395 goto cleanupAndReturn; 3403 goto cleanupAndReturn;
3396 } 3404 }
3397 } 3405 }
3398 } 3406 }
3399 3407
3400 if (UTF8Pattern == NULL) { 3408 if (UTF8Pattern == NULL) {
3401 // UTF-8 does not allow unpaired surrogates, so this could actually happ en without being a failure of the engine 3409 // UTF-8 does not allow unpaired surrogates, so this could actually happ en without being a failure of the engine
3402 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line); 3410 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d", srcPath, line);
3403 status = U_ZERO_ERROR; 3411 status = U_ZERO_ERROR;
3404 } 3412 }
3405 3413
3406 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag 3414 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag
3407 RegexPatternDump(callerPattern); 3415 callerPattern->dumpPattern();
3408 } 3416 }
3409 3417
3410 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag 3418 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag
3411 errln("%s, Line %d: Expected, but did not get, a pattern compilation err or.", srcPath, line); 3419 errln("%s, Line %d: Expected, but did not get, a pattern compilation err or.", srcPath, line);
3412 goto cleanupAndReturn; 3420 goto cleanupAndReturn;
3413 } 3421 }
3414 3422
3415 3423
3416 // 3424 //
3417 // Number of times find() should be called on the test string, default to 1 3425 // Number of times find() should be called on the test string, default to 1
3418 // 3426 //
3419 numFinds = 1; 3427 numFinds = 1;
3420 for (i=2; i<=9; i++) { 3428 for (i=2; i<=9; i++) {
3421 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag 3429 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag
3422 if (numFinds != 1) { 3430 if (numFinds != 1) {
3423 errln("Line %d: more than one digit flag. Scanning %d.", line, i); 3431 errln("Line %d: more than one digit flag. Scanning %d.", line, i);
3424 goto cleanupAndReturn; 3432 goto cleanupAndReturn;
3425 } 3433 }
3426 numFinds = i; 3434 numFinds = i;
3427 } 3435 }
3428 } 3436 }
3429 3437
3430 // 'M' flag. Use matches() instead of find() 3438 // 'M' flag. Use matches() instead of find()
3431 if (flags.indexOf((UChar)0x4d) >= 0) { 3439 if (flags.indexOf((UChar)0x4d) >= 0) {
3432 useMatchesFunc = TRUE; 3440 useMatchesFunc = TRUE;
3433 } 3441 }
3434 if (flags.indexOf((UChar)0x4c) >= 0) { 3442 if (flags.indexOf((UChar)0x4c) >= 0) {
3435 useLookingAtFunc = TRUE; 3443 useLookingAtFunc = TRUE;
3436 } 3444 }
3437 3445
3438 // 3446 //
3439 // Find the tags in the input data, remove them, and record the group bound ary 3447 // Find the tags in the input data, remove them, and record the group bound ary
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
3474 } 3482 }
3475 3483
3476 // 3484 //
3477 // Configure the matcher according to the flags specified with this test. 3485 // Configure the matcher according to the flags specified with this test.
3478 // 3486 //
3479 matcher = callerPattern->matcher(deTaggedInput, status); 3487 matcher = callerPattern->matcher(deTaggedInput, status);
3480 REGEX_CHECK_STATUS_L(line); 3488 REGEX_CHECK_STATUS_L(line);
3481 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag 3489 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag
3482 matcher->setTrace(TRUE); 3490 matcher->setTrace(TRUE);
3483 } 3491 }
3484 3492
3485 if (UTF8Pattern != NULL) { 3493 if (UTF8Pattern != NULL) {
3486 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); 3494 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status);
3487 status = U_ZERO_ERROR; // buffer overflow 3495 status = U_ZERO_ERROR; // buffer overflow
3488 inputChars = new char[inputUTF8Length+1]; 3496 inputChars = new char[inputUTF8Length+1];
3489 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, stat us); 3497 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, stat us);
3490 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); 3498 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status);
3491 3499
3492 if (status == U_ZERO_ERROR) { 3500 if (status == U_ZERO_ERROR) {
3493 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); 3501 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText);
3494 REGEX_CHECK_STATUS_L(line); 3502 REGEX_CHECK_STATUS_L(line);
3495 } 3503 }
3496 3504
3497 if (UTF8Matcher == NULL) { 3505 if (UTF8Matcher == NULL) {
3498 // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine 3506 // UTF-8 does not allow unpaired surrogates, so this could actually happen without being a failure of the engine
3499 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d" , srcPath, line); 3507 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d" , srcPath, line);
3500 status = U_ZERO_ERROR; 3508 status = U_ZERO_ERROR;
3501 } 3509 }
3502 } 3510 }
3503 3511
3504 // 3512 //
3505 // Generate native indices for UTF8 versions of region and capture group in fo 3513 // Generate native indices for UTF8 versions of region and capture group in fo
3506 // 3514 //
3507 if (UTF8Matcher != NULL) { 3515 if (UTF8Matcher != NULL) {
3508 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStar t, regionStartUTF8); 3516 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStar t, regionStartUTF8);
3509 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8); 3517 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd, regionEndUTF8);
3510 3518
3511 // Fill out the native index UVector info. 3519 // Fill out the native index UVector info.
3512 // Only need 1 loop, from above we know groupStarts.size() = groupEnds. size() 3520 // Only need 1 loop, from above we know groupStarts.size() = groupEnds. size()
3513 for (i=0; i<groupStarts.size(); i++) { 3521 for (i=0; i<groupStarts.size(); i++) {
3514 int32_t start = groupStarts.elementAti(i); 3522 int32_t start = groupStarts.elementAti(i);
3515 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting 3523 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting
3516 if (start >= 0) { 3524 if (start >= 0) {
3517 int32_t startUTF8; 3525 int32_t startUTF8;
3518 if (!utextOffsetToNative(&inputText, start, startUTF8)) { 3526 if (!utextOffsetToNative(&inputText, start, startUTF8)) {
3519 errln("Error at line %d: could not find native index for gro up start %d. UTF16 index %d", line, i, start); 3527 errln("Error at line %d: could not find native index for gro up start %d. UTF16 index %d", line, i, start);
3520 failed = TRUE; 3528 failed = TRUE;
3521 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now. 3529 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now.
3522 } 3530 }
3523 setInt(groupStartsUTF8, startUTF8, i); 3531 setInt(groupStartsUTF8, startUTF8, i);
3524 } 3532 }
3525 3533
3526 int32_t end = groupEnds.elementAti(i); 3534 int32_t end = groupEnds.elementAti(i);
3527 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting 3535 // -1 means there was no UVector slot and we won't be requesting th at capture group for this test, don't bother inserting
3528 if (end >= 0) { 3536 if (end >= 0) {
3529 int32_t endUTF8; 3537 int32_t endUTF8;
3530 if (!utextOffsetToNative(&inputText, end, endUTF8)) { 3538 if (!utextOffsetToNative(&inputText, end, endUTF8)) {
3531 errln("Error at line %d: could not find native index for gro up end %d. UTF16 index %d", line, i, end); 3539 errln("Error at line %d: could not find native index for gro up end %d. UTF16 index %d", line, i, end);
3532 failed = TRUE; 3540 failed = TRUE;
3533 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now. 3541 goto cleanupAndReturn; // Good chance of subsequent bogus e rrors. Stop now.
3534 } 3542 }
3535 setInt(groupEndsUTF8, endUTF8, i); 3543 setInt(groupEndsUTF8, endUTF8, i);
(...skipping 14 matching lines...) Expand all
3550 if (UTF8Matcher != NULL) { 3558 if (UTF8Matcher != NULL) {
3551 UTF8Matcher->useAnchoringBounds(FALSE); 3559 UTF8Matcher->useAnchoringBounds(FALSE);
3552 } 3560 }
3553 } 3561 }
3554 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag 3562 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag
3555 matcher->useTransparentBounds(TRUE); 3563 matcher->useTransparentBounds(TRUE);
3556 if (UTF8Matcher != NULL) { 3564 if (UTF8Matcher != NULL) {
3557 UTF8Matcher->useTransparentBounds(TRUE); 3565 UTF8Matcher->useTransparentBounds(TRUE);
3558 } 3566 }
3559 } 3567 }
3560 3568
3561 3569
3562 3570
3563 // 3571 //
3564 // Do a find on the de-tagged input using the caller's pattern 3572 // Do a find on the de-tagged input using the caller's pattern
3565 // TODO: error on count>1 and not find(). 3573 // TODO: error on count>1 and not find().
3566 // error on both matches() and lookingAt(). 3574 // error on both matches() and lookingAt().
3567 // 3575 //
3568 for (i=0; i<numFinds; i++) { 3576 for (i=0; i<numFinds; i++) {
3569 if (useMatchesFunc) { 3577 if (useMatchesFunc) {
3570 isMatch = matcher->matches(status); 3578 isMatch = matcher->matches(status);
3571 if (UTF8Matcher != NULL) { 3579 if (UTF8Matcher != NULL) {
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
3626 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d", 3634 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d",
3627 line, i, expectedStart, matcher->start(i, status)); 3635 line, i, expectedStart, matcher->start(i, status));
3628 failed = TRUE; 3636 failed = TRUE;
3629 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 3637 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
3630 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expec tedStartUTF8) { 3638 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expec tedStartUTF8) {
3631 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d (UTF8)", 3639 errln("Error at line %d: incorrect start position for group %d. Exp ected %d, got %d (UTF8)",
3632 line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); 3640 line, i, expectedStartUTF8, UTF8Matcher->start(i, status));
3633 failed = TRUE; 3641 failed = TRUE;
3634 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now. 3642 goto cleanupAndReturn; // Good chance of subsequent bogus errors. Stop now.
3635 } 3643 }
3636 3644
3637 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti (i)); 3645 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti (i));
3638 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF 8.elementAti(i)); 3646 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF 8.elementAti(i));
3639 if (matcher->end(i, status) != expectedEnd) { 3647 if (matcher->end(i, status) != expectedEnd) {
3640 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d", 3648 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d",
3641 line, i, expectedEnd, matcher->end(i, status)); 3649 line, i, expectedEnd, matcher->end(i, status));
3642 failed = TRUE; 3650 failed = TRUE;
3643 // Error on end position; keep going; real error is probably yet to come as group 3651 // Error on end position; keep going; real error is probably yet to come as group
3644 // end positions work from end of the input data towards the front . 3652 // end positions work from end of the input data towards the front .
3645 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expecte dEndUTF8) { 3653 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expecte dEndUTF8) {
3646 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d (UTF8)", 3654 errln("Error at line %d: incorrect end position for group %d. Expec ted %d, got %d (UTF8)",
(...skipping 16 matching lines...) Expand all
3663 3671
3664 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa lse 3672 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa lse
3665 matcher->requireEnd() == TRUE) { 3673 matcher->requireEnd() == TRUE) {
3666 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l ine); 3674 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l ine);
3667 failed = TRUE; 3675 failed = TRUE;
3668 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false 3676 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == false
3669 UTF8Matcher->requireEnd() == TRUE) { 3677 UTF8Matcher->requireEnd() == TRUE) {
3670 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT F8)", line); 3678 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT F8)", line);
3671 failed = TRUE; 3679 failed = TRUE;
3672 } 3680 }
3673 3681
3674 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr ue 3682 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr ue
3675 matcher->requireEnd() == FALSE) { 3683 matcher->requireEnd() == FALSE) {
3676 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l ine); 3684 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l ine);
3677 failed = TRUE; 3685 failed = TRUE;
3678 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false 3686 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && // 'Y' flag: RequireEnd() == false
3679 UTF8Matcher->requireEnd() == FALSE) { 3687 UTF8Matcher->requireEnd() == FALSE) {
3680 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT F8)", line); 3688 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT F8)", line);
3681 failed = TRUE; 3689 failed = TRUE;
3682 } 3690 }
3683 3691
3684 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 3692 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false
3685 matcher->hitEnd() == TRUE) { 3693 matcher->hitEnd() == TRUE) {
3686 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line) ; 3694 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line) ;
3687 failed = TRUE; 3695 failed = TRUE;
3688 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false 3696 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false
3689 UTF8Matcher->hitEnd() == TRUE) { 3697 UTF8Matcher->hitEnd() == TRUE) {
3690 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)" , line); 3698 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)" , line);
3691 failed = TRUE; 3699 failed = TRUE;
3692 } 3700 }
3693 3701
3694 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 3702 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true
3695 matcher->hitEnd() == FALSE) { 3703 matcher->hitEnd() == FALSE) {
3696 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line) ; 3704 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line) ;
3697 failed = TRUE; 3705 failed = TRUE;
3698 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true 3706 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true
3699 UTF8Matcher->hitEnd() == FALSE) { 3707 UTF8Matcher->hitEnd() == FALSE) {
3700 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)" , line); 3708 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)" , line);
3701 failed = TRUE; 3709 failed = TRUE;
3702 } 3710 }
3703 3711
3704 3712
3705 cleanupAndReturn: 3713 cleanupAndReturn:
3706 if (failed) { 3714 if (failed) {
3707 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " 3715 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" "
3708 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); 3716 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\"");
3709 // callerPattern->dump(); 3717 // callerPattern->dump();
3710 } 3718 }
3711 delete parseMatcher; 3719 delete parseMatcher;
3712 delete parsePat; 3720 delete parsePat;
3713 delete UTF8Matcher; 3721 delete UTF8Matcher;
3714 delete UTF8Pattern; 3722 delete UTF8Pattern;
3715 delete matcher; 3723 delete matcher;
3716 delete callerPattern; 3724 delete callerPattern;
3717 3725
3718 utext_close(&inputText); 3726 utext_close(&inputText);
3719 delete[] inputChars; 3727 delete[] inputChars;
3720 utext_close(&patternText); 3728 utext_close(&patternText);
3721 delete[] patternChars; 3729 delete[] patternChars;
3722 ucnv_close(UTF8Converter); 3730 ucnv_close(UTF8Converter);
3723 } 3731 }
3724 3732
3725 3733
3726 3734
3727 3735
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
3783 // Invalid Back Reference \0 3791 // Invalid Back Reference \0
3784 // For ICU 3.8 and earlier 3792 // For ICU 3.8 and earlier
3785 // For ICU versions newer than 3.8, \0 introduces an octal escape. 3793 // For ICU versions newer than 3.8, \0 introduces an octal escape.
3786 // 3794 //
3787 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); 3795 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE);
3788 3796
3789 } 3797 }
3790 3798
3791 3799
3792 //------------------------------------------------------------------------------ - 3800 //------------------------------------------------------------------------------ -
3793 // 3801 //
3794 // Read a text data file, convert it to UChars, and return the data 3802 // Read a text data file, convert it to UChars, and return the data
3795 // in one big UChar * buffer, which the caller must delete. 3803 // in one big UChar * buffer, which the caller must delete.
3796 // 3804 //
3797 //------------------------------------------------------------------------------ -- 3805 //------------------------------------------------------------------------------ --
3798 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, 3806 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen,
3799 const char *defEncoding, UErrorCode &status ) { 3807 const char *defEncoding, UErrorCode &status ) {
3800 UChar *retPtr = NULL; 3808 UChar *retPtr = NULL;
3801 char *fileBuf = NULL; 3809 char *fileBuf = NULL;
3802 UConverter* conv = NULL; 3810 UConverter* conv = NULL;
3803 FILE *f = NULL; 3811 FILE *f = NULL;
(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after
4126 UBool found = testMat->find(); 4134 UBool found = testMat->find();
4127 UBool expected = FALSE; 4135 UBool expected = FALSE;
4128 if (fields[2].indexOf(UChar_y) >=0) { 4136 if (fields[2].indexOf(UChar_y) >=0) {
4129 expected = TRUE; 4137 expected = TRUE;
4130 } 4138 }
4131 if (expected != found) { 4139 if (expected != found) {
4132 errln("line %d: Expected %smatch, got %smatch", 4140 errln("line %d: Expected %smatch, got %smatch",
4133 lineNum, expected?"":"no ", found?"":"no " ); 4141 lineNum, expected?"":"no ", found?"":"no " );
4134 continue; 4142 continue;
4135 } 4143 }
4136 4144
4137 // Don't try to check expected results if there is no match. 4145 // Don't try to check expected results if there is no match.
4138 // (Some have stuff in the expected fields) 4146 // (Some have stuff in the expected fields)
4139 if (!found) { 4147 if (!found) {
4140 delete testMat; 4148 delete testMat;
4141 delete testPat; 4149 delete testPat;
4142 continue; 4150 continue;
4143 } 4151 }
4144 4152
4145 // 4153 //
4146 // Interpret the Perl expression from the fourth field of the data file, 4154 // Interpret the Perl expression from the fourth field of the data file,
(...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after
4424 const UChar UChar_y = 0x79; 4432 const UChar UChar_y = 0x79;
4425 if (flagStr.indexOf(UChar_i) != -1) { 4433 if (flagStr.indexOf(UChar_i) != -1) {
4426 flags |= UREGEX_CASE_INSENSITIVE; 4434 flags |= UREGEX_CASE_INSENSITIVE;
4427 } 4435 }
4428 if (flagStr.indexOf(UChar_m) != -1) { 4436 if (flagStr.indexOf(UChar_m) != -1) {
4429 flags |= UREGEX_MULTILINE; 4437 flags |= UREGEX_MULTILINE;
4430 } 4438 }
4431 if (flagStr.indexOf(UChar_x) != -1) { 4439 if (flagStr.indexOf(UChar_x) != -1) {
4432 flags |= UREGEX_COMMENTS; 4440 flags |= UREGEX_COMMENTS;
4433 } 4441 }
4434 4442
4435 // 4443 //
4436 // Put the pattern in a UTF-8 UText 4444 // Put the pattern in a UTF-8 UText
4437 // 4445 //
4438 status = U_ZERO_ERROR; 4446 status = U_ZERO_ERROR;
4439 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Conve rter.getAlias(), status); 4447 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Conve rter.getAlias(), status);
4440 if (status == U_BUFFER_OVERFLOW_ERROR) { 4448 if (status == U_BUFFER_OVERFLOW_ERROR) {
4441 status = U_ZERO_ERROR; 4449 status = U_ZERO_ERROR;
4442 delete[] patternChars; 4450 delete[] patternChars;
4443 patternCapacity = patternLength + 1; 4451 patternCapacity = patternLength + 1;
4444 patternChars = new char[patternCapacity]; 4452 patternChars = new char[patternCapacity];
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
4521 UBool found = testMat->find(); 4529 UBool found = testMat->find();
4522 UBool expected = FALSE; 4530 UBool expected = FALSE;
4523 if (fields[2].indexOf(UChar_y) >=0) { 4531 if (fields[2].indexOf(UChar_y) >=0) {
4524 expected = TRUE; 4532 expected = TRUE;
4525 } 4533 }
4526 if (expected != found) { 4534 if (expected != found) {
4527 errln("line %d: Expected %smatch, got %smatch", 4535 errln("line %d: Expected %smatch, got %smatch",
4528 lineNum, expected?"":"no ", found?"":"no " ); 4536 lineNum, expected?"":"no ", found?"":"no " );
4529 continue; 4537 continue;
4530 } 4538 }
4531 4539
4532 // Don't try to check expected results if there is no match. 4540 // Don't try to check expected results if there is no match.
4533 // (Some have stuff in the expected fields) 4541 // (Some have stuff in the expected fields)
4534 if (!found) { 4542 if (!found) {
4535 delete testMat; 4543 delete testMat;
4536 delete testPat; 4544 delete testPat;
4537 continue; 4545 continue;
4538 } 4546 }
4539 4547
4540 // 4548 //
4541 // Interpret the Perl expression from the fourth field of the data file, 4549 // Interpret the Perl expression from the fourth field of the data file,
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
4664 delete groupsPat; 4672 delete groupsPat;
4665 4673
4666 delete flagMat; 4674 delete flagMat;
4667 delete flagPat; 4675 delete flagPat;
4668 4676
4669 delete lineMat; 4677 delete lineMat;
4670 delete linePat; 4678 delete linePat;
4671 4679
4672 delete fieldPat; 4680 delete fieldPat;
4673 delete [] testData; 4681 delete [] testData;
4674 4682
4675 utext_close(&patternText); 4683 utext_close(&patternText);
4676 utext_close(&inputText); 4684 utext_close(&inputText);
4677 4685
4678 delete [] patternChars; 4686 delete [] patternChars;
4679 delete [] inputChars; 4687 delete [] inputChars;
4680 4688
4681 4689
4682 logln("%d tests skipped because of unimplemented regexp features.", skippedU nimplementedCount); 4690 logln("%d tests skipped because of unimplemented regexp features.", skippedU nimplementedCount);
4683 4691
4684 } 4692 }
4685 4693
4686 4694
4687 //-------------------------------------------------------------- 4695 //--------------------------------------------------------------
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
4731 } 4739 }
4732 info->lastSteps = steps; 4740 info->lastSteps = steps;
4733 info->numCalls++; 4741 info->numCalls++;
4734 return (info->numCalls < info->maxCalls); 4742 return (info->numCalls < info->maxCalls);
4735 } 4743 }
4736 U_CDECL_END 4744 U_CDECL_END
4737 4745
4738 void RegexTest::Callbacks() { 4746 void RegexTest::Callbacks() {
4739 { 4747 {
4740 // Getter returns NULLs if no callback has been set 4748 // Getter returns NULLs if no callback has been set
4741 4749
4742 // The variables that the getter will fill in. 4750 // The variables that the getter will fill in.
4743 // Init to non-null values so that the action of the getter can be seen. 4751 // Init to non-null values so that the action of the getter can be seen.
4744 const void *returnedContext = &returnedContext; 4752 const void *returnedContext = &returnedContext;
4745 URegexMatchCallback *returnedFn = &testCallBackFn; 4753 URegexMatchCallback *returnedFn = &testCallBackFn;
4746 4754
4747 UErrorCode status = U_ZERO_ERROR; 4755 UErrorCode status = U_ZERO_ERROR;
4748 RegexMatcher matcher("x", 0, status); 4756 RegexMatcher matcher("x", 0, status);
4749 REGEX_CHECK_STATUS; 4757 REGEX_CHECK_STATUS;
4750 matcher.getMatchCallback(returnedFn, returnedContext, status); 4758 matcher.getMatchCallback(returnedFn, returnedContext, status);
4751 REGEX_CHECK_STATUS; 4759 REGEX_CHECK_STATUS;
4752 REGEX_ASSERT(returnedFn == NULL); 4760 REGEX_ASSERT(returnedFn == NULL);
4753 REGEX_ASSERT(returnedContext == NULL); 4761 REGEX_ASSERT(returnedContext == NULL);
4754 } 4762 }
4755 4763
4756 { 4764 {
4757 // Set and Get work 4765 // Set and Get work
4758 callBackContext cbInfo = {this, 0, 0, 0}; 4766 callBackContext cbInfo = {this, 0, 0, 0};
4759 const void *returnedContext; 4767 const void *returnedContext;
4760 URegexMatchCallback *returnedFn; 4768 URegexMatchCallback *returnedFn;
4761 UErrorCode status = U_ZERO_ERROR; 4769 UErrorCode status = U_ZERO_ERROR;
4762 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 4770 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long.
4763 REGEX_CHECK_STATUS; 4771 REGEX_CHECK_STATUS;
4764 matcher.setMatchCallback(testCallBackFn, &cbInfo, status); 4772 matcher.setMatchCallback(testCallBackFn, &cbInfo, status);
4765 REGEX_CHECK_STATUS; 4773 REGEX_CHECK_STATUS;
4766 matcher.getMatchCallback(returnedFn, returnedContext, status); 4774 matcher.getMatchCallback(returnedFn, returnedContext, status);
4767 REGEX_CHECK_STATUS; 4775 REGEX_CHECK_STATUS;
4768 REGEX_ASSERT(returnedFn == testCallBackFn); 4776 REGEX_ASSERT(returnedFn == testCallBackFn);
4769 REGEX_ASSERT(returnedContext == &cbInfo); 4777 REGEX_ASSERT(returnedContext == &cbInfo);
4770 4778
4771 // A short-running match shouldn't invoke the callback 4779 // A short-running match shouldn't invoke the callback
4772 status = U_ZERO_ERROR; 4780 status = U_ZERO_ERROR;
4773 cbInfo.reset(1); 4781 cbInfo.reset(1);
4774 UnicodeString s = "xxx"; 4782 UnicodeString s = "xxx";
4775 matcher.reset(s); 4783 matcher.reset(s);
4776 REGEX_ASSERT(matcher.matches(status)); 4784 REGEX_ASSERT(matcher.matches(status));
4777 REGEX_CHECK_STATUS; 4785 REGEX_CHECK_STATUS;
4778 REGEX_ASSERT(cbInfo.numCalls == 0); 4786 REGEX_ASSERT(cbInfo.numCalls == 0);
4779 4787
4780 // A medium-length match that runs long enough to invoke the 4788 // A medium-length match that runs long enough to invoke the
4781 // callback, but not so long that the callback aborts it. 4789 // callback, but not so long that the callback aborts it.
4782 status = U_ZERO_ERROR; 4790 status = U_ZERO_ERROR;
4783 cbInfo.reset(4); 4791 cbInfo.reset(4);
4784 s = "aaaaaaaaaaaaaaaaaaab"; 4792 s = "aaaaaaaaaaaaaaaaaaab";
4785 matcher.reset(s); 4793 matcher.reset(s);
4786 REGEX_ASSERT(matcher.matches(status)==FALSE); 4794 REGEX_ASSERT(matcher.matches(status)==FALSE);
4787 REGEX_CHECK_STATUS; 4795 REGEX_CHECK_STATUS;
4788 REGEX_ASSERT(cbInfo.numCalls > 0); 4796 REGEX_ASSERT(cbInfo.numCalls > 0);
4789 4797
4790 // A longer running match that the callback function will abort. 4798 // A longer running match that the callback function will abort.
4791 status = U_ZERO_ERROR; 4799 status = U_ZERO_ERROR;
4792 cbInfo.reset(4); 4800 cbInfo.reset(4);
4793 s = "aaaaaaaaaaaaaaaaaaaaaaab"; 4801 s = "aaaaaaaaaaaaaaaaaaaaaaab";
4794 matcher.reset(s); 4802 matcher.reset(s);
4795 REGEX_ASSERT(matcher.matches(status)==FALSE); 4803 REGEX_ASSERT(matcher.matches(status)==FALSE);
4796 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); 4804 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
4797 REGEX_ASSERT(cbInfo.numCalls == 4); 4805 REGEX_ASSERT(cbInfo.numCalls == 4);
4798 } 4806 }
4799 4807
4800 4808
4801 } 4809 }
4802 4810
4803 4811
4804 // 4812 //
4805 // FindProgressCallbacks() Test the find "progress" callback function. 4813 // FindProgressCallbacks() Test the find "progress" callback function.
4806 // When set, the find progress callback will be invoked during find operations 4814 // When set, the find progress callback will be invoked during find operations
4807 // after each return from a match attempt, giving the application the opportunity 4815 // after each return from a match attempt, giving the application the opportunity
4808 // to terminate a long-running find operation before its normal completion. 4816 // to terminate a long-running find operation before its normal completion.
4809 // 4817 //
4810 4818
4811 struct progressCallBackContext { 4819 struct progressCallBackContext {
4812 RegexTest *test; 4820 RegexTest *test;
4813 int64_t lastIndex; 4821 int64_t lastIndex;
4814 int32_t maxCalls; 4822 int32_t maxCalls;
4815 int32_t numCalls; 4823 int32_t numCalls;
4816 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; 4824 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;};
4817 }; 4825 };
4818 4826
4827 // call-back function for find().
4828 // Return TRUE to continue the find().
4829 // Return FALSE to stop the find().
4819 U_CDECL_BEGIN 4830 U_CDECL_BEGIN
4820 static UBool U_CALLCONV 4831 static UBool U_CALLCONV
4821 testProgressCallBackFn(const void *context, int64_t matchIndex) { 4832 testProgressCallBackFn(const void *context, int64_t matchIndex) {
4822 progressCallBackContext *info = (progressCallBackContext *)context; 4833 progressCallBackContext *info = (progressCallBackContext *)context;
4823 info->numCalls++; 4834 info->numCalls++;
4824 info->lastIndex = matchIndex; 4835 info->lastIndex = matchIndex;
4825 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls); 4836 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n", matchIndex, info->numCalls);
4826 return (info->numCalls < info->maxCalls); 4837 return (info->numCalls < info->maxCalls);
4827 } 4838 }
4828 U_CDECL_END 4839 U_CDECL_END
4829 4840
4830 void RegexTest::FindProgressCallbacks() { 4841 void RegexTest::FindProgressCallbacks() {
4831 { 4842 {
4832 // Getter returns NULLs if no callback has been set 4843 // Getter returns NULLs if no callback has been set
4833 4844
4834 // The variables that the getter will fill in. 4845 // The variables that the getter will fill in.
4835 // Init to non-null values so that the action of the getter can be seen. 4846 // Init to non-null values so that the action of the getter can be seen.
4836 const void *returnedContext = &returnedContext; 4847 const void *returnedContext = &returnedContext;
4837 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; 4848 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn;
4838 4849
4839 UErrorCode status = U_ZERO_ERROR; 4850 UErrorCode status = U_ZERO_ERROR;
4840 RegexMatcher matcher("x", 0, status); 4851 RegexMatcher matcher("x", 0, status);
4841 REGEX_CHECK_STATUS; 4852 REGEX_CHECK_STATUS;
4842 matcher.getFindProgressCallback(returnedFn, returnedContext, status); 4853 matcher.getFindProgressCallback(returnedFn, returnedContext, status);
4843 REGEX_CHECK_STATUS; 4854 REGEX_CHECK_STATUS;
4844 REGEX_ASSERT(returnedFn == NULL); 4855 REGEX_ASSERT(returnedFn == NULL);
4845 REGEX_ASSERT(returnedContext == NULL); 4856 REGEX_ASSERT(returnedContext == NULL);
4846 } 4857 }
4847 4858
4848 { 4859 {
4849 // Set and Get work 4860 // Set and Get work
4850 progressCallBackContext cbInfo = {this, 0, 0, 0}; 4861 progressCallBackContext cbInfo = {this, 0, 0, 0};
4851 const void *returnedContext; 4862 const void *returnedContext;
4852 URegexFindProgressCallback *returnedFn; 4863 URegexFindProgressCallback *returnedFn;
4853 UErrorCode status = U_ZERO_ERROR; 4864 UErrorCode status = U_ZERO_ERROR;
4854 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status); // A pattern that can run long. 4865 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status);
4855 REGEX_CHECK_STATUS; 4866 REGEX_CHECK_STATUS;
4856 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status) ; 4867 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status) ;
4857 REGEX_CHECK_STATUS; 4868 REGEX_CHECK_STATUS;
4858 matcher.getFindProgressCallback(returnedFn, returnedContext, status); 4869 matcher.getFindProgressCallback(returnedFn, returnedContext, status);
4859 REGEX_CHECK_STATUS; 4870 REGEX_CHECK_STATUS;
4860 REGEX_ASSERT(returnedFn == testProgressCallBackFn); 4871 REGEX_ASSERT(returnedFn == testProgressCallBackFn);
4861 REGEX_ASSERT(returnedContext == &cbInfo); 4872 REGEX_ASSERT(returnedContext == &cbInfo);
4862 4873
4863 // A short-running match should NOT invoke the callback. 4875 // A find that matches on the initial position does NOT invoke the callback.
4864 status = U_ZERO_ERROR; 4875 status = U_ZERO_ERROR;
4865 cbInfo.reset(100); 4876 cbInfo.reset(100);
4866 UnicodeString s = "abxxx"; 4877 UnicodeString s = "aaxxx";
4867 matcher.reset(s); 4878 matcher.reset(s);
4868 #if 0 4879 #if 0
4869 matcher.setTrace(TRUE); 4880 matcher.setTrace(TRUE);
4870 #endif 4881 #endif
4871 REGEX_ASSERT(matcher.find(0, status)); 4882 REGEX_ASSERT(matcher.find(0, status));
4872 REGEX_CHECK_STATUS; 4883 REGEX_CHECK_STATUS;
4873 REGEX_ASSERT(cbInfo.numCalls == 0); 4884 REGEX_ASSERT(cbInfo.numCalls == 0);
4874 4885
4875 // A medium running match that causes matcher.find() to invoke our callback for each index. 4886 // A medium running find() that causes matcher.find() to invoke our callback for each index,
4887 // but not so many times that we interrupt the operation.
4876 status = U_ZERO_ERROR; 4888 status = U_ZERO_ERROR;
4877 s = "aaaaaaaaaaaaaaaaaaab"; 4889 s = "aaaaaaaaaaaaaaaaaaab";
4878 cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string 4890 cbInfo.reset(s.length()); // Some upper limit for number of calls that is greater than size of our input string
4879 matcher.reset(s); 4891 matcher.reset(s);
4880 REGEX_ASSERT(matcher.find(0, status)==FALSE); 4892 REGEX_ASSERT(matcher.find(0, status)==FALSE);
4881 REGEX_CHECK_STATUS; 4893 REGEX_CHECK_STATUS;
4882 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); 4894 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25);
4883 4895
4884 // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point. 4896 // A longer running match that causes matcher.find() to invoke our callback which we cancel/interrupt at some point.
4885 status = U_ZERO_ERROR; 4897 status = U_ZERO_ERROR;
4886 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; 4898 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab";
4887 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string 4899 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of input string
4888 matcher.reset(s1); 4900 matcher.reset(s1);
4889 REGEX_ASSERT(matcher.find(0, status)==FALSE); 4901 REGEX_ASSERT(matcher.find(0, status)==FALSE);
4890 REGEX_CHECK_STATUS; 4902 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
4891 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); 4903 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5);
4892 4904
4893 #if 0
4894 // Now a match that will succeed, but after an interruption 4905 // Now a match that will succeed, but after an interruption
4895 status = U_ZERO_ERROR; 4906 status = U_ZERO_ERROR;
4896 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; 4907 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx";
4897 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string 4908 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of input string
4898 matcher.reset(s2); 4909 matcher.reset(s2);
4899 REGEX_ASSERT(matcher.find(0, status)==FALSE); 4910 REGEX_ASSERT(matcher.find(0, status)==FALSE);
4900 REGEX_CHECK_STATUS; 4911 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
4901 // Now retry the match from where left off 4912 // Now retry the match from where left off
4902 cbInfo.maxCalls = 100; // No callback limit 4913 cbInfo.maxCalls = 100; // No callback limit
4914 status = U_ZERO_ERROR;
4903 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); 4915 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status));
4904 REGEX_CHECK_STATUS; 4916 REGEX_CHECK_STATUS;
4905 #endif
4906 } 4917 }
4907 4918
4908 4919
4909 } 4920 }
4910 4921
4911 4922
4912 //--------------------------------------------------------------------------- 4923 //---------------------------------------------------------------------------
4913 // 4924 //
4914 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable 4925 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable
4915 // UTexts. The pure-C implementation of UText 4926 // UTexts. The pure-C implementation of UText
4916 // has no mutable backing stores, but we can 4927 // has no mutable backing stores, but we can
4917 // use UnicodeString here to test the functionality. 4928 // use UnicodeString here to test the functionality.
4918 // 4929 //
4919 //--------------------------------------------------------------------------- 4930 //---------------------------------------------------------------------------
4920 void RegexTest::PreAllocatedUTextCAPI () { 4931 void RegexTest::PreAllocatedUTextCAPI () {
4921 UErrorCode status = U_ZERO_ERROR; 4932 UErrorCode status = U_ZERO_ERROR;
4922 URegularExpression *re; 4933 URegularExpression *re;
4923 UText patternText = UTEXT_INITIALIZER; 4934 UText patternText = UTEXT_INITIALIZER;
4924 UnicodeString buffer; 4935 UnicodeString buffer;
4925 UText bufferText = UTEXT_INITIALIZER; 4936 UText bufferText = UTEXT_INITIALIZER;
4926 4937
4927 utext_openUnicodeString(&bufferText, &buffer, &status); 4938 utext_openUnicodeString(&bufferText, &buffer, &status);
4928 4939
4929 /* 4940 /*
4930 * getText() and getUText() 4941 * getText() and getUText()
4931 */ 4942 */
4932 { 4943 {
4933 UText text1 = UTEXT_INITIALIZER; 4944 UText text1 = UTEXT_INITIALIZER;
4934 UText text2 = UTEXT_INITIALIZER; 4945 UText text2 = UTEXT_INITIALIZER;
4935 UChar text2Chars[20]; 4946 UChar text2Chars[20];
4936 UText *resultText; 4947 UText *resultText;
4937 4948
4938 status = U_ZERO_ERROR; 4949 status = U_ZERO_ERROR;
4939 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); 4950 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status);
4940 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); 4951 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status);
4941 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); 4952 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2);
4942 utext_openUChars(&text2, text2Chars, -1, &status); 4953 utext_openUChars(&text2, text2Chars, -1, &status);
4943 4954
4944 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); 4955 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status);
4945 re = uregex_openUText(&patternText, 0, NULL, &status); 4956 re = uregex_openUText(&patternText, 0, NULL, &status);
4946 4957
4947 /* First set a UText */ 4958 /* First set a UText */
4948 uregex_setUText(re, &text1, &status); 4959 uregex_setUText(re, &text1, &status);
4949 resultText = uregex_getUText(re, &bufferText, &status); 4960 resultText = uregex_getUText(re, &bufferText, &status);
4950 REGEX_CHECK_STATUS; 4961 REGEX_CHECK_STATUS;
4951 REGEX_ASSERT(resultText == &bufferText); 4962 REGEX_ASSERT(resultText == &bufferText);
4952 utext_setNativeIndex(resultText, 0); 4963 utext_setNativeIndex(resultText, 0);
4953 utext_setNativeIndex(&text1, 0); 4964 utext_setNativeIndex(&text1, 0);
4954 REGEX_ASSERT(testUTextEqual(resultText, &text1)); 4965 REGEX_ASSERT(testUTextEqual(resultText, &text1));
4955 4966
4956 resultText = uregex_getUText(re, &bufferText, &status); 4967 resultText = uregex_getUText(re, &bufferText, &status);
4957 REGEX_CHECK_STATUS; 4968 REGEX_CHECK_STATUS;
4958 REGEX_ASSERT(resultText == &bufferText); 4969 REGEX_ASSERT(resultText == &bufferText);
4959 utext_setNativeIndex(resultText, 0); 4970 utext_setNativeIndex(resultText, 0);
4960 utext_setNativeIndex(&text1, 0); 4971 utext_setNativeIndex(&text1, 0);
4961 REGEX_ASSERT(testUTextEqual(resultText, &text1)); 4972 REGEX_ASSERT(testUTextEqual(resultText, &text1));
4962 4973
4963 /* Then set a UChar * */ 4974 /* Then set a UChar * */
4964 uregex_setText(re, text2Chars, 7, &status); 4975 uregex_setText(re, text2Chars, 7, &status);
4965 resultText = uregex_getUText(re, &bufferText, &status); 4976 resultText = uregex_getUText(re, &bufferText, &status);
4966 REGEX_CHECK_STATUS; 4977 REGEX_CHECK_STATUS;
4967 REGEX_ASSERT(resultText == &bufferText); 4978 REGEX_ASSERT(resultText == &bufferText);
4968 utext_setNativeIndex(resultText, 0); 4979 utext_setNativeIndex(resultText, 0);
4969 utext_setNativeIndex(&text2, 0); 4980 utext_setNativeIndex(&text2, 0);
4970 REGEX_ASSERT(testUTextEqual(resultText, &text2)); 4981 REGEX_ASSERT(testUTextEqual(resultText, &text2));
4971 4982
4972 uregex_close(re); 4983 uregex_close(re);
4973 utext_close(&text1); 4984 utext_close(&text1);
4974 utext_close(&text2); 4985 utext_close(&text2);
4975 } 4986 }
4976 4987
4977 /* 4988 /*
4978 * group() 4989 * group()
4979 */ 4990 */
4980 { 4991 {
4981 UChar text1[80]; 4992 UChar text1[80];
(...skipping 25 matching lines...) Expand all
5007 5018
5008 /* Capture group out of range. Error. */ 5019 /* Capture group out of range. Error. */
5009 status = U_ZERO_ERROR; 5020 status = U_ZERO_ERROR;
5010 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); 5021 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status);
5011 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); 5022 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
5012 REGEX_ASSERT(actual == &bufferText); 5023 REGEX_ASSERT(actual == &bufferText);
5013 5024
5014 uregex_close(re); 5025 uregex_close(re);
5015 5026
5016 } 5027 }
5017 5028
5018 /* 5029 /*
5019 * replaceFirst() 5030 * replaceFirst()
5020 */ 5031 */
5021 { 5032 {
5022 UChar text1[80]; 5033 UChar text1[80];
5023 UChar text2[80]; 5034 UChar text2[80];
5024 UText replText = UTEXT_INITIALIZER; 5035 UText replText = UTEXT_INITIALIZER;
5025 UText *result; 5036 UText *result;
5026 5037
5027 status = U_ZERO_ERROR; 5038 status = U_ZERO_ERROR;
5028 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); 5039 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
5029 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); 5040 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
5030 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); 5041 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status);
5031 5042
5032 re = uregex_openC("x(.*?)x", 0, NULL, &status); 5043 re = uregex_openC("x(.*?)x", 0, NULL, &status);
5033 REGEX_CHECK_STATUS; 5044 REGEX_CHECK_STATUS;
5034 5045
5035 /* Normal case, with match */ 5046 /* Normal case, with match */
5036 uregex_setText(re, text1, -1, &status); 5047 uregex_setText(re, text1, -1, &status);
5037 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 5048 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
5038 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 5049 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
5039 REGEX_CHECK_STATUS; 5050 REGEX_CHECK_STATUS;
5040 REGEX_ASSERT(result == &bufferText); 5051 REGEX_ASSERT(result == &bufferText);
5041 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); 5052 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result);
5042 5053
5043 /* No match. Text should copy to output with no changes. */ 5054 /* No match. Text should copy to output with no changes. */
5044 uregex_setText(re, text2, -1, &status); 5055 uregex_setText(re, text2, -1, &status);
5045 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 5056 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
5046 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 5057 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
5047 REGEX_CHECK_STATUS; 5058 REGEX_CHECK_STATUS;
5048 REGEX_ASSERT(result == &bufferText); 5059 REGEX_ASSERT(result == &bufferText);
5049 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); 5060 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result);
5050 5061
5051 /* Unicode escapes */ 5062 /* Unicode escapes */
5052 uregex_setText(re, text1, -1, &status); 5063 uregex_setText(re, text1, -1, &status);
5053 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a" , -1, &status); 5064 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a" , -1, &status);
5054 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status); 5065 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0, &status);
5055 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); 5066 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status);
5056 REGEX_CHECK_STATUS; 5067 REGEX_CHECK_STATUS;
5057 REGEX_ASSERT(result == &bufferText); 5068 REGEX_ASSERT(result == &bufferText);
5058 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); 5069 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result);
5059 5070
5060 uregex_close(re); 5071 uregex_close(re);
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
5097 5108
5098 uregex_close(re); 5109 uregex_close(re);
5099 utext_close(&replText); 5110 utext_close(&replText);
5100 } 5111 }
5101 5112
5102 5113
5103 /* 5114 /*
5104 * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts, 5115 * splitUText() uses the C++ API directly, and the UnicodeString version uses mutable UTexts,
5105 * so we don't need to test it here. 5116 * so we don't need to test it here.
5106 */ 5117 */
5107 5118
5108 utext_close(&bufferText); 5119 utext_close(&bufferText);
5109 utext_close(&patternText); 5120 utext_close(&patternText);
5110 } 5121 }
5111 5122
5112 //-------------------------------------------------------------- 5123 //--------------------------------------------------------------
5113 // 5124 //
5114 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher. 5125 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher.
5115 // 5126 //
5116 //--------------------------------------------------------------- 5127 //---------------------------------------------------------------
5117 void RegexTest::Bug7651() { 5128 void RegexTest::Bug7651() {
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
5172 { 5183 {
5173 UnicodeString str; 5184 UnicodeString str;
5174 str.setToBogus(); 5185 str.setToBogus();
5175 pMatcher->reset(str); 5186 pMatcher->reset(str);
5176 status = U_ZERO_ERROR; 5187 status = U_ZERO_ERROR;
5177 pMatcher->matches(status); 5188 pMatcher->matches(status);
5178 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); 5189 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
5179 delete pMatcher; 5190 delete pMatcher;
5180 } 5191 }
5181 } 5192 }
5182 5193
5183 5194
5184 // Bug 7029 5195 // Bug 7029
5185 void RegexTest::Bug7029() { 5196 void RegexTest::Bug7029() {
5186 UErrorCode status = U_ZERO_ERROR; 5197 UErrorCode status = U_ZERO_ERROR;
5187 5198
5188 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); 5199 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status);
5189 UnicodeString text = "abc.def"; 5200 UnicodeString text = "abc.def";
5190 UnicodeString splits[10]; 5201 UnicodeString splits[10];
5191 REGEX_CHECK_STATUS; 5202 REGEX_CHECK_STATUS;
5192 int32_t numFields = pMatcher->split(text, splits, 10, status); 5203 int32_t numFields = pMatcher->split(text, splits, 10, status);
5193 REGEX_CHECK_STATUS; 5204 REGEX_CHECK_STATUS;
5194 REGEX_ASSERT(numFields == 8); 5205 REGEX_ASSERT(numFields == 8);
5195 delete pMatcher; 5206 delete pMatcher;
5196 } 5207 }
5197 5208
5198 // Bug 9283 5209 // Bug 9283
5199 // This test is checking for the existence of any supplemental characters that case-fold 5210 // This test is checking for the existence of any supplemental characters that case-fold
5200 // to a bmp character. 5211 // to a bmp character.
5201 // 5212 //
5202 // At the time of this writing there are none. If any should appear in a subsequent release 5213 // At the time of this writing there are none. If any should appear in a subsequent release
5203 // of Unicode, the code in regular expressions compilation that determines the longest 5214 // of Unicode, the code in regular expressions compilation that determines the longest
5204 // possible match for a literal string will need to be enhanced. 5215 // possible match for a literal string will need to be enhanced.
5205 // 5216 //
5206 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() 5217 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength()
5207 // for details on what to do in case of a failure of this test. 5218 // for details on what to do in case of a failure of this test.
5208 // 5219 //
5209 void RegexTest::Bug9283() { 5220 void RegexTest::Bug9283() {
5221 #if !UCONFIG_NO_NORMALIZATION
5210 UErrorCode status = U_ZERO_ERROR; 5222 UErrorCode status = U_ZERO_ERROR;
5211 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF] ]", status); 5223 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF] ]", status);
5212 REGEX_CHECK_STATUS; 5224 REGEX_CHECK_STATUS;
5213 int32_t index; 5225 int32_t index;
5214 UChar32 c; 5226 UChar32 c;
5215 for (index=0; ; index++) { 5227 for (index=0; ; index++) {
5216 c = supplementalsWithCaseFolding.charAt(index); 5228 c = supplementalsWithCaseFolding.charAt(index);
5217 if (c == -1) { 5229 if (c == -1) {
5218 break; 5230 break;
5219 } 5231 }
5220 UnicodeString cf = UnicodeString(c).foldCase(); 5232 UnicodeString cf = UnicodeString(c).foldCase();
5221 REGEX_ASSERT(cf.length() >= 2); 5233 REGEX_ASSERT(cf.length() >= 2);
5222 } 5234 }
5235 #endif /* #if !UCONFIG_NO_NORMALIZATION */
5223 } 5236 }
5224 5237
5225 5238
5226 void RegexTest::CheckInvBufSize() { 5239 void RegexTest::CheckInvBufSize() {
5227 if(inv_next>=INV_BUFSIZ) { 5240 if(inv_next>=INV_BUFSIZ) {
5228 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least % d )\n", 5241 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least % d )\n",
5229 __FILE__, INV_BUFSIZ, inv_next); 5242 __FILE__, INV_BUFSIZ, inv_next);
5230 } else { 5243 } else {
5231 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); 5244 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next);
5232 } 5245 }
5233 } 5246 }
5234 5247
5235 void RegexTest::TestBug11371() { 5248
5249 void RegexTest::Bug10459() {
5236 UErrorCode status = U_ZERO_ERROR; 5250 UErrorCode status = U_ZERO_ERROR;
5237 UnicodeString patternString; 5251 UnicodeString patternString("(txt)");
5252 UnicodeString txtString("txt");
5238 5253
5239 for (int i=0; i<8000000; i++) { 5254 UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status);
5240 patternString.append(UnicodeString("()")); 5255 REGEX_CHECK_STATUS;
5256 UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status);
5257 REGEX_CHECK_STATUS;
5258
5259 URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status);
5260 REGEX_CHECK_STATUS;
5261
5262 uregex_setUText(icu_re, utext_txt, &status);
5263 REGEX_CHECK_STATUS;
5264
5265 // The bug was that calling uregex_group() before doing a matching operation
5266 // was causing a segfault. Only for Regular Expressions created from UText.
5267 // It should set an U_REGEX_INVALID_STATE.
5268
5269 UChar buf[100];
5270 int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status);
5271 REGEX_ASSERT(status == U_REGEX_INVALID_STATE);
5272 REGEX_ASSERT(len == 0);
5273
5274 uregex_close(icu_re);
5275 utext_close(utext_pat);
5276 utext_close(utext_txt);
5277 }
5278
5279 void RegexTest::TestCaseInsensitiveStarters() {
5280 // Test that the data used by RegexCompile::findCaseInsensitiveStarters() hasn't
5281 // become stale because of new Unicode characters.
5282 // If it is stale, rerun the generation tool
5283 // svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing
5284 // and replace the embedded data in i18n/regexcmp.cpp
5285
5286 for (UChar32 cp=0; cp<=0x10ffff; cp++) {
5287 if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) {
5288 continue;
5289 }
5290 UnicodeSet s(cp, cp);
5291 s.closeOver(USET_CASE_INSENSITIVE);
5292 UnicodeSetIterator setIter(s);
5293 while (setIter.next()) {
5294 if (!setIter.isString()) {
5295 continue;
5296 }
5297 const UnicodeString &str = setIter.getString();
5298 UChar32 firstChar = str.char32At(0);
5299 UnicodeSet starters;
5300 RegexCompile::findCaseInsensitiveStarters(firstChar, &starters);
5301 if (!starters.contains(cp)) {
5302 errln("CaseInsensitiveStarters for \\u%x is missing character \\u%x.", cp, firstChar);
5303 return;
5304 }
5305 }
5241 } 5306 }
5307 }
5308
5309
5310 void RegexTest::TestBug11049() {
5311 // Original bug report: pattern with match start consisting of one of several individual characters,
5312 // and the text being matched ending with a supplementary character. find() would read past the
5313 // end of the input text when searching for potential match starting points .
5314
5315 // To see the problem, the text must exactly fill an allocated buffer, so that valgrind will
5316 // detect the bad read.
5317
5318 TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__);
5319 TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__);
5320
5321 // Test again with a pattern starting with a single character,
5322 // which takes a different code path than starting with an OR expression,
5323 // but with similar logic.
5324 TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__);
5325 TestCase11049("C", "string matches at end C", TRUE, __LINE__);
5326 }
5327
5328 // Run a single test case from TestBug11049(). Internal function.
5329 void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expectMatch, int32_t lineNumber) {
5330 UErrorCode status = U_ZERO_ERROR;
5331 UnicodeString patternString = UnicodeString(pattern).unescape();
5242 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status)); 5332 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString, 0, status));
5243 if (status != U_REGEX_PATTERN_TOO_BIG) { 5333
5244 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ", 5334 UnicodeString dataString = UnicodeString(data).unescape();
5245 __FILE__, __LINE__, u_errorName(status)); 5335 UChar *exactBuffer = new UChar[dataString.length()];
5336 dataString.extract(exactBuffer, dataString.length(), status);
5337 UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status);
5338
5339 LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status));
5340 REGEX_CHECK_STATUS;
5341 matcher->reset(ut);
5342 UBool result = matcher->find();
5343 if (result != expectMatch) {
5344 errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \"%s\"",
5345 __FILE__, lineNumber, expectMatch, result, pattern, data);
5246 } 5346 }
5247 5347
5248 status = U_ZERO_ERROR; 5348 // Rerun test with UTF-8 input text. Won't see buffer overreads, but could see
5249 patternString = "("; 5349 // off-by-one on find() with match at the last code point.
5250 for (int i=0; i<20000000; i++) { 5350 // Size of the original char * data (invariant charset) will be <= than the equivalent UTF-8
5251 patternString.append(UnicodeString("A++")); 5351 // because string.unescape() will only shrink it.
5352 char * utf8Buffer = new char[uprv_strlen(data)+1];
5353 u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), dataString.length(), &status);
5354 REGEX_CHECK_STATUS;
5355 ut = utext_openUTF8(ut, utf8Buffer, -1, &status);
5356 REGEX_CHECK_STATUS;
5357 matcher->reset(ut);
5358 result = matcher->find();
5359 if (result != expectMatch) {
5360 errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \"%s\", text = \"%s\"",
5361 __FILE__, lineNumber, expectMatch, result, pattern, data);
5252 } 5362 }
5253 patternString.append(UnicodeString("){0}B++")); 5363 delete [] utf8Buffer;
5254 LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString, 0, status));
5255 if (status != U_REGEX_PATTERN_TOO_BIG) {
5256 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",
5257 __FILE__, __LINE__, u_errorName(status));
5258 }
5259 5364
5260 // Pattern with too much string data, such that string indexes overflow operand data. 5365 utext_close(ut);
5261 status = U_ZERO_ERROR; 5366 delete [] exactBuffer;
5262 patternString = ""; 5367 }
5263 while (patternString.length() < 0x00ffffff) {
5264 patternString.append(UnicodeString("stuff and things dont you know, these are a few of my favorite strings\n"));
5265 }
5266 patternString.append(UnicodeString("X? trailing string"));
5267 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString, 0, status));
5268 compiledPat3->dumpPattern();
5269 if (status != U_REGEX_PATTERN_TOO_BIG) {
5270 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s. ",
5271 __FILE__, __LINE__, u_errorName(status));
5272 }
5273 5368
5274 5369
5275 5370
5276 } 5371 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
5277 5372
5278 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
OLDNEW
« no previous file with comments | « source/test/intltest/regextst.h ('k') | source/test/intltest/regiontst.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698