| OLD | NEW |
| 1 /******************************************************************** | 1 /******************************************************************** |
| 2 * COPYRIGHT: | 2 * COPYRIGHT: |
| 3 * Copyright (c) 2002-2013, International Business Machines Corporation and | 3 * Copyright (c) 2002-2014, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ | 5 ********************************************************************/ |
| 6 | 6 |
| 7 // | 7 // |
| 8 // regextst.cpp | 8 // regextst.cpp |
| 9 // | 9 // |
| 10 // ICU Regular Expressions test, part of intltest. | 10 // ICU Regular Expressions test, part of intltest. |
| 11 // | 11 // |
| 12 | 12 |
| 13 /* | 13 /* |
| 14 NOTE!! | 14 NOTE!! |
| 15 | 15 |
| 16 PLEASE be careful about ASCII assumptions in this test. | 16 PLEASE be careful about ASCII assumptions in this test. |
| 17 This test is one of the worst repeat offenders. | 17 This test is one of the worst repeat offenders. |
| 18 If you have questions, contact someone on the ICU PMC | 18 If you have questions, contact someone on the ICU PMC |
| 19 who has access to an EBCDIC system. | 19 who has access to an EBCDIC system. |
| 20 | 20 |
| 21 */ | 21 */ |
| 22 | 22 |
| 23 #include "intltest.h" | 23 #include "intltest.h" |
| 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 25 | 25 |
| 26 #include "unicode/localpointer.h" |
| 26 #include "unicode/regex.h" | 27 #include "unicode/regex.h" |
| 27 #include "unicode/uchar.h" | 28 #include "unicode/uchar.h" |
| 28 #include "unicode/ucnv.h" | 29 #include "unicode/ucnv.h" |
| 29 #include "unicode/uniset.h" | 30 #include "unicode/uniset.h" |
| 31 #include "unicode/uregex.h" |
| 32 #include "unicode/usetiter.h" |
| 30 #include "unicode/ustring.h" | 33 #include "unicode/ustring.h" |
| 31 #include "regextst.h" | 34 #include "regextst.h" |
| 35 #include "regexcmp.h" |
| 32 #include "uvector.h" | 36 #include "uvector.h" |
| 33 #include "util.h" | 37 #include "util.h" |
| 34 #include <stdlib.h> | 38 #include <stdlib.h> |
| 35 #include <string.h> | 39 #include <string.h> |
| 36 #include <stdio.h> | 40 #include <stdio.h> |
| 37 #include "cstring.h" | 41 #include "cstring.h" |
| 38 #include "uinvchar.h" | 42 #include "uinvchar.h" |
| 39 | 43 |
| 40 #define SUPPORT_MUTATING_INPUT_STRING 0 | 44 #define SUPPORT_MUTATING_INPUT_STRING 0 |
| 41 | 45 |
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 124 break; | 128 break; |
| 125 case 19: name = "Bug 7029"; | 129 case 19: name = "Bug 7029"; |
| 126 if (exec) Bug7029(); | 130 if (exec) Bug7029(); |
| 127 break; | 131 break; |
| 128 case 20: name = "CheckInvBufSize"; | 132 case 20: name = "CheckInvBufSize"; |
| 129 if (exec) CheckInvBufSize(); | 133 if (exec) CheckInvBufSize(); |
| 130 break; | 134 break; |
| 131 case 21: name = "Bug 9283"; | 135 case 21: name = "Bug 9283"; |
| 132 if (exec) Bug9283(); | 136 if (exec) Bug9283(); |
| 133 break; | 137 break; |
| 134 case 22: name = "TestBug11371"; | 138 case 22: name = "Bug10459"; |
| 135 if (exec) TestBug11371(); | 139 if (exec) Bug10459(); |
| 136 break; | 140 break; |
| 137 | 141 case 23: name = "TestCaseInsensitiveStarters"; |
| 142 if (exec) TestCaseInsensitiveStarters(); |
| 143 break; |
| 144 case 24: name = "TestBug11049"; |
| 145 if (exec) TestBug11049(); |
| 146 break; |
| 138 default: name = ""; | 147 default: name = ""; |
| 139 break; //needed to end loop | 148 break; //needed to end loop |
| 140 } | 149 } |
| 141 } | 150 } |
| 142 | 151 |
| 143 | 152 |
| 144 | 153 |
| 145 /** | 154 /** |
| 146 * Calls utext_openUTF8 after, potentially, converting invariant text from the c
ompilation codepage | 155 * Calls utext_openUTF8 after, potentially, converting invariant text from the c
ompilation codepage |
| 147 * into ASCII. | 156 * into ASCII. |
| 148 * @see utext_openUTF8 | 157 * @see utext_openUTF8 |
| 149 */ | 158 */ |
| 150 static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t
length, UErrorCode *status); | 159 static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t
length, UErrorCode *status); |
| 151 | 160 |
| 152 //--------------------------------------------------------------------------- | 161 //--------------------------------------------------------------------------- |
| 153 // | 162 // |
| 154 // Error Checking / Reporting macros used in all of the tests. | 163 // Error Checking / Reporting macros used in all of the tests. |
| 155 // | 164 // |
| 156 //--------------------------------------------------------------------------- | 165 //--------------------------------------------------------------------------- |
| 157 | 166 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 203 UChar ch = buf[i]; | 212 UChar ch = buf[i]; |
| 204 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); | 213 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); |
| 205 } | 214 } |
| 206 } | 215 } |
| 207 } | 216 } |
| 208 } | 217 } |
| 209 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; | 218 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; |
| 210 return ASSERT_BUF; | 219 return ASSERT_BUF; |
| 211 } | 220 } |
| 212 | 221 |
| 213 | |
| 214 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)
/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text,
buf);} | 222 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)
/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text,
buf);} |
| 215 | 223 |
| 216 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest
failure. status=%s", \ | 224 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest
failure. status=%s", \ |
| 217 __FILE__, __LINE__
, u_errorName(status)); return;}} | 225 __FILE__, __LINE__
, u_errorName(status)); return;}} |
| 218 | 226 |
| 219 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure:
REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} | 227 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure:
REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} |
| 220 | 228 |
| 221 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr)
;\ | 229 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr)
;\ |
| 222 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
%s, got %s", \ | 230 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
%s, got %s", \ |
| 223 __LINE__, u_errorName(errcode), u_errorName(status));};} | 231 __LINE__, u_errorName(errcode), u_errorName(status));};} |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 289 char buf[201 /*21*/]; | 297 char buf[201 /*21*/]; |
| 290 char expectedBuf[201]; | 298 char expectedBuf[201]; |
| 291 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); | 299 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); |
| 292 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0])
, &expectedText); | 300 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0])
, &expectedText); |
| 293 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars)
, got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe
ctedText), buf, (int)utext_nativeLength(actual)); | 301 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars)
, got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe
ctedText), buf, (int)utext_nativeLength(actual)); |
| 294 } | 302 } |
| 295 utext_close(&expectedText); | 303 utext_close(&expectedText); |
| 296 } | 304 } |
| 297 | 305 |
| 298 /** | 306 /** |
| 299 * Assumes utf-8 input | 307 * Assumes utf-8 input |
| 300 */ | 308 */ |
| 301 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua
l), __FILE__, __LINE__) | 309 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua
l), __FILE__, __LINE__) |
| 302 /** | 310 /** |
| 303 * Assumes Invariant input | 311 * Assumes Invariant input |
| 304 */ | 312 */ |
| 305 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp
ected), (actual), __FILE__, __LINE__) | 313 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp
ected), (actual), __FILE__, __LINE__) |
| 306 | 314 |
| 307 /** | 315 /** |
| 308 * This buffer ( inv_buf ) is used to hold the UTF-8 strings | 316 * This buffer ( inv_buf ) is used to hold the UTF-8 strings |
| 309 * passed into utext_openUTF8. An error will be given if | 317 * passed into utext_openUTF8. An error will be given if |
| 310 * INV_BUFSIZ is too small. It's only used on EBCDIC systems. | 318 * INV_BUFSIZ is too small. It's only used on EBCDIC systems. |
| 311 */ | 319 */ |
| 312 | 320 |
| 313 #define INV_BUFSIZ 2048 /* increase this if too small */ | 321 #define INV_BUFSIZ 2048 /* increase this if too small */ |
| 314 | 322 |
| 315 static int64_t inv_next=0; | 323 static int64_t inv_next=0; |
| 316 | 324 |
| 317 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY | 325 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY |
| 318 static char inv_buf[INV_BUFSIZ]; | 326 static char inv_buf[INV_BUFSIZ]; |
| 319 #endif | 327 #endif |
| 320 | 328 |
| 321 static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t
length, UErrorCode *status) { | 329 static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t
length, UErrorCode *status) { |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 369 RegexMatcher *REMatcher = NULL; | 377 RegexMatcher *REMatcher = NULL; |
| 370 UBool retVal = TRUE; | 378 UBool retVal = TRUE; |
| 371 | 379 |
| 372 UnicodeString patString(pat, -1, US_INV); | 380 UnicodeString patString(pat, -1, US_INV); |
| 373 REPattern = RegexPattern::compile(patString, 0, pe, status); | 381 REPattern = RegexPattern::compile(patString, 0, pe, status); |
| 374 if (U_FAILURE(status)) { | 382 if (U_FAILURE(status)) { |
| 375 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta
tus = %s", | 383 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta
tus = %s", |
| 376 line, u_errorName(status)); | 384 line, u_errorName(status)); |
| 377 return FALSE; | 385 return FALSE; |
| 378 } | 386 } |
| 379 if (line==376) { RegexPatternDump(REPattern);} | 387 if (line==376) { REPattern->dumpPattern();} |
| 380 | 388 |
| 381 UnicodeString inputString(inputText); | 389 UnicodeString inputString(inputText); |
| 382 UnicodeString unEscapedInput = inputString.unescape(); | 390 UnicodeString unEscapedInput = inputString.unescape(); |
| 383 REMatcher = REPattern->matcher(unEscapedInput, status); | 391 REMatcher = REPattern->matcher(unEscapedInput, status); |
| 384 if (U_FAILURE(status)) { | 392 if (U_FAILURE(status)) { |
| 385 errln("RegexTest failure in REPattern::matcher() at line %d. Status = %
s\n", | 393 errln("RegexTest failure in REPattern::matcher() at line %d. Status = %
s\n", |
| 386 line, u_errorName(status)); | 394 line, u_errorName(status)); |
| 387 return FALSE; | 395 return FALSE; |
| 388 } | 396 } |
| 389 | 397 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 405 errln("RegexTest failure in matches() at line %d. Status = %s\n", | 413 errln("RegexTest failure in matches() at line %d. Status = %s\n", |
| 406 line, u_errorName(status)); | 414 line, u_errorName(status)); |
| 407 retVal = FALSE; | 415 retVal = FALSE; |
| 408 } | 416 } |
| 409 if (actualmatch != match) { | 417 if (actualmatch != match) { |
| 410 errln("RegexTest: wrong return from matches() at line %d.\n", line); | 418 errln("RegexTest: wrong return from matches() at line %d.\n", line); |
| 411 retVal = FALSE; | 419 retVal = FALSE; |
| 412 } | 420 } |
| 413 | 421 |
| 414 if (retVal == FALSE) { | 422 if (retVal == FALSE) { |
| 415 RegexPatternDump(REPattern); | 423 REPattern->dumpPattern(); |
| 416 } | 424 } |
| 417 | 425 |
| 418 delete REPattern; | 426 delete REPattern; |
| 419 delete REMatcher; | 427 delete REMatcher; |
| 420 return retVal; | 428 return retVal; |
| 421 } | 429 } |
| 422 | 430 |
| 423 | 431 |
| 424 UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
ing, UBool match, int32_t line) { | 432 UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
ing, UBool match, int32_t line) { |
| 425 UText pattern = UTEXT_INITIALIZER; | 433 UText pattern = UTEXT_INITIALIZER; |
| 426 int32_t inputUTF8Length; | 434 int32_t inputUTF8Length; |
| 427 char *textChars = NULL; | 435 char *textChars = NULL; |
| 428 UText inputText = UTEXT_INITIALIZER; | 436 UText inputText = UTEXT_INITIALIZER; |
| 429 UErrorCode status = U_ZERO_ERROR; | 437 UErrorCode status = U_ZERO_ERROR; |
| 430 UParseError pe; | 438 UParseError pe; |
| 431 RegexPattern *REPattern = NULL; | 439 RegexPattern *REPattern = NULL; |
| 432 RegexMatcher *REMatcher = NULL; | 440 RegexMatcher *REMatcher = NULL; |
| 433 UBool retVal = TRUE; | 441 UBool retVal = TRUE; |
| 434 | 442 |
| 435 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); | 443 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); |
| 436 REPattern = RegexPattern::compile(&pattern, 0, pe, status); | 444 REPattern = RegexPattern::compile(&pattern, 0, pe, status); |
| 437 if (U_FAILURE(status)) { | 445 if (U_FAILURE(status)) { |
| 438 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8
). Status = %s\n", | 446 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8
). Status = %s\n", |
| 439 line, u_errorName(status)); | 447 line, u_errorName(status)); |
| 440 return FALSE; | 448 return FALSE; |
| 441 } | 449 } |
| 442 | 450 |
| 443 UnicodeString inputString(text, -1, US_INV); | 451 UnicodeString inputString(text, -1, US_INV); |
| 444 UnicodeString unEscapedInput = inputString.unescape(); | 452 UnicodeString unEscapedInput = inputString.unescape(); |
| 445 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); | 453 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); |
| 446 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N
ULL, NULL, NULL, &status); | 454 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N
ULL, NULL, NULL, &status); |
| 447 | 455 |
| 448 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(),
status); | 456 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(),
status); |
| 449 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { | 457 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { |
| 450 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en | 458 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en |
| 451 logln("RegexTest unable to convert input to UTF8 at line %d. Status = %
s\n", line, u_errorName(status)); | 459 logln("RegexTest unable to convert input to UTF8 at line %d. Status = %
s\n", line, u_errorName(status)); |
| 452 return TRUE; // not a failure of the Regex engine | 460 return TRUE; // not a failure of the Regex engine |
| 453 } | 461 } |
| 454 status = U_ZERO_ERROR; // buffer overflow | 462 status = U_ZERO_ERROR; // buffer overflow |
| 455 textChars = new char[inputUTF8Length+1]; | 463 textChars = new char[inputUTF8Length+1]; |
| 456 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(
), status); | 464 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(
), status); |
| 457 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); | 465 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); |
| 458 | 466 |
| 459 REMatcher = &REPattern->matcher(status)->reset(&inputText); | 467 REMatcher = &REPattern->matcher(status)->reset(&inputText); |
| 460 if (U_FAILURE(status)) { | 468 if (U_FAILURE(status)) { |
| 461 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta
tus = %s\n", | 469 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta
tus = %s\n", |
| 462 line, u_errorName(status)); | 470 line, u_errorName(status)); |
| 463 return FALSE; | 471 return FALSE; |
| 464 } | 472 } |
| 465 | 473 |
| 466 UBool actualmatch; | 474 UBool actualmatch; |
| 467 actualmatch = REMatcher->lookingAt(status); | 475 actualmatch = REMatcher->lookingAt(status); |
| 468 if (U_FAILURE(status)) { | 476 if (U_FAILURE(status)) { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 481 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n"
, | 489 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n"
, |
| 482 line, u_errorName(status)); | 490 line, u_errorName(status)); |
| 483 retVal = FALSE; | 491 retVal = FALSE; |
| 484 } | 492 } |
| 485 if (actualmatch != match) { | 493 if (actualmatch != match) { |
| 486 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin
e); | 494 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin
e); |
| 487 retVal = FALSE; | 495 retVal = FALSE; |
| 488 } | 496 } |
| 489 | 497 |
| 490 if (retVal == FALSE) { | 498 if (retVal == FALSE) { |
| 491 RegexPatternDump(REPattern); | 499 REPattern->dumpPattern(); |
| 492 } | 500 } |
| 493 | 501 |
| 494 delete REPattern; | 502 delete REPattern; |
| 495 delete REMatcher; | 503 delete REMatcher; |
| 496 utext_close(&inputText); | 504 utext_close(&inputText); |
| 497 utext_close(&pattern); | 505 utext_close(&pattern); |
| 498 delete[] textChars; | 506 delete[] textChars; |
| 499 return retVal; | 507 return retVal; |
| 500 } | 508 } |
| 501 | 509 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 547 if (status != expectedStatus) { | 555 if (status != expectedStatus) { |
| 548 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err
orName(status)); | 556 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err
orName(status)); |
| 549 } else { | 557 } else { |
| 550 if (status != U_ZERO_ERROR) { | 558 if (status != U_ZERO_ERROR) { |
| 551 if (pe.line != errLine || pe.offset != errCol) { | 559 if (pe.line != errLine || pe.offset != errCol) { |
| 552 errln("Line %d: incorrect line/offset from UParseError. Expecte
d %d/%d; got %d/%d.\n", | 560 errln("Line %d: incorrect line/offset from UParseError. Expecte
d %d/%d; got %d/%d.\n", |
| 553 line, errLine, errCol, pe.line, pe.offset); | 561 line, errLine, errCol, pe.line, pe.offset); |
| 554 } | 562 } |
| 555 } | 563 } |
| 556 } | 564 } |
| 557 | 565 |
| 558 delete callerPattern; | 566 delete callerPattern; |
| 559 utext_close(&patternText); | 567 utext_close(&patternText); |
| 560 } | 568 } |
| 561 | 569 |
| 562 | 570 |
| 563 | 571 |
| 564 //--------------------------------------------------------------------------- | 572 //--------------------------------------------------------------------------- |
| 565 // | 573 // |
| 566 // Basic Check for basic functionality of regex pattern matching. | 574 // Basic Check for basic functionality of regex pattern matching. |
| 567 // Avoid the use of REGEX_FIND test macro, which has | 575 // Avoid the use of REGEX_FIND test macro, which has |
| 568 // substantial dependencies on basic Regex functionality. | 576 // substantial dependencies on basic Regex functionality. |
| 569 // | 577 // |
| 570 //--------------------------------------------------------------------------- | 578 //--------------------------------------------------------------------------- |
| 571 void RegexTest::Basic() { | 579 void RegexTest::Basic() { |
| 572 | 580 |
| 573 | 581 |
| 574 // | 582 // |
| 575 // Debug - slide failing test cases early | 583 // Debug - slide failing test cases early |
| 576 // | 584 // |
| 577 #if 0 | 585 #if 0 |
| 578 { | 586 { |
| 579 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); | 587 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); |
| 580 UParseError pe; | 588 UParseError pe; |
| 581 UErrorCode status = U_ZERO_ERROR; | 589 UErrorCode status = U_ZERO_ERROR; |
| 582 RegexPattern *pattern; | 590 RegexPattern *pattern; |
| 583 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc
ape(), UREGEX_CASE_INSENSITIVE, pe, status); | 591 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc
ape(), UREGEX_CASE_INSENSITIVE, pe, status); |
| 584 RegexPatternDump(pattern); | 592 pattern->dumpPattern(); |
| 585 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz")
.unescape(), status); | 593 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz")
.unescape(), status); |
| 586 UBool result = m->find(); | 594 UBool result = m->find(); |
| 587 printf("result = %d\n", result); | 595 printf("result = %d\n", result); |
| 588 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); | 596 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); |
| 589 // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX===================="); | 597 // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX===================="); |
| 590 } | 598 } |
| 591 exit(1); | 599 exit(1); |
| 592 #endif | 600 #endif |
| 593 | 601 |
| 594 | 602 |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 722 // implementation. | 730 // implementation. |
| 723 // | 731 // |
| 724 //--------------------------------------------------------------------------- | 732 //--------------------------------------------------------------------------- |
| 725 void RegexTest::UTextBasic() { | 733 void RegexTest::UTextBasic() { |
| 726 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ | 734 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ |
| 727 UErrorCode status = U_ZERO_ERROR; | 735 UErrorCode status = U_ZERO_ERROR; |
| 728 UText pattern = UTEXT_INITIALIZER; | 736 UText pattern = UTEXT_INITIALIZER; |
| 729 utext_openUTF8(&pattern, str_abc, -1, &status); | 737 utext_openUTF8(&pattern, str_abc, -1, &status); |
| 730 RegexMatcher matcher(&pattern, 0, status); | 738 RegexMatcher matcher(&pattern, 0, status); |
| 731 REGEX_CHECK_STATUS; | 739 REGEX_CHECK_STATUS; |
| 732 | 740 |
| 733 UText input = UTEXT_INITIALIZER; | 741 UText input = UTEXT_INITIALIZER; |
| 734 utext_openUTF8(&input, str_abc, -1, &status); | 742 utext_openUTF8(&input, str_abc, -1, &status); |
| 735 REGEX_CHECK_STATUS; | 743 REGEX_CHECK_STATUS; |
| 736 matcher.reset(&input); | 744 matcher.reset(&input); |
| 737 REGEX_CHECK_STATUS; | 745 REGEX_CHECK_STATUS; |
| 738 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); | 746 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); |
| 739 | 747 |
| 740 matcher.reset(matcher.inputText()); | 748 matcher.reset(matcher.inputText()); |
| 741 REGEX_CHECK_STATUS; | 749 REGEX_CHECK_STATUS; |
| 742 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); | 750 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); |
| 743 | 751 |
| 744 utext_close(&pattern); | 752 utext_close(&pattern); |
| 745 utext_close(&input); | 753 utext_close(&input); |
| 746 } | 754 } |
| 747 | 755 |
| 748 | 756 |
| 749 //--------------------------------------------------------------------------- | 757 //--------------------------------------------------------------------------- |
| 750 // | 758 // |
| 751 // API_Match Test that the API for class RegexMatcher | 759 // API_Match Test that the API for class RegexMatcher |
| 752 // is present and nominally working, but excluding functions | 760 // is present and nominally working, but excluding functions |
| 753 // implementing replace operations. | 761 // implementing replace operations. |
| (...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1110 UErrorCode status = U_ZERO_ERROR; | 1118 UErrorCode status = U_ZERO_ERROR; |
| 1111 RegexPattern *p = RegexPattern::compile(".", 0, status); | 1119 RegexPattern *p = RegexPattern::compile(".", 0, status); |
| 1112 RegexMatcher *m = p->matcher(status); | 1120 RegexMatcher *m = p->matcher(status); |
| 1113 REGEX_CHECK_STATUS; | 1121 REGEX_CHECK_STATUS; |
| 1114 | 1122 |
| 1115 REGEX_ASSERT(m->find() == FALSE); | 1123 REGEX_ASSERT(m->find() == FALSE); |
| 1116 REGEX_ASSERT(m->input() == ""); | 1124 REGEX_ASSERT(m->input() == ""); |
| 1117 delete m; | 1125 delete m; |
| 1118 delete p; | 1126 delete p; |
| 1119 } | 1127 } |
| 1120 | 1128 |
| 1121 // | 1129 // |
| 1122 // Regions | 1130 // Regions |
| 1123 // | 1131 // |
| 1124 { | 1132 { |
| 1125 UErrorCode status = U_ZERO_ERROR; | 1133 UErrorCode status = U_ZERO_ERROR; |
| 1126 UnicodeString testString("This is test data"); | 1134 UnicodeString testString("This is test data"); |
| 1127 RegexMatcher m(".*", testString, 0, status); | 1135 RegexMatcher m(".*", testString, 0, status); |
| 1128 REGEX_CHECK_STATUS; | 1136 REGEX_CHECK_STATUS; |
| 1129 REGEX_ASSERT(m.regionStart() == 0); | 1137 REGEX_ASSERT(m.regionStart() == 0); |
| 1130 REGEX_ASSERT(m.regionEnd() == testString.length()); | 1138 REGEX_ASSERT(m.regionEnd() == testString.length()); |
| 1131 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1139 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 1132 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1140 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 1133 | 1141 |
| 1134 m.region(2,4, status); | 1142 m.region(2,4, status); |
| 1135 REGEX_CHECK_STATUS; | 1143 REGEX_CHECK_STATUS; |
| 1136 REGEX_ASSERT(m.matches(status)); | 1144 REGEX_ASSERT(m.matches(status)); |
| 1137 REGEX_ASSERT(m.start(status)==2); | 1145 REGEX_ASSERT(m.start(status)==2); |
| 1138 REGEX_ASSERT(m.end(status)==4); | 1146 REGEX_ASSERT(m.end(status)==4); |
| 1139 REGEX_CHECK_STATUS; | 1147 REGEX_CHECK_STATUS; |
| 1140 | 1148 |
| 1141 m.reset(); | 1149 m.reset(); |
| 1142 REGEX_ASSERT(m.regionStart() == 0); | 1150 REGEX_ASSERT(m.regionStart() == 0); |
| 1143 REGEX_ASSERT(m.regionEnd() == testString.length()); | 1151 REGEX_ASSERT(m.regionEnd() == testString.length()); |
| 1144 | 1152 |
| 1145 UnicodeString shorterString("short"); | 1153 UnicodeString shorterString("short"); |
| 1146 m.reset(shorterString); | 1154 m.reset(shorterString); |
| 1147 REGEX_ASSERT(m.regionStart() == 0); | 1155 REGEX_ASSERT(m.regionStart() == 0); |
| 1148 REGEX_ASSERT(m.regionEnd() == shorterString.length()); | 1156 REGEX_ASSERT(m.regionEnd() == shorterString.length()); |
| 1149 | 1157 |
| 1150 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1158 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 1151 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); | 1159 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); |
| 1152 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 1160 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
| 1153 REGEX_ASSERT(&m == &m.reset()); | 1161 REGEX_ASSERT(&m == &m.reset()); |
| 1154 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 1162 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
| 1155 | 1163 |
| 1156 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); | 1164 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); |
| 1157 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1165 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 1158 REGEX_ASSERT(&m == &m.reset()); | 1166 REGEX_ASSERT(&m == &m.reset()); |
| 1159 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1167 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 1160 | 1168 |
| 1161 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1169 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 1162 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); | 1170 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); |
| 1163 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 1171 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
| 1164 REGEX_ASSERT(&m == &m.reset()); | 1172 REGEX_ASSERT(&m == &m.reset()); |
| 1165 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 1173 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
| 1166 | 1174 |
| 1167 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); | 1175 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); |
| 1168 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1176 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 1169 REGEX_ASSERT(&m == &m.reset()); | 1177 REGEX_ASSERT(&m == &m.reset()); |
| 1170 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1178 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 1171 | 1179 |
| 1172 } | 1180 } |
| 1173 | 1181 |
| 1174 // | 1182 // |
| 1175 // hitEnd() and requireEnd() | 1183 // hitEnd() and requireEnd() |
| 1176 // | 1184 // |
| 1177 { | 1185 { |
| 1178 UErrorCode status = U_ZERO_ERROR; | 1186 UErrorCode status = U_ZERO_ERROR; |
| 1179 UnicodeString testString("aabb"); | 1187 UnicodeString testString("aabb"); |
| 1180 RegexMatcher m1(".*", testString, 0, status); | 1188 RegexMatcher m1(".*", testString, 0, status); |
| 1181 REGEX_ASSERT(m1.lookingAt(status) == TRUE); | 1189 REGEX_ASSERT(m1.lookingAt(status) == TRUE); |
| 1182 REGEX_ASSERT(m1.hitEnd() == TRUE); | 1190 REGEX_ASSERT(m1.hitEnd() == TRUE); |
| 1183 REGEX_ASSERT(m1.requireEnd() == FALSE); | 1191 REGEX_ASSERT(m1.requireEnd() == FALSE); |
| 1184 REGEX_CHECK_STATUS; | 1192 REGEX_CHECK_STATUS; |
| 1185 | 1193 |
| 1186 status = U_ZERO_ERROR; | 1194 status = U_ZERO_ERROR; |
| 1187 RegexMatcher m2("a*", testString, 0, status); | 1195 RegexMatcher m2("a*", testString, 0, status); |
| 1188 REGEX_ASSERT(m2.lookingAt(status) == TRUE); | 1196 REGEX_ASSERT(m2.lookingAt(status) == TRUE); |
| 1189 REGEX_ASSERT(m2.hitEnd() == FALSE); | 1197 REGEX_ASSERT(m2.hitEnd() == FALSE); |
| 1190 REGEX_ASSERT(m2.requireEnd() == FALSE); | 1198 REGEX_ASSERT(m2.requireEnd() == FALSE); |
| 1191 REGEX_CHECK_STATUS; | 1199 REGEX_CHECK_STATUS; |
| 1192 | 1200 |
| 1193 status = U_ZERO_ERROR; | 1201 status = U_ZERO_ERROR; |
| 1194 RegexMatcher m3(".*$", testString, 0, status); | 1202 RegexMatcher m3(".*$", testString, 0, status); |
| 1195 REGEX_ASSERT(m3.lookingAt(status) == TRUE); | 1203 REGEX_ASSERT(m3.lookingAt(status) == TRUE); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 1213 m.reset(ucharString); // should not compile. | 1221 m.reset(ucharString); // should not compile. |
| 1214 | 1222 |
| 1215 RegexPattern *p = RegexPattern::compile(".", 0, status); | 1223 RegexPattern *p = RegexPattern::compile(".", 0, status); |
| 1216 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co
mpile. | 1224 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co
mpile. |
| 1217 | 1225 |
| 1218 RegexMatcher m3(".", ucharString, 0, status); // Should not compile | 1226 RegexMatcher m3(".", ucharString, 0, status); // Should not compile |
| 1219 } | 1227 } |
| 1220 #endif | 1228 #endif |
| 1221 | 1229 |
| 1222 // | 1230 // |
| 1223 // Time Outs. | 1231 // Time Outs. |
| 1224 // Note: These tests will need to be changed when the regexp engine i
s | 1232 // Note: These tests will need to be changed when the regexp engine i
s |
| 1225 // able to detect and cut short the exponential time behavior o
n | 1233 // able to detect and cut short the exponential time behavior o
n |
| 1226 // this type of match. | 1234 // this type of match. |
| 1227 // | 1235 // |
| 1228 { | 1236 { |
| 1229 UErrorCode status = U_ZERO_ERROR; | 1237 UErrorCode status = U_ZERO_ERROR; |
| 1230 // Enough 'a's in the string to cause the match to time out. | 1238 // Enough 'a's in the string to cause the match to time out. |
| 1231 // (Each additional 'a' doubles the time) | 1239 // (Each additional 'a' doubles the time) |
| 1232 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); | 1240 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); |
| 1233 RegexMatcher matcher("(a+)+b", testString, 0, status); | 1241 RegexMatcher matcher("(a+)+b", testString, 0, status); |
| 1234 REGEX_CHECK_STATUS; | 1242 REGEX_CHECK_STATUS; |
| 1235 REGEX_ASSERT(matcher.getTimeLimit() == 0); | 1243 REGEX_ASSERT(matcher.getTimeLimit() == 0); |
| 1236 matcher.setTimeLimit(100, status); | 1244 matcher.setTimeLimit(100, status); |
| 1237 REGEX_ASSERT(matcher.getTimeLimit() == 100); | 1245 REGEX_ASSERT(matcher.getTimeLimit() == 100); |
| 1238 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1246 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
| 1239 REGEX_ASSERT(status == U_REGEX_TIME_OUT); | 1247 REGEX_ASSERT(status == U_REGEX_TIME_OUT); |
| 1240 } | 1248 } |
| 1241 { | 1249 { |
| 1242 UErrorCode status = U_ZERO_ERROR; | 1250 UErrorCode status = U_ZERO_ERROR; |
| 1243 // Few enough 'a's to slip in under the time limit. | 1251 // Few enough 'a's to slip in under the time limit. |
| 1244 UnicodeString testString("aaaaaaaaaaaaaaaaaa"); | 1252 UnicodeString testString("aaaaaaaaaaaaaaaaaa"); |
| 1245 RegexMatcher matcher("(a+)+b", testString, 0, status); | 1253 RegexMatcher matcher("(a+)+b", testString, 0, status); |
| 1246 REGEX_CHECK_STATUS; | 1254 REGEX_CHECK_STATUS; |
| 1247 matcher.setTimeLimit(100, status); | 1255 matcher.setTimeLimit(100, status); |
| 1248 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1256 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
| 1249 REGEX_CHECK_STATUS; | 1257 REGEX_CHECK_STATUS; |
| 1250 } | 1258 } |
| 1251 | 1259 |
| 1252 // | 1260 // |
| 1253 // Stack Limits | 1261 // Stack Limits |
| 1254 // | 1262 // |
| 1255 { | 1263 { |
| 1256 UErrorCode status = U_ZERO_ERROR; | 1264 UErrorCode status = U_ZERO_ERROR; |
| 1257 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000,
filled with 'A' | 1265 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000,
filled with 'A' |
| 1258 | 1266 |
| 1259 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits opt
imizations | 1267 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits opt
imizations |
| 1260 // of the '+', and makes the stack frames larger. | 1268 // of the '+', and makes the stack frames larger. |
| 1261 RegexMatcher matcher("(A)+A$", testString, 0, status); | 1269 RegexMatcher matcher("(A)+A$", testString, 0, status); |
| 1262 | 1270 |
| 1263 // With the default stack, this match should fail to run | 1271 // With the default stack, this match should fail to run |
| 1264 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1272 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
| 1265 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); | 1273 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); |
| 1266 | 1274 |
| 1267 // With unlimited stack, it should run | 1275 // With unlimited stack, it should run |
| 1268 status = U_ZERO_ERROR; | 1276 status = U_ZERO_ERROR; |
| 1269 matcher.setStackLimit(0, status); | 1277 matcher.setStackLimit(0, status); |
| 1270 REGEX_CHECK_STATUS; | 1278 REGEX_CHECK_STATUS; |
| 1271 REGEX_ASSERT(matcher.lookingAt(status) == TRUE); | 1279 REGEX_ASSERT(matcher.lookingAt(status) == TRUE); |
| 1272 REGEX_CHECK_STATUS; | 1280 REGEX_CHECK_STATUS; |
| 1273 REGEX_ASSERT(matcher.getStackLimit() == 0); | 1281 REGEX_ASSERT(matcher.getStackLimit() == 0); |
| 1274 | 1282 |
| 1275 // With a limited stack, the match should fail | 1283 // With a limited stack, the match should fail |
| 1276 status = U_ZERO_ERROR; | 1284 status = U_ZERO_ERROR; |
| 1277 matcher.setStackLimit(10000, status); | 1285 matcher.setStackLimit(10000, status); |
| 1278 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1286 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
| 1279 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); | 1287 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); |
| 1280 REGEX_ASSERT(matcher.getStackLimit() == 10000); | 1288 REGEX_ASSERT(matcher.getStackLimit() == 10000); |
| 1281 } | 1289 } |
| 1282 | 1290 |
| 1283 // A pattern that doesn't save state should work with | 1291 // A pattern that doesn't save state should work with |
| 1284 // a minimal sized stack | 1292 // a minimal sized stack |
| 1285 { | 1293 { |
| 1286 UErrorCode status = U_ZERO_ERROR; | 1294 UErrorCode status = U_ZERO_ERROR; |
| 1287 UnicodeString testString = "abc"; | 1295 UnicodeString testString = "abc"; |
| 1288 RegexMatcher matcher("abc", testString, 0, status); | 1296 RegexMatcher matcher("abc", testString, 0, status); |
| 1289 REGEX_CHECK_STATUS; | 1297 REGEX_CHECK_STATUS; |
| 1290 matcher.setStackLimit(30, status); | 1298 matcher.setStackLimit(30, status); |
| 1291 REGEX_CHECK_STATUS; | 1299 REGEX_CHECK_STATUS; |
| 1292 REGEX_ASSERT(matcher.matches(status) == TRUE); | 1300 REGEX_ASSERT(matcher.matches(status) == TRUE); |
| 1293 REGEX_CHECK_STATUS; | 1301 REGEX_CHECK_STATUS; |
| 1294 REGEX_ASSERT(matcher.getStackLimit() == 30); | 1302 REGEX_ASSERT(matcher.getStackLimit() == 30); |
| 1295 | 1303 |
| 1296 // Negative stack sizes should fail | 1304 // Negative stack sizes should fail |
| 1297 status = U_ZERO_ERROR; | 1305 status = U_ZERO_ERROR; |
| 1298 matcher.setStackLimit(1000, status); | 1306 matcher.setStackLimit(1000, status); |
| 1299 REGEX_CHECK_STATUS; | 1307 REGEX_CHECK_STATUS; |
| 1300 matcher.setStackLimit(-1, status); | 1308 matcher.setStackLimit(-1, status); |
| 1301 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | 1309 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); |
| 1302 REGEX_ASSERT(matcher.getStackLimit() == 1000); | 1310 REGEX_ASSERT(matcher.getStackLimit() == 1000); |
| 1303 } | 1311 } |
| 1304 | 1312 |
| 1305 | 1313 |
| 1306 } | 1314 } |
| 1307 | 1315 |
| 1308 | 1316 |
| 1309 | 1317 |
| 1310 | 1318 |
| 1311 | 1319 |
| 1312 | 1320 |
| 1313 //--------------------------------------------------------------------------- | 1321 //--------------------------------------------------------------------------- |
| 1314 // | 1322 // |
| (...skipping 528 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1843 REGEX_CHECK_STATUS; | 1851 REGEX_CHECK_STATUS; |
| 1844 | 1852 |
| 1845 UText input1 = UTEXT_INITIALIZER; | 1853 UText input1 = UTEXT_INITIALIZER; |
| 1846 UText input2 = UTEXT_INITIALIZER; | 1854 UText input2 = UTEXT_INITIALIZER; |
| 1847 UText empty = UTEXT_INITIALIZER; | 1855 UText empty = UTEXT_INITIALIZER; |
| 1848 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &st
atus); | 1856 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &st
atus); |
| 1849 REGEX_VERBOSE_TEXT(&input1); | 1857 REGEX_VERBOSE_TEXT(&input1); |
| 1850 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); | 1858 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); |
| 1851 REGEX_VERBOSE_TEXT(&input2); | 1859 REGEX_VERBOSE_TEXT(&input2); |
| 1852 utext_openUChars(&empty, NULL, 0, &status); | 1860 utext_openUChars(&empty, NULL, 0, &status); |
| 1853 | 1861 |
| 1854 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not na
tivelen (input1) ? */ | 1862 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not na
tivelen (input1) ? */ |
| 1855 int32_t input2Len = strlen("not abc"); | 1863 int32_t input2Len = strlen("not abc"); |
| 1856 | 1864 |
| 1857 | 1865 |
| 1858 // | 1866 // |
| 1859 // Matcher creation and reset. | 1867 // Matcher creation and reset. |
| 1860 // | 1868 // |
| 1861 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); | 1869 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); |
| 1862 REGEX_CHECK_STATUS; | 1870 REGEX_CHECK_STATUS; |
| 1863 REGEX_ASSERT(m1->lookingAt(status) == TRUE); | 1871 REGEX_ASSERT(m1->lookingAt(status) == TRUE); |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1953 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); | 1961 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); |
| 1954 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 1962 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 1955 status = U_ZERO_ERROR; | 1963 status = U_ZERO_ERROR; |
| 1956 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); | 1964 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); |
| 1957 REGEX_CHECK_STATUS; | 1965 REGEX_CHECK_STATUS; |
| 1958 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); | 1966 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); |
| 1959 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 1967 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 1960 | 1968 |
| 1961 delete m1; | 1969 delete m1; |
| 1962 delete pat2; | 1970 delete pat2; |
| 1963 | 1971 |
| 1964 utext_close(&re); | 1972 utext_close(&re); |
| 1965 utext_close(&input1); | 1973 utext_close(&input1); |
| 1966 utext_close(&input2); | 1974 utext_close(&input2); |
| 1967 utext_close(&empty); | 1975 utext_close(&empty); |
| 1968 } | 1976 } |
| 1969 | 1977 |
| 1970 | 1978 |
| 1971 // | 1979 // |
| 1972 // Capture Group. | 1980 // Capture Group. |
| 1973 // RegexMatcher::start(); | 1981 // RegexMatcher::start(); |
| 1974 // RegexMatcher::end(); | 1982 // RegexMatcher::end(); |
| 1975 // RegexMatcher::groupCount(); | 1983 // RegexMatcher::groupCount(); |
| 1976 // | 1984 // |
| 1977 { | 1985 { |
| 1978 int32_t flags=0; | 1986 int32_t flags=0; |
| 1979 UParseError pe; | 1987 UParseError pe; |
| 1980 UErrorCode status=U_ZERO_ERROR; | 1988 UErrorCode status=U_ZERO_ERROR; |
| 1981 UText re=UTEXT_INITIALIZER; | 1989 UText re=UTEXT_INITIALIZER; |
| 1982 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x
34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67
)(.*) */ | 1990 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x
34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67
)(.*) */ |
| 1983 utext_openUTF8(&re, str_01234567_pat, -1, &status); | 1991 utext_openUTF8(&re, str_01234567_pat, -1, &status); |
| 1984 | 1992 |
| 1985 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); | 1993 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); |
| 1986 REGEX_CHECK_STATUS; | 1994 REGEX_CHECK_STATUS; |
| 1987 | 1995 |
| 1988 UText input = UTEXT_INITIALIZER; | 1996 UText input = UTEXT_INITIALIZER; |
| 1989 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36
, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ | 1997 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36
, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ |
| 1990 utext_openUTF8(&input, str_0123456789, -1, &status); | 1998 utext_openUTF8(&input, str_0123456789, -1, &status); |
| 1991 | 1999 |
| 1992 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); | 2000 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); |
| 1993 REGEX_CHECK_STATUS; | 2001 REGEX_CHECK_STATUS; |
| 1994 REGEX_ASSERT(matcher->lookingAt(status) == TRUE); | 2002 REGEX_ASSERT(matcher->lookingAt(status) == TRUE); |
| 1995 static const int32_t matchStarts[] = {0, 2, 4, 8}; | 2003 static const int32_t matchStarts[] = {0, 2, 4, 8}; |
| 1996 static const int32_t matchEnds[] = {10, 8, 6, 10}; | 2004 static const int32_t matchEnds[] = {10, 8, 6, 10}; |
| 1997 int32_t i; | 2005 int32_t i; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 2012 | 2020 |
| 2013 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); | 2021 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); |
| 2014 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); | 2022 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); |
| 2015 | 2023 |
| 2016 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2024 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
| 2017 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2025 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
| 2018 matcher->reset(); | 2026 matcher->reset(); |
| 2019 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); | 2027 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); |
| 2020 | 2028 |
| 2021 matcher->lookingAt(status); | 2029 matcher->lookingAt(status); |
| 2022 | 2030 |
| 2023 UnicodeString dest; | 2031 UnicodeString dest; |
| 2024 UText destText = UTEXT_INITIALIZER; | 2032 UText destText = UTEXT_INITIALIZER; |
| 2025 utext_openUnicodeString(&destText, &dest, &status); | 2033 utext_openUnicodeString(&destText, &dest, &status); |
| 2026 UText *result; | 2034 UText *result; |
| 2027 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x
36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ | 2035 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x
36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ |
| 2028 //» Test shallow-clone API | 2036 // Test shallow-clone API |
| 2029 int64_t group_len; | 2037 int64_t group_len; |
| 2030 result = matcher->group((UText *)NULL, group_len, status); | 2038 result = matcher->group((UText *)NULL, group_len, status); |
| 2031 REGEX_CHECK_STATUS; | 2039 REGEX_CHECK_STATUS; |
| 2032 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2040 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2033 utext_close(result); | 2041 utext_close(result); |
| 2034 result = matcher->group(0, &destText, group_len, status); | 2042 result = matcher->group(0, &destText, group_len, status); |
| 2035 REGEX_CHECK_STATUS; | 2043 REGEX_CHECK_STATUS; |
| 2036 REGEX_ASSERT(result == &destText); | 2044 REGEX_ASSERT(result == &destText); |
| 2037 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2045 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2038 // destText is now immutable, reopen it | 2046 // destText is now immutable, reopen it |
| 2039 utext_close(&destText); | 2047 utext_close(&destText); |
| 2040 utext_openUnicodeString(&destText, &dest, &status); | 2048 utext_openUnicodeString(&destText, &dest, &status); |
| 2041 | 2049 |
| 2042 result = matcher->group(0, NULL, status); | 2050 result = matcher->group(0, NULL, status); |
| 2043 REGEX_CHECK_STATUS; | 2051 REGEX_CHECK_STATUS; |
| 2044 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2052 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2045 utext_close(result); | 2053 utext_close(result); |
| 2046 result = matcher->group(0, &destText, status); | 2054 result = matcher->group(0, &destText, status); |
| 2047 REGEX_CHECK_STATUS; | 2055 REGEX_CHECK_STATUS; |
| 2048 REGEX_ASSERT(result == &destText); | 2056 REGEX_ASSERT(result == &destText); |
| 2049 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2057 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
| 2050 | 2058 |
| 2051 result = matcher->group(1, NULL, status); | 2059 result = matcher->group(1, NULL, status); |
| 2052 REGEX_CHECK_STATUS; | 2060 REGEX_CHECK_STATUS; |
| 2053 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 };
/* 234567 */ | 2061 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 };
/* 234567 */ |
| 2054 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); | 2062 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); |
| 2055 utext_close(result); | 2063 utext_close(result); |
| 2056 result = matcher->group(1, &destText, status); | 2064 result = matcher->group(1, &destText, status); |
| 2057 REGEX_CHECK_STATUS; | 2065 REGEX_CHECK_STATUS; |
| 2058 REGEX_ASSERT(result == &destText); | 2066 REGEX_ASSERT(result == &destText); |
| 2059 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); | 2067 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); |
| 2060 | 2068 |
| 2061 result = matcher->group(2, NULL, status); | 2069 result = matcher->group(2, NULL, status); |
| 2062 REGEX_CHECK_STATUS; | 2070 REGEX_CHECK_STATUS; |
| 2063 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ | 2071 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ |
| 2064 REGEX_ASSERT_UTEXT_UTF8(str_45, result); | 2072 REGEX_ASSERT_UTEXT_UTF8(str_45, result); |
| 2065 utext_close(result); | 2073 utext_close(result); |
| 2066 result = matcher->group(2, &destText, status); | 2074 result = matcher->group(2, &destText, status); |
| 2067 REGEX_CHECK_STATUS; | 2075 REGEX_CHECK_STATUS; |
| 2068 REGEX_ASSERT(result == &destText); | 2076 REGEX_ASSERT(result == &destText); |
| 2069 REGEX_ASSERT_UTEXT_UTF8(str_45, result); | 2077 REGEX_ASSERT_UTEXT_UTF8(str_45, result); |
| 2070 | 2078 |
| 2071 result = matcher->group(3, NULL, status); | 2079 result = matcher->group(3, NULL, status); |
| 2072 REGEX_CHECK_STATUS; | 2080 REGEX_CHECK_STATUS; |
| 2073 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ | 2081 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ |
| 2074 REGEX_ASSERT_UTEXT_UTF8(str_89, result); | 2082 REGEX_ASSERT_UTEXT_UTF8(str_89, result); |
| 2075 utext_close(result); | 2083 utext_close(result); |
| 2076 result = matcher->group(3, &destText, status); | 2084 result = matcher->group(3, &destText, status); |
| 2077 REGEX_CHECK_STATUS; | 2085 REGEX_CHECK_STATUS; |
| 2078 REGEX_ASSERT(result == &destText); | 2086 REGEX_ASSERT(result == &destText); |
| 2079 REGEX_ASSERT_UTEXT_UTF8(str_89, result); | 2087 REGEX_ASSERT_UTEXT_UTF8(str_89, result); |
| 2080 | 2088 |
| 2081 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2089 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
| 2082 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2090 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
| 2083 matcher->reset(); | 2091 matcher->reset(); |
| 2084 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); | 2092 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); |
| 2085 | 2093 |
| 2086 delete matcher; | 2094 delete matcher; |
| 2087 delete pat; | 2095 delete pat; |
| 2088 | 2096 |
| 2089 utext_close(&destText); | 2097 utext_close(&destText); |
| 2090 utext_close(&input); | 2098 utext_close(&input); |
| 2091 utext_close(&re); | 2099 utext_close(&re); |
| 2092 } | 2100 } |
| 2093 | 2101 |
| 2094 // | 2102 // |
| 2095 // find | 2103 // find |
| 2096 // | 2104 // |
| 2097 { | 2105 { |
| 2098 int32_t flags=0; | 2106 int32_t flags=0; |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2139 | 2147 |
| 2140 status = U_ZERO_ERROR; | 2148 status = U_ZERO_ERROR; |
| 2141 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); | 2149 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); |
| 2142 status = U_ZERO_ERROR; | 2150 status = U_ZERO_ERROR; |
| 2143 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); | 2151 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); |
| 2144 | 2152 |
| 2145 REGEX_ASSERT(matcher->groupCount() == 0); | 2153 REGEX_ASSERT(matcher->groupCount() == 0); |
| 2146 | 2154 |
| 2147 delete matcher; | 2155 delete matcher; |
| 2148 delete pat; | 2156 delete pat; |
| 2149 | 2157 |
| 2150 utext_close(&input); | 2158 utext_close(&input); |
| 2151 utext_close(&re); | 2159 utext_close(&re); |
| 2152 } | 2160 } |
| 2153 | 2161 |
| 2154 | 2162 |
| 2155 // | 2163 // |
| 2156 // find, with \G in pattern (true if at the end of a previous match). | 2164 // find, with \G in pattern (true if at the end of a previous match). |
| 2157 // | 2165 // |
| 2158 { | 2166 { |
| 2159 int32_t flags=0; | 2167 int32_t flags=0; |
| 2160 UParseError pe; | 2168 UParseError pe; |
| 2161 UErrorCode status=U_ZERO_ERROR; | 2169 UErrorCode status=U_ZERO_ERROR; |
| 2162 UText re=UTEXT_INITIALIZER; | 2170 UText re=UTEXT_INITIALIZER; |
| 2163 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0
x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x0
0 }; /* .*?(?:(\\Gabc)|(abc)) */ | 2171 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0
x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x0
0 }; /* .*?(?:(\\Gabc)|(abc)) */ |
| 2164 utext_openUTF8(&re, str_Gabcabc, -1, &status); | 2172 utext_openUTF8(&re, str_Gabcabc, -1, &status); |
| 2165 | 2173 |
| 2166 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); | 2174 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); |
| 2167 | 2175 |
| 2168 REGEX_CHECK_STATUS; | 2176 REGEX_CHECK_STATUS; |
| 2169 UText input = UTEXT_INITIALIZER; | 2177 UText input = UTEXT_INITIALIZER; |
| 2170 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ | 2178 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ |
| 2171 utext_openUTF8(&input, str_abcabcabc, -1, &status); | 2179 utext_openUTF8(&input, str_abcabcabc, -1, &status); |
| 2172 // 012345678901234567 | 2180 // 012345678901234567 |
| 2173 | 2181 |
| 2174 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); | 2182 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); |
| 2175 REGEX_CHECK_STATUS; | 2183 REGEX_CHECK_STATUS; |
| 2176 REGEX_ASSERT(matcher->find()); | 2184 REGEX_ASSERT(matcher->find()); |
| 2177 REGEX_ASSERT(matcher->start(status) == 0); | 2185 REGEX_ASSERT(matcher->start(status) == 0); |
| 2178 REGEX_ASSERT(matcher->start(1, status) == -1); | 2186 REGEX_ASSERT(matcher->start(1, status) == -1); |
| 2179 REGEX_ASSERT(matcher->start(2, status) == 1); | 2187 REGEX_ASSERT(matcher->start(2, status) == 1); |
| 2180 | 2188 |
| 2181 REGEX_ASSERT(matcher->find()); | 2189 REGEX_ASSERT(matcher->find()); |
| 2182 REGEX_ASSERT(matcher->start(status) == 4); | 2190 REGEX_ASSERT(matcher->start(status) == 4); |
| 2183 REGEX_ASSERT(matcher->start(1, status) == 4); | 2191 REGEX_ASSERT(matcher->start(1, status) == 4); |
| 2184 REGEX_ASSERT(matcher->start(2, status) == -1); | 2192 REGEX_ASSERT(matcher->start(2, status) == -1); |
| 2185 REGEX_CHECK_STATUS; | 2193 REGEX_CHECK_STATUS; |
| 2186 | 2194 |
| 2187 delete matcher; | 2195 delete matcher; |
| 2188 delete pat; | 2196 delete pat; |
| 2189 | 2197 |
| 2190 utext_close(&input); | 2198 utext_close(&input); |
| 2191 utext_close(&re); | 2199 utext_close(&re); |
| 2192 } | 2200 } |
| 2193 | 2201 |
| 2194 // | 2202 // |
| 2195 // find with zero length matches, match position should bump ahead | 2203 // find with zero length matches, match position should bump ahead |
| 2196 // to prevent loops. | 2204 // to prevent loops. |
| 2197 // | 2205 // |
| 2198 { | 2206 { |
| 2199 int32_t i; | 2207 int32_t i; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 2219 utext_openUTF8(&s, (char *)aboveBMP, -1, &status); | 2227 utext_openUTF8(&s, (char *)aboveBMP, -1, &status); |
| 2220 m.reset(&s); | 2228 m.reset(&s); |
| 2221 for (i=0; ; i+=4) { | 2229 for (i=0; ; i+=4) { |
| 2222 if (m.find() == FALSE) { | 2230 if (m.find() == FALSE) { |
| 2223 break; | 2231 break; |
| 2224 } | 2232 } |
| 2225 REGEX_ASSERT(m.start(status) == i); | 2233 REGEX_ASSERT(m.start(status) == i); |
| 2226 REGEX_ASSERT(m.end(status) == i); | 2234 REGEX_ASSERT(m.end(status) == i); |
| 2227 } | 2235 } |
| 2228 REGEX_ASSERT(i==20); | 2236 REGEX_ASSERT(i==20); |
| 2229 | 2237 |
| 2230 utext_close(&s); | 2238 utext_close(&s); |
| 2231 } | 2239 } |
| 2232 { | 2240 { |
| 2233 // find() loop breaking test. | 2241 // find() loop breaking test. |
| 2234 // with pattern of /.?/, should see a series of one char matches,
then a single | 2242 // with pattern of /.?/, should see a series of one char matches,
then a single |
| 2235 // match of zero length at the end of the input string. | 2243 // match of zero length at the end of the input string. |
| 2236 int32_t i; | 2244 int32_t i; |
| 2237 UErrorCode status=U_ZERO_ERROR; | 2245 UErrorCode status=U_ZERO_ERROR; |
| 2238 RegexMatcher m(".?", 0, status); | 2246 RegexMatcher m(".?", 0, status); |
| 2239 REGEX_CHECK_STATUS; | 2247 REGEX_CHECK_STATUS; |
| 2240 UText s = UTEXT_INITIALIZER; | 2248 UText s = UTEXT_INITIALIZER; |
| 2241 utext_openUTF8(&s, " ", -1, &status); | 2249 utext_openUTF8(&s, " ", -1, &status); |
| 2242 m.reset(&s); | 2250 m.reset(&s); |
| 2243 for (i=0; ; i++) { | 2251 for (i=0; ; i++) { |
| 2244 if (m.find() == FALSE) { | 2252 if (m.find() == FALSE) { |
| 2245 break; | 2253 break; |
| 2246 } | 2254 } |
| 2247 REGEX_ASSERT(m.start(status) == i); | 2255 REGEX_ASSERT(m.start(status) == i); |
| 2248 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); | 2256 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); |
| 2249 } | 2257 } |
| 2250 REGEX_ASSERT(i==5); | 2258 REGEX_ASSERT(i==5); |
| 2251 | 2259 |
| 2252 utext_close(&s); | 2260 utext_close(&s); |
| 2253 } | 2261 } |
| 2254 | 2262 |
| 2255 | 2263 |
| 2256 // | 2264 // |
| 2257 // Matchers with no input string behave as if they had an empty input string
. | 2265 // Matchers with no input string behave as if they had an empty input string
. |
| 2258 // | 2266 // |
| 2259 | 2267 |
| 2260 { | 2268 { |
| 2261 UErrorCode status = U_ZERO_ERROR; | 2269 UErrorCode status = U_ZERO_ERROR; |
| 2262 RegexMatcher m(".?", 0, status); | 2270 RegexMatcher m(".?", 0, status); |
| 2263 REGEX_CHECK_STATUS; | 2271 REGEX_CHECK_STATUS; |
| 2264 REGEX_ASSERT(m.find()); | 2272 REGEX_ASSERT(m.find()); |
| 2265 REGEX_ASSERT(m.start(status) == 0); | 2273 REGEX_ASSERT(m.start(status) == 0); |
| 2266 REGEX_ASSERT(m.input() == ""); | 2274 REGEX_ASSERT(m.input() == ""); |
| 2267 } | 2275 } |
| 2268 { | 2276 { |
| 2269 UErrorCode status = U_ZERO_ERROR; | 2277 UErrorCode status = U_ZERO_ERROR; |
| 2270 RegexPattern *p = RegexPattern::compile(".", 0, status); | 2278 RegexPattern *p = RegexPattern::compile(".", 0, status); |
| 2271 RegexMatcher *m = p->matcher(status); | 2279 RegexMatcher *m = p->matcher(status); |
| 2272 REGEX_CHECK_STATUS; | 2280 REGEX_CHECK_STATUS; |
| 2273 | 2281 |
| 2274 REGEX_ASSERT(m->find() == FALSE); | 2282 REGEX_ASSERT(m->find() == FALSE); |
| 2275 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); | 2283 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); |
| 2276 delete m; | 2284 delete m; |
| 2277 delete p; | 2285 delete p; |
| 2278 } | 2286 } |
| 2279 | 2287 |
| 2280 // | 2288 // |
| 2281 // Regions | 2289 // Regions |
| 2282 // | 2290 // |
| 2283 { | 2291 { |
| 2284 UErrorCode status = U_ZERO_ERROR; | 2292 UErrorCode status = U_ZERO_ERROR; |
| 2285 UText testPattern = UTEXT_INITIALIZER; | 2293 UText testPattern = UTEXT_INITIALIZER; |
| 2286 UText testText = UTEXT_INITIALIZER; | 2294 UText testText = UTEXT_INITIALIZER; |
| 2287 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); | 2295 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); |
| 2288 REGEX_VERBOSE_TEXT(&testPattern); | 2296 REGEX_VERBOSE_TEXT(&testPattern); |
| 2289 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &stat
us); | 2297 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &stat
us); |
| 2290 REGEX_VERBOSE_TEXT(&testText); | 2298 REGEX_VERBOSE_TEXT(&testText); |
| 2291 | 2299 |
| 2292 RegexMatcher m(&testPattern, &testText, 0, status); | 2300 RegexMatcher m(&testPattern, &testText, 0, status); |
| 2293 REGEX_CHECK_STATUS; | 2301 REGEX_CHECK_STATUS; |
| 2294 REGEX_ASSERT(m.regionStart() == 0); | 2302 REGEX_ASSERT(m.regionStart() == 0); |
| 2295 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); | 2303 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); |
| 2296 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2304 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 2297 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2305 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 2298 | 2306 |
| 2299 m.region(2,4, status); | 2307 m.region(2,4, status); |
| 2300 REGEX_CHECK_STATUS; | 2308 REGEX_CHECK_STATUS; |
| 2301 REGEX_ASSERT(m.matches(status)); | 2309 REGEX_ASSERT(m.matches(status)); |
| 2302 REGEX_ASSERT(m.start(status)==2); | 2310 REGEX_ASSERT(m.start(status)==2); |
| 2303 REGEX_ASSERT(m.end(status)==4); | 2311 REGEX_ASSERT(m.end(status)==4); |
| 2304 REGEX_CHECK_STATUS; | 2312 REGEX_CHECK_STATUS; |
| 2305 | 2313 |
| 2306 m.reset(); | 2314 m.reset(); |
| 2307 REGEX_ASSERT(m.regionStart() == 0); | 2315 REGEX_ASSERT(m.regionStart() == 0); |
| 2308 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); | 2316 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); |
| 2309 | 2317 |
| 2310 regextst_openUTF8FromInvariant(&testText, "short", -1, &status); | 2318 regextst_openUTF8FromInvariant(&testText, "short", -1, &status); |
| 2311 REGEX_VERBOSE_TEXT(&testText); | 2319 REGEX_VERBOSE_TEXT(&testText); |
| 2312 m.reset(&testText); | 2320 m.reset(&testText); |
| 2313 REGEX_ASSERT(m.regionStart() == 0); | 2321 REGEX_ASSERT(m.regionStart() == 0); |
| 2314 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); | 2322 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); |
| 2315 | 2323 |
| 2316 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2324 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 2317 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); | 2325 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); |
| 2318 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 2326 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
| 2319 REGEX_ASSERT(&m == &m.reset()); | 2327 REGEX_ASSERT(&m == &m.reset()); |
| 2320 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 2328 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
| 2321 | 2329 |
| 2322 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); | 2330 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); |
| 2323 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2331 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 2324 REGEX_ASSERT(&m == &m.reset()); | 2332 REGEX_ASSERT(&m == &m.reset()); |
| 2325 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2333 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
| 2326 | 2334 |
| 2327 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2335 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 2328 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); | 2336 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); |
| 2329 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 2337 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
| 2330 REGEX_ASSERT(&m == &m.reset()); | 2338 REGEX_ASSERT(&m == &m.reset()); |
| 2331 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 2339 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
| 2332 | 2340 |
| 2333 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); | 2341 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); |
| 2334 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2342 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 2335 REGEX_ASSERT(&m == &m.reset()); | 2343 REGEX_ASSERT(&m == &m.reset()); |
| 2336 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2344 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
| 2337 | 2345 |
| 2338 utext_close(&testText); | 2346 utext_close(&testText); |
| 2339 utext_close(&testPattern); | 2347 utext_close(&testPattern); |
| 2340 } | 2348 } |
| 2341 | 2349 |
| 2342 // | 2350 // |
| 2343 // hitEnd() and requireEnd() | 2351 // hitEnd() and requireEnd() |
| 2344 // | 2352 // |
| 2345 { | 2353 { |
| 2346 UErrorCode status = U_ZERO_ERROR; | 2354 UErrorCode status = U_ZERO_ERROR; |
| 2347 UText testPattern = UTEXT_INITIALIZER; | 2355 UText testPattern = UTEXT_INITIALIZER; |
| 2348 UText testText = UTEXT_INITIALIZER; | 2356 UText testText = UTEXT_INITIALIZER; |
| 2349 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ | 2357 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ |
| 2350 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ | 2358 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ |
| 2351 utext_openUTF8(&testPattern, str_, -1, &status); | 2359 utext_openUTF8(&testPattern, str_, -1, &status); |
| 2352 utext_openUTF8(&testText, str_aabb, -1, &status); | 2360 utext_openUTF8(&testText, str_aabb, -1, &status); |
| 2353 | 2361 |
| 2354 RegexMatcher m1(&testPattern, &testText, 0, status); | 2362 RegexMatcher m1(&testPattern, &testText, 0, status); |
| 2355 REGEX_ASSERT(m1.lookingAt(status) == TRUE); | 2363 REGEX_ASSERT(m1.lookingAt(status) == TRUE); |
| 2356 REGEX_ASSERT(m1.hitEnd() == TRUE); | 2364 REGEX_ASSERT(m1.hitEnd() == TRUE); |
| 2357 REGEX_ASSERT(m1.requireEnd() == FALSE); | 2365 REGEX_ASSERT(m1.requireEnd() == FALSE); |
| 2358 REGEX_CHECK_STATUS; | 2366 REGEX_CHECK_STATUS; |
| 2359 | 2367 |
| 2360 status = U_ZERO_ERROR; | 2368 status = U_ZERO_ERROR; |
| 2361 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ | 2369 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ |
| 2362 utext_openUTF8(&testPattern, str_a, -1, &status); | 2370 utext_openUTF8(&testPattern, str_a, -1, &status); |
| 2363 RegexMatcher m2(&testPattern, &testText, 0, status); | 2371 RegexMatcher m2(&testPattern, &testText, 0, status); |
| 2364 REGEX_ASSERT(m2.lookingAt(status) == TRUE); | 2372 REGEX_ASSERT(m2.lookingAt(status) == TRUE); |
| 2365 REGEX_ASSERT(m2.hitEnd() == FALSE); | 2373 REGEX_ASSERT(m2.hitEnd() == FALSE); |
| 2366 REGEX_ASSERT(m2.requireEnd() == FALSE); | 2374 REGEX_ASSERT(m2.requireEnd() == FALSE); |
| 2367 REGEX_CHECK_STATUS; | 2375 REGEX_CHECK_STATUS; |
| 2368 | 2376 |
| 2369 status = U_ZERO_ERROR; | 2377 status = U_ZERO_ERROR; |
| 2370 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ | 2378 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ |
| 2371 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); | 2379 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); |
| 2372 RegexMatcher m3(&testPattern, &testText, 0, status); | 2380 RegexMatcher m3(&testPattern, &testText, 0, status); |
| 2373 REGEX_ASSERT(m3.lookingAt(status) == TRUE); | 2381 REGEX_ASSERT(m3.lookingAt(status) == TRUE); |
| 2374 REGEX_ASSERT(m3.hitEnd() == TRUE); | 2382 REGEX_ASSERT(m3.hitEnd() == TRUE); |
| 2375 REGEX_ASSERT(m3.requireEnd() == TRUE); | 2383 REGEX_ASSERT(m3.requireEnd() == TRUE); |
| 2376 REGEX_CHECK_STATUS; | 2384 REGEX_CHECK_STATUS; |
| 2377 | 2385 |
| 2378 utext_close(&testText); | 2386 utext_close(&testText); |
| 2379 utext_close(&testPattern); | 2387 utext_close(&testPattern); |
| 2380 } | 2388 } |
| 2381 } | 2389 } |
| 2382 | 2390 |
| 2383 | 2391 |
| 2384 //--------------------------------------------------------------------------- | 2392 //--------------------------------------------------------------------------- |
| 2385 // | 2393 // |
| 2386 // API_Replace_UTF8 API test for class RegexMatcher, testing the | 2394 // API_Replace_UTF8 API test for class RegexMatcher, testing the |
| 2387 // Replace family of functions. | 2395 // Replace family of functions. |
| 2388 // | 2396 // |
| 2389 //--------------------------------------------------------------------------- | 2397 //--------------------------------------------------------------------------- |
| 2390 void RegexTest::API_Replace_UTF8() { | 2398 void RegexTest::API_Replace_UTF8() { |
| 2391 // | 2399 // |
| 2392 // Replace | 2400 // Replace |
| 2393 // | 2401 // |
| 2394 int32_t flags=0; | 2402 int32_t flags=0; |
| 2395 UParseError pe; | 2403 UParseError pe; |
| 2396 UErrorCode status=U_ZERO_ERROR; | 2404 UErrorCode status=U_ZERO_ERROR; |
| 2397 | 2405 |
| 2398 UText re=UTEXT_INITIALIZER; | 2406 UText re=UTEXT_INITIALIZER; |
| 2399 regextst_openUTF8FromInvariant(&re, "abc", -1, &status); | 2407 regextst_openUTF8FromInvariant(&re, "abc", -1, &status); |
| 2400 REGEX_VERBOSE_TEXT(&re); | 2408 REGEX_VERBOSE_TEXT(&re); |
| 2401 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); | 2409 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); |
| 2402 REGEX_CHECK_STATUS; | 2410 REGEX_CHECK_STATUS; |
| 2403 | 2411 |
| 2404 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e,
0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ | 2412 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e,
0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ |
| 2405 // 012345678901234567 | 2413 // 012345678901234567 |
| 2406 UText dataText = UTEXT_INITIALIZER; | 2414 UText dataText = UTEXT_INITIALIZER; |
| 2407 utext_openUTF8(&dataText, data, -1, &status); | 2415 utext_openUTF8(&dataText, data, -1, &status); |
| 2408 REGEX_CHECK_STATUS; | 2416 REGEX_CHECK_STATUS; |
| 2409 REGEX_VERBOSE_TEXT(&dataText); | 2417 REGEX_VERBOSE_TEXT(&dataText); |
| 2410 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); | 2418 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); |
| 2411 | 2419 |
| 2412 // | 2420 // |
| 2413 // Plain vanilla matches. | 2421 // Plain vanilla matches. |
| 2414 // | 2422 // |
| 2415 UnicodeString dest; | 2423 UnicodeString dest; |
| 2416 UText destText = UTEXT_INITIALIZER; | 2424 UText destText = UTEXT_INITIALIZER; |
| 2417 utext_openUnicodeString(&destText, &dest, &status); | 2425 utext_openUnicodeString(&destText, &dest, &status); |
| 2418 UText *result; | 2426 UText *result; |
| 2419 | 2427 |
| 2420 UText replText = UTEXT_INITIALIZER; | 2428 UText replText = UTEXT_INITIALIZER; |
| 2421 | 2429 |
| 2422 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ | 2430 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ |
| 2423 utext_openUTF8(&replText, str_yz, -1, &status); | 2431 utext_openUTF8(&replText, str_yz, -1, &status); |
| 2424 REGEX_VERBOSE_TEXT(&replText); | 2432 REGEX_VERBOSE_TEXT(&replText); |
| 2425 result = matcher->replaceFirst(&replText, NULL, status); | 2433 result = matcher->replaceFirst(&replText, NULL, status); |
| 2426 REGEX_CHECK_STATUS; | 2434 REGEX_CHECK_STATUS; |
| 2427 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63
, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ | 2435 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63
, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ |
| 2428 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); | 2436 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); |
| 2429 utext_close(result); | 2437 utext_close(result); |
| 2430 result = matcher->replaceFirst(&replText, &destText, status); | 2438 result = matcher->replaceFirst(&replText, &destText, status); |
| 2431 REGEX_CHECK_STATUS; | 2439 REGEX_CHECK_STATUS; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 2443 REGEX_CHECK_STATUS; | 2451 REGEX_CHECK_STATUS; |
| 2444 REGEX_ASSERT(result == &destText); | 2452 REGEX_ASSERT(result == &destText); |
| 2445 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); | 2453 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); |
| 2446 | 2454 |
| 2447 // | 2455 // |
| 2448 // Plain vanilla non-matches. | 2456 // Plain vanilla non-matches. |
| 2449 // | 2457 // |
| 2450 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x6
2, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...
abx.. */ | 2458 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x6
2, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...
abx.. */ |
| 2451 utext_openUTF8(&dataText, str_abxabxabx, -1, &status); | 2459 utext_openUTF8(&dataText, str_abxabxabx, -1, &status); |
| 2452 matcher->reset(&dataText); | 2460 matcher->reset(&dataText); |
| 2453 | 2461 |
| 2454 result = matcher->replaceFirst(&replText, NULL, status); | 2462 result = matcher->replaceFirst(&replText, NULL, status); |
| 2455 REGEX_CHECK_STATUS; | 2463 REGEX_CHECK_STATUS; |
| 2456 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2464 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
| 2457 utext_close(result); | 2465 utext_close(result); |
| 2458 result = matcher->replaceFirst(&replText, &destText, status); | 2466 result = matcher->replaceFirst(&replText, &destText, status); |
| 2459 REGEX_CHECK_STATUS; | 2467 REGEX_CHECK_STATUS; |
| 2460 REGEX_ASSERT(result == &destText); | 2468 REGEX_ASSERT(result == &destText); |
| 2461 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2469 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
| 2462 | 2470 |
| 2463 result = matcher->replaceAll(&replText, NULL, status); | 2471 result = matcher->replaceAll(&replText, NULL, status); |
| 2464 REGEX_CHECK_STATUS; | 2472 REGEX_CHECK_STATUS; |
| 2465 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2473 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
| 2466 utext_close(result); | 2474 utext_close(result); |
| 2467 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2475 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2468 result = matcher->replaceAll(&replText, &destText, status); | 2476 result = matcher->replaceAll(&replText, &destText, status); |
| 2469 REGEX_CHECK_STATUS; | 2477 REGEX_CHECK_STATUS; |
| 2470 REGEX_ASSERT(result == &destText); | 2478 REGEX_ASSERT(result == &destText); |
| 2471 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2479 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
| 2472 | 2480 |
| 2473 // | 2481 // |
| 2474 // Empty source string | 2482 // Empty source string |
| 2475 // | 2483 // |
| 2476 utext_openUTF8(&dataText, NULL, 0, &status); | 2484 utext_openUTF8(&dataText, NULL, 0, &status); |
| 2477 matcher->reset(&dataText); | 2485 matcher->reset(&dataText); |
| 2478 | 2486 |
| 2479 result = matcher->replaceFirst(&replText, NULL, status); | 2487 result = matcher->replaceFirst(&replText, NULL, status); |
| 2480 REGEX_CHECK_STATUS; | 2488 REGEX_CHECK_STATUS; |
| 2481 REGEX_ASSERT_UTEXT_UTF8("", result); | 2489 REGEX_ASSERT_UTEXT_UTF8("", result); |
| 2482 utext_close(result); | 2490 utext_close(result); |
| 2483 result = matcher->replaceFirst(&replText, &destText, status); | 2491 result = matcher->replaceFirst(&replText, &destText, status); |
| 2484 REGEX_CHECK_STATUS; | 2492 REGEX_CHECK_STATUS; |
| 2485 REGEX_ASSERT(result == &destText); | 2493 REGEX_ASSERT(result == &destText); |
| 2486 REGEX_ASSERT_UTEXT_UTF8("", result); | 2494 REGEX_ASSERT_UTEXT_UTF8("", result); |
| 2487 | 2495 |
| 2488 result = matcher->replaceAll(&replText, NULL, status); | 2496 result = matcher->replaceAll(&replText, NULL, status); |
| 2489 REGEX_CHECK_STATUS; | 2497 REGEX_CHECK_STATUS; |
| 2490 REGEX_ASSERT_UTEXT_UTF8("", result); | 2498 REGEX_ASSERT_UTEXT_UTF8("", result); |
| 2491 utext_close(result); | 2499 utext_close(result); |
| 2492 result = matcher->replaceAll(&replText, &destText, status); | 2500 result = matcher->replaceAll(&replText, &destText, status); |
| 2493 REGEX_CHECK_STATUS; | 2501 REGEX_CHECK_STATUS; |
| 2494 REGEX_ASSERT(result == &destText); | 2502 REGEX_ASSERT(result == &destText); |
| 2495 REGEX_ASSERT_UTEXT_UTF8("", result); | 2503 REGEX_ASSERT_UTEXT_UTF8("", result); |
| 2496 | 2504 |
| 2497 // | 2505 // |
| 2498 // Empty substitution string | 2506 // Empty substitution string |
| 2499 // | 2507 // |
| 2500 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." | 2508 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." |
| 2501 matcher->reset(&dataText); | 2509 matcher->reset(&dataText); |
| 2502 | 2510 |
| 2503 utext_openUTF8(&replText, NULL, 0, &status); | 2511 utext_openUTF8(&replText, NULL, 0, &status); |
| 2504 result = matcher->replaceFirst(&replText, NULL, status); | 2512 result = matcher->replaceFirst(&replText, NULL, status); |
| 2505 REGEX_CHECK_STATUS; | 2513 REGEX_CHECK_STATUS; |
| 2506 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */ | 2514 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */ |
| 2507 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); | 2515 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); |
| 2508 utext_close(result); | 2516 utext_close(result); |
| 2509 result = matcher->replaceFirst(&replText, &destText, status); | 2517 result = matcher->replaceFirst(&replText, &destText, status); |
| 2510 REGEX_CHECK_STATUS; | 2518 REGEX_CHECK_STATUS; |
| 2511 REGEX_ASSERT(result == &destText); | 2519 REGEX_ASSERT(result == &destText); |
| 2512 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); | 2520 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2556 // | 2564 // |
| 2557 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ | 2565 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ |
| 2558 utext_openUTF8(&re, str_add, -1, &status); | 2566 utext_openUTF8(&re, str_add, -1, &status); |
| 2559 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); | 2567 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); |
| 2560 REGEX_CHECK_STATUS; | 2568 REGEX_CHECK_STATUS; |
| 2561 | 2569 |
| 2562 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00
}; /* abcdefg */ | 2570 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00
}; /* abcdefg */ |
| 2563 utext_openUTF8(&dataText, str_abcdefg, -1, &status); | 2571 utext_openUTF8(&dataText, str_abcdefg, -1, &status); |
| 2564 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); | 2572 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); |
| 2565 REGEX_CHECK_STATUS; | 2573 REGEX_CHECK_STATUS; |
| 2566 | 2574 |
| 2567 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ | 2575 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ |
| 2568 utext_openUTF8(&replText, str_11, -1, &status); | 2576 utext_openUTF8(&replText, str_11, -1, &status); |
| 2569 result = matcher2->replaceFirst(&replText, NULL, status); | 2577 result = matcher2->replaceFirst(&replText, NULL, status); |
| 2570 REGEX_CHECK_STATUS; | 2578 REGEX_CHECK_STATUS; |
| 2571 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67
, 0x00 }; /* bcbcdefg */ | 2579 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67
, 0x00 }; /* bcbcdefg */ |
| 2572 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); | 2580 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); |
| 2573 utext_close(result); | 2581 utext_close(result); |
| 2574 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2582 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2575 result = matcher2->replaceFirst(&replText, &destText, status); | 2583 result = matcher2->replaceFirst(&replText, &destText, status); |
| 2576 REGEX_CHECK_STATUS; | 2584 REGEX_CHECK_STATUS; |
| 2577 REGEX_ASSERT(result == &destText); | 2585 REGEX_ASSERT(result == &destText); |
| 2578 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); | 2586 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); |
| 2579 | 2587 |
| 2580 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x6
5, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31,
0x2e, 0x00 }; /* The value of \$1 is $1. */ | 2588 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x6
5, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31,
0x2e, 0x00 }; /* The value of \$1 is $1. */ |
| 2581 utext_openUTF8(&replText, str_v, -1, &status); | 2589 utext_openUTF8(&replText, str_v, -1, &status); |
| 2582 REGEX_VERBOSE_TEXT(&replText); | 2590 REGEX_VERBOSE_TEXT(&replText); |
| 2583 result = matcher2->replaceFirst(&replText, NULL, status); | 2591 result = matcher2->replaceFirst(&replText, NULL, status); |
| 2584 REGEX_CHECK_STATUS; | 2592 REGEX_CHECK_STATUS; |
| 2585 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61,
0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0
x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg *
/ | 2593 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61,
0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0
x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg *
/ |
| 2586 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); | 2594 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); |
| 2587 utext_close(result); | 2595 utext_close(result); |
| 2588 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2596 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2589 result = matcher2->replaceFirst(&replText, &destText, status); | 2597 result = matcher2->replaceFirst(&replText, &destText, status); |
| 2590 REGEX_CHECK_STATUS; | 2598 REGEX_CHECK_STATUS; |
| 2591 REGEX_ASSERT(result == &destText); | 2599 REGEX_ASSERT(result == &destText); |
| 2592 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); | 2600 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); |
| 2593 | 2601 |
| 2594 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6
9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f,
0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0
x00 }; /* $ by itself, no group number $$$ */ | 2602 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6
9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f,
0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0
x00 }; /* $ by itself, no group number $$$ */ |
| 2595 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); | 2603 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); |
| 2596 result = matcher2->replaceFirst(&replText, NULL, status); | 2604 result = matcher2->replaceFirst(&replText, NULL, status); |
| 2597 REGEX_CHECK_STATUS; | 2605 REGEX_CHECK_STATUS; |
| 2598 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20,
0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0
x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2
4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ | 2606 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20,
0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0
x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2
4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ |
| 2599 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); | 2607 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); |
| 2600 utext_close(result); | 2608 utext_close(result); |
| 2601 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2609 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2602 result = matcher2->replaceFirst(&replText, &destText, status); | 2610 result = matcher2->replaceFirst(&replText, &destText, status); |
| 2603 REGEX_CHECK_STATUS; | 2611 REGEX_CHECK_STATUS; |
| 2604 REGEX_ASSERT(result == &destText); | 2612 REGEX_ASSERT(result == &destText); |
| 2605 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); | 2613 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); |
| 2606 | 2614 |
| 2607 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d
, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31,
0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx.
*/ | 2615 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d
, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31,
0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx.
*/ |
| 2608 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001
D7CF, MATHEMATICAL BOLD DIGIT ONE | 2616 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001
D7CF, MATHEMATICAL BOLD DIGIT ONE |
| 2609 // 012345678901234567890123456 | 2617 // 012345678901234567890123456 |
| 2610 supplDigitChars[22] = 0xF0; | 2618 supplDigitChars[22] = 0xF0; |
| 2611 supplDigitChars[23] = 0x9D; | 2619 supplDigitChars[23] = 0x9D; |
| 2612 supplDigitChars[24] = 0x9F; | 2620 supplDigitChars[24] = 0x9F; |
| 2613 supplDigitChars[25] = 0x8F; | 2621 supplDigitChars[25] = 0x8F; |
| 2614 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); | 2622 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); |
| 2615 | 2623 |
| 2616 result = matcher2->replaceFirst(&replText, NULL, status); | 2624 result = matcher2->replaceFirst(&replText, NULL, status); |
| 2617 REGEX_CHECK_STATUS; | 2625 REGEX_CHECK_STATUS; |
| 2618 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x
20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplementa
l Digit 1 bc.defg */ | 2626 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x
20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplementa
l Digit 1 bc.defg */ |
| 2619 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); | 2627 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); |
| 2620 utext_close(result); | 2628 utext_close(result); |
| 2621 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2629 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2622 result = matcher2->replaceFirst(&replText, &destText, status); | 2630 result = matcher2->replaceFirst(&replText, &destText, status); |
| 2623 REGEX_CHECK_STATUS; | 2631 REGEX_CHECK_STATUS; |
| 2624 REGEX_ASSERT(result == &destText); | 2632 REGEX_ASSERT(result == &destText); |
| 2625 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); | 2633 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); |
| 2626 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x
61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e
, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /*
bad capture group number $5..." */ | 2634 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x
61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e
, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /*
bad capture group number $5..." */ |
| 2627 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); | 2635 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); |
| 2628 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status))
, U_INDEX_OUTOFBOUNDS_ERROR); | 2636 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status))
, U_INDEX_OUTOFBOUNDS_ERROR); |
| 2629 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); | 2637 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); |
| 2630 utext_close(result); | 2638 utext_close(result); |
| 2631 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2639 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
| 2632 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, sta
tus)), U_INDEX_OUTOFBOUNDS_ERROR); | 2640 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, sta
tus)), U_INDEX_OUTOFBOUNDS_ERROR); |
| 2633 REGEX_ASSERT(result == &destText); | 2641 REGEX_ASSERT(result == &destText); |
| 2634 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); | 2642 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); |
| 2635 | 2643 |
| 2636 // | 2644 // |
| 2637 // Replacement String with \u hex escapes | 2645 // Replacement String with \u hex escapes |
| 2638 // | 2646 // |
| 2639 { | 2647 { |
| 2640 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61
, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 a
bc 2 abc 3 */ | 2648 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61
, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 a
bc 2 abc 3 */ |
| 2641 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33,
0x2d, 0x2d, 0x00 }; /* --\u0043-- */ | 2649 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33,
0x2d, 0x2d, 0x00 }; /* --\u0043-- */ |
| 2642 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); | 2650 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); |
| 2643 utext_openUTF8(&replText, str_u0043, -1, &status); | 2651 utext_openUTF8(&replText, str_u0043, -1, &status); |
| 2644 matcher->reset(&dataText); | 2652 matcher->reset(&dataText); |
| 2645 | 2653 |
| 2646 result = matcher->replaceAll(&replText, NULL, status); | 2654 result = matcher->replaceAll(&replText, NULL, status); |
| 2647 REGEX_CHECK_STATUS; | 2655 REGEX_CHECK_STATUS; |
| 2648 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x
20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d
, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ | 2656 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x
20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d
, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ |
| 2649 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); | 2657 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); |
| 2650 utext_close(result); | 2658 utext_close(result); |
| 2651 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); | 2659 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); |
| 2652 result = matcher->replaceAll(&replText, &destText, status); | 2660 result = matcher->replaceAll(&replText, &destText, status); |
| 2653 REGEX_CHECK_STATUS; | 2661 REGEX_CHECK_STATUS; |
| 2654 REGEX_ASSERT(result == &destText); | 2662 REGEX_ASSERT(result == &destText); |
| 2655 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); | 2663 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); |
| 2656 } | 2664 } |
| 2657 { | 2665 { |
| 2658 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ | 2666 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ |
| 2659 utext_openUTF8(&dataText, str_abc, -1, &status); | 2667 utext_openUTF8(&dataText, str_abc, -1, &status); |
| 2660 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30,
0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ | 2668 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30,
0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ |
| 2661 utext_openUTF8(&replText, str_U00010000, -1, &status); | 2669 utext_openUTF8(&replText, str_U00010000, -1, &status); |
| 2662 matcher->reset(&dataText); | 2670 matcher->reset(&dataText); |
| 2663 | 2671 |
| 2664 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0
x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008
A" | 2672 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0
x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008
A" |
| 2665 // 0123456789 | 2673 // 0123456789 |
| 2666 expected[2] = 0xF0; | 2674 expected[2] = 0xF0; |
| 2667 expected[3] = 0x90; | 2675 expected[3] = 0x90; |
| 2668 expected[4] = 0x80; | 2676 expected[4] = 0x80; |
| 2669 expected[5] = 0x80; | 2677 expected[5] = 0x80; |
| 2670 | 2678 |
| 2671 result = matcher->replaceAll(&replText, NULL, status); | 2679 result = matcher->replaceAll(&replText, NULL, status); |
| 2672 REGEX_CHECK_STATUS; | 2680 REGEX_CHECK_STATUS; |
| 2673 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); | 2681 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); |
| 2674 utext_close(result); | 2682 utext_close(result); |
| 2675 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); | 2683 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); |
| 2676 result = matcher->replaceAll(&replText, &destText, status); | 2684 result = matcher->replaceAll(&replText, &destText, status); |
| 2677 REGEX_CHECK_STATUS; | 2685 REGEX_CHECK_STATUS; |
| 2678 REGEX_ASSERT(result == &destText); | 2686 REGEX_ASSERT(result == &destText); |
| 2679 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); | 2687 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); |
| 2680 } | 2688 } |
| 2681 // TODO: need more thorough testing of capture substitutions. | 2689 // TODO: need more thorough testing of capture substitutions. |
| 2682 | 2690 |
| 2683 // Bug 4057 | 2691 // Bug 4057 |
| 2684 // | 2692 // |
| 2685 { | 2693 { |
| 2686 status = U_ZERO_ERROR; | 2694 status = U_ZERO_ERROR; |
| 2687 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65,
0x00 }; /* ss(.*?)ee */ | 2695 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65,
0x00 }; /* ss(.*?)ee */ |
| 2688 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68,
0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x
20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69
, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66,
0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start wit
h ss and end with ee ss stuff ee fin */ | 2696 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68,
0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x
20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69
, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66,
0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start wit
h ss and end with ee ss stuff ee fin */ |
| 2689 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ | 2697 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ |
| 2690 utext_openUTF8(&re, str_ssee, -1, &status); | 2698 utext_openUTF8(&re, str_ssee, -1, &status); |
| 2691 utext_openUTF8(&dataText, str_blah, -1, &status); | 2699 utext_openUTF8(&dataText, str_blah, -1, &status); |
| 2692 utext_openUTF8(&replText, str_ooh, -1, &status); | 2700 utext_openUTF8(&replText, str_ooh, -1, &status); |
| 2693 | 2701 |
| 2694 RegexMatcher m(&re, 0, status); | 2702 RegexMatcher m(&re, 0, status); |
| 2695 REGEX_CHECK_STATUS; | 2703 REGEX_CHECK_STATUS; |
| 2696 | 2704 |
| 2697 UnicodeString result; | 2705 UnicodeString result; |
| 2698 UText resultText = UTEXT_INITIALIZER; | 2706 UText resultText = UTEXT_INITIALIZER; |
| 2699 utext_openUnicodeString(&resultText, &result, &status); | 2707 utext_openUnicodeString(&resultText, &result, &status); |
| 2700 | 2708 |
| 2701 // Multiple finds do NOT bump up the previous appendReplacement position. | 2709 // Multiple finds do NOT bump up the previous appendReplacement position. |
| 2702 m.reset(&dataText); | 2710 m.reset(&dataText); |
| 2703 m.find(); | 2711 m.find(); |
| 2704 m.find(); | 2712 m.find(); |
| 2705 m.appendReplacement(&resultText, &replText, status); | 2713 m.appendReplacement(&resultText, &replText, status); |
| 2706 REGEX_CHECK_STATUS; | 2714 REGEX_CHECK_STATUS; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 2727 m.find(10, status); | 2735 m.find(10, status); |
| 2728 m.find(); | 2736 m.find(); |
| 2729 m.appendReplacement(&resultText, &replText, status); | 2737 m.appendReplacement(&resultText, &replText, status); |
| 2730 REGEX_CHECK_STATUS; | 2738 REGEX_CHECK_STATUS; |
| 2731 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The
matches start with ss and end with ee ooh */ | 2739 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The
matches start with ss and end with ee ooh */ |
| 2732 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); | 2740 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); |
| 2733 | 2741 |
| 2734 m.appendTail(&resultText, status); | 2742 m.appendTail(&resultText, status); |
| 2735 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x6
9, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ | 2743 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x6
9, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ |
| 2736 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); | 2744 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); |
| 2737 | 2745 |
| 2738 utext_close(&resultText); | 2746 utext_close(&resultText); |
| 2739 } | 2747 } |
| 2740 | 2748 |
| 2741 delete matcher2; | 2749 delete matcher2; |
| 2742 delete pat2; | 2750 delete pat2; |
| 2743 delete matcher; | 2751 delete matcher; |
| 2744 delete pat; | 2752 delete pat; |
| 2745 | 2753 |
| 2746 utext_close(&dataText); | 2754 utext_close(&dataText); |
| 2747 utext_close(&replText); | 2755 utext_close(&replText); |
| 2748 utext_close(&destText); | 2756 utext_close(&destText); |
| 2749 utext_close(&re); | 2757 utext_close(&re); |
| 2750 } | 2758 } |
| 2751 | 2759 |
| 2752 | 2760 |
| 2753 //--------------------------------------------------------------------------- | 2761 //--------------------------------------------------------------------------- |
| 2754 // | 2762 // |
| 2755 // API_Pattern_UTF8 Test that the API for class RegexPattern is | 2763 // API_Pattern_UTF8 Test that the API for class RegexPattern is |
| 2756 // present and nominally working. | 2764 // present and nominally working. |
| 2757 // | 2765 // |
| 2758 //--------------------------------------------------------------------------- | 2766 //--------------------------------------------------------------------------- |
| 2759 void RegexTest::API_Pattern_UTF8() { | 2767 void RegexTest::API_Pattern_UTF8() { |
| 2760 RegexPattern pata; // Test default constructor to not crash. | 2768 RegexPattern pata; // Test default constructor to not crash. |
| 2761 RegexPattern patb; | 2769 RegexPattern patb; |
| 2762 | 2770 |
| 2763 REGEX_ASSERT(pata == patb); | 2771 REGEX_ASSERT(pata == patb); |
| 2764 REGEX_ASSERT(pata == pata); | 2772 REGEX_ASSERT(pata == pata); |
| 2765 | 2773 |
| 2766 UText re1 = UTEXT_INITIALIZER; | 2774 UText re1 = UTEXT_INITIALIZER; |
| 2767 UText re2 = UTEXT_INITIALIZER; | 2775 UText re2 = UTEXT_INITIALIZER; |
| 2768 UErrorCode status = U_ZERO_ERROR; | 2776 UErrorCode status = U_ZERO_ERROR; |
| 2769 UParseError pe; | 2777 UParseError pe; |
| 2770 | 2778 |
| 2771 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d,
0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ | 2779 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d,
0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ |
| 2772 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ | 2780 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ |
| 2773 utext_openUTF8(&re1, str_abcalmz, -1, &status); | 2781 utext_openUTF8(&re1, str_abcalmz, -1, &status); |
| 2774 utext_openUTF8(&re2, str_def, -1, &status); | 2782 utext_openUTF8(&re2, str_def, -1, &status); |
| 2775 | 2783 |
| 2776 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); | 2784 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); |
| 2777 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); | 2785 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); |
| 2778 REGEX_CHECK_STATUS; | 2786 REGEX_CHECK_STATUS; |
| 2779 REGEX_ASSERT(*pat1 == *pat1); | 2787 REGEX_ASSERT(*pat1 == *pat1); |
| 2780 REGEX_ASSERT(*pat1 != pata); | 2788 REGEX_ASSERT(*pat1 != pata); |
| (...skipping 28 matching lines...) Expand all Loading... |
| 2809 | 2817 |
| 2810 // clone | 2818 // clone |
| 2811 RegexPattern *pat1c = pat1->clone(); | 2819 RegexPattern *pat1c = pat1->clone(); |
| 2812 REGEX_ASSERT(*pat1c == *pat1); | 2820 REGEX_ASSERT(*pat1c == *pat1); |
| 2813 REGEX_ASSERT(*pat1c != *pat2); | 2821 REGEX_ASSERT(*pat1c != *pat2); |
| 2814 | 2822 |
| 2815 delete pat1c; | 2823 delete pat1c; |
| 2816 delete pat1a; | 2824 delete pat1a; |
| 2817 delete pat1; | 2825 delete pat1; |
| 2818 delete pat2; | 2826 delete pat2; |
| 2819 | 2827 |
| 2820 utext_close(&re1); | 2828 utext_close(&re1); |
| 2821 utext_close(&re2); | 2829 utext_close(&re2); |
| 2822 | 2830 |
| 2823 | 2831 |
| 2824 // | 2832 // |
| 2825 // Verify that a matcher created from a cloned pattern works. | 2833 // Verify that a matcher created from a cloned pattern works. |
| 2826 // (Jitterbug 3423) | 2834 // (Jitterbug 3423) |
| 2827 // | 2835 // |
| 2828 { | 2836 { |
| 2829 UErrorCode status = U_ZERO_ERROR; | 2837 UErrorCode status = U_ZERO_ERROR; |
| 2830 UText pattern = UTEXT_INITIALIZER; | 2838 UText pattern = UTEXT_INITIALIZER; |
| 2831 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \
p{L}+ */ | 2839 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \
p{L}+ */ |
| 2832 utext_openUTF8(&pattern, str_pL, -1, &status); | 2840 utext_openUTF8(&pattern, str_pL, -1, &status); |
| 2833 | 2841 |
| 2834 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); | 2842 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); |
| 2835 RegexPattern *pClone = pSource->clone(); | 2843 RegexPattern *pClone = pSource->clone(); |
| 2836 delete pSource; | 2844 delete pSource; |
| 2837 RegexMatcher *mFromClone = pClone->matcher(status); | 2845 RegexMatcher *mFromClone = pClone->matcher(status); |
| 2838 REGEX_CHECK_STATUS; | 2846 REGEX_CHECK_STATUS; |
| 2839 | 2847 |
| 2840 UText input = UTEXT_INITIALIZER; | 2848 UText input = UTEXT_INITIALIZER; |
| 2841 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57
, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ | 2849 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57
, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ |
| 2842 utext_openUTF8(&input, str_HelloWorld, -1, &status); | 2850 utext_openUTF8(&input, str_HelloWorld, -1, &status); |
| 2843 mFromClone->reset(&input); | 2851 mFromClone->reset(&input); |
| 2844 REGEX_ASSERT(mFromClone->find() == TRUE); | 2852 REGEX_ASSERT(mFromClone->find() == TRUE); |
| 2845 REGEX_ASSERT(mFromClone->group(status) == "Hello"); | 2853 REGEX_ASSERT(mFromClone->group(status) == "Hello"); |
| 2846 REGEX_ASSERT(mFromClone->find() == TRUE); | 2854 REGEX_ASSERT(mFromClone->find() == TRUE); |
| 2847 REGEX_ASSERT(mFromClone->group(status) == "World"); | 2855 REGEX_ASSERT(mFromClone->group(status) == "World"); |
| 2848 REGEX_ASSERT(mFromClone->find() == FALSE); | 2856 REGEX_ASSERT(mFromClone->find() == FALSE); |
| 2849 delete mFromClone; | 2857 delete mFromClone; |
| 2850 delete pClone; | 2858 delete pClone; |
| 2851 | 2859 |
| 2852 utext_close(&input); | 2860 utext_close(&input); |
| 2853 utext_close(&pattern); | 2861 utext_close(&pattern); |
| 2854 } | 2862 } |
| 2855 | 2863 |
| 2856 // | 2864 // |
| 2857 // matches convenience API | 2865 // matches convenience API |
| 2858 // | 2866 // |
| 2859 { | 2867 { |
| 2860 UErrorCode status = U_ZERO_ERROR; | 2868 UErrorCode status = U_ZERO_ERROR; |
| 2861 UText pattern = UTEXT_INITIALIZER; | 2869 UText pattern = UTEXT_INITIALIZER; |
| 2862 UText input = UTEXT_INITIALIZER; | 2870 UText input = UTEXT_INITIALIZER; |
| 2863 | 2871 |
| 2864 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x2
0, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ | 2872 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x2
0, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ |
| 2865 utext_openUTF8(&input, str_randominput, -1, &status); | 2873 utext_openUTF8(&input, str_randominput, -1, &status); |
| 2866 | 2874 |
| 2867 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ | 2875 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ |
| 2868 utext_openUTF8(&pattern, str_dotstar, -1, &status); | 2876 utext_openUTF8(&pattern, str_dotstar, -1, &status); |
| 2869 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE
); | 2877 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE
); |
| 2870 REGEX_CHECK_STATUS; | 2878 REGEX_CHECK_STATUS; |
| 2871 | 2879 |
| 2872 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ | 2880 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ |
| 2873 utext_openUTF8(&pattern, str_abc, -1, &status); | 2881 utext_openUTF8(&pattern, str_abc, -1, &status); |
| 2874 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) ==
FALSE); | 2882 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) ==
FALSE); |
| 2875 REGEX_CHECK_STATUS; | 2883 REGEX_CHECK_STATUS; |
| 2876 | 2884 |
| 2877 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /*
.*nput */ | 2885 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /*
.*nput */ |
| 2878 utext_openUTF8(&pattern, str_nput, -1, &status); | 2886 utext_openUTF8(&pattern, str_nput, -1, &status); |
| 2879 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status)
== TRUE); | 2887 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status)
== TRUE); |
| 2880 REGEX_CHECK_STATUS; | 2888 REGEX_CHECK_STATUS; |
| 2881 | 2889 |
| 2882 utext_openUTF8(&pattern, str_randominput, -1, &status); | 2890 utext_openUTF8(&pattern, str_randominput, -1, &status); |
| 2883 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, s
tatus) == TRUE); | 2891 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, s
tatus) == TRUE); |
| 2884 REGEX_CHECK_STATUS; | 2892 REGEX_CHECK_STATUS; |
| 2885 | 2893 |
| 2886 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ | 2894 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ |
| 2887 utext_openUTF8(&pattern, str_u, -1, &status); | 2895 utext_openUTF8(&pattern, str_u, -1, &status); |
| 2888 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) ==
FALSE); | 2896 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) ==
FALSE); |
| 2889 REGEX_CHECK_STATUS; | 2897 REGEX_CHECK_STATUS; |
| 2890 | 2898 |
| 2891 utext_openUTF8(&input, str_abc, -1, &status); | 2899 utext_openUTF8(&input, str_abc, -1, &status); |
| 2892 utext_openUTF8(&pattern, str_abc, -1, &status); | 2900 utext_openUTF8(&pattern, str_abc, -1, &status); |
| 2893 status = U_INDEX_OUTOFBOUNDS_ERROR; | 2901 status = U_INDEX_OUTOFBOUNDS_ERROR; |
| 2894 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); | 2902 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); |
| 2895 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 2903 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 2896 | 2904 |
| 2897 utext_close(&input); | 2905 utext_close(&input); |
| 2898 utext_close(&pattern); | 2906 utext_close(&pattern); |
| 2899 } | 2907 } |
| 2900 | 2908 |
| 2901 | 2909 |
| 2902 // | 2910 // |
| 2903 // Split() | 2911 // Split() |
| 2904 // | 2912 // |
| 2905 status = U_ZERO_ERROR; | 2913 status = U_ZERO_ERROR; |
| 2906 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ | 2914 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ |
| (...skipping 370 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3277 } | 3285 } |
| 3278 | 3286 |
| 3279 | 3287 |
| 3280 void RegexTest::regex_find(const UnicodeString &pattern, | 3288 void RegexTest::regex_find(const UnicodeString &pattern, |
| 3281 const UnicodeString &flags, | 3289 const UnicodeString &flags, |
| 3282 const UnicodeString &inputString, | 3290 const UnicodeString &inputString, |
| 3283 const char *srcPath, | 3291 const char *srcPath, |
| 3284 int32_t line) { | 3292 int32_t line) { |
| 3285 UnicodeString unEscapedInput; | 3293 UnicodeString unEscapedInput; |
| 3286 UnicodeString deTaggedInput; | 3294 UnicodeString deTaggedInput; |
| 3287 | 3295 |
| 3288 int32_t patternUTF8Length, inputUTF8Length; | 3296 int32_t patternUTF8Length, inputUTF8Length; |
| 3289 char *patternChars = NULL, *inputChars = NULL; | 3297 char *patternChars = NULL, *inputChars = NULL; |
| 3290 UText patternText = UTEXT_INITIALIZER; | 3298 UText patternText = UTEXT_INITIALIZER; |
| 3291 UText inputText = UTEXT_INITIALIZER; | 3299 UText inputText = UTEXT_INITIALIZER; |
| 3292 UConverter *UTF8Converter = NULL; | 3300 UConverter *UTF8Converter = NULL; |
| 3293 | 3301 |
| 3294 UErrorCode status = U_ZERO_ERROR; | 3302 UErrorCode status = U_ZERO_ERROR; |
| 3295 UParseError pe; | 3303 UParseError pe; |
| 3296 RegexPattern *parsePat = NULL; | 3304 RegexPattern *parsePat = NULL; |
| 3297 RegexMatcher *parseMatcher = NULL; | 3305 RegexMatcher *parseMatcher = NULL; |
| 3298 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; | 3306 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; |
| 3299 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; | 3307 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; |
| 3300 UVector groupStarts(status); | 3308 UVector groupStarts(status); |
| 3301 UVector groupEnds(status); | 3309 UVector groupEnds(status); |
| 3302 UVector groupStartsUTF8(status); | 3310 UVector groupStartsUTF8(status); |
| 3303 UVector groupEndsUTF8(status); | 3311 UVector groupEndsUTF8(status); |
| 3304 UBool isMatch = FALSE, isUTF8Match = FALSE; | 3312 UBool isMatch = FALSE, isUTF8Match = FALSE; |
| 3305 UBool failed = FALSE; | 3313 UBool failed = FALSE; |
| 3306 int32_t numFinds; | 3314 int32_t numFinds; |
| 3307 int32_t i; | 3315 int32_t i; |
| 3308 UBool useMatchesFunc = FALSE; | 3316 UBool useMatchesFunc = FALSE; |
| 3309 UBool useLookingAtFunc = FALSE; | 3317 UBool useLookingAtFunc = FALSE; |
| 3310 int32_t regionStart = -1; | 3318 int32_t regionStart = -1; |
| 3311 int32_t regionEnd = -1; | 3319 int32_t regionEnd = -1; |
| 3312 int32_t regionStartUTF8 = -1; | 3320 int32_t regionStartUTF8 = -1; |
| 3313 int32_t regionEndUTF8 = -1; | 3321 int32_t regionEndUTF8 = -1; |
| 3314 | 3322 |
| 3315 | 3323 |
| 3316 // | 3324 // |
| 3317 // Compile the caller's pattern | 3325 // Compile the caller's pattern |
| 3318 // | 3326 // |
| 3319 uint32_t bflags = 0; | 3327 uint32_t bflags = 0; |
| 3320 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag | 3328 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag |
| 3321 bflags |= UREGEX_CASE_INSENSITIVE; | 3329 bflags |= UREGEX_CASE_INSENSITIVE; |
| 3322 } | 3330 } |
| 3323 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag | 3331 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag |
| 3324 bflags |= UREGEX_COMMENTS; | 3332 bflags |= UREGEX_COMMENTS; |
| 3325 } | 3333 } |
| 3326 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag | 3334 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag |
| 3327 bflags |= UREGEX_DOTALL; | 3335 bflags |= UREGEX_DOTALL; |
| 3328 } | 3336 } |
| 3329 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag | 3337 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag |
| 3330 bflags |= UREGEX_MULTILINE; | 3338 bflags |= UREGEX_MULTILINE; |
| 3331 } | 3339 } |
| 3332 | 3340 |
| 3333 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag | 3341 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag |
| 3334 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; | 3342 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; |
| 3335 } | 3343 } |
| 3336 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag | 3344 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag |
| 3337 bflags |= UREGEX_UNIX_LINES; | 3345 bflags |= UREGEX_UNIX_LINES; |
| 3338 } | 3346 } |
| 3339 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag | 3347 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag |
| 3340 bflags |= UREGEX_LITERAL; | 3348 bflags |= UREGEX_LITERAL; |
| 3341 } | 3349 } |
| 3342 | 3350 |
| (...skipping 15 matching lines...) Expand all Loading... |
| 3358 goto cleanupAndReturn; | 3366 goto cleanupAndReturn; |
| 3359 } else { | 3367 } else { |
| 3360 // Unexpected pattern compilation error. | 3368 // Unexpected pattern compilation error. |
| 3361 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(
status)); | 3369 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(
status)); |
| 3362 goto cleanupAndReturn; | 3370 goto cleanupAndReturn; |
| 3363 } | 3371 } |
| 3364 } | 3372 } |
| 3365 | 3373 |
| 3366 UTF8Converter = ucnv_open("UTF8", &status); | 3374 UTF8Converter = ucnv_open("UTF8", &status); |
| 3367 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
NULL, &status); | 3375 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
NULL, &status); |
| 3368 | 3376 |
| 3369 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); | 3377 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); |
| 3370 status = U_ZERO_ERROR; // buffer overflow | 3378 status = U_ZERO_ERROR; // buffer overflow |
| 3371 patternChars = new char[patternUTF8Length+1]; | 3379 patternChars = new char[patternUTF8Length+1]; |
| 3372 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); | 3380 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); |
| 3373 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); | 3381 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); |
| 3374 | 3382 |
| 3375 if (status == U_ZERO_ERROR) { | 3383 if (status == U_ZERO_ERROR) { |
| 3376 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); | 3384 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); |
| 3377 | 3385 |
| 3378 if (status != U_ZERO_ERROR) { | 3386 if (status != U_ZERO_ERROR) { |
| 3379 #if UCONFIG_NO_BREAK_ITERATION==1 | 3387 #if UCONFIG_NO_BREAK_ITERATION==1 |
| 3380 // 'v' test flag means that the test pattern should not compile if I
CU was configured | 3388 // 'v' test flag means that the test pattern should not compile if I
CU was configured |
| 3381 // to not include break iteration. RBBI is needed for Unicode w
ord boundaries. | 3389 // to not include break iteration. RBBI is needed for Unicode w
ord boundaries. |
| 3382 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORT
ED_ERROR) { | 3390 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORT
ED_ERROR) { |
| 3383 goto cleanupAndReturn; | 3391 goto cleanupAndReturn; |
| 3384 } | 3392 } |
| 3385 #endif | 3393 #endif |
| 3386 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' | 3394 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' |
| 3387 // Expected pattern compilation error. | 3395 // Expected pattern compilation error. |
| 3388 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' | 3396 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' |
| 3389 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(s
tatus)); | 3397 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(s
tatus)); |
| 3390 } | 3398 } |
| 3391 goto cleanupAndReturn; | 3399 goto cleanupAndReturn; |
| 3392 } else { | 3400 } else { |
| 3393 // Unexpected pattern compilation error. | 3401 // Unexpected pattern compilation error. |
| 3394 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_err
orName(status)); | 3402 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_err
orName(status)); |
| 3395 goto cleanupAndReturn; | 3403 goto cleanupAndReturn; |
| 3396 } | 3404 } |
| 3397 } | 3405 } |
| 3398 } | 3406 } |
| 3399 | 3407 |
| 3400 if (UTF8Pattern == NULL) { | 3408 if (UTF8Pattern == NULL) { |
| 3401 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en without being a failure of the engine | 3409 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en without being a failure of the engine |
| 3402 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d",
srcPath, line); | 3410 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d",
srcPath, line); |
| 3403 status = U_ZERO_ERROR; | 3411 status = U_ZERO_ERROR; |
| 3404 } | 3412 } |
| 3405 | 3413 |
| 3406 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag | 3414 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag |
| 3407 RegexPatternDump(callerPattern); | 3415 callerPattern->dumpPattern(); |
| 3408 } | 3416 } |
| 3409 | 3417 |
| 3410 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag | 3418 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag |
| 3411 errln("%s, Line %d: Expected, but did not get, a pattern compilation err
or.", srcPath, line); | 3419 errln("%s, Line %d: Expected, but did not get, a pattern compilation err
or.", srcPath, line); |
| 3412 goto cleanupAndReturn; | 3420 goto cleanupAndReturn; |
| 3413 } | 3421 } |
| 3414 | 3422 |
| 3415 | 3423 |
| 3416 // | 3424 // |
| 3417 // Number of times find() should be called on the test string, default to 1 | 3425 // Number of times find() should be called on the test string, default to 1 |
| 3418 // | 3426 // |
| 3419 numFinds = 1; | 3427 numFinds = 1; |
| 3420 for (i=2; i<=9; i++) { | 3428 for (i=2; i<=9; i++) { |
| 3421 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag | 3429 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag |
| 3422 if (numFinds != 1) { | 3430 if (numFinds != 1) { |
| 3423 errln("Line %d: more than one digit flag. Scanning %d.", line,
i); | 3431 errln("Line %d: more than one digit flag. Scanning %d.", line,
i); |
| 3424 goto cleanupAndReturn; | 3432 goto cleanupAndReturn; |
| 3425 } | 3433 } |
| 3426 numFinds = i; | 3434 numFinds = i; |
| 3427 } | 3435 } |
| 3428 } | 3436 } |
| 3429 | 3437 |
| 3430 // 'M' flag. Use matches() instead of find() | 3438 // 'M' flag. Use matches() instead of find() |
| 3431 if (flags.indexOf((UChar)0x4d) >= 0) { | 3439 if (flags.indexOf((UChar)0x4d) >= 0) { |
| 3432 useMatchesFunc = TRUE; | 3440 useMatchesFunc = TRUE; |
| 3433 } | 3441 } |
| 3434 if (flags.indexOf((UChar)0x4c) >= 0) { | 3442 if (flags.indexOf((UChar)0x4c) >= 0) { |
| 3435 useLookingAtFunc = TRUE; | 3443 useLookingAtFunc = TRUE; |
| 3436 } | 3444 } |
| 3437 | 3445 |
| 3438 // | 3446 // |
| 3439 // Find the tags in the input data, remove them, and record the group bound
ary | 3447 // Find the tags in the input data, remove them, and record the group bound
ary |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3474 } | 3482 } |
| 3475 | 3483 |
| 3476 // | 3484 // |
| 3477 // Configure the matcher according to the flags specified with this test. | 3485 // Configure the matcher according to the flags specified with this test. |
| 3478 // | 3486 // |
| 3479 matcher = callerPattern->matcher(deTaggedInput, status); | 3487 matcher = callerPattern->matcher(deTaggedInput, status); |
| 3480 REGEX_CHECK_STATUS_L(line); | 3488 REGEX_CHECK_STATUS_L(line); |
| 3481 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag | 3489 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag |
| 3482 matcher->setTrace(TRUE); | 3490 matcher->setTrace(TRUE); |
| 3483 } | 3491 } |
| 3484 | 3492 |
| 3485 if (UTF8Pattern != NULL) { | 3493 if (UTF8Pattern != NULL) { |
| 3486 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); | 3494 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); |
| 3487 status = U_ZERO_ERROR; // buffer overflow | 3495 status = U_ZERO_ERROR; // buffer overflow |
| 3488 inputChars = new char[inputUTF8Length+1]; | 3496 inputChars = new char[inputUTF8Length+1]; |
| 3489 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, stat
us); | 3497 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, stat
us); |
| 3490 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); | 3498 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); |
| 3491 | 3499 |
| 3492 if (status == U_ZERO_ERROR) { | 3500 if (status == U_ZERO_ERROR) { |
| 3493 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); | 3501 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); |
| 3494 REGEX_CHECK_STATUS_L(line); | 3502 REGEX_CHECK_STATUS_L(line); |
| 3495 } | 3503 } |
| 3496 | 3504 |
| 3497 if (UTF8Matcher == NULL) { | 3505 if (UTF8Matcher == NULL) { |
| 3498 // UTF-8 does not allow unpaired surrogates, so this could actually
happen without being a failure of the engine | 3506 // UTF-8 does not allow unpaired surrogates, so this could actually
happen without being a failure of the engine |
| 3499 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d"
, srcPath, line); | 3507 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d"
, srcPath, line); |
| 3500 status = U_ZERO_ERROR; | 3508 status = U_ZERO_ERROR; |
| 3501 } | 3509 } |
| 3502 } | 3510 } |
| 3503 | 3511 |
| 3504 // | 3512 // |
| 3505 // Generate native indices for UTF8 versions of region and capture group in
fo | 3513 // Generate native indices for UTF8 versions of region and capture group in
fo |
| 3506 // | 3514 // |
| 3507 if (UTF8Matcher != NULL) { | 3515 if (UTF8Matcher != NULL) { |
| 3508 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStar
t, regionStartUTF8); | 3516 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStar
t, regionStartUTF8); |
| 3509 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd,
regionEndUTF8); | 3517 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd,
regionEndUTF8); |
| 3510 | 3518 |
| 3511 // Fill out the native index UVector info. | 3519 // Fill out the native index UVector info. |
| 3512 // Only need 1 loop, from above we know groupStarts.size() = groupEnds.
size() | 3520 // Only need 1 loop, from above we know groupStarts.size() = groupEnds.
size() |
| 3513 for (i=0; i<groupStarts.size(); i++) { | 3521 for (i=0; i<groupStarts.size(); i++) { |
| 3514 int32_t start = groupStarts.elementAti(i); | 3522 int32_t start = groupStarts.elementAti(i); |
| 3515 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting | 3523 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting |
| 3516 if (start >= 0) { | 3524 if (start >= 0) { |
| 3517 int32_t startUTF8; | 3525 int32_t startUTF8; |
| 3518 if (!utextOffsetToNative(&inputText, start, startUTF8)) { | 3526 if (!utextOffsetToNative(&inputText, start, startUTF8)) { |
| 3519 errln("Error at line %d: could not find native index for gro
up start %d. UTF16 index %d", line, i, start); | 3527 errln("Error at line %d: could not find native index for gro
up start %d. UTF16 index %d", line, i, start); |
| 3520 failed = TRUE; | 3528 failed = TRUE; |
| 3521 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. | 3529 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. |
| 3522 } | 3530 } |
| 3523 setInt(groupStartsUTF8, startUTF8, i); | 3531 setInt(groupStartsUTF8, startUTF8, i); |
| 3524 } | 3532 } |
| 3525 | 3533 |
| 3526 int32_t end = groupEnds.elementAti(i); | 3534 int32_t end = groupEnds.elementAti(i); |
| 3527 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting | 3535 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting |
| 3528 if (end >= 0) { | 3536 if (end >= 0) { |
| 3529 int32_t endUTF8; | 3537 int32_t endUTF8; |
| 3530 if (!utextOffsetToNative(&inputText, end, endUTF8)) { | 3538 if (!utextOffsetToNative(&inputText, end, endUTF8)) { |
| 3531 errln("Error at line %d: could not find native index for gro
up end %d. UTF16 index %d", line, i, end); | 3539 errln("Error at line %d: could not find native index for gro
up end %d. UTF16 index %d", line, i, end); |
| 3532 failed = TRUE; | 3540 failed = TRUE; |
| 3533 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. | 3541 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. |
| 3534 } | 3542 } |
| 3535 setInt(groupEndsUTF8, endUTF8, i); | 3543 setInt(groupEndsUTF8, endUTF8, i); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 3550 if (UTF8Matcher != NULL) { | 3558 if (UTF8Matcher != NULL) { |
| 3551 UTF8Matcher->useAnchoringBounds(FALSE); | 3559 UTF8Matcher->useAnchoringBounds(FALSE); |
| 3552 } | 3560 } |
| 3553 } | 3561 } |
| 3554 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag | 3562 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag |
| 3555 matcher->useTransparentBounds(TRUE); | 3563 matcher->useTransparentBounds(TRUE); |
| 3556 if (UTF8Matcher != NULL) { | 3564 if (UTF8Matcher != NULL) { |
| 3557 UTF8Matcher->useTransparentBounds(TRUE); | 3565 UTF8Matcher->useTransparentBounds(TRUE); |
| 3558 } | 3566 } |
| 3559 } | 3567 } |
| 3560 | 3568 |
| 3561 | 3569 |
| 3562 | 3570 |
| 3563 // | 3571 // |
| 3564 // Do a find on the de-tagged input using the caller's pattern | 3572 // Do a find on the de-tagged input using the caller's pattern |
| 3565 // TODO: error on count>1 and not find(). | 3573 // TODO: error on count>1 and not find(). |
| 3566 // error on both matches() and lookingAt(). | 3574 // error on both matches() and lookingAt(). |
| 3567 // | 3575 // |
| 3568 for (i=0; i<numFinds; i++) { | 3576 for (i=0; i<numFinds; i++) { |
| 3569 if (useMatchesFunc) { | 3577 if (useMatchesFunc) { |
| 3570 isMatch = matcher->matches(status); | 3578 isMatch = matcher->matches(status); |
| 3571 if (UTF8Matcher != NULL) { | 3579 if (UTF8Matcher != NULL) { |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3626 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d", | 3634 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d", |
| 3627 line, i, expectedStart, matcher->start(i, status)); | 3635 line, i, expectedStart, matcher->start(i, status)); |
| 3628 failed = TRUE; | 3636 failed = TRUE; |
| 3629 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. | 3637 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. |
| 3630 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expec
tedStartUTF8) { | 3638 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expec
tedStartUTF8) { |
| 3631 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d (UTF8)", | 3639 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d (UTF8)", |
| 3632 line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); | 3640 line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); |
| 3633 failed = TRUE; | 3641 failed = TRUE; |
| 3634 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. | 3642 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. |
| 3635 } | 3643 } |
| 3636 | 3644 |
| 3637 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti
(i)); | 3645 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti
(i)); |
| 3638 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF
8.elementAti(i)); | 3646 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF
8.elementAti(i)); |
| 3639 if (matcher->end(i, status) != expectedEnd) { | 3647 if (matcher->end(i, status) != expectedEnd) { |
| 3640 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d", | 3648 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d", |
| 3641 line, i, expectedEnd, matcher->end(i, status)); | 3649 line, i, expectedEnd, matcher->end(i, status)); |
| 3642 failed = TRUE; | 3650 failed = TRUE; |
| 3643 // Error on end position; keep going; real error is probably yet to
come as group | 3651 // Error on end position; keep going; real error is probably yet to
come as group |
| 3644 // end positions work from end of the input data towards the front
. | 3652 // end positions work from end of the input data towards the front
. |
| 3645 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expecte
dEndUTF8) { | 3653 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expecte
dEndUTF8) { |
| 3646 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d (UTF8)", | 3654 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d (UTF8)", |
| (...skipping 16 matching lines...) Expand all Loading... |
| 3663 | 3671 |
| 3664 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa
lse | 3672 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa
lse |
| 3665 matcher->requireEnd() == TRUE) { | 3673 matcher->requireEnd() == TRUE) { |
| 3666 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l
ine); | 3674 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l
ine); |
| 3667 failed = TRUE; | 3675 failed = TRUE; |
| 3668 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && //
'Y' flag: RequireEnd() == false | 3676 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && //
'Y' flag: RequireEnd() == false |
| 3669 UTF8Matcher->requireEnd() == TRUE) { | 3677 UTF8Matcher->requireEnd() == TRUE) { |
| 3670 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT
F8)", line); | 3678 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT
F8)", line); |
| 3671 failed = TRUE; | 3679 failed = TRUE; |
| 3672 } | 3680 } |
| 3673 | 3681 |
| 3674 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr
ue | 3682 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr
ue |
| 3675 matcher->requireEnd() == FALSE) { | 3683 matcher->requireEnd() == FALSE) { |
| 3676 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l
ine); | 3684 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l
ine); |
| 3677 failed = TRUE; | 3685 failed = TRUE; |
| 3678 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && //
'Y' flag: RequireEnd() == false | 3686 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && //
'Y' flag: RequireEnd() == false |
| 3679 UTF8Matcher->requireEnd() == FALSE) { | 3687 UTF8Matcher->requireEnd() == FALSE) { |
| 3680 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT
F8)", line); | 3688 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT
F8)", line); |
| 3681 failed = TRUE; | 3689 failed = TRUE; |
| 3682 } | 3690 } |
| 3683 | 3691 |
| 3684 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false | 3692 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false |
| 3685 matcher->hitEnd() == TRUE) { | 3693 matcher->hitEnd() == TRUE) { |
| 3686 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line)
; | 3694 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line)
; |
| 3687 failed = TRUE; | 3695 failed = TRUE; |
| 3688 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && //
'Z' flag: hitEnd() == false | 3696 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && //
'Z' flag: hitEnd() == false |
| 3689 UTF8Matcher->hitEnd() == TRUE) { | 3697 UTF8Matcher->hitEnd() == TRUE) { |
| 3690 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)"
, line); | 3698 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)"
, line); |
| 3691 failed = TRUE; | 3699 failed = TRUE; |
| 3692 } | 3700 } |
| 3693 | 3701 |
| 3694 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true | 3702 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true |
| 3695 matcher->hitEnd() == FALSE) { | 3703 matcher->hitEnd() == FALSE) { |
| 3696 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line)
; | 3704 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line)
; |
| 3697 failed = TRUE; | 3705 failed = TRUE; |
| 3698 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && //
'z' flag: hitEnd() == true | 3706 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && //
'z' flag: hitEnd() == true |
| 3699 UTF8Matcher->hitEnd() == FALSE) { | 3707 UTF8Matcher->hitEnd() == FALSE) { |
| 3700 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)"
, line); | 3708 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)"
, line); |
| 3701 failed = TRUE; | 3709 failed = TRUE; |
| 3702 } | 3710 } |
| 3703 | 3711 |
| 3704 | 3712 |
| 3705 cleanupAndReturn: | 3713 cleanupAndReturn: |
| 3706 if (failed) { | 3714 if (failed) { |
| 3707 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " | 3715 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " |
| 3708 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); | 3716 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); |
| 3709 // callerPattern->dump(); | 3717 // callerPattern->dump(); |
| 3710 } | 3718 } |
| 3711 delete parseMatcher; | 3719 delete parseMatcher; |
| 3712 delete parsePat; | 3720 delete parsePat; |
| 3713 delete UTF8Matcher; | 3721 delete UTF8Matcher; |
| 3714 delete UTF8Pattern; | 3722 delete UTF8Pattern; |
| 3715 delete matcher; | 3723 delete matcher; |
| 3716 delete callerPattern; | 3724 delete callerPattern; |
| 3717 | 3725 |
| 3718 utext_close(&inputText); | 3726 utext_close(&inputText); |
| 3719 delete[] inputChars; | 3727 delete[] inputChars; |
| 3720 utext_close(&patternText); | 3728 utext_close(&patternText); |
| 3721 delete[] patternChars; | 3729 delete[] patternChars; |
| 3722 ucnv_close(UTF8Converter); | 3730 ucnv_close(UTF8Converter); |
| 3723 } | 3731 } |
| 3724 | 3732 |
| 3725 | 3733 |
| 3726 | 3734 |
| 3727 | 3735 |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3783 // Invalid Back Reference \0 | 3791 // Invalid Back Reference \0 |
| 3784 // For ICU 3.8 and earlier | 3792 // For ICU 3.8 and earlier |
| 3785 // For ICU versions newer than 3.8, \0 introduces an octal escape. | 3793 // For ICU versions newer than 3.8, \0 introduces an octal escape. |
| 3786 // | 3794 // |
| 3787 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); | 3795 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); |
| 3788 | 3796 |
| 3789 } | 3797 } |
| 3790 | 3798 |
| 3791 | 3799 |
| 3792 //------------------------------------------------------------------------------
- | 3800 //------------------------------------------------------------------------------
- |
| 3793 // | 3801 // |
| 3794 // Read a text data file, convert it to UChars, and return the data | 3802 // Read a text data file, convert it to UChars, and return the data |
| 3795 // in one big UChar * buffer, which the caller must delete. | 3803 // in one big UChar * buffer, which the caller must delete. |
| 3796 // | 3804 // |
| 3797 //------------------------------------------------------------------------------
-- | 3805 //------------------------------------------------------------------------------
-- |
| 3798 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, | 3806 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, |
| 3799 const char *defEncoding, UErrorCode &status
) { | 3807 const char *defEncoding, UErrorCode &status
) { |
| 3800 UChar *retPtr = NULL; | 3808 UChar *retPtr = NULL; |
| 3801 char *fileBuf = NULL; | 3809 char *fileBuf = NULL; |
| 3802 UConverter* conv = NULL; | 3810 UConverter* conv = NULL; |
| 3803 FILE *f = NULL; | 3811 FILE *f = NULL; |
| (...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4126 UBool found = testMat->find(); | 4134 UBool found = testMat->find(); |
| 4127 UBool expected = FALSE; | 4135 UBool expected = FALSE; |
| 4128 if (fields[2].indexOf(UChar_y) >=0) { | 4136 if (fields[2].indexOf(UChar_y) >=0) { |
| 4129 expected = TRUE; | 4137 expected = TRUE; |
| 4130 } | 4138 } |
| 4131 if (expected != found) { | 4139 if (expected != found) { |
| 4132 errln("line %d: Expected %smatch, got %smatch", | 4140 errln("line %d: Expected %smatch, got %smatch", |
| 4133 lineNum, expected?"":"no ", found?"":"no " ); | 4141 lineNum, expected?"":"no ", found?"":"no " ); |
| 4134 continue; | 4142 continue; |
| 4135 } | 4143 } |
| 4136 | 4144 |
| 4137 // Don't try to check expected results if there is no match. | 4145 // Don't try to check expected results if there is no match. |
| 4138 // (Some have stuff in the expected fields) | 4146 // (Some have stuff in the expected fields) |
| 4139 if (!found) { | 4147 if (!found) { |
| 4140 delete testMat; | 4148 delete testMat; |
| 4141 delete testPat; | 4149 delete testPat; |
| 4142 continue; | 4150 continue; |
| 4143 } | 4151 } |
| 4144 | 4152 |
| 4145 // | 4153 // |
| 4146 // Interpret the Perl expression from the fourth field of the data file, | 4154 // Interpret the Perl expression from the fourth field of the data file, |
| (...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4424 const UChar UChar_y = 0x79; | 4432 const UChar UChar_y = 0x79; |
| 4425 if (flagStr.indexOf(UChar_i) != -1) { | 4433 if (flagStr.indexOf(UChar_i) != -1) { |
| 4426 flags |= UREGEX_CASE_INSENSITIVE; | 4434 flags |= UREGEX_CASE_INSENSITIVE; |
| 4427 } | 4435 } |
| 4428 if (flagStr.indexOf(UChar_m) != -1) { | 4436 if (flagStr.indexOf(UChar_m) != -1) { |
| 4429 flags |= UREGEX_MULTILINE; | 4437 flags |= UREGEX_MULTILINE; |
| 4430 } | 4438 } |
| 4431 if (flagStr.indexOf(UChar_x) != -1) { | 4439 if (flagStr.indexOf(UChar_x) != -1) { |
| 4432 flags |= UREGEX_COMMENTS; | 4440 flags |= UREGEX_COMMENTS; |
| 4433 } | 4441 } |
| 4434 | 4442 |
| 4435 // | 4443 // |
| 4436 // Put the pattern in a UTF-8 UText | 4444 // Put the pattern in a UTF-8 UText |
| 4437 // | 4445 // |
| 4438 status = U_ZERO_ERROR; | 4446 status = U_ZERO_ERROR; |
| 4439 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Conve
rter.getAlias(), status); | 4447 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Conve
rter.getAlias(), status); |
| 4440 if (status == U_BUFFER_OVERFLOW_ERROR) { | 4448 if (status == U_BUFFER_OVERFLOW_ERROR) { |
| 4441 status = U_ZERO_ERROR; | 4449 status = U_ZERO_ERROR; |
| 4442 delete[] patternChars; | 4450 delete[] patternChars; |
| 4443 patternCapacity = patternLength + 1; | 4451 patternCapacity = patternLength + 1; |
| 4444 patternChars = new char[patternCapacity]; | 4452 patternChars = new char[patternCapacity]; |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4521 UBool found = testMat->find(); | 4529 UBool found = testMat->find(); |
| 4522 UBool expected = FALSE; | 4530 UBool expected = FALSE; |
| 4523 if (fields[2].indexOf(UChar_y) >=0) { | 4531 if (fields[2].indexOf(UChar_y) >=0) { |
| 4524 expected = TRUE; | 4532 expected = TRUE; |
| 4525 } | 4533 } |
| 4526 if (expected != found) { | 4534 if (expected != found) { |
| 4527 errln("line %d: Expected %smatch, got %smatch", | 4535 errln("line %d: Expected %smatch, got %smatch", |
| 4528 lineNum, expected?"":"no ", found?"":"no " ); | 4536 lineNum, expected?"":"no ", found?"":"no " ); |
| 4529 continue; | 4537 continue; |
| 4530 } | 4538 } |
| 4531 | 4539 |
| 4532 // Don't try to check expected results if there is no match. | 4540 // Don't try to check expected results if there is no match. |
| 4533 // (Some have stuff in the expected fields) | 4541 // (Some have stuff in the expected fields) |
| 4534 if (!found) { | 4542 if (!found) { |
| 4535 delete testMat; | 4543 delete testMat; |
| 4536 delete testPat; | 4544 delete testPat; |
| 4537 continue; | 4545 continue; |
| 4538 } | 4546 } |
| 4539 | 4547 |
| 4540 // | 4548 // |
| 4541 // Interpret the Perl expression from the fourth field of the data file, | 4549 // Interpret the Perl expression from the fourth field of the data file, |
| (...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4664 delete groupsPat; | 4672 delete groupsPat; |
| 4665 | 4673 |
| 4666 delete flagMat; | 4674 delete flagMat; |
| 4667 delete flagPat; | 4675 delete flagPat; |
| 4668 | 4676 |
| 4669 delete lineMat; | 4677 delete lineMat; |
| 4670 delete linePat; | 4678 delete linePat; |
| 4671 | 4679 |
| 4672 delete fieldPat; | 4680 delete fieldPat; |
| 4673 delete [] testData; | 4681 delete [] testData; |
| 4674 | 4682 |
| 4675 utext_close(&patternText); | 4683 utext_close(&patternText); |
| 4676 utext_close(&inputText); | 4684 utext_close(&inputText); |
| 4677 | 4685 |
| 4678 delete [] patternChars; | 4686 delete [] patternChars; |
| 4679 delete [] inputChars; | 4687 delete [] inputChars; |
| 4680 | 4688 |
| 4681 | 4689 |
| 4682 logln("%d tests skipped because of unimplemented regexp features.", skippedU
nimplementedCount); | 4690 logln("%d tests skipped because of unimplemented regexp features.", skippedU
nimplementedCount); |
| 4683 | 4691 |
| 4684 } | 4692 } |
| 4685 | 4693 |
| 4686 | 4694 |
| 4687 //-------------------------------------------------------------- | 4695 //-------------------------------------------------------------- |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4731 } | 4739 } |
| 4732 info->lastSteps = steps; | 4740 info->lastSteps = steps; |
| 4733 info->numCalls++; | 4741 info->numCalls++; |
| 4734 return (info->numCalls < info->maxCalls); | 4742 return (info->numCalls < info->maxCalls); |
| 4735 } | 4743 } |
| 4736 U_CDECL_END | 4744 U_CDECL_END |
| 4737 | 4745 |
// RegexTest::Callbacks - exercises the RegexMatcher match callback API.
//   Scenario 1: getMatchCallback() on a matcher that never had a callback set.
//   Scenario 2: set/get round trip, followed by three matches() calls of increasing
//               cost against the pattern ((.)+\2)+x.
// NOTE(review): REGEX_ASSERT, REGEX_CHECK_STATUS, callBackContext and testCallBackFn are
// defined outside this block; their exact failure behavior is not visible here.
| 4738 void RegexTest::Callbacks() { | 4746 void RegexTest::Callbacks() { |
| 4739 { | 4747 { |
| 4740 // Getter returns NULLs if no callback has been set | 4748 // Getter returns NULLs if no callback has been set |
| 4741 | 4749 |
| 4742 // The variables that the getter will fill in. | 4750 // The variables that the getter will fill in. |
| 4743 // Init to non-null values so that the action of the getter can be see
n. | 4751 // Init to non-null values so that the action of the getter can be see
n. |
// returnedContext is seeded with its own address and returnedFn with a real function,
// so both are known to be non-null before the getter runs.
| 4744 const void *returnedContext = &returnedContext; | 4752 const void *returnedContext = &returnedContext; |
| 4745 URegexMatchCallback *returnedFn = &testCallBackFn; | 4753 URegexMatchCallback *returnedFn = &testCallBackFn; |
| 4746 | 4754 |
| 4747 UErrorCode status = U_ZERO_ERROR; | 4755 UErrorCode status = U_ZERO_ERROR; |
| 4748 RegexMatcher matcher("x", 0, status); | 4756 RegexMatcher matcher("x", 0, status); |
| 4749 REGEX_CHECK_STATUS; | 4757 REGEX_CHECK_STATUS; |
| 4750 matcher.getMatchCallback(returnedFn, returnedContext, status); | 4758 matcher.getMatchCallback(returnedFn, returnedContext, status); |
| 4751 REGEX_CHECK_STATUS; | 4759 REGEX_CHECK_STATUS; |
// Both out-parameters must have been overwritten with NULL by the getter.
| 4752 REGEX_ASSERT(returnedFn == NULL); | 4760 REGEX_ASSERT(returnedFn == NULL); |
| 4753 REGEX_ASSERT(returnedContext == NULL); | 4761 REGEX_ASSERT(returnedContext == NULL); |
| 4754 } | 4762 } |
| 4755 | 4763 |
| 4756 { | 4764 { |
| 4757 // Set and Get work | 4765 // Set and Get work |
// cbInfo is aggregate-initialized with this test object followed by three zeros.
// NOTE(review): the field layout of callBackContext is not visible here - confirm.
| 4758 callBackContext cbInfo = {this, 0, 0, 0}; | 4766 callBackContext cbInfo = {this, 0, 0, 0}; |
| 4759 const void *returnedContext; | 4767 const void *returnedContext; |
| 4760 URegexMatchCallback *returnedFn; | 4768 URegexMatchCallback *returnedFn; |
| 4761 UErrorCode status = U_ZERO_ERROR; | 4769 UErrorCode status = U_ZERO_ERROR; |
| 4762 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);
// A pattern that can run long. | 4770 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);
// A pattern that can run long. |
| 4763 REGEX_CHECK_STATUS; | 4771 REGEX_CHECK_STATUS; |
| 4764 matcher.setMatchCallback(testCallBackFn, &cbInfo, status); | 4772 matcher.setMatchCallback(testCallBackFn, &cbInfo, status); |
| 4765 REGEX_CHECK_STATUS; | 4773 REGEX_CHECK_STATUS; |
| 4766 matcher.getMatchCallback(returnedFn, returnedContext, status); | 4774 matcher.getMatchCallback(returnedFn, returnedContext, status); |
| 4767 REGEX_CHECK_STATUS; | 4775 REGEX_CHECK_STATUS; |
// The getter must hand back exactly the function and context that were just installed.
| 4768 REGEX_ASSERT(returnedFn == testCallBackFn); | 4776 REGEX_ASSERT(returnedFn == testCallBackFn); |
| 4769 REGEX_ASSERT(returnedContext == &cbInfo); | 4777 REGEX_ASSERT(returnedContext == &cbInfo); |
| 4770 | 4778 |
| 4771 // A short-running match shouldn't invoke the callback | 4779 // A short-running match shouldn't invoke the callback |
| 4772 status = U_ZERO_ERROR; | 4780 status = U_ZERO_ERROR; |
// NOTE(review): reset(n) presumably clears the call counter and sets the call limit to n;
// the definition of callBackContext::reset is outside this view - confirm.
| 4773 cbInfo.reset(1); | 4781 cbInfo.reset(1); |
| 4774 UnicodeString s = "xxx"; | 4782 UnicodeString s = "xxx"; |
| 4775 matcher.reset(s); | 4783 matcher.reset(s); |
| 4776 REGEX_ASSERT(matcher.matches(status)); | 4784 REGEX_ASSERT(matcher.matches(status)); |
| 4777 REGEX_CHECK_STATUS; | 4785 REGEX_CHECK_STATUS; |
| 4778 REGEX_ASSERT(cbInfo.numCalls == 0); | 4786 REGEX_ASSERT(cbInfo.numCalls == 0); |
| 4779 | 4787 |
| 4780 // A medium-length match that runs long enough to invoke the | 4788 // A medium-length match that runs long enough to invoke the |
| 4781 // callback, but not so long that the callback aborts it. | 4789 // callback, but not so long that the callback aborts it. |
| 4782 status = U_ZERO_ERROR; | 4790 status = U_ZERO_ERROR; |
| 4783 cbInfo.reset(4); | 4791 cbInfo.reset(4); |
// The input contains no x, so the pattern (which must end in x) cannot match;
// matches() is expected to return FALSE with a success status.
| 4784 s = "aaaaaaaaaaaaaaaaaaab"; | 4792 s = "aaaaaaaaaaaaaaaaaaab"; |
| 4785 matcher.reset(s); | 4793 matcher.reset(s); |
| 4786 REGEX_ASSERT(matcher.matches(status)==FALSE); | 4794 REGEX_ASSERT(matcher.matches(status)==FALSE); |
| 4787 REGEX_CHECK_STATUS; | 4795 REGEX_CHECK_STATUS; |
| 4788 REGEX_ASSERT(cbInfo.numCalls > 0); | 4796 REGEX_ASSERT(cbInfo.numCalls > 0); |
| 4789 | 4797 |
| 4790 // A longer running match that the callback function will abort. | 4798 // A longer running match that the callback function will abort. |
| 4791 status = U_ZERO_ERROR; | 4799 status = U_ZERO_ERROR; |
| 4792 cbInfo.reset(4); | 4800 cbInfo.reset(4); |
// Same shape of input as the previous case but longer. Here the test expects the
// callback to stop the match: status becomes U_REGEX_STOPPED_BY_CALLER and the call
// count equals the limit of 4 passed to reset().
| 4793 s = "aaaaaaaaaaaaaaaaaaaaaaab"; | 4801 s = "aaaaaaaaaaaaaaaaaaaaaaab"; |
| 4794 matcher.reset(s); | 4802 matcher.reset(s); |
| 4795 REGEX_ASSERT(matcher.matches(status)==FALSE); | 4803 REGEX_ASSERT(matcher.matches(status)==FALSE); |
| 4796 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); | 4804 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
| 4797 REGEX_ASSERT(cbInfo.numCalls == 4); | 4805 REGEX_ASSERT(cbInfo.numCalls == 4); |
| 4798 } | 4806 } |
| 4799 | 4807 |
| 4800 | 4808 |
| 4801 } | 4809 } |
| 4802 | 4810 |
| 4803 | 4811 |
| 4804 // | 4812 // |
| 4805 // FindProgressCallbacks() Test the find "progress" callback function. | 4813 // FindProgressCallbacks() Test the find "progress" callback function. |
| 4806 // When set, the find progress callback will be invoked during
a find operation | 4814 // When set, the find progress callback will be invoked during
a find operation |
| 4807 // after each return from a match attempt, giving the applicati
on the opportunity | 4815 // after each return from a match attempt, giving the applicati
on the opportunity |
| 4808 // to terminate a long-running find operation before its norma
l completion. | 4816 // to terminate a long-running find operation before its norma
l completion. |
| 4809 // | 4817 // |
| 4810 | 4818 |
// progressCallBackContext - bookkeeping passed as the context pointer to
// testProgressCallBackFn(). FindProgressCallbacks() aggregate-initializes it as
// {this, 0, 0, 0}, so the member order below must not change.
| 4811 struct progressCallBackContext { | 4819 struct progressCallBackContext { |
// The test object; within the callback it is referenced only by commented-out logging.
| 4812 RegexTest *test; | 4820 RegexTest *test; |
// matchIndex passed to the most recent callback invocation.
| 4813 int64_t lastIndex; | 4821 int64_t lastIndex; |
// Call limit: the callback returns FALSE once numCalls is no longer below this value.
| 4814 int32_t maxCalls; | 4822 int32_t maxCalls; |
// Number of callback invocations since the last reset().
| 4815 int32_t numCalls; | 4823 int32_t numCalls; |
// reset(max): installs a new call limit and zeroes numCalls and lastIndex.
// The test pointer is left untouched.
| 4816 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; | 4824 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; |
| 4817 }; | 4825 }; |
| 4818 | 4826 |
// testProgressCallBackFn - find-progress callback installed by
// RegexTest::FindProgressCallbacks().
//   context     must point to a progressCallBackContext; it is updated on every call.
//   matchIndex  index reported by the matcher; recorded in info->lastIndex.
//   returns     TRUE while the incremented numCalls is still below maxCalls, else FALSE.
| 4827 // call-back function for find(). |
| 4828 // Return TRUE to continue the find(). |
| 4829 // Return FALSE to stop the find(). |
// NOTE(review): U_CDECL_BEGIN / U_CDECL_END and U_CALLCONV are ICU macros defined outside
// this file; presumably they give the callback C linkage and calling convention - confirm.
| 4819 U_CDECL_BEGIN | 4830 U_CDECL_BEGIN |
| 4820 static UBool U_CALLCONV | 4831 static UBool U_CALLCONV |
| 4821 testProgressCallBackFn(const void *context, int64_t matchIndex) { | 4832 testProgressCallBackFn(const void *context, int64_t matchIndex) { |
// The C-style cast also drops const so that the counters can be modified.
| 4822 progressCallBackContext *info = (progressCallBackContext *)context; | 4833 progressCallBackContext *info = (progressCallBackContext *)context; |
| 4823 info->numCalls++; | 4834 info->numCalls++; |
| 4824 info->lastIndex = matchIndex; | 4835 info->lastIndex = matchIndex; |
// NOTE(review): if the logging below is re-enabled, the first %d does not match the
// int64_t type of matchIndex.
| 4825 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n",
matchIndex, info->numCalls); | 4836 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n",
matchIndex, info->numCalls); |
// The comparison uses the already-incremented count, so a limit of N stops on call N.
| 4826 return (info->numCalls < info->maxCalls); | 4837 return (info->numCalls < info->maxCalls); |
| 4827 } | 4838 } |
| 4828 U_CDECL_END | 4839 U_CDECL_END |
| 4829 | 4840 |
// RegexTest::FindProgressCallbacks - exercises the RegexMatcher find-progress callback API.
// The right-hand (NEW) column is the current code. Relative to the left-hand (OLD) column it
//   - uses the pattern ((.)\2)x in place of ((.)+\2)+x,
//   - uses an input starting with aa for the immediate-match case,
//   - expects status U_REGEX_STOPPED_BY_CALLER whenever the callback halts a find(), and
//   - enables the final retry-after-interruption scenario that OLD kept under #if 0.
// NOTE(review): REGEX_ASSERT and REGEX_CHECK_STATUS are defined outside this block; their
// exact failure behavior is not visible here.
| 4830 void RegexTest::FindProgressCallbacks() { | 4841 void RegexTest::FindProgressCallbacks() { |
| 4831 { | 4842 { |
| 4832 // Getter returns NULLs if no callback has been set | 4843 // Getter returns NULLs if no callback has been set |
| 4833 | 4844 |
| 4834 // The variables that the getter will fill in. | 4845 // The variables that the getter will fill in. |
| 4835 // Init to non-null values so that the action of the getter can be see
n. | 4846 // Init to non-null values so that the action of the getter can be see
n. |
// returnedContext is seeded with its own address and returnedFn with the real callback,
// so both are known to be non-null before the getter runs.
| 4836 const void *returnedContext = &returnedContext; | 4847 const void *returnedContext = &returnedContext; |
| 4837 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; | 4848 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; |
| 4838 | 4849 |
| 4839 UErrorCode status = U_ZERO_ERROR; | 4850 UErrorCode status = U_ZERO_ERROR; |
| 4840 RegexMatcher matcher("x", 0, status); | 4851 RegexMatcher matcher("x", 0, status); |
| 4841 REGEX_CHECK_STATUS; | 4852 REGEX_CHECK_STATUS; |
| 4842 matcher.getFindProgressCallback(returnedFn, returnedContext, status); | 4853 matcher.getFindProgressCallback(returnedFn, returnedContext, status); |
| 4843 REGEX_CHECK_STATUS; | 4854 REGEX_CHECK_STATUS; |
// Both out-parameters must have been overwritten with NULL by the getter.
| 4844 REGEX_ASSERT(returnedFn == NULL); | 4855 REGEX_ASSERT(returnedFn == NULL); |
| 4845 REGEX_ASSERT(returnedContext == NULL); | 4856 REGEX_ASSERT(returnedContext == NULL); |
| 4846 } | 4857 } |
| 4847 | 4858 |
| 4848 { | 4859 { |
| 4849 // Set and Get work | 4860 // Set and Get work |
// Aggregate initialization: test = this, lastIndex = 0, maxCalls = 0, numCalls = 0.
| 4850 progressCallBackContext cbInfo = {this, 0, 0, 0}; | 4861 progressCallBackContext cbInfo = {this, 0, 0, 0}; |
| 4851 const void *returnedContext; | 4862 const void *returnedContext; |
| 4852 URegexFindProgressCallback *returnedFn; | 4863 URegexFindProgressCallback *returnedFn; |
| 4853 UErrorCode status = U_ZERO_ERROR; | 4864 UErrorCode status = U_ZERO_ERROR; |
| 4854 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);
// A pattern that can run long. | 4865 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status); |
| 4855 REGEX_CHECK_STATUS; | 4866 REGEX_CHECK_STATUS; |
| 4856 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status)
; | 4867 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status)
; |
| 4857 REGEX_CHECK_STATUS; | 4868 REGEX_CHECK_STATUS; |
| 4858 matcher.getFindProgressCallback(returnedFn, returnedContext, status); | 4869 matcher.getFindProgressCallback(returnedFn, returnedContext, status); |
| 4859 REGEX_CHECK_STATUS; | 4870 REGEX_CHECK_STATUS; |
// The getter must hand back exactly the function and context that were just installed.
| 4860 REGEX_ASSERT(returnedFn == testProgressCallBackFn); | 4871 REGEX_ASSERT(returnedFn == testProgressCallBackFn); |
| 4861 REGEX_ASSERT(returnedContext == &cbInfo); | 4872 REGEX_ASSERT(returnedContext == &cbInfo); |
| 4862 | 4873 |
| 4863 // A short-running match should NOT invoke the callback. | 4874 // A find that matches on the initial position does NOT invoke the callb
ack. |
| 4864 status = U_ZERO_ERROR; | 4875 status = U_ZERO_ERROR; |
| 4865 cbInfo.reset(100); | 4876 cbInfo.reset(100); |
// In NEW the input starts with aax, which the NEW pattern matches at index 0, so the
// test expects find() to succeed with zero callback invocations.
| 4866 UnicodeString s = "abxxx"; | 4877 UnicodeString s = "aaxxx"; |
| 4867 matcher.reset(s); | 4878 matcher.reset(s); |
| 4868 #if 0 | 4879 #if 0 |
| 4869 matcher.setTrace(TRUE); | 4880 matcher.setTrace(TRUE); |
| 4870 #endif | 4881 #endif |
| 4871 REGEX_ASSERT(matcher.find(0, status)); | 4882 REGEX_ASSERT(matcher.find(0, status)); |
| 4872 REGEX_CHECK_STATUS; | 4883 REGEX_CHECK_STATUS; |
| 4873 REGEX_ASSERT(cbInfo.numCalls == 0); | 4884 REGEX_ASSERT(cbInfo.numCalls == 0); |
| 4874 | 4885 |
| 4875 // A medium running match that causes matcher.find() to invoke our callb
ack for each index. | 4886 // A medium running find() that causes matcher.find() to invoke our call
back for each index, |
| 4887 // but not so many times that we interrupt the operation. |
| 4876 status = U_ZERO_ERROR; | 4888 status = U_ZERO_ERROR; |
// The input contains no x, so find() is expected to fail after scanning the input.
// NOTE(review): the limit passed to reset() equals s.length(); it is not strictly greater
// than the input size as the trailing comment on that line states.
| 4877 s = "aaaaaaaaaaaaaaaaaaab"; | 4889 s = "aaaaaaaaaaaaaaaaaaab"; |
| 4878 cbInfo.reset(s.length()); // Some upper limit for number of calls that
is greater than size of our input string | 4890 cbInfo.reset(s.length()); // Some upper limit for number of calls that
is greater than size of our input string |
| 4879 matcher.reset(s); | 4891 matcher.reset(s); |
| 4880 REGEX_ASSERT(matcher.find(0, status)==FALSE); | 4892 REGEX_ASSERT(matcher.find(0, status)==FALSE); |
| 4881 REGEX_CHECK_STATUS; | 4893 REGEX_CHECK_STATUS; |
| 4882 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); | 4894 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); |
| 4883 | 4895 |
| 4884 // A longer running match that causes matcher.find() to invoke our callb
ack which we cancel/interrupt at some point. | 4896 // A longer running match that causes matcher.find() to invoke our callb
ack which we cancel/interrupt at some point. |
| 4885 status = U_ZERO_ERROR; | 4897 status = U_ZERO_ERROR; |
| 4886 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; | 4898 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; |
// The call limit is five less than the input length, so the callback returns FALSE
// before the scan finishes. NEW expects U_REGEX_STOPPED_BY_CALLER in status, where OLD
// expected a success status, and the call count must equal the limit.
| 4887 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of
input string | 4899 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of
input string |
| 4888 matcher.reset(s1); | 4900 matcher.reset(s1); |
| 4889 REGEX_ASSERT(matcher.find(0, status)==FALSE); | 4901 REGEX_ASSERT(matcher.find(0, status)==FALSE); |
| 4890 REGEX_CHECK_STATUS; | 4902 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
| 4891 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); | 4903 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); |
| 4892 | 4904 |
// The scenario below is compiled out by #if 0 in OLD and is live code in NEW.
| 4893 #if 0 | |
| 4894 // Now a match that will succeed, but after an interruption | 4905 // Now a match that will succeed, but after an interruption |
| 4895 status = U_ZERO_ERROR; | 4906 status = U_ZERO_ERROR; |
// s2 ends in xxx, which the pattern can match; the first find() is expected to be
// stopped by the callback before it gets that far.
| 4896 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; | 4907 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; |
| 4897 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of
input string | 4908 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of
input string |
| 4898 matcher.reset(s2); | 4909 matcher.reset(s2); |
| 4899 REGEX_ASSERT(matcher.find(0, status)==FALSE); | 4910 REGEX_ASSERT(matcher.find(0, status)==FALSE); |
| 4900 REGEX_CHECK_STATUS; | 4911 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
| 4901 // Now retry the match from where left off | 4912 // Now retry the match from where left off |
// maxCalls is raised directly instead of calling reset(), so numCalls and lastIndex keep
// their values from the interrupted run and the limit of 100 applies to the running total.
| 4902 cbInfo.maxCalls = 100; // No callback limit | 4913 cbInfo.maxCalls = 100; // No callback limit |
// NEW clears the U_REGEX_STOPPED_BY_CALLER status left by the interrupted find().
// NOTE(review): presumably find() would not run with a failure status - confirm.
| 4914 status = U_ZERO_ERROR; |
| 4903 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); | 4915 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); |
| 4904 REGEX_CHECK_STATUS; | 4916 REGEX_CHECK_STATUS; |
| 4905 #endif | |
| 4906 } | 4917 } |
| 4907 | 4918 |
| 4908 | 4919 |
| 4909 } | 4920 } |
| 4910 | 4921 |
| 4911 | 4922 |
| 4912 //--------------------------------------------------------------------------- | 4923 //--------------------------------------------------------------------------- |
| 4913 // | 4924 // |
| 4914 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable | 4925 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable |
| 4915 // UTexts. The pure-C implementation of UText | 4926 // UTexts. The pure-C implementation of UText |
| 4916 // has no mutable backing stores, but we can | 4927 // has no mutable backing stores, but we can |
| 4917 // use UnicodeString here to test the functionality. | 4928 // use UnicodeString here to test the functionality. |
| 4918 // | 4929 // |
| 4919 //--------------------------------------------------------------------------- | 4930 //--------------------------------------------------------------------------- |
| 4920 void RegexTest::PreAllocatedUTextCAPI () { | 4931 void RegexTest::PreAllocatedUTextCAPI () { |
| 4921 UErrorCode status = U_ZERO_ERROR; | 4932 UErrorCode status = U_ZERO_ERROR; |
| 4922 URegularExpression *re; | 4933 URegularExpression *re; |
| 4923 UText patternText = UTEXT_INITIALIZER; | 4934 UText patternText = UTEXT_INITIALIZER; |
| 4924 UnicodeString buffer; | 4935 UnicodeString buffer; |
| 4925 UText bufferText = UTEXT_INITIALIZER; | 4936 UText bufferText = UTEXT_INITIALIZER; |
| 4926 | 4937 |
| 4927 utext_openUnicodeString(&bufferText, &buffer, &status); | 4938 utext_openUnicodeString(&bufferText, &buffer, &status); |
| 4928 | 4939 |
| 4929 /* | 4940 /* |
| 4930 * getText() and getUText() | 4941 * getText() and getUText() |
| 4931 */ | 4942 */ |
| 4932 { | 4943 { |
| 4933 UText text1 = UTEXT_INITIALIZER; | 4944 UText text1 = UTEXT_INITIALIZER; |
| 4934 UText text2 = UTEXT_INITIALIZER; | 4945 UText text2 = UTEXT_INITIALIZER; |
| 4935 UChar text2Chars[20]; | 4946 UChar text2Chars[20]; |
| 4936 UText *resultText; | 4947 UText *resultText; |
| 4937 | 4948 |
| 4938 status = U_ZERO_ERROR; | 4949 status = U_ZERO_ERROR; |
| 4939 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); | 4950 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); |
| 4940 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); | 4951 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); |
| 4941 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); | 4952 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); |
| 4942 utext_openUChars(&text2, text2Chars, -1, &status); | 4953 utext_openUChars(&text2, text2Chars, -1, &status); |
| 4943 | 4954 |
| 4944 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); | 4955 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); |
| 4945 re = uregex_openUText(&patternText, 0, NULL, &status); | 4956 re = uregex_openUText(&patternText, 0, NULL, &status); |
| 4946 | 4957 |
| 4947 /* First set a UText */ | 4958 /* First set a UText */ |
| 4948 uregex_setUText(re, &text1, &status); | 4959 uregex_setUText(re, &text1, &status); |
| 4949 resultText = uregex_getUText(re, &bufferText, &status); | 4960 resultText = uregex_getUText(re, &bufferText, &status); |
| 4950 REGEX_CHECK_STATUS; | 4961 REGEX_CHECK_STATUS; |
| 4951 REGEX_ASSERT(resultText == &bufferText); | 4962 REGEX_ASSERT(resultText == &bufferText); |
| 4952 utext_setNativeIndex(resultText, 0); | 4963 utext_setNativeIndex(resultText, 0); |
| 4953 utext_setNativeIndex(&text1, 0); | 4964 utext_setNativeIndex(&text1, 0); |
| 4954 REGEX_ASSERT(testUTextEqual(resultText, &text1)); | 4965 REGEX_ASSERT(testUTextEqual(resultText, &text1)); |
| 4955 | 4966 |
| 4956 resultText = uregex_getUText(re, &bufferText, &status); | 4967 resultText = uregex_getUText(re, &bufferText, &status); |
| 4957 REGEX_CHECK_STATUS; | 4968 REGEX_CHECK_STATUS; |
| 4958 REGEX_ASSERT(resultText == &bufferText); | 4969 REGEX_ASSERT(resultText == &bufferText); |
| 4959 utext_setNativeIndex(resultText, 0); | 4970 utext_setNativeIndex(resultText, 0); |
| 4960 utext_setNativeIndex(&text1, 0); | 4971 utext_setNativeIndex(&text1, 0); |
| 4961 REGEX_ASSERT(testUTextEqual(resultText, &text1)); | 4972 REGEX_ASSERT(testUTextEqual(resultText, &text1)); |
| 4962 | 4973 |
| 4963 /* Then set a UChar * */ | 4974 /* Then set a UChar * */ |
| 4964 uregex_setText(re, text2Chars, 7, &status); | 4975 uregex_setText(re, text2Chars, 7, &status); |
| 4965 resultText = uregex_getUText(re, &bufferText, &status); | 4976 resultText = uregex_getUText(re, &bufferText, &status); |
| 4966 REGEX_CHECK_STATUS; | 4977 REGEX_CHECK_STATUS; |
| 4967 REGEX_ASSERT(resultText == &bufferText); | 4978 REGEX_ASSERT(resultText == &bufferText); |
| 4968 utext_setNativeIndex(resultText, 0); | 4979 utext_setNativeIndex(resultText, 0); |
| 4969 utext_setNativeIndex(&text2, 0); | 4980 utext_setNativeIndex(&text2, 0); |
| 4970 REGEX_ASSERT(testUTextEqual(resultText, &text2)); | 4981 REGEX_ASSERT(testUTextEqual(resultText, &text2)); |
| 4971 | 4982 |
| 4972 uregex_close(re); | 4983 uregex_close(re); |
| 4973 utext_close(&text1); | 4984 utext_close(&text1); |
| 4974 utext_close(&text2); | 4985 utext_close(&text2); |
| 4975 } | 4986 } |
| 4976 | 4987 |
| 4977 /* | 4988 /* |
| 4978 * group() | 4989 * group() |
| 4979 */ | 4990 */ |
| 4980 { | 4991 { |
| 4981 UChar text1[80]; | 4992 UChar text1[80]; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 5007 | 5018 |
| 5008 /* Capture group out of range. Error. */ | 5019 /* Capture group out of range. Error. */ |
| 5009 status = U_ZERO_ERROR; | 5020 status = U_ZERO_ERROR; |
| 5010 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); | 5021 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); |
| 5011 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 5022 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 5012 REGEX_ASSERT(actual == &bufferText); | 5023 REGEX_ASSERT(actual == &bufferText); |
| 5013 | 5024 |
| 5014 uregex_close(re); | 5025 uregex_close(re); |
| 5015 | 5026 |
| 5016 } | 5027 } |
| 5017 | 5028 |
| 5018 /* | 5029 /* |
| 5019 * replaceFirst() | 5030 * replaceFirst() |
| 5020 */ | 5031 */ |
| 5021 { | 5032 { |
| 5022 UChar text1[80]; | 5033 UChar text1[80]; |
| 5023 UChar text2[80]; | 5034 UChar text2[80]; |
| 5024 UText replText = UTEXT_INITIALIZER; | 5035 UText replText = UTEXT_INITIALIZER; |
| 5025 UText *result; | 5036 UText *result; |
| 5026 | 5037 |
| 5027 status = U_ZERO_ERROR; | 5038 status = U_ZERO_ERROR; |
| 5028 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); | 5039 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); |
| 5029 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); | 5040 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 5030 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); | 5041 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); |
| 5031 | 5042 |
| 5032 re = uregex_openC("x(.*?)x", 0, NULL, &status); | 5043 re = uregex_openC("x(.*?)x", 0, NULL, &status); |
| 5033 REGEX_CHECK_STATUS; | 5044 REGEX_CHECK_STATUS; |
| 5034 | 5045 |
| 5035 /* Normal case, with match */ | 5046 /* Normal case, with match */ |
| 5036 uregex_setText(re, text1, -1, &status); | 5047 uregex_setText(re, text1, -1, &status); |
| 5037 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5048 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
| 5038 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5049 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
| 5039 REGEX_CHECK_STATUS; | 5050 REGEX_CHECK_STATUS; |
| 5040 REGEX_ASSERT(result == &bufferText); | 5051 REGEX_ASSERT(result == &bufferText); |
| 5041 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); | 5052 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); |
| 5042 | 5053 |
| 5043 /* No match. Text should copy to output with no changes. */ | 5054 /* No match. Text should copy to output with no changes. */ |
| 5044 uregex_setText(re, text2, -1, &status); | 5055 uregex_setText(re, text2, -1, &status); |
| 5045 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5056 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
| 5046 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5057 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
| 5047 REGEX_CHECK_STATUS; | 5058 REGEX_CHECK_STATUS; |
| 5048 REGEX_ASSERT(result == &bufferText); | 5059 REGEX_ASSERT(result == &bufferText); |
| 5049 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); | 5060 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); |
| 5050 | 5061 |
| 5051 /* Unicode escapes */ | 5062 /* Unicode escapes */ |
| 5052 uregex_setText(re, text1, -1, &status); | 5063 uregex_setText(re, text1, -1, &status); |
| 5053 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a"
, -1, &status); | 5064 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a"
, -1, &status); |
| 5054 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5065 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
| 5055 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5066 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
| 5056 REGEX_CHECK_STATUS; | 5067 REGEX_CHECK_STATUS; |
| 5057 REGEX_ASSERT(result == &bufferText); | 5068 REGEX_ASSERT(result == &bufferText); |
| 5058 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); | 5069 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); |
| 5059 | 5070 |
| 5060 uregex_close(re); | 5071 uregex_close(re); |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5097 | 5108 |
| 5098 uregex_close(re); | 5109 uregex_close(re); |
| 5099 utext_close(&replText); | 5110 utext_close(&replText); |
| 5100 } | 5111 } |
| 5101 | 5112 |
| 5102 | 5113 |
| 5103 /* | 5114 /* |
| 5104 * splitUText() uses the C++ API directly, and the UnicodeString version us
es mutable UTexts, | 5115 * splitUText() uses the C++ API directly, and the UnicodeString version us
es mutable UTexts, |
| 5105 * so we don't need to test it here. | 5116 * so we don't need to test it here. |
| 5106 */ | 5117 */ |
| 5107 | 5118 |
| 5108 utext_close(&bufferText); | 5119 utext_close(&bufferText); |
| 5109 utext_close(&patternText); | 5120 utext_close(&patternText); |
| 5110 } | 5121 } |
| 5111 | 5122 |
| 5112 //-------------------------------------------------------------- | 5123 //-------------------------------------------------------------- |
| 5113 // | 5124 // |
| 5114 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher
. | 5125 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher
. |
| 5115 // | 5126 // |
| 5116 //--------------------------------------------------------------- | 5127 //--------------------------------------------------------------- |
| 5117 void RegexTest::Bug7651() { | 5128 void RegexTest::Bug7651() { |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5172 { | 5183 { |
| 5173 UnicodeString str; | 5184 UnicodeString str; |
| 5174 str.setToBogus(); | 5185 str.setToBogus(); |
| 5175 pMatcher->reset(str); | 5186 pMatcher->reset(str); |
| 5176 status = U_ZERO_ERROR; | 5187 status = U_ZERO_ERROR; |
| 5177 pMatcher->matches(status); | 5188 pMatcher->matches(status); |
| 5178 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | 5189 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); |
| 5179 delete pMatcher; | 5190 delete pMatcher; |
| 5180 } | 5191 } |
| 5181 } | 5192 } |
| 5182 | 5193 |
| 5183 | 5194 |
| 5184 // Bug 7029 | 5195 // Bug 7029 |
| 5185 void RegexTest::Bug7029() { | 5196 void RegexTest::Bug7029() { |
| 5186 UErrorCode status = U_ZERO_ERROR; | 5197 UErrorCode status = U_ZERO_ERROR; |
| 5187 | 5198 |
| 5188 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); | 5199 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); |
| 5189 UnicodeString text = "abc.def"; | 5200 UnicodeString text = "abc.def"; |
| 5190 UnicodeString splits[10]; | 5201 UnicodeString splits[10]; |
| 5191 REGEX_CHECK_STATUS; | 5202 REGEX_CHECK_STATUS; |
| 5192 int32_t numFields = pMatcher->split(text, splits, 10, status); | 5203 int32_t numFields = pMatcher->split(text, splits, 10, status); |
| 5193 REGEX_CHECK_STATUS; | 5204 REGEX_CHECK_STATUS; |
| 5194 REGEX_ASSERT(numFields == 8); | 5205 REGEX_ASSERT(numFields == 8); |
| 5195 delete pMatcher; | 5206 delete pMatcher; |
| 5196 } | 5207 } |
| 5197 | 5208 |
| 5198 // Bug 9283 | 5209 // Bug 9283 |
| 5199 // This test is checking for the existence of any supplemental characters that
case-fold | 5210 // This test is checking for the existence of any supplemental characters that
case-fold |
| 5200 // to a bmp character. | 5211 // to a bmp character. |
| 5201 // | 5212 // |
| 5202 // At the time of this writing there are none. If any should appear in a subse
quent release | 5213 // At the time of this writing there are none. If any should appear in a subse
quent release |
| 5203 // of Unicode, the code in regular expressions compilation that determines the
longest | 5214 // of Unicode, the code in regular expressions compilation that determines the
longest |
| 5204 // possible match for a literal string will need to be enhanced. | 5215 // possible match for a literal string will need to be enhanced. |
| 5205 // | 5216 // |
| 5206 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() | 5217 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() |
| 5207 // for details on what to do in case of a failure of this test. | 5218 // for details on what to do in case of a failure of this test. |
| 5208 // | 5219 // |
| 5209 void RegexTest::Bug9283() { | 5220 void RegexTest::Bug9283() { |
| 5221 #if !UCONFIG_NO_NORMALIZATION |
| 5210 UErrorCode status = U_ZERO_ERROR; | 5222 UErrorCode status = U_ZERO_ERROR; |
| 5211 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]
]", status); | 5223 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]
]", status); |
| 5212 REGEX_CHECK_STATUS; | 5224 REGEX_CHECK_STATUS; |
| 5213 int32_t index; | 5225 int32_t index; |
| 5214 UChar32 c; | 5226 UChar32 c; |
| 5215 for (index=0; ; index++) { | 5227 for (index=0; ; index++) { |
| 5216 c = supplementalsWithCaseFolding.charAt(index); | 5228 c = supplementalsWithCaseFolding.charAt(index); |
| 5217 if (c == -1) { | 5229 if (c == -1) { |
| 5218 break; | 5230 break; |
| 5219 } | 5231 } |
| 5220 UnicodeString cf = UnicodeString(c).foldCase(); | 5232 UnicodeString cf = UnicodeString(c).foldCase(); |
| 5221 REGEX_ASSERT(cf.length() >= 2); | 5233 REGEX_ASSERT(cf.length() >= 2); |
| 5222 } | 5234 } |
| 5235 #endif /* #if !UCONFIG_NO_NORMALIZATION */ |
| 5223 } | 5236 } |
| 5224 | 5237 |
| 5225 | 5238 |
| 5226 void RegexTest::CheckInvBufSize() { | 5239 void RegexTest::CheckInvBufSize() { |
| 5227 if(inv_next>=INV_BUFSIZ) { | 5240 if(inv_next>=INV_BUFSIZ) { |
| 5228 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %
d )\n", | 5241 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %
d )\n", |
| 5229 __FILE__, INV_BUFSIZ, inv_next); | 5242 __FILE__, INV_BUFSIZ, inv_next); |
| 5230 } else { | 5243 } else { |
| 5231 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); | 5244 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); |
| 5232 } | 5245 } |
| 5233 } | 5246 } |
| 5234 | 5247 |
| 5235 void RegexTest::TestBug11371() { | 5248 |
| 5249 void RegexTest::Bug10459() { |
| 5236 UErrorCode status = U_ZERO_ERROR; | 5250 UErrorCode status = U_ZERO_ERROR; |
| 5237 UnicodeString patternString; | 5251 UnicodeString patternString("(txt)"); |
| 5252 UnicodeString txtString("txt"); |
| 5238 | 5253 |
| 5239 for (int i=0; i<8000000; i++) { | 5254 UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status); |
| 5240 patternString.append(UnicodeString("()")); | 5255 REGEX_CHECK_STATUS; |
| 5256 UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status); |
| 5257 REGEX_CHECK_STATUS; |
| 5258 |
| 5259 URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status); |
| 5260 REGEX_CHECK_STATUS; |
| 5261 |
| 5262 uregex_setUText(icu_re, utext_txt, &status); |
| 5263 REGEX_CHECK_STATUS; |
| 5264 |
| 5265 // The bug was that calling uregex_group() before doing a matching operation |
| 5266 // was causing a segfault. Only for Regular Expressions created from UText
. |
| 5267 // It should set an U_REGEX_INVALID_STATE. |
| 5268 |
| 5269 UChar buf[100]; |
| 5270 int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status); |
| 5271 REGEX_ASSERT(status == U_REGEX_INVALID_STATE); |
| 5272 REGEX_ASSERT(len == 0); |
| 5273 |
| 5274 uregex_close(icu_re); |
| 5275 utext_close(utext_pat); |
| 5276 utext_close(utext_txt); |
| 5277 } |
| 5278 |
| 5279 void RegexTest::TestCaseInsensitiveStarters() { |
| 5280 // Test that the data used by RegexCompile::findCaseInsensitiveStarters() ha
sn't |
| 5281 // become stale because of new Unicode characters. |
| 5282 // If it is stale, rerun the generation tool |
| 5283 // svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genre
gexcasing |
| 5284 // and replace the embedded data in i18n/regexcmp.cpp |
| 5285 |
| 5286 for (UChar32 cp=0; cp<=0x10ffff; cp++) { |
| 5287 if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) { |
| 5288 continue; |
| 5289 } |
| 5290 UnicodeSet s(cp, cp); |
| 5291 s.closeOver(USET_CASE_INSENSITIVE); |
| 5292 UnicodeSetIterator setIter(s); |
| 5293 while (setIter.next()) { |
| 5294 if (!setIter.isString()) { |
| 5295 continue; |
| 5296 } |
| 5297 const UnicodeString &str = setIter.getString(); |
| 5298 UChar32 firstChar = str.char32At(0); |
| 5299 UnicodeSet starters; |
| 5300 RegexCompile::findCaseInsensitiveStarters(firstChar, &starters); |
| 5301 if (!starters.contains(cp)) { |
| 5302 errln("CaseInsensitiveStarters for \\u%x is missing character \\
u%x.", cp, firstChar); |
| 5303 return; |
| 5304 } |
| 5305 } |
| 5241 } | 5306 } |
| 5307 } |
| 5308 |
| 5309 |
| 5310 void RegexTest::TestBug11049() { |
| 5311 // Original bug report: pattern with match start consisting of one of severa
l individual characters, |
| 5312 // and the text being matched ending with a supplementary character. find()
would read past the |
| 5313 // end of the input text when searching for potential match starting points
. |
| 5314 |
| 5315 // To see the problem, the text must exactly fill an allocated buffer, so th
at valgrind will |
| 5316 // detect the bad read. |
| 5317 |
| 5318 TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__); |
| 5319 TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__); |
| 5320 |
| 5321 // Test again with a pattern starting with a single character, |
| 5322 // which takes a different code path than starting with an OR expression, |
| 5323 // but with similar logic. |
| 5324 TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__); |
| 5325 TestCase11049("C", "string matches at end C", TRUE, __LINE__); |
| 5326 } |
| 5327 |
| 5328 // Run a single test case from TestBug11049(). Internal function. |
| 5329 void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expec
tMatch, int32_t lineNumber) { |
| 5330 UErrorCode status = U_ZERO_ERROR; |
| 5331 UnicodeString patternString = UnicodeString(pattern).unescape(); |
| 5242 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString,
0, status)); | 5332 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString,
0, status)); |
| 5243 if (status != U_REGEX_PATTERN_TOO_BIG) { | 5333 |
| 5244 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | 5334 UnicodeString dataString = UnicodeString(data).unescape(); |
| 5245 __FILE__, __LINE__, u_errorName(status)); | 5335 UChar *exactBuffer = new UChar[dataString.length()]; |
| 5336 dataString.extract(exactBuffer, dataString.length(), status); |
| 5337 UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status
); |
| 5338 |
| 5339 LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status)); |
| 5340 REGEX_CHECK_STATUS; |
| 5341 matcher->reset(ut); |
| 5342 UBool result = matcher->find(); |
| 5343 if (result != expectMatch) { |
| 5344 errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \
"%s\"", |
| 5345 __FILE__, lineNumber, expectMatch, result, pattern, data); |
| 5246 } | 5346 } |
| 5247 | 5347 |
| 5248 status = U_ZERO_ERROR; | 5348 // Rerun test with UTF-8 input text. Won't see buffer overreads, but could s
ee |
| 5249 patternString = "("; | 5349 // off-by-one on find() with match at the last code point. |
| 5250 for (int i=0; i<20000000; i++) { | 5350 // Size of the original char * data (invariant charset) will be >= the
equivalent UTF-8 |
| 5251 patternString.append(UnicodeString("A++")); | 5351 // because string.unescape() will only shrink it. |
| 5352 char * utf8Buffer = new char[uprv_strlen(data)+1]; |
| 5353 u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), d
ataString.length(), &status); |
| 5354 REGEX_CHECK_STATUS; |
| 5355 ut = utext_openUTF8(ut, utf8Buffer, -1, &status); |
| 5356 REGEX_CHECK_STATUS; |
| 5357 matcher->reset(ut); |
| 5358 result = matcher->find(); |
| 5359 if (result != expectMatch) { |
| 5360 errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \"
%s\", text = \"%s\"", |
| 5361 __FILE__, lineNumber, expectMatch, result, pattern, data); |
| 5252 } | 5362 } |
| 5253 patternString.append(UnicodeString("){0}B++")); | 5363 delete [] utf8Buffer; |
| 5254 LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString,
0, status)); | |
| 5255 if (status != U_REGEX_PATTERN_TOO_BIG) { | |
| 5256 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | |
| 5257 __FILE__, __LINE__, u_errorName(status)); | |
| 5258 } | |
| 5259 | 5364 |
| 5260 // Pattern with too much string data, such that string indexes overflow oper
and data. | 5365 utext_close(ut); |
| 5261 status = U_ZERO_ERROR; | 5366 delete [] exactBuffer; |
| 5262 patternString = ""; | 5367 } |
| 5263 while (patternString.length() < 0x00ffffff) { | |
| 5264 patternString.append(UnicodeString("stuff and things dont you know, thes
e are a few of my favorite strings\n")); | |
| 5265 } | |
| 5266 patternString.append(UnicodeString("X? trailing string")); | |
| 5267 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString,
0, status)); | |
| 5268 compiledPat3->dumpPattern(); | |
| 5269 if (status != U_REGEX_PATTERN_TOO_BIG) { | |
| 5270 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | |
| 5271 __FILE__, __LINE__, u_errorName(status)); | |
| 5272 } | |
| 5273 | 5368 |
| 5274 | 5369 |
| 5275 | 5370 |
| 5276 } | 5371 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
| 5277 | 5372 |
| 5278 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
| OLD | NEW |