OLD | NEW |
1 /******************************************************************** | 1 /******************************************************************** |
2 * COPYRIGHT: | 2 * COPYRIGHT: |
3 * Copyright (c) 2002-2013, International Business Machines Corporation and | 3 * Copyright (c) 2002-2014, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ********************************************************************/ | 5 ********************************************************************/ |
6 | 6 |
7 // | 7 // |
8 // regextst.cpp | 8 // regextst.cpp |
9 // | 9 // |
10 // ICU Regular Expressions test, part of intltest. | 10 // ICU Regular Expressions test, part of intltest. |
11 // | 11 // |
12 | 12 |
13 /* | 13 /* |
14 NOTE!! | 14 NOTE!! |
15 | 15 |
16 PLEASE be careful about ASCII assumptions in this test. | 16 PLEASE be careful about ASCII assumptions in this test. |
17 This test is one of the worst repeat offenders. | 17 This test is one of the worst repeat offenders. |
18 If you have questions, contact someone on the ICU PMC | 18 If you have questions, contact someone on the ICU PMC |
19 who has access to an EBCDIC system. | 19 who has access to an EBCDIC system. |
20 | 20 |
21 */ | 21 */ |
22 | 22 |
23 #include "intltest.h" | 23 #include "intltest.h" |
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
25 | 25 |
| 26 #include "unicode/localpointer.h" |
26 #include "unicode/regex.h" | 27 #include "unicode/regex.h" |
27 #include "unicode/uchar.h" | 28 #include "unicode/uchar.h" |
28 #include "unicode/ucnv.h" | 29 #include "unicode/ucnv.h" |
29 #include "unicode/uniset.h" | 30 #include "unicode/uniset.h" |
| 31 #include "unicode/uregex.h" |
| 32 #include "unicode/usetiter.h" |
30 #include "unicode/ustring.h" | 33 #include "unicode/ustring.h" |
31 #include "regextst.h" | 34 #include "regextst.h" |
| 35 #include "regexcmp.h" |
32 #include "uvector.h" | 36 #include "uvector.h" |
33 #include "util.h" | 37 #include "util.h" |
34 #include <stdlib.h> | 38 #include <stdlib.h> |
35 #include <string.h> | 39 #include <string.h> |
36 #include <stdio.h> | 40 #include <stdio.h> |
37 #include "cstring.h" | 41 #include "cstring.h" |
38 #include "uinvchar.h" | 42 #include "uinvchar.h" |
39 | 43 |
40 #define SUPPORT_MUTATING_INPUT_STRING 0 | 44 #define SUPPORT_MUTATING_INPUT_STRING 0 |
41 | 45 |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
124 break; | 128 break; |
125 case 19: name = "Bug 7029"; | 129 case 19: name = "Bug 7029"; |
126 if (exec) Bug7029(); | 130 if (exec) Bug7029(); |
127 break; | 131 break; |
128 case 20: name = "CheckInvBufSize"; | 132 case 20: name = "CheckInvBufSize"; |
129 if (exec) CheckInvBufSize(); | 133 if (exec) CheckInvBufSize(); |
130 break; | 134 break; |
131 case 21: name = "Bug 9283"; | 135 case 21: name = "Bug 9283"; |
132 if (exec) Bug9283(); | 136 if (exec) Bug9283(); |
133 break; | 137 break; |
134 case 22: name = "TestBug11371"; | 138 case 22: name = "Bug10459"; |
135 if (exec) TestBug11371(); | 139 if (exec) Bug10459(); |
136 break; | 140 break; |
137 | 141 case 23: name = "TestCaseInsensitiveStarters"; |
| 142 if (exec) TestCaseInsensitiveStarters(); |
| 143 break; |
| 144 case 24: name = "TestBug11049"; |
| 145 if (exec) TestBug11049(); |
| 146 break; |
138 default: name = ""; | 147 default: name = ""; |
139 break; //needed to end loop | 148 break; //needed to end loop |
140 } | 149 } |
141 } | 150 } |
142 | 151 |
143 | 152 |
144 | 153 |
145 /** | 154 /** |
146 * Calls utext_openUTF8 after, potentially, converting invariant text from the c
ompilation codepage | 155 * Calls utext_openUTF8 after, potentially, converting invariant text from the c
ompilation codepage |
147 * into ASCII. | 156 * into ASCII. |
148 * @see utext_openUTF8 | 157 * @see utext_openUTF8 |
149 */ | 158 */ |
150 static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t
length, UErrorCode *status); | 159 static UText* regextst_openUTF8FromInvariant(UText* ut, const char *inv, int64_t
length, UErrorCode *status); |
151 | 160 |
152 //--------------------------------------------------------------------------- | 161 //--------------------------------------------------------------------------- |
153 // | 162 // |
154 // Error Checking / Reporting macros used in all of the tests. | 163 // Error Checking / Reporting macros used in all of the tests. |
155 // | 164 // |
156 //--------------------------------------------------------------------------- | 165 //--------------------------------------------------------------------------- |
157 | 166 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
203 UChar ch = buf[i]; | 212 UChar ch = buf[i]; |
204 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); | 213 sprintf(ASSERT_BUF+strlen(ASSERT_BUF),"\\u%02x",ch); |
205 } | 214 } |
206 } | 215 } |
207 } | 216 } |
208 } | 217 } |
209 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; | 218 ASSERT_BUF[sizeof(ASSERT_BUF)-1] = 0; |
210 return ASSERT_BUF; | 219 return ASSERT_BUF; |
211 } | 220 } |
212 | 221 |
213 | |
214 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)
/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text,
buf);} | 222 #define REGEX_VERBOSE_TEXT(text) {char buf[200];utextToPrintable(buf,sizeof(buf)
/sizeof(buf[0]),text);logln("%s:%d: UText %s=\"%s\"", __FILE__, __LINE__, #text,
buf);} |
215 | 223 |
216 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest
failure. status=%s", \ | 224 #define REGEX_CHECK_STATUS {if (U_FAILURE(status)) {dataerrln("%s:%d: RegexTest
failure. status=%s", \ |
217 __FILE__, __LINE__
, u_errorName(status)); return;}} | 225 __FILE__, __LINE__
, u_errorName(status)); return;}} |
218 | 226 |
219 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure:
REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} | 227 #define REGEX_ASSERT(expr) {if ((expr)==FALSE) {errln("%s:%d: RegexTest failure:
REGEX_ASSERT(%s) failed \n", __FILE__, __LINE__, #expr);};} |
220 | 228 |
221 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr)
;\ | 229 #define REGEX_ASSERT_FAIL(expr, errcode) {UErrorCode status=U_ZERO_ERROR; (expr)
;\ |
222 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
%s, got %s", \ | 230 if (status!=errcode) {dataerrln("RegexTest failure at line %d. Expected status=
%s, got %s", \ |
223 __LINE__, u_errorName(errcode), u_errorName(status));};} | 231 __LINE__, u_errorName(errcode), u_errorName(status));};} |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
289 char buf[201 /*21*/]; | 297 char buf[201 /*21*/]; |
290 char expectedBuf[201]; | 298 char expectedBuf[201]; |
291 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); | 299 utextToPrintable(buf, sizeof(buf)/sizeof(buf[0]), actual); |
292 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0])
, &expectedText); | 300 utextToPrintable(expectedBuf, sizeof(expectedBuf)/sizeof(expectedBuf[0])
, &expectedText); |
293 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars)
, got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe
ctedText), buf, (int)utext_nativeLength(actual)); | 301 errln("%s:%d: assertUTextInvariant: Failure: expected \"%s\" (%d uchars)
, got \"%s\" (%d chars)", file, line, expectedBuf, (int)utext_nativeLength(&expe
ctedText), buf, (int)utext_nativeLength(actual)); |
294 } | 302 } |
295 utext_close(&expectedText); | 303 utext_close(&expectedText); |
296 } | 304 } |
297 | 305 |
298 /** | 306 /** |
299 * Assumes utf-8 input | 307 * Assumes utf-8 input |
300 */ | 308 */ |
301 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua
l), __FILE__, __LINE__) | 309 #define REGEX_ASSERT_UTEXT_UTF8(expected, actual) assertUText((expected), (actua
l), __FILE__, __LINE__) |
302 /** | 310 /** |
303 * Assumes Invariant input | 311 * Assumes Invariant input |
304 */ | 312 */ |
305 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp
ected), (actual), __FILE__, __LINE__) | 313 #define REGEX_ASSERT_UTEXT_INVARIANT(expected, actual) assertUTextInvariant((exp
ected), (actual), __FILE__, __LINE__) |
306 | 314 |
307 /** | 315 /** |
308 * This buffer ( inv_buf ) is used to hold the UTF-8 strings | 316 * This buffer ( inv_buf ) is used to hold the UTF-8 strings |
309 * passed into utext_openUTF8. An error will be given if | 317 * passed into utext_openUTF8. An error will be given if |
310 * INV_BUFSIZ is too small. It's only used on EBCDIC systems. | 318 * INV_BUFSIZ is too small. It's only used on EBCDIC systems. |
311 */ | 319 */ |
312 | 320 |
313 #define INV_BUFSIZ 2048 /* increase this if too small */ | 321 #define INV_BUFSIZ 2048 /* increase this if too small */ |
314 | 322 |
315 static int64_t inv_next=0; | 323 static int64_t inv_next=0; |
316 | 324 |
317 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY | 325 #if U_CHARSET_FAMILY!=U_ASCII_FAMILY |
318 static char inv_buf[INV_BUFSIZ]; | 326 static char inv_buf[INV_BUFSIZ]; |
319 #endif | 327 #endif |
320 | 328 |
321 static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t
length, UErrorCode *status) { | 329 static UText* regextst_openUTF8FromInvariant(UText *ut, const char *inv, int64_t
length, UErrorCode *status) { |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
369 RegexMatcher *REMatcher = NULL; | 377 RegexMatcher *REMatcher = NULL; |
370 UBool retVal = TRUE; | 378 UBool retVal = TRUE; |
371 | 379 |
372 UnicodeString patString(pat, -1, US_INV); | 380 UnicodeString patString(pat, -1, US_INV); |
373 REPattern = RegexPattern::compile(patString, 0, pe, status); | 381 REPattern = RegexPattern::compile(patString, 0, pe, status); |
374 if (U_FAILURE(status)) { | 382 if (U_FAILURE(status)) { |
375 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta
tus = %s", | 383 dataerrln("RegexTest failure in RegexPattern::compile() at line %d. Sta
tus = %s", |
376 line, u_errorName(status)); | 384 line, u_errorName(status)); |
377 return FALSE; | 385 return FALSE; |
378 } | 386 } |
379 if (line==376) { RegexPatternDump(REPattern);} | 387 if (line==376) { REPattern->dumpPattern();} |
380 | 388 |
381 UnicodeString inputString(inputText); | 389 UnicodeString inputString(inputText); |
382 UnicodeString unEscapedInput = inputString.unescape(); | 390 UnicodeString unEscapedInput = inputString.unescape(); |
383 REMatcher = REPattern->matcher(unEscapedInput, status); | 391 REMatcher = REPattern->matcher(unEscapedInput, status); |
384 if (U_FAILURE(status)) { | 392 if (U_FAILURE(status)) { |
385 errln("RegexTest failure in REPattern::matcher() at line %d. Status = %
s\n", | 393 errln("RegexTest failure in REPattern::matcher() at line %d. Status = %
s\n", |
386 line, u_errorName(status)); | 394 line, u_errorName(status)); |
387 return FALSE; | 395 return FALSE; |
388 } | 396 } |
389 | 397 |
(...skipping 15 matching lines...) Expand all Loading... |
405 errln("RegexTest failure in matches() at line %d. Status = %s\n", | 413 errln("RegexTest failure in matches() at line %d. Status = %s\n", |
406 line, u_errorName(status)); | 414 line, u_errorName(status)); |
407 retVal = FALSE; | 415 retVal = FALSE; |
408 } | 416 } |
409 if (actualmatch != match) { | 417 if (actualmatch != match) { |
410 errln("RegexTest: wrong return from matches() at line %d.\n", line); | 418 errln("RegexTest: wrong return from matches() at line %d.\n", line); |
411 retVal = FALSE; | 419 retVal = FALSE; |
412 } | 420 } |
413 | 421 |
414 if (retVal == FALSE) { | 422 if (retVal == FALSE) { |
415 RegexPatternDump(REPattern); | 423 REPattern->dumpPattern(); |
416 } | 424 } |
417 | 425 |
418 delete REPattern; | 426 delete REPattern; |
419 delete REMatcher; | 427 delete REMatcher; |
420 return retVal; | 428 return retVal; |
421 } | 429 } |
422 | 430 |
423 | 431 |
424 UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
ing, UBool match, int32_t line) { | 432 UBool RegexTest::doRegexLMTestUTF8(const char *pat, const char *text, UBool look
ing, UBool match, int32_t line) { |
425 UText pattern = UTEXT_INITIALIZER; | 433 UText pattern = UTEXT_INITIALIZER; |
426 int32_t inputUTF8Length; | 434 int32_t inputUTF8Length; |
427 char *textChars = NULL; | 435 char *textChars = NULL; |
428 UText inputText = UTEXT_INITIALIZER; | 436 UText inputText = UTEXT_INITIALIZER; |
429 UErrorCode status = U_ZERO_ERROR; | 437 UErrorCode status = U_ZERO_ERROR; |
430 UParseError pe; | 438 UParseError pe; |
431 RegexPattern *REPattern = NULL; | 439 RegexPattern *REPattern = NULL; |
432 RegexMatcher *REMatcher = NULL; | 440 RegexMatcher *REMatcher = NULL; |
433 UBool retVal = TRUE; | 441 UBool retVal = TRUE; |
434 | 442 |
435 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); | 443 regextst_openUTF8FromInvariant(&pattern, pat, -1, &status); |
436 REPattern = RegexPattern::compile(&pattern, 0, pe, status); | 444 REPattern = RegexPattern::compile(&pattern, 0, pe, status); |
437 if (U_FAILURE(status)) { | 445 if (U_FAILURE(status)) { |
438 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8
). Status = %s\n", | 446 dataerrln("RegexTest failure in RegexPattern::compile() at line %d (UTF8
). Status = %s\n", |
439 line, u_errorName(status)); | 447 line, u_errorName(status)); |
440 return FALSE; | 448 return FALSE; |
441 } | 449 } |
442 | 450 |
443 UnicodeString inputString(text, -1, US_INV); | 451 UnicodeString inputString(text, -1, US_INV); |
444 UnicodeString unEscapedInput = inputString.unescape(); | 452 UnicodeString unEscapedInput = inputString.unescape(); |
445 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); | 453 LocalUConverterPointer UTF8Converter(ucnv_open("UTF8", &status)); |
446 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N
ULL, NULL, NULL, &status); | 454 ucnv_setFromUCallBack(UTF8Converter.getAlias(), UCNV_FROM_U_CALLBACK_STOP, N
ULL, NULL, NULL, &status); |
447 | 455 |
448 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(),
status); | 456 inputUTF8Length = unEscapedInput.extract(NULL, 0, UTF8Converter.getAlias(),
status); |
449 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { | 457 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) { |
450 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en | 458 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en |
451 logln("RegexTest unable to convert input to UTF8 at line %d. Status = %
s\n", line, u_errorName(status)); | 459 logln("RegexTest unable to convert input to UTF8 at line %d. Status = %
s\n", line, u_errorName(status)); |
452 return TRUE; // not a failure of the Regex engine | 460 return TRUE; // not a failure of the Regex engine |
453 } | 461 } |
454 status = U_ZERO_ERROR; // buffer overflow | 462 status = U_ZERO_ERROR; // buffer overflow |
455 textChars = new char[inputUTF8Length+1]; | 463 textChars = new char[inputUTF8Length+1]; |
456 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(
), status); | 464 unEscapedInput.extract(textChars, inputUTF8Length+1, UTF8Converter.getAlias(
), status); |
457 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); | 465 utext_openUTF8(&inputText, textChars, inputUTF8Length, &status); |
458 | 466 |
459 REMatcher = &REPattern->matcher(status)->reset(&inputText); | 467 REMatcher = &REPattern->matcher(status)->reset(&inputText); |
460 if (U_FAILURE(status)) { | 468 if (U_FAILURE(status)) { |
461 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta
tus = %s\n", | 469 errln("RegexTest failure in REPattern::matcher() at line %d (UTF8). Sta
tus = %s\n", |
462 line, u_errorName(status)); | 470 line, u_errorName(status)); |
463 return FALSE; | 471 return FALSE; |
464 } | 472 } |
465 | 473 |
466 UBool actualmatch; | 474 UBool actualmatch; |
467 actualmatch = REMatcher->lookingAt(status); | 475 actualmatch = REMatcher->lookingAt(status); |
468 if (U_FAILURE(status)) { | 476 if (U_FAILURE(status)) { |
(...skipping 12 matching lines...) Expand all Loading... |
481 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n"
, | 489 errln("RegexTest failure in matches() at line %d (UTF8). Status = %s\n"
, |
482 line, u_errorName(status)); | 490 line, u_errorName(status)); |
483 retVal = FALSE; | 491 retVal = FALSE; |
484 } | 492 } |
485 if (actualmatch != match) { | 493 if (actualmatch != match) { |
486 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin
e); | 494 errln("RegexTest: wrong return from matches() at line %d (UTF8).\n", lin
e); |
487 retVal = FALSE; | 495 retVal = FALSE; |
488 } | 496 } |
489 | 497 |
490 if (retVal == FALSE) { | 498 if (retVal == FALSE) { |
491 RegexPatternDump(REPattern); | 499 REPattern->dumpPattern(); |
492 } | 500 } |
493 | 501 |
494 delete REPattern; | 502 delete REPattern; |
495 delete REMatcher; | 503 delete REMatcher; |
496 utext_close(&inputText); | 504 utext_close(&inputText); |
497 utext_close(&pattern); | 505 utext_close(&pattern); |
498 delete[] textChars; | 506 delete[] textChars; |
499 return retVal; | 507 return retVal; |
500 } | 508 } |
501 | 509 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
547 if (status != expectedStatus) { | 555 if (status != expectedStatus) { |
548 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err
orName(status)); | 556 dataerrln("Line %d: unexpected error %s compiling pattern.", line, u_err
orName(status)); |
549 } else { | 557 } else { |
550 if (status != U_ZERO_ERROR) { | 558 if (status != U_ZERO_ERROR) { |
551 if (pe.line != errLine || pe.offset != errCol) { | 559 if (pe.line != errLine || pe.offset != errCol) { |
552 errln("Line %d: incorrect line/offset from UParseError. Expecte
d %d/%d; got %d/%d.\n", | 560 errln("Line %d: incorrect line/offset from UParseError. Expecte
d %d/%d; got %d/%d.\n", |
553 line, errLine, errCol, pe.line, pe.offset); | 561 line, errLine, errCol, pe.line, pe.offset); |
554 } | 562 } |
555 } | 563 } |
556 } | 564 } |
557 | 565 |
558 delete callerPattern; | 566 delete callerPattern; |
559 utext_close(&patternText); | 567 utext_close(&patternText); |
560 } | 568 } |
561 | 569 |
562 | 570 |
563 | 571 |
564 //--------------------------------------------------------------------------- | 572 //--------------------------------------------------------------------------- |
565 // | 573 // |
566 // Basic Check for basic functionality of regex pattern matching. | 574 // Basic Check for basic functionality of regex pattern matching. |
567 // Avoid the use of REGEX_FIND test macro, which has | 575 // Avoid the use of REGEX_FIND test macro, which has |
568 // substantial dependencies on basic Regex functionality. | 576 // substantial dependencies on basic Regex functionality. |
569 // | 577 // |
570 //--------------------------------------------------------------------------- | 578 //--------------------------------------------------------------------------- |
571 void RegexTest::Basic() { | 579 void RegexTest::Basic() { |
572 | 580 |
573 | 581 |
574 // | 582 // |
575 // Debug - slide failing test cases early | 583 // Debug - slide failing test cases early |
576 // | 584 // |
577 #if 0 | 585 #if 0 |
578 { | 586 { |
579 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); | 587 // REGEX_TESTLM("a\N{LATIN SMALL LETTER B}c", "abc", FALSE, FALSE); |
580 UParseError pe; | 588 UParseError pe; |
581 UErrorCode status = U_ZERO_ERROR; | 589 UErrorCode status = U_ZERO_ERROR; |
582 RegexPattern *pattern; | 590 RegexPattern *pattern; |
583 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc
ape(), UREGEX_CASE_INSENSITIVE, pe, status); | 591 pattern = RegexPattern::compile(UNICODE_STRING_SIMPLE("a\\u00dfx").unesc
ape(), UREGEX_CASE_INSENSITIVE, pe, status); |
584 RegexPatternDump(pattern); | 592 pattern->dumpPattern(); |
585 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz")
.unescape(), status); | 593 RegexMatcher *m = pattern->matcher(UNICODE_STRING_SIMPLE("a\\u00dfxzzz")
.unescape(), status); |
586 UBool result = m->find(); | 594 UBool result = m->find(); |
587 printf("result = %d\n", result); | 595 printf("result = %d\n", result); |
588 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); | 596 // REGEX_FIND("", "<0>ab<1>cc</1><2>ccc</2></0>ddd"); |
589 // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX===================="); | 597 // REGEX_FIND("(X([abc=X]+)+X)|(y[abc=]+)", "=XX===================="); |
590 } | 598 } |
591 exit(1); | 599 exit(1); |
592 #endif | 600 #endif |
593 | 601 |
594 | 602 |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
722 // implementation. | 730 // implementation. |
723 // | 731 // |
724 //--------------------------------------------------------------------------- | 732 //--------------------------------------------------------------------------- |
725 void RegexTest::UTextBasic() { | 733 void RegexTest::UTextBasic() { |
726 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ | 734 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ |
727 UErrorCode status = U_ZERO_ERROR; | 735 UErrorCode status = U_ZERO_ERROR; |
728 UText pattern = UTEXT_INITIALIZER; | 736 UText pattern = UTEXT_INITIALIZER; |
729 utext_openUTF8(&pattern, str_abc, -1, &status); | 737 utext_openUTF8(&pattern, str_abc, -1, &status); |
730 RegexMatcher matcher(&pattern, 0, status); | 738 RegexMatcher matcher(&pattern, 0, status); |
731 REGEX_CHECK_STATUS; | 739 REGEX_CHECK_STATUS; |
732 | 740 |
733 UText input = UTEXT_INITIALIZER; | 741 UText input = UTEXT_INITIALIZER; |
734 utext_openUTF8(&input, str_abc, -1, &status); | 742 utext_openUTF8(&input, str_abc, -1, &status); |
735 REGEX_CHECK_STATUS; | 743 REGEX_CHECK_STATUS; |
736 matcher.reset(&input); | 744 matcher.reset(&input); |
737 REGEX_CHECK_STATUS; | 745 REGEX_CHECK_STATUS; |
738 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); | 746 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); |
739 | 747 |
740 matcher.reset(matcher.inputText()); | 748 matcher.reset(matcher.inputText()); |
741 REGEX_CHECK_STATUS; | 749 REGEX_CHECK_STATUS; |
742 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); | 750 REGEX_ASSERT_UTEXT_UTF8(str_abc, matcher.inputText()); |
743 | 751 |
744 utext_close(&pattern); | 752 utext_close(&pattern); |
745 utext_close(&input); | 753 utext_close(&input); |
746 } | 754 } |
747 | 755 |
748 | 756 |
749 //--------------------------------------------------------------------------- | 757 //--------------------------------------------------------------------------- |
750 // | 758 // |
751 // API_Match Test that the API for class RegexMatcher | 759 // API_Match Test that the API for class RegexMatcher |
752 // is present and nominally working, but excluding functions | 760 // is present and nominally working, but excluding functions |
753 // implementing replace operations. | 761 // implementing replace operations. |
(...skipping 356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1110 UErrorCode status = U_ZERO_ERROR; | 1118 UErrorCode status = U_ZERO_ERROR; |
1111 RegexPattern *p = RegexPattern::compile(".", 0, status); | 1119 RegexPattern *p = RegexPattern::compile(".", 0, status); |
1112 RegexMatcher *m = p->matcher(status); | 1120 RegexMatcher *m = p->matcher(status); |
1113 REGEX_CHECK_STATUS; | 1121 REGEX_CHECK_STATUS; |
1114 | 1122 |
1115 REGEX_ASSERT(m->find() == FALSE); | 1123 REGEX_ASSERT(m->find() == FALSE); |
1116 REGEX_ASSERT(m->input() == ""); | 1124 REGEX_ASSERT(m->input() == ""); |
1117 delete m; | 1125 delete m; |
1118 delete p; | 1126 delete p; |
1119 } | 1127 } |
1120 | 1128 |
1121 // | 1129 // |
1122 // Regions | 1130 // Regions |
1123 // | 1131 // |
1124 { | 1132 { |
1125 UErrorCode status = U_ZERO_ERROR; | 1133 UErrorCode status = U_ZERO_ERROR; |
1126 UnicodeString testString("This is test data"); | 1134 UnicodeString testString("This is test data"); |
1127 RegexMatcher m(".*", testString, 0, status); | 1135 RegexMatcher m(".*", testString, 0, status); |
1128 REGEX_CHECK_STATUS; | 1136 REGEX_CHECK_STATUS; |
1129 REGEX_ASSERT(m.regionStart() == 0); | 1137 REGEX_ASSERT(m.regionStart() == 0); |
1130 REGEX_ASSERT(m.regionEnd() == testString.length()); | 1138 REGEX_ASSERT(m.regionEnd() == testString.length()); |
1131 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1139 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
1132 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1140 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
1133 | 1141 |
1134 m.region(2,4, status); | 1142 m.region(2,4, status); |
1135 REGEX_CHECK_STATUS; | 1143 REGEX_CHECK_STATUS; |
1136 REGEX_ASSERT(m.matches(status)); | 1144 REGEX_ASSERT(m.matches(status)); |
1137 REGEX_ASSERT(m.start(status)==2); | 1145 REGEX_ASSERT(m.start(status)==2); |
1138 REGEX_ASSERT(m.end(status)==4); | 1146 REGEX_ASSERT(m.end(status)==4); |
1139 REGEX_CHECK_STATUS; | 1147 REGEX_CHECK_STATUS; |
1140 | 1148 |
1141 m.reset(); | 1149 m.reset(); |
1142 REGEX_ASSERT(m.regionStart() == 0); | 1150 REGEX_ASSERT(m.regionStart() == 0); |
1143 REGEX_ASSERT(m.regionEnd() == testString.length()); | 1151 REGEX_ASSERT(m.regionEnd() == testString.length()); |
1144 | 1152 |
1145 UnicodeString shorterString("short"); | 1153 UnicodeString shorterString("short"); |
1146 m.reset(shorterString); | 1154 m.reset(shorterString); |
1147 REGEX_ASSERT(m.regionStart() == 0); | 1155 REGEX_ASSERT(m.regionStart() == 0); |
1148 REGEX_ASSERT(m.regionEnd() == shorterString.length()); | 1156 REGEX_ASSERT(m.regionEnd() == shorterString.length()); |
1149 | 1157 |
1150 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1158 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
1151 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); | 1159 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); |
1152 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 1160 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
1153 REGEX_ASSERT(&m == &m.reset()); | 1161 REGEX_ASSERT(&m == &m.reset()); |
1154 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 1162 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
1155 | 1163 |
1156 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); | 1164 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); |
1157 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1165 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
1158 REGEX_ASSERT(&m == &m.reset()); | 1166 REGEX_ASSERT(&m == &m.reset()); |
1159 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 1167 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
1160 | 1168 |
1161 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1169 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
1162 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); | 1170 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); |
1163 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 1171 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
1164 REGEX_ASSERT(&m == &m.reset()); | 1172 REGEX_ASSERT(&m == &m.reset()); |
1165 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 1173 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
1166 | 1174 |
1167 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); | 1175 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); |
1168 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1176 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
1169 REGEX_ASSERT(&m == &m.reset()); | 1177 REGEX_ASSERT(&m == &m.reset()); |
1170 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 1178 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
1171 | 1179 |
1172 } | 1180 } |
1173 | 1181 |
1174 // | 1182 // |
1175 // hitEnd() and requireEnd() | 1183 // hitEnd() and requireEnd() |
1176 // | 1184 // |
1177 { | 1185 { |
1178 UErrorCode status = U_ZERO_ERROR; | 1186 UErrorCode status = U_ZERO_ERROR; |
1179 UnicodeString testString("aabb"); | 1187 UnicodeString testString("aabb"); |
1180 RegexMatcher m1(".*", testString, 0, status); | 1188 RegexMatcher m1(".*", testString, 0, status); |
1181 REGEX_ASSERT(m1.lookingAt(status) == TRUE); | 1189 REGEX_ASSERT(m1.lookingAt(status) == TRUE); |
1182 REGEX_ASSERT(m1.hitEnd() == TRUE); | 1190 REGEX_ASSERT(m1.hitEnd() == TRUE); |
1183 REGEX_ASSERT(m1.requireEnd() == FALSE); | 1191 REGEX_ASSERT(m1.requireEnd() == FALSE); |
1184 REGEX_CHECK_STATUS; | 1192 REGEX_CHECK_STATUS; |
1185 | 1193 |
1186 status = U_ZERO_ERROR; | 1194 status = U_ZERO_ERROR; |
1187 RegexMatcher m2("a*", testString, 0, status); | 1195 RegexMatcher m2("a*", testString, 0, status); |
1188 REGEX_ASSERT(m2.lookingAt(status) == TRUE); | 1196 REGEX_ASSERT(m2.lookingAt(status) == TRUE); |
1189 REGEX_ASSERT(m2.hitEnd() == FALSE); | 1197 REGEX_ASSERT(m2.hitEnd() == FALSE); |
1190 REGEX_ASSERT(m2.requireEnd() == FALSE); | 1198 REGEX_ASSERT(m2.requireEnd() == FALSE); |
1191 REGEX_CHECK_STATUS; | 1199 REGEX_CHECK_STATUS; |
1192 | 1200 |
1193 status = U_ZERO_ERROR; | 1201 status = U_ZERO_ERROR; |
1194 RegexMatcher m3(".*$", testString, 0, status); | 1202 RegexMatcher m3(".*$", testString, 0, status); |
1195 REGEX_ASSERT(m3.lookingAt(status) == TRUE); | 1203 REGEX_ASSERT(m3.lookingAt(status) == TRUE); |
(...skipping 17 matching lines...) Expand all Loading... |
1213 m.reset(ucharString); // should not compile. | 1221 m.reset(ucharString); // should not compile. |
1214 | 1222 |
1215 RegexPattern *p = RegexPattern::compile(".", 0, status); | 1223 RegexPattern *p = RegexPattern::compile(".", 0, status); |
1216 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co
mpile. | 1224 RegexMatcher *m2 = p->matcher(ucharString, status); // should not co
mpile. |
1217 | 1225 |
1218 RegexMatcher m3(".", ucharString, 0, status); // Should not compile | 1226 RegexMatcher m3(".", ucharString, 0, status); // Should not compile |
1219 } | 1227 } |
1220 #endif | 1228 #endif |
1221 | 1229 |
1222 // | 1230 // |
1223 // Time Outs. | 1231 // Time Outs. |
1224 // Note: These tests will need to be changed when the regexp engine i
s | 1232 // Note: These tests will need to be changed when the regexp engine i
s |
1225 // able to detect and cut short the exponential time behavior o
n | 1233 // able to detect and cut short the exponential time behavior o
n |
1226 // this type of match. | 1234 // this type of match. |
1227 // | 1235 // |
1228 { | 1236 { |
1229 UErrorCode status = U_ZERO_ERROR; | 1237 UErrorCode status = U_ZERO_ERROR; |
1230 // Enough 'a's in the string to cause the match to time out. | 1238 // Enough 'a's in the string to cause the match to time out. |
1231 // (Each on additonal 'a' doubles the time) | 1239 // (Each on additonal 'a' doubles the time) |
1232 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); | 1240 UnicodeString testString("aaaaaaaaaaaaaaaaaaaaa"); |
1233 RegexMatcher matcher("(a+)+b", testString, 0, status); | 1241 RegexMatcher matcher("(a+)+b", testString, 0, status); |
1234 REGEX_CHECK_STATUS; | 1242 REGEX_CHECK_STATUS; |
1235 REGEX_ASSERT(matcher.getTimeLimit() == 0); | 1243 REGEX_ASSERT(matcher.getTimeLimit() == 0); |
1236 matcher.setTimeLimit(100, status); | 1244 matcher.setTimeLimit(100, status); |
1237 REGEX_ASSERT(matcher.getTimeLimit() == 100); | 1245 REGEX_ASSERT(matcher.getTimeLimit() == 100); |
1238 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1246 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
1239 REGEX_ASSERT(status == U_REGEX_TIME_OUT); | 1247 REGEX_ASSERT(status == U_REGEX_TIME_OUT); |
1240 } | 1248 } |
1241 { | 1249 { |
1242 UErrorCode status = U_ZERO_ERROR; | 1250 UErrorCode status = U_ZERO_ERROR; |
1243 // Few enough 'a's to slip in under the time limit. | 1251 // Few enough 'a's to slip in under the time limit. |
1244 UnicodeString testString("aaaaaaaaaaaaaaaaaa"); | 1252 UnicodeString testString("aaaaaaaaaaaaaaaaaa"); |
1245 RegexMatcher matcher("(a+)+b", testString, 0, status); | 1253 RegexMatcher matcher("(a+)+b", testString, 0, status); |
1246 REGEX_CHECK_STATUS; | 1254 REGEX_CHECK_STATUS; |
1247 matcher.setTimeLimit(100, status); | 1255 matcher.setTimeLimit(100, status); |
1248 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1256 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
1249 REGEX_CHECK_STATUS; | 1257 REGEX_CHECK_STATUS; |
1250 } | 1258 } |
1251 | 1259 |
1252 // | 1260 // |
1253 // Stack Limits | 1261 // Stack Limits |
1254 // | 1262 // |
1255 { | 1263 { |
1256 UErrorCode status = U_ZERO_ERROR; | 1264 UErrorCode status = U_ZERO_ERROR; |
1257 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000,
filled with 'A' | 1265 UnicodeString testString(1000000, 0x41, 1000000); // Length 1,000,000,
filled with 'A' |
1258 | 1266 |
1259 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits opt
imizations | 1267 // Adding the capturing parentheses to the pattern "(A)+A$" inhibits opt
imizations |
1260 // of the '+', and makes the stack frames larger. | 1268 // of the '+', and makes the stack frames larger. |
1261 RegexMatcher matcher("(A)+A$", testString, 0, status); | 1269 RegexMatcher matcher("(A)+A$", testString, 0, status); |
1262 | 1270 |
1263 // With the default stack, this match should fail to run | 1271 // With the default stack, this match should fail to run |
1264 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1272 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
1265 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); | 1273 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); |
1266 | 1274 |
1267 // With unlimited stack, it should run | 1275 // With unlimited stack, it should run |
1268 status = U_ZERO_ERROR; | 1276 status = U_ZERO_ERROR; |
1269 matcher.setStackLimit(0, status); | 1277 matcher.setStackLimit(0, status); |
1270 REGEX_CHECK_STATUS; | 1278 REGEX_CHECK_STATUS; |
1271 REGEX_ASSERT(matcher.lookingAt(status) == TRUE); | 1279 REGEX_ASSERT(matcher.lookingAt(status) == TRUE); |
1272 REGEX_CHECK_STATUS; | 1280 REGEX_CHECK_STATUS; |
1273 REGEX_ASSERT(matcher.getStackLimit() == 0); | 1281 REGEX_ASSERT(matcher.getStackLimit() == 0); |
1274 | 1282 |
1275 // With a limited stack, the match should fail | 1283 // With a limited stack, the match should fail |
1276 status = U_ZERO_ERROR; | 1284 status = U_ZERO_ERROR; |
1277 matcher.setStackLimit(10000, status); | 1285 matcher.setStackLimit(10000, status); |
1278 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); | 1286 REGEX_ASSERT(matcher.lookingAt(status) == FALSE); |
1279 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); | 1287 REGEX_ASSERT(status == U_REGEX_STACK_OVERFLOW); |
1280 REGEX_ASSERT(matcher.getStackLimit() == 10000); | 1288 REGEX_ASSERT(matcher.getStackLimit() == 10000); |
1281 } | 1289 } |
1282 | 1290 |
1283 // A pattern that doesn't save state should work with | 1291 // A pattern that doesn't save state should work with |
1284 // a minimal sized stack | 1292 // a minimal sized stack |
1285 { | 1293 { |
1286 UErrorCode status = U_ZERO_ERROR; | 1294 UErrorCode status = U_ZERO_ERROR; |
1287 UnicodeString testString = "abc"; | 1295 UnicodeString testString = "abc"; |
1288 RegexMatcher matcher("abc", testString, 0, status); | 1296 RegexMatcher matcher("abc", testString, 0, status); |
1289 REGEX_CHECK_STATUS; | 1297 REGEX_CHECK_STATUS; |
1290 matcher.setStackLimit(30, status); | 1298 matcher.setStackLimit(30, status); |
1291 REGEX_CHECK_STATUS; | 1299 REGEX_CHECK_STATUS; |
1292 REGEX_ASSERT(matcher.matches(status) == TRUE); | 1300 REGEX_ASSERT(matcher.matches(status) == TRUE); |
1293 REGEX_CHECK_STATUS; | 1301 REGEX_CHECK_STATUS; |
1294 REGEX_ASSERT(matcher.getStackLimit() == 30); | 1302 REGEX_ASSERT(matcher.getStackLimit() == 30); |
1295 | 1303 |
1296 // Negative stack sizes should fail | 1304 // Negative stack sizes should fail |
1297 status = U_ZERO_ERROR; | 1305 status = U_ZERO_ERROR; |
1298 matcher.setStackLimit(1000, status); | 1306 matcher.setStackLimit(1000, status); |
1299 REGEX_CHECK_STATUS; | 1307 REGEX_CHECK_STATUS; |
1300 matcher.setStackLimit(-1, status); | 1308 matcher.setStackLimit(-1, status); |
1301 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | 1309 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); |
1302 REGEX_ASSERT(matcher.getStackLimit() == 1000); | 1310 REGEX_ASSERT(matcher.getStackLimit() == 1000); |
1303 } | 1311 } |
1304 | 1312 |
1305 | 1313 |
1306 } | 1314 } |
1307 | 1315 |
1308 | 1316 |
1309 | 1317 |
1310 | 1318 |
1311 | 1319 |
1312 | 1320 |
1313 //--------------------------------------------------------------------------- | 1321 //--------------------------------------------------------------------------- |
1314 // | 1322 // |
(...skipping 528 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1843 REGEX_CHECK_STATUS; | 1851 REGEX_CHECK_STATUS; |
1844 | 1852 |
1845 UText input1 = UTEXT_INITIALIZER; | 1853 UText input1 = UTEXT_INITIALIZER; |
1846 UText input2 = UTEXT_INITIALIZER; | 1854 UText input2 = UTEXT_INITIALIZER; |
1847 UText empty = UTEXT_INITIALIZER; | 1855 UText empty = UTEXT_INITIALIZER; |
1848 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &st
atus); | 1856 regextst_openUTF8FromInvariant(&input1, "abcdef this is a test", -1, &st
atus); |
1849 REGEX_VERBOSE_TEXT(&input1); | 1857 REGEX_VERBOSE_TEXT(&input1); |
1850 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); | 1858 regextst_openUTF8FromInvariant(&input2, "not abc", -1, &status); |
1851 REGEX_VERBOSE_TEXT(&input2); | 1859 REGEX_VERBOSE_TEXT(&input2); |
1852 utext_openUChars(&empty, NULL, 0, &status); | 1860 utext_openUChars(&empty, NULL, 0, &status); |
1853 | 1861 |
1854 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not na
tivelen (input1) ? */ | 1862 int32_t input1Len = strlen("abcdef this is a test"); /* TODO: why not na
tivelen (input1) ? */ |
1855 int32_t input2Len = strlen("not abc"); | 1863 int32_t input2Len = strlen("not abc"); |
1856 | 1864 |
1857 | 1865 |
1858 // | 1866 // |
1859 // Matcher creation and reset. | 1867 // Matcher creation and reset. |
1860 // | 1868 // |
1861 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); | 1869 RegexMatcher *m1 = &pat2->matcher(status)->reset(&input1); |
1862 REGEX_CHECK_STATUS; | 1870 REGEX_CHECK_STATUS; |
1863 REGEX_ASSERT(m1->lookingAt(status) == TRUE); | 1871 REGEX_ASSERT(m1->lookingAt(status) == TRUE); |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1953 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); | 1961 REGEX_ASSERT(m1->lookingAt(-1, status) == FALSE); |
1954 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 1962 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
1955 status = U_ZERO_ERROR; | 1963 status = U_ZERO_ERROR; |
1956 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); | 1964 REGEX_ASSERT(m1->lookingAt(input2Len, status) == FALSE); |
1957 REGEX_CHECK_STATUS; | 1965 REGEX_CHECK_STATUS; |
1958 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); | 1966 REGEX_ASSERT(m1->lookingAt(input2Len+1, status) == FALSE); |
1959 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 1967 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
1960 | 1968 |
1961 delete m1; | 1969 delete m1; |
1962 delete pat2; | 1970 delete pat2; |
1963 | 1971 |
1964 utext_close(&re); | 1972 utext_close(&re); |
1965 utext_close(&input1); | 1973 utext_close(&input1); |
1966 utext_close(&input2); | 1974 utext_close(&input2); |
1967 utext_close(&empty); | 1975 utext_close(&empty); |
1968 } | 1976 } |
1969 | 1977 |
1970 | 1978 |
1971 // | 1979 // |
1972 // Capture Group. | 1980 // Capture Group. |
1973 // RegexMatcher::start(); | 1981 // RegexMatcher::start(); |
1974 // RegexMatcher::end(); | 1982 // RegexMatcher::end(); |
1975 // RegexMatcher::groupCount(); | 1983 // RegexMatcher::groupCount(); |
1976 // | 1984 // |
1977 { | 1985 { |
1978 int32_t flags=0; | 1986 int32_t flags=0; |
1979 UParseError pe; | 1987 UParseError pe; |
1980 UErrorCode status=U_ZERO_ERROR; | 1988 UErrorCode status=U_ZERO_ERROR; |
1981 UText re=UTEXT_INITIALIZER; | 1989 UText re=UTEXT_INITIALIZER; |
1982 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x
34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67
)(.*) */ | 1990 const char str_01234567_pat[] = { 0x30, 0x31, 0x28, 0x32, 0x33, 0x28, 0x
34, 0x35, 0x29, 0x36, 0x37, 0x29, 0x28, 0x2e, 0x2a, 0x29, 0x00 }; /* 01(23(45)67
)(.*) */ |
1983 utext_openUTF8(&re, str_01234567_pat, -1, &status); | 1991 utext_openUTF8(&re, str_01234567_pat, -1, &status); |
1984 | 1992 |
1985 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); | 1993 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); |
1986 REGEX_CHECK_STATUS; | 1994 REGEX_CHECK_STATUS; |
1987 | 1995 |
1988 UText input = UTEXT_INITIALIZER; | 1996 UText input = UTEXT_INITIALIZER; |
1989 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36
, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ | 1997 const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36
, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ |
1990 utext_openUTF8(&input, str_0123456789, -1, &status); | 1998 utext_openUTF8(&input, str_0123456789, -1, &status); |
1991 | 1999 |
1992 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); | 2000 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); |
1993 REGEX_CHECK_STATUS; | 2001 REGEX_CHECK_STATUS; |
1994 REGEX_ASSERT(matcher->lookingAt(status) == TRUE); | 2002 REGEX_ASSERT(matcher->lookingAt(status) == TRUE); |
1995 static const int32_t matchStarts[] = {0, 2, 4, 8}; | 2003 static const int32_t matchStarts[] = {0, 2, 4, 8}; |
1996 static const int32_t matchEnds[] = {10, 8, 6, 10}; | 2004 static const int32_t matchEnds[] = {10, 8, 6, 10}; |
1997 int32_t i; | 2005 int32_t i; |
(...skipping 14 matching lines...) Expand all Loading... |
2012 | 2020 |
2013 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); | 2021 REGEX_ASSERT(matcher->start(0, status) == matcher->start(status)); |
2014 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); | 2022 REGEX_ASSERT(matcher->end(0, status) == matcher->end(status)); |
2015 | 2023 |
2016 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2024 REGEX_ASSERT_FAIL(matcher->start(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
2017 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2025 REGEX_ASSERT_FAIL(matcher->start( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
2018 matcher->reset(); | 2026 matcher->reset(); |
2019 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); | 2027 REGEX_ASSERT_FAIL(matcher->start( 0, status), U_REGEX_INVALID_STATE); |
2020 | 2028 |
2021 matcher->lookingAt(status); | 2029 matcher->lookingAt(status); |
2022 | 2030 |
2023 UnicodeString dest; | 2031 UnicodeString dest; |
2024 UText destText = UTEXT_INITIALIZER; | 2032 UText destText = UTEXT_INITIALIZER; |
2025 utext_openUnicodeString(&destText, &dest, &status); | 2033 utext_openUnicodeString(&destText, &dest, &status); |
2026 UText *result; | 2034 UText *result; |
2027 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x
36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ | 2035 //const char str_0123456789[] = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x
36, 0x37, 0x38, 0x39, 0x00 }; /* 0123456789 */ |
2028 //» Test shallow-clone API | 2036 // Test shallow-clone API |
2029 int64_t group_len; | 2037 int64_t group_len; |
2030 result = matcher->group((UText *)NULL, group_len, status); | 2038 result = matcher->group((UText *)NULL, group_len, status); |
2031 REGEX_CHECK_STATUS; | 2039 REGEX_CHECK_STATUS; |
2032 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2040 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
2033 utext_close(result); | 2041 utext_close(result); |
2034 result = matcher->group(0, &destText, group_len, status); | 2042 result = matcher->group(0, &destText, group_len, status); |
2035 REGEX_CHECK_STATUS; | 2043 REGEX_CHECK_STATUS; |
2036 REGEX_ASSERT(result == &destText); | 2044 REGEX_ASSERT(result == &destText); |
2037 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2045 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
2038 // destText is now immutable, reopen it | 2046 // destText is now immutable, reopen it |
2039 utext_close(&destText); | 2047 utext_close(&destText); |
2040 utext_openUnicodeString(&destText, &dest, &status); | 2048 utext_openUnicodeString(&destText, &dest, &status); |
2041 | 2049 |
2042 result = matcher->group(0, NULL, status); | 2050 result = matcher->group(0, NULL, status); |
2043 REGEX_CHECK_STATUS; | 2051 REGEX_CHECK_STATUS; |
2044 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2052 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
2045 utext_close(result); | 2053 utext_close(result); |
2046 result = matcher->group(0, &destText, status); | 2054 result = matcher->group(0, &destText, status); |
2047 REGEX_CHECK_STATUS; | 2055 REGEX_CHECK_STATUS; |
2048 REGEX_ASSERT(result == &destText); | 2056 REGEX_ASSERT(result == &destText); |
2049 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); | 2057 REGEX_ASSERT_UTEXT_UTF8(str_0123456789, result); |
2050 | 2058 |
2051 result = matcher->group(1, NULL, status); | 2059 result = matcher->group(1, NULL, status); |
2052 REGEX_CHECK_STATUS; | 2060 REGEX_CHECK_STATUS; |
2053 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 };
/* 234567 */ | 2061 const char str_234567[] = { 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x00 };
/* 234567 */ |
2054 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); | 2062 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); |
2055 utext_close(result); | 2063 utext_close(result); |
2056 result = matcher->group(1, &destText, status); | 2064 result = matcher->group(1, &destText, status); |
2057 REGEX_CHECK_STATUS; | 2065 REGEX_CHECK_STATUS; |
2058 REGEX_ASSERT(result == &destText); | 2066 REGEX_ASSERT(result == &destText); |
2059 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); | 2067 REGEX_ASSERT_UTEXT_UTF8(str_234567, result); |
2060 | 2068 |
2061 result = matcher->group(2, NULL, status); | 2069 result = matcher->group(2, NULL, status); |
2062 REGEX_CHECK_STATUS; | 2070 REGEX_CHECK_STATUS; |
2063 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ | 2071 const char str_45[] = { 0x34, 0x35, 0x00 }; /* 45 */ |
2064 REGEX_ASSERT_UTEXT_UTF8(str_45, result); | 2072 REGEX_ASSERT_UTEXT_UTF8(str_45, result); |
2065 utext_close(result); | 2073 utext_close(result); |
2066 result = matcher->group(2, &destText, status); | 2074 result = matcher->group(2, &destText, status); |
2067 REGEX_CHECK_STATUS; | 2075 REGEX_CHECK_STATUS; |
2068 REGEX_ASSERT(result == &destText); | 2076 REGEX_ASSERT(result == &destText); |
2069 REGEX_ASSERT_UTEXT_UTF8(str_45, result); | 2077 REGEX_ASSERT_UTEXT_UTF8(str_45, result); |
2070 | 2078 |
2071 result = matcher->group(3, NULL, status); | 2079 result = matcher->group(3, NULL, status); |
2072 REGEX_CHECK_STATUS; | 2080 REGEX_CHECK_STATUS; |
2073 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ | 2081 const char str_89[] = { 0x38, 0x39, 0x00 }; /* 89 */ |
2074 REGEX_ASSERT_UTEXT_UTF8(str_89, result); | 2082 REGEX_ASSERT_UTEXT_UTF8(str_89, result); |
2075 utext_close(result); | 2083 utext_close(result); |
2076 result = matcher->group(3, &destText, status); | 2084 result = matcher->group(3, &destText, status); |
2077 REGEX_CHECK_STATUS; | 2085 REGEX_CHECK_STATUS; |
2078 REGEX_ASSERT(result == &destText); | 2086 REGEX_ASSERT(result == &destText); |
2079 REGEX_ASSERT_UTEXT_UTF8(str_89, result); | 2087 REGEX_ASSERT_UTEXT_UTF8(str_89, result); |
2080 | 2088 |
2081 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2089 REGEX_ASSERT_FAIL(matcher->group(-1, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
2082 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; | 2090 REGEX_ASSERT_FAIL(matcher->group( 4, status), U_INDEX_OUTOFBOUNDS_ERROR)
; |
2083 matcher->reset(); | 2091 matcher->reset(); |
2084 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); | 2092 REGEX_ASSERT_FAIL(matcher->group( 0, status), U_REGEX_INVALID_STATE); |
2085 | 2093 |
2086 delete matcher; | 2094 delete matcher; |
2087 delete pat; | 2095 delete pat; |
2088 | 2096 |
2089 utext_close(&destText); | 2097 utext_close(&destText); |
2090 utext_close(&input); | 2098 utext_close(&input); |
2091 utext_close(&re); | 2099 utext_close(&re); |
2092 } | 2100 } |
2093 | 2101 |
2094 // | 2102 // |
2095 // find | 2103 // find |
2096 // | 2104 // |
2097 { | 2105 { |
2098 int32_t flags=0; | 2106 int32_t flags=0; |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2139 | 2147 |
2140 status = U_ZERO_ERROR; | 2148 status = U_ZERO_ERROR; |
2141 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); | 2149 REGEX_ASSERT_FAIL(matcher->find(-1, status), U_INDEX_OUTOFBOUNDS_ERROR); |
2142 status = U_ZERO_ERROR; | 2150 status = U_ZERO_ERROR; |
2143 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); | 2151 REGEX_ASSERT_FAIL(matcher->find(18, status), U_INDEX_OUTOFBOUNDS_ERROR); |
2144 | 2152 |
2145 REGEX_ASSERT(matcher->groupCount() == 0); | 2153 REGEX_ASSERT(matcher->groupCount() == 0); |
2146 | 2154 |
2147 delete matcher; | 2155 delete matcher; |
2148 delete pat; | 2156 delete pat; |
2149 | 2157 |
2150 utext_close(&input); | 2158 utext_close(&input); |
2151 utext_close(&re); | 2159 utext_close(&re); |
2152 } | 2160 } |
2153 | 2161 |
2154 | 2162 |
2155 // | 2163 // |
2156 // find, with \G in pattern (true if at the end of a previous match). | 2164 // find, with \G in pattern (true if at the end of a previous match). |
2157 // | 2165 // |
2158 { | 2166 { |
2159 int32_t flags=0; | 2167 int32_t flags=0; |
2160 UParseError pe; | 2168 UParseError pe; |
2161 UErrorCode status=U_ZERO_ERROR; | 2169 UErrorCode status=U_ZERO_ERROR; |
2162 UText re=UTEXT_INITIALIZER; | 2170 UText re=UTEXT_INITIALIZER; |
2163 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0
x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x0
0 }; /* .*?(?:(\\Gabc)|(abc)) */ | 2171 const char str_Gabcabc[] = { 0x2e, 0x2a, 0x3f, 0x28, 0x3f, 0x3a, 0x28, 0
x5c, 0x47, 0x61, 0x62, 0x63, 0x29, 0x7c, 0x28, 0x61, 0x62, 0x63, 0x29, 0x29, 0x0
0 }; /* .*?(?:(\\Gabc)|(abc)) */ |
2164 utext_openUTF8(&re, str_Gabcabc, -1, &status); | 2172 utext_openUTF8(&re, str_Gabcabc, -1, &status); |
2165 | 2173 |
2166 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); | 2174 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); |
2167 | 2175 |
2168 REGEX_CHECK_STATUS; | 2176 REGEX_CHECK_STATUS; |
2169 UText input = UTEXT_INITIALIZER; | 2177 UText input = UTEXT_INITIALIZER; |
2170 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ | 2178 const char str_abcabcabc[] = { 0x2e, 0x61, 0x62, 0x63, 0x61, 0x62, 0x63,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abcabc.abc.. */ |
2171 utext_openUTF8(&input, str_abcabcabc, -1, &status); | 2179 utext_openUTF8(&input, str_abcabcabc, -1, &status); |
2172 // 012345678901234567 | 2180 // 012345678901234567 |
2173 | 2181 |
2174 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); | 2182 RegexMatcher *matcher = &pat->matcher(status)->reset(&input); |
2175 REGEX_CHECK_STATUS; | 2183 REGEX_CHECK_STATUS; |
2176 REGEX_ASSERT(matcher->find()); | 2184 REGEX_ASSERT(matcher->find()); |
2177 REGEX_ASSERT(matcher->start(status) == 0); | 2185 REGEX_ASSERT(matcher->start(status) == 0); |
2178 REGEX_ASSERT(matcher->start(1, status) == -1); | 2186 REGEX_ASSERT(matcher->start(1, status) == -1); |
2179 REGEX_ASSERT(matcher->start(2, status) == 1); | 2187 REGEX_ASSERT(matcher->start(2, status) == 1); |
2180 | 2188 |
2181 REGEX_ASSERT(matcher->find()); | 2189 REGEX_ASSERT(matcher->find()); |
2182 REGEX_ASSERT(matcher->start(status) == 4); | 2190 REGEX_ASSERT(matcher->start(status) == 4); |
2183 REGEX_ASSERT(matcher->start(1, status) == 4); | 2191 REGEX_ASSERT(matcher->start(1, status) == 4); |
2184 REGEX_ASSERT(matcher->start(2, status) == -1); | 2192 REGEX_ASSERT(matcher->start(2, status) == -1); |
2185 REGEX_CHECK_STATUS; | 2193 REGEX_CHECK_STATUS; |
2186 | 2194 |
2187 delete matcher; | 2195 delete matcher; |
2188 delete pat; | 2196 delete pat; |
2189 | 2197 |
2190 utext_close(&input); | 2198 utext_close(&input); |
2191 utext_close(&re); | 2199 utext_close(&re); |
2192 } | 2200 } |
2193 | 2201 |
2194 // | 2202 // |
2195 // find with zero length matches, match position should bump ahead | 2203 // find with zero length matches, match position should bump ahead |
2196 // to prevent loops. | 2204 // to prevent loops. |
2197 // | 2205 // |
2198 { | 2206 { |
2199 int32_t i; | 2207 int32_t i; |
(...skipping 19 matching lines...) Expand all Loading... |
2219 utext_openUTF8(&s, (char *)aboveBMP, -1, &status); | 2227 utext_openUTF8(&s, (char *)aboveBMP, -1, &status); |
2220 m.reset(&s); | 2228 m.reset(&s); |
2221 for (i=0; ; i+=4) { | 2229 for (i=0; ; i+=4) { |
2222 if (m.find() == FALSE) { | 2230 if (m.find() == FALSE) { |
2223 break; | 2231 break; |
2224 } | 2232 } |
2225 REGEX_ASSERT(m.start(status) == i); | 2233 REGEX_ASSERT(m.start(status) == i); |
2226 REGEX_ASSERT(m.end(status) == i); | 2234 REGEX_ASSERT(m.end(status) == i); |
2227 } | 2235 } |
2228 REGEX_ASSERT(i==20); | 2236 REGEX_ASSERT(i==20); |
2229 | 2237 |
2230 utext_close(&s); | 2238 utext_close(&s); |
2231 } | 2239 } |
2232 { | 2240 { |
2233 // find() loop breaking test. | 2241 // find() loop breaking test. |
2234 // with pattern of /.?/, should see a series of one char matches,
then a single | 2242 // with pattern of /.?/, should see a series of one char matches,
then a single |
2235 // match of zero length at the end of the input string. | 2243 // match of zero length at the end of the input string. |
2236 int32_t i; | 2244 int32_t i; |
2237 UErrorCode status=U_ZERO_ERROR; | 2245 UErrorCode status=U_ZERO_ERROR; |
2238 RegexMatcher m(".?", 0, status); | 2246 RegexMatcher m(".?", 0, status); |
2239 REGEX_CHECK_STATUS; | 2247 REGEX_CHECK_STATUS; |
2240 UText s = UTEXT_INITIALIZER; | 2248 UText s = UTEXT_INITIALIZER; |
2241 utext_openUTF8(&s, " ", -1, &status); | 2249 utext_openUTF8(&s, " ", -1, &status); |
2242 m.reset(&s); | 2250 m.reset(&s); |
2243 for (i=0; ; i++) { | 2251 for (i=0; ; i++) { |
2244 if (m.find() == FALSE) { | 2252 if (m.find() == FALSE) { |
2245 break; | 2253 break; |
2246 } | 2254 } |
2247 REGEX_ASSERT(m.start(status) == i); | 2255 REGEX_ASSERT(m.start(status) == i); |
2248 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); | 2256 REGEX_ASSERT(m.end(status) == (i<4 ? i+1 : i)); |
2249 } | 2257 } |
2250 REGEX_ASSERT(i==5); | 2258 REGEX_ASSERT(i==5); |
2251 | 2259 |
2252 utext_close(&s); | 2260 utext_close(&s); |
2253 } | 2261 } |
2254 | 2262 |
2255 | 2263 |
2256 // | 2264 // |
2257 // Matchers with no input string behave as if they had an empty input string
. | 2265 // Matchers with no input string behave as if they had an empty input string
. |
2258 // | 2266 // |
2259 | 2267 |
2260 { | 2268 { |
2261 UErrorCode status = U_ZERO_ERROR; | 2269 UErrorCode status = U_ZERO_ERROR; |
2262 RegexMatcher m(".?", 0, status); | 2270 RegexMatcher m(".?", 0, status); |
2263 REGEX_CHECK_STATUS; | 2271 REGEX_CHECK_STATUS; |
2264 REGEX_ASSERT(m.find()); | 2272 REGEX_ASSERT(m.find()); |
2265 REGEX_ASSERT(m.start(status) == 0); | 2273 REGEX_ASSERT(m.start(status) == 0); |
2266 REGEX_ASSERT(m.input() == ""); | 2274 REGEX_ASSERT(m.input() == ""); |
2267 } | 2275 } |
2268 { | 2276 { |
2269 UErrorCode status = U_ZERO_ERROR; | 2277 UErrorCode status = U_ZERO_ERROR; |
2270 RegexPattern *p = RegexPattern::compile(".", 0, status); | 2278 RegexPattern *p = RegexPattern::compile(".", 0, status); |
2271 RegexMatcher *m = p->matcher(status); | 2279 RegexMatcher *m = p->matcher(status); |
2272 REGEX_CHECK_STATUS; | 2280 REGEX_CHECK_STATUS; |
2273 | 2281 |
2274 REGEX_ASSERT(m->find() == FALSE); | 2282 REGEX_ASSERT(m->find() == FALSE); |
2275 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); | 2283 REGEX_ASSERT(utext_nativeLength(m->inputText()) == 0); |
2276 delete m; | 2284 delete m; |
2277 delete p; | 2285 delete p; |
2278 } | 2286 } |
2279 | 2287 |
2280 // | 2288 // |
2281 // Regions | 2289 // Regions |
2282 // | 2290 // |
2283 { | 2291 { |
2284 UErrorCode status = U_ZERO_ERROR; | 2292 UErrorCode status = U_ZERO_ERROR; |
2285 UText testPattern = UTEXT_INITIALIZER; | 2293 UText testPattern = UTEXT_INITIALIZER; |
2286 UText testText = UTEXT_INITIALIZER; | 2294 UText testText = UTEXT_INITIALIZER; |
2287 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); | 2295 regextst_openUTF8FromInvariant(&testPattern, ".*", -1, &status); |
2288 REGEX_VERBOSE_TEXT(&testPattern); | 2296 REGEX_VERBOSE_TEXT(&testPattern); |
2289 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &stat
us); | 2297 regextst_openUTF8FromInvariant(&testText, "This is test data", -1, &stat
us); |
2290 REGEX_VERBOSE_TEXT(&testText); | 2298 REGEX_VERBOSE_TEXT(&testText); |
2291 | 2299 |
2292 RegexMatcher m(&testPattern, &testText, 0, status); | 2300 RegexMatcher m(&testPattern, &testText, 0, status); |
2293 REGEX_CHECK_STATUS; | 2301 REGEX_CHECK_STATUS; |
2294 REGEX_ASSERT(m.regionStart() == 0); | 2302 REGEX_ASSERT(m.regionStart() == 0); |
2295 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); | 2303 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); |
2296 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2304 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
2297 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2305 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
2298 | 2306 |
2299 m.region(2,4, status); | 2307 m.region(2,4, status); |
2300 REGEX_CHECK_STATUS; | 2308 REGEX_CHECK_STATUS; |
2301 REGEX_ASSERT(m.matches(status)); | 2309 REGEX_ASSERT(m.matches(status)); |
2302 REGEX_ASSERT(m.start(status)==2); | 2310 REGEX_ASSERT(m.start(status)==2); |
2303 REGEX_ASSERT(m.end(status)==4); | 2311 REGEX_ASSERT(m.end(status)==4); |
2304 REGEX_CHECK_STATUS; | 2312 REGEX_CHECK_STATUS; |
2305 | 2313 |
2306 m.reset(); | 2314 m.reset(); |
2307 REGEX_ASSERT(m.regionStart() == 0); | 2315 REGEX_ASSERT(m.regionStart() == 0); |
2308 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); | 2316 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("This is test data")); |
2309 | 2317 |
2310 regextst_openUTF8FromInvariant(&testText, "short", -1, &status); | 2318 regextst_openUTF8FromInvariant(&testText, "short", -1, &status); |
2311 REGEX_VERBOSE_TEXT(&testText); | 2319 REGEX_VERBOSE_TEXT(&testText); |
2312 m.reset(&testText); | 2320 m.reset(&testText); |
2313 REGEX_ASSERT(m.regionStart() == 0); | 2321 REGEX_ASSERT(m.regionStart() == 0); |
2314 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); | 2322 REGEX_ASSERT(m.regionEnd() == (int32_t)strlen("short")); |
2315 | 2323 |
2316 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2324 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
2317 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); | 2325 REGEX_ASSERT(&m == &m.useAnchoringBounds(FALSE)); |
2318 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 2326 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
2319 REGEX_ASSERT(&m == &m.reset()); | 2327 REGEX_ASSERT(&m == &m.reset()); |
2320 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); | 2328 REGEX_ASSERT(m.hasAnchoringBounds() == FALSE); |
2321 | 2329 |
2322 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); | 2330 REGEX_ASSERT(&m == &m.useAnchoringBounds(TRUE)); |
2323 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2331 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
2324 REGEX_ASSERT(&m == &m.reset()); | 2332 REGEX_ASSERT(&m == &m.reset()); |
2325 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); | 2333 REGEX_ASSERT(m.hasAnchoringBounds() == TRUE); |
2326 | 2334 |
2327 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2335 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
2328 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); | 2336 REGEX_ASSERT(&m == &m.useTransparentBounds(TRUE)); |
2329 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 2337 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
2330 REGEX_ASSERT(&m == &m.reset()); | 2338 REGEX_ASSERT(&m == &m.reset()); |
2331 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); | 2339 REGEX_ASSERT(m.hasTransparentBounds() == TRUE); |
2332 | 2340 |
2333 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); | 2341 REGEX_ASSERT(&m == &m.useTransparentBounds(FALSE)); |
2334 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2342 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
2335 REGEX_ASSERT(&m == &m.reset()); | 2343 REGEX_ASSERT(&m == &m.reset()); |
2336 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); | 2344 REGEX_ASSERT(m.hasTransparentBounds() == FALSE); |
2337 | 2345 |
2338 utext_close(&testText); | 2346 utext_close(&testText); |
2339 utext_close(&testPattern); | 2347 utext_close(&testPattern); |
2340 } | 2348 } |
2341 | 2349 |
2342 // | 2350 // |
2343 // hitEnd() and requireEnd() | 2351 // hitEnd() and requireEnd() |
2344 // | 2352 // |
2345 { | 2353 { |
2346 UErrorCode status = U_ZERO_ERROR; | 2354 UErrorCode status = U_ZERO_ERROR; |
2347 UText testPattern = UTEXT_INITIALIZER; | 2355 UText testPattern = UTEXT_INITIALIZER; |
2348 UText testText = UTEXT_INITIALIZER; | 2356 UText testText = UTEXT_INITIALIZER; |
2349 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ | 2357 const char str_[] = { 0x2e, 0x2a, 0x00 }; /* .* */ |
2350 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ | 2358 const char str_aabb[] = { 0x61, 0x61, 0x62, 0x62, 0x00 }; /* aabb */ |
2351 utext_openUTF8(&testPattern, str_, -1, &status); | 2359 utext_openUTF8(&testPattern, str_, -1, &status); |
2352 utext_openUTF8(&testText, str_aabb, -1, &status); | 2360 utext_openUTF8(&testText, str_aabb, -1, &status); |
2353 | 2361 |
2354 RegexMatcher m1(&testPattern, &testText, 0, status); | 2362 RegexMatcher m1(&testPattern, &testText, 0, status); |
2355 REGEX_ASSERT(m1.lookingAt(status) == TRUE); | 2363 REGEX_ASSERT(m1.lookingAt(status) == TRUE); |
2356 REGEX_ASSERT(m1.hitEnd() == TRUE); | 2364 REGEX_ASSERT(m1.hitEnd() == TRUE); |
2357 REGEX_ASSERT(m1.requireEnd() == FALSE); | 2365 REGEX_ASSERT(m1.requireEnd() == FALSE); |
2358 REGEX_CHECK_STATUS; | 2366 REGEX_CHECK_STATUS; |
2359 | 2367 |
2360 status = U_ZERO_ERROR; | 2368 status = U_ZERO_ERROR; |
2361 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ | 2369 const char str_a[] = { 0x61, 0x2a, 0x00 }; /* a* */ |
2362 utext_openUTF8(&testPattern, str_a, -1, &status); | 2370 utext_openUTF8(&testPattern, str_a, -1, &status); |
2363 RegexMatcher m2(&testPattern, &testText, 0, status); | 2371 RegexMatcher m2(&testPattern, &testText, 0, status); |
2364 REGEX_ASSERT(m2.lookingAt(status) == TRUE); | 2372 REGEX_ASSERT(m2.lookingAt(status) == TRUE); |
2365 REGEX_ASSERT(m2.hitEnd() == FALSE); | 2373 REGEX_ASSERT(m2.hitEnd() == FALSE); |
2366 REGEX_ASSERT(m2.requireEnd() == FALSE); | 2374 REGEX_ASSERT(m2.requireEnd() == FALSE); |
2367 REGEX_CHECK_STATUS; | 2375 REGEX_CHECK_STATUS; |
2368 | 2376 |
2369 status = U_ZERO_ERROR; | 2377 status = U_ZERO_ERROR; |
2370 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ | 2378 const char str_dotstardollar[] = { 0x2e, 0x2a, 0x24, 0x00 }; /* .*$ */ |
2371 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); | 2379 utext_openUTF8(&testPattern, str_dotstardollar, -1, &status); |
2372 RegexMatcher m3(&testPattern, &testText, 0, status); | 2380 RegexMatcher m3(&testPattern, &testText, 0, status); |
2373 REGEX_ASSERT(m3.lookingAt(status) == TRUE); | 2381 REGEX_ASSERT(m3.lookingAt(status) == TRUE); |
2374 REGEX_ASSERT(m3.hitEnd() == TRUE); | 2382 REGEX_ASSERT(m3.hitEnd() == TRUE); |
2375 REGEX_ASSERT(m3.requireEnd() == TRUE); | 2383 REGEX_ASSERT(m3.requireEnd() == TRUE); |
2376 REGEX_CHECK_STATUS; | 2384 REGEX_CHECK_STATUS; |
2377 | 2385 |
2378 utext_close(&testText); | 2386 utext_close(&testText); |
2379 utext_close(&testPattern); | 2387 utext_close(&testPattern); |
2380 } | 2388 } |
2381 } | 2389 } |
2382 | 2390 |
2383 | 2391 |
2384 //--------------------------------------------------------------------------- | 2392 //--------------------------------------------------------------------------- |
2385 // | 2393 // |
2386 // API_Replace_UTF8 API test for class RegexMatcher, testing the | 2394 // API_Replace_UTF8 API test for class RegexMatcher, testing the |
2387 // Replace family of functions. | 2395 // Replace family of functions. |
2388 // | 2396 // |
2389 //--------------------------------------------------------------------------- | 2397 //--------------------------------------------------------------------------- |
2390 void RegexTest::API_Replace_UTF8() { | 2398 void RegexTest::API_Replace_UTF8() { |
2391 // | 2399 // |
2392 // Replace | 2400 // Replace |
2393 // | 2401 // |
2394 int32_t flags=0; | 2402 int32_t flags=0; |
2395 UParseError pe; | 2403 UParseError pe; |
2396 UErrorCode status=U_ZERO_ERROR; | 2404 UErrorCode status=U_ZERO_ERROR; |
2397 | 2405 |
2398 UText re=UTEXT_INITIALIZER; | 2406 UText re=UTEXT_INITIALIZER; |
2399 regextst_openUTF8FromInvariant(&re, "abc", -1, &status); | 2407 regextst_openUTF8FromInvariant(&re, "abc", -1, &status); |
2400 REGEX_VERBOSE_TEXT(&re); | 2408 REGEX_VERBOSE_TEXT(&re); |
2401 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); | 2409 RegexPattern *pat = RegexPattern::compile(&re, flags, pe, status); |
2402 REGEX_CHECK_STATUS; | 2410 REGEX_CHECK_STATUS; |
2403 | 2411 |
2404 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e,
0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ | 2412 char data[] = { 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e,
0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .abc..abc...abc.. */ |
2405 // 012345678901234567 | 2413 // 012345678901234567 |
2406 UText dataText = UTEXT_INITIALIZER; | 2414 UText dataText = UTEXT_INITIALIZER; |
2407 utext_openUTF8(&dataText, data, -1, &status); | 2415 utext_openUTF8(&dataText, data, -1, &status); |
2408 REGEX_CHECK_STATUS; | 2416 REGEX_CHECK_STATUS; |
2409 REGEX_VERBOSE_TEXT(&dataText); | 2417 REGEX_VERBOSE_TEXT(&dataText); |
2410 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); | 2418 RegexMatcher *matcher = &pat->matcher(status)->reset(&dataText); |
2411 | 2419 |
2412 // | 2420 // |
2413 // Plain vanilla matches. | 2421 // Plain vanilla matches. |
2414 // | 2422 // |
2415 UnicodeString dest; | 2423 UnicodeString dest; |
2416 UText destText = UTEXT_INITIALIZER; | 2424 UText destText = UTEXT_INITIALIZER; |
2417 utext_openUnicodeString(&destText, &dest, &status); | 2425 utext_openUnicodeString(&destText, &dest, &status); |
2418 UText *result; | 2426 UText *result; |
2419 | 2427 |
2420 UText replText = UTEXT_INITIALIZER; | 2428 UText replText = UTEXT_INITIALIZER; |
2421 | 2429 |
2422 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ | 2430 const char str_yz[] = { 0x79, 0x7a, 0x00 }; /* yz */ |
2423 utext_openUTF8(&replText, str_yz, -1, &status); | 2431 utext_openUTF8(&replText, str_yz, -1, &status); |
2424 REGEX_VERBOSE_TEXT(&replText); | 2432 REGEX_VERBOSE_TEXT(&replText); |
2425 result = matcher->replaceFirst(&replText, NULL, status); | 2433 result = matcher->replaceFirst(&replText, NULL, status); |
2426 REGEX_CHECK_STATUS; | 2434 REGEX_CHECK_STATUS; |
2427 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63
, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ | 2435 const char str_yzabcabc[] = { 0x2e, 0x79, 0x7a, 0x2e, 0x2e, 0x61, 0x62, 0x63
, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* .yz..abc...abc.. */ |
2428 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); | 2436 REGEX_ASSERT_UTEXT_UTF8(str_yzabcabc, result); |
2429 utext_close(result); | 2437 utext_close(result); |
2430 result = matcher->replaceFirst(&replText, &destText, status); | 2438 result = matcher->replaceFirst(&replText, &destText, status); |
2431 REGEX_CHECK_STATUS; | 2439 REGEX_CHECK_STATUS; |
(...skipping 11 matching lines...) Expand all Loading... |
2443 REGEX_CHECK_STATUS; | 2451 REGEX_CHECK_STATUS; |
2444 REGEX_ASSERT(result == &destText); | 2452 REGEX_ASSERT(result == &destText); |
2445 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); | 2453 REGEX_ASSERT_UTEXT_UTF8(str_yzyzyz, result); |
2446 | 2454 |
2447 // | 2455 // |
2448 // Plain vanilla non-matches. | 2456 // Plain vanilla non-matches. |
2449 // | 2457 // |
2450 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x6
2, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...
abx.. */ | 2458 const char str_abxabxabx[] = { 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x61, 0x6
2, 0x78, 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x78, 0x2e, 0x2e, 0x00 }; /* .abx..abx...
abx.. */ |
2451 utext_openUTF8(&dataText, str_abxabxabx, -1, &status); | 2459 utext_openUTF8(&dataText, str_abxabxabx, -1, &status); |
2452 matcher->reset(&dataText); | 2460 matcher->reset(&dataText); |
2453 | 2461 |
2454 result = matcher->replaceFirst(&replText, NULL, status); | 2462 result = matcher->replaceFirst(&replText, NULL, status); |
2455 REGEX_CHECK_STATUS; | 2463 REGEX_CHECK_STATUS; |
2456 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2464 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
2457 utext_close(result); | 2465 utext_close(result); |
2458 result = matcher->replaceFirst(&replText, &destText, status); | 2466 result = matcher->replaceFirst(&replText, &destText, status); |
2459 REGEX_CHECK_STATUS; | 2467 REGEX_CHECK_STATUS; |
2460 REGEX_ASSERT(result == &destText); | 2468 REGEX_ASSERT(result == &destText); |
2461 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2469 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
2462 | 2470 |
2463 result = matcher->replaceAll(&replText, NULL, status); | 2471 result = matcher->replaceAll(&replText, NULL, status); |
2464 REGEX_CHECK_STATUS; | 2472 REGEX_CHECK_STATUS; |
2465 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2473 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
2466 utext_close(result); | 2474 utext_close(result); |
2467 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2475 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
2468 result = matcher->replaceAll(&replText, &destText, status); | 2476 result = matcher->replaceAll(&replText, &destText, status); |
2469 REGEX_CHECK_STATUS; | 2477 REGEX_CHECK_STATUS; |
2470 REGEX_ASSERT(result == &destText); | 2478 REGEX_ASSERT(result == &destText); |
2471 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); | 2479 REGEX_ASSERT_UTEXT_UTF8(str_abxabxabx, result); |
2472 | 2480 |
2473 // | 2481 // |
2474 // Empty source string | 2482 // Empty source string |
2475 // | 2483 // |
2476 utext_openUTF8(&dataText, NULL, 0, &status); | 2484 utext_openUTF8(&dataText, NULL, 0, &status); |
2477 matcher->reset(&dataText); | 2485 matcher->reset(&dataText); |
2478 | 2486 |
2479 result = matcher->replaceFirst(&replText, NULL, status); | 2487 result = matcher->replaceFirst(&replText, NULL, status); |
2480 REGEX_CHECK_STATUS; | 2488 REGEX_CHECK_STATUS; |
2481 REGEX_ASSERT_UTEXT_UTF8("", result); | 2489 REGEX_ASSERT_UTEXT_UTF8("", result); |
2482 utext_close(result); | 2490 utext_close(result); |
2483 result = matcher->replaceFirst(&replText, &destText, status); | 2491 result = matcher->replaceFirst(&replText, &destText, status); |
2484 REGEX_CHECK_STATUS; | 2492 REGEX_CHECK_STATUS; |
2485 REGEX_ASSERT(result == &destText); | 2493 REGEX_ASSERT(result == &destText); |
2486 REGEX_ASSERT_UTEXT_UTF8("", result); | 2494 REGEX_ASSERT_UTEXT_UTF8("", result); |
2487 | 2495 |
2488 result = matcher->replaceAll(&replText, NULL, status); | 2496 result = matcher->replaceAll(&replText, NULL, status); |
2489 REGEX_CHECK_STATUS; | 2497 REGEX_CHECK_STATUS; |
2490 REGEX_ASSERT_UTEXT_UTF8("", result); | 2498 REGEX_ASSERT_UTEXT_UTF8("", result); |
2491 utext_close(result); | 2499 utext_close(result); |
2492 result = matcher->replaceAll(&replText, &destText, status); | 2500 result = matcher->replaceAll(&replText, &destText, status); |
2493 REGEX_CHECK_STATUS; | 2501 REGEX_CHECK_STATUS; |
2494 REGEX_ASSERT(result == &destText); | 2502 REGEX_ASSERT(result == &destText); |
2495 REGEX_ASSERT_UTEXT_UTF8("", result); | 2503 REGEX_ASSERT_UTEXT_UTF8("", result); |
2496 | 2504 |
2497 // | 2505 // |
2498 // Empty substitution string | 2506 // Empty substitution string |
2499 // | 2507 // |
2500 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." | 2508 utext_openUTF8(&dataText, data, -1, &status); // ".abc..abc...abc.." |
2501 matcher->reset(&dataText); | 2509 matcher->reset(&dataText); |
2502 | 2510 |
2503 utext_openUTF8(&replText, NULL, 0, &status); | 2511 utext_openUTF8(&replText, NULL, 0, &status); |
2504 result = matcher->replaceFirst(&replText, NULL, status); | 2512 result = matcher->replaceFirst(&replText, NULL, status); |
2505 REGEX_CHECK_STATUS; | 2513 REGEX_CHECK_STATUS; |
2506 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */ | 2514 const char str_abcabc[] = { 0x2e, 0x2e, 0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e,
0x2e, 0x61, 0x62, 0x63, 0x2e, 0x2e, 0x00 }; /* ...abc...abc.. */ |
2507 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); | 2515 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); |
2508 utext_close(result); | 2516 utext_close(result); |
2509 result = matcher->replaceFirst(&replText, &destText, status); | 2517 result = matcher->replaceFirst(&replText, &destText, status); |
2510 REGEX_CHECK_STATUS; | 2518 REGEX_CHECK_STATUS; |
2511 REGEX_ASSERT(result == &destText); | 2519 REGEX_ASSERT(result == &destText); |
2512 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); | 2520 REGEX_ASSERT_UTEXT_UTF8(str_abcabc, result); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2556 // | 2564 // |
2557 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ | 2565 const char str_add[] = { 0x61, 0x28, 0x2e, 0x2e, 0x29, 0x00 }; /* a(..) */ |
2558 utext_openUTF8(&re, str_add, -1, &status); | 2566 utext_openUTF8(&re, str_add, -1, &status); |
2559 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); | 2567 RegexPattern *pat2 = RegexPattern::compile(&re, flags, pe, status); |
2560 REGEX_CHECK_STATUS; | 2568 REGEX_CHECK_STATUS; |
2561 | 2569 |
2562 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00
}; /* abcdefg */ | 2570 const char str_abcdefg[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x00
}; /* abcdefg */ |
2563 utext_openUTF8(&dataText, str_abcdefg, -1, &status); | 2571 utext_openUTF8(&dataText, str_abcdefg, -1, &status); |
2564 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); | 2572 RegexMatcher *matcher2 = &pat2->matcher(status)->reset(&dataText); |
2565 REGEX_CHECK_STATUS; | 2573 REGEX_CHECK_STATUS; |
2566 | 2574 |
2567 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ | 2575 const char str_11[] = { 0x24, 0x31, 0x24, 0x31, 0x00 }; /* $1$1 */ |
2568 utext_openUTF8(&replText, str_11, -1, &status); | 2576 utext_openUTF8(&replText, str_11, -1, &status); |
2569 result = matcher2->replaceFirst(&replText, NULL, status); | 2577 result = matcher2->replaceFirst(&replText, NULL, status); |
2570 REGEX_CHECK_STATUS; | 2578 REGEX_CHECK_STATUS; |
2571 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67
, 0x00 }; /* bcbcdefg */ | 2579 const char str_bcbcdefg[] = { 0x62, 0x63, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67
, 0x00 }; /* bcbcdefg */ |
2572 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); | 2580 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); |
2573 utext_close(result); | 2581 utext_close(result); |
2574 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2582 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
2575 result = matcher2->replaceFirst(&replText, &destText, status); | 2583 result = matcher2->replaceFirst(&replText, &destText, status); |
2576 REGEX_CHECK_STATUS; | 2584 REGEX_CHECK_STATUS; |
2577 REGEX_ASSERT(result == &destText); | 2585 REGEX_ASSERT(result == &destText); |
2578 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); | 2586 REGEX_ASSERT_UTEXT_UTF8(str_bcbcdefg, result); |
2579 | 2587 |
2580 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x6
5, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31,
0x2e, 0x00 }; /* The value of \$1 is $1. */ | 2588 const char str_v[24] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x6
5, 0x20, 0x6f, 0x66, 0x20, 0x5c, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0x24, 0x31,
0x2e, 0x00 }; /* The value of \$1 is $1. */ |
2581 utext_openUTF8(&replText, str_v, -1, &status); | 2589 utext_openUTF8(&replText, str_v, -1, &status); |
2582 REGEX_VERBOSE_TEXT(&replText); | 2590 REGEX_VERBOSE_TEXT(&replText); |
2583 result = matcher2->replaceFirst(&replText, NULL, status); | 2591 result = matcher2->replaceFirst(&replText, NULL, status); |
2584 REGEX_CHECK_STATUS; | 2592 REGEX_CHECK_STATUS; |
2585 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61,
0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0
x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg *
/ | 2593 const char str_Thevalueof1isbcdefg[] = { 0x54, 0x68, 0x65, 0x20, 0x76, 0x61,
0x6c, 0x75, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x24, 0x31, 0x20, 0x69, 0x73, 0x20, 0
x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* The value of $1 is bc.defg *
/ |
2586 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); | 2594 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); |
2587 utext_close(result); | 2595 utext_close(result); |
2588 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2596 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
2589 result = matcher2->replaceFirst(&replText, &destText, status); | 2597 result = matcher2->replaceFirst(&replText, &destText, status); |
2590 REGEX_CHECK_STATUS; | 2598 REGEX_CHECK_STATUS; |
2591 REGEX_ASSERT(result == &destText); | 2599 REGEX_ASSERT(result == &destText); |
2592 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); | 2600 REGEX_ASSERT_UTEXT_UTF8(str_Thevalueof1isbcdefg, result); |
2593 | 2601 |
2594 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6
9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f,
0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0
x00 }; /* $ by itself, no group number $$$ */ | 2602 const char str_byitselfnogroupnumber[] = { 0x24, 0x20, 0x62, 0x79, 0x20, 0x6
9, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0x6f,
0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x24, 0
x00 }; /* $ by itself, no group number $$$ */ |
2595 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); | 2603 utext_openUTF8(&replText, str_byitselfnogroupnumber, -1, &status); |
2596 result = matcher2->replaceFirst(&replText, NULL, status); | 2604 result = matcher2->replaceFirst(&replText, NULL, status); |
2597 REGEX_CHECK_STATUS; | 2605 REGEX_CHECK_STATUS; |
2598 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20,
0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0
x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2
4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ | 2606 const char str_byitselfnogroupnumberdefg[] = { 0x24, 0x20, 0x62, 0x79, 0x20,
0x69, 0x74, 0x73, 0x65, 0x6c, 0x66, 0x2c, 0x20, 0x6e, 0x6f, 0x20, 0x67, 0x72, 0
x6f, 0x75, 0x70, 0x20, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x24, 0x2
4, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* $ by itself, no group number $$$defg */ |
2599 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); | 2607 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); |
2600 utext_close(result); | 2608 utext_close(result); |
2601 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2609 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
2602 result = matcher2->replaceFirst(&replText, &destText, status); | 2610 result = matcher2->replaceFirst(&replText, &destText, status); |
2603 REGEX_CHECK_STATUS; | 2611 REGEX_CHECK_STATUS; |
2604 REGEX_ASSERT(result == &destText); | 2612 REGEX_ASSERT(result == &destText); |
2605 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); | 2613 REGEX_ASSERT_UTEXT_UTF8(str_byitselfnogroupnumberdefg, result); |
2606 | 2614 |
2607 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d
, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31,
0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx.
*/ | 2615 unsigned char supplDigitChars[] = { 0x53, 0x75, 0x70, 0x70, 0x6c, 0x65, 0x6d
, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x20, 0x31,
0x20, 0x24, 0x78, 0x78, 0x78, 0x78, 0x2e, 0x00 }; /* Supplemental Digit 1 $xxxx.
*/ |
2608 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001
D7CF, MATHEMATICAL BOLD DIGIT ONE | 2616 //unsigned char supplDigitChars[] = "Supplemental Digit 1 $xxxx."; // \U0001
D7CF, MATHEMATICAL BOLD DIGIT ONE |
2609 // 012345678901234567890123456 | 2617 // 012345678901234567890123456 |
2610 supplDigitChars[22] = 0xF0; | 2618 supplDigitChars[22] = 0xF0; |
2611 supplDigitChars[23] = 0x9D; | 2619 supplDigitChars[23] = 0x9D; |
2612 supplDigitChars[24] = 0x9F; | 2620 supplDigitChars[24] = 0x9F; |
2613 supplDigitChars[25] = 0x8F; | 2621 supplDigitChars[25] = 0x8F; |
2614 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); | 2622 utext_openUTF8(&replText, (char *)supplDigitChars, -1, &status); |
2615 | 2623 |
2616 result = matcher2->replaceFirst(&replText, NULL, status); | 2624 result = matcher2->replaceFirst(&replText, NULL, status); |
2617 REGEX_CHECK_STATUS; | 2625 REGEX_CHECK_STATUS; |
2618 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x
20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplementa
l Digit 1 bc.defg */ | 2626 const char str_SupplementalDigit1bcdefg[] = { 0x53, 0x75, 0x70, 0x70, 0x6c,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x6c, 0x20, 0x44, 0x69, 0x67, 0x69, 0x74, 0x
20, 0x31, 0x20, 0x62, 0x63, 0x2e, 0x64, 0x65, 0x66, 0x67, 0x00 }; /* Supplementa
l Digit 1 bc.defg */ |
2619 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); | 2627 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); |
2620 utext_close(result); | 2628 utext_close(result); |
2621 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2629 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
2622 result = matcher2->replaceFirst(&replText, &destText, status); | 2630 result = matcher2->replaceFirst(&replText, &destText, status); |
2623 REGEX_CHECK_STATUS; | 2631 REGEX_CHECK_STATUS; |
2624 REGEX_ASSERT(result == &destText); | 2632 REGEX_ASSERT(result == &destText); |
2625 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); | 2633 REGEX_ASSERT_UTEXT_UTF8(str_SupplementalDigit1bcdefg, result); |
2626 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x
61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e
, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /*
bad capture group number $5..." */ | 2634 const char str_badcapturegroupnumber5[] = { 0x62, 0x61, 0x64, 0x20, 0x63, 0x
61, 0x70, 0x74, 0x75, 0x72, 0x65, 0x20, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x20, 0x6e
, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x20, 0x24, 0x35, 0x2e, 0x2e, 0x2e, 0x00 }; /*
bad capture group number $5..." */ |
2627 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); | 2635 utext_openUTF8(&replText, str_badcapturegroupnumber5, -1, &status); |
2628 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status))
, U_INDEX_OUTOFBOUNDS_ERROR); | 2636 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, NULL, status))
, U_INDEX_OUTOFBOUNDS_ERROR); |
2629 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); | 2637 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); |
2630 utext_close(result); | 2638 utext_close(result); |
2631 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; | 2639 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &status)
; |
2632 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, sta
tus)), U_INDEX_OUTOFBOUNDS_ERROR); | 2640 REGEX_ASSERT_FAIL((result = matcher2->replaceFirst(&replText, &destText, sta
tus)), U_INDEX_OUTOFBOUNDS_ERROR); |
2633 REGEX_ASSERT(result == &destText); | 2641 REGEX_ASSERT(result == &destText); |
2634 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); | 2642 // REGEX_ASSERT_UTEXT_UTF8("abcdefg", result); |
2635 | 2643 |
2636 // | 2644 // |
2637 // Replacement String with \u hex escapes | 2645 // Replacement String with \u hex escapes |
2638 // | 2646 // |
2639 { | 2647 { |
2640 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61
, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 a
bc 2 abc 3 */ | 2648 const char str_abc1abc2abc3[] = { 0x61, 0x62, 0x63, 0x20, 0x31, 0x20, 0x61
, 0x62, 0x63, 0x20, 0x32, 0x20, 0x61, 0x62, 0x63, 0x20, 0x33, 0x00 }; /* abc 1 a
bc 2 abc 3 */ |
2641 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33,
0x2d, 0x2d, 0x00 }; /* --\u0043-- */ | 2649 const char str_u0043[] = { 0x2d, 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x33,
0x2d, 0x2d, 0x00 }; /* --\u0043-- */ |
2642 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); | 2650 utext_openUTF8(&dataText, str_abc1abc2abc3, -1, &status); |
2643 utext_openUTF8(&replText, str_u0043, -1, &status); | 2651 utext_openUTF8(&replText, str_u0043, -1, &status); |
2644 matcher->reset(&dataText); | 2652 matcher->reset(&dataText); |
2645 | 2653 |
2646 result = matcher->replaceAll(&replText, NULL, status); | 2654 result = matcher->replaceAll(&replText, NULL, status); |
2647 REGEX_CHECK_STATUS; | 2655 REGEX_CHECK_STATUS; |
2648 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x
20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d
, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ | 2656 const char str_C1C2C3[] = { 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x31, 0x
20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d, 0x20, 0x32, 0x20, 0x2d, 0x2d, 0x43, 0x2d, 0x2d
, 0x20, 0x33, 0x00 }; /* --C-- 1 --C-- 2 --C-- 3 */ |
2649 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); | 2657 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); |
2650 utext_close(result); | 2658 utext_close(result); |
2651 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); | 2659 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); |
2652 result = matcher->replaceAll(&replText, &destText, status); | 2660 result = matcher->replaceAll(&replText, &destText, status); |
2653 REGEX_CHECK_STATUS; | 2661 REGEX_CHECK_STATUS; |
2654 REGEX_ASSERT(result == &destText); | 2662 REGEX_ASSERT(result == &destText); |
2655 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); | 2663 REGEX_ASSERT_UTEXT_UTF8(str_C1C2C3, result); |
2656 } | 2664 } |
2657 { | 2665 { |
2658 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ | 2666 const char str_abc[] = { 0x61, 0x62, 0x63, 0x20, 0x21, 0x00 }; /* abc ! */ |
2659 utext_openUTF8(&dataText, str_abc, -1, &status); | 2667 utext_openUTF8(&dataText, str_abc, -1, &status); |
2660 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30,
0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ | 2668 const char str_U00010000[] = { 0x2d, 0x2d, 0x5c, 0x55, 0x30, 0x30, 0x30,
0x31, 0x30, 0x30, 0x30, 0x30, 0x2d, 0x2d, 0x00 }; /* --\U00010000-- */ |
2661 utext_openUTF8(&replText, str_U00010000, -1, &status); | 2669 utext_openUTF8(&replText, str_U00010000, -1, &status); |
2662 matcher->reset(&dataText); | 2670 matcher->reset(&dataText); |
2663 | 2671 |
2664 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0
x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008
A" | 2672 unsigned char expected[] = { 0x2d, 0x2d, 0x78, 0x78, 0x78, 0x78, 0x2d, 0
x2d, 0x20, 0x21, 0x00 }; /* --xxxx-- ! */ // \U00010000, "LINEAR B SYLLABLE B008
A" |
2665 // 0123456789 | 2673 // 0123456789 |
2666 expected[2] = 0xF0; | 2674 expected[2] = 0xF0; |
2667 expected[3] = 0x90; | 2675 expected[3] = 0x90; |
2668 expected[4] = 0x80; | 2676 expected[4] = 0x80; |
2669 expected[5] = 0x80; | 2677 expected[5] = 0x80; |
2670 | 2678 |
2671 result = matcher->replaceAll(&replText, NULL, status); | 2679 result = matcher->replaceAll(&replText, NULL, status); |
2672 REGEX_CHECK_STATUS; | 2680 REGEX_CHECK_STATUS; |
2673 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); | 2681 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); |
2674 utext_close(result); | 2682 utext_close(result); |
2675 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); | 2683 utext_replace(&destText, 0, utext_nativeLength(&destText), NULL, 0, &sta
tus); |
2676 result = matcher->replaceAll(&replText, &destText, status); | 2684 result = matcher->replaceAll(&replText, &destText, status); |
2677 REGEX_CHECK_STATUS; | 2685 REGEX_CHECK_STATUS; |
2678 REGEX_ASSERT(result == &destText); | 2686 REGEX_ASSERT(result == &destText); |
2679 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); | 2687 REGEX_ASSERT_UTEXT_UTF8((char *)expected, result); |
2680 } | 2688 } |
2681 // TODO: need more thorough testing of capture substitutions. | 2689 // TODO: need more thorough testing of capture substitutions. |
2682 | 2690 |
2683 // Bug 4057 | 2691 // Bug 4057 |
2684 // | 2692 // |
2685 { | 2693 { |
2686 status = U_ZERO_ERROR; | 2694 status = U_ZERO_ERROR; |
2687 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65,
0x00 }; /* ss(.*?)ee */ | 2695 const char str_ssee[] = { 0x73, 0x73, 0x28, 0x2e, 0x2a, 0x3f, 0x29, 0x65, 0x65,
0x00 }; /* ss(.*?)ee */ |
2688 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68,
0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x
20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69
, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66,
0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start wit
h ss and end with ee ss stuff ee fin */ | 2696 const char str_blah[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68,
0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x
20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0x77, 0x69
, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x73, 0x73, 0x20, 0x73, 0x74, 0x75, 0x66,
0x66, 0x20, 0x65, 0x65, 0x20, 0x66, 0x69, 0x6e, 0x00 }; /* The matches start wit
h ss and end with ee ss stuff ee fin */ |
2689 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ | 2697 const char str_ooh[] = { 0x6f, 0x6f, 0x68, 0x00 }; /* ooh */ |
2690 utext_openUTF8(&re, str_ssee, -1, &status); | 2698 utext_openUTF8(&re, str_ssee, -1, &status); |
2691 utext_openUTF8(&dataText, str_blah, -1, &status); | 2699 utext_openUTF8(&dataText, str_blah, -1, &status); |
2692 utext_openUTF8(&replText, str_ooh, -1, &status); | 2700 utext_openUTF8(&replText, str_ooh, -1, &status); |
2693 | 2701 |
2694 RegexMatcher m(&re, 0, status); | 2702 RegexMatcher m(&re, 0, status); |
2695 REGEX_CHECK_STATUS; | 2703 REGEX_CHECK_STATUS; |
2696 | 2704 |
2697 UnicodeString result; | 2705 UnicodeString result; |
2698 UText resultText = UTEXT_INITIALIZER; | 2706 UText resultText = UTEXT_INITIALIZER; |
2699 utext_openUnicodeString(&resultText, &result, &status); | 2707 utext_openUnicodeString(&resultText, &result, &status); |
2700 | 2708 |
2701 // Multiple finds do NOT bump up the previous appendReplacement position. | 2709 // Multiple finds do NOT bump up the previous appendReplacement position. |
2702 m.reset(&dataText); | 2710 m.reset(&dataText); |
2703 m.find(); | 2711 m.find(); |
2704 m.find(); | 2712 m.find(); |
2705 m.appendReplacement(&resultText, &replText, status); | 2713 m.appendReplacement(&resultText, &replText, status); |
2706 REGEX_CHECK_STATUS; | 2714 REGEX_CHECK_STATUS; |
(...skipping 20 matching lines...) Expand all Loading... |
2727 m.find(10, status); | 2735 m.find(10, status); |
2728 m.find(); | 2736 m.find(); |
2729 m.appendReplacement(&resultText, &replText, status); | 2737 m.appendReplacement(&resultText, &replText, status); |
2730 REGEX_CHECK_STATUS; | 2738 REGEX_CHECK_STATUS; |
2731 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The
matches start with ss and end with ee ooh */ | 2739 const char str_blah8[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x00 }; /* The
matches start with ss and end with ee ooh */ |
2732 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); | 2740 REGEX_ASSERT_UTEXT_UTF8(str_blah8, &resultText); |
2733 | 2741 |
2734 m.appendTail(&resultText, status); | 2742 m.appendTail(&resultText, status); |
2735 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x6
9, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ | 2743 const char str_blah9[] = { 0x54, 0x68, 0x65, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x65, 0x73, 0x20, 0x73, 0x74, 0x61, 0x72, 0x74, 0x20, 0x77, 0x69, 0x74,
0x68, 0x20, 0x73, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x65, 0x6e, 0x64, 0x20, 0
x77, 0x69, 0x74, 0x68, 0x20, 0x65, 0x65, 0x20, 0x6f, 0x6f, 0x68, 0x20, 0x66, 0x6
9, 0x6e, 0x00 }; /* The matches start with ss and end with ee ooh fin */ |
2736 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); | 2744 REGEX_ASSERT_UTEXT_UTF8(str_blah9, &resultText); |
2737 | 2745 |
2738 utext_close(&resultText); | 2746 utext_close(&resultText); |
2739 } | 2747 } |
2740 | 2748 |
2741 delete matcher2; | 2749 delete matcher2; |
2742 delete pat2; | 2750 delete pat2; |
2743 delete matcher; | 2751 delete matcher; |
2744 delete pat; | 2752 delete pat; |
2745 | 2753 |
2746 utext_close(&dataText); | 2754 utext_close(&dataText); |
2747 utext_close(&replText); | 2755 utext_close(&replText); |
2748 utext_close(&destText); | 2756 utext_close(&destText); |
2749 utext_close(&re); | 2757 utext_close(&re); |
2750 } | 2758 } |
2751 | 2759 |
2752 | 2760 |
2753 //--------------------------------------------------------------------------- | 2761 //--------------------------------------------------------------------------- |
2754 // | 2762 // |
2755 // API_Pattern_UTF8 Test that the API for class RegexPattern is | 2763 // API_Pattern_UTF8 Test that the API for class RegexPattern is |
2756 // present and nominally working. | 2764 // present and nominally working. |
2757 // | 2765 // |
2758 //--------------------------------------------------------------------------- | 2766 //--------------------------------------------------------------------------- |
2759 void RegexTest::API_Pattern_UTF8() { | 2767 void RegexTest::API_Pattern_UTF8() { |
2760 RegexPattern pata; // Test default constructor to not crash. | 2768 RegexPattern pata; // Test default constructor to not crash. |
2761 RegexPattern patb; | 2769 RegexPattern patb; |
2762 | 2770 |
2763 REGEX_ASSERT(pata == patb); | 2771 REGEX_ASSERT(pata == patb); |
2764 REGEX_ASSERT(pata == pata); | 2772 REGEX_ASSERT(pata == pata); |
2765 | 2773 |
2766 UText re1 = UTEXT_INITIALIZER; | 2774 UText re1 = UTEXT_INITIALIZER; |
2767 UText re2 = UTEXT_INITIALIZER; | 2775 UText re2 = UTEXT_INITIALIZER; |
2768 UErrorCode status = U_ZERO_ERROR; | 2776 UErrorCode status = U_ZERO_ERROR; |
2769 UParseError pe; | 2777 UParseError pe; |
2770 | 2778 |
2771 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d,
0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ | 2779 const char str_abcalmz[] = { 0x61, 0x62, 0x63, 0x5b, 0x61, 0x2d, 0x6c, 0x5d,
0x5b, 0x6d, 0x2d, 0x7a, 0x5d, 0x00 }; /* abc[a-l][m-z] */ |
2772 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ | 2780 const char str_def[] = { 0x64, 0x65, 0x66, 0x00 }; /* def */ |
2773 utext_openUTF8(&re1, str_abcalmz, -1, &status); | 2781 utext_openUTF8(&re1, str_abcalmz, -1, &status); |
2774 utext_openUTF8(&re2, str_def, -1, &status); | 2782 utext_openUTF8(&re2, str_def, -1, &status); |
2775 | 2783 |
2776 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); | 2784 RegexPattern *pat1 = RegexPattern::compile(&re1, 0, pe, status); |
2777 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); | 2785 RegexPattern *pat2 = RegexPattern::compile(&re2, 0, pe, status); |
2778 REGEX_CHECK_STATUS; | 2786 REGEX_CHECK_STATUS; |
2779 REGEX_ASSERT(*pat1 == *pat1); | 2787 REGEX_ASSERT(*pat1 == *pat1); |
2780 REGEX_ASSERT(*pat1 != pata); | 2788 REGEX_ASSERT(*pat1 != pata); |
(...skipping 28 matching lines...) Expand all Loading... |
2809 | 2817 |
2810 // clone | 2818 // clone |
2811 RegexPattern *pat1c = pat1->clone(); | 2819 RegexPattern *pat1c = pat1->clone(); |
2812 REGEX_ASSERT(*pat1c == *pat1); | 2820 REGEX_ASSERT(*pat1c == *pat1); |
2813 REGEX_ASSERT(*pat1c != *pat2); | 2821 REGEX_ASSERT(*pat1c != *pat2); |
2814 | 2822 |
2815 delete pat1c; | 2823 delete pat1c; |
2816 delete pat1a; | 2824 delete pat1a; |
2817 delete pat1; | 2825 delete pat1; |
2818 delete pat2; | 2826 delete pat2; |
2819 | 2827 |
2820 utext_close(&re1); | 2828 utext_close(&re1); |
2821 utext_close(&re2); | 2829 utext_close(&re2); |
2822 | 2830 |
2823 | 2831 |
2824 // | 2832 // |
2825 // Verify that a matcher created from a cloned pattern works. | 2833 // Verify that a matcher created from a cloned pattern works. |
2826 // (Jitterbug 3423) | 2834 // (Jitterbug 3423) |
2827 // | 2835 // |
2828 { | 2836 { |
2829 UErrorCode status = U_ZERO_ERROR; | 2837 UErrorCode status = U_ZERO_ERROR; |
2830 UText pattern = UTEXT_INITIALIZER; | 2838 UText pattern = UTEXT_INITIALIZER; |
2831 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \
p{L}+ */ | 2839 const char str_pL[] = { 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x2b, 0x00 }; /* \
p{L}+ */ |
2832 utext_openUTF8(&pattern, str_pL, -1, &status); | 2840 utext_openUTF8(&pattern, str_pL, -1, &status); |
2833 | 2841 |
2834 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); | 2842 RegexPattern *pSource = RegexPattern::compile(&pattern, 0, status); |
2835 RegexPattern *pClone = pSource->clone(); | 2843 RegexPattern *pClone = pSource->clone(); |
2836 delete pSource; | 2844 delete pSource; |
2837 RegexMatcher *mFromClone = pClone->matcher(status); | 2845 RegexMatcher *mFromClone = pClone->matcher(status); |
2838 REGEX_CHECK_STATUS; | 2846 REGEX_CHECK_STATUS; |
2839 | 2847 |
2840 UText input = UTEXT_INITIALIZER; | 2848 UText input = UTEXT_INITIALIZER; |
2841 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57
, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ | 2849 const char str_HelloWorld[] = { 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x57
, 0x6f, 0x72, 0x6c, 0x64, 0x00 }; /* Hello World */ |
2842 utext_openUTF8(&input, str_HelloWorld, -1, &status); | 2850 utext_openUTF8(&input, str_HelloWorld, -1, &status); |
2843 mFromClone->reset(&input); | 2851 mFromClone->reset(&input); |
2844 REGEX_ASSERT(mFromClone->find() == TRUE); | 2852 REGEX_ASSERT(mFromClone->find() == TRUE); |
2845 REGEX_ASSERT(mFromClone->group(status) == "Hello"); | 2853 REGEX_ASSERT(mFromClone->group(status) == "Hello"); |
2846 REGEX_ASSERT(mFromClone->find() == TRUE); | 2854 REGEX_ASSERT(mFromClone->find() == TRUE); |
2847 REGEX_ASSERT(mFromClone->group(status) == "World"); | 2855 REGEX_ASSERT(mFromClone->group(status) == "World"); |
2848 REGEX_ASSERT(mFromClone->find() == FALSE); | 2856 REGEX_ASSERT(mFromClone->find() == FALSE); |
2849 delete mFromClone; | 2857 delete mFromClone; |
2850 delete pClone; | 2858 delete pClone; |
2851 | 2859 |
2852 utext_close(&input); | 2860 utext_close(&input); |
2853 utext_close(&pattern); | 2861 utext_close(&pattern); |
2854 } | 2862 } |
2855 | 2863 |
2856 // | 2864 // |
2857 // matches convenience API | 2865 // matches convenience API |
2858 // | 2866 // |
2859 { | 2867 { |
2860 UErrorCode status = U_ZERO_ERROR; | 2868 UErrorCode status = U_ZERO_ERROR; |
2861 UText pattern = UTEXT_INITIALIZER; | 2869 UText pattern = UTEXT_INITIALIZER; |
2862 UText input = UTEXT_INITIALIZER; | 2870 UText input = UTEXT_INITIALIZER; |
2863 | 2871 |
2864 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x2
0, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ | 2872 const char str_randominput[] = { 0x72, 0x61, 0x6e, 0x64, 0x6f, 0x6d, 0x2
0, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /* random input */ |
2865 utext_openUTF8(&input, str_randominput, -1, &status); | 2873 utext_openUTF8(&input, str_randominput, -1, &status); |
2866 | 2874 |
2867 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ | 2875 const char str_dotstar[] = { 0x2e, 0x2a, 0x00 }; /* .* */ |
2868 utext_openUTF8(&pattern, str_dotstar, -1, &status); | 2876 utext_openUTF8(&pattern, str_dotstar, -1, &status); |
2869 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE
); | 2877 REGEX_ASSERT(RegexPattern::matches(&pattern, &input, pe, status) == TRUE
); |
2870 REGEX_CHECK_STATUS; | 2878 REGEX_CHECK_STATUS; |
2871 | 2879 |
2872 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ | 2880 const char str_abc[] = { 0x61, 0x62, 0x63, 0x00 }; /* abc */ |
2873 utext_openUTF8(&pattern, str_abc, -1, &status); | 2881 utext_openUTF8(&pattern, str_abc, -1, &status); |
2874 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) ==
FALSE); | 2882 REGEX_ASSERT(RegexPattern::matches("abc", "random input", pe, status) ==
FALSE); |
2875 REGEX_CHECK_STATUS; | 2883 REGEX_CHECK_STATUS; |
2876 | 2884 |
2877 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /*
.*nput */ | 2885 const char str_nput[] = { 0x2e, 0x2a, 0x6e, 0x70, 0x75, 0x74, 0x00 }; /*
.*nput */ |
2878 utext_openUTF8(&pattern, str_nput, -1, &status); | 2886 utext_openUTF8(&pattern, str_nput, -1, &status); |
2879 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status)
== TRUE); | 2887 REGEX_ASSERT(RegexPattern::matches(".*nput", "random input", pe, status)
== TRUE); |
2880 REGEX_CHECK_STATUS; | 2888 REGEX_CHECK_STATUS; |
2881 | 2889 |
2882 utext_openUTF8(&pattern, str_randominput, -1, &status); | 2890 utext_openUTF8(&pattern, str_randominput, -1, &status); |
2883 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, s
tatus) == TRUE); | 2891 REGEX_ASSERT(RegexPattern::matches("random input", "random input", pe, s
tatus) == TRUE); |
2884 REGEX_CHECK_STATUS; | 2892 REGEX_CHECK_STATUS; |
2885 | 2893 |
2886 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ | 2894 const char str_u[] = { 0x2e, 0x2a, 0x75, 0x00 }; /* .*u */ |
2887 utext_openUTF8(&pattern, str_u, -1, &status); | 2895 utext_openUTF8(&pattern, str_u, -1, &status); |
2888 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) ==
FALSE); | 2896 REGEX_ASSERT(RegexPattern::matches(".*u", "random input", pe, status) ==
FALSE); |
2889 REGEX_CHECK_STATUS; | 2897 REGEX_CHECK_STATUS; |
2890 | 2898 |
2891 utext_openUTF8(&input, str_abc, -1, &status); | 2899 utext_openUTF8(&input, str_abc, -1, &status); |
2892 utext_openUTF8(&pattern, str_abc, -1, &status); | 2900 utext_openUTF8(&pattern, str_abc, -1, &status); |
2893 status = U_INDEX_OUTOFBOUNDS_ERROR; | 2901 status = U_INDEX_OUTOFBOUNDS_ERROR; |
2894 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); | 2902 REGEX_ASSERT(RegexPattern::matches("abc", "abc", pe, status) == FALSE); |
2895 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 2903 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
2896 | 2904 |
2897 utext_close(&input); | 2905 utext_close(&input); |
2898 utext_close(&pattern); | 2906 utext_close(&pattern); |
2899 } | 2907 } |
2900 | 2908 |
2901 | 2909 |
2902 // | 2910 // |
2903 // Split() | 2911 // Split() |
2904 // | 2912 // |
2905 status = U_ZERO_ERROR; | 2913 status = U_ZERO_ERROR; |
2906 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ | 2914 const char str_spaceplus[] = { 0x20, 0x2b, 0x00 }; /* + */ |
(...skipping 370 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3277 } | 3285 } |
3278 | 3286 |
3279 | 3287 |
3280 void RegexTest::regex_find(const UnicodeString &pattern, | 3288 void RegexTest::regex_find(const UnicodeString &pattern, |
3281 const UnicodeString &flags, | 3289 const UnicodeString &flags, |
3282 const UnicodeString &inputString, | 3290 const UnicodeString &inputString, |
3283 const char *srcPath, | 3291 const char *srcPath, |
3284 int32_t line) { | 3292 int32_t line) { |
3285 UnicodeString unEscapedInput; | 3293 UnicodeString unEscapedInput; |
3286 UnicodeString deTaggedInput; | 3294 UnicodeString deTaggedInput; |
3287 | 3295 |
3288 int32_t patternUTF8Length, inputUTF8Length; | 3296 int32_t patternUTF8Length, inputUTF8Length; |
3289 char *patternChars = NULL, *inputChars = NULL; | 3297 char *patternChars = NULL, *inputChars = NULL; |
3290 UText patternText = UTEXT_INITIALIZER; | 3298 UText patternText = UTEXT_INITIALIZER; |
3291 UText inputText = UTEXT_INITIALIZER; | 3299 UText inputText = UTEXT_INITIALIZER; |
3292 UConverter *UTF8Converter = NULL; | 3300 UConverter *UTF8Converter = NULL; |
3293 | 3301 |
3294 UErrorCode status = U_ZERO_ERROR; | 3302 UErrorCode status = U_ZERO_ERROR; |
3295 UParseError pe; | 3303 UParseError pe; |
3296 RegexPattern *parsePat = NULL; | 3304 RegexPattern *parsePat = NULL; |
3297 RegexMatcher *parseMatcher = NULL; | 3305 RegexMatcher *parseMatcher = NULL; |
3298 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; | 3306 RegexPattern *callerPattern = NULL, *UTF8Pattern = NULL; |
3299 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; | 3307 RegexMatcher *matcher = NULL, *UTF8Matcher = NULL; |
3300 UVector groupStarts(status); | 3308 UVector groupStarts(status); |
3301 UVector groupEnds(status); | 3309 UVector groupEnds(status); |
3302 UVector groupStartsUTF8(status); | 3310 UVector groupStartsUTF8(status); |
3303 UVector groupEndsUTF8(status); | 3311 UVector groupEndsUTF8(status); |
3304 UBool isMatch = FALSE, isUTF8Match = FALSE; | 3312 UBool isMatch = FALSE, isUTF8Match = FALSE; |
3305 UBool failed = FALSE; | 3313 UBool failed = FALSE; |
3306 int32_t numFinds; | 3314 int32_t numFinds; |
3307 int32_t i; | 3315 int32_t i; |
3308 UBool useMatchesFunc = FALSE; | 3316 UBool useMatchesFunc = FALSE; |
3309 UBool useLookingAtFunc = FALSE; | 3317 UBool useLookingAtFunc = FALSE; |
3310 int32_t regionStart = -1; | 3318 int32_t regionStart = -1; |
3311 int32_t regionEnd = -1; | 3319 int32_t regionEnd = -1; |
3312 int32_t regionStartUTF8 = -1; | 3320 int32_t regionStartUTF8 = -1; |
3313 int32_t regionEndUTF8 = -1; | 3321 int32_t regionEndUTF8 = -1; |
3314 | 3322 |
3315 | 3323 |
3316 // | 3324 // |
3317 // Compile the caller's pattern | 3325 // Compile the caller's pattern |
3318 // | 3326 // |
3319 uint32_t bflags = 0; | 3327 uint32_t bflags = 0; |
3320 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag | 3328 if (flags.indexOf((UChar)0x69) >= 0) { // 'i' flag |
3321 bflags |= UREGEX_CASE_INSENSITIVE; | 3329 bflags |= UREGEX_CASE_INSENSITIVE; |
3322 } | 3330 } |
3323 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag | 3331 if (flags.indexOf((UChar)0x78) >= 0) { // 'x' flag |
3324 bflags |= UREGEX_COMMENTS; | 3332 bflags |= UREGEX_COMMENTS; |
3325 } | 3333 } |
3326 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag | 3334 if (flags.indexOf((UChar)0x73) >= 0) { // 's' flag |
3327 bflags |= UREGEX_DOTALL; | 3335 bflags |= UREGEX_DOTALL; |
3328 } | 3336 } |
3329 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag | 3337 if (flags.indexOf((UChar)0x6d) >= 0) { // 'm' flag |
3330 bflags |= UREGEX_MULTILINE; | 3338 bflags |= UREGEX_MULTILINE; |
3331 } | 3339 } |
3332 | 3340 |
3333 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag | 3341 if (flags.indexOf((UChar)0x65) >= 0) { // 'e' flag |
3334 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; | 3342 bflags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES; |
3335 } | 3343 } |
3336 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag | 3344 if (flags.indexOf((UChar)0x44) >= 0) { // 'D' flag |
3337 bflags |= UREGEX_UNIX_LINES; | 3345 bflags |= UREGEX_UNIX_LINES; |
3338 } | 3346 } |
3339 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag | 3347 if (flags.indexOf((UChar)0x51) >= 0) { // 'Q' flag |
3340 bflags |= UREGEX_LITERAL; | 3348 bflags |= UREGEX_LITERAL; |
3341 } | 3349 } |
3342 | 3350 |
(...skipping 15 matching lines...) Expand all Loading... |
3358 goto cleanupAndReturn; | 3366 goto cleanupAndReturn; |
3359 } else { | 3367 } else { |
3360 // Unexpected pattern compilation error. | 3368 // Unexpected pattern compilation error. |
3361 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(
status)); | 3369 dataerrln("Line %d: error %s compiling pattern.", line, u_errorName(
status)); |
3362 goto cleanupAndReturn; | 3370 goto cleanupAndReturn; |
3363 } | 3371 } |
3364 } | 3372 } |
3365 | 3373 |
3366 UTF8Converter = ucnv_open("UTF8", &status); | 3374 UTF8Converter = ucnv_open("UTF8", &status); |
3367 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
NULL, &status); | 3375 ucnv_setFromUCallBack(UTF8Converter, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
NULL, &status); |
3368 | 3376 |
3369 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); | 3377 patternUTF8Length = pattern.extract(NULL, 0, UTF8Converter, status); |
3370 status = U_ZERO_ERROR; // buffer overflow | 3378 status = U_ZERO_ERROR; // buffer overflow |
3371 patternChars = new char[patternUTF8Length+1]; | 3379 patternChars = new char[patternUTF8Length+1]; |
3372 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); | 3380 pattern.extract(patternChars, patternUTF8Length+1, UTF8Converter, status); |
3373 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); | 3381 utext_openUTF8(&patternText, patternChars, patternUTF8Length, &status); |
3374 | 3382 |
3375 if (status == U_ZERO_ERROR) { | 3383 if (status == U_ZERO_ERROR) { |
3376 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); | 3384 UTF8Pattern = RegexPattern::compile(&patternText, bflags, pe, status); |
3377 | 3385 |
3378 if (status != U_ZERO_ERROR) { | 3386 if (status != U_ZERO_ERROR) { |
3379 #if UCONFIG_NO_BREAK_ITERATION==1 | 3387 #if UCONFIG_NO_BREAK_ITERATION==1 |
3380 // 'v' test flag means that the test pattern should not compile if I
CU was configured | 3388 // 'v' test flag means that the test pattern should not compile if I
CU was configured |
3381 // to not include break iteration. RBBI is needed for Unicode w
ord boundaries. | 3389 // to not include break iteration. RBBI is needed for Unicode w
ord boundaries. |
3382 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORT
ED_ERROR) { | 3390 if (flags.indexOf((UChar)0x76) >= 0 /*'v'*/ && status == U_UNSUPPORT
ED_ERROR) { |
3383 goto cleanupAndReturn; | 3391 goto cleanupAndReturn; |
3384 } | 3392 } |
3385 #endif | 3393 #endif |
3386 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' | 3394 if (flags.indexOf((UChar)0x45) >= 0) { // flags contain 'E' |
3387 // Expected pattern compilation error. | 3395 // Expected pattern compilation error. |
3388 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' | 3396 if (flags.indexOf((UChar)0x64) >= 0) { // flags contain 'd' |
3389 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(s
tatus)); | 3397 logln("Pattern Compile returns \"%s\" (UTF8)", u_errorName(s
tatus)); |
3390 } | 3398 } |
3391 goto cleanupAndReturn; | 3399 goto cleanupAndReturn; |
3392 } else { | 3400 } else { |
3393 // Unexpected pattern compilation error. | 3401 // Unexpected pattern compilation error. |
3394 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_err
orName(status)); | 3402 errln("Line %d: error %s compiling pattern. (UTF8)", line, u_err
orName(status)); |
3395 goto cleanupAndReturn; | 3403 goto cleanupAndReturn; |
3396 } | 3404 } |
3397 } | 3405 } |
3398 } | 3406 } |
3399 | 3407 |
3400 if (UTF8Pattern == NULL) { | 3408 if (UTF8Pattern == NULL) { |
3401 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en without being a failure of the engine | 3409 // UTF-8 does not allow unpaired surrogates, so this could actually happ
en without being a failure of the engine |
3402 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d",
srcPath, line); | 3410 logln("Unable to create UTF-8 pattern, skipping UTF-8 tests for %s:%d",
srcPath, line); |
3403 status = U_ZERO_ERROR; | 3411 status = U_ZERO_ERROR; |
3404 } | 3412 } |
3405 | 3413 |
3406 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag | 3414 if (flags.indexOf((UChar)0x64) >= 0) { // 'd' flag |
3407 RegexPatternDump(callerPattern); | 3415 callerPattern->dumpPattern(); |
3408 } | 3416 } |
3409 | 3417 |
3410 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag | 3418 if (flags.indexOf((UChar)0x45) >= 0) { // 'E' flag |
3411 errln("%s, Line %d: Expected, but did not get, a pattern compilation err
or.", srcPath, line); | 3419 errln("%s, Line %d: Expected, but did not get, a pattern compilation err
or.", srcPath, line); |
3412 goto cleanupAndReturn; | 3420 goto cleanupAndReturn; |
3413 } | 3421 } |
3414 | 3422 |
3415 | 3423 |
3416 // | 3424 // |
3417 // Number of times find() should be called on the test string, default to 1 | 3425 // Number of times find() should be called on the test string, default to 1 |
3418 // | 3426 // |
3419 numFinds = 1; | 3427 numFinds = 1; |
3420 for (i=2; i<=9; i++) { | 3428 for (i=2; i<=9; i++) { |
3421 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag | 3429 if (flags.indexOf((UChar)(0x30 + i)) >= 0) { // digit flag |
3422 if (numFinds != 1) { | 3430 if (numFinds != 1) { |
3423 errln("Line %d: more than one digit flag. Scanning %d.", line,
i); | 3431 errln("Line %d: more than one digit flag. Scanning %d.", line,
i); |
3424 goto cleanupAndReturn; | 3432 goto cleanupAndReturn; |
3425 } | 3433 } |
3426 numFinds = i; | 3434 numFinds = i; |
3427 } | 3435 } |
3428 } | 3436 } |
3429 | 3437 |
3430 // 'M' flag. Use matches() instead of find() | 3438 // 'M' flag. Use matches() instead of find() |
3431 if (flags.indexOf((UChar)0x4d) >= 0) { | 3439 if (flags.indexOf((UChar)0x4d) >= 0) { |
3432 useMatchesFunc = TRUE; | 3440 useMatchesFunc = TRUE; |
3433 } | 3441 } |
3434 if (flags.indexOf((UChar)0x4c) >= 0) { | 3442 if (flags.indexOf((UChar)0x4c) >= 0) { |
3435 useLookingAtFunc = TRUE; | 3443 useLookingAtFunc = TRUE; |
3436 } | 3444 } |
3437 | 3445 |
3438 // | 3446 // |
3439 // Find the tags in the input data, remove them, and record the group bound
ary | 3447 // Find the tags in the input data, remove them, and record the group bound
ary |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3474 } | 3482 } |
3475 | 3483 |
3476 // | 3484 // |
3477 // Configure the matcher according to the flags specified with this test. | 3485 // Configure the matcher according to the flags specified with this test. |
3478 // | 3486 // |
3479 matcher = callerPattern->matcher(deTaggedInput, status); | 3487 matcher = callerPattern->matcher(deTaggedInput, status); |
3480 REGEX_CHECK_STATUS_L(line); | 3488 REGEX_CHECK_STATUS_L(line); |
3481 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag | 3489 if (flags.indexOf((UChar)0x74) >= 0) { // 't' trace flag |
3482 matcher->setTrace(TRUE); | 3490 matcher->setTrace(TRUE); |
3483 } | 3491 } |
3484 | 3492 |
3485 if (UTF8Pattern != NULL) { | 3493 if (UTF8Pattern != NULL) { |
3486 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); | 3494 inputUTF8Length = deTaggedInput.extract(NULL, 0, UTF8Converter, status); |
3487 status = U_ZERO_ERROR; // buffer overflow | 3495 status = U_ZERO_ERROR; // buffer overflow |
3488 inputChars = new char[inputUTF8Length+1]; | 3496 inputChars = new char[inputUTF8Length+1]; |
3489 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, stat
us); | 3497 deTaggedInput.extract(inputChars, inputUTF8Length+1, UTF8Converter, stat
us); |
3490 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); | 3498 utext_openUTF8(&inputText, inputChars, inputUTF8Length, &status); |
3491 | 3499 |
3492 if (status == U_ZERO_ERROR) { | 3500 if (status == U_ZERO_ERROR) { |
3493 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); | 3501 UTF8Matcher = &UTF8Pattern->matcher(status)->reset(&inputText); |
3494 REGEX_CHECK_STATUS_L(line); | 3502 REGEX_CHECK_STATUS_L(line); |
3495 } | 3503 } |
3496 | 3504 |
3497 if (UTF8Matcher == NULL) { | 3505 if (UTF8Matcher == NULL) { |
3498 // UTF-8 does not allow unpaired surrogates, so this could actually
happen without being a failure of the engine | 3506 // UTF-8 does not allow unpaired surrogates, so this could actually
happen without being a failure of the engine |
3499 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d"
, srcPath, line); | 3507 logln("Unable to create UTF-8 matcher, skipping UTF-8 tests for %s:%d"
, srcPath, line); |
3500 status = U_ZERO_ERROR; | 3508 status = U_ZERO_ERROR; |
3501 } | 3509 } |
3502 } | 3510 } |
3503 | 3511 |
3504 // | 3512 // |
3505 // Generate native indices for UTF8 versions of region and capture group in
fo | 3513 // Generate native indices for UTF8 versions of region and capture group in
fo |
3506 // | 3514 // |
3507 if (UTF8Matcher != NULL) { | 3515 if (UTF8Matcher != NULL) { |
3508 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStar
t, regionStartUTF8); | 3516 if (regionStart>=0) (void) utextOffsetToNative(&inputText, regionStar
t, regionStartUTF8); |
3509 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd,
regionEndUTF8); | 3517 if (regionEnd>=0) (void) utextOffsetToNative(&inputText, regionEnd,
regionEndUTF8); |
3510 | 3518 |
3511 // Fill out the native index UVector info. | 3519 // Fill out the native index UVector info. |
3512 // Only need 1 loop, from above we know groupStarts.size() = groupEnds.
size() | 3520 // Only need 1 loop, from above we know groupStarts.size() = groupEnds.
size() |
3513 for (i=0; i<groupStarts.size(); i++) { | 3521 for (i=0; i<groupStarts.size(); i++) { |
3514 int32_t start = groupStarts.elementAti(i); | 3522 int32_t start = groupStarts.elementAti(i); |
3515 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting | 3523 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting |
3516 if (start >= 0) { | 3524 if (start >= 0) { |
3517 int32_t startUTF8; | 3525 int32_t startUTF8; |
3518 if (!utextOffsetToNative(&inputText, start, startUTF8)) { | 3526 if (!utextOffsetToNative(&inputText, start, startUTF8)) { |
3519 errln("Error at line %d: could not find native index for gro
up start %d. UTF16 index %d", line, i, start); | 3527 errln("Error at line %d: could not find native index for gro
up start %d. UTF16 index %d", line, i, start); |
3520 failed = TRUE; | 3528 failed = TRUE; |
3521 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. | 3529 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. |
3522 } | 3530 } |
3523 setInt(groupStartsUTF8, startUTF8, i); | 3531 setInt(groupStartsUTF8, startUTF8, i); |
3524 } | 3532 } |
3525 | 3533 |
3526 int32_t end = groupEnds.elementAti(i); | 3534 int32_t end = groupEnds.elementAti(i); |
3527 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting | 3535 // -1 means there was no UVector slot and we won't be requesting th
at capture group for this test, don't bother inserting |
3528 if (end >= 0) { | 3536 if (end >= 0) { |
3529 int32_t endUTF8; | 3537 int32_t endUTF8; |
3530 if (!utextOffsetToNative(&inputText, end, endUTF8)) { | 3538 if (!utextOffsetToNative(&inputText, end, endUTF8)) { |
3531 errln("Error at line %d: could not find native index for gro
up end %d. UTF16 index %d", line, i, end); | 3539 errln("Error at line %d: could not find native index for gro
up end %d. UTF16 index %d", line, i, end); |
3532 failed = TRUE; | 3540 failed = TRUE; |
3533 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. | 3541 goto cleanupAndReturn; // Good chance of subsequent bogus e
rrors. Stop now. |
3534 } | 3542 } |
3535 setInt(groupEndsUTF8, endUTF8, i); | 3543 setInt(groupEndsUTF8, endUTF8, i); |
(...skipping 14 matching lines...) Expand all Loading... |
3550 if (UTF8Matcher != NULL) { | 3558 if (UTF8Matcher != NULL) { |
3551 UTF8Matcher->useAnchoringBounds(FALSE); | 3559 UTF8Matcher->useAnchoringBounds(FALSE); |
3552 } | 3560 } |
3553 } | 3561 } |
3554 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag | 3562 if (flags.indexOf((UChar)0x62) >= 0) { // 'b' transparent bounds flag |
3555 matcher->useTransparentBounds(TRUE); | 3563 matcher->useTransparentBounds(TRUE); |
3556 if (UTF8Matcher != NULL) { | 3564 if (UTF8Matcher != NULL) { |
3557 UTF8Matcher->useTransparentBounds(TRUE); | 3565 UTF8Matcher->useTransparentBounds(TRUE); |
3558 } | 3566 } |
3559 } | 3567 } |
3560 | 3568 |
3561 | 3569 |
3562 | 3570 |
3563 // | 3571 // |
3564 // Do a find on the de-tagged input using the caller's pattern | 3572 // Do a find on the de-tagged input using the caller's pattern |
3565 // TODO: error on count>1 and not find(). | 3573 // TODO: error on count>1 and not find(). |
3566 // error on both matches() and lookingAt(). | 3574 // error on both matches() and lookingAt(). |
3567 // | 3575 // |
3568 for (i=0; i<numFinds; i++) { | 3576 for (i=0; i<numFinds; i++) { |
3569 if (useMatchesFunc) { | 3577 if (useMatchesFunc) { |
3570 isMatch = matcher->matches(status); | 3578 isMatch = matcher->matches(status); |
3571 if (UTF8Matcher != NULL) { | 3579 if (UTF8Matcher != NULL) { |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3626 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d", | 3634 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d", |
3627 line, i, expectedStart, matcher->start(i, status)); | 3635 line, i, expectedStart, matcher->start(i, status)); |
3628 failed = TRUE; | 3636 failed = TRUE; |
3629 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. | 3637 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. |
3630 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expec
tedStartUTF8) { | 3638 } else if (UTF8Matcher != NULL && UTF8Matcher->start(i, status) != expec
tedStartUTF8) { |
3631 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d (UTF8)", | 3639 errln("Error at line %d: incorrect start position for group %d. Exp
ected %d, got %d (UTF8)", |
3632 line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); | 3640 line, i, expectedStartUTF8, UTF8Matcher->start(i, status)); |
3633 failed = TRUE; | 3641 failed = TRUE; |
3634 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. | 3642 goto cleanupAndReturn; // Good chance of subsequent bogus errors.
Stop now. |
3635 } | 3643 } |
3636 | 3644 |
3637 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti
(i)); | 3645 int32_t expectedEnd = (i >= groupEnds.size()? -1 : groupEnds.elementAti
(i)); |
3638 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF
8.elementAti(i)); | 3646 int32_t expectedEndUTF8 = (i >= groupEndsUTF8.size()? -1 : groupEndsUTF
8.elementAti(i)); |
3639 if (matcher->end(i, status) != expectedEnd) { | 3647 if (matcher->end(i, status) != expectedEnd) { |
3640 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d", | 3648 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d", |
3641 line, i, expectedEnd, matcher->end(i, status)); | 3649 line, i, expectedEnd, matcher->end(i, status)); |
3642 failed = TRUE; | 3650 failed = TRUE; |
3643 // Error on end position; keep going; real error is probably yet to
come as group | 3651 // Error on end position; keep going; real error is probably yet to
come as group |
3644 // end positions work from end of the input data towards the front
. | 3652 // end positions work from end of the input data towards the front
. |
3645 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expecte
dEndUTF8) { | 3653 } else if (UTF8Matcher != NULL && UTF8Matcher->end(i, status) != expecte
dEndUTF8) { |
3646 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d (UTF8)", | 3654 errln("Error at line %d: incorrect end position for group %d. Expec
ted %d, got %d (UTF8)", |
(...skipping 16 matching lines...) Expand all Loading... |
3663 | 3671 |
3664 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa
lse | 3672 if ((flags.indexOf((UChar)0x59) >= 0) && // 'Y' flag: RequireEnd() == fa
lse |
3665 matcher->requireEnd() == TRUE) { | 3673 matcher->requireEnd() == TRUE) { |
3666 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l
ine); | 3674 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE", l
ine); |
3667 failed = TRUE; | 3675 failed = TRUE; |
3668 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && //
'Y' flag: RequireEnd() == false | 3676 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x59) >= 0) && //
'Y' flag: RequireEnd() == false |
3669 UTF8Matcher->requireEnd() == TRUE) { | 3677 UTF8Matcher->requireEnd() == TRUE) { |
3670 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT
F8)", line); | 3678 errln("Error at line %d: requireEnd() returned TRUE. Expected FALSE (UT
F8)", line); |
3671 failed = TRUE; | 3679 failed = TRUE; |
3672 } | 3680 } |
3673 | 3681 |
3674 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr
ue | 3682 if ((flags.indexOf((UChar)0x79) >= 0) && // 'y' flag: RequireEnd() == tr
ue |
3675 matcher->requireEnd() == FALSE) { | 3683 matcher->requireEnd() == FALSE) { |
3676 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l
ine); | 3684 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE", l
ine); |
3677 failed = TRUE; | 3685 failed = TRUE; |
3678 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && //
'Y' flag: RequireEnd() == false | 3686 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x79) >= 0) && //
'Y' flag: RequireEnd() == false |
3679 UTF8Matcher->requireEnd() == FALSE) { | 3687 UTF8Matcher->requireEnd() == FALSE) { |
3680 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT
F8)", line); | 3688 errln("Error at line %d: requireEnd() returned FALSE. Expected TRUE (UT
F8)", line); |
3681 failed = TRUE; | 3689 failed = TRUE; |
3682 } | 3690 } |
3683 | 3691 |
3684 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false | 3692 if ((flags.indexOf((UChar)0x5A) >= 0) && // 'Z' flag: hitEnd() == false |
3685 matcher->hitEnd() == TRUE) { | 3693 matcher->hitEnd() == TRUE) { |
3686 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line)
; | 3694 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE", line)
; |
3687 failed = TRUE; | 3695 failed = TRUE; |
3688 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && //
'Z' flag: hitEnd() == false | 3696 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x5A) >= 0) && //
'Z' flag: hitEnd() == false |
3689 UTF8Matcher->hitEnd() == TRUE) { | 3697 UTF8Matcher->hitEnd() == TRUE) { |
3690 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)"
, line); | 3698 errln("Error at line %d: hitEnd() returned TRUE. Expected FALSE (UTF8)"
, line); |
3691 failed = TRUE; | 3699 failed = TRUE; |
3692 } | 3700 } |
3693 | 3701 |
3694 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true | 3702 if ((flags.indexOf((UChar)0x7A) >= 0) && // 'z' flag: hitEnd() == true |
3695 matcher->hitEnd() == FALSE) { | 3703 matcher->hitEnd() == FALSE) { |
3696 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line)
; | 3704 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE", line)
; |
3697 failed = TRUE; | 3705 failed = TRUE; |
3698 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && //
'z' flag: hitEnd() == true | 3706 } else if (UTF8Matcher != NULL && (flags.indexOf((UChar)0x7A) >= 0) && //
'z' flag: hitEnd() == true |
3699 UTF8Matcher->hitEnd() == FALSE) { | 3707 UTF8Matcher->hitEnd() == FALSE) { |
3700 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)"
, line); | 3708 errln("Error at line %d: hitEnd() returned FALSE. Expected TRUE (UTF8)"
, line); |
3701 failed = TRUE; | 3709 failed = TRUE; |
3702 } | 3710 } |
3703 | 3711 |
3704 | 3712 |
3705 cleanupAndReturn: | 3713 cleanupAndReturn: |
3706 if (failed) { | 3714 if (failed) { |
3707 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " | 3715 infoln((UnicodeString)"\""+pattern+(UnicodeString)"\" " |
3708 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); | 3716 +flags+(UnicodeString)" \""+inputString+(UnicodeString)"\""); |
3709 // callerPattern->dump(); | 3717 // callerPattern->dump(); |
3710 } | 3718 } |
3711 delete parseMatcher; | 3719 delete parseMatcher; |
3712 delete parsePat; | 3720 delete parsePat; |
3713 delete UTF8Matcher; | 3721 delete UTF8Matcher; |
3714 delete UTF8Pattern; | 3722 delete UTF8Pattern; |
3715 delete matcher; | 3723 delete matcher; |
3716 delete callerPattern; | 3724 delete callerPattern; |
3717 | 3725 |
3718 utext_close(&inputText); | 3726 utext_close(&inputText); |
3719 delete[] inputChars; | 3727 delete[] inputChars; |
3720 utext_close(&patternText); | 3728 utext_close(&patternText); |
3721 delete[] patternChars; | 3729 delete[] patternChars; |
3722 ucnv_close(UTF8Converter); | 3730 ucnv_close(UTF8Converter); |
3723 } | 3731 } |
3724 | 3732 |
3725 | 3733 |
3726 | 3734 |
3727 | 3735 |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3783 // Invalid Back Reference \0 | 3791 // Invalid Back Reference \0 |
3784 // For ICU 3.8 and earlier | 3792 // For ICU 3.8 and earlier |
3785 // For ICU versions newer than 3.8, \0 introduces an octal escape. | 3793 // For ICU versions newer than 3.8, \0 introduces an octal escape. |
3786 // | 3794 // |
3787 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); | 3795 REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_BAD_ESCAPE_SEQUENCE); |
3788 | 3796 |
3789 } | 3797 } |
3790 | 3798 |
3791 | 3799 |
3792 //------------------------------------------------------------------------------
- | 3800 //------------------------------------------------------------------------------
- |
3793 // | 3801 // |
3794 // Read a text data file, convert it to UChars, and return the data | 3802 // Read a text data file, convert it to UChars, and return the data |
3795 // in one big UChar * buffer, which the caller must delete. | 3803 // in one big UChar * buffer, which the caller must delete. |
3796 // | 3804 // |
3797 //------------------------------------------------------------------------------
-- | 3805 //------------------------------------------------------------------------------
-- |
3798 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, | 3806 UChar *RegexTest::ReadAndConvertFile(const char *fileName, int32_t &ulen, |
3799 const char *defEncoding, UErrorCode &status
) { | 3807 const char *defEncoding, UErrorCode &status
) { |
3800 UChar *retPtr = NULL; | 3808 UChar *retPtr = NULL; |
3801 char *fileBuf = NULL; | 3809 char *fileBuf = NULL; |
3802 UConverter* conv = NULL; | 3810 UConverter* conv = NULL; |
3803 FILE *f = NULL; | 3811 FILE *f = NULL; |
(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4126 UBool found = testMat->find(); | 4134 UBool found = testMat->find(); |
4127 UBool expected = FALSE; | 4135 UBool expected = FALSE; |
4128 if (fields[2].indexOf(UChar_y) >=0) { | 4136 if (fields[2].indexOf(UChar_y) >=0) { |
4129 expected = TRUE; | 4137 expected = TRUE; |
4130 } | 4138 } |
4131 if (expected != found) { | 4139 if (expected != found) { |
4132 errln("line %d: Expected %smatch, got %smatch", | 4140 errln("line %d: Expected %smatch, got %smatch", |
4133 lineNum, expected?"":"no ", found?"":"no " ); | 4141 lineNum, expected?"":"no ", found?"":"no " ); |
4134 continue; | 4142 continue; |
4135 } | 4143 } |
4136 | 4144 |
4137 // Don't try to check expected results if there is no match. | 4145 // Don't try to check expected results if there is no match. |
4138 // (Some have stuff in the expected fields) | 4146 // (Some have stuff in the expected fields) |
4139 if (!found) { | 4147 if (!found) { |
4140 delete testMat; | 4148 delete testMat; |
4141 delete testPat; | 4149 delete testPat; |
4142 continue; | 4150 continue; |
4143 } | 4151 } |
4144 | 4152 |
4145 // | 4153 // |
4146 // Interpret the Perl expression from the fourth field of the data file, | 4154 // Interpret the Perl expression from the fourth field of the data file, |
(...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4424 const UChar UChar_y = 0x79; | 4432 const UChar UChar_y = 0x79; |
4425 if (flagStr.indexOf(UChar_i) != -1) { | 4433 if (flagStr.indexOf(UChar_i) != -1) { |
4426 flags |= UREGEX_CASE_INSENSITIVE; | 4434 flags |= UREGEX_CASE_INSENSITIVE; |
4427 } | 4435 } |
4428 if (flagStr.indexOf(UChar_m) != -1) { | 4436 if (flagStr.indexOf(UChar_m) != -1) { |
4429 flags |= UREGEX_MULTILINE; | 4437 flags |= UREGEX_MULTILINE; |
4430 } | 4438 } |
4431 if (flagStr.indexOf(UChar_x) != -1) { | 4439 if (flagStr.indexOf(UChar_x) != -1) { |
4432 flags |= UREGEX_COMMENTS; | 4440 flags |= UREGEX_COMMENTS; |
4433 } | 4441 } |
4434 | 4442 |
4435 // | 4443 // |
4436 // Put the pattern in a UTF-8 UText | 4444 // Put the pattern in a UTF-8 UText |
4437 // | 4445 // |
4438 status = U_ZERO_ERROR; | 4446 status = U_ZERO_ERROR; |
4439 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Conve
rter.getAlias(), status); | 4447 patternLength = pattern.extract(patternChars, patternCapacity, UTF8Conve
rter.getAlias(), status); |
4440 if (status == U_BUFFER_OVERFLOW_ERROR) { | 4448 if (status == U_BUFFER_OVERFLOW_ERROR) { |
4441 status = U_ZERO_ERROR; | 4449 status = U_ZERO_ERROR; |
4442 delete[] patternChars; | 4450 delete[] patternChars; |
4443 patternCapacity = patternLength + 1; | 4451 patternCapacity = patternLength + 1; |
4444 patternChars = new char[patternCapacity]; | 4452 patternChars = new char[patternCapacity]; |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4521 UBool found = testMat->find(); | 4529 UBool found = testMat->find(); |
4522 UBool expected = FALSE; | 4530 UBool expected = FALSE; |
4523 if (fields[2].indexOf(UChar_y) >=0) { | 4531 if (fields[2].indexOf(UChar_y) >=0) { |
4524 expected = TRUE; | 4532 expected = TRUE; |
4525 } | 4533 } |
4526 if (expected != found) { | 4534 if (expected != found) { |
4527 errln("line %d: Expected %smatch, got %smatch", | 4535 errln("line %d: Expected %smatch, got %smatch", |
4528 lineNum, expected?"":"no ", found?"":"no " ); | 4536 lineNum, expected?"":"no ", found?"":"no " ); |
4529 continue; | 4537 continue; |
4530 } | 4538 } |
4531 | 4539 |
4532 // Don't try to check expected results if there is no match. | 4540 // Don't try to check expected results if there is no match. |
4533 // (Some have stuff in the expected fields) | 4541 // (Some have stuff in the expected fields) |
4534 if (!found) { | 4542 if (!found) { |
4535 delete testMat; | 4543 delete testMat; |
4536 delete testPat; | 4544 delete testPat; |
4537 continue; | 4545 continue; |
4538 } | 4546 } |
4539 | 4547 |
4540 // | 4548 // |
4541 // Interpret the Perl expression from the fourth field of the data file, | 4549 // Interpret the Perl expression from the fourth field of the data file, |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4664 delete groupsPat; | 4672 delete groupsPat; |
4665 | 4673 |
4666 delete flagMat; | 4674 delete flagMat; |
4667 delete flagPat; | 4675 delete flagPat; |
4668 | 4676 |
4669 delete lineMat; | 4677 delete lineMat; |
4670 delete linePat; | 4678 delete linePat; |
4671 | 4679 |
4672 delete fieldPat; | 4680 delete fieldPat; |
4673 delete [] testData; | 4681 delete [] testData; |
4674 | 4682 |
4675 utext_close(&patternText); | 4683 utext_close(&patternText); |
4676 utext_close(&inputText); | 4684 utext_close(&inputText); |
4677 | 4685 |
4678 delete [] patternChars; | 4686 delete [] patternChars; |
4679 delete [] inputChars; | 4687 delete [] inputChars; |
4680 | 4688 |
4681 | 4689 |
4682 logln("%d tests skipped because of unimplemented regexp features.", skippedU
nimplementedCount); | 4690 logln("%d tests skipped because of unimplemented regexp features.", skippedU
nimplementedCount); |
4683 | 4691 |
4684 } | 4692 } |
4685 | 4693 |
4686 | 4694 |
4687 //-------------------------------------------------------------- | 4695 //-------------------------------------------------------------- |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4731 } | 4739 } |
4732 info->lastSteps = steps; | 4740 info->lastSteps = steps; |
4733 info->numCalls++; | 4741 info->numCalls++; |
4734 return (info->numCalls < info->maxCalls); | 4742 return (info->numCalls < info->maxCalls); |
4735 } | 4743 } |
4736 U_CDECL_END | 4744 U_CDECL_END |
4737 | 4745 |
4738 void RegexTest::Callbacks() { | 4746 void RegexTest::Callbacks() { |
4739 { | 4747 { |
4740 // Getter returns NULLs if no callback has been set | 4748 // Getter returns NULLs if no callback has been set |
4741 | 4749 |
4742 // The variables that the getter will fill in. | 4750 // The variables that the getter will fill in. |
4743 // Init to non-null values so that the action of the getter can be see
n. | 4751 // Init to non-null values so that the action of the getter can be see
n. |
4744 const void *returnedContext = &returnedContext; | 4752 const void *returnedContext = &returnedContext; |
4745 URegexMatchCallback *returnedFn = &testCallBackFn; | 4753 URegexMatchCallback *returnedFn = &testCallBackFn; |
4746 | 4754 |
4747 UErrorCode status = U_ZERO_ERROR; | 4755 UErrorCode status = U_ZERO_ERROR; |
4748 RegexMatcher matcher("x", 0, status); | 4756 RegexMatcher matcher("x", 0, status); |
4749 REGEX_CHECK_STATUS; | 4757 REGEX_CHECK_STATUS; |
4750 matcher.getMatchCallback(returnedFn, returnedContext, status); | 4758 matcher.getMatchCallback(returnedFn, returnedContext, status); |
4751 REGEX_CHECK_STATUS; | 4759 REGEX_CHECK_STATUS; |
4752 REGEX_ASSERT(returnedFn == NULL); | 4760 REGEX_ASSERT(returnedFn == NULL); |
4753 REGEX_ASSERT(returnedContext == NULL); | 4761 REGEX_ASSERT(returnedContext == NULL); |
4754 } | 4762 } |
4755 | 4763 |
4756 { | 4764 { |
4757 // Set and Get work | 4765 // Set and Get work |
4758 callBackContext cbInfo = {this, 0, 0, 0}; | 4766 callBackContext cbInfo = {this, 0, 0, 0}; |
4759 const void *returnedContext; | 4767 const void *returnedContext; |
4760 URegexMatchCallback *returnedFn; | 4768 URegexMatchCallback *returnedFn; |
4761 UErrorCode status = U_ZERO_ERROR; | 4769 UErrorCode status = U_ZERO_ERROR; |
4762 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);
// A pattern that can run long. | 4770 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);
// A pattern that can run long. |
4763 REGEX_CHECK_STATUS; | 4771 REGEX_CHECK_STATUS; |
4764 matcher.setMatchCallback(testCallBackFn, &cbInfo, status); | 4772 matcher.setMatchCallback(testCallBackFn, &cbInfo, status); |
4765 REGEX_CHECK_STATUS; | 4773 REGEX_CHECK_STATUS; |
4766 matcher.getMatchCallback(returnedFn, returnedContext, status); | 4774 matcher.getMatchCallback(returnedFn, returnedContext, status); |
4767 REGEX_CHECK_STATUS; | 4775 REGEX_CHECK_STATUS; |
4768 REGEX_ASSERT(returnedFn == testCallBackFn); | 4776 REGEX_ASSERT(returnedFn == testCallBackFn); |
4769 REGEX_ASSERT(returnedContext == &cbInfo); | 4777 REGEX_ASSERT(returnedContext == &cbInfo); |
4770 | 4778 |
4771 // A short-running match shouldn't invoke the callback | 4779 // A short-running match shouldn't invoke the callback |
4772 status = U_ZERO_ERROR; | 4780 status = U_ZERO_ERROR; |
4773 cbInfo.reset(1); | 4781 cbInfo.reset(1); |
4774 UnicodeString s = "xxx"; | 4782 UnicodeString s = "xxx"; |
4775 matcher.reset(s); | 4783 matcher.reset(s); |
4776 REGEX_ASSERT(matcher.matches(status)); | 4784 REGEX_ASSERT(matcher.matches(status)); |
4777 REGEX_CHECK_STATUS; | 4785 REGEX_CHECK_STATUS; |
4778 REGEX_ASSERT(cbInfo.numCalls == 0); | 4786 REGEX_ASSERT(cbInfo.numCalls == 0); |
4779 | 4787 |
4780 // A medium-length match that runs long enough to invoke the | 4788 // A medium-length match that runs long enough to invoke the |
4781 // callback, but not so long that the callback aborts it. | 4789 // callback, but not so long that the callback aborts it. |
4782 status = U_ZERO_ERROR; | 4790 status = U_ZERO_ERROR; |
4783 cbInfo.reset(4); | 4791 cbInfo.reset(4); |
4784 s = "aaaaaaaaaaaaaaaaaaab"; | 4792 s = "aaaaaaaaaaaaaaaaaaab"; |
4785 matcher.reset(s); | 4793 matcher.reset(s); |
4786 REGEX_ASSERT(matcher.matches(status)==FALSE); | 4794 REGEX_ASSERT(matcher.matches(status)==FALSE); |
4787 REGEX_CHECK_STATUS; | 4795 REGEX_CHECK_STATUS; |
4788 REGEX_ASSERT(cbInfo.numCalls > 0); | 4796 REGEX_ASSERT(cbInfo.numCalls > 0); |
4789 | 4797 |
4790 // A longer running match that the callback function will abort. | 4798 // A longer running match that the callback function will abort. |
4791 status = U_ZERO_ERROR; | 4799 status = U_ZERO_ERROR; |
4792 cbInfo.reset(4); | 4800 cbInfo.reset(4); |
4793 s = "aaaaaaaaaaaaaaaaaaaaaaab"; | 4801 s = "aaaaaaaaaaaaaaaaaaaaaaab"; |
4794 matcher.reset(s); | 4802 matcher.reset(s); |
4795 REGEX_ASSERT(matcher.matches(status)==FALSE); | 4803 REGEX_ASSERT(matcher.matches(status)==FALSE); |
4796 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); | 4804 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
4797 REGEX_ASSERT(cbInfo.numCalls == 4); | 4805 REGEX_ASSERT(cbInfo.numCalls == 4); |
4798 } | 4806 } |
4799 | 4807 |
4800 | 4808 |
4801 } | 4809 } |
4802 | 4810 |
4803 | 4811 |
4804 // | 4812 // |
4805 // FindProgressCallbacks() Test the find "progress" callback function. | 4813 // FindProgressCallbacks() Test the find "progress" callback function. |
4806 // When set, the find progress callback will be invoked during
a find operations | 4814 // When set, the find progress callback will be invoked during
a find operations |
4807 // after each return from a match attempt, giving the applicati
on the opportunity | 4815 // after each return from a match attempt, giving the applicati
on the opportunity |
4808 // to terminate a long-running find operation before it's norma
l completion. | 4816 // to terminate a long-running find operation before it's norma
l completion. |
4809 // | 4817 // |
4810 | 4818 |
4811 struct progressCallBackContext { | 4819 struct progressCallBackContext { |
4812 RegexTest *test; | 4820 RegexTest *test; |
4813 int64_t lastIndex; | 4821 int64_t lastIndex; |
4814 int32_t maxCalls; | 4822 int32_t maxCalls; |
4815 int32_t numCalls; | 4823 int32_t numCalls; |
4816 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; | 4824 void reset(int32_t max) {maxCalls=max; numCalls=0;lastIndex=0;}; |
4817 }; | 4825 }; |
4818 | 4826 |
| 4827 // call-back function for find(). |
| 4828 // Return TRUE to continue the find(). |
| 4829 // Return FALSE to stop the find(). |
4819 U_CDECL_BEGIN | 4830 U_CDECL_BEGIN |
4820 static UBool U_CALLCONV | 4831 static UBool U_CALLCONV |
4821 testProgressCallBackFn(const void *context, int64_t matchIndex) { | 4832 testProgressCallBackFn(const void *context, int64_t matchIndex) { |
4822 progressCallBackContext *info = (progressCallBackContext *)context; | 4833 progressCallBackContext *info = (progressCallBackContext *)context; |
4823 info->numCalls++; | 4834 info->numCalls++; |
4824 info->lastIndex = matchIndex; | 4835 info->lastIndex = matchIndex; |
4825 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n",
matchIndex, info->numCalls); | 4836 // info->test->infoln("ProgressCallback - matchIndex = %d, numCalls = %d\n",
matchIndex, info->numCalls); |
4826 return (info->numCalls < info->maxCalls); | 4837 return (info->numCalls < info->maxCalls); |
4827 } | 4838 } |
4828 U_CDECL_END | 4839 U_CDECL_END |
4829 | 4840 |
4830 void RegexTest::FindProgressCallbacks() { | 4841 void RegexTest::FindProgressCallbacks() { |
4831 { | 4842 { |
4832 // Getter returns NULLs if no callback has been set | 4843 // Getter returns NULLs if no callback has been set |
4833 | 4844 |
4834 // The variables that the getter will fill in. | 4845 // The variables that the getter will fill in. |
4835 // Init to non-null values so that the action of the getter can be see
n. | 4846 // Init to non-null values so that the action of the getter can be see
n. |
4836 const void *returnedContext = &returnedContext; | 4847 const void *returnedContext = &returnedContext; |
4837 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; | 4848 URegexFindProgressCallback *returnedFn = &testProgressCallBackFn; |
4838 | 4849 |
4839 UErrorCode status = U_ZERO_ERROR; | 4850 UErrorCode status = U_ZERO_ERROR; |
4840 RegexMatcher matcher("x", 0, status); | 4851 RegexMatcher matcher("x", 0, status); |
4841 REGEX_CHECK_STATUS; | 4852 REGEX_CHECK_STATUS; |
4842 matcher.getFindProgressCallback(returnedFn, returnedContext, status); | 4853 matcher.getFindProgressCallback(returnedFn, returnedContext, status); |
4843 REGEX_CHECK_STATUS; | 4854 REGEX_CHECK_STATUS; |
4844 REGEX_ASSERT(returnedFn == NULL); | 4855 REGEX_ASSERT(returnedFn == NULL); |
4845 REGEX_ASSERT(returnedContext == NULL); | 4856 REGEX_ASSERT(returnedContext == NULL); |
4846 } | 4857 } |
4847 | 4858 |
4848 { | 4859 { |
4849 // Set and Get work | 4860 // Set and Get work |
4850 progressCallBackContext cbInfo = {this, 0, 0, 0}; | 4861 progressCallBackContext cbInfo = {this, 0, 0, 0}; |
4851 const void *returnedContext; | 4862 const void *returnedContext; |
4852 URegexFindProgressCallback *returnedFn; | 4863 URegexFindProgressCallback *returnedFn; |
4853 UErrorCode status = U_ZERO_ERROR; | 4864 UErrorCode status = U_ZERO_ERROR; |
4854 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)+\\2)+x"), 0, status);
// A pattern that can run long. | 4865 RegexMatcher matcher(UNICODE_STRING_SIMPLE("((.)\\2)x"), 0, status); |
4855 REGEX_CHECK_STATUS; | 4866 REGEX_CHECK_STATUS; |
4856 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status)
; | 4867 matcher.setFindProgressCallback(testProgressCallBackFn, &cbInfo, status)
; |
4857 REGEX_CHECK_STATUS; | 4868 REGEX_CHECK_STATUS; |
4858 matcher.getFindProgressCallback(returnedFn, returnedContext, status); | 4869 matcher.getFindProgressCallback(returnedFn, returnedContext, status); |
4859 REGEX_CHECK_STATUS; | 4870 REGEX_CHECK_STATUS; |
4860 REGEX_ASSERT(returnedFn == testProgressCallBackFn); | 4871 REGEX_ASSERT(returnedFn == testProgressCallBackFn); |
4861 REGEX_ASSERT(returnedContext == &cbInfo); | 4872 REGEX_ASSERT(returnedContext == &cbInfo); |
4862 | 4873 |
4863 // A short-running match should NOT invoke the callback. | 4874 // A find that matches on the initial position does NOT invoke the callb
ack. |
4864 status = U_ZERO_ERROR; | 4875 status = U_ZERO_ERROR; |
4865 cbInfo.reset(100); | 4876 cbInfo.reset(100); |
4866 UnicodeString s = "abxxx"; | 4877 UnicodeString s = "aaxxx"; |
4867 matcher.reset(s); | 4878 matcher.reset(s); |
4868 #if 0 | 4879 #if 0 |
4869 matcher.setTrace(TRUE); | 4880 matcher.setTrace(TRUE); |
4870 #endif | 4881 #endif |
4871 REGEX_ASSERT(matcher.find(0, status)); | 4882 REGEX_ASSERT(matcher.find(0, status)); |
4872 REGEX_CHECK_STATUS; | 4883 REGEX_CHECK_STATUS; |
4873 REGEX_ASSERT(cbInfo.numCalls == 0); | 4884 REGEX_ASSERT(cbInfo.numCalls == 0); |
4874 | 4885 |
4875 // A medium running match that causes matcher.find() to invoke our callb
ack for each index. | 4886 // A medium running find() that causes matcher.find() to invoke our call
back for each index, |
| 4887 // but not so many times that we interrupt the operation. |
4876 status = U_ZERO_ERROR; | 4888 status = U_ZERO_ERROR; |
4877 s = "aaaaaaaaaaaaaaaaaaab"; | 4889 s = "aaaaaaaaaaaaaaaaaaab"; |
4878 cbInfo.reset(s.length()); // Some upper limit for number of calls that
is greater than size of our input string | 4890 cbInfo.reset(s.length()); // Some upper limit for number of calls that
is greater than size of our input string |
4879 matcher.reset(s); | 4891 matcher.reset(s); |
4880 REGEX_ASSERT(matcher.find(0, status)==FALSE); | 4892 REGEX_ASSERT(matcher.find(0, status)==FALSE); |
4881 REGEX_CHECK_STATUS; | 4893 REGEX_CHECK_STATUS; |
4882 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); | 4894 REGEX_ASSERT(cbInfo.numCalls > 0 && cbInfo.numCalls < 25); |
4883 | 4895 |
4884 // A longer running match that causes matcher.find() to invoke our callb
ack which we cancel/interrupt at some point. | 4896 // A longer running match that causes matcher.find() to invoke our callb
ack which we cancel/interrupt at some point. |
4885 status = U_ZERO_ERROR; | 4897 status = U_ZERO_ERROR; |
4886 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; | 4898 UnicodeString s1 = "aaaaaaaaaaaaaaaaaaaaaaab"; |
4887 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of
input string | 4899 cbInfo.reset(s1.length() - 5); // Bail early somewhere near the end of
input string |
4888 matcher.reset(s1); | 4900 matcher.reset(s1); |
4889 REGEX_ASSERT(matcher.find(0, status)==FALSE); | 4901 REGEX_ASSERT(matcher.find(0, status)==FALSE); |
4890 REGEX_CHECK_STATUS; | 4902 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
4891 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); | 4903 REGEX_ASSERT(cbInfo.numCalls == s1.length() - 5); |
4892 | 4904 |
4893 #if 0 | |
4894 // Now a match that will succeed, but after an interruption | 4905 // Now a match that will succeed, but after an interruption |
4895 status = U_ZERO_ERROR; | 4906 status = U_ZERO_ERROR; |
4896 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; | 4907 UnicodeString s2 = "aaaaaaaaaaaaaa aaaaaaaaab xxx"; |
4897 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of
input string | 4908 cbInfo.reset(s2.length() - 10); // Bail early somewhere near the end of
input string |
4898 matcher.reset(s2); | 4909 matcher.reset(s2); |
4899 REGEX_ASSERT(matcher.find(0, status)==FALSE); | 4910 REGEX_ASSERT(matcher.find(0, status)==FALSE); |
4900 REGEX_CHECK_STATUS; | 4911 REGEX_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); |
4901 // Now retry the match from where left off | 4912 // Now retry the match from where left off |
4902 cbInfo.maxCalls = 100; // No callback limit | 4913 cbInfo.maxCalls = 100; // No callback limit |
| 4914 status = U_ZERO_ERROR; |
4903 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); | 4915 REGEX_ASSERT(matcher.find(cbInfo.lastIndex, status)); |
4904 REGEX_CHECK_STATUS; | 4916 REGEX_CHECK_STATUS; |
4905 #endif | |
4906 } | 4917 } |
4907 | 4918 |
4908 | 4919 |
4909 } | 4920 } |
4910 | 4921 |
4911 | 4922 |
4912 //--------------------------------------------------------------------------- | 4923 //--------------------------------------------------------------------------- |
4913 // | 4924 // |
4914 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable | 4925 // PreAllocatedUTextCAPI Check the C API with pre-allocated mutable |
4915 // UTexts. The pure-C implementation of UText | 4926 // UTexts. The pure-C implementation of UText |
4916 // has no mutable backing stores, but we can | 4927 // has no mutable backing stores, but we can |
4917 // use UnicodeString here to test the functionality. | 4928 // use UnicodeString here to test the functionality. |
4918 // | 4929 // |
4919 //--------------------------------------------------------------------------- | 4930 //--------------------------------------------------------------------------- |
4920 void RegexTest::PreAllocatedUTextCAPI () { | 4931 void RegexTest::PreAllocatedUTextCAPI () { |
4921 UErrorCode status = U_ZERO_ERROR; | 4932 UErrorCode status = U_ZERO_ERROR; |
4922 URegularExpression *re; | 4933 URegularExpression *re; |
4923 UText patternText = UTEXT_INITIALIZER; | 4934 UText patternText = UTEXT_INITIALIZER; |
4924 UnicodeString buffer; | 4935 UnicodeString buffer; |
4925 UText bufferText = UTEXT_INITIALIZER; | 4936 UText bufferText = UTEXT_INITIALIZER; |
4926 | 4937 |
4927 utext_openUnicodeString(&bufferText, &buffer, &status); | 4938 utext_openUnicodeString(&bufferText, &buffer, &status); |
4928 | 4939 |
4929 /* | 4940 /* |
4930 * getText() and getUText() | 4941 * getText() and getUText() |
4931 */ | 4942 */ |
4932 { | 4943 { |
4933 UText text1 = UTEXT_INITIALIZER; | 4944 UText text1 = UTEXT_INITIALIZER; |
4934 UText text2 = UTEXT_INITIALIZER; | 4945 UText text2 = UTEXT_INITIALIZER; |
4935 UChar text2Chars[20]; | 4946 UChar text2Chars[20]; |
4936 UText *resultText; | 4947 UText *resultText; |
4937 | 4948 |
4938 status = U_ZERO_ERROR; | 4949 status = U_ZERO_ERROR; |
4939 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); | 4950 regextst_openUTF8FromInvariant(&text1, "abcccd", -1, &status); |
4940 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); | 4951 regextst_openUTF8FromInvariant(&text2, "abcccxd", -1, &status); |
4941 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); | 4952 u_uastrncpy(text2Chars, "abcccxd", sizeof(text2)/2); |
4942 utext_openUChars(&text2, text2Chars, -1, &status); | 4953 utext_openUChars(&text2, text2Chars, -1, &status); |
4943 | 4954 |
4944 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); | 4955 regextst_openUTF8FromInvariant(&patternText, "abc*d", -1, &status); |
4945 re = uregex_openUText(&patternText, 0, NULL, &status); | 4956 re = uregex_openUText(&patternText, 0, NULL, &status); |
4946 | 4957 |
4947 /* First set a UText */ | 4958 /* First set a UText */ |
4948 uregex_setUText(re, &text1, &status); | 4959 uregex_setUText(re, &text1, &status); |
4949 resultText = uregex_getUText(re, &bufferText, &status); | 4960 resultText = uregex_getUText(re, &bufferText, &status); |
4950 REGEX_CHECK_STATUS; | 4961 REGEX_CHECK_STATUS; |
4951 REGEX_ASSERT(resultText == &bufferText); | 4962 REGEX_ASSERT(resultText == &bufferText); |
4952 utext_setNativeIndex(resultText, 0); | 4963 utext_setNativeIndex(resultText, 0); |
4953 utext_setNativeIndex(&text1, 0); | 4964 utext_setNativeIndex(&text1, 0); |
4954 REGEX_ASSERT(testUTextEqual(resultText, &text1)); | 4965 REGEX_ASSERT(testUTextEqual(resultText, &text1)); |
4955 | 4966 |
4956 resultText = uregex_getUText(re, &bufferText, &status); | 4967 resultText = uregex_getUText(re, &bufferText, &status); |
4957 REGEX_CHECK_STATUS; | 4968 REGEX_CHECK_STATUS; |
4958 REGEX_ASSERT(resultText == &bufferText); | 4969 REGEX_ASSERT(resultText == &bufferText); |
4959 utext_setNativeIndex(resultText, 0); | 4970 utext_setNativeIndex(resultText, 0); |
4960 utext_setNativeIndex(&text1, 0); | 4971 utext_setNativeIndex(&text1, 0); |
4961 REGEX_ASSERT(testUTextEqual(resultText, &text1)); | 4972 REGEX_ASSERT(testUTextEqual(resultText, &text1)); |
4962 | 4973 |
4963 /* Then set a UChar * */ | 4974 /* Then set a UChar * */ |
4964 uregex_setText(re, text2Chars, 7, &status); | 4975 uregex_setText(re, text2Chars, 7, &status); |
4965 resultText = uregex_getUText(re, &bufferText, &status); | 4976 resultText = uregex_getUText(re, &bufferText, &status); |
4966 REGEX_CHECK_STATUS; | 4977 REGEX_CHECK_STATUS; |
4967 REGEX_ASSERT(resultText == &bufferText); | 4978 REGEX_ASSERT(resultText == &bufferText); |
4968 utext_setNativeIndex(resultText, 0); | 4979 utext_setNativeIndex(resultText, 0); |
4969 utext_setNativeIndex(&text2, 0); | 4980 utext_setNativeIndex(&text2, 0); |
4970 REGEX_ASSERT(testUTextEqual(resultText, &text2)); | 4981 REGEX_ASSERT(testUTextEqual(resultText, &text2)); |
4971 | 4982 |
4972 uregex_close(re); | 4983 uregex_close(re); |
4973 utext_close(&text1); | 4984 utext_close(&text1); |
4974 utext_close(&text2); | 4985 utext_close(&text2); |
4975 } | 4986 } |
4976 | 4987 |
4977 /* | 4988 /* |
4978 * group() | 4989 * group() |
4979 */ | 4990 */ |
4980 { | 4991 { |
4981 UChar text1[80]; | 4992 UChar text1[80]; |
(...skipping 25 matching lines...) Expand all Loading... |
5007 | 5018 |
5008 /* Capture group out of range. Error. */ | 5019 /* Capture group out of range. Error. */ |
5009 status = U_ZERO_ERROR; | 5020 status = U_ZERO_ERROR; |
5010 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); | 5021 actual = uregex_groupUTextDeep(re, 2, &bufferText, &status); |
5011 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | 5022 REGEX_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
5012 REGEX_ASSERT(actual == &bufferText); | 5023 REGEX_ASSERT(actual == &bufferText); |
5013 | 5024 |
5014 uregex_close(re); | 5025 uregex_close(re); |
5015 | 5026 |
5016 } | 5027 } |
5017 | 5028 |
5018 /* | 5029 /* |
5019 * replaceFirst() | 5030 * replaceFirst() |
5020 */ | 5031 */ |
5021 { | 5032 { |
5022 UChar text1[80]; | 5033 UChar text1[80]; |
5023 UChar text2[80]; | 5034 UChar text2[80]; |
5024 UText replText = UTEXT_INITIALIZER; | 5035 UText replText = UTEXT_INITIALIZER; |
5025 UText *result; | 5036 UText *result; |
5026 | 5037 |
5027 status = U_ZERO_ERROR; | 5038 status = U_ZERO_ERROR; |
5028 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); | 5039 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); |
5029 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); | 5040 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
5030 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); | 5041 regextst_openUTF8FromInvariant(&replText, "<$1>", -1, &status); |
5031 | 5042 |
5032 re = uregex_openC("x(.*?)x", 0, NULL, &status); | 5043 re = uregex_openC("x(.*?)x", 0, NULL, &status); |
5033 REGEX_CHECK_STATUS; | 5044 REGEX_CHECK_STATUS; |
5034 | 5045 |
5035 /* Normal case, with match */ | 5046 /* Normal case, with match */ |
5036 uregex_setText(re, text1, -1, &status); | 5047 uregex_setText(re, text1, -1, &status); |
5037 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5048 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
5038 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5049 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
5039 REGEX_CHECK_STATUS; | 5050 REGEX_CHECK_STATUS; |
5040 REGEX_ASSERT(result == &bufferText); | 5051 REGEX_ASSERT(result == &bufferText); |
5041 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); | 5052 REGEX_ASSERT_UTEXT_INVARIANT("Replace <aa> x1x x...x.", result); |
5042 | 5053 |
5043 /* No match. Text should copy to output with no changes. */ | 5054 /* No match. Text should copy to output with no changes. */ |
5044 uregex_setText(re, text2, -1, &status); | 5055 uregex_setText(re, text2, -1, &status); |
5045 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5056 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
5046 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5057 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
5047 REGEX_CHECK_STATUS; | 5058 REGEX_CHECK_STATUS; |
5048 REGEX_ASSERT(result == &bufferText); | 5059 REGEX_ASSERT(result == &bufferText); |
5049 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); | 5060 REGEX_ASSERT_UTEXT_INVARIANT("No match here.", result); |
5050 | 5061 |
5051 /* Unicode escapes */ | 5062 /* Unicode escapes */ |
5052 uregex_setText(re, text1, -1, &status); | 5063 uregex_setText(re, text1, -1, &status); |
5053 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a"
, -1, &status); | 5064 regextst_openUTF8FromInvariant(&replText, "\\\\\\u0041$1\\U00000042$\\a"
, -1, &status); |
5054 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); | 5065 utext_replace(&bufferText, 0, utext_nativeLength(&bufferText), NULL, 0,
&status); |
5055 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); | 5066 result = uregex_replaceFirstUText(re, &replText, &bufferText, &status); |
5056 REGEX_CHECK_STATUS; | 5067 REGEX_CHECK_STATUS; |
5057 REGEX_ASSERT(result == &bufferText); | 5068 REGEX_ASSERT(result == &bufferText); |
5058 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); | 5069 REGEX_ASSERT_UTEXT_INVARIANT("Replace \\AaaB$a x1x x...x.", result); |
5059 | 5070 |
5060 uregex_close(re); | 5071 uregex_close(re); |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5097 | 5108 |
5098 uregex_close(re); | 5109 uregex_close(re); |
5099 utext_close(&replText); | 5110 utext_close(&replText); |
5100 } | 5111 } |
5101 | 5112 |
5102 | 5113 |
5103 /* | 5114 /* |
5104 * splitUText() uses the C++ API directly, and the UnicodeString version us
es mutable UTexts, | 5115 * splitUText() uses the C++ API directly, and the UnicodeString version us
es mutable UTexts, |
5105 * so we don't need to test it here. | 5116 * so we don't need to test it here. |
5106 */ | 5117 */ |
5107 | 5118 |
5108 utext_close(&bufferText); | 5119 utext_close(&bufferText); |
5109 utext_close(&patternText); | 5120 utext_close(&patternText); |
5110 } | 5121 } |
5111 | 5122 |
5112 //-------------------------------------------------------------- | 5123 //-------------------------------------------------------------- |
5113 // | 5124 // |
5114 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher
. | 5125 // Bug7651 Regex pattern that exceeds default operator stack depth in matcher
. |
5115 // | 5126 // |
5116 //--------------------------------------------------------------- | 5127 //--------------------------------------------------------------- |
5117 void RegexTest::Bug7651() { | 5128 void RegexTest::Bug7651() { |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5172 { | 5183 { |
5173 UnicodeString str; | 5184 UnicodeString str; |
5174 str.setToBogus(); | 5185 str.setToBogus(); |
5175 pMatcher->reset(str); | 5186 pMatcher->reset(str); |
5176 status = U_ZERO_ERROR; | 5187 status = U_ZERO_ERROR; |
5177 pMatcher->matches(status); | 5188 pMatcher->matches(status); |
5178 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | 5189 REGEX_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); |
5179 delete pMatcher; | 5190 delete pMatcher; |
5180 } | 5191 } |
5181 } | 5192 } |
5182 | 5193 |
5183 | 5194 |
5184 // Bug 7029 | 5195 // Bug 7029 |
5185 void RegexTest::Bug7029() { | 5196 void RegexTest::Bug7029() { |
5186 UErrorCode status = U_ZERO_ERROR; | 5197 UErrorCode status = U_ZERO_ERROR; |
5187 | 5198 |
5188 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); | 5199 RegexMatcher* const pMatcher = new RegexMatcher(".", 0, status); |
5189 UnicodeString text = "abc.def"; | 5200 UnicodeString text = "abc.def"; |
5190 UnicodeString splits[10]; | 5201 UnicodeString splits[10]; |
5191 REGEX_CHECK_STATUS; | 5202 REGEX_CHECK_STATUS; |
5192 int32_t numFields = pMatcher->split(text, splits, 10, status); | 5203 int32_t numFields = pMatcher->split(text, splits, 10, status); |
5193 REGEX_CHECK_STATUS; | 5204 REGEX_CHECK_STATUS; |
5194 REGEX_ASSERT(numFields == 8); | 5205 REGEX_ASSERT(numFields == 8); |
5195 delete pMatcher; | 5206 delete pMatcher; |
5196 } | 5207 } |
5197 | 5208 |
5198 // Bug 9283 | 5209 // Bug 9283 |
5199 // This test is checking for the existance of any supplemental characters that
case-fold | 5210 // This test is checking for the existance of any supplemental characters that
case-fold |
5200 // to a bmp character. | 5211 // to a bmp character. |
5201 // | 5212 // |
5202 // At the time of this writing there are none. If any should appear in a subse
quent release | 5213 // At the time of this writing there are none. If any should appear in a subse
quent release |
5203 // of Unicode, the code in regular expressions compilation that determines the
longest | 5214 // of Unicode, the code in regular expressions compilation that determines the
longest |
5204 // posssible match for a literal string will need to be enhanced. | 5215 // posssible match for a literal string will need to be enhanced. |
5205 // | 5216 // |
5206 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() | 5217 // See file regexcmp.cpp, case URX_STRING_I in RegexCompile::maxMatchLength() |
5207 // for details on what to do in case of a failure of this test. | 5218 // for details on what to do in case of a failure of this test. |
5208 // | 5219 // |
5209 void RegexTest::Bug9283() { | 5220 void RegexTest::Bug9283() { |
| 5221 #if !UCONFIG_NO_NORMALIZATION |
5210 UErrorCode status = U_ZERO_ERROR; | 5222 UErrorCode status = U_ZERO_ERROR; |
5211 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]
]", status); | 5223 UnicodeSet supplementalsWithCaseFolding("[[:CWCF:]&[\\U00010000-\\U0010FFFF]
]", status); |
5212 REGEX_CHECK_STATUS; | 5224 REGEX_CHECK_STATUS; |
5213 int32_t index; | 5225 int32_t index; |
5214 UChar32 c; | 5226 UChar32 c; |
5215 for (index=0; ; index++) { | 5227 for (index=0; ; index++) { |
5216 c = supplementalsWithCaseFolding.charAt(index); | 5228 c = supplementalsWithCaseFolding.charAt(index); |
5217 if (c == -1) { | 5229 if (c == -1) { |
5218 break; | 5230 break; |
5219 } | 5231 } |
5220 UnicodeString cf = UnicodeString(c).foldCase(); | 5232 UnicodeString cf = UnicodeString(c).foldCase(); |
5221 REGEX_ASSERT(cf.length() >= 2); | 5233 REGEX_ASSERT(cf.length() >= 2); |
5222 } | 5234 } |
| 5235 #endif /* #if !UCONFIG_NO_NORMALIZATION */ |
5223 } | 5236 } |
5224 | 5237 |
5225 | 5238 |
5226 void RegexTest::CheckInvBufSize() { | 5239 void RegexTest::CheckInvBufSize() { |
5227 if(inv_next>=INV_BUFSIZ) { | 5240 if(inv_next>=INV_BUFSIZ) { |
5228 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %
d )\n", | 5241 errln("%s: increase #define of INV_BUFSIZ ( is %d but needs to be at least %
d )\n", |
5229 __FILE__, INV_BUFSIZ, inv_next); | 5242 __FILE__, INV_BUFSIZ, inv_next); |
5230 } else { | 5243 } else { |
5231 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); | 5244 logln("%s: INV_BUFSIZ is %d, usage %d\n", __FILE__, INV_BUFSIZ, inv_next); |
5232 } | 5245 } |
5233 } | 5246 } |
5234 | 5247 |
5235 void RegexTest::TestBug11371() { | 5248 |
| 5249 void RegexTest::Bug10459() { |
5236 UErrorCode status = U_ZERO_ERROR; | 5250 UErrorCode status = U_ZERO_ERROR; |
5237 UnicodeString patternString; | 5251 UnicodeString patternString("(txt)"); |
| 5252 UnicodeString txtString("txt"); |
5238 | 5253 |
5239 for (int i=0; i<8000000; i++) { | 5254 UText *utext_pat = utext_openUnicodeString(NULL, &patternString, &status); |
5240 patternString.append(UnicodeString("()")); | 5255 REGEX_CHECK_STATUS; |
| 5256 UText *utext_txt = utext_openUnicodeString(NULL, &txtString, &status); |
| 5257 REGEX_CHECK_STATUS; |
| 5258 |
| 5259 URegularExpression *icu_re = uregex_openUText(utext_pat, 0, NULL, &status); |
| 5260 REGEX_CHECK_STATUS; |
| 5261 |
| 5262 uregex_setUText(icu_re, utext_txt, &status); |
| 5263 REGEX_CHECK_STATUS; |
| 5264 |
| 5265 // The bug was that calling uregex_group() before doing a matching operation |
| 5266 // was causing a segfault. Only for Regular Expressions created from UText
. |
| 5267 // It should set an U_REGEX_INVALID_STATE. |
| 5268 |
| 5269 UChar buf[100]; |
| 5270 int32_t len = uregex_group(icu_re, 0, buf, UPRV_LENGTHOF(buf), &status); |
| 5271 REGEX_ASSERT(status == U_REGEX_INVALID_STATE); |
| 5272 REGEX_ASSERT(len == 0); |
| 5273 |
| 5274 uregex_close(icu_re); |
| 5275 utext_close(utext_pat); |
| 5276 utext_close(utext_txt); |
| 5277 } |
| 5278 |
| 5279 void RegexTest::TestCaseInsensitiveStarters() { |
| 5280 // Test that the data used by RegexCompile::findCaseInsensitiveStarters() ha
sn't |
| 5281 // become stale because of new Unicode characters. |
| 5282 // If it is stale, rerun the generation tool |
| 5283 // svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genre
gexcasing |
| 5284 // and replace the embedded data in i18n/regexcmp.cpp |
| 5285 |
| 5286 for (UChar32 cp=0; cp<=0x10ffff; cp++) { |
| 5287 if (!u_hasBinaryProperty(cp, UCHAR_CASE_SENSITIVE)) { |
| 5288 continue; |
| 5289 } |
| 5290 UnicodeSet s(cp, cp); |
| 5291 s.closeOver(USET_CASE_INSENSITIVE); |
| 5292 UnicodeSetIterator setIter(s); |
| 5293 while (setIter.next()) { |
| 5294 if (!setIter.isString()) { |
| 5295 continue; |
| 5296 } |
| 5297 const UnicodeString &str = setIter.getString(); |
| 5298 UChar32 firstChar = str.char32At(0); |
| 5299 UnicodeSet starters; |
| 5300 RegexCompile::findCaseInsensitiveStarters(firstChar, &starters); |
| 5301 if (!starters.contains(cp)) { |
| 5302 errln("CaseInsensitiveStarters for \\u%x is missing character \\
u%x.", cp, firstChar); |
| 5303 return; |
| 5304 } |
| 5305 } |
5241 } | 5306 } |
| 5307 } |
| 5308 |
| 5309 |
| 5310 void RegexTest::TestBug11049() { |
| 5311 // Original bug report: pattern with match start consisting of one of severa
l individual characters, |
| 5312 // and the text being matched ending with a supplementary character. find()
would read past the |
| 5313 // end of the input text when searching for potential match starting points
. |
| 5314 |
| 5315 // To see the problem, the text must exactly fill an allocated buffer, so th
at valgrind will |
| 5316 // detect the bad read. |
| 5317 |
| 5318 TestCase11049("A|B|C", "a string \\ud800\\udc00", FALSE, __LINE__); |
| 5319 TestCase11049("A|B|C", "string matches at end C", TRUE, __LINE__); |
| 5320 |
| 5321 // Test again with a pattern starting with a single character, |
| 5322 // which takes a different code path than starting with an OR expression, |
| 5323 // but with similar logic. |
| 5324 TestCase11049("C", "a string \\ud800\\udc00", FALSE, __LINE__); |
| 5325 TestCase11049("C", "string matches at end C", TRUE, __LINE__); |
| 5326 } |
| 5327 |
| 5328 // Run a single test case from TestBug11049(). Internal function. |
| 5329 void RegexTest::TestCase11049(const char *pattern, const char *data, UBool expec
tMatch, int32_t lineNumber) { |
| 5330 UErrorCode status = U_ZERO_ERROR; |
| 5331 UnicodeString patternString = UnicodeString(pattern).unescape(); |
5242 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString,
0, status)); | 5332 LocalPointer<RegexPattern> compiledPat(RegexPattern::compile(patternString,
0, status)); |
5243 if (status != U_REGEX_PATTERN_TOO_BIG) { | 5333 |
5244 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | 5334 UnicodeString dataString = UnicodeString(data).unescape(); |
5245 __FILE__, __LINE__, u_errorName(status)); | 5335 UChar *exactBuffer = new UChar[dataString.length()]; |
| 5336 dataString.extract(exactBuffer, dataString.length(), status); |
| 5337 UText *ut = utext_openUChars(NULL, exactBuffer, dataString.length(), &status
); |
| 5338 |
| 5339 LocalPointer<RegexMatcher> matcher(compiledPat->matcher(status)); |
| 5340 REGEX_CHECK_STATUS; |
| 5341 matcher->reset(ut); |
| 5342 UBool result = matcher->find(); |
| 5343 if (result != expectMatch) { |
| 5344 errln("File %s, line %d: expected %d, got %d. Pattern = \"%s\", text = \
"%s\"", |
| 5345 __FILE__, lineNumber, expectMatch, result, pattern, data); |
5246 } | 5346 } |
5247 | 5347 |
5248 status = U_ZERO_ERROR; | 5348 // Rerun test with UTF-8 input text. Won't see buffer overreads, but could s
ee |
5249 patternString = "("; | 5349 // off-by-one on find() with match at the last code point. |
5250 for (int i=0; i<20000000; i++) { | 5350 // Size of the original char * data (invariant charset) will be <= than th
e equivalent UTF-8 |
5251 patternString.append(UnicodeString("A++")); | 5351 // because string.unescape() will only shrink it. |
| 5352 char * utf8Buffer = new char[uprv_strlen(data)+1]; |
| 5353 u_strToUTF8(utf8Buffer, uprv_strlen(data)+1, NULL, dataString.getBuffer(), d
ataString.length(), &status); |
| 5354 REGEX_CHECK_STATUS; |
| 5355 ut = utext_openUTF8(ut, utf8Buffer, -1, &status); |
| 5356 REGEX_CHECK_STATUS; |
| 5357 matcher->reset(ut); |
| 5358 result = matcher->find(); |
| 5359 if (result != expectMatch) { |
| 5360 errln("File %s, line %d (UTF-8 check): expected %d, got %d. Pattern = \"
%s\", text = \"%s\"", |
| 5361 __FILE__, lineNumber, expectMatch, result, pattern, data); |
5252 } | 5362 } |
5253 patternString.append(UnicodeString("){0}B++")); | 5363 delete [] utf8Buffer; |
5254 LocalPointer<RegexPattern> compiledPat2(RegexPattern::compile(patternString,
0, status)); | |
5255 if (status != U_REGEX_PATTERN_TOO_BIG) { | |
5256 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | |
5257 __FILE__, __LINE__, u_errorName(status)); | |
5258 } | |
5259 | 5364 |
5260 // Pattern with too much string data, such that string indexes overflow oper
and data. | 5365 utext_close(ut); |
5261 status = U_ZERO_ERROR; | 5366 delete [] exactBuffer; |
5262 patternString = ""; | 5367 } |
5263 while (patternString.length() < 0x00ffffff) { | |
5264 patternString.append(UnicodeString("stuff and things dont you know, thes
e are a few of my favorite strings\n")); | |
5265 } | |
5266 patternString.append(UnicodeString("X? trailing string")); | |
5267 LocalPointer<RegexPattern> compiledPat3(RegexPattern::compile(patternString,
0, status)); | |
5268 compiledPat3->dumpPattern(); | |
5269 if (status != U_REGEX_PATTERN_TOO_BIG) { | |
5270 errln("File %s, line %d expected status=U_REGEX_PATTERN_TOO_BIG; got %s.
", | |
5271 __FILE__, __LINE__, u_errorName(status)); | |
5272 } | |
5273 | 5368 |
5274 | 5369 |
5275 | 5370 |
5276 } | 5371 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
5277 | 5372 |
5278 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
OLD | NEW |