OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 2004-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ |
| 6 /*******************************************************************************
* |
| 7 * |
| 8 * File reapits.c |
| 9 * |
| 10 ********************************************************************************
*/ |
| 11 /*C API TEST FOR Regular Expressions */ |
| 12 /** |
| 13 * This is an API test for ICU regular expressions in C. It doesn't test very
many cases, and doesn't |
| 14 * try to test the full functionality. It just calls each function and verifie
s that it |
| 15 * works on a basic level. |
| 16 * |
| 17 * More complete testing of regular expression functionality is done with the C
++ tests. |
| 18 **/ |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 |
| 22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 23 |
| 24 #include <stdlib.h> |
| 25 #include <string.h> |
| 26 #include "unicode/uloc.h" |
| 27 #include "unicode/uregex.h" |
| 28 #include "unicode/ustring.h" |
| 29 #include "unicode/utext.h" |
| 30 #include "cintltst.h" |
| 31 |
/*
 * Log a data error, giving the source location and the ICU error name,
 * when `status` holds a failure code. Does nothing on success.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
| 34 |
/*
 * Log a test failure, giving the source location, when `expr` evaluates to
 * FALSE (zero).
 *
 * This deliberately expands to a plain brace block rather than
 * do { } while (0): at least one call site in this file invokes the macro
 * without a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
| 37 |
| 38 /* |
| 39 * TEST_SETUP and TEST_TEARDOWN |
| 40 * macros to handle the boilerplate around setting up regex test cases. |
| 41 * parameters to setup: |
| 42 * pattern: The regex pattern, a (char *) null terminated C str
ing. |
| 43 * testString: The string data, also a (char *) C string. |
| 44 * flags: Regex flags to set when compiling the pattern |
| 45 * |
| 46 * Put arbitrary test code between SETUP and TEARDOWN. |
| 47 * "re" is the compiled, ready-to-go regular expression. |
| 48 */ |
/*
 * TEST_SETUP opens a new scope, compiles `pattern` into `re`, converts
 * `testString` into a heap-allocated UChar buffer (srcString) and installs it
 * as the text of `re`. It relies on `status` and `re` being declared by the
 * enclosing function. The test code that follows runs only if every setup
 * step succeeded.
 *
 * A failed allocation is recorded as U_MEMORY_ALLOCATION_ERROR (unless an
 * earlier failure is already pending) instead of handing a NULL buffer to
 * u_uastrncpy().
 *
 * Every TEST_SETUP must be paired with a TEST_TEARDOWN, which closes the
 * scopes opened here.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re     = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

/*
 * TEST_TEARDOWN closes the conditional test body opened by TEST_SETUP,
 * reports any failure left in `status`, then releases the compiled regular
 * expression and the converted text buffer and closes the outer scope.
 */
#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
| 66 |
| 67 |
| 68 /** |
| 69 * @param expected utf-8 array of bytes to be expected |
| 70 */ |
| 71 static void test_assert_string(const char *expected, const UChar *actual, UBool
nulTerm, const char *file, int line) { |
| 72 char buf_inside_macro[120]; |
| 73 int32_t len = (int32_t)strlen(expected); |
| 74 UBool success; |
| 75 if (nulTerm) { |
| 76 u_austrncpy(buf_inside_macro, (actual), len+1); |
| 77 buf_inside_macro[len+2] = 0; |
| 78 success = (strcmp((expected), buf_inside_macro) == 0); |
| 79 } else { |
| 80 u_austrncpy(buf_inside_macro, (actual), len); |
| 81 buf_inside_macro[len+1] = 0; |
| 82 success = (strncmp((expected), buf_inside_macro, len) == 0); |
| 83 } |
| 84 if (success == FALSE) { |
| 85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", |
| 86 file, line, (expected), buf_inside_macro); |
| 87 } |
| 88 } |
| 89 |
| 90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expecte
d, actual, nulTerm, __FILE__, __LINE__) |
| 91 |
| 92 |
| 93 static void test_assert_utext(const char *expected, UText *actual, const char *f
ile, int line) { |
| 94 UErrorCode status = U_ZERO_ERROR; |
| 95 UText expectedText = UTEXT_INITIALIZER; |
| 96 utext_openUTF8(&expectedText, expected, -1, &status); |
| 97 utext_setNativeIndex(actual, 0); |
| 98 if (utext_compare(&expectedText, -1, actual, -1) != 0) { |
| 99 UChar32 c; |
| 100 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, li
ne, expected); |
| 101 c = utext_next32From(actual, 0); |
| 102 while (c != U_SENTINEL) { |
| 103 if (0x20<c && c <0x7e) { |
| 104 log_err("%c", c); |
| 105 } else { |
| 106 log_err("%#x", c); |
| 107 } |
| 108 c = UTEXT_NEXT32(actual); |
| 109 } |
| 110 log_err("\"\n"); |
| 111 } |
| 112 utext_close(&expectedText); |
| 113 } |
| 114 |
| 115 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual,
__FILE__, __LINE__) |
| 116 |
| 117 |
| 118 |
| 119 static void TestRegexCAPI(void); |
| 120 static void TestBug4315(void); |
| 121 static void TestUTextAPI(void); |
| 122 |
| 123 void addURegexTest(TestNode** root); |
| 124 |
| 125 void addURegexTest(TestNode** root) |
| 126 { |
| 127 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); |
| 128 addTest(root, &TestBug4315, "regex/TestBug4315"); |
| 129 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); |
| 130 } |
| 131 |
| 132 /* |
| 133 * Call back function and context struct used for testing |
| 134 * regular expression user callbacks. This test is mostly the same as |
| 135 * the corresponding C++ test in intltest. |
| 136 */ |
| 137 typedef struct callBackContext { |
| 138 int32_t maxCalls; |
| 139 int32_t numCalls; |
| 140 int32_t lastSteps; |
| 141 } callBackContext; |
| 142 |
| 143 static UBool U_EXPORT2 U_CALLCONV |
| 144 TestCallbackFn(const void *context, int32_t steps) { |
| 145 callBackContext *info = (callBackContext *)context; |
| 146 if (info->lastSteps+1 != steps) { |
| 147 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastS
teps+1, steps); |
| 148 } |
| 149 info->lastSteps = steps; |
| 150 info->numCalls++; |
| 151 return (info->numCalls < info->maxCalls); |
| 152 } |
| 153 |
| 154 /* |
| 155 * Regular Expression C API Tests |
| 156 */ |
| 157 static void TestRegexCAPI(void) { |
| 158 UErrorCode status = U_ZERO_ERROR; |
| 159 URegularExpression *re; |
| 160 UChar pat[200]; |
| 161 UChar *minus1; |
| 162 |
| 163 memset(&minus1, -1, sizeof(minus1)); |
| 164 |
| 165 /* Minimalist open/close */ |
| 166 u_uastrncpy(pat, "abc*", sizeof(pat)/2); |
| 167 re = uregex_open(pat, -1, 0, 0, &status); |
| 168 if (U_FAILURE(status)) { |
| 169 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\"
(Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); |
| 170 return; |
| 171 } |
| 172 uregex_close(re); |
| 173 |
| 174 /* Open with all flag values set */ |
| 175 status = U_ZERO_ERROR; |
| 176 re = uregex_open(pat, -1, |
| 177 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, |
| 178 0, &status); |
| 179 TEST_ASSERT_SUCCESS(status); |
| 180 uregex_close(re); |
| 181 |
| 182 /* Open with an invalid flag */ |
| 183 status = U_ZERO_ERROR; |
| 184 re = uregex_open(pat, -1, 0x40000000, 0, &status); |
| 185 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); |
| 186 uregex_close(re); |
| 187 |
| 188 /* Open with an unimplemented flag */ |
| 189 status = U_ZERO_ERROR; |
| 190 re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status); |
| 191 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); |
| 192 uregex_close(re); |
| 193 |
| 194 /* openC with an invalid parameter */ |
| 195 status = U_ZERO_ERROR; |
| 196 re = uregex_openC(NULL, |
| 197 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, 0, &status); |
| 198 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); |
| 199 |
| 200 /* openC with a failing status on entry; the status must be left unchanged */ |
| 201 status = U_USELESS_COLLATOR_ERROR; |
| 202 re = uregex_openC(NULL, |
| 203 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, 0, &status); |
| 204 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); |
| 205 |
| 206 /* openC open from a C string */ |
| 207 { |
| 208 const UChar *p; |
| 209 int32_t len; |
| 210 status = U_ZERO_ERROR; |
| 211 re = uregex_openC("abc*", 0, 0, &status); |
| 212 TEST_ASSERT_SUCCESS(status); |
| 213 p = uregex_pattern(re, &len, &status); |
| 214 TEST_ASSERT_SUCCESS(status); |
| 215 |
| 216 /* The TEST_ASSERT_SUCCESS above should change too... */ |
| 217 if(U_SUCCESS(status)) { |
| 218 u_uastrncpy(pat, "abc*", sizeof(pat)/2); |
| 219 TEST_ASSERT(u_strcmp(pat, p) == 0); |
| 220 TEST_ASSERT(len==(int32_t)strlen("abc*")); |
| 221 } |
| 222 |
| 223 uregex_close(re); |
| 224 |
| 225 /* TODO: Open with ParseError parameter */ |
| 226 } |
| 227 |
| 228 /* |
| 229 * clone |
| 230 */ |
| 231 { |
| 232 URegularExpression *clone1; |
| 233 URegularExpression *clone2; |
| 234 URegularExpression *clone3; |
| 235 UChar testString1[30]; |
| 236 UChar testString2[30]; |
| 237 UBool result; |
| 238 |
| 239 |
| 240 status = U_ZERO_ERROR; |
| 241 re = uregex_openC("abc*", 0, 0, &status); |
| 242 TEST_ASSERT_SUCCESS(status); |
| 243 clone1 = uregex_clone(re, &status); |
| 244 TEST_ASSERT_SUCCESS(status); |
| 245 TEST_ASSERT(clone1 != NULL); |
| 246 |
| 247 status = U_ZERO_ERROR; |
| 248 clone2 = uregex_clone(re, &status); |
| 249 TEST_ASSERT_SUCCESS(status); |
| 250 TEST_ASSERT(clone2 != NULL); |
| 251 uregex_close(re); |
| 252 |
| 253 status = U_ZERO_ERROR; |
| 254 clone3 = uregex_clone(clone2, &status); |
| 255 TEST_ASSERT_SUCCESS(status); |
| 256 TEST_ASSERT(clone3 != NULL); |
| 257 |
| 258 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); |
| 259 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); |
| 260 |
| 261 status = U_ZERO_ERROR; |
| 262 uregex_setText(clone1, testString1, -1, &status); |
| 263 TEST_ASSERT_SUCCESS(status); |
| 264 result = uregex_lookingAt(clone1, 0, &status); |
| 265 TEST_ASSERT_SUCCESS(status); |
| 266 TEST_ASSERT(result==TRUE); |
| 267 |
| 268 status = U_ZERO_ERROR; |
| 269 uregex_setText(clone2, testString2, -1, &status); |
| 270 TEST_ASSERT_SUCCESS(status); |
| 271 result = uregex_lookingAt(clone2, 0, &status); |
| 272 TEST_ASSERT_SUCCESS(status); |
| 273 TEST_ASSERT(result==FALSE); |
| 274 result = uregex_find(clone2, 0, &status); |
| 275 TEST_ASSERT_SUCCESS(status); |
| 276 TEST_ASSERT(result==TRUE); |
| 277 |
| 278 uregex_close(clone1); |
| 279 uregex_close(clone2); |
| 280 uregex_close(clone3); |
| 281 |
| 282 } |
| 283 |
| 284 /* |
| 285 * pattern() |
| 286 */ |
| 287 { |
| 288 const UChar *resultPat; |
| 289 int32_t resultLen; |
| 290 u_uastrncpy(pat, "hello", sizeof(pat)/2); |
| 291 status = U_ZERO_ERROR; |
| 292 re = uregex_open(pat, -1, 0, NULL, &status); |
| 293 resultPat = uregex_pattern(re, &resultLen, &status); |
| 294 TEST_ASSERT_SUCCESS(status); |
| 295 |
| 296 /* The TEST_ASSERT_SUCCESS above should change too... */ |
| 297 if (U_SUCCESS(status)) { |
| 298 TEST_ASSERT(resultLen == -1); |
| 299 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); |
| 300 } |
| 301 |
| 302 uregex_close(re); |
| 303 |
| 304 status = U_ZERO_ERROR; |
| 305 re = uregex_open(pat, 3, 0, NULL, &status); |
| 306 resultPat = uregex_pattern(re, &resultLen, &status); |
| 307 TEST_ASSERT_SUCCESS(status); |
| 308 TEST_ASSERT_SUCCESS(status); |
| 309 |
| 310 /* The TEST_ASSERT_SUCCESS above should change too... */ |
| 311 if (U_SUCCESS(status)) { |
| 312 TEST_ASSERT(resultLen == 3); |
| 313 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); |
| 314 TEST_ASSERT(u_strlen(resultPat) == 3); |
| 315 } |
| 316 |
| 317 uregex_close(re); |
| 318 } |
| 319 |
| 320 /* |
| 321 * flags() |
| 322 */ |
| 323 { |
| 324 int32_t t; |
| 325 |
| 326 status = U_ZERO_ERROR; |
| 327 re = uregex_open(pat, -1, 0, NULL, &status); |
| 328 t = uregex_flags(re, &status); |
| 329 TEST_ASSERT_SUCCESS(status); |
| 330 TEST_ASSERT(t == 0); |
| 331 uregex_close(re); |
| 332 |
| 333 status = U_ZERO_ERROR; |
| 334 re = uregex_open(pat, -1, 0, NULL, &status); |
| 335 t = uregex_flags(re, &status); |
| 336 TEST_ASSERT_SUCCESS(status); |
| 337 TEST_ASSERT(t == 0); |
| 338 uregex_close(re); |
| 339 |
| 340 status = U_ZERO_ERROR; |
| 341 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL,
&status); |
| 342 t = uregex_flags(re, &status); |
| 343 TEST_ASSERT_SUCCESS(status); |
| 344 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); |
| 345 uregex_close(re); |
| 346 } |
| 347 |
| 348 /* |
| 349 * setText() and lookingAt() |
| 350 */ |
| 351 { |
| 352 UChar text1[50]; |
| 353 UChar text2[50]; |
| 354 UBool result; |
| 355 |
| 356 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); |
| 357 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); |
| 358 status = U_ZERO_ERROR; |
| 359 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); |
| 360 re = uregex_open(pat, -1, 0, NULL, &status); |
| 361 TEST_ASSERT_SUCCESS(status); |
| 362 |
| 363 /* Operation before doing a setText should fail... */ |
| 364 status = U_ZERO_ERROR; |
| 365 uregex_lookingAt(re, 0, &status); |
| 366 TEST_ASSERT( status== U_REGEX_INVALID_STATE); |
| 367 |
| 368 status = U_ZERO_ERROR; |
| 369 uregex_setText(re, text1, -1, &status); |
| 370 result = uregex_lookingAt(re, 0, &status); |
| 371 TEST_ASSERT(result == TRUE); |
| 372 TEST_ASSERT_SUCCESS(status); |
| 373 |
| 374 status = U_ZERO_ERROR; |
| 375 uregex_setText(re, text2, -1, &status); |
| 376 result = uregex_lookingAt(re, 0, &status); |
| 377 TEST_ASSERT(result == FALSE); |
| 378 TEST_ASSERT_SUCCESS(status); |
| 379 |
| 380 status = U_ZERO_ERROR; |
| 381 uregex_setText(re, text1, -1, &status); |
| 382 result = uregex_lookingAt(re, 0, &status); |
| 383 TEST_ASSERT(result == TRUE); |
| 384 TEST_ASSERT_SUCCESS(status); |
| 385 |
| 386 status = U_ZERO_ERROR; |
| 387 uregex_setText(re, text1, 5, &status); |
| 388 result = uregex_lookingAt(re, 0, &status); |
| 389 TEST_ASSERT(result == FALSE); |
| 390 TEST_ASSERT_SUCCESS(status); |
| 391 |
| 392 status = U_ZERO_ERROR; |
| 393 uregex_setText(re, text1, 6, &status); |
| 394 result = uregex_lookingAt(re, 0, &status); |
| 395 TEST_ASSERT(result == TRUE); |
| 396 TEST_ASSERT_SUCCESS(status); |
| 397 |
| 398 uregex_close(re); |
| 399 } |
| 400 |
| 401 |
| 402 /* |
| 403 * getText() |
| 404 */ |
| 405 { |
| 406 UChar text1[50]; |
| 407 UChar text2[50]; |
| 408 const UChar *result; |
| 409 int32_t textLength; |
| 410 |
| 411 u_uastrncpy(text1, "abcccd", sizeof(text1)/2); |
| 412 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); |
| 413 status = U_ZERO_ERROR; |
| 414 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); |
| 415 re = uregex_open(pat, -1, 0, NULL, &status); |
| 416 |
| 417 uregex_setText(re, text1, -1, &status); |
| 418 result = uregex_getText(re, &textLength, &status); |
| 419 TEST_ASSERT(result == text1); |
| 420 TEST_ASSERT(textLength == -1); |
| 421 TEST_ASSERT_SUCCESS(status); |
| 422 |
| 423 status = U_ZERO_ERROR; |
| 424 uregex_setText(re, text2, 7, &status); |
| 425 result = uregex_getText(re, &textLength, &status); |
| 426 TEST_ASSERT(result == text2); |
| 427 TEST_ASSERT(textLength == 7); |
| 428 TEST_ASSERT_SUCCESS(status); |
| 429 |
| 430 status = U_ZERO_ERROR; |
| 431 uregex_setText(re, text2, 4, &status); |
| 432 result = uregex_getText(re, &textLength, &status); |
| 433 TEST_ASSERT(result == text2); |
| 434 TEST_ASSERT(textLength == 4); |
| 435 TEST_ASSERT_SUCCESS(status); |
| 436 uregex_close(re); |
| 437 } |
| 438 |
| 439 /* |
| 440 * matches() |
| 441 */ |
| 442 { |
| 443 UChar text1[50]; |
| 444 UBool result; |
| 445 int len; |
| 446 UChar nullString[] = {0,0,0}; |
| 447 |
| 448 u_uastrncpy(text1, "abcccde", sizeof(text1)/2); |
| 449 status = U_ZERO_ERROR; |
| 450 u_uastrncpy(pat, "abc*d", sizeof(pat)/2); |
| 451 re = uregex_open(pat, -1, 0, NULL, &status); |
| 452 |
| 453 uregex_setText(re, text1, -1, &status); |
| 454 result = uregex_matches(re, 0, &status); |
| 455 TEST_ASSERT(result == FALSE); |
| 456 TEST_ASSERT_SUCCESS(status); |
| 457 |
| 458 status = U_ZERO_ERROR; |
| 459 uregex_setText(re, text1, 6, &status); |
| 460 result = uregex_matches(re, 0, &status); |
| 461 TEST_ASSERT(result == TRUE); |
| 462 TEST_ASSERT_SUCCESS(status); |
| 463 |
| 464 status = U_ZERO_ERROR; |
| 465 uregex_setText(re, text1, 6, &status); |
| 466 result = uregex_matches(re, 1, &status); |
| 467 TEST_ASSERT(result == FALSE); |
| 468 TEST_ASSERT_SUCCESS(status); |
| 469 uregex_close(re); |
| 470 |
| 471 status = U_ZERO_ERROR; |
| 472 re = uregex_openC(".?", 0, NULL, &status); |
| 473 uregex_setText(re, text1, -1, &status); |
| 474 len = u_strlen(text1); |
| 475 result = uregex_matches(re, len, &status); |
| 476 TEST_ASSERT(result == TRUE); |
| 477 TEST_ASSERT_SUCCESS(status); |
| 478 |
| 479 status = U_ZERO_ERROR; |
| 480 uregex_setText(re, nullString, -1, &status); |
| 481 TEST_ASSERT_SUCCESS(status); |
| 482 result = uregex_matches(re, 0, &status); |
| 483 TEST_ASSERT(result == TRUE); |
| 484 TEST_ASSERT_SUCCESS(status); |
| 485 uregex_close(re); |
| 486 } |
| 487 |
| 488 |
| 489 /* |
| 490 * lookingAt() Used in setText test. |
| 491 */ |
| 492 |
| 493 |
| 494 /* |
| 495 * find(), findNext, start, end, reset |
| 496 */ |
| 497 { |
| 498 UChar text1[50]; |
| 499 UBool result; |
| 500 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); |
| 501 status = U_ZERO_ERROR; |
| 502 re = uregex_openC("rx", 0, NULL, &status); |
| 503 |
| 504 uregex_setText(re, text1, -1, &status); |
| 505 result = uregex_find(re, 0, &status); |
| 506 TEST_ASSERT(result == TRUE); |
| 507 TEST_ASSERT(uregex_start(re, 0, &status) == 3); |
| 508 TEST_ASSERT(uregex_end(re, 0, &status) == 5); |
| 509 TEST_ASSERT_SUCCESS(status); |
| 510 |
| 511 result = uregex_find(re, 9, &status); |
| 512 TEST_ASSERT(result == TRUE); |
| 513 TEST_ASSERT(uregex_start(re, 0, &status) == 11); |
| 514 TEST_ASSERT(uregex_end(re, 0, &status) == 13); |
| 515 TEST_ASSERT_SUCCESS(status); |
| 516 |
| 517 result = uregex_find(re, 14, &status); |
| 518 TEST_ASSERT(result == FALSE); |
| 519 TEST_ASSERT_SUCCESS(status); |
| 520 |
| 521 status = U_ZERO_ERROR; |
| 522 uregex_reset(re, 0, &status); |
| 523 |
| 524 result = uregex_findNext(re, &status); |
| 525 TEST_ASSERT(result == TRUE); |
| 526 TEST_ASSERT(uregex_start(re, 0, &status) == 3); |
| 527 TEST_ASSERT(uregex_end(re, 0, &status) == 5); |
| 528 TEST_ASSERT_SUCCESS(status); |
| 529 |
| 530 result = uregex_findNext(re, &status); |
| 531 TEST_ASSERT(result == TRUE); |
| 532 TEST_ASSERT(uregex_start(re, 0, &status) == 6); |
| 533 TEST_ASSERT(uregex_end(re, 0, &status) == 8); |
| 534 TEST_ASSERT_SUCCESS(status); |
| 535 |
| 536 status = U_ZERO_ERROR; |
| 537 uregex_reset(re, 12, &status); |
| 538 |
| 539 result = uregex_findNext(re, &status); |
| 540 TEST_ASSERT(result == TRUE); |
| 541 TEST_ASSERT(uregex_start(re, 0, &status) == 13); |
| 542 TEST_ASSERT(uregex_end(re, 0, &status) == 15); |
| 543 TEST_ASSERT_SUCCESS(status); |
| 544 |
| 545 result = uregex_findNext(re, &status); |
| 546 TEST_ASSERT(result == FALSE); |
| 547 TEST_ASSERT_SUCCESS(status); |
| 548 |
| 549 uregex_close(re); |
| 550 } |
| 551 |
| 552 /* |
| 553 * groupCount |
| 554 */ |
| 555 { |
| 556 int32_t result; |
| 557 |
| 558 status = U_ZERO_ERROR; |
| 559 re = uregex_openC("abc", 0, NULL, &status); |
| 560 result = uregex_groupCount(re, &status); |
| 561 TEST_ASSERT_SUCCESS(status); |
| 562 TEST_ASSERT(result == 0); |
| 563 uregex_close(re); |
| 564 |
| 565 status = U_ZERO_ERROR; |
| 566 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); |
| 567 result = uregex_groupCount(re, &status); |
| 568 TEST_ASSERT_SUCCESS(status); |
| 569 TEST_ASSERT(result == 3); |
| 570 uregex_close(re); |
| 571 |
| 572 } |
| 573 |
| 574 |
| 575 /* |
| 576 * group() |
| 577 */ |
| 578 { |
| 579 UChar text1[80]; |
| 580 UChar buf[80]; |
| 581 UBool result; |
| 582 int32_t resultSz; |
| 583 u_uastrncpy(text1, "noise abc interior def, and this is off the end", s
izeof(text1)/2); |
| 584 |
| 585 status = U_ZERO_ERROR; |
| 586 re = uregex_openC("abc(.*?)def", 0, NULL, &status); |
| 587 TEST_ASSERT_SUCCESS(status); |
| 588 |
| 589 |
| 590 uregex_setText(re, text1, -1, &status); |
| 591 result = uregex_find(re, 0, &status); |
| 592 TEST_ASSERT(result==TRUE); |
| 593 |
| 594 /* Capture Group 0, the full match. Should succeed. */ |
| 595 status = U_ZERO_ERROR; |
| 596 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); |
| 597 TEST_ASSERT_SUCCESS(status); |
| 598 TEST_ASSERT_STRING("abc interior def", buf, TRUE); |
| 599 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); |
| 600 |
| 601 /* Capture group #1. Should succeed. */ |
| 602 status = U_ZERO_ERROR; |
| 603 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); |
| 604 TEST_ASSERT_SUCCESS(status); |
| 605 TEST_ASSERT_STRING(" interior ", buf, TRUE); |
| 606 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); |
| 607 |
| 608 /* Capture group out of range. Error. */ |
| 609 status = U_ZERO_ERROR; |
| 610 uregex_group(re, 2, buf, sizeof(buf)/2, &status); |
| 611 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 612 |
| 613 /* NULL buffer, pure pre-flight */ |
| 614 status = U_ZERO_ERROR; |
| 615 resultSz = uregex_group(re, 0, NULL, 0, &status); |
| 616 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 617 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); |
| 618 |
| 619 /* Too small buffer, truncated string */ |
| 620 status = U_ZERO_ERROR; |
| 621 memset(buf, -1, sizeof(buf)); |
| 622 resultSz = uregex_group(re, 0, buf, 5, &status); |
| 623 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 624 TEST_ASSERT_STRING("abc i", buf, FALSE); |
| 625 TEST_ASSERT(buf[5] == (UChar)0xffff); |
| 626 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); |
| 627 |
| 628 /* Output string just fits buffer, no NUL term. */ |
| 629 status = U_ZERO_ERROR; |
| 630 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"),
&status); |
| 631 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); |
| 632 TEST_ASSERT_STRING("abc interior def", buf, FALSE); |
| 633 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); |
| 634 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); |
| 635 |
| 636 uregex_close(re); |
| 637 |
| 638 } |
| 639 |
| 640 /* |
| 641 * Regions |
| 642 */ |
| 643 |
| 644 |
| 645 /* SetRegion(), getRegion() do something */ |
| 646 TEST_SETUP(".*", "0123456789ABCDEF", 0) |
| 647 UChar resultString[40]; |
| 648 TEST_ASSERT(uregex_regionStart(re, &status) == 0); |
| 649 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); |
| 650 uregex_setRegion(re, 3, 6, &status); |
| 651 TEST_ASSERT(uregex_regionStart(re, &status) == 3); |
| 652 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); |
| 653 TEST_ASSERT(uregex_findNext(re, &status)); |
| 654 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &s
tatus) == 3) |
| 655 TEST_ASSERT_STRING("345", resultString, TRUE); |
| 656 TEST_TEARDOWN; |
| 657 |
| 658 /* find(start=-1) uses regions */ |
| 659 TEST_SETUP(".*", "0123456789ABCDEF", 0); |
| 660 uregex_setRegion(re, 4, 6, &status); |
| 661 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); |
| 662 TEST_ASSERT(uregex_start(re, 0, &status) == 4); |
| 663 TEST_ASSERT(uregex_end(re, 0, &status) == 6); |
| 664 TEST_TEARDOWN; |
| 665 |
| 666 /* find (start >=0) does not use regions */ |
| 667 TEST_SETUP(".*", "0123456789ABCDEF", 0); |
| 668 uregex_setRegion(re, 4, 6, &status); |
| 669 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); |
| 670 TEST_ASSERT(uregex_start(re, 0, &status) == 0); |
| 671 TEST_ASSERT(uregex_end(re, 0, &status) == 16); |
| 672 TEST_TEARDOWN; |
| 673 |
| 674 /* findNext() obeys regions */ |
| 675 TEST_SETUP(".", "0123456789ABCDEF", 0); |
| 676 uregex_setRegion(re, 4, 6, &status); |
| 677 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); |
| 678 TEST_ASSERT(uregex_start(re, 0, &status) == 4); |
| 679 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); |
| 680 TEST_ASSERT(uregex_start(re, 0, &status) == 5); |
| 681 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); |
| 682 TEST_TEARDOWN; |
| 683 |
| 684 /* matches(start=-1) uses regions
*/ |
| 685 /* Also, verify that non-greedy *? succeeds in finding the full match
. */ |
| 686 TEST_SETUP(".*?", "0123456789ABCDEF", 0); |
| 687 uregex_setRegion(re, 4, 6, &status); |
| 688 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); |
| 689 TEST_ASSERT(uregex_start(re, 0, &status) == 4); |
| 690 TEST_ASSERT(uregex_end(re, 0, &status) == 6); |
| 691 TEST_TEARDOWN; |
| 692 |
| 693 /* matches (start >=0) does not use regions */ |
| 694 TEST_SETUP(".*?", "0123456789ABCDEF", 0); |
| 695 uregex_setRegion(re, 4, 6, &status); |
| 696 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); |
| 697 TEST_ASSERT(uregex_start(re, 0, &status) == 0); |
| 698 TEST_ASSERT(uregex_end(re, 0, &status) == 16); |
| 699 TEST_TEARDOWN; |
| 700 |
| 701 /* lookingAt(start=-1) uses regions
*/ |
| 702 /* Also, verify that non-greedy *? finds the first (shortest) match.
*/ |
| 703 TEST_SETUP(".*?", "0123456789ABCDEF", 0); |
| 704 uregex_setRegion(re, 4, 6, &status); |
| 705 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); |
| 706 TEST_ASSERT(uregex_start(re, 0, &status) == 4); |
| 707 TEST_ASSERT(uregex_end(re, 0, &status) == 4); |
| 708 TEST_TEARDOWN; |
| 709 |
| 710 /* lookingAt (start >=0) does not use regions */ |
| 711 TEST_SETUP(".*?", "0123456789ABCDEF", 0); |
| 712 uregex_setRegion(re, 4, 6, &status); |
| 713 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); |
| 714 TEST_ASSERT(uregex_start(re, 0, &status) == 0); |
| 715 TEST_ASSERT(uregex_end(re, 0, &status) == 0); |
| 716 TEST_TEARDOWN; |
| 717 |
| 718 /* hitEnd() */ |
| 719 TEST_SETUP("[a-f]*", "abcdefghij", 0); |
| 720 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); |
| 721 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); |
| 722 TEST_TEARDOWN; |
| 723 |
| 724 TEST_SETUP("[a-f]*", "abcdef", 0); |
| 725 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); |
| 726 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); |
| 727 TEST_TEARDOWN; |
| 728 |
| 729 /* requireEnd */ |
| 730 TEST_SETUP("abcd", "abcd", 0); |
| 731 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); |
| 732 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); |
| 733 TEST_TEARDOWN; |
| 734 |
| 735 TEST_SETUP("abcd$", "abcd", 0); |
| 736 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); |
| 737 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); |
| 738 TEST_TEARDOWN; |
| 739 |
| 740 /* anchoringBounds */ |
| 741 TEST_SETUP("abc$", "abcdef", 0); |
| 742 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); |
| 743 uregex_useAnchoringBounds(re, FALSE, &status); |
| 744 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); |
| 745 |
| 746 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); |
| 747 uregex_useAnchoringBounds(re, TRUE, &status); |
| 748 uregex_setRegion(re, 0, 3, &status); |
| 749 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); |
| 750 TEST_ASSERT(uregex_end(re, 0, &status) == 3); |
| 751 TEST_TEARDOWN; |
| 752 |
| 753 /* Transparent Bounds */ |
| 754 TEST_SETUP("abc(?=def)", "abcdef", 0); |
| 755 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); |
| 756 uregex_useTransparentBounds(re, TRUE, &status); |
| 757 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); |
| 758 |
| 759 uregex_useTransparentBounds(re, FALSE, &status); |
| 760 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ |
| 761 uregex_setRegion(re, 0, 3, &status); |
| 762 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, op
aque bounds */ |
| 763 uregex_useTransparentBounds(re, TRUE, &status); |
| 764 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, tr
ansparent bounds */ |
| 765 TEST_ASSERT(uregex_end(re, 0, &status) == 3); |
| 766 TEST_TEARDOWN; |
| 767 |
| 768 |
| 769 /* |
| 770 * replaceFirst() |
| 771 */ |
| 772 { |
| 773 UChar text1[80]; |
| 774 UChar text2[80]; |
| 775 UChar replText[80]; |
| 776 UChar buf[80]; |
| 777 int32_t resultSz; |
| 778 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); |
| 779 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 780 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); |
| 781 |
| 782 status = U_ZERO_ERROR; |
| 783 re = uregex_openC("x(.*?)x", 0, NULL, &status); |
| 784 TEST_ASSERT_SUCCESS(status); |
| 785 |
| 786 /* Normal case, with match */ |
| 787 uregex_setText(re, text1, -1, &status); |
| 788 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &st
atus); |
| 789 TEST_ASSERT_SUCCESS(status); |
| 790 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); |
| 791 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); |
| 792 |
| 793 /* No match. Text should copy to output with no changes. */ |
| 794 status = U_ZERO_ERROR; |
| 795 uregex_setText(re, text2, -1, &status); |
| 796 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &st
atus); |
| 797 TEST_ASSERT_SUCCESS(status); |
| 798 TEST_ASSERT_STRING("No match here.", buf, TRUE); |
| 799 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); |
| 800 |
| 801 /* Match, output just fills buffer, no termination warning. */ |
| 802 status = U_ZERO_ERROR; |
| 803 uregex_setText(re, text1, -1, &status); |
| 804 memset(buf, -1, sizeof(buf)); |
| 805 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a
a> x1x x...x."), &status); |
| 806 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); |
| 807 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); |
| 808 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); |
| 809 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); |
| 810 |
| 811 /* Do the replaceFirst again, without first resetting anything. |
| 812 * Should give the same results. |
| 813 */ |
| 814 status = U_ZERO_ERROR; |
| 815 memset(buf, -1, sizeof(buf)); |
| 816 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a
a> x1x x...x."), &status); |
| 817 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); |
| 818 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); |
| 819 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); |
| 820 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); |
| 821 |
| 822 /* NULL buffer, zero buffer length */ |
| 823 status = U_ZERO_ERROR; |
| 824 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); |
| 825 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 826 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); |
| 827 |
| 828 /* Buffer too small by one */ |
| 829 status = U_ZERO_ERROR; |
| 830 memset(buf, -1, sizeof(buf)); |
| 831 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a
a> x1x x...x.")-1, &status); |
| 832 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 833 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); |
| 834 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); |
| 835 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); |
| 836 |
| 837 uregex_close(re); |
| 838 } |
| 839 |
| 840 |
| 841 /* |
| 842 * replaceAll() |
| 843 */ |
| 844 { |
| 845 UChar text1[80]; /* "Replace xaax x1x x...x." */ |
| 846 UChar text2[80]; /* "No match Here" */ |
| 847 UChar replText[80]; /* "<$1>" */ |
| 848 UChar replText2[80]; /* "<<$1>>" */ |
| 849 const char * pattern = "x(.*?)x"; |
| 850 const char * expectedResult = "Replace <aa> <1> <...>."; |
| 851 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; |
| 852 UChar buf[80]; |
| 853 int32_t resultSize; |
| 854 int32_t expectedResultSize; |
| 855 int32_t expectedResultSize2; |
| 856 int32_t i; |
| 857 |
| 858 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); |
| 859 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 860 u_uastrncpy(replText, "<$1>", sizeof(replText)/2); |
| 861 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); |
| 862 expectedResultSize = strlen(expectedResult); |
| 863 expectedResultSize2 = strlen(expectedResult2); |
| 864 |
| 865 status = U_ZERO_ERROR; |
| 866 re = uregex_openC(pattern, 0, NULL, &status); |
| 867 TEST_ASSERT_SUCCESS(status); |
| 868 |
| 869 /* Normal case, with match */ |
| 870 uregex_setText(re, text1, -1, &status); |
| 871 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &st
atus); |
| 872 TEST_ASSERT_SUCCESS(status); |
| 873 TEST_ASSERT_STRING(expectedResult, buf, TRUE); |
| 874 TEST_ASSERT(resultSize == expectedResultSize); |
| 875 |
| 876 /* No match. Text should copy to output with no changes. */ |
| 877 status = U_ZERO_ERROR; |
| 878 uregex_setText(re, text2, -1, &status); |
| 879 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &st
atus); |
| 880 TEST_ASSERT_SUCCESS(status); |
| 881 TEST_ASSERT_STRING("No match here.", buf, TRUE); |
| 882 TEST_ASSERT(resultSize == u_strlen(text2)); |
| 883 |
| 884 /* Match, output just fills buffer, no termination warning. */ |
| 885 status = U_ZERO_ERROR; |
| 886 uregex_setText(re, text1, -1, &status); |
| 887 memset(buf, -1, sizeof(buf)); |
| 888 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize
, &status); |
| 889 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); |
| 890 TEST_ASSERT_STRING(expectedResult, buf, FALSE); |
| 891 TEST_ASSERT(resultSize == expectedResultSize); |
| 892 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); |
| 893 |
| 894 /* Do the replaceAll again, without first resetting anything. |
| 895 * Should give the same results. |
| 896 */ |
| 897 status = U_ZERO_ERROR; |
| 898 memset(buf, -1, sizeof(buf)); |
| 899 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xa
ax x1x x...x."), &status); |
| 900 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); |
| 901 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); |
| 902 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); |
| 903 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); |
| 904 |
| 905 /* NULL buffer, zero buffer length */ |
| 906 status = U_ZERO_ERROR; |
| 907 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); |
| 908 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 909 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); |
| 910 |
| 911 /* Buffer too small. Try every size, which will tickle edge cases |
| 912 * in uregex_appendReplacement (used by replaceAll) */ |
| 913 for (i=0; i<expectedResultSize; i++) { |
| 914 char expected[80]; |
| 915 status = U_ZERO_ERROR; |
| 916 memset(buf, -1, sizeof(buf)); |
| 917 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); |
| 918 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 919 strcpy(expected, expectedResult); |
| 920 expected[i] = 0; |
| 921 TEST_ASSERT_STRING(expected, buf, FALSE); |
| 922 TEST_ASSERT(resultSize == expectedResultSize); |
| 923 TEST_ASSERT(buf[i] == (UChar)0xffff); |
| 924 } |
| 925 |
| 926 /* Buffer too small. Same as previous test, except this time the replac
ement |
| 927 * text is longer than the match capture group, making the length of the
complete |
| 928 * replacement longer than the original string. |
| 929 */ |
| 930 for (i=0; i<expectedResultSize2; i++) { |
| 931 char expected[80]; |
| 932 status = U_ZERO_ERROR; |
| 933 memset(buf, -1, sizeof(buf)); |
| 934 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); |
| 935 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 936 strcpy(expected, expectedResult2); |
| 937 expected[i] = 0; |
| 938 TEST_ASSERT_STRING(expected, buf, FALSE); |
| 939 TEST_ASSERT(resultSize == expectedResultSize2); |
| 940 TEST_ASSERT(buf[i] == (UChar)0xffff); |
| 941 } |
| 942 |
| 943 |
| 944 uregex_close(re); |
| 945 } |
| 946 |
| 947 |
| 948 /* |
| 949 * appendReplacement() |
| 950 */ |
| 951 { |
| 952 UChar text[100]; |
| 953 UChar repl[100]; |
| 954 UChar buf[100]; |
| 955 UChar *bufPtr; |
| 956 int32_t bufCap; |
| 957 |
| 958 |
| 959 status = U_ZERO_ERROR; |
| 960 re = uregex_openC(".*", 0, 0, &status); |
| 961 TEST_ASSERT_SUCCESS(status); |
| 962 |
| 963 u_uastrncpy(text, "whatever", sizeof(text)/2); |
| 964 u_uastrncpy(repl, "some other", sizeof(repl)/2); |
| 965 uregex_setText(re, text, -1, &status); |
| 966 |
| 967 /* match covers whole target string */ |
| 968 uregex_find(re, 0, &status); |
| 969 TEST_ASSERT_SUCCESS(status); |
| 970 bufPtr = buf; |
| 971 bufCap = sizeof(buf) / 2; |
| 972 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); |
| 973 TEST_ASSERT_SUCCESS(status); |
| 974 TEST_ASSERT_STRING("some other", buf, TRUE); |
| 975 |
| 976 /* Match has \u \U escapes */ |
| 977 uregex_find(re, 0, &status); |
| 978 TEST_ASSERT_SUCCESS(status); |
| 979 bufPtr = buf; |
| 980 bufCap = sizeof(buf) / 2; |
| 981 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); |
| 982 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); |
| 983 TEST_ASSERT_SUCCESS(status); |
| 984 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); |
| 985 |
| 986 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. *
/ |
| 987 status = U_ZERO_ERROR; |
| 988 uregex_find(re, 0, &status); |
| 989 TEST_ASSERT_SUCCESS(status); |
| 990 bufPtr = buf; |
| 991 status = U_BUFFER_OVERFLOW_ERROR; |
| 992 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); |
| 993 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 994 |
| 995 uregex_close(re); |
| 996 } |
| 997 |
| 998 |
| 999 /* |
| 1000 * appendTail(). Checked in ReplaceFirst(), replaceAll(). |
| 1001 */ |
| 1002 |
| 1003 /* |
| 1004 * split() |
| 1005 */ |
| 1006 { |
| 1007 UChar textToSplit[80]; |
| 1008 UChar text2[80]; |
| 1009 UChar buf[200]; |
| 1010 UChar *fields[10]; |
| 1011 int32_t numFields; |
| 1012 int32_t requiredCapacity; |
| 1013 int32_t spaceNeeded; |
| 1014 int32_t sz; |
| 1015 |
| 1016 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/
2); |
| 1017 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 1018 |
| 1019 status = U_ZERO_ERROR; |
| 1020 re = uregex_openC(":", 0, NULL, &status); |
| 1021 |
| 1022 |
| 1023 /* Simple split */ |
| 1024 |
| 1025 uregex_setText(re, textToSplit, -1, &status); |
| 1026 TEST_ASSERT_SUCCESS(status); |
| 1027 |
| 1028 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1029 if (U_SUCCESS(status)) { |
| 1030 memset(fields, -1, sizeof(fields)); |
| 1031 numFields = |
| 1032 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields,
10, &status); |
| 1033 TEST_ASSERT_SUCCESS(status); |
| 1034 |
| 1035 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1036 if(U_SUCCESS(status)) { |
| 1037 TEST_ASSERT(numFields == 3); |
| 1038 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1039 TEST_ASSERT_STRING(" second", fields[1], TRUE); |
| 1040 TEST_ASSERT_STRING(" third", fields[2], TRUE); |
| 1041 TEST_ASSERT(fields[3] == NULL); |
| 1042 |
| 1043 spaceNeeded = u_strlen(textToSplit) - |
| 1044 (numFields - 1) + /* Field delimiters do not appea
r in output */ |
| 1045 numFields; /* Each field gets a NUL termina
tor */ |
| 1046 |
| 1047 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1048 } |
| 1049 } |
| 1050 |
| 1051 uregex_close(re); |
| 1052 |
| 1053 |
| 1054 /* Split with too few output strings available */ |
| 1055 status = U_ZERO_ERROR; |
| 1056 re = uregex_openC(":", 0, NULL, &status); |
| 1057 uregex_setText(re, textToSplit, -1, &status); |
| 1058 TEST_ASSERT_SUCCESS(status); |
| 1059 |
| 1060 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1061 if(U_SUCCESS(status)) { |
| 1062 memset(fields, -1, sizeof(fields)); |
| 1063 numFields = |
| 1064 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields,
2, &status); |
| 1065 TEST_ASSERT_SUCCESS(status); |
| 1066 |
| 1067 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1068 if(U_SUCCESS(status)) { |
| 1069 TEST_ASSERT(numFields == 2); |
| 1070 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1071 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); |
| 1072 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); |
| 1073 |
| 1074 spaceNeeded = u_strlen(textToSplit) - |
| 1075 (numFields - 1) + /* Field delimiters do not appea
r in output */ |
| 1076 numFields; /* Each field gets a NUL termina
tor */ |
| 1077 |
| 1078 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1079 |
| 1080 /* Split with a range of output buffer sizes. */ |
| 1081 spaceNeeded = u_strlen(textToSplit) - |
| 1082 (numFields - 1) + /* Field delimiters do not appear in out
put */ |
| 1083 numFields; /* Each field gets a NUL terminator */ |
| 1084 |
| 1085 for (sz=0; sz < spaceNeeded+1; sz++) { |
| 1086 memset(fields, -1, sizeof(fields)); |
| 1087 status = U_ZERO_ERROR; |
| 1088 numFields = |
| 1089 uregex_split(re, buf, sz, &requiredCapacity, fields, 10,
&status); |
| 1090 if (sz >= spaceNeeded) { |
| 1091 TEST_ASSERT_SUCCESS(status); |
| 1092 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1093 TEST_ASSERT_STRING(" second", fields[1], TRUE); |
| 1094 TEST_ASSERT_STRING(" third", fields[2], TRUE); |
| 1095 } else { |
| 1096 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 1097 } |
| 1098 TEST_ASSERT(numFields == 3); |
| 1099 TEST_ASSERT(fields[3] == NULL); |
| 1100 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1101 } |
| 1102 } |
| 1103 } |
| 1104 |
| 1105 uregex_close(re); |
| 1106 } |
| 1107 |
| 1108 |
| 1109 |
| 1110 |
| 1111 /* Split(), part 2. Patterns with capture groups. The capture group text |
| 1112 * comes out as additional fields. */ |
| 1113 { |
| 1114 UChar textToSplit[80]; |
| 1115 UChar buf[200]; |
| 1116 UChar *fields[10]; |
| 1117 int32_t numFields; |
| 1118 int32_t requiredCapacity; |
| 1119 int32_t spaceNeeded; |
| 1120 int32_t sz; |
| 1121 |
| 1122 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(t
extToSplit)/2); |
| 1123 |
| 1124 status = U_ZERO_ERROR; |
| 1125 re = uregex_openC("<(.*?)>", 0, NULL, &status); |
| 1126 |
| 1127 uregex_setText(re, textToSplit, -1, &status); |
| 1128 TEST_ASSERT_SUCCESS(status); |
| 1129 |
| 1130 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1131 if(U_SUCCESS(status)) { |
| 1132 memset(fields, -1, sizeof(fields)); |
| 1133 numFields = |
| 1134 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields,
10, &status); |
| 1135 TEST_ASSERT_SUCCESS(status); |
| 1136 |
| 1137 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1138 if(U_SUCCESS(status)) { |
| 1139 TEST_ASSERT(numFields == 5); |
| 1140 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1141 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); |
| 1142 TEST_ASSERT_STRING(" second", fields[2], TRUE); |
| 1143 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); |
| 1144 TEST_ASSERT_STRING(" third", fields[4], TRUE); |
| 1145 TEST_ASSERT(fields[5] == NULL); |
| 1146 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /*
"." at NUL positions */ |
| 1147 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1148 } |
| 1149 } |
| 1150 |
| 1151 /* Split with too few output strings available (2) */ |
| 1152 status = U_ZERO_ERROR; |
| 1153 memset(fields, -1, sizeof(fields)); |
| 1154 numFields = |
| 1155 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &
status); |
| 1156 TEST_ASSERT_SUCCESS(status); |
| 1157 |
| 1158 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1159 if(U_SUCCESS(status)) { |
| 1160 TEST_ASSERT(numFields == 2); |
| 1161 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1162 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); |
| 1163 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); |
| 1164 |
| 1165 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NU
L positions */ |
| 1166 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1167 } |
| 1168 |
| 1169 /* Split with too few output strings available (3) */ |
| 1170 status = U_ZERO_ERROR; |
| 1171 memset(fields, -1, sizeof(fields)); |
| 1172 numFields = |
| 1173 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &
status); |
| 1174 TEST_ASSERT_SUCCESS(status); |
| 1175 |
| 1176 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1177 if(U_SUCCESS(status)) { |
| 1178 TEST_ASSERT(numFields == 3); |
| 1179 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1180 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); |
| 1181 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); |
| 1182 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); |
| 1183 |
| 1184 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "."
at NUL positions */ |
| 1185 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1186 } |
| 1187 |
| 1188 /* Split with just enough output strings available (5) */ |
| 1189 status = U_ZERO_ERROR; |
| 1190 memset(fields, -1, sizeof(fields)); |
| 1191 numFields = |
| 1192 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &
status); |
| 1193 TEST_ASSERT_SUCCESS(status); |
| 1194 |
| 1195 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1196 if(U_SUCCESS(status)) { |
| 1197 TEST_ASSERT(numFields == 5); |
| 1198 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1199 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); |
| 1200 TEST_ASSERT_STRING(" second", fields[2], TRUE); |
| 1201 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); |
| 1202 TEST_ASSERT_STRING(" third", fields[4], TRUE); |
| 1203 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); |
| 1204 |
| 1205 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "."
at NUL positions */ |
| 1206 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1207 } |
| 1208 |
| 1209 /* Split, end of text is a field delimiter. */ |
| 1210 status = U_ZERO_ERROR; |
| 1211 sz = strlen("first <tag-a> second<tag-b>"); |
| 1212 uregex_setText(re, textToSplit, sz, &status); |
| 1213 TEST_ASSERT_SUCCESS(status); |
| 1214 |
| 1215 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1216 if(U_SUCCESS(status)) { |
| 1217 memset(fields, -1, sizeof(fields)); |
| 1218 numFields = |
| 1219 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields,
9, &status); |
| 1220 TEST_ASSERT_SUCCESS(status); |
| 1221 |
| 1222 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1223 if(U_SUCCESS(status)) { |
| 1224 TEST_ASSERT(numFields == 4); |
| 1225 TEST_ASSERT_STRING("first ", fields[0], TRUE); |
| 1226 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); |
| 1227 TEST_ASSERT_STRING(" second", fields[2], TRUE); |
| 1228 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); |
| 1229 TEST_ASSERT(fields[4] == NULL); |
| 1230 TEST_ASSERT(fields[8] == NULL); |
| 1231 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); |
| 1232 spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." at
NUL positions */ |
| 1233 TEST_ASSERT(spaceNeeded == requiredCapacity); |
| 1234 } |
| 1235 } |
| 1236 |
| 1237 uregex_close(re); |
| 1238 } |
| 1239 |
| 1240 /* |
| 1241 * set/getTimeLimit |
| 1242 */ |
| 1243 TEST_SETUP("abc$", "abcdef", 0); |
| 1244 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); |
| 1245 uregex_setTimeLimit(re, 1000, &status); |
| 1246 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); |
| 1247 TEST_ASSERT_SUCCESS(status); |
| 1248 uregex_setTimeLimit(re, -1, &status); |
| 1249 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); |
| 1250 status = U_ZERO_ERROR; |
| 1251 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); |
| 1252 TEST_TEARDOWN; |
| 1253 |
| 1254 /* |
| 1255 * set/get Stack Limit |
| 1256 */ |
| 1257 TEST_SETUP("abc$", "abcdef", 0); |
| 1258 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); |
| 1259 uregex_setStackLimit(re, 40000, &status); |
| 1260 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); |
| 1261 TEST_ASSERT_SUCCESS(status); |
| 1262 uregex_setStackLimit(re, -1, &status); |
| 1263 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); |
| 1264 status = U_ZERO_ERROR; |
| 1265 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); |
| 1266 TEST_TEARDOWN; |
| 1267 |
| 1268 |
| 1269 /* |
| 1270 * Get/Set callback functions |
| 1271 * This test is copied from intltest regex/Callbacks |
| 1272 * The pattern and test data will run long enough to cause the callback |
| 1273 * to be invoked. The nested '+' operators give exponential time |
| 1274 * behavior with increasing string length. |
| 1275 */ |
| 1276 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) |
| 1277 callBackContext cbInfo = {4, 0, 0}; |
| 1278 const void *pContext = &cbInfo; |
| 1279 URegexMatchCallback *returnedFn = &TestCallbackFn; |
| 1280 |
| 1281 /* Getting the callback fn when it hasn't been set must return NULL */ |
| 1282 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); |
| 1283 TEST_ASSERT_SUCCESS(status); |
| 1284 TEST_ASSERT(returnedFn == NULL); |
| 1285 TEST_ASSERT(pContext == NULL); |
| 1286 |
| 1287 /* Set the callback and do a match. */ |
| 1288 /* The callback function should record that it has been called. */ |
| 1289 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); |
| 1290 TEST_ASSERT_SUCCESS(status); |
| 1291 TEST_ASSERT(cbInfo.numCalls == 0); |
| 1292 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); |
| 1293 TEST_ASSERT_SUCCESS(status); |
| 1294 TEST_ASSERT(cbInfo.numCalls > 0); |
| 1295 |
| 1296 /* Getting the callback should return the values that were set above. */ |
| 1297 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); |
| 1298 TEST_ASSERT(returnedFn == &TestCallbackFn); |
| 1299 TEST_ASSERT(pContext == &cbInfo); |
| 1300 |
| 1301 TEST_TEARDOWN; |
| 1302 } |
| 1303 |
| 1304 |
| 1305 |
| 1306 static void TestBug4315(void) { /* Regression test (named for bug 4315): uregex_split() with a NULL, zero-capacity buffer, then a retry with a sized buffer. */ |
| 1307 UErrorCode theICUError = U_ZERO_ERROR; |
| 1308 URegularExpression *theRegEx; |
| 1309 UChar *textBuff; |
| 1310 const char *thePattern; |
| 1311 UChar theString[100]; |
| 1312 UChar *destFields[24]; |
| 1313 int32_t neededLength1; |
| 1314 int32_t neededLength2; |
| 1315 /* wordCount receives uregex_split()'s return value; destFieldsSize is the length of destFields[]. */ |
| 1316 int32_t wordCount = 0; |
| 1317 int32_t destFieldsSize = 24; |
| 1318 /* The delimiter "ck " occurs twice in the sentence, so the assertions below expect three fields. */ |
| 1319 thePattern = "ck "; |
| 1320 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle
."); |
| 1321 |
| 1322 /* Compile the delimiter pattern. */ |
| 1323 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError); |
| 1324 TEST_ASSERT_SUCCESS(theICUError); |
| 1325 |
| 1326 /* Attach the input text, passing its explicit length. */ |
| 1327 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError); |
| 1328 TEST_ASSERT_SUCCESS(theICUError); |
| 1329 |
| 1330 /* First split: explicitly pass a NULL buffer and zero capacity to force the */ |
| 1331 /* overflow error. Per the original note, this call is where the reported */ |
| 1332 /* error occurred. The required capacity is returned in neededLength1. */ |
| 1333 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields, |
| 1334 destFieldsSize, &theICUError); |
| 1335 /* The overflow must be reported, and the field count is still expected to be 3. */ |
| 1336 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR); |
| 1337 TEST_ASSERT(wordCount==3); |
| 1338 /* Second split: retry with a buffer sized from the reported requirement (plus one). */ |
| 1339 if(theICUError == U_BUFFER_OVERFLOW_ERROR) |
| 1340 { |
| 1341 theICUError = U_ZERO_ERROR; |
| 1342 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1)); /* NOTE(review): malloc result is not checked for NULL before use */ |
| 1343 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLen
gth2, |
| 1344 destFields, destFieldsSize, &theICUError); |
| 1345 TEST_ASSERT(wordCount==3); |
| 1346 TEST_ASSERT_SUCCESS(theICUError); |
| 1347 TEST_ASSERT(neededLength1 == neededLength2); /* both calls must report the same required capacity */ |
| 1348 TEST_ASSERT_STRING("The qui", destFields[0], TRUE); |
| 1349 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1],
TRUE); |
| 1350 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE); |
| 1351 TEST_ASSERT(destFields[3] == NULL); /* the slot after the last field is expected to be NULL */ |
| 1352 free(textBuff); |
| 1353 } |
| 1354 uregex_close(theRegEx); |
| 1355 } |
| 1356 |
| 1357 /* Based on TestRegexCAPI() */ |
| 1358 static void TestUTextAPI(void) { |
| 1359 UErrorCode status = U_ZERO_ERROR; |
| 1360 URegularExpression *re; |
| 1361 UText patternText = UTEXT_INITIALIZER; |
| 1362 UChar pat[200]; |
| 1363 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; |
| 1364 |
| 1365 /* Minimalist open/close */ |
| 1366 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); |
| 1367 re = uregex_openUText(&patternText, 0, 0, &status); |
| 1368 if (U_FAILURE(status)) { |
| 1369 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\"
(Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); |
| 1370 utext_close(&patternText); |
| 1371 return; |
| 1372 } |
| 1373 uregex_close(re); |
| 1374 |
| 1375 /* Open with all flag values set */ |
| 1376 status = U_ZERO_ERROR; |
| 1377 re = uregex_openUText(&patternText, |
| 1378 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, |
| 1379 0, &status); |
| 1380 TEST_ASSERT_SUCCESS(status); |
| 1381 uregex_close(re); |
| 1382 |
| 1383 /* Open with an invalid flag */ |
| 1384 status = U_ZERO_ERROR; |
| 1385 re = uregex_openUText(&patternText, 0x40000000, 0, &status); |
| 1386 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); |
| 1387 uregex_close(re); |
| 1388 |
| 1389 /* open with an invalid parameter */ |
| 1390 status = U_ZERO_ERROR; |
| 1391 re = uregex_openUText(NULL, |
| 1392 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, 0, &status); |
| 1393 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); |
| 1394 |
| 1395 /* |
| 1396 * clone |
| 1397 */ |
| 1398 { |
| 1399 URegularExpression *clone1; |
| 1400 URegularExpression *clone2; |
| 1401 URegularExpression *clone3; |
| 1402 UChar testString1[30]; |
| 1403 UChar testString2[30]; |
| 1404 UBool result; |
| 1405 |
| 1406 |
| 1407 status = U_ZERO_ERROR; |
| 1408 re = uregex_openUText(&patternText, 0, 0, &status); |
| 1409 TEST_ASSERT_SUCCESS(status); |
| 1410 clone1 = uregex_clone(re, &status); |
| 1411 TEST_ASSERT_SUCCESS(status); |
| 1412 TEST_ASSERT(clone1 != NULL); |
| 1413 |
| 1414 status = U_ZERO_ERROR; |
| 1415 clone2 = uregex_clone(re, &status); |
| 1416 TEST_ASSERT_SUCCESS(status); |
| 1417 TEST_ASSERT(clone2 != NULL); |
| 1418 uregex_close(re); |
| 1419 |
| 1420 status = U_ZERO_ERROR; |
| 1421 clone3 = uregex_clone(clone2, &status); |
| 1422 TEST_ASSERT_SUCCESS(status); |
| 1423 TEST_ASSERT(clone3 != NULL); |
| 1424 |
| 1425 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); |
| 1426 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); |
| 1427 |
| 1428 status = U_ZERO_ERROR; |
| 1429 uregex_setText(clone1, testString1, -1, &status); |
| 1430 TEST_ASSERT_SUCCESS(status); |
| 1431 result = uregex_lookingAt(clone1, 0, &status); |
| 1432 TEST_ASSERT_SUCCESS(status); |
| 1433 TEST_ASSERT(result==TRUE); |
| 1434 |
| 1435 status = U_ZERO_ERROR; |
| 1436 uregex_setText(clone2, testString2, -1, &status); |
| 1437 TEST_ASSERT_SUCCESS(status); |
| 1438 result = uregex_lookingAt(clone2, 0, &status); |
| 1439 TEST_ASSERT_SUCCESS(status); |
| 1440 TEST_ASSERT(result==FALSE); |
| 1441 result = uregex_find(clone2, 0, &status); |
| 1442 TEST_ASSERT_SUCCESS(status); |
| 1443 TEST_ASSERT(result==TRUE); |
| 1444 |
| 1445 uregex_close(clone1); |
| 1446 uregex_close(clone2); |
| 1447 uregex_close(clone3); |
| 1448 |
| 1449 } |
| 1450 |
| 1451 /* |
| 1452 * pattern() and patternText() |
| 1453 */ |
| 1454 { |
| 1455 const UChar *resultPat; |
| 1456 int32_t resultLen; |
| 1457 UText *resultText; |
| 1458 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hell
o */ |
| 1459 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ |
| 1460 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ |
| 1461 status = U_ZERO_ERROR; |
| 1462 |
| 1463 utext_openUTF8(&patternText, str_hello, -1, &status); |
| 1464 re = uregex_open(pat, -1, 0, NULL, &status); |
| 1465 resultPat = uregex_pattern(re, &resultLen, &status); |
| 1466 TEST_ASSERT_SUCCESS(status); |
| 1467 |
| 1468 /* The TEST_ASSERT_SUCCESS above should change too... */ |
| 1469 if (U_SUCCESS(status)) { |
| 1470 TEST_ASSERT(resultLen == -1); |
| 1471 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); |
| 1472 } |
| 1473 |
| 1474 resultText = uregex_patternUText(re, &status); |
| 1475 TEST_ASSERT_SUCCESS(status); |
| 1476 TEST_ASSERT_UTEXT(str_hello, resultText); |
| 1477 |
| 1478 uregex_close(re); |
| 1479 |
| 1480 status = U_ZERO_ERROR; |
| 1481 re = uregex_open(pat, 3, 0, NULL, &status); |
| 1482 resultPat = uregex_pattern(re, &resultLen, &status); |
| 1483 TEST_ASSERT_SUCCESS(status); |
| 1484 |
| 1485 /* The TEST_ASSERT_SUCCESS above should change too... */ |
| 1486 if (U_SUCCESS(status)) { |
| 1487 TEST_ASSERT(resultLen == 3); |
| 1488 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); |
| 1489 TEST_ASSERT(u_strlen(resultPat) == 3); |
| 1490 } |
| 1491 |
| 1492 resultText = uregex_patternUText(re, &status); |
| 1493 TEST_ASSERT_SUCCESS(status); |
| 1494 TEST_ASSERT_UTEXT(str_hel, resultText); |
| 1495 |
| 1496 uregex_close(re); |
| 1497 } |
| 1498 |
| 1499 /* |
| 1500 * setUText() and lookingAt() |
| 1501 */ |
| 1502 { |
| 1503 UText text1 = UTEXT_INITIALIZER; |
| 1504 UText text2 = UTEXT_INITIALIZER; |
| 1505 UBool result; |
| 1506 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 };
/* abcccd */ |
| 1507 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0
x00 }; /* abcccxd */ |
| 1508 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d
*/ |
| 1509 status = U_ZERO_ERROR; |
| 1510 utext_openUTF8(&text1, str_abcccd, -1, &status); |
| 1511 utext_openUTF8(&text2, str_abcccxd, -1, &status); |
| 1512 |
| 1513 utext_openUTF8(&patternText, str_abcd, -1, &status); |
| 1514 re = uregex_openUText(&patternText, 0, NULL, &status); |
| 1515 TEST_ASSERT_SUCCESS(status); |
| 1516 |
| 1517 /* Operation before doing a setText should fail... */ |
| 1518 status = U_ZERO_ERROR; |
| 1519 uregex_lookingAt(re, 0, &status); |
| 1520 TEST_ASSERT( status== U_REGEX_INVALID_STATE); |
| 1521 |
| 1522 status = U_ZERO_ERROR; |
| 1523 uregex_setUText(re, &text1, &status); |
| 1524 result = uregex_lookingAt(re, 0, &status); |
| 1525 TEST_ASSERT(result == TRUE); |
| 1526 TEST_ASSERT_SUCCESS(status); |
| 1527 |
| 1528 status = U_ZERO_ERROR; |
| 1529 uregex_setUText(re, &text2, &status); |
| 1530 result = uregex_lookingAt(re, 0, &status); |
| 1531 TEST_ASSERT(result == FALSE); |
| 1532 TEST_ASSERT_SUCCESS(status); |
| 1533 |
| 1534 status = U_ZERO_ERROR; |
| 1535 uregex_setUText(re, &text1, &status); |
| 1536 result = uregex_lookingAt(re, 0, &status); |
| 1537 TEST_ASSERT(result == TRUE); |
| 1538 TEST_ASSERT_SUCCESS(status); |
| 1539 |
| 1540 uregex_close(re); |
| 1541 utext_close(&text1); |
| 1542 utext_close(&text2); |
| 1543 } |
| 1544 |
| 1545 |
| 1546 /* |
| 1547 * getText() and getUText() |
| 1548 */ |
| 1549 { |
| 1550 UText text1 = UTEXT_INITIALIZER; |
| 1551 UText text2 = UTEXT_INITIALIZER; |
| 1552 UChar text2Chars[20]; |
| 1553 UText *resultText; |
| 1554 const UChar *result; |
| 1555 int32_t textLength; |
| 1556 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 };
/* abcccd */ |
| 1557 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0
x00 }; /* abcccxd */ |
| 1558 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d
*/ |
| 1559 |
| 1560 |
| 1561 status = U_ZERO_ERROR; |
| 1562 utext_openUTF8(&text1, str_abcccd, -1, &status); |
| 1563 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); |
| 1564 utext_openUChars(&text2, text2Chars, -1, &status); |
| 1565 |
| 1566 utext_openUTF8(&patternText, str_abcd, -1, &status); |
| 1567 re = uregex_openUText(&patternText, 0, NULL, &status); |
| 1568 |
| 1569 /* First set a UText */ |
| 1570 uregex_setUText(re, &text1, &status); |
| 1571 resultText = uregex_getUText(re, NULL, &status); |
| 1572 TEST_ASSERT_SUCCESS(status); |
| 1573 TEST_ASSERT(resultText != &text1); |
| 1574 utext_setNativeIndex(resultText, 0); |
| 1575 utext_setNativeIndex(&text1, 0); |
| 1576 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); |
| 1577 utext_close(resultText); |
| 1578 |
| 1579 result = uregex_getText(re, &textLength, &status); /* flattens UText int
o buffer */ |
| 1580 TEST_ASSERT(textLength == -1 || textLength == 6); |
| 1581 resultText = uregex_getUText(re, NULL, &status); |
| 1582 TEST_ASSERT_SUCCESS(status); |
| 1583 TEST_ASSERT(resultText != &text1); |
| 1584 utext_setNativeIndex(resultText, 0); |
| 1585 utext_setNativeIndex(&text1, 0); |
| 1586 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); |
| 1587 utext_close(resultText); |
| 1588 |
| 1589 /* Then set a UChar * */ |
| 1590 uregex_setText(re, text2Chars, 7, &status); |
| 1591 resultText = uregex_getUText(re, NULL, &status); |
| 1592 TEST_ASSERT_SUCCESS(status); |
| 1593 utext_setNativeIndex(resultText, 0); |
| 1594 utext_setNativeIndex(&text2, 0); |
| 1595 TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0); |
| 1596 utext_close(resultText); |
| 1597 result = uregex_getText(re, &textLength, &status); |
| 1598 TEST_ASSERT(textLength == 7); |
| 1599 |
| 1600 uregex_close(re); |
| 1601 utext_close(&text1); |
| 1602 utext_close(&text2); |
| 1603 } |
| 1604 |
| 1605 /* |
| 1606 * matches() |
| 1607 */ |
| 1608 { |
| 1609 UText text1 = UTEXT_INITIALIZER; |
| 1610 UBool result; |
| 1611 UText nullText = UTEXT_INITIALIZER; |
| 1612 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0
x00 }; /* abcccde */ |
| 1613 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d
*/ |
| 1614 |
| 1615 status = U_ZERO_ERROR; |
| 1616 utext_openUTF8(&text1, str_abcccde, -1, &status); |
| 1617 utext_openUTF8(&patternText, str_abcd, -1, &status); |
| 1618 re = uregex_openUText(&patternText, 0, NULL, &status); |
| 1619 |
| 1620 uregex_setUText(re, &text1, &status); |
| 1621 result = uregex_matches(re, 0, &status); |
| 1622 TEST_ASSERT(result == FALSE); |
| 1623 TEST_ASSERT_SUCCESS(status); |
| 1624 uregex_close(re); |
| 1625 |
| 1626 status = U_ZERO_ERROR; |
| 1627 re = uregex_openC(".?", 0, NULL, &status); |
| 1628 uregex_setUText(re, &text1, &status); |
| 1629 result = uregex_matches(re, 7, &status); |
| 1630 TEST_ASSERT(result == TRUE); |
| 1631 TEST_ASSERT_SUCCESS(status); |
| 1632 |
| 1633 status = U_ZERO_ERROR; |
| 1634 utext_openUTF8(&nullText, "", -1, &status); |
| 1635 uregex_setUText(re, &nullText, &status); |
| 1636 TEST_ASSERT_SUCCESS(status); |
| 1637 result = uregex_matches(re, 0, &status); |
| 1638 TEST_ASSERT(result == TRUE); |
| 1639 TEST_ASSERT_SUCCESS(status); |
| 1640 |
| 1641 uregex_close(re); |
| 1642 utext_close(&text1); |
| 1643 utext_close(&nullText); |
| 1644 } |
| 1645 |
| 1646 |
| 1647 /* |
| 1648 * lookingAt() Used in setText test. |
| 1649 */ |
| 1650 |
| 1651 |
| 1652 /* |
| 1653 * find(), findNext, start, end, reset |
| 1654 */ |
| 1655 { |
| 1656 UChar text1[50]; |
| 1657 UBool result; |
| 1658 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); |
| 1659 status = U_ZERO_ERROR; |
| 1660 re = uregex_openC("rx", 0, NULL, &status); |
| 1661 |
| 1662 uregex_setText(re, text1, -1, &status); |
| 1663 result = uregex_find(re, 0, &status); |
| 1664 TEST_ASSERT(result == TRUE); |
| 1665 TEST_ASSERT(uregex_start(re, 0, &status) == 3); |
| 1666 TEST_ASSERT(uregex_end(re, 0, &status) == 5); |
| 1667 TEST_ASSERT_SUCCESS(status); |
| 1668 |
| 1669 result = uregex_find(re, 9, &status); |
| 1670 TEST_ASSERT(result == TRUE); |
| 1671 TEST_ASSERT(uregex_start(re, 0, &status) == 11); |
| 1672 TEST_ASSERT(uregex_end(re, 0, &status) == 13); |
| 1673 TEST_ASSERT_SUCCESS(status); |
| 1674 |
| 1675 result = uregex_find(re, 14, &status); |
| 1676 TEST_ASSERT(result == FALSE); |
| 1677 TEST_ASSERT_SUCCESS(status); |
| 1678 |
| 1679 status = U_ZERO_ERROR; |
| 1680 uregex_reset(re, 0, &status); |
| 1681 |
| 1682 result = uregex_findNext(re, &status); |
| 1683 TEST_ASSERT(result == TRUE); |
| 1684 TEST_ASSERT(uregex_start(re, 0, &status) == 3); |
| 1685 TEST_ASSERT(uregex_end(re, 0, &status) == 5); |
| 1686 TEST_ASSERT_SUCCESS(status); |
| 1687 |
| 1688 result = uregex_findNext(re, &status); |
| 1689 TEST_ASSERT(result == TRUE); |
| 1690 TEST_ASSERT(uregex_start(re, 0, &status) == 6); |
| 1691 TEST_ASSERT(uregex_end(re, 0, &status) == 8); |
| 1692 TEST_ASSERT_SUCCESS(status); |
| 1693 |
| 1694 status = U_ZERO_ERROR; |
| 1695 uregex_reset(re, 12, &status); |
| 1696 |
| 1697 result = uregex_findNext(re, &status); |
| 1698 TEST_ASSERT(result == TRUE); |
| 1699 TEST_ASSERT(uregex_start(re, 0, &status) == 13); |
| 1700 TEST_ASSERT(uregex_end(re, 0, &status) == 15); |
| 1701 TEST_ASSERT_SUCCESS(status); |
| 1702 |
| 1703 result = uregex_findNext(re, &status); |
| 1704 TEST_ASSERT(result == FALSE); |
| 1705 TEST_ASSERT_SUCCESS(status); |
| 1706 |
| 1707 uregex_close(re); |
| 1708 } |
| 1709 |
| 1710 /* |
| 1711 * group() |
| 1712 */ |
| 1713 { |
| 1714 UChar text1[80]; |
| 1715 UText *actual; |
| 1716 UBool result; |
| 1717 |
| 1718 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e,
0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc inter
ior def */ |
| 1719 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69,
0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ |
| 1720 |
| 1721 |
| 1722 u_uastrncpy(text1, "noise abc interior def, and this is off the end", s
izeof(text1)/2); |
| 1723 |
| 1724 status = U_ZERO_ERROR; |
| 1725 re = uregex_openC("abc(.*?)def", 0, NULL, &status); |
| 1726 TEST_ASSERT_SUCCESS(status); |
| 1727 |
| 1728 uregex_setText(re, text1, -1, &status); |
| 1729 result = uregex_find(re, 0, &status); |
| 1730 TEST_ASSERT(result==TRUE); |
| 1731 |
| 1732 /* Capture Group 0, the full match. Should succeed. */ |
| 1733 status = U_ZERO_ERROR; |
| 1734 actual = uregex_groupUTextDeep(re, 0, NULL, &status); |
| 1735 TEST_ASSERT_SUCCESS(status); |
| 1736 TEST_ASSERT_UTEXT(str_abcinteriordef, actual); |
| 1737 utext_close(actual); |
| 1738 |
| 1739 /* Capture Group 0 with shallow clone API. Should succeed. */ |
| 1740 status = U_ZERO_ERROR; |
| 1741 { |
| 1742 int64_t group_len; |
| 1743 int32_t len16; |
| 1744 UErrorCode shallowStatus = U_ZERO_ERROR; |
| 1745 int64_t nativeIndex; |
| 1746 UChar *groupChars; |
| 1747 UText groupText = UTEXT_INITIALIZER; |
| 1748 |
| 1749 actual = uregex_groupUText(re, 0, NULL, &group_len, &status); |
| 1750 TEST_ASSERT_SUCCESS(status); |
| 1751 |
| 1752 nativeIndex = utext_getNativeIndex(actual); |
| 1753 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug
in ucstrFuncs UTextFuncs [utext.cpp] */ |
| 1754 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_l
en, NULL, 0, &shallowStatus); */ |
| 1755 len16 = group_len; |
| 1756 |
| 1757 groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1)); |
| 1758 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupCha
rs, len16+1, &shallowStatus); |
| 1759 |
| 1760 utext_openUChars(&groupText, groupChars, len16, &shallowStatus); |
| 1761 |
| 1762 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText); |
| 1763 utext_close(&groupText); |
| 1764 free(groupChars); |
| 1765 } |
| 1766 utext_close(actual); |
| 1767 |
| 1768 /* Capture group #1. Should succeed. */ |
| 1769 status = U_ZERO_ERROR; |
| 1770 actual = uregex_groupUTextDeep(re, 1, NULL, &status); |
| 1771 TEST_ASSERT_SUCCESS(status); |
| 1772 TEST_ASSERT_UTEXT(str_interior, actual); |
| 1773 utext_close(actual); |
| 1774 |
| 1775 /* Capture group out of range. Error. */ |
| 1776 status = U_ZERO_ERROR; |
| 1777 actual = uregex_groupUTextDeep(re, 2, NULL, &status); |
| 1778 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); |
| 1779 TEST_ASSERT(utext_nativeLength(actual) == 0); |
| 1780 utext_close(actual); |
| 1781 |
| 1782 uregex_close(re); |
| 1783 |
| 1784 } |
| 1785 |
| 1786 /* |
| 1787 * replaceFirst() |
| 1788 */ |
| 1789 { |
| 1790 UChar text1[80]; |
| 1791 UChar text2[80]; |
| 1792 UText replText = UTEXT_INITIALIZER; |
| 1793 UText *result; |
| 1794 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0
x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2
e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ |
| 1795 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ |
| 1796 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x3
0, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34,
0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */ |
| 1797 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ |
| 1798 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x6
3, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78,
0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x.
*/ |
| 1799 status = U_ZERO_ERROR; |
| 1800 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); |
| 1801 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 1802 utext_openUTF8(&replText, str_1x, -1, &status); |
| 1803 |
| 1804 re = uregex_openC("x(.*?)x", 0, NULL, &status); |
| 1805 TEST_ASSERT_SUCCESS(status); |
| 1806 |
| 1807 /* Normal case, with match */ |
| 1808 uregex_setText(re, text1, -1, &status); |
| 1809 result = uregex_replaceFirstUText(re, &replText, NULL, &status); |
| 1810 TEST_ASSERT_SUCCESS(status); |
| 1811 TEST_ASSERT_UTEXT(str_Replxxx, result); |
| 1812 utext_close(result); |
| 1813 |
| 1814 /* No match. Text should copy to output with no changes. */ |
| 1815 uregex_setText(re, text2, -1, &status); |
| 1816 result = uregex_replaceFirstUText(re, &replText, NULL, &status); |
| 1817 TEST_ASSERT_SUCCESS(status); |
| 1818 TEST_ASSERT_UTEXT(str_Nomatchhere, result); |
| 1819 utext_close(result); |
| 1820 |
| 1821 /* Unicode escapes */ |
| 1822 uregex_setText(re, text1, -1, &status); |
| 1823 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); |
| 1824 result = uregex_replaceFirstUText(re, &replText, NULL, &status); |
| 1825 TEST_ASSERT_SUCCESS(status); |
| 1826 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); |
| 1827 utext_close(result); |
| 1828 |
| 1829 uregex_close(re); |
| 1830 utext_close(&replText); |
| 1831 } |
| 1832 |
| 1833 |
| 1834 /* |
| 1835 * replaceAll() |
| 1836 */ |
| 1837 { |
| 1838 UChar text1[80]; |
| 1839 UChar text2[80]; |
| 1840 UText replText = UTEXT_INITIALIZER; |
| 1841 UText *result; |
| 1842 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ |
| 1843 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65
, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e,
0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ |
| 1844 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ |
| 1845 status = U_ZERO_ERROR; |
| 1846 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); |
| 1847 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 1848 utext_openUTF8(&replText, str_1, -1, &status); |
| 1849 |
| 1850 re = uregex_openC("x(.*?)x", 0, NULL, &status); |
| 1851 TEST_ASSERT_SUCCESS(status); |
| 1852 |
| 1853 /* Normal case, with match */ |
| 1854 uregex_setText(re, text1, -1, &status); |
| 1855 result = uregex_replaceAllUText(re, &replText, NULL, &status); |
| 1856 TEST_ASSERT_SUCCESS(status); |
| 1857 TEST_ASSERT_UTEXT(str_Replaceaa1, result); |
| 1858 utext_close(result); |
| 1859 |
| 1860 /* No match. Text should copy to output with no changes. */ |
| 1861 uregex_setText(re, text2, -1, &status); |
| 1862 result = uregex_replaceAllUText(re, &replText, NULL, &status); |
| 1863 TEST_ASSERT_SUCCESS(status); |
| 1864 TEST_ASSERT_UTEXT(str_Nomatchhere, result); |
| 1865 utext_close(result); |
| 1866 |
| 1867 uregex_close(re); |
| 1868 utext_close(&replText); |
| 1869 } |
| 1870 |
| 1871 |
| 1872 /* |
| 1873 * appendReplacement() |
| 1874 */ |
| 1875 { |
| 1876 UChar text[100]; |
| 1877 UChar repl[100]; |
| 1878 UChar buf[100]; |
| 1879 UChar *bufPtr; |
| 1880 int32_t bufCap; |
| 1881 |
| 1882 status = U_ZERO_ERROR; |
| 1883 re = uregex_openC(".*", 0, 0, &status); |
| 1884 TEST_ASSERT_SUCCESS(status); |
| 1885 |
| 1886 u_uastrncpy(text, "whatever", sizeof(text)/2); |
| 1887 u_uastrncpy(repl, "some other", sizeof(repl)/2); |
| 1888 uregex_setText(re, text, -1, &status); |
| 1889 |
| 1890 /* match covers whole target string */ |
| 1891 uregex_find(re, 0, &status); |
| 1892 TEST_ASSERT_SUCCESS(status); |
| 1893 bufPtr = buf; |
| 1894 bufCap = sizeof(buf) / 2; |
| 1895 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); |
| 1896 TEST_ASSERT_SUCCESS(status); |
| 1897 TEST_ASSERT_STRING("some other", buf, TRUE); |
| 1898 |
| 1899 /* Match has \u \U escapes */ |
| 1900 uregex_find(re, 0, &status); |
| 1901 TEST_ASSERT_SUCCESS(status); |
| 1902 bufPtr = buf; |
| 1903 bufCap = sizeof(buf) / 2; |
| 1904 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); |
| 1905 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); |
| 1906 TEST_ASSERT_SUCCESS(status); |
| 1907 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); |
| 1908 |
| 1909 uregex_close(re); |
| 1910 } |
| 1911 |
| 1912 |
| 1913 /* |
| 1914 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(
). |
| 1915 */ |
| 1916 |
| 1917 /* |
| 1918 * splitUText() |
| 1919 */ |
| 1920 { |
| 1921 UChar textToSplit[80]; |
| 1922 UChar text2[80]; |
| 1923 UText *fields[10]; |
| 1924 int32_t numFields; |
| 1925 int32_t i; |
| 1926 |
| 1927 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/
2); |
| 1928 u_uastrncpy(text2, "No match here.", sizeof(text2)/2); |
| 1929 |
| 1930 status = U_ZERO_ERROR; |
| 1931 re = uregex_openC(":", 0, NULL, &status); |
| 1932 |
| 1933 |
| 1934 /* Simple split */ |
| 1935 |
| 1936 uregex_setText(re, textToSplit, -1, &status); |
| 1937 TEST_ASSERT_SUCCESS(status); |
| 1938 |
| 1939 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1940 if (U_SUCCESS(status)) { |
| 1941 memset(fields, 0, sizeof(fields)); |
| 1942 numFields = uregex_splitUText(re, fields, 10, &status); |
| 1943 TEST_ASSERT_SUCCESS(status); |
| 1944 |
| 1945 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1946 if(U_SUCCESS(status)) { |
| 1947 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x0
0 }; /* 'first ' */ |
| 1948 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x
64, 0x00 }; /* ' second' */ |
| 1949 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x6
4, 0x00 }; /* ' third' */ |
| 1950 TEST_ASSERT(numFields == 3); |
| 1951 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 1952 TEST_ASSERT_UTEXT(str_second, fields[1]); |
| 1953 TEST_ASSERT_UTEXT(str_third, fields[2]); |
| 1954 TEST_ASSERT(fields[3] == NULL); |
| 1955 } |
| 1956 for(i = 0; i < numFields; i++) { |
| 1957 utext_close(fields[i]); |
| 1958 } |
| 1959 } |
| 1960 |
| 1961 uregex_close(re); |
| 1962 |
| 1963 |
| 1964 /* Split with too few output strings available */ |
| 1965 status = U_ZERO_ERROR; |
| 1966 re = uregex_openC(":", 0, NULL, &status); |
| 1967 uregex_setText(re, textToSplit, -1, &status); |
| 1968 TEST_ASSERT_SUCCESS(status); |
| 1969 |
| 1970 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1971 if(U_SUCCESS(status)) { |
| 1972 fields[0] = NULL; |
| 1973 fields[1] = NULL; |
| 1974 fields[2] = &patternText; |
| 1975 numFields = uregex_splitUText(re, fields, 2, &status); |
| 1976 TEST_ASSERT_SUCCESS(status); |
| 1977 |
| 1978 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 1979 if(U_SUCCESS(status)) { |
| 1980 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0
x00 }; /* first */ |
| 1981 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0
x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second:
third */ |
| 1982 TEST_ASSERT(numFields == 2); |
| 1983 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 1984 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); |
| 1985 TEST_ASSERT(fields[2] == &patternText); |
| 1986 } |
| 1987 for(i = 0; i < numFields; i++) { |
| 1988 utext_close(fields[i]); |
| 1989 } |
| 1990 } |
| 1991 |
| 1992 uregex_close(re); |
| 1993 } |
| 1994 |
| 1995 /* splitUText(), part 2. Patterns with capture groups. The capture group t
ext |
| 1996 * comes out as additional fields. */ |
| 1997 { |
| 1998 UChar textToSplit[80]; |
| 1999 UText *fields[10]; |
| 2000 int32_t numFields; |
| 2001 int32_t i; |
| 2002 |
| 2003 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(t
extToSplit)/2); |
| 2004 |
| 2005 status = U_ZERO_ERROR; |
| 2006 re = uregex_openC("<(.*?)>", 0, NULL, &status); |
| 2007 |
| 2008 uregex_setText(re, textToSplit, -1, &status); |
| 2009 TEST_ASSERT_SUCCESS(status); |
| 2010 |
| 2011 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2012 if(U_SUCCESS(status)) { |
| 2013 memset(fields, 0, sizeof(fields)); |
| 2014 numFields = uregex_splitUText(re, fields, 10, &status); |
| 2015 TEST_ASSERT_SUCCESS(status); |
| 2016 |
| 2017 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2018 if(U_SUCCESS(status)) { |
| 2019 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0
x00 }; /* first */ |
| 2020 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 };
/* tag-a */ |
| 2021 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
0x64, 0x00 }; /* second */ |
| 2022 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 };
/* tag-b */ |
| 2023 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0
x64, 0x00 }; /* third */ |
| 2024 |
| 2025 TEST_ASSERT(numFields == 5); |
| 2026 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 2027 TEST_ASSERT_UTEXT(str_taga, fields[1]); |
| 2028 TEST_ASSERT_UTEXT(str_second, fields[2]); |
| 2029 TEST_ASSERT_UTEXT(str_tagb, fields[3]); |
| 2030 TEST_ASSERT_UTEXT(str_third, fields[4]); |
| 2031 TEST_ASSERT(fields[5] == NULL); |
| 2032 } |
| 2033 for(i = 0; i < numFields; i++) { |
| 2034 utext_close(fields[i]); |
| 2035 } |
| 2036 } |
| 2037 |
| 2038 /* Split with too few output strings available (2) */ |
| 2039 status = U_ZERO_ERROR; |
| 2040 fields[0] = NULL; |
| 2041 fields[1] = NULL; |
| 2042 fields[2] = &patternText; |
| 2043 numFields = uregex_splitUText(re, fields, 2, &status); |
| 2044 TEST_ASSERT_SUCCESS(status); |
| 2045 |
| 2046 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2047 if(U_SUCCESS(status)) { |
| 2048 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00
}; /* first */ |
| 2049 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0
x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x6
9, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ |
| 2050 TEST_ASSERT(numFields == 2); |
| 2051 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 2052 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); |
| 2053 TEST_ASSERT(fields[2] == &patternText); |
| 2054 } |
| 2055 for(i = 0; i < numFields; i++) { |
| 2056 utext_close(fields[i]); |
| 2057 } |
| 2058 |
| 2059 |
| 2060 /* Split with too few output strings available (3) */ |
| 2061 status = U_ZERO_ERROR; |
| 2062 fields[0] = NULL; |
| 2063 fields[1] = NULL; |
| 2064 fields[2] = NULL; |
| 2065 fields[3] = &patternText; |
| 2066 numFields = uregex_splitUText(re, fields, 3, &status); |
| 2067 TEST_ASSERT_SUCCESS(status); |
| 2068 |
| 2069 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2070 if(U_SUCCESS(status)) { |
| 2071 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00
}; /* first */ |
| 2072 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* t
ag-a */ |
| 2073 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0
x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x6
9, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ |
| 2074 TEST_ASSERT(numFields == 3); |
| 2075 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 2076 TEST_ASSERT_UTEXT(str_taga, fields[1]); |
| 2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); |
| 2078 TEST_ASSERT(fields[3] == &patternText); |
| 2079 } |
| 2080 for(i = 0; i < numFields; i++) { |
| 2081 utext_close(fields[i]); |
| 2082 } |
| 2083 |
| 2084 /* Split with just enough output strings available (5) */ |
| 2085 status = U_ZERO_ERROR; |
| 2086 fields[0] = NULL; |
| 2087 fields[1] = NULL; |
| 2088 fields[2] = NULL; |
| 2089 fields[3] = NULL; |
| 2090 fields[4] = NULL; |
| 2091 fields[5] = &patternText; |
| 2092 numFields = uregex_splitUText(re, fields, 5, &status); |
| 2093 TEST_ASSERT_SUCCESS(status); |
| 2094 |
| 2095 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2096 if(U_SUCCESS(status)) { |
| 2097 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00
}; /* first */ |
| 2098 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* t
ag-a */ |
| 2099 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64
, 0x00 }; /* second */ |
| 2100 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* t
ag-b */ |
| 2101 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
0x00 }; /* third */ |
| 2102 |
| 2103 TEST_ASSERT(numFields == 5); |
| 2104 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 2105 TEST_ASSERT_UTEXT(str_taga, fields[1]); |
| 2106 TEST_ASSERT_UTEXT(str_second, fields[2]); |
| 2107 TEST_ASSERT_UTEXT(str_tagb, fields[3]); |
| 2108 TEST_ASSERT_UTEXT(str_third, fields[4]); |
| 2109 TEST_ASSERT(fields[5] == &patternText); |
| 2110 } |
| 2111 for(i = 0; i < numFields; i++) { |
| 2112 utext_close(fields[i]); |
| 2113 } |
| 2114 |
| 2115 /* Split, end of text is a field delimiter. */ |
| 2116 status = U_ZERO_ERROR; |
| 2117 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &
status); |
| 2118 TEST_ASSERT_SUCCESS(status); |
| 2119 |
| 2120 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2121 if(U_SUCCESS(status)) { |
| 2122 memset(fields, 0, sizeof(fields)); |
| 2123 fields[9] = &patternText; |
| 2124 numFields = uregex_splitUText(re, fields, 9, &status); |
| 2125 TEST_ASSERT_SUCCESS(status); |
| 2126 |
| 2127 /* The TEST_ASSERT_SUCCESS call above should change too... */ |
| 2128 if(U_SUCCESS(status)) { |
| 2129 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0
x00 }; /* first */ |
| 2130 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 };
/* tag-a */ |
| 2131 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
0x64, 0x00 }; /* second */ |
| 2132 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 };
/* tag-b */ |
| 2133 |
| 2134 TEST_ASSERT(numFields == 4); |
| 2135 TEST_ASSERT_UTEXT(str_first, fields[0]); |
| 2136 TEST_ASSERT_UTEXT(str_taga, fields[1]); |
| 2137 TEST_ASSERT_UTEXT(str_second, fields[2]); |
| 2138 TEST_ASSERT_UTEXT(str_tagb, fields[3]); |
| 2139 TEST_ASSERT(fields[4] == NULL); |
| 2140 TEST_ASSERT(fields[8] == NULL); |
| 2141 TEST_ASSERT(fields[9] == &patternText); |
| 2142 } |
| 2143 for(i = 0; i < numFields; i++) { |
| 2144 utext_close(fields[i]); |
| 2145 } |
| 2146 } |
| 2147 |
| 2148 uregex_close(re); |
| 2149 } |
| 2150 utext_close(&patternText); |
| 2151 } |
| 2152 |
| 2153 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
OLD | NEW |