source/test/cintltst/reapits.c - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/cintltst/reapits.c

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1 /********************************************************************

2 * COPYRIGHT:

3 * Copyright (c) 2004-2015, International Business Machines Corporation and

4 * others. All Rights Reserved.

5 ********************************************************************/

6 /******************************************************************************* *

7 *

8 * File reapits.c

9 *

10 ******************************************************************************** */

11 /* C API TEST FOR Regular Expressions */

12 /**

13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't

14 * try to test the full functionality. It just calls each function and verifies that it

15 * works on a basic level.

16 *

17 * More complete testing of regular expression functionality is done with the C++ tests.

18 **/

19

20 #include "unicode/utypes.h"

21

22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS

23

24 #include <stdlib.h>

25 #include <string.h>

26 #include "unicode/uloc.h"

27 #include "unicode/uregex.h"

28 #include "unicode/ustring.h"

29 #include "unicode/utext.h"

30 #include "cintltst.h"

31 #include "cmemory.h"

32

/*
 * TEST_ASSERT_SUCCESS(status)
 *   Logs a data error (file, line and the ICU error name) when `status`
 *   holds a failure code. It only logs; control continues after the macro.
 *   Expands to a plain brace block, so it compiles with or without a
 *   trailing semicolon at the call site.
 */
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
    log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}

35

/*
 * TEST_ASSERT(expr)
 *   Logs a test failure naming the file, the line and the stringified
 *   expression when `expr` compares equal to FALSE. It only logs; control
 *   continues after the macro. Kept as a brace block (not do/while(0))
 *   because some call sites omit the trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr); \
    } \
}

38

39 /*

40 * TEST_SETUP and TEST_TEARDOWN

41 * macros to handle the boilerplate around setting up regex test cases.

42 * parameters to setup:

43 * pattern: The regex pattern, a (char *) null terminated C string.

44 * testString: The string data, also a (char *) C string.

45 * flags: Regex flags to set when compiling the pattern

46 *

47 * Put arbitrary test code between SETUP and TEARDOWN.

48 * "re" is the compiled, ready-to-go regular expression.

49 */

/*
 * TEST_SETUP(pattern, testString, flags)
 *   Opens a scope, resets `status`, compiles `pattern` into `re` and sets a
 *   heap-allocated UChar copy of `testString` as the text to match.
 *   The statements that follow the macro run only if all of that succeeded.
 *   `status` and `re` must already be declared by the enclosing function.
 *
 * TEST_TEARDOWN
 *   Closes the guarded section, reports any failure left in `status`,
 *   closes `re`, frees the text copy and closes the scope opened by
 *   TEST_SETUP. The two macros must always be used as a pair.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        /* Out of memory: record it (without hiding an earlier error) so the test body is skipped. */ \
        if (U_SUCCESS(status)) {status = U_MEMORY_ALLOCATION_ERROR;} \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
    }

67

68

69 /**

70 * @param expected utf-8 array of bytes to be expected

71 */

72 static void test_assert_string(const char expected, const UChar actual, UBool nulTerm, const char *file, int line) {

73 char buf_inside_macro[120];

74 int32_t len = (int32_t)strlen(expected);

75 UBool success;

76 if (nulTerm) {

77 u_austrncpy(buf_inside_macro, (actual), len+1);

78 buf_inside_macro[len+2] = 0;

79 success = (strcmp((expected), buf_inside_macro) == 0);

80 } else {

81 u_austrncpy(buf_inside_macro, (actual), len);

82 buf_inside_macro[len+1] = 0;

83 success = (strncmp((expected), buf_inside_macro, len) == 0);

84 }

85 if (success == FALSE) {

86 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",

87 file, line, (expected), buf_inside_macro);

88 }

89 }

90

/* Calls test_assert_string() with the call site's file and line filled in. */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)

92

93

94 static UBool equals_utf8_utext(const char utf8, UText utext) {

95 int32_t u8i = 0;

96 UChar32 u8c = 0;

97 UChar32 utc = 0;

98 UBool stringsEqual = TRUE;

99 utext_setNativeIndex(utext, 0);

100 for (;;) {

101 U8_NEXT_UNSAFE(utf8, u8i, u8c);

102 utc = utext_next32(utext);

103 if (u8c == 0 && utc == U_SENTINEL) {

104 break;

105 }

106 if (u8c != utc \|\| u8c == 0) {

107 stringsEqual = FALSE;

108 break;

109 }

110 }

111 return stringsEqual;

112 }

113

114

115 static void test_assert_utext(const char expected, UText actual, const char *f ile, int line) {

116 utext_setNativeIndex(actual, 0);

117 if (!equals_utf8_utext(expected, actual)) {

118 UChar32 c;

119 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, li ne, expected);

120 c = utext_next32From(actual, 0);

121 while (c != U_SENTINEL) {

122 if (0x20<c && c <0x7e) {

123 log_err("%c", c);

124 } else {

125 log_err("%#x", c);

126 }

127 c = UTEXT_NEXT32(actual);

128 }

129 log_err("\"\n");

130 }

131 }

132

/*
 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
 * Calls test_assert_utext() with the call site's file and line filled in.
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)

138

139 static UBool testUTextEqual(UText uta, UText utb) {

140 UChar32 ca = 0;

141 UChar32 cb = 0;

142 utext_setNativeIndex(uta, 0);

143 utext_setNativeIndex(utb, 0);

144 do {

145 ca = utext_next32(uta);

146 cb = utext_next32(utb);

147 if (ca != cb) {

148 break;

149 }

150 } while (ca != U_SENTINEL);

151 return ca == cb;

152 }

153

154

155

156

/* Test functions in this file that addURegexTest() registers. */
static void TestRegexCAPI(void);
static void TestBug4315(void);
static void TestUTextAPI(void);
static void TestRefreshInput(void);
static void TestBug8421(void);
static void TestBug10815(void);

163

164 void addURegexTest(TestNode** root);

165

166 void addURegexTest(TestNode** root)

167 {

168 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");

169 addTest(root, &TestBug4315, "regex/TestBug4315");

170 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");

171 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");

172 addTest(root, &TestBug8421, "regex/TestBug8421");

173 addTest(root, &TestBug10815, "regex/TestBug10815");

174 }

175

176 /*

177 * Call back function and context struct used for testing

178 * regular expression user callbacks. This test is mostly the same as

179 * the corresponding C++ test in intltest.

180 */

typedef struct callBackContext {
    int32_t maxCalls;   /* TestCallbackFn returns FALSE once numCalls reaches this value. */
    int32_t numCalls;   /* Incremented on every call to TestCallbackFn. */
    int32_t lastSteps;  /* The `steps` argument of the most recent call. */
} callBackContext;

186

187 static UBool U_EXPORT2 U_CALLCONV

188 TestCallbackFn(const void *context, int32_t steps) {

189 callBackContext info = (callBackContext )context;

190 if (info->lastSteps+1 != steps) {

191 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastS teps+1, steps);

192 }

193 info->lastSteps = steps;

194 info->numCalls++;

195 return (info->numCalls < info->maxCalls);

196 }

197

198 /*

199 * Regular Expression C API Tests

200 */

201 static void TestRegexCAPI(void) {

202 UErrorCode status = U_ZERO_ERROR;

203 URegularExpression *re;

204 UChar pat[200];

205 UChar *minus1;

206

207 memset(&minus1, -1, sizeof(minus1));

208

209 /* Minimalist open/close */

210 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));

211 re = uregex_open(pat, -1, 0, 0, &status);

212 if (U_FAILURE(status)) {

213 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));

214 return;

215 }

216 uregex_close(re);

217

218 /* Open with all flag values set */

219 status = U_ZERO_ERROR;

220 re = uregex_open(pat, -1,

221 UREGEX_CASE_INSENSITIVE \| UREGEX_COMMENTS \| UREGEX_DOTALL \| UREGEX_MULTI LINE \| UREGEX_UWORD \| UREGEX_LITERAL,

222 0, &status);

223 TEST_ASSERT_SUCCESS(status);

224 uregex_close(re);

225

226 /* Open with an invalid flag */

227 status = U_ZERO_ERROR;

228 re = uregex_open(pat, -1, 0x40000000, 0, &status);

229 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);

230 uregex_close(re);

231

232 /* Open with an unimplemented flag */

233 status = U_ZERO_ERROR;

234 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);

235 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);

236 uregex_close(re);

237

238 /* openC with an invalid parameter */

239 status = U_ZERO_ERROR;

240 re = uregex_openC(NULL,

241 UREGEX_CASE_INSENSITIVE \| UREGEX_COMMENTS \| UREGEX_DOTALL \| UREGEX_MULTI LINE \| UREGEX_UWORD, 0, &status);

242 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);

243

244 /* openC with an invalid parameter */

245 status = U_USELESS_COLLATOR_ERROR;

246 re = uregex_openC(NULL,

247 UREGEX_CASE_INSENSITIVE \| UREGEX_COMMENTS \| UREGEX_DOTALL \| UREGEX_MULTI LINE \| UREGEX_UWORD, 0, &status);

248 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);

249

250 /* openC open from a C string */

251 {

252 const UChar *p;

253 int32_t len;

254 status = U_ZERO_ERROR;

255 re = uregex_openC("abc*", 0, 0, &status);

256 TEST_ASSERT_SUCCESS(status);

257 p = uregex_pattern(re, &len, &status);

258 TEST_ASSERT_SUCCESS(status);

259

260 /* The TEST_ASSERT_SUCCESS above should change too... */

261 if(U_SUCCESS(status)) {

262 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));

263 TEST_ASSERT(u_strcmp(pat, p) == 0);

264 TEST_ASSERT(len==(int32_t)strlen("abc*"));

265 }

266

267 uregex_close(re);

268

269 /* TODO: Open with ParseError parameter */

270 }

271

272 /*

273 * clone

274 */

275 {

276 URegularExpression *clone1;

277 URegularExpression *clone2;

278 URegularExpression *clone3;

279 UChar testString1[30];

280 UChar testString2[30];

281 UBool result;

282

283

284 status = U_ZERO_ERROR;

285 re = uregex_openC("abc*", 0, 0, &status);

286 TEST_ASSERT_SUCCESS(status);

287 clone1 = uregex_clone(re, &status);

288 TEST_ASSERT_SUCCESS(status);

289 TEST_ASSERT(clone1 != NULL);

290

291 status = U_ZERO_ERROR;

292 clone2 = uregex_clone(re, &status);

293 TEST_ASSERT_SUCCESS(status);

294 TEST_ASSERT(clone2 != NULL);

295 uregex_close(re);

296

297 status = U_ZERO_ERROR;

298 clone3 = uregex_clone(clone2, &status);

299 TEST_ASSERT_SUCCESS(status);

300 TEST_ASSERT(clone3 != NULL);

301

302 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));

303 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));

304

305 status = U_ZERO_ERROR;

306 uregex_setText(clone1, testString1, -1, &status);

307 TEST_ASSERT_SUCCESS(status);

308 result = uregex_lookingAt(clone1, 0, &status);

309 TEST_ASSERT_SUCCESS(status);

310 TEST_ASSERT(result==TRUE);

311

312 status = U_ZERO_ERROR;

313 uregex_setText(clone2, testString2, -1, &status);

314 TEST_ASSERT_SUCCESS(status);

315 result = uregex_lookingAt(clone2, 0, &status);

316 TEST_ASSERT_SUCCESS(status);

317 TEST_ASSERT(result==FALSE);

318 result = uregex_find(clone2, 0, &status);

319 TEST_ASSERT_SUCCESS(status);

320 TEST_ASSERT(result==TRUE);

321

322 uregex_close(clone1);

323 uregex_close(clone2);

324 uregex_close(clone3);

325

326 }

327

328 /*

329 * pattern()

330 */

331 {

332 const UChar *resultPat;

333 int32_t resultLen;

334 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));

335 status = U_ZERO_ERROR;

336 re = uregex_open(pat, -1, 0, NULL, &status);

337 resultPat = uregex_pattern(re, &resultLen, &status);

338 TEST_ASSERT_SUCCESS(status);

339

340 /* The TEST_ASSERT_SUCCESS above should change too... */

341 if (U_SUCCESS(status)) {

342 TEST_ASSERT(resultLen == -1);

343 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);

344 }

345

346 uregex_close(re);

347

348 status = U_ZERO_ERROR;

349 re = uregex_open(pat, 3, 0, NULL, &status);

350 resultPat = uregex_pattern(re, &resultLen, &status);

351 TEST_ASSERT_SUCCESS(status);

352 TEST_ASSERT_SUCCESS(status);

353

354 /* The TEST_ASSERT_SUCCESS above should change too... */

355 if (U_SUCCESS(status)) {

356 TEST_ASSERT(resultLen == 3);

357 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);

358 TEST_ASSERT(u_strlen(resultPat) == 3);

359 }

360

361 uregex_close(re);

362 }

363

364 /*

365 * flags()

366 */

367 {

368 int32_t t;

369

370 status = U_ZERO_ERROR;

371 re = uregex_open(pat, -1, 0, NULL, &status);

372 t = uregex_flags(re, &status);

373 TEST_ASSERT_SUCCESS(status);

374 TEST_ASSERT(t == 0);

375 uregex_close(re);

376

377 status = U_ZERO_ERROR;

378 re = uregex_open(pat, -1, 0, NULL, &status);

379 t = uregex_flags(re, &status);

380 TEST_ASSERT_SUCCESS(status);

381 TEST_ASSERT(t == 0);

382 uregex_close(re);

383

384 status = U_ZERO_ERROR;

385 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE \| UREGEX_DOTALL, NULL, &status);

386 t = uregex_flags(re, &status);

387 TEST_ASSERT_SUCCESS(status);

388 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE \| UREGEX_DOTALL));

389 uregex_close(re);

390 }

391

392 /*

393 * setText() and lookingAt()

394 */

395 {

396 UChar text1[50];

397 UChar text2[50];

398 UBool result;

399

400 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));

401 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));

402 status = U_ZERO_ERROR;

403 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));

404 re = uregex_open(pat, -1, 0, NULL, &status);

405 TEST_ASSERT_SUCCESS(status);

406

407 /* Operation before doing a setText should fail... */

408 status = U_ZERO_ERROR;

409 uregex_lookingAt(re, 0, &status);

410 TEST_ASSERT( status== U_REGEX_INVALID_STATE);

411

412 status = U_ZERO_ERROR;

413 uregex_setText(re, text1, -1, &status);

414 result = uregex_lookingAt(re, 0, &status);

415 TEST_ASSERT(result == TRUE);

416 TEST_ASSERT_SUCCESS(status);

417

418 status = U_ZERO_ERROR;

419 uregex_setText(re, text2, -1, &status);

420 result = uregex_lookingAt(re, 0, &status);

421 TEST_ASSERT(result == FALSE);

422 TEST_ASSERT_SUCCESS(status);

423

424 status = U_ZERO_ERROR;

425 uregex_setText(re, text1, -1, &status);

426 result = uregex_lookingAt(re, 0, &status);

427 TEST_ASSERT(result == TRUE);

428 TEST_ASSERT_SUCCESS(status);

429

430 status = U_ZERO_ERROR;

431 uregex_setText(re, text1, 5, &status);

432 result = uregex_lookingAt(re, 0, &status);

433 TEST_ASSERT(result == FALSE);

434 TEST_ASSERT_SUCCESS(status);

435

436 status = U_ZERO_ERROR;

437 uregex_setText(re, text1, 6, &status);

438 result = uregex_lookingAt(re, 0, &status);

439 TEST_ASSERT(result == TRUE);

440 TEST_ASSERT_SUCCESS(status);

441

442 uregex_close(re);

443 }

444

445

446 /*

447 * getText()

448 */

449 {

450 UChar text1[50];

451 UChar text2[50];

452 const UChar *result;

453 int32_t textLength;

454

455 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));

456 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));

457 status = U_ZERO_ERROR;

458 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));

459 re = uregex_open(pat, -1, 0, NULL, &status);

460

461 uregex_setText(re, text1, -1, &status);

462 result = uregex_getText(re, &textLength, &status);

463 TEST_ASSERT(result == text1);

464 TEST_ASSERT(textLength == -1);

465 TEST_ASSERT_SUCCESS(status);

466

467 status = U_ZERO_ERROR;

468 uregex_setText(re, text2, 7, &status);

469 result = uregex_getText(re, &textLength, &status);

470 TEST_ASSERT(result == text2);

471 TEST_ASSERT(textLength == 7);

472 TEST_ASSERT_SUCCESS(status);

473

474 status = U_ZERO_ERROR;

475 uregex_setText(re, text2, 4, &status);

476 result = uregex_getText(re, &textLength, &status);

477 TEST_ASSERT(result == text2);

478 TEST_ASSERT(textLength == 4);

479 TEST_ASSERT_SUCCESS(status);

480 uregex_close(re);

481 }

482

483 /*

484 * matches()

485 */

486 {

487 UChar text1[50];

488 UBool result;

489 int len;

490 UChar nullString[] = {0,0,0};

491

492 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));

493 status = U_ZERO_ERROR;

494 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));

495 re = uregex_open(pat, -1, 0, NULL, &status);

496

497 uregex_setText(re, text1, -1, &status);

498 result = uregex_matches(re, 0, &status);

499 TEST_ASSERT(result == FALSE);

500 TEST_ASSERT_SUCCESS(status);

501

502 status = U_ZERO_ERROR;

503 uregex_setText(re, text1, 6, &status);

504 result = uregex_matches(re, 0, &status);

505 TEST_ASSERT(result == TRUE);

506 TEST_ASSERT_SUCCESS(status);

507

508 status = U_ZERO_ERROR;

509 uregex_setText(re, text1, 6, &status);

510 result = uregex_matches(re, 1, &status);

511 TEST_ASSERT(result == FALSE);

512 TEST_ASSERT_SUCCESS(status);

513 uregex_close(re);

514

515 status = U_ZERO_ERROR;

516 re = uregex_openC(".?", 0, NULL, &status);

517 uregex_setText(re, text1, -1, &status);

518 len = u_strlen(text1);

519 result = uregex_matches(re, len, &status);

520 TEST_ASSERT(result == TRUE);

521 TEST_ASSERT_SUCCESS(status);

522

523 status = U_ZERO_ERROR;

524 uregex_setText(re, nullString, -1, &status);

525 TEST_ASSERT_SUCCESS(status);

526 result = uregex_matches(re, 0, &status);

527 TEST_ASSERT(result == TRUE);

528 TEST_ASSERT_SUCCESS(status);

529 uregex_close(re);

530 }

531

532

533 /*

534 * lookingAt() Used in setText test.

535 */

536

537

538 /*

539 * find(), findNext, start, end, reset

540 */

541 {

542 UChar text1[50];

543 UBool result;

544 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));

545 status = U_ZERO_ERROR;

546 re = uregex_openC("rx", 0, NULL, &status);

547

548 uregex_setText(re, text1, -1, &status);

549 result = uregex_find(re, 0, &status);

550 TEST_ASSERT(result == TRUE);

551 TEST_ASSERT(uregex_start(re, 0, &status) == 3);

552 TEST_ASSERT(uregex_end(re, 0, &status) == 5);

553 TEST_ASSERT_SUCCESS(status);

554

555 result = uregex_find(re, 9, &status);

556 TEST_ASSERT(result == TRUE);

557 TEST_ASSERT(uregex_start(re, 0, &status) == 11);

558 TEST_ASSERT(uregex_end(re, 0, &status) == 13);

559 TEST_ASSERT_SUCCESS(status);

560

561 result = uregex_find(re, 14, &status);

562 TEST_ASSERT(result == FALSE);

563 TEST_ASSERT_SUCCESS(status);

564

565 status = U_ZERO_ERROR;

566 uregex_reset(re, 0, &status);

567

568 result = uregex_findNext(re, &status);

569 TEST_ASSERT(result == TRUE);

570 TEST_ASSERT(uregex_start(re, 0, &status) == 3);

571 TEST_ASSERT(uregex_end(re, 0, &status) == 5);

572 TEST_ASSERT_SUCCESS(status);

573

574 result = uregex_findNext(re, &status);

575 TEST_ASSERT(result == TRUE);

576 TEST_ASSERT(uregex_start(re, 0, &status) == 6);

577 TEST_ASSERT(uregex_end(re, 0, &status) == 8);

578 TEST_ASSERT_SUCCESS(status);

579

580 status = U_ZERO_ERROR;

581 uregex_reset(re, 12, &status);

582

583 result = uregex_findNext(re, &status);

584 TEST_ASSERT(result == TRUE);

585 TEST_ASSERT(uregex_start(re, 0, &status) == 13);

586 TEST_ASSERT(uregex_end(re, 0, &status) == 15);

587 TEST_ASSERT_SUCCESS(status);

588

589 result = uregex_findNext(re, &status);

590 TEST_ASSERT(result == FALSE);

591 TEST_ASSERT_SUCCESS(status);

592

593 uregex_close(re);

594 }

595

596 /*

597 * groupCount

598 */

599 {

600 int32_t result;

601

602 status = U_ZERO_ERROR;

603 re = uregex_openC("abc", 0, NULL, &status);

604 result = uregex_groupCount(re, &status);

605 TEST_ASSERT_SUCCESS(status);

606 TEST_ASSERT(result == 0);

607 uregex_close(re);

608

609 status = U_ZERO_ERROR;

610 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);

611 result = uregex_groupCount(re, &status);

612 TEST_ASSERT_SUCCESS(status);

613 TEST_ASSERT(result == 3);

614 uregex_close(re);

615

616 }

617

618

619 /*

620 * group()

621 */

622 {

623 UChar text1[80];

624 UChar buf[80];

625 UBool result;

626 int32_t resultSz;

627 u_uastrncpy(text1, "noise abc interior def, and this is off the end", U PRV_LENGTHOF(text1));

628

629 status = U_ZERO_ERROR;

630 re = uregex_openC("abc(.*?)def", 0, NULL, &status);

631 TEST_ASSERT_SUCCESS(status);

632

633

634 uregex_setText(re, text1, -1, &status);

635 result = uregex_find(re, 0, &status);

636 TEST_ASSERT(result==TRUE);

637

638 /* Capture Group 0, the full match. Should succeed. */

639 status = U_ZERO_ERROR;

640 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);

641 TEST_ASSERT_SUCCESS(status);

642 TEST_ASSERT_STRING("abc interior def", buf, TRUE);

643 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));

644

645 /* Capture group #1. Should succeed. */

646 status = U_ZERO_ERROR;

647 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);

648 TEST_ASSERT_SUCCESS(status);

649 TEST_ASSERT_STRING(" interior ", buf, TRUE);

650 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));

651

652 /* Capture group out of range. Error. */

653 status = U_ZERO_ERROR;

654 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);

655 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

656

657 /* NULL buffer, pure pre-flight */

658 status = U_ZERO_ERROR;

659 resultSz = uregex_group(re, 0, NULL, 0, &status);

660 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

661 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));

662

663 /* Too small buffer, truncated string */

664 status = U_ZERO_ERROR;

665 memset(buf, -1, sizeof(buf));

666 resultSz = uregex_group(re, 0, buf, 5, &status);

667 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

668 TEST_ASSERT_STRING("abc i", buf, FALSE);

669 TEST_ASSERT(buf[5] == (UChar)0xffff);

670 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));

671

672 /* Output string just fits buffer, no NUL term. */

673 status = U_ZERO_ERROR;

674 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);

675 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);

676 TEST_ASSERT_STRING("abc interior def", buf, FALSE);

677 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));

678 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);

679

680 uregex_close(re);

681

682 }

683

684 /*

685 * Regions

686 */

687

688

689 /* SetRegion(), getRegion() do something */

690 TEST_SETUP(".*", "0123456789ABCDEF", 0)

691 UChar resultString[40];

692 TEST_ASSERT(uregex_regionStart(re, &status) == 0);

693 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);

694 uregex_setRegion(re, 3, 6, &status);

695 TEST_ASSERT(uregex_regionStart(re, &status) == 3);

696 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);

697 TEST_ASSERT(uregex_findNext(re, &status));

698 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString ), &status) == 3)

699 TEST_ASSERT_STRING("345", resultString, TRUE);

700 TEST_TEARDOWN;

701

702 /* find(start=-1) uses regions */

703 TEST_SETUP(".*", "0123456789ABCDEF", 0);

704 uregex_setRegion(re, 4, 6, &status);

705 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);

706 TEST_ASSERT(uregex_start(re, 0, &status) == 4);

707 TEST_ASSERT(uregex_end(re, 0, &status) == 6);

708 TEST_TEARDOWN;

709

710 /* find (start >=0) does not use regions */

711 TEST_SETUP(".*", "0123456789ABCDEF", 0);

712 uregex_setRegion(re, 4, 6, &status);

713 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);

714 TEST_ASSERT(uregex_start(re, 0, &status) == 0);

715 TEST_ASSERT(uregex_end(re, 0, &status) == 16);

716 TEST_TEARDOWN;

717

718 /* findNext() obeys regions */

719 TEST_SETUP(".", "0123456789ABCDEF", 0);

720 uregex_setRegion(re, 4, 6, &status);

721 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);

722 TEST_ASSERT(uregex_start(re, 0, &status) == 4);

723 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);

724 TEST_ASSERT(uregex_start(re, 0, &status) == 5);

725 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);

726 TEST_TEARDOWN;

727

728 /* matches(start=-1) uses regions */

729 /* Also, verify that non-greedy *? succeeds in finding the full match. */

730 TEST_SETUP(".*?", "0123456789ABCDEF", 0);

731 uregex_setRegion(re, 4, 6, &status);

732 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);

733 TEST_ASSERT(uregex_start(re, 0, &status) == 4);

734 TEST_ASSERT(uregex_end(re, 0, &status) == 6);

735 TEST_TEARDOWN;

736

737 /* matches (start >=0) does not use regions */

738 TEST_SETUP(".*?", "0123456789ABCDEF", 0);

739 uregex_setRegion(re, 4, 6, &status);

740 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);

741 TEST_ASSERT(uregex_start(re, 0, &status) == 0);

742 TEST_ASSERT(uregex_end(re, 0, &status) == 16);

743 TEST_TEARDOWN;

744

745 /* lookingAt(start=-1) uses regions */

746 /* Also, verify that non-greedy *? finds the first (shortest) match. */

747 TEST_SETUP(".*?", "0123456789ABCDEF", 0);

748 uregex_setRegion(re, 4, 6, &status);

749 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);

750 TEST_ASSERT(uregex_start(re, 0, &status) == 4);

751 TEST_ASSERT(uregex_end(re, 0, &status) == 4);

752 TEST_TEARDOWN;

753

754 /* lookingAt (start >=0) does not use regions */

755 TEST_SETUP(".*?", "0123456789ABCDEF", 0);

756 uregex_setRegion(re, 4, 6, &status);

757 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);

758 TEST_ASSERT(uregex_start(re, 0, &status) == 0);

759 TEST_ASSERT(uregex_end(re, 0, &status) == 0);

760 TEST_TEARDOWN;

761

762 /* hitEnd() */

763 TEST_SETUP("[a-f]*", "abcdefghij", 0);

764 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);

765 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);

766 TEST_TEARDOWN;

767

768 TEST_SETUP("[a-f]*", "abcdef", 0);

769 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);

770 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);

771 TEST_TEARDOWN;

772

773 /* requireEnd */

774 TEST_SETUP("abcd", "abcd", 0);

775 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);

776 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);

777 TEST_TEARDOWN;

778

779 TEST_SETUP("abcd$", "abcd", 0);

780 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);

781 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);

782 TEST_TEARDOWN;

783

784 /* anchoringBounds */

785 TEST_SETUP("abc$", "abcdef", 0);

786 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);

787 uregex_useAnchoringBounds(re, FALSE, &status);

788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);

789

790 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);

791 uregex_useAnchoringBounds(re, TRUE, &status);

792 uregex_setRegion(re, 0, 3, &status);

793 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);

794 TEST_ASSERT(uregex_end(re, 0, &status) == 3);

795 TEST_TEARDOWN;

796

797 /* Transparent Bounds */

798 TEST_SETUP("abc(?=def)", "abcdef", 0);

799 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);

800 uregex_useTransparentBounds(re, TRUE, &status);

801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);

802

803 uregex_useTransparentBounds(re, FALSE, &status);

804 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */

805 uregex_setRegion(re, 0, 3, &status);

806 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, op aque bounds */

807 uregex_useTransparentBounds(re, TRUE, &status);

808 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, tr ansparent bounds */

809 TEST_ASSERT(uregex_end(re, 0, &status) == 3);

810 TEST_TEARDOWN;

811

812

813 /*

814 * replaceFirst()

815 */

816 {

817 UChar text1[80];

818 UChar text2[80];

819 UChar replText[80];

820 UChar buf[80];

821 int32_t resultSz;

822 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));

823 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));

824 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));

825

826 status = U_ZERO_ERROR;

827 re = uregex_openC("x(.*?)x", 0, NULL, &status);

828 TEST_ASSERT_SUCCESS(status);

829

830 /* Normal case, with match */

831 uregex_setText(re, text1, -1, &status);

832 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf) , &status);

833 TEST_ASSERT_SUCCESS(status);

834 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);

835 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));

836

837 /* No match. Text should copy to output with no changes. */

838 status = U_ZERO_ERROR;

839 uregex_setText(re, text2, -1, &status);

840 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf) , &status);

841 TEST_ASSERT_SUCCESS(status);

842 TEST_ASSERT_STRING("No match here.", buf, TRUE);

843 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));

844

845 /* Match, output just fills buffer, no termination warning. */

846 status = U_ZERO_ERROR;

847 uregex_setText(re, text1, -1, &status);

848 memset(buf, -1, sizeof(buf));

849 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a a> x1x x...x."), &status);

850 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);

851 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);

852 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));

853 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);

854

855 /* Do the replaceFirst again, without first resetting anything.

856 * Should give the same results.

857 */

858 status = U_ZERO_ERROR;

859 memset(buf, -1, sizeof(buf));

860 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a a> x1x x...x."), &status);

861 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);

862 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);

863 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));

864 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);

865

866 /* NULL buffer, zero buffer length */

867 status = U_ZERO_ERROR;

868 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);

869 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

870 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));

871

872 /* Buffer too small by one */

873 status = U_ZERO_ERROR;

874 memset(buf, -1, sizeof(buf));

875 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a a> x1x x...x.")-1, &status);

876 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

877 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);

878 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));

879 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);

880

881 uregex_close(re);

882 }

883

884

885 /*

886 * replaceAll()

887 */

888 {

889 UChar text1[80]; /* "Replace xaax x1x x...x." */

890 UChar text2[80]; /* "No match Here" */

891 UChar replText[80]; /* "<$1>" */

892 UChar replText2[80]; /* "<<$1>>" */

893 const char * pattern = "x(.*?)x";

894 const char * expectedResult = "Replace <aa> <1> <...>.";

895 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";

896 UChar buf[80];

897 int32_t resultSize;

898 int32_t expectedResultSize;

899 int32_t expectedResultSize2;

900 int32_t i;

901

902 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));

903 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));

904 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));

905 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));

906 expectedResultSize = strlen(expectedResult);

907 expectedResultSize2 = strlen(expectedResult2);

908

909 status = U_ZERO_ERROR;

910 re = uregex_openC(pattern, 0, NULL, &status);

911 TEST_ASSERT_SUCCESS(status);

912

913 /* Normal case, with match */

914 uregex_setText(re, text1, -1, &status);

915 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf) , &status);

916 TEST_ASSERT_SUCCESS(status);

917 TEST_ASSERT_STRING(expectedResult, buf, TRUE);

918 TEST_ASSERT(resultSize == expectedResultSize);

919

920 /* No match. Text should copy to output with no changes. */

921 status = U_ZERO_ERROR;

922 uregex_setText(re, text2, -1, &status);

923 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf) , &status);

924 TEST_ASSERT_SUCCESS(status);

925 TEST_ASSERT_STRING("No match here.", buf, TRUE);

926 TEST_ASSERT(resultSize == u_strlen(text2));

927

928 /* Match, output just fills buffer, no termination warning. */

929 status = U_ZERO_ERROR;

930 uregex_setText(re, text1, -1, &status);

931 memset(buf, -1, sizeof(buf));

932 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize , &status);

933 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);

934 TEST_ASSERT_STRING(expectedResult, buf, FALSE);

935 TEST_ASSERT(resultSize == expectedResultSize);

936 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);

937

938 /* Do the replaceAll again, without first resetting anything.

939 * Should give the same results.

940 */

941 status = U_ZERO_ERROR;

942 memset(buf, -1, sizeof(buf));

943 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);

944 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);

945 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);

946 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));

947 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);

948

949 /* NULL buffer, zero buffer length */

950 status = U_ZERO_ERROR;

951 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);

952 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

953 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));

954

955 /* Buffer too small. Try every size, which will tickle edge cases

956 * in uregex_appendReplacement (used by replaceAll) */

957 for (i=0; i<expectedResultSize; i++) {

958 char expected[80];

959 status = U_ZERO_ERROR;

960 memset(buf, -1, sizeof(buf));

961 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);

962 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

963 strcpy(expected, expectedResult);

964 expected[i] = 0;

965 TEST_ASSERT_STRING(expected, buf, FALSE);

966 TEST_ASSERT(resultSize == expectedResultSize);

967 TEST_ASSERT(buf[i] == (UChar)0xffff);

968 }

969

970 /* Buffer too small. Same as previous test, except this time the replacement

971 * text is longer than the match capture group, making the length of the complete

972 * replacement longer than the original string.

973 */

974 for (i=0; i<expectedResultSize2; i++) {

975 char expected[80];

976 status = U_ZERO_ERROR;

977 memset(buf, -1, sizeof(buf));

978 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);

979 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

980 strcpy(expected, expectedResult2);

981 expected[i] = 0;

982 TEST_ASSERT_STRING(expected, buf, FALSE);

983 TEST_ASSERT(resultSize == expectedResultSize2);

984 TEST_ASSERT(buf[i] == (UChar)0xffff);

985 }

986

987

988 uregex_close(re);

989 }

990

991

992 /*

993 * appendReplacement()

994 */

995 {

996 UChar text[100];

997 UChar repl[100];

998 UChar buf[100];

999 UChar *bufPtr;

1000 int32_t bufCap;

1001

1002

1003 status = U_ZERO_ERROR;

1004 re = uregex_openC(".*", 0, 0, &status);

1005 TEST_ASSERT_SUCCESS(status);

1006

1007 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));

1008 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));

1009 uregex_setText(re, text, -1, &status);

1010

1011 /* match covers whole target string */

1012 uregex_find(re, 0, &status);

1013 TEST_ASSERT_SUCCESS(status);

1014 bufPtr = buf;

1015 bufCap = UPRV_LENGTHOF(buf);

1016 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);

1017 TEST_ASSERT_SUCCESS(status);

1018 TEST_ASSERT_STRING("some other", buf, TRUE);

1019

1020 /* Match has \u \U escapes */

1021 uregex_find(re, 0, &status);

1022 TEST_ASSERT_SUCCESS(status);

1023 bufPtr = buf;

1024 bufCap = UPRV_LENGTHOF(buf);

1025 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));

1026 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);

1027 TEST_ASSERT_SUCCESS(status);

1028 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);

1029

1030 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */

1031 status = U_ZERO_ERROR;

1032 uregex_find(re, 0, &status);

1033 TEST_ASSERT_SUCCESS(status);

1034 bufPtr = buf;

1035 status = U_BUFFER_OVERFLOW_ERROR;

1036 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);

1037 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

1038

1039 uregex_close(re);

1040 }

1041

1042

1043 /*

1044 * appendTail(). Checked in ReplaceFirst(), replaceAll().

1045 */

1046

1047 /*

1048 * split()

1049 */

1050 {

1051 UChar textToSplit[80];

1052 UChar text2[80];

1053 UChar buf[200];

1054 UChar *fields[10];

1055 int32_t numFields;

1056 int32_t requiredCapacity;

1057 int32_t spaceNeeded;

1058 int32_t sz;

1059

1060 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));

1061 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));

1062

1063 status = U_ZERO_ERROR;

1064 re = uregex_openC(":", 0, NULL, &status);

1065

1066

1067 /* Simple split */

1068

1069 uregex_setText(re, textToSplit, -1, &status);

1070 TEST_ASSERT_SUCCESS(status);

1071

1072 /* The TEST_ASSERT_SUCCESS call above should change too... */

1073 if (U_SUCCESS(status)) {

1074 memset(fields, -1, sizeof(fields));

1075 numFields =

1076 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);

1077 TEST_ASSERT_SUCCESS(status);

1078

1079 /* The TEST_ASSERT_SUCCESS call above should change too... */

1080 if(U_SUCCESS(status)) {

1081 TEST_ASSERT(numFields == 3);

1082 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1083 TEST_ASSERT_STRING(" second", fields[1], TRUE);

1084 TEST_ASSERT_STRING(" third", fields[2], TRUE);

1085 TEST_ASSERT(fields[3] == NULL);

1086

1087 spaceNeeded = u_strlen(textToSplit) -

1088 (numFields - 1) + /* Field delimiters do not appear in output */

1089 numFields; /* Each field gets a NUL terminator */

1090

1091 TEST_ASSERT(spaceNeeded == requiredCapacity);

1092 }

1093 }

1094

1095 uregex_close(re);

1096

1097

1098 /* Split with too few output strings available */

1099 status = U_ZERO_ERROR;

1100 re = uregex_openC(":", 0, NULL, &status);

1101 uregex_setText(re, textToSplit, -1, &status);

1102 TEST_ASSERT_SUCCESS(status);

1103

1104 /* The TEST_ASSERT_SUCCESS call above should change too... */

1105 if(U_SUCCESS(status)) {

1106 memset(fields, -1, sizeof(fields));

1107 numFields =

1108 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);

1109 TEST_ASSERT_SUCCESS(status);

1110

1111 /* The TEST_ASSERT_SUCCESS call above should change too... */

1112 if(U_SUCCESS(status)) {

1113 TEST_ASSERT(numFields == 2);

1114 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1115 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);

1116 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));

1117

1118 spaceNeeded = u_strlen(textToSplit) -

1119 (numFields - 1) + /* Field delimiters do not appear in output */

1120 numFields; /* Each field gets a NUL terminator */

1121

1122 TEST_ASSERT(spaceNeeded == requiredCapacity);

1123

1124 /* Split with a range of output buffer sizes. */

1125 spaceNeeded = u_strlen(textToSplit) -

1126 (numFields - 1) + /* Field delimiters do not appear in output */

1127 numFields; /* Each field gets a NUL terminator */

1128

1129 for (sz=0; sz < spaceNeeded+1; sz++) {

1130 memset(fields, -1, sizeof(fields));

1131 status = U_ZERO_ERROR;

1132 numFields =

1133 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);

1134 if (sz >= spaceNeeded) {

1135 TEST_ASSERT_SUCCESS(status);

1136 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1137 TEST_ASSERT_STRING(" second", fields[1], TRUE);

1138 TEST_ASSERT_STRING(" third", fields[2], TRUE);

1139 } else {

1140 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);

1141 }

1142 TEST_ASSERT(numFields == 3);

1143 TEST_ASSERT(fields[3] == NULL);

1144 TEST_ASSERT(spaceNeeded == requiredCapacity);

1145 }

1146 }

1147 }

1148

1149 uregex_close(re);

1150 }

1151

1152

1153

1154

1155 /* Split(), part 2. Patterns with capture groups. The capture group text

1156 * comes out as additional fields. */

1157 {

1158 UChar textToSplit[80];

1159 UChar buf[200];

1160 UChar *fields[10];

1161 int32_t numFields;

1162 int32_t requiredCapacity;

1163 int32_t spaceNeeded;

1164 int32_t sz;

1165

1166 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));

1167

1168 status = U_ZERO_ERROR;

1169 re = uregex_openC("<(.*?)>", 0, NULL, &status);

1170

1171 uregex_setText(re, textToSplit, -1, &status);

1172 TEST_ASSERT_SUCCESS(status);

1173

1174 /* The TEST_ASSERT_SUCCESS call above should change too... */

1175 if(U_SUCCESS(status)) {

1176 memset(fields, -1, sizeof(fields));

1177 numFields =

1178 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);

1179 TEST_ASSERT_SUCCESS(status);

1180

1181 /* The TEST_ASSERT_SUCCESS call above should change too... */

1182 if(U_SUCCESS(status)) {

1183 TEST_ASSERT(numFields == 5);

1184 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1185 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);

1186 TEST_ASSERT_STRING(" second", fields[2], TRUE);

1187 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);

1188 TEST_ASSERT_STRING(" third", fields[4], TRUE);

1189 TEST_ASSERT(fields[5] == NULL);

1190 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */

1191 TEST_ASSERT(spaceNeeded == requiredCapacity);

1192 }

1193 }

1194

1195 /* Split with too few output strings available (2) */

1196 status = U_ZERO_ERROR;

1197 memset(fields, -1, sizeof(fields));

1198 numFields =

1199 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);

1200 TEST_ASSERT_SUCCESS(status);

1201

1202 /* The TEST_ASSERT_SUCCESS call above should change too... */

1203 if(U_SUCCESS(status)) {

1204 TEST_ASSERT(numFields == 2);

1205 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1206 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);

1207 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));

1208

1209 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */

1210 TEST_ASSERT(spaceNeeded == requiredCapacity);

1211 }

1212

1213 /* Split with too few output strings available (3) */

1214 status = U_ZERO_ERROR;

1215 memset(fields, -1, sizeof(fields));

1216 numFields =

1217 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);

1218 TEST_ASSERT_SUCCESS(status);

1219

1220 /* The TEST_ASSERT_SUCCESS call above should change too... */

1221 if(U_SUCCESS(status)) {

1222 TEST_ASSERT(numFields == 3);

1223 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1224 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);

1225 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);

1226 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));

1227

1228 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */

1229 TEST_ASSERT(spaceNeeded == requiredCapacity);

1230 }

1231

1232 /* Split with just enough output strings available (5) */

1233 status = U_ZERO_ERROR;

1234 memset(fields, -1, sizeof(fields));

1235 numFields =

1236 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);

1237 TEST_ASSERT_SUCCESS(status);

1238

1239 /* The TEST_ASSERT_SUCCESS call above should change too... */

1240 if(U_SUCCESS(status)) {

1241 TEST_ASSERT(numFields == 5);

1242 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1243 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);

1244 TEST_ASSERT_STRING(" second", fields[2], TRUE);

1245 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);

1246 TEST_ASSERT_STRING(" third", fields[4], TRUE);

1247 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));

1248

1249 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */

1250 TEST_ASSERT(spaceNeeded == requiredCapacity);

1251 }

1252

1253 /* Split, end of text is a field delimiter. */

1254 status = U_ZERO_ERROR;

1255 sz = strlen("first <tag-a> second<tag-b>");

1256 uregex_setText(re, textToSplit, sz, &status);

1257 TEST_ASSERT_SUCCESS(status);

1258

1259 /* The TEST_ASSERT_SUCCESS call above should change too... */

1260 if(U_SUCCESS(status)) {

1261 memset(fields, -1, sizeof(fields));

1262 numFields =

1263 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);

1264 TEST_ASSERT_SUCCESS(status);

1265

1266 /* The TEST_ASSERT_SUCCESS call above should change too... */

1267 if(U_SUCCESS(status)) {

1268 TEST_ASSERT(numFields == 5);

1269 TEST_ASSERT_STRING("first ", fields[0], TRUE);

1270 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);

1271 TEST_ASSERT_STRING(" second", fields[2], TRUE);

1272 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);

1273 TEST_ASSERT_STRING("", fields[4], TRUE);

1274 TEST_ASSERT(fields[5] == NULL);

1275 TEST_ASSERT(fields[8] == NULL);

1276 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));

1277 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */

1278 TEST_ASSERT(spaceNeeded == requiredCapacity);

1279 }

1280 }

1281

1282 uregex_close(re);

1283 }

1284

1285 /*

1286 * set/getTimeLimit

1287 */

1288 TEST_SETUP("abc$", "abcdef", 0);

1289 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);

1290 uregex_setTimeLimit(re, 1000, &status);

1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);

1292 TEST_ASSERT_SUCCESS(status);

1293 uregex_setTimeLimit(re, -1, &status);

1294 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

1295 status = U_ZERO_ERROR;

1296 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);

1297 TEST_TEARDOWN;

1298

1299 /*

1300 * set/get Stack Limit

1301 */

1302 TEST_SETUP("abc$", "abcdef", 0);

1303 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);

1304 uregex_setStackLimit(re, 40000, &status);

1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);

1306 TEST_ASSERT_SUCCESS(status);

1307 uregex_setStackLimit(re, -1, &status);

1308 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);

1309 status = U_ZERO_ERROR;

1310 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);

1311 TEST_TEARDOWN;

1312

1313

1314 /*

1315 * Get/Set callback functions

1316 * This test is copied from intltest regex/Callbacks

1317 * The pattern and test data will run long enough to cause the callback

1318 * to be invoked. The nested '+' operators give exponential time

1319 * behavior with increasing string length.

1320 */

1321 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)

1322 callBackContext cbInfo = {4, 0, 0};

1323 const void *pContext = &cbInfo;

1324 URegexMatchCallback *returnedFn = &TestCallbackFn;

1325

1326 /* Getting the callback fn when it hasn't been set must return NULL */

1327 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);

1328 TEST_ASSERT_SUCCESS(status);

1329 TEST_ASSERT(returnedFn == NULL);

1330 TEST_ASSERT(pContext == NULL);

1331

1332 /* Set the callback and do a match. */

1333 /* The callback function should record that it has been called. */

1334 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);

1335 TEST_ASSERT_SUCCESS(status);

1336 TEST_ASSERT(cbInfo.numCalls == 0);

1337 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);

1338 TEST_ASSERT_SUCCESS(status);

1339 TEST_ASSERT(cbInfo.numCalls > 0);

1340

1341 /* Getting the callback should return the values that were set above. */

1342 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);

1343 TEST_ASSERT(returnedFn == &TestCallbackFn);

1344 TEST_ASSERT(pContext == &cbInfo);

1345

1346 TEST_TEARDOWN;

1347 }

1348

1349

1350

1351 static void TestBug4315(void) {

1352 UErrorCode theICUError = U_ZERO_ERROR;

1353 URegularExpression *theRegEx;

1354 UChar *textBuff;

1355 const char *thePattern;

1356 UChar theString[100];

1357 UChar *destFields[24];

1358 int32_t neededLength1;

1359 int32_t neededLength2;

1360

1361 int32_t wordCount = 0;

1362 int32_t destFieldsSize = 24;

1363

1364 thePattern = "ck ";

1365 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");

1366

1367 /* open a regex */

1368 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);

1369 TEST_ASSERT_SUCCESS(theICUError);

1370

1371 /* set the input string */

1372 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);

1373 TEST_ASSERT_SUCCESS(theICUError);

1374

1375 /* split */

1376 /*explicitly pass NULL and 0 to force the overflow error -> this is where the

1377 * error occurs! */

1378 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,

1379 destFieldsSize, &theICUError);

1380

1381 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);

1382 TEST_ASSERT(wordCount==3);

1383

1384 if(theICUError == U_BUFFER_OVERFLOW_ERROR)

1385 {

1386 theICUError = U_ZERO_ERROR;

1387 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));

1388 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,

1389 destFields, destFieldsSize, &theICUError);

1390 TEST_ASSERT(wordCount==3);

1391 TEST_ASSERT_SUCCESS(theICUError);

1392 TEST_ASSERT(neededLength1 == neededLength2);

1393 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);

1394 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);

1395 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);

1396 TEST_ASSERT(destFields[3] == NULL);

1397 free(textBuff);

1398 }

1399 uregex_close(theRegEx);

1400 }

1401

1402 /* Based on TestRegexCAPI() */

1403 static void TestUTextAPI(void) {

1404 UErrorCode status = U_ZERO_ERROR;

1405 URegularExpression *re;

1406 UText patternText = UTEXT_INITIALIZER;

1407 UChar pat[200];

1408 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };

1409

1410 /* Minimalist open/close */

1411 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);

1412 re = uregex_openUText(&patternText, 0, 0, &status);

1413 if (U_FAILURE(status)) {

1414 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));

1415 utext_close(&patternText);

1416 return;

1417 }

1418 uregex_close(re);

1419

1420 /* Open with all flag values set */

1421 status = U_ZERO_ERROR;

1422 re = uregex_openUText(&patternText,

1423 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,

1424 0, &status);

1425 TEST_ASSERT_SUCCESS(status);

1426 uregex_close(re);

1427

1428 /* Open with an invalid flag */

1429 status = U_ZERO_ERROR;

1430 re = uregex_openUText(&patternText, 0x40000000, 0, &status);

1431 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);

1432 uregex_close(re);

1433

1434 /* open with an invalid parameter */

1435 status = U_ZERO_ERROR;

1436 re = uregex_openUText(NULL,

1437 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);

1438 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);

1439

1440 /*

1441 * clone

1442 */

1443 {

1444 URegularExpression *clone1;

1445 URegularExpression *clone2;

1446 URegularExpression *clone3;

1447 UChar testString1[30];

1448 UChar testString2[30];

1449 UBool result;

1450

1451

1452 status = U_ZERO_ERROR;

1453 re = uregex_openUText(&patternText, 0, 0, &status);

1454 TEST_ASSERT_SUCCESS(status);

1455 clone1 = uregex_clone(re, &status);

1456 TEST_ASSERT_SUCCESS(status);

1457 TEST_ASSERT(clone1 != NULL);

1458

1459 status = U_ZERO_ERROR;

1460 clone2 = uregex_clone(re, &status);

1461 TEST_ASSERT_SUCCESS(status);

1462 TEST_ASSERT(clone2 != NULL);

1463 uregex_close(re);

1464

1465 status = U_ZERO_ERROR;

1466 clone3 = uregex_clone(clone2, &status);

1467 TEST_ASSERT_SUCCESS(status);

1468 TEST_ASSERT(clone3 != NULL);

1469

1470 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));

1471 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));

1472

1473 status = U_ZERO_ERROR;

1474 uregex_setText(clone1, testString1, -1, &status);

1475 TEST_ASSERT_SUCCESS(status);

1476 result = uregex_lookingAt(clone1, 0, &status);

1477 TEST_ASSERT_SUCCESS(status);

1478 TEST_ASSERT(result==TRUE);

1479

1480 status = U_ZERO_ERROR;

1481 uregex_setText(clone2, testString2, -1, &status);

1482 TEST_ASSERT_SUCCESS(status);

1483 result = uregex_lookingAt(clone2, 0, &status);

1484 TEST_ASSERT_SUCCESS(status);

1485 TEST_ASSERT(result==FALSE);

1486 result = uregex_find(clone2, 0, &status);

1487 TEST_ASSERT_SUCCESS(status);

1488 TEST_ASSERT(result==TRUE);

1489

1490 uregex_close(clone1);

1491 uregex_close(clone2);

1492 uregex_close(clone3);

1493

1494 }

1495

1496 /*

1497 * pattern() and patternText()

1498 */

1499 {

1500 const UChar *resultPat;

1501 int32_t resultLen;

1502 UText *resultText;

1503 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */

1504 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */

1505 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */

1506 status = U_ZERO_ERROR;

1507

1508 utext_openUTF8(&patternText, str_hello, -1, &status);

1509 re = uregex_open(pat, -1, 0, NULL, &status);

1510 resultPat = uregex_pattern(re, &resultLen, &status);

1511 TEST_ASSERT_SUCCESS(status);

1512

1513 /* The TEST_ASSERT_SUCCESS above should change too... */

1514 if (U_SUCCESS(status)) {

1515 TEST_ASSERT(resultLen == -1);

1516 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);

1517 }

1518

1519 resultText = uregex_patternUText(re, &status);

1520 TEST_ASSERT_SUCCESS(status);

1521 TEST_ASSERT_UTEXT(str_hello, resultText);

1522

1523 uregex_close(re);

1524

1525 status = U_ZERO_ERROR;

1526 re = uregex_open(pat, 3, 0, NULL, &status);

1527 resultPat = uregex_pattern(re, &resultLen, &status);

1528 TEST_ASSERT_SUCCESS(status);

1529

1530 /* The TEST_ASSERT_SUCCESS above should change too... */

1531 if (U_SUCCESS(status)) {

1532 TEST_ASSERT(resultLen == 3);

1533 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);

1534 TEST_ASSERT(u_strlen(resultPat) == 3);

1535 }

1536

1537 resultText = uregex_patternUText(re, &status);

1538 TEST_ASSERT_SUCCESS(status);

1539 TEST_ASSERT_UTEXT(str_hel, resultText);

1540

1541 uregex_close(re);

1542 }

1543

1544 /*

1545 * setUText() and lookingAt()

1546 */

1547 {

1548 UText text1 = UTEXT_INITIALIZER;

1549 UText text2 = UTEXT_INITIALIZER;

1550 UBool result;

1551 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */

1552 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */

1553 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */

1554 status = U_ZERO_ERROR;

1555 utext_openUTF8(&text1, str_abcccd, -1, &status);

1556 utext_openUTF8(&text2, str_abcccxd, -1, &status);

1557

1558 utext_openUTF8(&patternText, str_abcd, -1, &status);

1559 re = uregex_openUText(&patternText, 0, NULL, &status);

1560 TEST_ASSERT_SUCCESS(status);

1561

1562 /* Operation before doing a setText should fail... */

1563 status = U_ZERO_ERROR;

1564 uregex_lookingAt(re, 0, &status);

1565 TEST_ASSERT( status== U_REGEX_INVALID_STATE);

1566

1567 status = U_ZERO_ERROR;

1568 uregex_setUText(re, &text1, &status);

1569 result = uregex_lookingAt(re, 0, &status);

1570 TEST_ASSERT(result == TRUE);

1571 TEST_ASSERT_SUCCESS(status);

1572

1573 status = U_ZERO_ERROR;

1574 uregex_setUText(re, &text2, &status);

1575 result = uregex_lookingAt(re, 0, &status);

1576 TEST_ASSERT(result == FALSE);

1577 TEST_ASSERT_SUCCESS(status);

1578

1579 status = U_ZERO_ERROR;

1580 uregex_setUText(re, &text1, &status);

1581 result = uregex_lookingAt(re, 0, &status);

1582 TEST_ASSERT(result == TRUE);

1583 TEST_ASSERT_SUCCESS(status);

1584

1585 uregex_close(re);

1586 utext_close(&text1);

1587 utext_close(&text2);

1588 }

1589

1590

1591 /*

1592 * getText() and getUText()

1593 */

1594 {

1595 UText text1 = UTEXT_INITIALIZER;

1596 UText text2 = UTEXT_INITIALIZER;

1597 UChar text2Chars[20];

1598 UText *resultText;

1599 const UChar *result;

1600 int32_t textLength;

1601 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */

1602 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */

1603 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */

1604

1605

1606 status = U_ZERO_ERROR;

1607 utext_openUTF8(&text1, str_abcccd, -1, &status);

1608 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));

1609 utext_openUChars(&text2, text2Chars, -1, &status);

1610

1611 utext_openUTF8(&patternText, str_abcd, -1, &status);

1612 re = uregex_openUText(&patternText, 0, NULL, &status);

1613

1614 /* First set a UText */

1615 uregex_setUText(re, &text1, &status);

1616 resultText = uregex_getUText(re, NULL, &status);

1617 TEST_ASSERT_SUCCESS(status);

1618 TEST_ASSERT(resultText != &text1);

1619 utext_setNativeIndex(resultText, 0);

1620 utext_setNativeIndex(&text1, 0);

1621 TEST_ASSERT(testUTextEqual(resultText, &text1));

1622 utext_close(resultText);

1623

1624 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */

1625 (void)result; /* Suppress set but not used warning. */

1626 TEST_ASSERT(textLength == -1 || textLength == 6);

1627 resultText = uregex_getUText(re, NULL, &status);

1628 TEST_ASSERT_SUCCESS(status);

1629 TEST_ASSERT(resultText != &text1);

1630 utext_setNativeIndex(resultText, 0);

1631 utext_setNativeIndex(&text1, 0);

1632 TEST_ASSERT(testUTextEqual(resultText, &text1));

1633 utext_close(resultText);

1634

1635 /* Then set a UChar * */

1636 uregex_setText(re, text2Chars, 7, &status);

1637 resultText = uregex_getUText(re, NULL, &status);

1638 TEST_ASSERT_SUCCESS(status);

1639 utext_setNativeIndex(resultText, 0);

1640 utext_setNativeIndex(&text2, 0);

1641 TEST_ASSERT(testUTextEqual(resultText, &text2));

1642 utext_close(resultText);

1643 result = uregex_getText(re, &textLength, &status);

1644 TEST_ASSERT(textLength == 7);

1645

1646 uregex_close(re);

1647 utext_close(&text1);

1648 utext_close(&text2);

1649 }

1650

1651 /*

1652 * matches()

1653 */

1654 {

1655 UText text1 = UTEXT_INITIALIZER;

1656 UBool result;

1657 UText nullText = UTEXT_INITIALIZER;

1658 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */

1659 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */

1660

1661 status = U_ZERO_ERROR;

1662 utext_openUTF8(&text1, str_abcccde, -1, &status);

1663 utext_openUTF8(&patternText, str_abcd, -1, &status);

1664 re = uregex_openUText(&patternText, 0, NULL, &status);

1665

1666 uregex_setUText(re, &text1, &status);

1667 result = uregex_matches(re, 0, &status);

1668 TEST_ASSERT(result == FALSE);

1669 TEST_ASSERT_SUCCESS(status);

1670 uregex_close(re);

1671

1672 status = U_ZERO_ERROR;

1673 re = uregex_openC(".?", 0, NULL, &status);

1674 uregex_setUText(re, &text1, &status);

1675 result = uregex_matches(re, 7, &status);

1676 TEST_ASSERT(result == TRUE);

1677 TEST_ASSERT_SUCCESS(status);

1678

1679 status = U_ZERO_ERROR;

1680 utext_openUTF8(&nullText, "", -1, &status);

1681 uregex_setUText(re, &nullText, &status);

1682 TEST_ASSERT_SUCCESS(status);

1683 result = uregex_matches(re, 0, &status);

1684 TEST_ASSERT(result == TRUE);

1685 TEST_ASSERT_SUCCESS(status);

1686

1687 uregex_close(re);

1688 utext_close(&text1);

1689 utext_close(&nullText);

1690 }

1691

1692

1693 /*

1694 * lookingAt() Used in setText test.

1695 */

1696

1697

1698 /*

1699 * find(), findNext, start, end, reset

1700 */

1701 {

1702 UChar text1[50];

1703 UBool result;

1704 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));

1705 status = U_ZERO_ERROR;

1706 re = uregex_openC("rx", 0, NULL, &status);

1707

1708 uregex_setText(re, text1, -1, &status);

1709 result = uregex_find(re, 0, &status);

1710 TEST_ASSERT(result == TRUE);

1711 TEST_ASSERT(uregex_start(re, 0, &status) == 3);

1712 TEST_ASSERT(uregex_end(re, 0, &status) == 5);

1713 TEST_ASSERT_SUCCESS(status);

1714

1715 result = uregex_find(re, 9, &status);

1716 TEST_ASSERT(result == TRUE);

1717 TEST_ASSERT(uregex_start(re, 0, &status) == 11);

1718 TEST_ASSERT(uregex_end(re, 0, &status) == 13);

1719 TEST_ASSERT_SUCCESS(status);

1720

1721 result = uregex_find(re, 14, &status);

1722 TEST_ASSERT(result == FALSE);

1723 TEST_ASSERT_SUCCESS(status);

1724

1725 status = U_ZERO_ERROR;

1726 uregex_reset(re, 0, &status);

1727

1728 result = uregex_findNext(re, &status);

1729 TEST_ASSERT(result == TRUE);

1730 TEST_ASSERT(uregex_start(re, 0, &status) == 3);

1731 TEST_ASSERT(uregex_end(re, 0, &status) == 5);

1732 TEST_ASSERT_SUCCESS(status);

1733

1734 result = uregex_findNext(re, &status);

1735 TEST_ASSERT(result == TRUE);

1736 TEST_ASSERT(uregex_start(re, 0, &status) == 6);

1737 TEST_ASSERT(uregex_end(re, 0, &status) == 8);

1738 TEST_ASSERT_SUCCESS(status);

1739

1740 status = U_ZERO_ERROR;

1741 uregex_reset(re, 12, &status);

1742

1743 result = uregex_findNext(re, &status);

1744 TEST_ASSERT(result == TRUE);

1745 TEST_ASSERT(uregex_start(re, 0, &status) == 13);

1746 TEST_ASSERT(uregex_end(re, 0, &status) == 15);

1747 TEST_ASSERT_SUCCESS(status);

1748

1749 result = uregex_findNext(re, &status);

1750 TEST_ASSERT(result == FALSE);

1751 TEST_ASSERT_SUCCESS(status);

1752

1753 uregex_close(re);

1754 }

1755

1756 /*

1757 * groupUText()

1758 */

1759 {

1760 UChar text1[80];

1761 UText *actual;

1762 UBool result;

1763 int64_t groupLen = 0;

1764 UChar groupBuf[20];

1765

1766 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));

1767

1768 status = U_ZERO_ERROR;

1769 re = uregex_openC("abc(.*?)def", 0, NULL, &status);

1770 TEST_ASSERT_SUCCESS(status);

1771

1772 uregex_setText(re, text1, -1, &status);

1773 result = uregex_find(re, 0, &status);

1774 TEST_ASSERT(result==TRUE);

1775

1776 /* Capture Group 0 with shallow clone API. Should succeed. */

1777 status = U_ZERO_ERROR;

1778 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);

1779 TEST_ASSERT_SUCCESS(status);

1780

1781 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */

1782 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */

1783 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);

1784

1785 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);

1786 utext_close(actual);

1787

1788 /* Capture group #1. Should succeed. */

1789 status = U_ZERO_ERROR;

1790

1791 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);

1792 TEST_ASSERT_SUCCESS(status);

1793 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */

1794 /* (within the string text1) */

1795 TEST_ASSERT(10 == groupLen); /* length of " interior " */

1796 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);

1797 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);

1798

1799 utext_close(actual);

1800

1801 /* Capture group out of range. Error. */

1802 status = U_ZERO_ERROR;

1803 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);

1804 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);

1805 utext_close(actual);

1806

1807 uregex_close(re);

1808 }

1809

1810 /*

1811 * replaceFirst()

1812 */

1813 {

1814 UChar text1[80];

1815 UChar text2[80];

1816 UText replText = UTEXT_INITIALIZER;

1817 UText *result;

1818 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */

1819 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */

1820 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,

1821 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */

1822 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */

1823 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */

1824 status = U_ZERO_ERROR;

1825 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));

1826 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));

1827 utext_openUTF8(&replText, str_1x, -1, &status);

1828

1829 re = uregex_openC("x(.*?)x", 0, NULL, &status);

1830 TEST_ASSERT_SUCCESS(status);

1831

1832 /* Normal case, with match */

1833 uregex_setText(re, text1, -1, &status);

1834 result = uregex_replaceFirstUText(re, &replText, NULL, &status);

1835 TEST_ASSERT_SUCCESS(status);

1836 TEST_ASSERT_UTEXT(str_Replxxx, result);

1837 utext_close(result);

1838

1839 /* No match. Text should copy to output with no changes. */

1840 uregex_setText(re, text2, -1, &status);

1841 result = uregex_replaceFirstUText(re, &replText, NULL, &status);

1842 TEST_ASSERT_SUCCESS(status);

1843 TEST_ASSERT_UTEXT(str_Nomatchhere, result);

1844 utext_close(result);

1845

1846 /* Unicode escapes */

1847 uregex_setText(re, text1, -1, &status);

1848 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);

1849 result = uregex_replaceFirstUText(re, &replText, NULL, &status);

1850 TEST_ASSERT_SUCCESS(status);

1851 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);

1852 utext_close(result);

1853

1854 uregex_close(re);

1855 utext_close(&replText);

1856 }

1857

1858

1859 /*

1860 * replaceAll()

1861 */

1862 {

1863 UChar text1[80];

1864 UChar text2[80];

1865 UText replText = UTEXT_INITIALIZER;

1866 UText *result;

1867 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */

1868 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65 , 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */

1869 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x6 3, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */

1870 status = U_ZERO_ERROR;

1871 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));

1872 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));

1873 utext_openUTF8(&replText, str_1, -1, &status);

1874

1875 re = uregex_openC("x(.*?)x", 0, NULL, &status);

1876 TEST_ASSERT_SUCCESS(status);

1877

1878 /* Normal case, with match */

1879 uregex_setText(re, text1, -1, &status);

1880 result = uregex_replaceAllUText(re, &replText, NULL, &status);

1881 TEST_ASSERT_SUCCESS(status);

1882 TEST_ASSERT_UTEXT(str_Replaceaa1, result);

1883 utext_close(result);

1884

1885 /* No match. Text should copy to output with no changes. */

1886 uregex_setText(re, text2, -1, &status);

1887 result = uregex_replaceAllUText(re, &replText, NULL, &status);

1888 TEST_ASSERT_SUCCESS(status);

1889 TEST_ASSERT_UTEXT(str_Nomatchhere, result);

1890 utext_close(result);

1891

1892 uregex_close(re);

1893 utext_close(&replText);

1894 }

1895

1896

1897 /*

1898 * appendReplacement()

1899 */

1900 {

1901 UChar text[100];

1902 UChar repl[100];

1903 UChar buf[100];

1904 UChar *bufPtr;

1905 int32_t bufCap;

1906

1907 status = U_ZERO_ERROR;

1908 re = uregex_openC(".*", 0, 0, &status);

1909 TEST_ASSERT_SUCCESS(status);

1910

1911 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));

1912 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));

1913 uregex_setText(re, text, -1, &status);

1914

1915 /* match covers whole target string */

1916 uregex_find(re, 0, &status);

1917 TEST_ASSERT_SUCCESS(status);

1918 bufPtr = buf;

1919 bufCap = UPRV_LENGTHOF(buf);

1920 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);

1921 TEST_ASSERT_SUCCESS(status);

1922 TEST_ASSERT_STRING("some other", buf, TRUE);

1923

1924 /* Match has \u \U escapes */

1925 uregex_find(re, 0, &status);

1926 TEST_ASSERT_SUCCESS(status);

1927 bufPtr = buf;

1928 bufCap = UPRV_LENGTHOF(buf);

1929 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF( repl));

1930 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);

1931 TEST_ASSERT_SUCCESS(status);

1932 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);

1933

1934 uregex_close(re);

1935 }

1936

1937

1938 /*

1939 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().

1940 */

1941

1942 /*

1943 * splitUText()

1944 */

1945 {

1946 UChar textToSplit[80];

1947 UChar text2[80];

1948 UText *fields[10];

1949 int32_t numFields;

1950 int32_t i;

1951

1952 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textTo Split));

1953 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));

1954

1955 status = U_ZERO_ERROR;

1956 re = uregex_openC(":", 0, NULL, &status);

1957

1958

1959 /* Simple split */

1960

1961 uregex_setText(re, textToSplit, -1, &status);

1962 TEST_ASSERT_SUCCESS(status);

1963

1964 /* The TEST_ASSERT_SUCCESS call above should change too... */

1965 if (U_SUCCESS(status)) {

1966 memset(fields, 0, sizeof(fields));

1967 numFields = uregex_splitUText(re, fields, 10, &status);

1968 TEST_ASSERT_SUCCESS(status);

1969

1970 /* The TEST_ASSERT_SUCCESS call above should change too... */

1971 if(U_SUCCESS(status)) {

1972 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x0 0 }; /* 'first ' */

1973 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x 64, 0x00 }; /* ' second' */

1974 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x6 4, 0x00 }; /* ' third' */

1975 TEST_ASSERT(numFields == 3);

1976 TEST_ASSERT_UTEXT(str_first, fields[0]);

1977 TEST_ASSERT_UTEXT(str_second, fields[1]);

1978 TEST_ASSERT_UTEXT(str_third, fields[2]);

1979 TEST_ASSERT(fields[3] == NULL);

1980 }

1981 for(i = 0; i < numFields; i++) {

1982 utext_close(fields[i]);

1983 }

1984 }

1985

1986 uregex_close(re);

1987

1988

1989 /* Split with too few output strings available */

1990 status = U_ZERO_ERROR;

1991 re = uregex_openC(":", 0, NULL, &status);

1992 uregex_setText(re, textToSplit, -1, &status);

1993 TEST_ASSERT_SUCCESS(status);

1994

1995 /* The TEST_ASSERT_SUCCESS call above should change too... */

1996 if(U_SUCCESS(status)) {

1997 fields[0] = NULL;

1998 fields[1] = NULL;

1999 fields[2] = &patternText;

2000 numFields = uregex_splitUText(re, fields, 2, &status);

2001 TEST_ASSERT_SUCCESS(status);

2002

2003 /* The TEST_ASSERT_SUCCESS call above should change too... */

2004 if(U_SUCCESS(status)) {

2005 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0 x00 }; /* first */

2006 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0 x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */

2007 TEST_ASSERT(numFields == 2);

2008 TEST_ASSERT_UTEXT(str_first, fields[0]);

2009 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);

2010 TEST_ASSERT(fields[2] == &patternText);

2011 }

2012 for(i = 0; i < numFields; i++) {

2013 utext_close(fields[i]);

2014 }

2015 }

2016

2017 uregex_close(re);

2018 }

2019

2020 /* splitUText(), part 2. Patterns with capture groups. The capture group text

2021 * comes out as additional fields. */

2022 {

2023 UChar textToSplit[80];

2024 UText *fields[10];

2025 int32_t numFields;

2026 int32_t i;

2027

2028 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LEN GTHOF(textToSplit));

2029

2030 status = U_ZERO_ERROR;

2031 re = uregex_openC("<(.*?)>", 0, NULL, &status);

2032

2033 uregex_setText(re, textToSplit, -1, &status);

2034 TEST_ASSERT_SUCCESS(status);

2035

2036 /* The TEST_ASSERT_SUCCESS call above should change too... */

2037 if(U_SUCCESS(status)) {

2038 memset(fields, 0, sizeof(fields));

2039 numFields = uregex_splitUText(re, fields, 10, &status);

2040 TEST_ASSERT_SUCCESS(status);

2041

2042 /* The TEST_ASSERT_SUCCESS call above should change too... */

2043 if(U_SUCCESS(status)) {

2044 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0 x00 }; /* first */

2045 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */

2046 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */

2047 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */

2048 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0 x64, 0x00 }; /* third */

2049

2050 TEST_ASSERT(numFields == 5);

2051 TEST_ASSERT_UTEXT(str_first, fields[0]);

2052 TEST_ASSERT_UTEXT(str_taga, fields[1]);

2053 TEST_ASSERT_UTEXT(str_second, fields[2]);

2054 TEST_ASSERT_UTEXT(str_tagb, fields[3]);

2055 TEST_ASSERT_UTEXT(str_third, fields[4]);

2056 TEST_ASSERT(fields[5] == NULL);

2057 }

2058 for(i = 0; i < numFields; i++) {

2059 utext_close(fields[i]);

2060 }

2061 }

2062

2063 /* Split with too few output strings available (2) */

2064 status = U_ZERO_ERROR;

2065 fields[0] = NULL;

2066 fields[1] = NULL;

2067 fields[2] = &patternText;

2068 numFields = uregex_splitUText(re, fields, 2, &status);

2069 TEST_ASSERT_SUCCESS(status);

2070

2071 /* The TEST_ASSERT_SUCCESS call above should change too... */

2072 if(U_SUCCESS(status)) {

2073 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */

2074 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0 x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x6 9, 0x72, 0x64, 0x00 }; /* second<tag-b> third */

2075 TEST_ASSERT(numFields == 2);

2076 TEST_ASSERT_UTEXT(str_first, fields[0]);

2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);

2078 TEST_ASSERT(fields[2] == &patternText);

2079 }

2080 for(i = 0; i < numFields; i++) {

2081 utext_close(fields[i]);

2082 }

2083

2084

2085 /* Split with too few output strings available (3) */

2086 status = U_ZERO_ERROR;

2087 fields[0] = NULL;

2088 fields[1] = NULL;

2089 fields[2] = NULL;

2090 fields[3] = &patternText;

2091 numFields = uregex_splitUText(re, fields, 3, &status);

2092 TEST_ASSERT_SUCCESS(status);

2093

2094 /* The TEST_ASSERT_SUCCESS call above should change too... */

2095 if(U_SUCCESS(status)) {

2096 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */

2097 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* t ag-a */

2098 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0 x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x6 9, 0x72, 0x64, 0x00 }; /* second<tag-b> third */

2099 TEST_ASSERT(numFields == 3);

2100 TEST_ASSERT_UTEXT(str_first, fields[0]);

2101 TEST_ASSERT_UTEXT(str_taga, fields[1]);

2102 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);

2103 TEST_ASSERT(fields[3] == &patternText);

2104 }

2105 for(i = 0; i < numFields; i++) {

2106 utext_close(fields[i]);

2107 }

2108

2109 /* Split with just enough output strings available (5) */

2110 status = U_ZERO_ERROR;

2111 fields[0] = NULL;

2112 fields[1] = NULL;

2113 fields[2] = NULL;

2114 fields[3] = NULL;

2115 fields[4] = NULL;

2116 fields[5] = &patternText;

2117 numFields = uregex_splitUText(re, fields, 5, &status);

2118 TEST_ASSERT_SUCCESS(status);

2119

2120 /* The TEST_ASSERT_SUCCESS call above should change too... */

2121 if(U_SUCCESS(status)) {

2122 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */

2123 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* t ag-a */

2124 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64 , 0x00 }; /* second */

2125 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* t ag-b */

2126 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */

2127

2128 TEST_ASSERT(numFields == 5);

2129 TEST_ASSERT_UTEXT(str_first, fields[0]);

2130 TEST_ASSERT_UTEXT(str_taga, fields[1]);

2131 TEST_ASSERT_UTEXT(str_second, fields[2]);

2132 TEST_ASSERT_UTEXT(str_tagb, fields[3]);

2133 TEST_ASSERT_UTEXT(str_third, fields[4]);

2134 TEST_ASSERT(fields[5] == &patternText);

2135 }

2136 for(i = 0; i < numFields; i++) {

2137 utext_close(fields[i]);

2138 }

2139

2140 /* Split, end of text is a field delimiter. */

2141 status = U_ZERO_ERROR;

2142 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), & status);

2143 TEST_ASSERT_SUCCESS(status);

2144

2145 /* The TEST_ASSERT_SUCCESS call above should change too... */

2146 if(U_SUCCESS(status)) {

2147 memset(fields, 0, sizeof(fields));

2148 fields[9] = &patternText;

2149 numFields = uregex_splitUText(re, fields, 9, &status);

2150 TEST_ASSERT_SUCCESS(status);

2151

2152 /* The TEST_ASSERT_SUCCESS call above should change too... */

2153 if(U_SUCCESS(status)) {

2154 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0 x00 }; /* first */

2155 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */

2156 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */

2157 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */

2158 const char str_empty[] = { 0x00 };

2159

2160 TEST_ASSERT(numFields == 5);

2161 TEST_ASSERT_UTEXT(str_first, fields[0]);

2162 TEST_ASSERT_UTEXT(str_taga, fields[1]);

2163 TEST_ASSERT_UTEXT(str_second, fields[2]);

2164 TEST_ASSERT_UTEXT(str_tagb, fields[3]);

2165 TEST_ASSERT_UTEXT(str_empty, fields[4]);

2166 TEST_ASSERT(fields[5] == NULL);

2167 TEST_ASSERT(fields[8] == NULL);

2168 TEST_ASSERT(fields[9] == &patternText);

2169 }

2170 for(i = 0; i < numFields; i++) {

2171 utext_close(fields[i]);

2172 }

2173 }

2174

2175 uregex_close(re);

2176 }

2177 utext_close(&patternText);

2178 }

2179

2180

2181 static void TestRefreshInput(void) {

2182 /*

2183 * RefreshInput changes out the input of a URegularExpression without

2184 * changing anything else in the match state. Used with Java JNI,

2185 * when Java moves the underlying string storage. This test

2186 * runs a find() loop, moving the text after the first match.

2187 * The right number of matches should still be found.

2188 */

2189 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */

2190 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};

2191 UErrorCode status = U_ZERO_ERROR;

2192 URegularExpression *re;

2193 UText ut1 = UTEXT_INITIALIZER;

2194 UText ut2 = UTEXT_INITIALIZER;

2195

2196 re = uregex_openC("[ABC]", 0, 0, &status);

2197 TEST_ASSERT_SUCCESS(status);

2198

2199 utext_openUChars(&ut1, testStr, -1, &status);

2200 TEST_ASSERT_SUCCESS(status);

2201 uregex_setUText(re, &ut1, &status);

2202 TEST_ASSERT_SUCCESS(status);

2203

2204 /* Find the first match "A" in the original string */

2205 TEST_ASSERT(uregex_findNext(re, &status));

2206 TEST_ASSERT(uregex_start(re, 0, &status) == 0);

2207

2208 /* Move the string, kill the original string. */

2209 u_strcpy(movedStr, testStr);

2210 u_memset(testStr, 0, u_strlen(testStr));

2211 utext_openUChars(&ut2, movedStr, -1, &status);

2212 TEST_ASSERT_SUCCESS(status);

2213 uregex_refreshUText(re, &ut2, &status);

2214 TEST_ASSERT_SUCCESS(status);

2215

2216 /* Find the following two matches, now working in the moved string. */

2217 TEST_ASSERT(uregex_findNext(re, &status));

2218 TEST_ASSERT(uregex_start(re, 0, &status) == 2);

2219 TEST_ASSERT(uregex_findNext(re, &status));

2220 TEST_ASSERT(uregex_start(re, 0, &status) == 4);

2221 TEST_ASSERT(FALSE == uregex_findNext(re, &status));

2222

2223 uregex_close(re);

2224 }

2225

2226

2227 static void TestBug8421(void) {

2228 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to b e matched

2229 * was failing.

2230 */

2231 URegularExpression *re;

2232 UErrorCode status = U_ZERO_ERROR;

2233 int32_t limit = -1;

2234

2235 re = uregex_openC("abc", 0, 0, &status);

2236 TEST_ASSERT_SUCCESS(status);

2237

2238 limit = uregex_getTimeLimit(re, &status);

2239 TEST_ASSERT_SUCCESS(status);

2240 TEST_ASSERT(limit == 0);

2241

2242 uregex_setTimeLimit(re, 100, &status);

2243 TEST_ASSERT_SUCCESS(status);

2244 limit = uregex_getTimeLimit(re, &status);

2245 TEST_ASSERT_SUCCESS(status);

2246 TEST_ASSERT(limit == 100);

2247

2248 uregex_close(re);

2249 }

2250

2251 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {

2252 return FALSE;

2253 }

2254

2255 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {

2256 return FALSE;

2257 }

2258

2259 static void TestBug10815() {

2260 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER

2261 * when the callback function specified by uregex_setMatchCallbac k() returns FALSE

2262 */

2263 URegularExpression *re;

2264 UErrorCode status = U_ZERO_ERROR;

2265 UChar text[100];

2266

2267

2268 // findNext() with a find progress callback function.

2269

2270 re = uregex_openC(".z", 0, 0, &status);

2271 TEST_ASSERT_SUCCESS(status);

2272

2273 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));

2274 uregex_setText(re, text, -1, &status);

2275 TEST_ASSERT_SUCCESS(status);

2276

2277 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);

2278 TEST_ASSERT_SUCCESS(status);

2279

2280 uregex_findNext(re, &status);

2281 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

2282

2283 uregex_close(re);

2284

2285 // findNext() with a match progress callback function.

2286

2287 status = U_ZERO_ERROR;

2288 re = uregex_openC("((xxx))y", 0, 0, &status);

2289 TEST_ASSERT_SUCCESS(status);

2290

2291 // Pattern + this text gives an exponential time match. Without the callback to stop the match,

2292 // it will appear to be stuck in a (near) infinite loop.

2293 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));

2294 uregex_setText(re, text, -1, &status);

2295 TEST_ASSERT_SUCCESS(status);

2296

2297 uregex_setMatchCallback(re, MatchCallback, NULL, &status);

2298 TEST_ASSERT_SUCCESS(status);

2299

2300 uregex_findNext(re, &status);

2301 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);

2302

2303 uregex_close(re);

2304 }

2305

2306

2307 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */

OLD	NEW

« no previous file with comments | « source/test/cintltst/putiltst.c ('k') | source/test/cintltst/sorttest.c » ('j') | no next file with comments »