OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * COPYRIGHT: | |
3 * Copyright (c) 2004-2015, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ********************************************************************/ | |
6 /*******************************************************************************
* | |
7 * | |
8 * File reapits.c | |
9 * | |
10 ********************************************************************************
*/ | |
11 /*C API TEST FOR Regular Expressions */ | |
12 /** | |
13 * This is an API test for ICU regular expressions in C. It doesn't test very
many cases, and doesn't | |
14 * try to test the full functionality. It just calls each function and verifie
s that it | |
15 * works on a basic level. | |
16 * | |
17 * More complete testing of regular expression functionality is done with the C
++ tests. | |
18 **/ | |
19 | |
20 #include "unicode/utypes.h" | |
21 | |
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | |
23 | |
24 #include <stdlib.h> | |
25 #include <string.h> | |
26 #include "unicode/uloc.h" | |
27 #include "unicode/uregex.h" | |
28 #include "unicode/ustring.h" | |
29 #include "unicode/utext.h" | |
30 #include "cintltst.h" | |
31 #include "cmemory.h" | |
32 | |
/*
 * TEST_ASSERT_SUCCESS(status)
 *    If the UErrorCode holds a failure, log it through log_data_err() together
 *    with the source location of the invocation.  Execution continues afterwards.
 *    Expands to a brace-enclosed block.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
35 | |
/*
 * TEST_ASSERT(expr)
 *    Log a test failure through log_err(), naming the source location and the
 *    text of the expression, when expr compares equal to FALSE.
 *    Expands to a brace-enclosed block, so it is also valid when the caller
 *    omits the trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
38 | |
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) NUL-terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern.
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The two macros must be used as a pair: TEST_SETUP opens a block plus an
 *         "if (U_SUCCESS(status))" body, and TEST_TEARDOWN closes both.  The
 *         enclosing function must declare "status" (UErrorCode) and
 *         "re" (URegularExpression *).
 *
 *         The test code only runs when the pattern compiled, the text buffer was
 *         allocated and the text was set.  A failed malloc() is reported through
 *         status as U_MEMORY_ALLOCATION_ERROR instead of being dereferenced.
 *         TEST_TEARDOWN reports any failure left in status, closes "re" and frees
 *         the text buffer.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
    }
67 | |
68 | |
69 /** | |
70 * @param expected utf-8 array of bytes to be expected | |
71 */ | |
72 static void test_assert_string(const char *expected, const UChar *actual, UBool
nulTerm, const char *file, int line) { | |
73 char buf_inside_macro[120]; | |
74 int32_t len = (int32_t)strlen(expected); | |
75 UBool success; | |
76 if (nulTerm) { | |
77 u_austrncpy(buf_inside_macro, (actual), len+1); | |
78 buf_inside_macro[len+2] = 0; | |
79 success = (strcmp((expected), buf_inside_macro) == 0); | |
80 } else { | |
81 u_austrncpy(buf_inside_macro, (actual), len); | |
82 buf_inside_macro[len+1] = 0; | |
83 success = (strncmp((expected), buf_inside_macro, len) == 0); | |
84 } | |
85 if (success == FALSE) { | |
86 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", | |
87 file, line, (expected), buf_inside_macro); | |
88 } | |
89 } | |
90 | |
/*
 * TEST_ASSERT_STRING(expected, actual, nulTerm)
 *    Forwards to test_assert_string(), supplying the source location of the
 *    invocation for the failure message.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) \
    test_assert_string((expected), (actual), (nulTerm), __FILE__, __LINE__)
92 | |
93 | |
94 static UBool equals_utf8_utext(const char *utf8, UText *utext) { | |
95 int32_t u8i = 0; | |
96 UChar32 u8c = 0; | |
97 UChar32 utc = 0; | |
98 UBool stringsEqual = TRUE; | |
99 utext_setNativeIndex(utext, 0); | |
100 for (;;) { | |
101 U8_NEXT_UNSAFE(utf8, u8i, u8c); | |
102 utc = utext_next32(utext); | |
103 if (u8c == 0 && utc == U_SENTINEL) { | |
104 break; | |
105 } | |
106 if (u8c != utc || u8c == 0) { | |
107 stringsEqual = FALSE; | |
108 break; | |
109 } | |
110 } | |
111 return stringsEqual; | |
112 } | |
113 | |
114 | |
115 static void test_assert_utext(const char *expected, UText *actual, const char *f
ile, int line) { | |
116 utext_setNativeIndex(actual, 0); | |
117 if (!equals_utf8_utext(expected, actual)) { | |
118 UChar32 c; | |
119 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, li
ne, expected); | |
120 c = utext_next32From(actual, 0); | |
121 while (c != U_SENTINEL) { | |
122 if (0x20<c && c <0x7e) { | |
123 log_err("%c", c); | |
124 } else { | |
125 log_err("%#x", c); | |
126 } | |
127 c = UTEXT_NEXT32(actual); | |
128 } | |
129 log_err("\"\n"); | |
130 } | |
131 } | |
132 | |
/*
 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
 *    Forwards to test_assert_utext(), supplying the source location of the
 *    invocation for the failure message.
 *    Note:  expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) \
    test_assert_utext((expected), (actual), __FILE__, __LINE__)
138 | |
139 static UBool testUTextEqual(UText *uta, UText *utb) { | |
140 UChar32 ca = 0; | |
141 UChar32 cb = 0; | |
142 utext_setNativeIndex(uta, 0); | |
143 utext_setNativeIndex(utb, 0); | |
144 do { | |
145 ca = utext_next32(uta); | |
146 cb = utext_next32(utb); | |
147 if (ca != cb) { | |
148 break; | |
149 } | |
150 } while (ca != U_SENTINEL); | |
151 return ca == cb; | |
152 } | |
153 | |
154 | |
155 | |
156 | |
157 static void TestRegexCAPI(void); | |
158 static void TestBug4315(void); | |
159 static void TestUTextAPI(void); | |
160 static void TestRefreshInput(void); | |
161 static void TestBug8421(void); | |
162 static void TestBug10815(void); | |
163 | |
164 void addURegexTest(TestNode** root); | |
165 | |
166 void addURegexTest(TestNode** root) | |
167 { | |
168 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); | |
169 addTest(root, &TestBug4315, "regex/TestBug4315"); | |
170 addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); | |
171 addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); | |
172 addTest(root, &TestBug8421, "regex/TestBug8421"); | |
173 addTest(root, &TestBug10815, "regex/TestBug10815"); | |
174 } | |
175 | |
176 /* | |
177 * Call back function and context struct used for testing | |
178 * regular expression user callbacks. This test is mostly the same as | |
179 * the corresponding C++ test in intltest. | |
180 */ | |
/* State shared between a test and TestCallbackFn through the context pointer. */
typedef struct callBackContext callBackContext;
struct callBackContext {
    int32_t  maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t  numCalls;    /* number of times TestCallbackFn has been invoked */
    int32_t  lastSteps;   /* "steps" argument received by the previous invocation */
};
186 | |
187 static UBool U_EXPORT2 U_CALLCONV | |
188 TestCallbackFn(const void *context, int32_t steps) { | |
189 callBackContext *info = (callBackContext *)context; | |
190 if (info->lastSteps+1 != steps) { | |
191 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastS
teps+1, steps); | |
192 } | |
193 info->lastSteps = steps; | |
194 info->numCalls++; | |
195 return (info->numCalls < info->maxCalls); | |
196 } | |
197 | |
198 /* | |
199 * Regular Expression C API Tests | |
200 */ | |
201 static void TestRegexCAPI(void) { | |
202 UErrorCode status = U_ZERO_ERROR; | |
203 URegularExpression *re; | |
204 UChar pat[200]; | |
205 UChar *minus1; | |
206 | |
207 memset(&minus1, -1, sizeof(minus1)); | |
208 | |
209 /* Minimalist open/close */ | |
210 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); | |
211 re = uregex_open(pat, -1, 0, 0, &status); | |
212 if (U_FAILURE(status)) { | |
213 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\"
(Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); | |
214 return; | |
215 } | |
216 uregex_close(re); | |
217 | |
218 /* Open with all flag values set */ | |
219 status = U_ZERO_ERROR; | |
220 re = uregex_open(pat, -1, | |
221 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD | UREGEX_LITERAL, | |
222 0, &status); | |
223 TEST_ASSERT_SUCCESS(status); | |
224 uregex_close(re); | |
225 | |
226 /* Open with an invalid flag */ | |
227 status = U_ZERO_ERROR; | |
228 re = uregex_open(pat, -1, 0x40000000, 0, &status); | |
229 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); | |
230 uregex_close(re); | |
231 | |
232 /* Open with an unimplemented flag */ | |
233 status = U_ZERO_ERROR; | |
234 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status); | |
235 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); | |
236 uregex_close(re); | |
237 | |
238 /* openC with an invalid parameter */ | |
239 status = U_ZERO_ERROR; | |
240 re = uregex_openC(NULL, | |
241 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, 0, &status); | |
242 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); | |
243 | |
244 /* openC with an invalid parameter */ | |
245 status = U_USELESS_COLLATOR_ERROR; | |
246 re = uregex_openC(NULL, | |
247 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, 0, &status); | |
248 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL); | |
249 | |
250 /* openC open from a C string */ | |
251 { | |
252 const UChar *p; | |
253 int32_t len; | |
254 status = U_ZERO_ERROR; | |
255 re = uregex_openC("abc*", 0, 0, &status); | |
256 TEST_ASSERT_SUCCESS(status); | |
257 p = uregex_pattern(re, &len, &status); | |
258 TEST_ASSERT_SUCCESS(status); | |
259 | |
260 /* The TEST_ASSERT_SUCCESS above should change too... */ | |
261 if(U_SUCCESS(status)) { | |
262 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); | |
263 TEST_ASSERT(u_strcmp(pat, p) == 0); | |
264 TEST_ASSERT(len==(int32_t)strlen("abc*")); | |
265 } | |
266 | |
267 uregex_close(re); | |
268 | |
269 /* TODO: Open with ParseError parameter */ | |
270 } | |
271 | |
272 /* | |
273 * clone | |
274 */ | |
275 { | |
276 URegularExpression *clone1; | |
277 URegularExpression *clone2; | |
278 URegularExpression *clone3; | |
279 UChar testString1[30]; | |
280 UChar testString2[30]; | |
281 UBool result; | |
282 | |
283 | |
284 status = U_ZERO_ERROR; | |
285 re = uregex_openC("abc*", 0, 0, &status); | |
286 TEST_ASSERT_SUCCESS(status); | |
287 clone1 = uregex_clone(re, &status); | |
288 TEST_ASSERT_SUCCESS(status); | |
289 TEST_ASSERT(clone1 != NULL); | |
290 | |
291 status = U_ZERO_ERROR; | |
292 clone2 = uregex_clone(re, &status); | |
293 TEST_ASSERT_SUCCESS(status); | |
294 TEST_ASSERT(clone2 != NULL); | |
295 uregex_close(re); | |
296 | |
297 status = U_ZERO_ERROR; | |
298 clone3 = uregex_clone(clone2, &status); | |
299 TEST_ASSERT_SUCCESS(status); | |
300 TEST_ASSERT(clone3 != NULL); | |
301 | |
302 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); | |
303 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); | |
304 | |
305 status = U_ZERO_ERROR; | |
306 uregex_setText(clone1, testString1, -1, &status); | |
307 TEST_ASSERT_SUCCESS(status); | |
308 result = uregex_lookingAt(clone1, 0, &status); | |
309 TEST_ASSERT_SUCCESS(status); | |
310 TEST_ASSERT(result==TRUE); | |
311 | |
312 status = U_ZERO_ERROR; | |
313 uregex_setText(clone2, testString2, -1, &status); | |
314 TEST_ASSERT_SUCCESS(status); | |
315 result = uregex_lookingAt(clone2, 0, &status); | |
316 TEST_ASSERT_SUCCESS(status); | |
317 TEST_ASSERT(result==FALSE); | |
318 result = uregex_find(clone2, 0, &status); | |
319 TEST_ASSERT_SUCCESS(status); | |
320 TEST_ASSERT(result==TRUE); | |
321 | |
322 uregex_close(clone1); | |
323 uregex_close(clone2); | |
324 uregex_close(clone3); | |
325 | |
326 } | |
327 | |
328 /* | |
329 * pattern() | |
330 */ | |
331 { | |
332 const UChar *resultPat; | |
333 int32_t resultLen; | |
334 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); | |
335 status = U_ZERO_ERROR; | |
336 re = uregex_open(pat, -1, 0, NULL, &status); | |
337 resultPat = uregex_pattern(re, &resultLen, &status); | |
338 TEST_ASSERT_SUCCESS(status); | |
339 | |
340 /* The TEST_ASSERT_SUCCESS above should change too... */ | |
341 if (U_SUCCESS(status)) { | |
342 TEST_ASSERT(resultLen == -1); | |
343 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); | |
344 } | |
345 | |
346 uregex_close(re); | |
347 | |
348 status = U_ZERO_ERROR; | |
349 re = uregex_open(pat, 3, 0, NULL, &status); | |
350 resultPat = uregex_pattern(re, &resultLen, &status); | |
351 TEST_ASSERT_SUCCESS(status); | |
352 TEST_ASSERT_SUCCESS(status); | |
353 | |
354 /* The TEST_ASSERT_SUCCESS above should change too... */ | |
355 if (U_SUCCESS(status)) { | |
356 TEST_ASSERT(resultLen == 3); | |
357 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); | |
358 TEST_ASSERT(u_strlen(resultPat) == 3); | |
359 } | |
360 | |
361 uregex_close(re); | |
362 } | |
363 | |
364 /* | |
365 * flags() | |
366 */ | |
367 { | |
368 int32_t t; | |
369 | |
370 status = U_ZERO_ERROR; | |
371 re = uregex_open(pat, -1, 0, NULL, &status); | |
372 t = uregex_flags(re, &status); | |
373 TEST_ASSERT_SUCCESS(status); | |
374 TEST_ASSERT(t == 0); | |
375 uregex_close(re); | |
376 | |
377 status = U_ZERO_ERROR; | |
378 re = uregex_open(pat, -1, 0, NULL, &status); | |
379 t = uregex_flags(re, &status); | |
380 TEST_ASSERT_SUCCESS(status); | |
381 TEST_ASSERT(t == 0); | |
382 uregex_close(re); | |
383 | |
384 status = U_ZERO_ERROR; | |
385 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL,
&status); | |
386 t = uregex_flags(re, &status); | |
387 TEST_ASSERT_SUCCESS(status); | |
388 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); | |
389 uregex_close(re); | |
390 } | |
391 | |
392 /* | |
393 * setText() and lookingAt() | |
394 */ | |
395 { | |
396 UChar text1[50]; | |
397 UChar text2[50]; | |
398 UBool result; | |
399 | |
400 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); | |
401 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); | |
402 status = U_ZERO_ERROR; | |
403 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); | |
404 re = uregex_open(pat, -1, 0, NULL, &status); | |
405 TEST_ASSERT_SUCCESS(status); | |
406 | |
407 /* Operation before doing a setText should fail... */ | |
408 status = U_ZERO_ERROR; | |
409 uregex_lookingAt(re, 0, &status); | |
410 TEST_ASSERT( status== U_REGEX_INVALID_STATE); | |
411 | |
412 status = U_ZERO_ERROR; | |
413 uregex_setText(re, text1, -1, &status); | |
414 result = uregex_lookingAt(re, 0, &status); | |
415 TEST_ASSERT(result == TRUE); | |
416 TEST_ASSERT_SUCCESS(status); | |
417 | |
418 status = U_ZERO_ERROR; | |
419 uregex_setText(re, text2, -1, &status); | |
420 result = uregex_lookingAt(re, 0, &status); | |
421 TEST_ASSERT(result == FALSE); | |
422 TEST_ASSERT_SUCCESS(status); | |
423 | |
424 status = U_ZERO_ERROR; | |
425 uregex_setText(re, text1, -1, &status); | |
426 result = uregex_lookingAt(re, 0, &status); | |
427 TEST_ASSERT(result == TRUE); | |
428 TEST_ASSERT_SUCCESS(status); | |
429 | |
430 status = U_ZERO_ERROR; | |
431 uregex_setText(re, text1, 5, &status); | |
432 result = uregex_lookingAt(re, 0, &status); | |
433 TEST_ASSERT(result == FALSE); | |
434 TEST_ASSERT_SUCCESS(status); | |
435 | |
436 status = U_ZERO_ERROR; | |
437 uregex_setText(re, text1, 6, &status); | |
438 result = uregex_lookingAt(re, 0, &status); | |
439 TEST_ASSERT(result == TRUE); | |
440 TEST_ASSERT_SUCCESS(status); | |
441 | |
442 uregex_close(re); | |
443 } | |
444 | |
445 | |
446 /* | |
447 * getText() | |
448 */ | |
449 { | |
450 UChar text1[50]; | |
451 UChar text2[50]; | |
452 const UChar *result; | |
453 int32_t textLength; | |
454 | |
455 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); | |
456 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); | |
457 status = U_ZERO_ERROR; | |
458 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); | |
459 re = uregex_open(pat, -1, 0, NULL, &status); | |
460 | |
461 uregex_setText(re, text1, -1, &status); | |
462 result = uregex_getText(re, &textLength, &status); | |
463 TEST_ASSERT(result == text1); | |
464 TEST_ASSERT(textLength == -1); | |
465 TEST_ASSERT_SUCCESS(status); | |
466 | |
467 status = U_ZERO_ERROR; | |
468 uregex_setText(re, text2, 7, &status); | |
469 result = uregex_getText(re, &textLength, &status); | |
470 TEST_ASSERT(result == text2); | |
471 TEST_ASSERT(textLength == 7); | |
472 TEST_ASSERT_SUCCESS(status); | |
473 | |
474 status = U_ZERO_ERROR; | |
475 uregex_setText(re, text2, 4, &status); | |
476 result = uregex_getText(re, &textLength, &status); | |
477 TEST_ASSERT(result == text2); | |
478 TEST_ASSERT(textLength == 4); | |
479 TEST_ASSERT_SUCCESS(status); | |
480 uregex_close(re); | |
481 } | |
482 | |
483 /* | |
484 * matches() | |
485 */ | |
486 { | |
487 UChar text1[50]; | |
488 UBool result; | |
489 int len; | |
490 UChar nullString[] = {0,0,0}; | |
491 | |
492 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1)); | |
493 status = U_ZERO_ERROR; | |
494 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); | |
495 re = uregex_open(pat, -1, 0, NULL, &status); | |
496 | |
497 uregex_setText(re, text1, -1, &status); | |
498 result = uregex_matches(re, 0, &status); | |
499 TEST_ASSERT(result == FALSE); | |
500 TEST_ASSERT_SUCCESS(status); | |
501 | |
502 status = U_ZERO_ERROR; | |
503 uregex_setText(re, text1, 6, &status); | |
504 result = uregex_matches(re, 0, &status); | |
505 TEST_ASSERT(result == TRUE); | |
506 TEST_ASSERT_SUCCESS(status); | |
507 | |
508 status = U_ZERO_ERROR; | |
509 uregex_setText(re, text1, 6, &status); | |
510 result = uregex_matches(re, 1, &status); | |
511 TEST_ASSERT(result == FALSE); | |
512 TEST_ASSERT_SUCCESS(status); | |
513 uregex_close(re); | |
514 | |
515 status = U_ZERO_ERROR; | |
516 re = uregex_openC(".?", 0, NULL, &status); | |
517 uregex_setText(re, text1, -1, &status); | |
518 len = u_strlen(text1); | |
519 result = uregex_matches(re, len, &status); | |
520 TEST_ASSERT(result == TRUE); | |
521 TEST_ASSERT_SUCCESS(status); | |
522 | |
523 status = U_ZERO_ERROR; | |
524 uregex_setText(re, nullString, -1, &status); | |
525 TEST_ASSERT_SUCCESS(status); | |
526 result = uregex_matches(re, 0, &status); | |
527 TEST_ASSERT(result == TRUE); | |
528 TEST_ASSERT_SUCCESS(status); | |
529 uregex_close(re); | |
530 } | |
531 | |
532 | |
533 /* | |
534 * lookingAt() Used in setText test. | |
535 */ | |
536 | |
537 | |
538 /* | |
539 * find(), findNext, start, end, reset | |
540 */ | |
541 { | |
542 UChar text1[50]; | |
543 UBool result; | |
544 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); | |
545 status = U_ZERO_ERROR; | |
546 re = uregex_openC("rx", 0, NULL, &status); | |
547 | |
548 uregex_setText(re, text1, -1, &status); | |
549 result = uregex_find(re, 0, &status); | |
550 TEST_ASSERT(result == TRUE); | |
551 TEST_ASSERT(uregex_start(re, 0, &status) == 3); | |
552 TEST_ASSERT(uregex_end(re, 0, &status) == 5); | |
553 TEST_ASSERT_SUCCESS(status); | |
554 | |
555 result = uregex_find(re, 9, &status); | |
556 TEST_ASSERT(result == TRUE); | |
557 TEST_ASSERT(uregex_start(re, 0, &status) == 11); | |
558 TEST_ASSERT(uregex_end(re, 0, &status) == 13); | |
559 TEST_ASSERT_SUCCESS(status); | |
560 | |
561 result = uregex_find(re, 14, &status); | |
562 TEST_ASSERT(result == FALSE); | |
563 TEST_ASSERT_SUCCESS(status); | |
564 | |
565 status = U_ZERO_ERROR; | |
566 uregex_reset(re, 0, &status); | |
567 | |
568 result = uregex_findNext(re, &status); | |
569 TEST_ASSERT(result == TRUE); | |
570 TEST_ASSERT(uregex_start(re, 0, &status) == 3); | |
571 TEST_ASSERT(uregex_end(re, 0, &status) == 5); | |
572 TEST_ASSERT_SUCCESS(status); | |
573 | |
574 result = uregex_findNext(re, &status); | |
575 TEST_ASSERT(result == TRUE); | |
576 TEST_ASSERT(uregex_start(re, 0, &status) == 6); | |
577 TEST_ASSERT(uregex_end(re, 0, &status) == 8); | |
578 TEST_ASSERT_SUCCESS(status); | |
579 | |
580 status = U_ZERO_ERROR; | |
581 uregex_reset(re, 12, &status); | |
582 | |
583 result = uregex_findNext(re, &status); | |
584 TEST_ASSERT(result == TRUE); | |
585 TEST_ASSERT(uregex_start(re, 0, &status) == 13); | |
586 TEST_ASSERT(uregex_end(re, 0, &status) == 15); | |
587 TEST_ASSERT_SUCCESS(status); | |
588 | |
589 result = uregex_findNext(re, &status); | |
590 TEST_ASSERT(result == FALSE); | |
591 TEST_ASSERT_SUCCESS(status); | |
592 | |
593 uregex_close(re); | |
594 } | |
595 | |
596 /* | |
597 * groupCount | |
598 */ | |
599 { | |
600 int32_t result; | |
601 | |
602 status = U_ZERO_ERROR; | |
603 re = uregex_openC("abc", 0, NULL, &status); | |
604 result = uregex_groupCount(re, &status); | |
605 TEST_ASSERT_SUCCESS(status); | |
606 TEST_ASSERT(result == 0); | |
607 uregex_close(re); | |
608 | |
609 status = U_ZERO_ERROR; | |
610 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); | |
611 result = uregex_groupCount(re, &status); | |
612 TEST_ASSERT_SUCCESS(status); | |
613 TEST_ASSERT(result == 3); | |
614 uregex_close(re); | |
615 | |
616 } | |
617 | |
618 | |
619 /* | |
620 * group() | |
621 */ | |
622 { | |
623 UChar text1[80]; | |
624 UChar buf[80]; | |
625 UBool result; | |
626 int32_t resultSz; | |
627 u_uastrncpy(text1, "noise abc interior def, and this is off the end", U
PRV_LENGTHOF(text1)); | |
628 | |
629 status = U_ZERO_ERROR; | |
630 re = uregex_openC("abc(.*?)def", 0, NULL, &status); | |
631 TEST_ASSERT_SUCCESS(status); | |
632 | |
633 | |
634 uregex_setText(re, text1, -1, &status); | |
635 result = uregex_find(re, 0, &status); | |
636 TEST_ASSERT(result==TRUE); | |
637 | |
638 /* Capture Group 0, the full match. Should succeed. */ | |
639 status = U_ZERO_ERROR; | |
640 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status); | |
641 TEST_ASSERT_SUCCESS(status); | |
642 TEST_ASSERT_STRING("abc interior def", buf, TRUE); | |
643 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
644 | |
645 /* Capture group #1. Should succeed. */ | |
646 status = U_ZERO_ERROR; | |
647 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status); | |
648 TEST_ASSERT_SUCCESS(status); | |
649 TEST_ASSERT_STRING(" interior ", buf, TRUE); | |
650 TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); | |
651 | |
652 /* Capture group out of range. Error. */ | |
653 status = U_ZERO_ERROR; | |
654 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status); | |
655 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | |
656 | |
657 /* NULL buffer, pure pre-flight */ | |
658 status = U_ZERO_ERROR; | |
659 resultSz = uregex_group(re, 0, NULL, 0, &status); | |
660 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
661 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
662 | |
663 /* Too small buffer, truncated string */ | |
664 status = U_ZERO_ERROR; | |
665 memset(buf, -1, sizeof(buf)); | |
666 resultSz = uregex_group(re, 0, buf, 5, &status); | |
667 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
668 TEST_ASSERT_STRING("abc i", buf, FALSE); | |
669 TEST_ASSERT(buf[5] == (UChar)0xffff); | |
670 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
671 | |
672 /* Output string just fits buffer, no NUL term. */ | |
673 status = U_ZERO_ERROR; | |
674 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"),
&status); | |
675 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
676 TEST_ASSERT_STRING("abc interior def", buf, FALSE); | |
677 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
678 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); | |
679 | |
680 uregex_close(re); | |
681 | |
682 } | |
683 | |
684 /* | |
685 * Regions | |
686 */ | |
687 | |
688 | |
689 /* SetRegion(), getRegion() do something */ | |
690 TEST_SETUP(".*", "0123456789ABCDEF", 0) | |
691 UChar resultString[40]; | |
692 TEST_ASSERT(uregex_regionStart(re, &status) == 0); | |
693 TEST_ASSERT(uregex_regionEnd(re, &status) == 16); | |
694 uregex_setRegion(re, 3, 6, &status); | |
695 TEST_ASSERT(uregex_regionStart(re, &status) == 3); | |
696 TEST_ASSERT(uregex_regionEnd(re, &status) == 6); | |
697 TEST_ASSERT(uregex_findNext(re, &status)); | |
698 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString
), &status) == 3) | |
699 TEST_ASSERT_STRING("345", resultString, TRUE); | |
700 TEST_TEARDOWN; | |
701 | |
702 /* find(start=-1) uses regions */ | |
703 TEST_SETUP(".*", "0123456789ABCDEF", 0); | |
704 uregex_setRegion(re, 4, 6, &status); | |
705 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); | |
706 TEST_ASSERT(uregex_start(re, 0, &status) == 4); | |
707 TEST_ASSERT(uregex_end(re, 0, &status) == 6); | |
708 TEST_TEARDOWN; | |
709 | |
710 /* find (start >=0) does not use regions */ | |
711 TEST_SETUP(".*", "0123456789ABCDEF", 0); | |
712 uregex_setRegion(re, 4, 6, &status); | |
713 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); | |
714 TEST_ASSERT(uregex_start(re, 0, &status) == 0); | |
715 TEST_ASSERT(uregex_end(re, 0, &status) == 16); | |
716 TEST_TEARDOWN; | |
717 | |
718 /* findNext() obeys regions */ | |
719 TEST_SETUP(".", "0123456789ABCDEF", 0); | |
720 uregex_setRegion(re, 4, 6, &status); | |
721 TEST_ASSERT(uregex_findNext(re,&status) == TRUE); | |
722 TEST_ASSERT(uregex_start(re, 0, &status) == 4); | |
723 TEST_ASSERT(uregex_findNext(re, &status) == TRUE); | |
724 TEST_ASSERT(uregex_start(re, 0, &status) == 5); | |
725 TEST_ASSERT(uregex_findNext(re, &status) == FALSE); | |
726 TEST_TEARDOWN; | |
727 | |
728 /* matches(start=-1) uses regions
*/ | |
729 /* Also, verify that non-greedy *? succeeds in finding the full match
. */ | |
730 TEST_SETUP(".*?", "0123456789ABCDEF", 0); | |
731 uregex_setRegion(re, 4, 6, &status); | |
732 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE); | |
733 TEST_ASSERT(uregex_start(re, 0, &status) == 4); | |
734 TEST_ASSERT(uregex_end(re, 0, &status) == 6); | |
735 TEST_TEARDOWN; | |
736 | |
737 /* matches (start >=0) does not use regions */ | |
738 TEST_SETUP(".*?", "0123456789ABCDEF", 0); | |
739 uregex_setRegion(re, 4, 6, &status); | |
740 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE); | |
741 TEST_ASSERT(uregex_start(re, 0, &status) == 0); | |
742 TEST_ASSERT(uregex_end(re, 0, &status) == 16); | |
743 TEST_TEARDOWN; | |
744 | |
745 /* lookingAt(start=-1) uses regions
*/ | |
746 /* Also, verify that non-greedy *? finds the first (shortest) match.
*/ | |
747 TEST_SETUP(".*?", "0123456789ABCDEF", 0); | |
748 uregex_setRegion(re, 4, 6, &status); | |
749 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE); | |
750 TEST_ASSERT(uregex_start(re, 0, &status) == 4); | |
751 TEST_ASSERT(uregex_end(re, 0, &status) == 4); | |
752 TEST_TEARDOWN; | |
753 | |
754 /* lookingAt (start >=0) does not use regions */ | |
755 TEST_SETUP(".*?", "0123456789ABCDEF", 0); | |
756 uregex_setRegion(re, 4, 6, &status); | |
757 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE); | |
758 TEST_ASSERT(uregex_start(re, 0, &status) == 0); | |
759 TEST_ASSERT(uregex_end(re, 0, &status) == 0); | |
760 TEST_TEARDOWN; | |
761 | |
762 /* hitEnd() */ | |
763 TEST_SETUP("[a-f]*", "abcdefghij", 0); | |
764 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); | |
765 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE); | |
766 TEST_TEARDOWN; | |
767 | |
768 TEST_SETUP("[a-f]*", "abcdef", 0); | |
769 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); | |
770 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE); | |
771 TEST_TEARDOWN; | |
772 | |
773 /* requireEnd */ | |
774 TEST_SETUP("abcd", "abcd", 0); | |
775 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); | |
776 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE); | |
777 TEST_TEARDOWN; | |
778 | |
779 TEST_SETUP("abcd$", "abcd", 0); | |
780 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE); | |
781 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE); | |
782 TEST_TEARDOWN; | |
783 | |
784 /* anchoringBounds */ | |
785 TEST_SETUP("abc$", "abcdef", 0); | |
786 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE); | |
787 uregex_useAnchoringBounds(re, FALSE, &status); | |
788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE); | |
789 | |
790 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); | |
791 uregex_useAnchoringBounds(re, TRUE, &status); | |
792 uregex_setRegion(re, 0, 3, &status); | |
793 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); | |
794 TEST_ASSERT(uregex_end(re, 0, &status) == 3); | |
795 TEST_TEARDOWN; | |
796 | |
797 /* Transparent Bounds */ | |
798 TEST_SETUP("abc(?=def)", "abcdef", 0); | |
799 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE); | |
800 uregex_useTransparentBounds(re, TRUE, &status); | |
801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE); | |
802 | |
803 uregex_useTransparentBounds(re, FALSE, &status); | |
804 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */ | |
805 uregex_setRegion(re, 0, 3, &status); | |
806 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, op
aque bounds */ | |
807 uregex_useTransparentBounds(re, TRUE, &status); | |
808 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, tr
ansparent bounds */ | |
809 TEST_ASSERT(uregex_end(re, 0, &status) == 3); | |
810 TEST_TEARDOWN; | |
811 | |
812 | |
813 /* | |
814 * replaceFirst() | |
815 */ | |
816 { | |
817 UChar text1[80]; | |
818 UChar text2[80]; | |
819 UChar replText[80]; | |
820 UChar buf[80]; | |
821 int32_t resultSz; | |
822 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); | |
823 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); | |
824 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); | |
825 | |
826 status = U_ZERO_ERROR; | |
827 re = uregex_openC("x(.*?)x", 0, NULL, &status); | |
828 TEST_ASSERT_SUCCESS(status); | |
829 | |
830 /* Normal case, with match */ | |
831 uregex_setText(re, text1, -1, &status); | |
832 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf)
, &status); | |
833 TEST_ASSERT_SUCCESS(status); | |
834 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); | |
835 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
836 | |
837 /* No match. Text should copy to output with no changes. */ | |
838 status = U_ZERO_ERROR; | |
839 uregex_setText(re, text2, -1, &status); | |
840 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf)
, &status); | |
841 TEST_ASSERT_SUCCESS(status); | |
842 TEST_ASSERT_STRING("No match here.", buf, TRUE); | |
843 TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); | |
844 | |
845 /* Match, output just fills buffer, no termination warning. */ | |
846 status = U_ZERO_ERROR; | |
847 uregex_setText(re, text1, -1, &status); | |
848 memset(buf, -1, sizeof(buf)); | |
849 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a
a> x1x x...x."), &status); | |
850 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
851 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); | |
852 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
853 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
854 | |
855 /* Do the replaceFirst again, without first resetting anything. | |
856 * Should give the same results. | |
857 */ | |
858 status = U_ZERO_ERROR; | |
859 memset(buf, -1, sizeof(buf)); | |
860 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a
a> x1x x...x."), &status); | |
861 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
862 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); | |
863 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
864 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
865 | |
866 /* NULL buffer, zero buffer length */ | |
867 status = U_ZERO_ERROR; | |
868 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); | |
869 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
870 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
871 | |
872 /* Buffer too small by one */ | |
873 status = U_ZERO_ERROR; | |
874 memset(buf, -1, sizeof(buf)); | |
875 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <a
a> x1x x...x.")-1, &status); | |
876 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
877 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); | |
878 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
879 TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
880 | |
881 uregex_close(re); | |
882 } | |
883 | |
884 | |
885 /* | |
886 * replaceAll() | |
887 */ | |
888 { | |
889 UChar text1[80]; /* "Replace xaax x1x x...x." */ | |
890 UChar text2[80]; /* "No match Here" */ | |
891 UChar replText[80]; /* "<$1>" */ | |
892 UChar replText2[80]; /* "<<$1>>" */ | |
893 const char * pattern = "x(.*?)x"; | |
894 const char * expectedResult = "Replace <aa> <1> <...>."; | |
895 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>."; | |
896 UChar buf[80]; | |
897 int32_t resultSize; | |
898 int32_t expectedResultSize; | |
899 int32_t expectedResultSize2; | |
900 int32_t i; | |
901 | |
902 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); | |
903 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); | |
904 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); | |
905 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2)); | |
906 expectedResultSize = strlen(expectedResult); | |
907 expectedResultSize2 = strlen(expectedResult2); | |
908 | |
909 status = U_ZERO_ERROR; | |
910 re = uregex_openC(pattern, 0, NULL, &status); | |
911 TEST_ASSERT_SUCCESS(status); | |
912 | |
913 /* Normal case, with match */ | |
914 uregex_setText(re, text1, -1, &status); | |
915 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf)
, &status); | |
916 TEST_ASSERT_SUCCESS(status); | |
917 TEST_ASSERT_STRING(expectedResult, buf, TRUE); | |
918 TEST_ASSERT(resultSize == expectedResultSize); | |
919 | |
920 /* No match. Text should copy to output with no changes. */ | |
921 status = U_ZERO_ERROR; | |
922 uregex_setText(re, text2, -1, &status); | |
923 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf)
, &status); | |
924 TEST_ASSERT_SUCCESS(status); | |
925 TEST_ASSERT_STRING("No match here.", buf, TRUE); | |
926 TEST_ASSERT(resultSize == u_strlen(text2)); | |
927 | |
928 /* Match, output just fills buffer, no termination warning. */ | |
929 status = U_ZERO_ERROR; | |
930 uregex_setText(re, text1, -1, &status); | |
931 memset(buf, -1, sizeof(buf)); | |
932 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize
, &status); | |
933 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
934 TEST_ASSERT_STRING(expectedResult, buf, FALSE); | |
935 TEST_ASSERT(resultSize == expectedResultSize); | |
936 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); | |
937 | |
938 /* Do the replaceFirst again, without first resetting anything. | |
939 * Should give the same results. | |
940 */ | |
941 status = U_ZERO_ERROR; | |
942 memset(buf, -1, sizeof(buf)); | |
943 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xa
ax x1x x...x."), &status); | |
944 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
945 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); | |
946 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); | |
947 TEST_ASSERT(buf[resultSize] == (UChar)0xffff); | |
948 | |
949 /* NULL buffer, zero buffer length */ | |
950 status = U_ZERO_ERROR; | |
951 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status); | |
952 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
953 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>.")); | |
954 | |
955 /* Buffer too small. Try every size, which will tickle edge cases | |
956 * in uregex_appendReplacement (used by replaceAll) */ | |
957 for (i=0; i<expectedResultSize; i++) { | |
958 char expected[80]; | |
959 status = U_ZERO_ERROR; | |
960 memset(buf, -1, sizeof(buf)); | |
961 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status); | |
962 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
963 strcpy(expected, expectedResult); | |
964 expected[i] = 0; | |
965 TEST_ASSERT_STRING(expected, buf, FALSE); | |
966 TEST_ASSERT(resultSize == expectedResultSize); | |
967 TEST_ASSERT(buf[i] == (UChar)0xffff); | |
968 } | |
969 | |
970 /* Buffer too small. Same as previous test, except this time the replac
ement | |
971 * text is longer than the match capture group, making the length of the
complete | |
972 * replacement longer than the original string. | |
973 */ | |
974 for (i=0; i<expectedResultSize2; i++) { | |
975 char expected[80]; | |
976 status = U_ZERO_ERROR; | |
977 memset(buf, -1, sizeof(buf)); | |
978 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status); | |
979 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
980 strcpy(expected, expectedResult2); | |
981 expected[i] = 0; | |
982 TEST_ASSERT_STRING(expected, buf, FALSE); | |
983 TEST_ASSERT(resultSize == expectedResultSize2); | |
984 TEST_ASSERT(buf[i] == (UChar)0xffff); | |
985 } | |
986 | |
987 | |
988 uregex_close(re); | |
989 } | |
990 | |
991 | |
992 /* | |
993 * appendReplacement() | |
994 */ | |
995 { | |
996 UChar text[100]; | |
997 UChar repl[100]; | |
998 UChar buf[100]; | |
999 UChar *bufPtr; | |
1000 int32_t bufCap; | |
1001 | |
1002 | |
1003 status = U_ZERO_ERROR; | |
1004 re = uregex_openC(".*", 0, 0, &status); | |
1005 TEST_ASSERT_SUCCESS(status); | |
1006 | |
1007 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); | |
1008 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); | |
1009 uregex_setText(re, text, -1, &status); | |
1010 | |
1011 /* match covers whole target string */ | |
1012 uregex_find(re, 0, &status); | |
1013 TEST_ASSERT_SUCCESS(status); | |
1014 bufPtr = buf; | |
1015 bufCap = UPRV_LENGTHOF(buf); | |
1016 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); | |
1017 TEST_ASSERT_SUCCESS(status); | |
1018 TEST_ASSERT_STRING("some other", buf, TRUE); | |
1019 | |
1020 /* Match has \u \U escapes */ | |
1021 uregex_find(re, 0, &status); | |
1022 TEST_ASSERT_SUCCESS(status); | |
1023 bufPtr = buf; | |
1024 bufCap = UPRV_LENGTHOF(buf); | |
1025 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(
repl)); | |
1026 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); | |
1027 TEST_ASSERT_SUCCESS(status); | |
1028 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); | |
1029 | |
1030 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. *
/ | |
1031 status = U_ZERO_ERROR; | |
1032 uregex_find(re, 0, &status); | |
1033 TEST_ASSERT_SUCCESS(status); | |
1034 bufPtr = buf; | |
1035 status = U_BUFFER_OVERFLOW_ERROR; | |
1036 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status); | |
1037 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
1038 | |
1039 uregex_close(re); | |
1040 } | |
1041 | |
1042 | |
1043 /* | |
1044 * appendTail(). Checked in ReplaceFirst(), replaceAll(). | |
1045 */ | |
1046 | |
1047 /* | |
1048 * split() | |
1049 */ | |
1050 { | |
1051 UChar textToSplit[80]; | |
1052 UChar text2[80]; | |
1053 UChar buf[200]; | |
1054 UChar *fields[10]; | |
1055 int32_t numFields; | |
1056 int32_t requiredCapacity; | |
1057 int32_t spaceNeeded; | |
1058 int32_t sz; | |
1059 | |
1060 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textTo
Split)); | |
1061 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); | |
1062 | |
1063 status = U_ZERO_ERROR; | |
1064 re = uregex_openC(":", 0, NULL, &status); | |
1065 | |
1066 | |
1067 /* Simple split */ | |
1068 | |
1069 uregex_setText(re, textToSplit, -1, &status); | |
1070 TEST_ASSERT_SUCCESS(status); | |
1071 | |
1072 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1073 if (U_SUCCESS(status)) { | |
1074 memset(fields, -1, sizeof(fields)); | |
1075 numFields = | |
1076 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fie
lds, 10, &status); | |
1077 TEST_ASSERT_SUCCESS(status); | |
1078 | |
1079 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1080 if(U_SUCCESS(status)) { | |
1081 TEST_ASSERT(numFields == 3); | |
1082 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1083 TEST_ASSERT_STRING(" second", fields[1], TRUE); | |
1084 TEST_ASSERT_STRING(" third", fields[2], TRUE); | |
1085 TEST_ASSERT(fields[3] == NULL); | |
1086 | |
1087 spaceNeeded = u_strlen(textToSplit) - | |
1088 (numFields - 1) + /* Field delimiters do not appea
r in output */ | |
1089 numFields; /* Each field gets a NUL termina
tor */ | |
1090 | |
1091 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1092 } | |
1093 } | |
1094 | |
1095 uregex_close(re); | |
1096 | |
1097 | |
1098 /* Split with too few output strings available */ | |
1099 status = U_ZERO_ERROR; | |
1100 re = uregex_openC(":", 0, NULL, &status); | |
1101 uregex_setText(re, textToSplit, -1, &status); | |
1102 TEST_ASSERT_SUCCESS(status); | |
1103 | |
1104 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1105 if(U_SUCCESS(status)) { | |
1106 memset(fields, -1, sizeof(fields)); | |
1107 numFields = | |
1108 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fie
lds, 2, &status); | |
1109 TEST_ASSERT_SUCCESS(status); | |
1110 | |
1111 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1112 if(U_SUCCESS(status)) { | |
1113 TEST_ASSERT(numFields == 2); | |
1114 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1115 TEST_ASSERT_STRING(" second: third", fields[1], TRUE); | |
1116 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); | |
1117 | |
1118 spaceNeeded = u_strlen(textToSplit) - | |
1119 (numFields - 1) + /* Field delimiters do not appea
r in output */ | |
1120 numFields; /* Each field gets a NUL termina
tor */ | |
1121 | |
1122 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1123 | |
1124 /* Split with a range of output buffer sizes. */ | |
1125 spaceNeeded = u_strlen(textToSplit) - | |
1126 (numFields - 1) + /* Field delimiters do not appear in out
put */ | |
1127 numFields; /* Each field gets a NUL terminator */ | |
1128 | |
1129 for (sz=0; sz < spaceNeeded+1; sz++) { | |
1130 memset(fields, -1, sizeof(fields)); | |
1131 status = U_ZERO_ERROR; | |
1132 numFields = | |
1133 uregex_split(re, buf, sz, &requiredCapacity, fields, 10,
&status); | |
1134 if (sz >= spaceNeeded) { | |
1135 TEST_ASSERT_SUCCESS(status); | |
1136 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1137 TEST_ASSERT_STRING(" second", fields[1], TRUE); | |
1138 TEST_ASSERT_STRING(" third", fields[2], TRUE); | |
1139 } else { | |
1140 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
1141 } | |
1142 TEST_ASSERT(numFields == 3); | |
1143 TEST_ASSERT(fields[3] == NULL); | |
1144 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1145 } | |
1146 } | |
1147 } | |
1148 | |
1149 uregex_close(re); | |
1150 } | |
1151 | |
1152 | |
1153 | |
1154 | |
1155 /* Split(), part 2. Patterns with capture groups. The capture group text | |
1156 * comes out as additional fields. */ | |
1157 { | |
1158 UChar textToSplit[80]; | |
1159 UChar buf[200]; | |
1160 UChar *fields[10]; | |
1161 int32_t numFields; | |
1162 int32_t requiredCapacity; | |
1163 int32_t spaceNeeded; | |
1164 int32_t sz; | |
1165 | |
1166 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LEN
GTHOF(textToSplit)); | |
1167 | |
1168 status = U_ZERO_ERROR; | |
1169 re = uregex_openC("<(.*?)>", 0, NULL, &status); | |
1170 | |
1171 uregex_setText(re, textToSplit, -1, &status); | |
1172 TEST_ASSERT_SUCCESS(status); | |
1173 | |
1174 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1175 if(U_SUCCESS(status)) { | |
1176 memset(fields, -1, sizeof(fields)); | |
1177 numFields = | |
1178 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fie
lds, 10, &status); | |
1179 TEST_ASSERT_SUCCESS(status); | |
1180 | |
1181 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1182 if(U_SUCCESS(status)) { | |
1183 TEST_ASSERT(numFields == 5); | |
1184 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1185 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
1186 TEST_ASSERT_STRING(" second", fields[2], TRUE); | |
1187 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); | |
1188 TEST_ASSERT_STRING(" third", fields[4], TRUE); | |
1189 TEST_ASSERT(fields[5] == NULL); | |
1190 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /*
"." at NUL positions */ | |
1191 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1192 } | |
1193 } | |
1194 | |
1195 /* Split with too few output strings available (2) */ | |
1196 status = U_ZERO_ERROR; | |
1197 memset(fields, -1, sizeof(fields)); | |
1198 numFields = | |
1199 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields,
2, &status); | |
1200 TEST_ASSERT_SUCCESS(status); | |
1201 | |
1202 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1203 if(U_SUCCESS(status)) { | |
1204 TEST_ASSERT(numFields == 2); | |
1205 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1206 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); | |
1207 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); | |
1208 | |
1209 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NU
L positions */ | |
1210 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1211 } | |
1212 | |
1213 /* Split with too few output strings available (3) */ | |
1214 status = U_ZERO_ERROR; | |
1215 memset(fields, -1, sizeof(fields)); | |
1216 numFields = | |
1217 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields,
3, &status); | |
1218 TEST_ASSERT_SUCCESS(status); | |
1219 | |
1220 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1221 if(U_SUCCESS(status)) { | |
1222 TEST_ASSERT(numFields == 3); | |
1223 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1224 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
1225 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); | |
1226 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); | |
1227 | |
1228 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "."
at NUL positions */ | |
1229 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1230 } | |
1231 | |
1232 /* Split with just enough output strings available (5) */ | |
1233 status = U_ZERO_ERROR; | |
1234 memset(fields, -1, sizeof(fields)); | |
1235 numFields = | |
1236 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields,
5, &status); | |
1237 TEST_ASSERT_SUCCESS(status); | |
1238 | |
1239 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1240 if(U_SUCCESS(status)) { | |
1241 TEST_ASSERT(numFields == 5); | |
1242 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1243 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
1244 TEST_ASSERT_STRING(" second", fields[2], TRUE); | |
1245 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); | |
1246 TEST_ASSERT_STRING(" third", fields[4], TRUE); | |
1247 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); | |
1248 | |
1249 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "."
at NUL positions */ | |
1250 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1251 } | |
1252 | |
1253 /* Split, end of text is a field delimiter. */ | |
1254 status = U_ZERO_ERROR; | |
1255 sz = strlen("first <tag-a> second<tag-b>"); | |
1256 uregex_setText(re, textToSplit, sz, &status); | |
1257 TEST_ASSERT_SUCCESS(status); | |
1258 | |
1259 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1260 if(U_SUCCESS(status)) { | |
1261 memset(fields, -1, sizeof(fields)); | |
1262 numFields = | |
1263 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fie
lds, 9, &status); | |
1264 TEST_ASSERT_SUCCESS(status); | |
1265 | |
1266 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1267 if(U_SUCCESS(status)) { | |
1268 TEST_ASSERT(numFields == 5); | |
1269 TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
1270 TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
1271 TEST_ASSERT_STRING(" second", fields[2], TRUE); | |
1272 TEST_ASSERT_STRING("tag-b", fields[3], TRUE); | |
1273 TEST_ASSERT_STRING("", fields[4], TRUE); | |
1274 TEST_ASSERT(fields[5] == NULL); | |
1275 TEST_ASSERT(fields[8] == NULL); | |
1276 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); | |
1277 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at
NUL positions */ | |
1278 TEST_ASSERT(spaceNeeded == requiredCapacity); | |
1279 } | |
1280 } | |
1281 | |
1282 uregex_close(re); | |
1283 } | |
1284 | |
1285 /* | |
1286 * set/getTimeLimit | |
1287 */ | |
1288 TEST_SETUP("abc$", "abcdef", 0); | |
1289 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0); | |
1290 uregex_setTimeLimit(re, 1000, &status); | |
1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); | |
1292 TEST_ASSERT_SUCCESS(status); | |
1293 uregex_setTimeLimit(re, -1, &status); | |
1294 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | |
1295 status = U_ZERO_ERROR; | |
1296 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000); | |
1297 TEST_TEARDOWN; | |
1298 | |
1299 /* | |
1300 * set/get Stack Limit | |
1301 */ | |
1302 TEST_SETUP("abc$", "abcdef", 0); | |
1303 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000); | |
1304 uregex_setStackLimit(re, 40000, &status); | |
1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); | |
1306 TEST_ASSERT_SUCCESS(status); | |
1307 uregex_setStackLimit(re, -1, &status); | |
1308 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | |
1309 status = U_ZERO_ERROR; | |
1310 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000); | |
1311 TEST_TEARDOWN; | |
1312 | |
1313 | |
1314 /* | |
1315 * Get/Set callback functions | |
1316 * This test is copied from intltest regex/Callbacks | |
1317 * The pattern and test data will run long enough to cause the callback | |
1318 * to be invoked. The nested '+' operators give exponential time | |
1319 * behavior with increasing string length. | |
1320 */ | |
1321 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0) | |
1322 callBackContext cbInfo = {4, 0, 0}; | |
1323 const void *pContext = &cbInfo; | |
1324 URegexMatchCallback *returnedFn = &TestCallbackFn; | |
1325 | |
1326 /* Getting the callback fn when it hasn't been set must return NULL */ | |
1327 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); | |
1328 TEST_ASSERT_SUCCESS(status); | |
1329 TEST_ASSERT(returnedFn == NULL); | |
1330 TEST_ASSERT(pContext == NULL); | |
1331 | |
1332 /* Set thecallback and do a match. */ | |
1333 /* The callback function should record that it has been called. */ | |
1334 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status); | |
1335 TEST_ASSERT_SUCCESS(status); | |
1336 TEST_ASSERT(cbInfo.numCalls == 0); | |
1337 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE); | |
1338 TEST_ASSERT_SUCCESS(status); | |
1339 TEST_ASSERT(cbInfo.numCalls > 0); | |
1340 | |
1341 /* Getting the callback should return the values that were set above. */ | |
1342 uregex_getMatchCallback(re, &returnedFn, &pContext, &status); | |
1343 TEST_ASSERT(returnedFn == &TestCallbackFn); | |
1344 TEST_ASSERT(pContext == &cbInfo); | |
1345 | |
1346 TEST_TEARDOWN; | |
1347 } | |
1348 | |
1349 | |
1350 | |
1351 static void TestBug4315(void) { | |
1352 UErrorCode theICUError = U_ZERO_ERROR; | |
1353 URegularExpression *theRegEx; | |
1354 UChar *textBuff; | |
1355 const char *thePattern; | |
1356 UChar theString[100]; | |
1357 UChar *destFields[24]; | |
1358 int32_t neededLength1; | |
1359 int32_t neededLength2; | |
1360 | |
1361 int32_t wordCount = 0; | |
1362 int32_t destFieldsSize = 24; | |
1363 | |
1364 thePattern = "ck "; | |
1365 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle
."); | |
1366 | |
1367 /* open a regex */ | |
1368 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError); | |
1369 TEST_ASSERT_SUCCESS(theICUError); | |
1370 | |
1371 /* set the input string */ | |
1372 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError); | |
1373 TEST_ASSERT_SUCCESS(theICUError); | |
1374 | |
1375 /* split */ | |
1376 /*explicitly pass NULL and 0 to force the overflow error -> this is where th
e | |
1377 * error occurs! */ | |
1378 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields, | |
1379 destFieldsSize, &theICUError); | |
1380 | |
1381 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR); | |
1382 TEST_ASSERT(wordCount==3); | |
1383 | |
1384 if(theICUError == U_BUFFER_OVERFLOW_ERROR) | |
1385 { | |
1386 theICUError = U_ZERO_ERROR; | |
1387 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1)); | |
1388 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLen
gth2, | |
1389 destFields, destFieldsSize, &theICUError); | |
1390 TEST_ASSERT(wordCount==3); | |
1391 TEST_ASSERT_SUCCESS(theICUError); | |
1392 TEST_ASSERT(neededLength1 == neededLength2); | |
1393 TEST_ASSERT_STRING("The qui", destFields[0], TRUE); | |
1394 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1],
TRUE); | |
1395 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE); | |
1396 TEST_ASSERT(destFields[3] == NULL); | |
1397 free(textBuff); | |
1398 } | |
1399 uregex_close(theRegEx); | |
1400 } | |
1401 | |
1402 /* Based on TestRegexCAPI() */ | |
1403 static void TestUTextAPI(void) { | |
1404 UErrorCode status = U_ZERO_ERROR; | |
1405 URegularExpression *re; | |
1406 UText patternText = UTEXT_INITIALIZER; | |
1407 UChar pat[200]; | |
1408 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 }; | |
1409 | |
1410 /* Mimimalist open/close */ | |
1411 utext_openUTF8(&patternText, patternTextUTF8, -1, &status); | |
1412 re = uregex_openUText(&patternText, 0, 0, &status); | |
1413 if (U_FAILURE(status)) { | |
1414 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\"
(Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); | |
1415 utext_close(&patternText); | |
1416 return; | |
1417 } | |
1418 uregex_close(re); | |
1419 | |
1420 /* Open with all flag values set */ | |
1421 status = U_ZERO_ERROR; | |
1422 re = uregex_openUText(&patternText, | |
1423 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, | |
1424 0, &status); | |
1425 TEST_ASSERT_SUCCESS(status); | |
1426 uregex_close(re); | |
1427 | |
1428 /* Open with an invalid flag */ | |
1429 status = U_ZERO_ERROR; | |
1430 re = uregex_openUText(&patternText, 0x40000000, 0, &status); | |
1431 TEST_ASSERT(status == U_REGEX_INVALID_FLAG); | |
1432 uregex_close(re); | |
1433 | |
1434 /* open with an invalid parameter */ | |
1435 status = U_ZERO_ERROR; | |
1436 re = uregex_openUText(NULL, | |
1437 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTI
LINE | UREGEX_UWORD, 0, &status); | |
1438 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL); | |
1439 | |
1440 /* | |
1441 * clone | |
1442 */ | |
1443 { | |
1444 URegularExpression *clone1; | |
1445 URegularExpression *clone2; | |
1446 URegularExpression *clone3; | |
1447 UChar testString1[30]; | |
1448 UChar testString2[30]; | |
1449 UBool result; | |
1450 | |
1451 | |
1452 status = U_ZERO_ERROR; | |
1453 re = uregex_openUText(&patternText, 0, 0, &status); | |
1454 TEST_ASSERT_SUCCESS(status); | |
1455 clone1 = uregex_clone(re, &status); | |
1456 TEST_ASSERT_SUCCESS(status); | |
1457 TEST_ASSERT(clone1 != NULL); | |
1458 | |
1459 status = U_ZERO_ERROR; | |
1460 clone2 = uregex_clone(re, &status); | |
1461 TEST_ASSERT_SUCCESS(status); | |
1462 TEST_ASSERT(clone2 != NULL); | |
1463 uregex_close(re); | |
1464 | |
1465 status = U_ZERO_ERROR; | |
1466 clone3 = uregex_clone(clone2, &status); | |
1467 TEST_ASSERT_SUCCESS(status); | |
1468 TEST_ASSERT(clone3 != NULL); | |
1469 | |
1470 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); | |
1471 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); | |
1472 | |
1473 status = U_ZERO_ERROR; | |
1474 uregex_setText(clone1, testString1, -1, &status); | |
1475 TEST_ASSERT_SUCCESS(status); | |
1476 result = uregex_lookingAt(clone1, 0, &status); | |
1477 TEST_ASSERT_SUCCESS(status); | |
1478 TEST_ASSERT(result==TRUE); | |
1479 | |
1480 status = U_ZERO_ERROR; | |
1481 uregex_setText(clone2, testString2, -1, &status); | |
1482 TEST_ASSERT_SUCCESS(status); | |
1483 result = uregex_lookingAt(clone2, 0, &status); | |
1484 TEST_ASSERT_SUCCESS(status); | |
1485 TEST_ASSERT(result==FALSE); | |
1486 result = uregex_find(clone2, 0, &status); | |
1487 TEST_ASSERT_SUCCESS(status); | |
1488 TEST_ASSERT(result==TRUE); | |
1489 | |
1490 uregex_close(clone1); | |
1491 uregex_close(clone2); | |
1492 uregex_close(clone3); | |
1493 | |
1494 } | |
1495 | |
1496 /* | |
1497 * pattern() and patternText() | |
1498 */ | |
1499 { | |
1500 const UChar *resultPat; | |
1501 int32_t resultLen; | |
1502 UText *resultText; | |
1503 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hell
o */ | |
1504 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ | |
1505 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */ | |
1506 status = U_ZERO_ERROR; | |
1507 | |
1508 utext_openUTF8(&patternText, str_hello, -1, &status); | |
1509 re = uregex_open(pat, -1, 0, NULL, &status); | |
1510 resultPat = uregex_pattern(re, &resultLen, &status); | |
1511 TEST_ASSERT_SUCCESS(status); | |
1512 | |
1513 /* The TEST_ASSERT_SUCCESS above should change too... */ | |
1514 if (U_SUCCESS(status)) { | |
1515 TEST_ASSERT(resultLen == -1); | |
1516 TEST_ASSERT(u_strcmp(resultPat, pat) == 0); | |
1517 } | |
1518 | |
1519 resultText = uregex_patternUText(re, &status); | |
1520 TEST_ASSERT_SUCCESS(status); | |
1521 TEST_ASSERT_UTEXT(str_hello, resultText); | |
1522 | |
1523 uregex_close(re); | |
1524 | |
1525 status = U_ZERO_ERROR; | |
1526 re = uregex_open(pat, 3, 0, NULL, &status); | |
1527 resultPat = uregex_pattern(re, &resultLen, &status); | |
1528 TEST_ASSERT_SUCCESS(status); | |
1529 | |
1530 /* The TEST_ASSERT_SUCCESS above should change too... */ | |
1531 if (U_SUCCESS(status)) { | |
1532 TEST_ASSERT(resultLen == 3); | |
1533 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); | |
1534 TEST_ASSERT(u_strlen(resultPat) == 3); | |
1535 } | |
1536 | |
1537 resultText = uregex_patternUText(re, &status); | |
1538 TEST_ASSERT_SUCCESS(status); | |
1539 TEST_ASSERT_UTEXT(str_hel, resultText); | |
1540 | |
1541 uregex_close(re); | |
1542 } | |
1543 | |
1544 /* | |
1545 * setUText() and lookingAt() | |
1546 */ | |
1547 { | |
1548 UText text1 = UTEXT_INITIALIZER; | |
1549 UText text2 = UTEXT_INITIALIZER; | |
1550 UBool result; | |
1551 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 };
/* abcccd */ | |
1552 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0
x00 }; /* abcccxd */ | |
1553 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d
*/ | |
1554 status = U_ZERO_ERROR; | |
1555 utext_openUTF8(&text1, str_abcccd, -1, &status); | |
1556 utext_openUTF8(&text2, str_abcccxd, -1, &status); | |
1557 | |
1558 utext_openUTF8(&patternText, str_abcd, -1, &status); | |
1559 re = uregex_openUText(&patternText, 0, NULL, &status); | |
1560 TEST_ASSERT_SUCCESS(status); | |
1561 | |
1562 /* Operation before doing a setText should fail... */ | |
1563 status = U_ZERO_ERROR; | |
1564 uregex_lookingAt(re, 0, &status); | |
1565 TEST_ASSERT( status== U_REGEX_INVALID_STATE); | |
1566 | |
1567 status = U_ZERO_ERROR; | |
1568 uregex_setUText(re, &text1, &status); | |
1569 result = uregex_lookingAt(re, 0, &status); | |
1570 TEST_ASSERT(result == TRUE); | |
1571 TEST_ASSERT_SUCCESS(status); | |
1572 | |
1573 status = U_ZERO_ERROR; | |
1574 uregex_setUText(re, &text2, &status); | |
1575 result = uregex_lookingAt(re, 0, &status); | |
1576 TEST_ASSERT(result == FALSE); | |
1577 TEST_ASSERT_SUCCESS(status); | |
1578 | |
1579 status = U_ZERO_ERROR; | |
1580 uregex_setUText(re, &text1, &status); | |
1581 result = uregex_lookingAt(re, 0, &status); | |
1582 TEST_ASSERT(result == TRUE); | |
1583 TEST_ASSERT_SUCCESS(status); | |
1584 | |
1585 uregex_close(re); | |
1586 utext_close(&text1); | |
1587 utext_close(&text2); | |
1588 } | |
1589 | |
1590 | |
1591 /* | |
1592 * getText() and getUText() | |
1593 */ | |
1594 { | |
1595 UText text1 = UTEXT_INITIALIZER; | |
1596 UText text2 = UTEXT_INITIALIZER; | |
1597 UChar text2Chars[20]; | |
1598 UText *resultText; | |
1599 const UChar *result; | |
1600 int32_t textLength; | |
1601 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 };
/* abcccd */ | |
1602 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0
x00 }; /* abcccxd */ | |
1603 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d
*/ | |
1604 | |
1605 | |
1606 status = U_ZERO_ERROR; | |
1607 utext_openUTF8(&text1, str_abcccd, -1, &status); | |
1608 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars)); | |
1609 utext_openUChars(&text2, text2Chars, -1, &status); | |
1610 | |
1611 utext_openUTF8(&patternText, str_abcd, -1, &status); | |
1612 re = uregex_openUText(&patternText, 0, NULL, &status); | |
1613 | |
1614 /* First set a UText */ | |
1615 uregex_setUText(re, &text1, &status); | |
1616 resultText = uregex_getUText(re, NULL, &status); | |
1617 TEST_ASSERT_SUCCESS(status); | |
1618 TEST_ASSERT(resultText != &text1); | |
1619 utext_setNativeIndex(resultText, 0); | |
1620 utext_setNativeIndex(&text1, 0); | |
1621 TEST_ASSERT(testUTextEqual(resultText, &text1)); | |
1622 utext_close(resultText); | |
1623 | |
1624 result = uregex_getText(re, &textLength, &status); /* flattens UText int
o buffer */ | |
1625 (void)result; /* Suppress set but not used warning. */ | |
1626 TEST_ASSERT(textLength == -1 || textLength == 6); | |
1627 resultText = uregex_getUText(re, NULL, &status); | |
1628 TEST_ASSERT_SUCCESS(status); | |
1629 TEST_ASSERT(resultText != &text1); | |
1630 utext_setNativeIndex(resultText, 0); | |
1631 utext_setNativeIndex(&text1, 0); | |
1632 TEST_ASSERT(testUTextEqual(resultText, &text1)); | |
1633 utext_close(resultText); | |
1634 | |
1635 /* Then set a UChar * */ | |
1636 uregex_setText(re, text2Chars, 7, &status); | |
1637 resultText = uregex_getUText(re, NULL, &status); | |
1638 TEST_ASSERT_SUCCESS(status); | |
1639 utext_setNativeIndex(resultText, 0); | |
1640 utext_setNativeIndex(&text2, 0); | |
1641 TEST_ASSERT(testUTextEqual(resultText, &text2)); | |
1642 utext_close(resultText); | |
1643 result = uregex_getText(re, &textLength, &status); | |
1644 TEST_ASSERT(textLength == 7); | |
1645 | |
1646 uregex_close(re); | |
1647 utext_close(&text1); | |
1648 utext_close(&text2); | |
1649 } | |
1650 | |
1651 /* | |
1652 * matches() | |
1653 */ | |
1654 { | |
1655 UText text1 = UTEXT_INITIALIZER; | |
1656 UBool result; | |
1657 UText nullText = UTEXT_INITIALIZER; | |
1658 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0
x00 }; /* abcccde */ | |
1659 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d
*/ | |
1660 | |
1661 status = U_ZERO_ERROR; | |
1662 utext_openUTF8(&text1, str_abcccde, -1, &status); | |
1663 utext_openUTF8(&patternText, str_abcd, -1, &status); | |
1664 re = uregex_openUText(&patternText, 0, NULL, &status); | |
1665 | |
1666 uregex_setUText(re, &text1, &status); | |
1667 result = uregex_matches(re, 0, &status); | |
1668 TEST_ASSERT(result == FALSE); | |
1669 TEST_ASSERT_SUCCESS(status); | |
1670 uregex_close(re); | |
1671 | |
1672 status = U_ZERO_ERROR; | |
1673 re = uregex_openC(".?", 0, NULL, &status); | |
1674 uregex_setUText(re, &text1, &status); | |
1675 result = uregex_matches(re, 7, &status); | |
1676 TEST_ASSERT(result == TRUE); | |
1677 TEST_ASSERT_SUCCESS(status); | |
1678 | |
1679 status = U_ZERO_ERROR; | |
1680 utext_openUTF8(&nullText, "", -1, &status); | |
1681 uregex_setUText(re, &nullText, &status); | |
1682 TEST_ASSERT_SUCCESS(status); | |
1683 result = uregex_matches(re, 0, &status); | |
1684 TEST_ASSERT(result == TRUE); | |
1685 TEST_ASSERT_SUCCESS(status); | |
1686 | |
1687 uregex_close(re); | |
1688 utext_close(&text1); | |
1689 utext_close(&nullText); | |
1690 } | |
1691 | |
1692 | |
1693 /* | |
1694 * lookingAt() Used in setText test. | |
1695 */ | |
1696 | |
1697 | |
1698 /* | |
1699 * find(), findNext, start, end, reset | |
1700 */ | |
1701 { | |
1702 UChar text1[50]; | |
1703 UBool result; | |
1704 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); | |
1705 status = U_ZERO_ERROR; | |
1706 re = uregex_openC("rx", 0, NULL, &status); | |
1707 | |
1708 uregex_setText(re, text1, -1, &status); | |
1709 result = uregex_find(re, 0, &status); | |
1710 TEST_ASSERT(result == TRUE); | |
1711 TEST_ASSERT(uregex_start(re, 0, &status) == 3); | |
1712 TEST_ASSERT(uregex_end(re, 0, &status) == 5); | |
1713 TEST_ASSERT_SUCCESS(status); | |
1714 | |
1715 result = uregex_find(re, 9, &status); | |
1716 TEST_ASSERT(result == TRUE); | |
1717 TEST_ASSERT(uregex_start(re, 0, &status) == 11); | |
1718 TEST_ASSERT(uregex_end(re, 0, &status) == 13); | |
1719 TEST_ASSERT_SUCCESS(status); | |
1720 | |
1721 result = uregex_find(re, 14, &status); | |
1722 TEST_ASSERT(result == FALSE); | |
1723 TEST_ASSERT_SUCCESS(status); | |
1724 | |
1725 status = U_ZERO_ERROR; | |
1726 uregex_reset(re, 0, &status); | |
1727 | |
1728 result = uregex_findNext(re, &status); | |
1729 TEST_ASSERT(result == TRUE); | |
1730 TEST_ASSERT(uregex_start(re, 0, &status) == 3); | |
1731 TEST_ASSERT(uregex_end(re, 0, &status) == 5); | |
1732 TEST_ASSERT_SUCCESS(status); | |
1733 | |
1734 result = uregex_findNext(re, &status); | |
1735 TEST_ASSERT(result == TRUE); | |
1736 TEST_ASSERT(uregex_start(re, 0, &status) == 6); | |
1737 TEST_ASSERT(uregex_end(re, 0, &status) == 8); | |
1738 TEST_ASSERT_SUCCESS(status); | |
1739 | |
1740 status = U_ZERO_ERROR; | |
1741 uregex_reset(re, 12, &status); | |
1742 | |
1743 result = uregex_findNext(re, &status); | |
1744 TEST_ASSERT(result == TRUE); | |
1745 TEST_ASSERT(uregex_start(re, 0, &status) == 13); | |
1746 TEST_ASSERT(uregex_end(re, 0, &status) == 15); | |
1747 TEST_ASSERT_SUCCESS(status); | |
1748 | |
1749 result = uregex_findNext(re, &status); | |
1750 TEST_ASSERT(result == FALSE); | |
1751 TEST_ASSERT_SUCCESS(status); | |
1752 | |
1753 uregex_close(re); | |
1754 } | |
1755 | |
1756 /* | |
1757 * groupUText() | |
1758 */ | |
1759 { | |
1760 UChar text1[80]; | |
1761 UText *actual; | |
1762 UBool result; | |
1763 int64_t groupLen = 0; | |
1764 UChar groupBuf[20]; | |
1765 | |
1766 u_uastrncpy(text1, "noise abc interior def, and this is off the end", U
PRV_LENGTHOF(text1)); | |
1767 | |
1768 status = U_ZERO_ERROR; | |
1769 re = uregex_openC("abc(.*?)def", 0, NULL, &status); | |
1770 TEST_ASSERT_SUCCESS(status); | |
1771 | |
1772 uregex_setText(re, text1, -1, &status); | |
1773 result = uregex_find(re, 0, &status); | |
1774 TEST_ASSERT(result==TRUE); | |
1775 | |
1776 /* Capture Group 0 with shallow clone API. Should succeed. */ | |
1777 status = U_ZERO_ERROR; | |
1778 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status); | |
1779 TEST_ASSERT_SUCCESS(status); | |
1780 | |
1781 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " with
in "noise abc ..." */ | |
1782 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */ | |
1783 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf
, sizeof(groupBuf), &status); | |
1784 | |
1785 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE); | |
1786 utext_close(actual); | |
1787 | |
1788 /* Capture group #1. Should succeed. */ | |
1789 status = U_ZERO_ERROR; | |
1790 | |
1791 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status); | |
1792 TEST_ASSERT_SUCCESS(status); | |
1793 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interio
r " within "noise abc interior def ... " */ | |
1794 /* (within the str
ing text1) */ | |
1795 TEST_ASSERT(10 == groupLen); /* length of " interi
or " */ | |
1796 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf,
sizeof(groupBuf), &status); | |
1797 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE); | |
1798 | |
1799 utext_close(actual); | |
1800 | |
1801 /* Capture group out of range. Error. */ | |
1802 status = U_ZERO_ERROR; | |
1803 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status); | |
1804 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | |
1805 utext_close(actual); | |
1806 | |
1807 uregex_close(re); | |
1808 } | |
1809 | |
1810 /* | |
1811 * replaceFirst() | |
1812 */ | |
1813 { | |
1814 UChar text1[80]; | |
1815 UChar text2[80]; | |
1816 UText replText = UTEXT_INITIALIZER; | |
1817 UText *result; | |
1818 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0
x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2
e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */ | |
1819 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ | |
1820 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x3
0, 0x34, 0x31, 0x24, 0x31, | |
1821 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c,
0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */ | |
1822 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ | |
1823 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x6
3, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78,
0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x.
*/ | |
1824 status = U_ZERO_ERROR; | |
1825 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); | |
1826 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); | |
1827 utext_openUTF8(&replText, str_1x, -1, &status); | |
1828 | |
1829 re = uregex_openC("x(.*?)x", 0, NULL, &status); | |
1830 TEST_ASSERT_SUCCESS(status); | |
1831 | |
1832 /* Normal case, with match */ | |
1833 uregex_setText(re, text1, -1, &status); | |
1834 result = uregex_replaceFirstUText(re, &replText, NULL, &status); | |
1835 TEST_ASSERT_SUCCESS(status); | |
1836 TEST_ASSERT_UTEXT(str_Replxxx, result); | |
1837 utext_close(result); | |
1838 | |
1839 /* No match. Text should copy to output with no changes. */ | |
1840 uregex_setText(re, text2, -1, &status); | |
1841 result = uregex_replaceFirstUText(re, &replText, NULL, &status); | |
1842 TEST_ASSERT_SUCCESS(status); | |
1843 TEST_ASSERT_UTEXT(str_Nomatchhere, result); | |
1844 utext_close(result); | |
1845 | |
1846 /* Unicode escapes */ | |
1847 uregex_setText(re, text1, -1, &status); | |
1848 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status); | |
1849 result = uregex_replaceFirstUText(re, &replText, NULL, &status); | |
1850 TEST_ASSERT_SUCCESS(status); | |
1851 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result); | |
1852 utext_close(result); | |
1853 | |
1854 uregex_close(re); | |
1855 utext_close(&replText); | |
1856 } | |
1857 | |
1858 | |
1859 /* | |
1860 * replaceAll() | |
1861 */ | |
1862 { | |
1863 UChar text1[80]; | |
1864 UChar text2[80]; | |
1865 UText replText = UTEXT_INITIALIZER; | |
1866 UText *result; | |
1867 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ | |
1868 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65
, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e,
0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */ | |
1869 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x6
3, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ | |
1870 status = U_ZERO_ERROR; | |
1871 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); | |
1872 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); | |
1873 utext_openUTF8(&replText, str_1, -1, &status); | |
1874 | |
1875 re = uregex_openC("x(.*?)x", 0, NULL, &status); | |
1876 TEST_ASSERT_SUCCESS(status); | |
1877 | |
1878 /* Normal case, with match */ | |
1879 uregex_setText(re, text1, -1, &status); | |
1880 result = uregex_replaceAllUText(re, &replText, NULL, &status); | |
1881 TEST_ASSERT_SUCCESS(status); | |
1882 TEST_ASSERT_UTEXT(str_Replaceaa1, result); | |
1883 utext_close(result); | |
1884 | |
1885 /* No match. Text should copy to output with no changes. */ | |
1886 uregex_setText(re, text2, -1, &status); | |
1887 result = uregex_replaceAllUText(re, &replText, NULL, &status); | |
1888 TEST_ASSERT_SUCCESS(status); | |
1889 TEST_ASSERT_UTEXT(str_Nomatchhere, result); | |
1890 utext_close(result); | |
1891 | |
1892 uregex_close(re); | |
1893 utext_close(&replText); | |
1894 } | |
1895 | |
1896 | |
1897 /* | |
1898 * appendReplacement() | |
1899 */ | |
1900 { | |
1901 UChar text[100]; | |
1902 UChar repl[100]; | |
1903 UChar buf[100]; | |
1904 UChar *bufPtr; | |
1905 int32_t bufCap; | |
1906 | |
1907 status = U_ZERO_ERROR; | |
1908 re = uregex_openC(".*", 0, 0, &status); | |
1909 TEST_ASSERT_SUCCESS(status); | |
1910 | |
1911 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); | |
1912 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); | |
1913 uregex_setText(re, text, -1, &status); | |
1914 | |
1915 /* match covers whole target string */ | |
1916 uregex_find(re, 0, &status); | |
1917 TEST_ASSERT_SUCCESS(status); | |
1918 bufPtr = buf; | |
1919 bufCap = UPRV_LENGTHOF(buf); | |
1920 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); | |
1921 TEST_ASSERT_SUCCESS(status); | |
1922 TEST_ASSERT_STRING("some other", buf, TRUE); | |
1923 | |
1924 /* Match has \u \U escapes */ | |
1925 uregex_find(re, 0, &status); | |
1926 TEST_ASSERT_SUCCESS(status); | |
1927 bufPtr = buf; | |
1928 bufCap = UPRV_LENGTHOF(buf); | |
1929 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(
repl)); | |
1930 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); | |
1931 TEST_ASSERT_SUCCESS(status); | |
1932 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); | |
1933 | |
1934 uregex_close(re); | |
1935 } | |
1936 | |
1937 | |
1938 /* | |
1939 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(
). | |
1940 */ | |
1941 | |
1942 /* | |
1943 * splitUText() | |
1944 */ | |
1945 { | |
1946 UChar textToSplit[80]; | |
1947 UChar text2[80]; | |
1948 UText *fields[10]; | |
1949 int32_t numFields; | |
1950 int32_t i; | |
1951 | |
1952 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textTo
Split)); | |
1953 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); | |
1954 | |
1955 status = U_ZERO_ERROR; | |
1956 re = uregex_openC(":", 0, NULL, &status); | |
1957 | |
1958 | |
1959 /* Simple split */ | |
1960 | |
1961 uregex_setText(re, textToSplit, -1, &status); | |
1962 TEST_ASSERT_SUCCESS(status); | |
1963 | |
1964 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1965 if (U_SUCCESS(status)) { | |
1966 memset(fields, 0, sizeof(fields)); | |
1967 numFields = uregex_splitUText(re, fields, 10, &status); | |
1968 TEST_ASSERT_SUCCESS(status); | |
1969 | |
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1971 if(U_SUCCESS(status)) { | |
1972 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x0
0 }; /* 'first ' */ | |
1973 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x
64, 0x00 }; /* ' second' */ | |
1974 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x6
4, 0x00 }; /* ' third' */ | |
1975 TEST_ASSERT(numFields == 3); | |
1976 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
1977 TEST_ASSERT_UTEXT(str_second, fields[1]); | |
1978 TEST_ASSERT_UTEXT(str_third, fields[2]); | |
1979 TEST_ASSERT(fields[3] == NULL); | |
1980 } | |
1981 for(i = 0; i < numFields; i++) { | |
1982 utext_close(fields[i]); | |
1983 } | |
1984 } | |
1985 | |
1986 uregex_close(re); | |
1987 | |
1988 | |
1989 /* Split with too few output strings available */ | |
1990 status = U_ZERO_ERROR; | |
1991 re = uregex_openC(":", 0, NULL, &status); | |
1992 uregex_setText(re, textToSplit, -1, &status); | |
1993 TEST_ASSERT_SUCCESS(status); | |
1994 | |
1995 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
1996 if(U_SUCCESS(status)) { | |
1997 fields[0] = NULL; | |
1998 fields[1] = NULL; | |
1999 fields[2] = &patternText; | |
2000 numFields = uregex_splitUText(re, fields, 2, &status); | |
2001 TEST_ASSERT_SUCCESS(status); | |
2002 | |
2003 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2004 if(U_SUCCESS(status)) { | |
2005 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0
x00 }; /* first */ | |
2006 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0
x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second:
third */ | |
2007 TEST_ASSERT(numFields == 2); | |
2008 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
2009 TEST_ASSERT_UTEXT(str_secondthird, fields[1]); | |
2010 TEST_ASSERT(fields[2] == &patternText); | |
2011 } | |
2012 for(i = 0; i < numFields; i++) { | |
2013 utext_close(fields[i]); | |
2014 } | |
2015 } | |
2016 | |
2017 uregex_close(re); | |
2018 } | |
2019 | |
2020 /* splitUText(), part 2. Patterns with capture groups. The capture group t
ext | |
2021 * comes out as additional fields. */ | |
2022 { | |
2023 UChar textToSplit[80]; | |
2024 UText *fields[10]; | |
2025 int32_t numFields; | |
2026 int32_t i; | |
2027 | |
2028 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LEN
GTHOF(textToSplit)); | |
2029 | |
2030 status = U_ZERO_ERROR; | |
2031 re = uregex_openC("<(.*?)>", 0, NULL, &status); | |
2032 | |
2033 uregex_setText(re, textToSplit, -1, &status); | |
2034 TEST_ASSERT_SUCCESS(status); | |
2035 | |
2036 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2037 if(U_SUCCESS(status)) { | |
2038 memset(fields, 0, sizeof(fields)); | |
2039 numFields = uregex_splitUText(re, fields, 10, &status); | |
2040 TEST_ASSERT_SUCCESS(status); | |
2041 | |
2042 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2043 if(U_SUCCESS(status)) { | |
2044 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0
x00 }; /* first */ | |
2045 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 };
/* tag-a */ | |
2046 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
0x64, 0x00 }; /* second */ | |
2047 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 };
/* tag-b */ | |
2048 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0
x64, 0x00 }; /* third */ | |
2049 | |
2050 TEST_ASSERT(numFields == 5); | |
2051 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
2052 TEST_ASSERT_UTEXT(str_taga, fields[1]); | |
2053 TEST_ASSERT_UTEXT(str_second, fields[2]); | |
2054 TEST_ASSERT_UTEXT(str_tagb, fields[3]); | |
2055 TEST_ASSERT_UTEXT(str_third, fields[4]); | |
2056 TEST_ASSERT(fields[5] == NULL); | |
2057 } | |
2058 for(i = 0; i < numFields; i++) { | |
2059 utext_close(fields[i]); | |
2060 } | |
2061 } | |
2062 | |
2063 /* Split with too few output strings available (2) */ | |
2064 status = U_ZERO_ERROR; | |
2065 fields[0] = NULL; | |
2066 fields[1] = NULL; | |
2067 fields[2] = &patternText; | |
2068 numFields = uregex_splitUText(re, fields, 2, &status); | |
2069 TEST_ASSERT_SUCCESS(status); | |
2070 | |
2071 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2072 if(U_SUCCESS(status)) { | |
2073 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00
}; /* first */ | |
2074 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0
x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x6
9, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ | |
2075 TEST_ASSERT(numFields == 2); | |
2076 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]); | |
2078 TEST_ASSERT(fields[2] == &patternText); | |
2079 } | |
2080 for(i = 0; i < numFields; i++) { | |
2081 utext_close(fields[i]); | |
2082 } | |
2083 | |
2084 | |
2085 /* Split with too few output strings available (3) */ | |
2086 status = U_ZERO_ERROR; | |
2087 fields[0] = NULL; | |
2088 fields[1] = NULL; | |
2089 fields[2] = NULL; | |
2090 fields[3] = &patternText; | |
2091 numFields = uregex_splitUText(re, fields, 3, &status); | |
2092 TEST_ASSERT_SUCCESS(status); | |
2093 | |
2094 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2095 if(U_SUCCESS(status)) { | |
2096 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00
}; /* first */ | |
2097 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* t
ag-a */ | |
2098 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0
x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x6
9, 0x72, 0x64, 0x00 }; /* second<tag-b> third */ | |
2099 TEST_ASSERT(numFields == 3); | |
2100 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
2101 TEST_ASSERT_UTEXT(str_taga, fields[1]); | |
2102 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]); | |
2103 TEST_ASSERT(fields[3] == &patternText); | |
2104 } | |
2105 for(i = 0; i < numFields; i++) { | |
2106 utext_close(fields[i]); | |
2107 } | |
2108 | |
2109 /* Split with just enough output strings available (5) */ | |
2110 status = U_ZERO_ERROR; | |
2111 fields[0] = NULL; | |
2112 fields[1] = NULL; | |
2113 fields[2] = NULL; | |
2114 fields[3] = NULL; | |
2115 fields[4] = NULL; | |
2116 fields[5] = &patternText; | |
2117 numFields = uregex_splitUText(re, fields, 5, &status); | |
2118 TEST_ASSERT_SUCCESS(status); | |
2119 | |
2120 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2121 if(U_SUCCESS(status)) { | |
2122 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00
}; /* first */ | |
2123 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* t
ag-a */ | |
2124 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64
, 0x00 }; /* second */ | |
2125 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* t
ag-b */ | |
2126 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64,
0x00 }; /* third */ | |
2127 | |
2128 TEST_ASSERT(numFields == 5); | |
2129 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
2130 TEST_ASSERT_UTEXT(str_taga, fields[1]); | |
2131 TEST_ASSERT_UTEXT(str_second, fields[2]); | |
2132 TEST_ASSERT_UTEXT(str_tagb, fields[3]); | |
2133 TEST_ASSERT_UTEXT(str_third, fields[4]); | |
2134 TEST_ASSERT(fields[5] == &patternText); | |
2135 } | |
2136 for(i = 0; i < numFields; i++) { | |
2137 utext_close(fields[i]); | |
2138 } | |
2139 | |
2140 /* Split, end of text is a field delimiter. */ | |
2141 status = U_ZERO_ERROR; | |
2142 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &
status); | |
2143 TEST_ASSERT_SUCCESS(status); | |
2144 | |
2145 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2146 if(U_SUCCESS(status)) { | |
2147 memset(fields, 0, sizeof(fields)); | |
2148 fields[9] = &patternText; | |
2149 numFields = uregex_splitUText(re, fields, 9, &status); | |
2150 TEST_ASSERT_SUCCESS(status); | |
2151 | |
2152 /* The TEST_ASSERT_SUCCESS call above should change too... */ | |
2153 if(U_SUCCESS(status)) { | |
2154 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0
x00 }; /* first */ | |
2155 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 };
/* tag-a */ | |
2156 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e,
0x64, 0x00 }; /* second */ | |
2157 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 };
/* tag-b */ | |
2158 const char str_empty[] = { 0x00 }; | |
2159 | |
2160 TEST_ASSERT(numFields == 5); | |
2161 TEST_ASSERT_UTEXT(str_first, fields[0]); | |
2162 TEST_ASSERT_UTEXT(str_taga, fields[1]); | |
2163 TEST_ASSERT_UTEXT(str_second, fields[2]); | |
2164 TEST_ASSERT_UTEXT(str_tagb, fields[3]); | |
2165 TEST_ASSERT_UTEXT(str_empty, fields[4]); | |
2166 TEST_ASSERT(fields[5] == NULL); | |
2167 TEST_ASSERT(fields[8] == NULL); | |
2168 TEST_ASSERT(fields[9] == &patternText); | |
2169 } | |
2170 for(i = 0; i < numFields; i++) { | |
2171 utext_close(fields[i]); | |
2172 } | |
2173 } | |
2174 | |
2175 uregex_close(re); | |
2176 } | |
2177 utext_close(&patternText); | |
2178 } | |
2179 | |
2180 | |
2181 static void TestRefreshInput(void) { | |
2182 /* | |
2183 * RefreshInput changes out the input of a URegularExpression without | |
2184 * changing anything else in the match state. Used with Java JNI, | |
2185 * when Java moves the underlying string storage. This test | |
2186 * runs a find() loop, moving the text after the first match. | |
2187 * The right number of matches should still be found. | |
2188 */ | |
2189 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ | |
2190 UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; | |
2191 UErrorCode status = U_ZERO_ERROR; | |
2192 URegularExpression *re; | |
2193 UText ut1 = UTEXT_INITIALIZER; | |
2194 UText ut2 = UTEXT_INITIALIZER; | |
2195 | |
2196 re = uregex_openC("[ABC]", 0, 0, &status); | |
2197 TEST_ASSERT_SUCCESS(status); | |
2198 | |
2199 utext_openUChars(&ut1, testStr, -1, &status); | |
2200 TEST_ASSERT_SUCCESS(status); | |
2201 uregex_setUText(re, &ut1, &status); | |
2202 TEST_ASSERT_SUCCESS(status); | |
2203 | |
2204 /* Find the first match "A" in the original string */ | |
2205 TEST_ASSERT(uregex_findNext(re, &status)); | |
2206 TEST_ASSERT(uregex_start(re, 0, &status) == 0); | |
2207 | |
2208 /* Move the string, kill the original string. */ | |
2209 u_strcpy(movedStr, testStr); | |
2210 u_memset(testStr, 0, u_strlen(testStr)); | |
2211 utext_openUChars(&ut2, movedStr, -1, &status); | |
2212 TEST_ASSERT_SUCCESS(status); | |
2213 uregex_refreshUText(re, &ut2, &status); | |
2214 TEST_ASSERT_SUCCESS(status); | |
2215 | |
2216 /* Find the following two matches, now working in the moved string. */ | |
2217 TEST_ASSERT(uregex_findNext(re, &status)); | |
2218 TEST_ASSERT(uregex_start(re, 0, &status) == 2); | |
2219 TEST_ASSERT(uregex_findNext(re, &status)); | |
2220 TEST_ASSERT(uregex_start(re, 0, &status) == 4); | |
2221 TEST_ASSERT(FALSE == uregex_findNext(re, &status)); | |
2222 | |
2223 uregex_close(re); | |
2224 } | |
2225 | |
2226 | |
2227 static void TestBug8421(void) { | |
2228 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to b
e matched | |
2229 * was failing. | |
2230 */ | |
2231 URegularExpression *re; | |
2232 UErrorCode status = U_ZERO_ERROR; | |
2233 int32_t limit = -1; | |
2234 | |
2235 re = uregex_openC("abc", 0, 0, &status); | |
2236 TEST_ASSERT_SUCCESS(status); | |
2237 | |
2238 limit = uregex_getTimeLimit(re, &status); | |
2239 TEST_ASSERT_SUCCESS(status); | |
2240 TEST_ASSERT(limit == 0); | |
2241 | |
2242 uregex_setTimeLimit(re, 100, &status); | |
2243 TEST_ASSERT_SUCCESS(status); | |
2244 limit = uregex_getTimeLimit(re, &status); | |
2245 TEST_ASSERT_SUCCESS(status); | |
2246 TEST_ASSERT(limit == 100); | |
2247 | |
2248 uregex_close(re); | |
2249 } | |
2250 | |
2251 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) { | |
2252 return FALSE; | |
2253 } | |
2254 | |
2255 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) { | |
2256 return FALSE; | |
2257 } | |
2258 | |
2259 static void TestBug10815() { | |
2260 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER | |
2261 * when the callback function specified by uregex_setMatchCallbac
k() returns FALSE | |
2262 */ | |
2263 URegularExpression *re; | |
2264 UErrorCode status = U_ZERO_ERROR; | |
2265 UChar text[100]; | |
2266 | |
2267 | |
2268 // findNext() with a find progress callback function. | |
2269 | |
2270 re = uregex_openC(".z", 0, 0, &status); | |
2271 TEST_ASSERT_SUCCESS(status); | |
2272 | |
2273 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text)); | |
2274 uregex_setText(re, text, -1, &status); | |
2275 TEST_ASSERT_SUCCESS(status); | |
2276 | |
2277 uregex_setFindProgressCallback(re, FindCallback, NULL, &status); | |
2278 TEST_ASSERT_SUCCESS(status); | |
2279 | |
2280 uregex_findNext(re, &status); | |
2281 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); | |
2282 | |
2283 uregex_close(re); | |
2284 | |
2285 // findNext() with a match progress callback function. | |
2286 | |
2287 status = U_ZERO_ERROR; | |
2288 re = uregex_openC("((xxx)*)*y", 0, 0, &status); | |
2289 TEST_ASSERT_SUCCESS(status); | |
2290 | |
2291 // Pattern + this text gives an exponential time match. Without the callback
to stop the match, | |
2292 // it will appear to be stuck in a (near) infinite loop. | |
2293 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text)); | |
2294 uregex_setText(re, text, -1, &status); | |
2295 TEST_ASSERT_SUCCESS(status); | |
2296 | |
2297 uregex_setMatchCallback(re, MatchCallback, NULL, &status); | |
2298 TEST_ASSERT_SUCCESS(status); | |
2299 | |
2300 uregex_findNext(re, &status); | |
2301 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); | |
2302 | |
2303 uregex_close(re); | |
2304 } | |
2305 | |
2306 | |
2307 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
OLD | NEW |