OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * Copyright (c) 1999-2014, International Business Machines | |
3 * Corporation and others. All Rights Reserved. | |
4 ******************************************************************** | |
5 * Date Name Description | |
6 * 12/14/99 Madhu Creation. | |
7 * 01/12/2000 Madhu updated for changed API | |
8 ********************************************************************/ | |
9 | |
10 #include "unicode/utypes.h" | |
11 | |
12 #if !UCONFIG_NO_BREAK_ITERATION | |
13 | |
14 #include "unicode/uchar.h" | |
15 #include "intltest.h" | |
16 #include "unicode/rbbi.h" | |
17 #include "unicode/schriter.h" | |
18 #include "rbbiapts.h" | |
19 #include "rbbidata.h" | |
20 #include "cstring.h" | |
21 #include "ubrkimpl.h" | |
22 #include "unicode/locid.h" | |
23 #include "unicode/ustring.h" | |
24 #include "unicode/utext.h" | |
25 #include "cmemory.h" | |
26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING | |
27 #include "unicode/filteredbrk.h" | |
28 #include <stdio.h> // for sprintf | |
29 #endif | |
/**
 * API tests for the RuleBasedBreakIterator class.
 */
33 | |
34 | |
// Reports a data error, tagged with the current file and line, when the
// given ICU status code indicates failure.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a test failure, tagged with the current file and line and the
// stringified expression, when the expression compares equal to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    } \
}
40 | |
41 void RBBIAPITest::TestCloneEquals() | |
42 { | |
43 | |
44 UErrorCode status=U_ZERO_ERROR; | |
45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createWordInstance(Locale::getDefault(), status); | |
49 if(U_FAILURE(status)){ | |
50 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
51 return; | |
52 } | |
53 | |
54 | |
55 UnicodeString testString="Testing word break iterators's clone() and equals(
)"; | |
56 bi1->setText(testString); | |
57 bi2->setText(testString); | |
58 biequal->setText(testString); | |
59 | |
60 bi3->setText("hello"); | |
61 | |
62 logln((UnicodeString)"Testing equals()"); | |
63 | |
64 logln((UnicodeString)"Testing == and !="); | |
65 UBool b = (*bi1 != *biequal); | |
66 b |= *bi1 == *bi2; | |
67 b |= *bi1 == *bi3; | |
68 if (b) { | |
69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); | |
70 } | |
71 | |
72 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) | |
73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); | |
74 | |
75 | |
76 // Quick test of RulesBasedBreakIterator assignment - | |
77 // Check that | |
78 // two different iterators are != | |
79 // they are == after assignment | |
80 // source and dest iterator produce the same next() after assignment. | |
81 // deleting one doesn't disable the other. | |
82 logln("Testing assignment"); | |
83 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::creat
eLineInstance(Locale::getDefault(), status); | |
84 if(U_FAILURE(status)){ | |
85 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
86 return; | |
87 } | |
88 | |
89 RuleBasedBreakIterator biDefault, biDefault2; | |
90 if(U_FAILURE(status)){ | |
91 errln((UnicodeString)"FAIL : in construction of default iterator"); | |
92 return; | |
93 } | |
94 if (biDefault == *bix) { | |
95 errln((UnicodeString)"ERROR: iterators should not compare =="); | |
96 return; | |
97 } | |
98 if (biDefault != biDefault2) { | |
99 errln((UnicodeString)"ERROR: iterators should compare =="); | |
100 return; | |
101 } | |
102 | |
103 | |
104 UnicodeString HelloString("Hello Kitty"); | |
105 bix->setText(HelloString); | |
106 if (*bix == *bi2) { | |
107 errln(UnicodeString("ERROR: strings should not be equal before assignmen
t.")); | |
108 } | |
109 *bix = *bi2; | |
110 if (*bix != *bi2) { | |
111 errln(UnicodeString("ERROR: strings should be equal before assignment.")
); | |
112 } | |
113 | |
114 int bixnext = bix->next(); | |
115 int bi2next = bi2->next(); | |
116 if (! (bixnext == bi2next && bixnext == 7)) { | |
117 errln(UnicodeString("ERROR: iterators behaved differently after assignme
nt.")); | |
118 } | |
119 delete bix; | |
120 if (bi2->next() != 8) { | |
121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."
)); | |
122 } | |
123 | |
124 | |
125 | |
126 logln((UnicodeString)"Testing clone()"); | |
127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); | |
128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); | |
129 | |
130 if(*bi1clone != *bi1 || *bi1clone != *biequal || | |
131 *bi1clone == *bi3 || *bi1clone == *bi2) | |
132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); | |
133 | |
134 if(*bi2clone == *bi1 || *bi2clone == *biequal || | |
135 *bi2clone == *bi3 || *bi2clone != *bi2) | |
136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); | |
137 | |
138 if(bi1->getText() != bi1clone->getText() || | |
139 bi2clone->getText() != bi2->getText() || | |
140 *bi2clone == *bi1clone ) | |
141 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); | |
142 | |
143 delete bi1clone; | |
144 delete bi2clone; | |
145 delete bi1; | |
146 delete bi3; | |
147 delete bi2; | |
148 delete biequal; | |
149 } | |
150 | |
151 void RBBIAPITest::TestBoilerPlate() | |
152 { | |
153 UErrorCode status = U_ZERO_ERROR; | |
154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); | |
155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status)
; | |
156 if (U_FAILURE(status)) { | |
157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(s
tatus)); | |
158 return; | |
159 } | |
160 if(*a!=*b){ | |
161 errln("Failed: boilerplate method operator!= does not return correct res
ults"); | |
162 } | |
163 // Japanese word break iterators are identical to root with | |
164 // a dictionary-based break iterator | |
165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),statu
s); | |
166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),sta
tus); | |
167 if(c && d){ | |
168 if(*c!=*d){ | |
169 errln("Failed: boilerplate method operator== does not return correct
results"); | |
170 } | |
171 }else{ | |
172 errln("creation of break iterator failed"); | |
173 } | |
174 delete a; | |
175 delete b; | |
176 delete c; | |
177 delete d; | |
178 } | |
179 | |
180 void RBBIAPITest::TestgetRules() | |
181 { | |
182 UErrorCode status=U_ZERO_ERROR; | |
183 | |
184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator:
:createCharacterInstance(Locale::getDefault(), status); | |
185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator:
:createWordInstance(Locale::getDefault(), status); | |
186 if(U_FAILURE(status)){ | |
187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); | |
188 delete bi1; | |
189 delete bi2; | |
190 return; | |
191 } | |
192 | |
193 | |
194 | |
195 logln((UnicodeString)"Testing toString()"); | |
196 | |
197 bi1->setText((UnicodeString)"Hello there"); | |
198 | |
199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); | |
200 | |
201 UnicodeString temp=bi1->getRules(); | |
202 UnicodeString temp2=bi2->getRules(); | |
203 UnicodeString temp3=bi3->getRules(); | |
204 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(tem
p3) != 0) | |
205 errln((UnicodeString)"ERROR: error in getRules() method"); | |
206 | |
207 delete bi1; | |
208 delete bi2; | |
209 delete bi3; | |
210 } | |
211 void RBBIAPITest::TestHashCode() | |
212 { | |
213 UErrorCode status=U_ZERO_ERROR; | |
214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createWordInstance(Locale::getDefault(), status); | |
217 if(U_FAILURE(status)){ | |
218 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
219 delete bi1; | |
220 delete bi2; | |
221 delete bi3; | |
222 return; | |
223 } | |
224 | |
225 | |
226 logln((UnicodeString)"Testing hashCode()"); | |
227 | |
228 bi1->setText((UnicodeString)"Hash code"); | |
229 bi2->setText((UnicodeString)"Hash code"); | |
230 bi3->setText((UnicodeString)"Hash code"); | |
231 | |
232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); | |
233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); | |
234 | |
235 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashC
ode() || | |
236 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->
hashCode()) | |
237 errln((UnicodeString)"ERROR: identical objects have different hashcodes"
); | |
238 | |
239 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode()
|| | |
240 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() ==
bi2->hashCode()) | |
241 errln((UnicodeString)"ERROR: different objects have same hashcodes"); | |
242 | |
243 delete bi1clone; | |
244 delete bi2clone; | |
245 delete bi1; | |
246 delete bi2; | |
247 delete bi3; | |
248 | |
249 } | |
250 void RBBIAPITest::TestGetSetAdoptText() | |
251 { | |
252 logln((UnicodeString)"Testing getText setText "); | |
253 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); | |
254 UnicodeString str1="first string."; | |
255 UnicodeString str2="Second string."; | |
256 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)Rule
BasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); | |
257 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)Rule
BasedBreakIterator::createWordInstance(Locale::getDefault(), status)); | |
258 if(status.isFailure()){ | |
259 errcheckln(status, "Fail : in construction - %s", status.errorName()); | |
260 return; | |
261 } | |
262 | |
263 | |
264 CharacterIterator* text1= new StringCharacterIterator(str1); | |
265 CharacterIterator* text1Clone = text1->clone(); | |
266 CharacterIterator* text2= new StringCharacterIterator(str2); | |
267 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "
ond str" | |
268 | |
269 wordIter1->setText(str1); | |
270 CharacterIterator *tci = &wordIter1->getText(); | |
271 UnicodeString tstr; | |
272 tci->getText(tstr); | |
273 TEST_ASSERT(tstr == str1); | |
274 if(wordIter1->current() != 0) | |
275 errln((UnicodeString)"ERROR:1 setText did not set the iteration position
to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\
n"); | |
276 | |
277 wordIter1->next(2); | |
278 | |
279 wordIter1->setText(str2); | |
280 if(wordIter1->current() != 0) | |
281 errln((UnicodeString)"ERROR:2 setText did not reset the iteration positi
on to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)
"\n"); | |
282 | |
283 | |
284 charIter1->adoptText(text1Clone); | |
285 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); | |
286 tci = &wordIter1->getText(); | |
287 tci->getText(tstr); | |
288 TEST_ASSERT(tstr == str2); | |
289 tci = &charIter1->getText(); | |
290 tci->getText(tstr); | |
291 TEST_ASSERT(tstr == str1); | |
292 | |
293 | |
294 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->
clone()); | |
295 rb->adoptText(text1); | |
296 if(rb->getText() != *text1) | |
297 errln((UnicodeString)"ERROR:1 error in adoptText "); | |
298 rb->adoptText(text2); | |
299 if(rb->getText() != *text2) | |
300 errln((UnicodeString)"ERROR:2 error in adoptText "); | |
301 | |
302 // Adopt where iterator range is less than the entire orignal source string. | |
303 // (With the change of the break engine to working with UText internally, | |
304 // CharacterIterators starting at positions other than zero are not suppo
rted) | |
305 rb->adoptText(text3); | |
306 TEST_ASSERT(rb->preceding(2) == 0); | |
307 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); | |
308 //if(rb->preceding(2) != 3) { | |
309 // errln((UnicodeString)"ERROR:3 error in adoptText "); | |
310 //} | |
311 //if(rb->following(11) != BreakIterator::DONE) { | |
312 // errln((UnicodeString)"ERROR:4 error in adoptText "); | |
313 //} | |
314 | |
315 // UText API | |
316 // | |
317 // Quick test to see if UText is working at all. | |
318 // | |
319 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello w
orld" in UTF-8 */ | |
320 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ | |
321 // 012345678901 | |
322 | |
323 status.reset(); | |
324 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); | |
325 wordIter1->setText(ut.getAlias(), status); | |
326 TEST_ASSERT_SUCCESS(status); | |
327 | |
328 int32_t pos; | |
329 pos = wordIter1->first(); | |
330 TEST_ASSERT(pos==0); | |
331 pos = wordIter1->next(); | |
332 TEST_ASSERT(pos==5); | |
333 pos = wordIter1->next(); | |
334 TEST_ASSERT(pos==6); | |
335 pos = wordIter1->next(); | |
336 TEST_ASSERT(pos==11); | |
337 pos = wordIter1->next(); | |
338 TEST_ASSERT(pos==UBRK_DONE); | |
339 | |
340 status.reset(); | |
341 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); | |
342 TEST_ASSERT_SUCCESS(status); | |
343 wordIter1->setText(ut2.getAlias(), status); | |
344 TEST_ASSERT_SUCCESS(status); | |
345 | |
346 pos = wordIter1->first(); | |
347 TEST_ASSERT(pos==0); | |
348 pos = wordIter1->next(); | |
349 TEST_ASSERT(pos==3); | |
350 pos = wordIter1->next(); | |
351 TEST_ASSERT(pos==4); | |
352 | |
353 pos = wordIter1->last(); | |
354 TEST_ASSERT(pos==6); | |
355 pos = wordIter1->previous(); | |
356 TEST_ASSERT(pos==4); | |
357 pos = wordIter1->previous(); | |
358 TEST_ASSERT(pos==3); | |
359 pos = wordIter1->previous(); | |
360 TEST_ASSERT(pos==0); | |
361 pos = wordIter1->previous(); | |
362 TEST_ASSERT(pos==UBRK_DONE); | |
363 | |
364 status.reset(); | |
365 UnicodeString sEmpty; | |
366 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); | |
367 wordIter1->getUText(gut2.getAlias(), status); | |
368 TEST_ASSERT_SUCCESS(status); | |
369 status.reset(); | |
370 } | |
371 | |
372 | |
373 void RBBIAPITest::TestIteration() | |
374 { | |
375 // This test just verifies that the API is present. | |
376 // Testing for correct operation of the break rules happens elsewhere. | |
377 | |
378 UErrorCode status=U_ZERO_ERROR; | |
379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterato
r::createCharacterInstance(Locale::getDefault(), status); | |
380 if (U_FAILURE(status) || bi == NULL) { | |
381 errcheckln(status, "Failure creating character break iterator. Status =
%s", u_errorName(status)); | |
382 } | |
383 delete bi; | |
384 | |
385 status=U_ZERO_ERROR; | |
386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Lo
cale::getDefault(), status); | |
387 if (U_FAILURE(status) || bi == NULL) { | |
388 errcheckln(status, "Failure creating Word break iterator. Status = %s",
u_errorName(status)); | |
389 } | |
390 delete bi; | |
391 | |
392 status=U_ZERO_ERROR; | |
393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Lo
cale::getDefault(), status); | |
394 if (U_FAILURE(status) || bi == NULL) { | |
395 errcheckln(status, "Failure creating Line break iterator. Status = %s",
u_errorName(status)); | |
396 } | |
397 delete bi; | |
398 | |
399 status=U_ZERO_ERROR; | |
400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstanc
e(Locale::getDefault(), status); | |
401 if (U_FAILURE(status) || bi == NULL) { | |
402 errcheckln(status, "Failure creating Sentence break iterator. Status =
%s", u_errorName(status)); | |
403 } | |
404 delete bi; | |
405 | |
406 status=U_ZERO_ERROR; | |
407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(L
ocale::getDefault(), status); | |
408 if (U_FAILURE(status) || bi == NULL) { | |
409 errcheckln(status, "Failure creating Title break iterator. Status = %s"
, u_errorName(status)); | |
410 } | |
411 delete bi; | |
412 | |
413 status=U_ZERO_ERROR; | |
414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstan
ce(Locale::getDefault(), status); | |
415 if (U_FAILURE(status) || bi == NULL) { | |
416 errcheckln(status, "Failure creating character break iterator. Status =
%s", u_errorName(status)); | |
417 return; // Skip the rest of these tests. | |
418 } | |
419 | |
420 | |
421 UnicodeString testString="0123456789"; | |
422 bi->setText(testString); | |
423 | |
424 int32_t i; | |
425 i = bi->first(); | |
426 if (i != 0) { | |
427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); | |
428 } | |
429 | |
430 i = bi->last(); | |
431 if (i != 10) { | |
432 errln("Incorrect value from bi->last(). Expected 10, got %d", i); | |
433 } | |
434 | |
435 // | |
436 // Previous | |
437 // | |
438 bi->last(); | |
439 i = bi->previous(); | |
440 if (i != 9) { | |
441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d",
__LINE__, i); | |
442 } | |
443 | |
444 | |
445 bi->first(); | |
446 i = bi->previous(); | |
447 if (i != BreakIterator::DONE) { | |
448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, g
ot %d", __LINE__, i); | |
449 } | |
450 | |
451 // | |
452 // next() | |
453 // | |
454 bi->first(); | |
455 i = bi->next(); | |
456 if (i != 1) { | |
457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d",
__LINE__, i); | |
458 } | |
459 | |
460 bi->last(); | |
461 i = bi->next(); | |
462 if (i != BreakIterator::DONE) { | |
463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %
d", __LINE__, i); | |
464 } | |
465 | |
466 | |
467 // | |
468 // current() | |
469 // | |
470 bi->first(); | |
471 i = bi->current(); | |
472 if (i != 0) { | |
473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got
%d", __LINE__, i); | |
474 } | |
475 | |
476 bi->next(); | |
477 i = bi->current(); | |
478 if (i != 1) { | |
479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got
%d", __LINE__, i); | |
480 } | |
481 | |
482 bi->last(); | |
483 bi->next(); | |
484 i = bi->current(); | |
485 if (i != 10) { | |
486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got
%d", __LINE__, i); | |
487 } | |
488 | |
489 bi->first(); | |
490 bi->previous(); | |
491 i = bi->current(); | |
492 if (i != 0) { | |
493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got
%d", __LINE__, i); | |
494 } | |
495 | |
496 | |
497 // | |
498 // Following() | |
499 // | |
500 i = bi->following(4); | |
501 if (i != 5) { | |
502 errln("Incorrect value from bi->following() at line %d. Expected 5, got
%d", __LINE__, i); | |
503 } | |
504 | |
505 i = bi->following(9); | |
506 if (i != 10) { | |
507 errln("Incorrect value from bi->following() at line %d. Expected 10, go
t %d", __LINE__, i); | |
508 } | |
509 | |
510 i = bi->following(10); | |
511 if (i != BreakIterator::DONE) { | |
512 errln("Incorrect value from bi->following() at line %d. Expected DONE,
got %d", __LINE__, i); | |
513 } | |
514 | |
515 | |
516 // | |
517 // Preceding | |
518 // | |
519 i = bi->preceding(4); | |
520 if (i != 3) { | |
521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got
%d", __LINE__, i); | |
522 } | |
523 | |
524 i = bi->preceding(10); | |
525 if (i != 9) { | |
526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got
%d", __LINE__, i); | |
527 } | |
528 | |
529 i = bi->preceding(1); | |
530 if (i != 0) { | |
531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got
%d", __LINE__, i); | |
532 } | |
533 | |
534 i = bi->preceding(0); | |
535 if (i != BreakIterator::DONE) { | |
536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE,
got %d", __LINE__, i); | |
537 } | |
538 | |
539 | |
540 // | |
541 // isBoundary() | |
542 // | |
543 bi->first(); | |
544 if (bi->isBoundary(3) != TRUE) { | |
545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE,
got FALSE", __LINE__, i); | |
546 } | |
547 i = bi->current(); | |
548 if (i != 3) { | |
549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %
d", __LINE__, i); | |
550 } | |
551 | |
552 | |
553 if (bi->isBoundary(11) != FALSE) { | |
554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE,
got TRUE", __LINE__, i); | |
555 } | |
556 i = bi->current(); | |
557 if (i != 10) { | |
558 errln("Incorrect value from bi->current() at line %d. Expected 10, got
%d", __LINE__, i); | |
559 } | |
560 | |
561 // | |
562 // next(n) | |
563 // | |
564 bi->first(); | |
565 i = bi->next(4); | |
566 if (i != 4) { | |
567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d",
__LINE__, i); | |
568 } | |
569 | |
570 i = bi->next(6); | |
571 if (i != 10) { | |
572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d"
, __LINE__, i); | |
573 } | |
574 | |
575 bi->first(); | |
576 i = bi->next(11); | |
577 if (i != BreakIterator::DONE) { | |
578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterat
or::DONE, got %d", __LINE__, i); | |
579 } | |
580 | |
581 delete bi; | |
582 | |
583 } | |
584 | |
585 | |
586 | |
587 | |
588 | |
589 | |
590 void RBBIAPITest::TestBuilder() { | |
591 UnicodeString rulesString1 = "$Letters = [:L:];\n" | |
592 "$Numbers = [:N:];\n" | |
593 "$Letters+;\n" | |
594 "$Numbers+;\n" | |
595 "[^$Letters $Numbers];\n" | |
596 "!.*;\n"; | |
597 UnicodeString testString1 = "abc123..abc"; | |
598 // 01234567890 | |
599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; | |
600 UErrorCode status=U_ZERO_ERROR; | |
601 UParseError parseError; | |
602 | |
603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); | |
604 if(U_FAILURE(status)) { | |
605 dataerrln("Fail : in construction - %s", u_errorName(status)); | |
606 } else { | |
607 bi->setText(testString1); | |
608 doBoundaryTest(*bi, testString1, bounds1); | |
609 } | |
610 delete bi; | |
611 } | |
612 | |
613 | |
614 // | |
615 // TestQuoteGrouping | |
616 // Single quotes within rules imply a grouping, so that a modifier | |
617 // following the quoted text (* or +) applies to all of the quoted chars. | |
618 // | |
619 void RBBIAPITest::TestQuoteGrouping() { | |
620 UnicodeString rulesString1 = "#Here comes the rule...\n" | |
621 "'$@!'*;\n" // (\$\@\!)* | |
622 ".;\n"; | |
623 | |
624 UnicodeString testString1 = "$@!$@!X$@!!X"; | |
625 // 0123456789012 | |
626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; | |
627 UErrorCode status=U_ZERO_ERROR; | |
628 UParseError parseError; | |
629 | |
630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); | |
631 if(U_FAILURE(status)) { | |
632 dataerrln("Fail : in construction - %s", u_errorName(status)); | |
633 } else { | |
634 bi->setText(testString1); | |
635 doBoundaryTest(*bi, testString1, bounds1); | |
636 } | |
637 delete bi; | |
638 } | |
639 | |
640 // | |
641 // TestRuleStatus | |
642 // Test word break rule status constants. | |
643 // | |
644 void RBBIAPITest::TestRuleStatus() { | |
645 UChar str[30]; | |
646 //no longer test Han or hiragana breaking here: ruleStatusVec would return
nothing | |
647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO | |
648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", | |
649 // 012345678901234567 8 9 0 | |
650 // Katakana | |
651 str, 30); | |
652 UnicodeString testString1(str); | |
653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; | |
654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE,
UBRK_WORD_LETTER, | |
655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, | |
656 UBRK_WORD_IDEO, UBRK_WORD_NONE}; | |
657 | |
658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WOR
D_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, | |
659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WOR
D_NONE_LIMIT, | |
660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; | |
661 | |
662 UErrorCode status=U_ZERO_ERROR; | |
663 | |
664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(),
status); | |
665 if(U_FAILURE(status)) { | |
666 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
667 } else { | |
668 bi->setText(testString1); | |
669 // First test that the breaks are in the right spots. | |
670 doBoundaryTest(*bi, testString1, bounds1); | |
671 | |
672 // Then go back and check tag values | |
673 int32_t i = 0; | |
674 int32_t pos, tag; | |
675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i
++) { | |
676 if (pos != bounds1[i]) { | |
677 errln("FAIL: unexpected word break at postion %d", pos); | |
678 break; | |
679 } | |
680 tag = bi->getRuleStatus(); | |
681 if (tag < tag_lo[i] || tag >= tag_hi[i]) { | |
682 errln("FAIL: incorrect tag value %d at position %d", tag, pos); | |
683 break; | |
684 } | |
685 | |
686 // Check that we get the same tag values from getRuleStatusVec() | |
687 int32_t vec[10]; | |
688 int t = bi->getRuleStatusVec(vec, 10, status); | |
689 TEST_ASSERT_SUCCESS(status); | |
690 TEST_ASSERT(t==1); | |
691 TEST_ASSERT(vec[0] == tag); | |
692 } | |
693 } | |
694 delete bi; | |
695 | |
696 // Now test line break status. This test mostly is to confirm that the sta
tus constants | |
697 // are correctly declared in the header. | |
698 testString1 = "test line. \n"; | |
699 // break type s s h | |
700 | |
701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
702 if(U_FAILURE(status)) { | |
703 errcheckln(status, "failed to create word break iterator. - %s", u_erro
rName(status)); | |
704 } else { | |
705 int32_t i = 0; | |
706 int32_t pos, tag; | |
707 UBool success; | |
708 | |
709 bi->setText(testString1); | |
710 pos = bi->current(); | |
711 tag = bi->getRuleStatus(); | |
712 for (i=0; i<3; i++) { | |
713 switch (i) { | |
714 case 0: | |
715 success = pos==0 && tag==UBRK_LINE_SOFT; break; | |
716 case 1: | |
717 success = pos==5 && tag==UBRK_LINE_SOFT; break; | |
718 case 2: | |
719 success = pos==12 && tag==UBRK_LINE_HARD; break; | |
720 default: | |
721 success = FALSE; break; | |
722 } | |
723 if (success == FALSE) { | |
724 errln("Fail: incorrect word break status or position. i=%d, po
s=%d, tag=%d", | |
725 i, pos, tag); | |
726 break; | |
727 } | |
728 pos = bi->next(); | |
729 tag = bi->getRuleStatus(); | |
730 } | |
731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || | |
732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || | |
733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT
_LIMIT)) { | |
734 errln("UBRK_LINE_* constants from header are inconsistent."); | |
735 } | |
736 } | |
737 delete bi; | |
738 | |
739 } | |
740 | |
741 | |
742 // | |
743 // TestRuleStatusVec | |
744 // Test the vector form of break rule status. | |
745 // | |
746 void RBBIAPITest::TestRuleStatusVec() { | |
747 UnicodeString rulesString( "[A-N]{100}; \n" | |
748 "[a-w]{200}; \n" | |
749 "[\\p{L}]{300}; \n" | |
750 "[\\p{N}]{400}; \n" | |
751 "[0-5]{500}; \n" | |
752 "!.*;\n", -1, US_INV); | |
753 UnicodeString testString1 = "Aapz5?"; | |
754 int32_t statusVals[10]; | |
755 int32_t numStatuses; | |
756 int32_t pos; | |
757 | |
758 UErrorCode status=U_ZERO_ERROR; | |
759 UParseError parseError; | |
760 | |
761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseE
rror, status); | |
762 if (U_FAILURE(status)) { | |
763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__
, u_errorName(status)); | |
764 } else { | |
765 bi->setText(testString1); | |
766 | |
767 // A | |
768 pos = bi->next(); | |
769 TEST_ASSERT(pos==1); | |
770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
771 TEST_ASSERT_SUCCESS(status); | |
772 TEST_ASSERT(numStatuses == 2); | |
773 TEST_ASSERT(statusVals[0] == 100); | |
774 TEST_ASSERT(statusVals[1] == 300); | |
775 | |
776 // a | |
777 pos = bi->next(); | |
778 TEST_ASSERT(pos==2); | |
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
780 TEST_ASSERT_SUCCESS(status); | |
781 TEST_ASSERT(numStatuses == 2); | |
782 TEST_ASSERT(statusVals[0] == 200); | |
783 TEST_ASSERT(statusVals[1] == 300); | |
784 | |
785 // p | |
786 pos = bi->next(); | |
787 TEST_ASSERT(pos==3); | |
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
789 TEST_ASSERT_SUCCESS(status); | |
790 TEST_ASSERT(numStatuses == 2); | |
791 TEST_ASSERT(statusVals[0] == 200); | |
792 TEST_ASSERT(statusVals[1] == 300); | |
793 | |
794 // z | |
795 pos = bi->next(); | |
796 TEST_ASSERT(pos==4); | |
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
798 TEST_ASSERT_SUCCESS(status); | |
799 TEST_ASSERT(numStatuses == 1); | |
800 TEST_ASSERT(statusVals[0] == 300); | |
801 | |
802 // 5 | |
803 pos = bi->next(); | |
804 TEST_ASSERT(pos==5); | |
805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
806 TEST_ASSERT_SUCCESS(status); | |
807 TEST_ASSERT(numStatuses == 2); | |
808 TEST_ASSERT(statusVals[0] == 400); | |
809 TEST_ASSERT(statusVals[1] == 500); | |
810 | |
811 // ? | |
812 pos = bi->next(); | |
813 TEST_ASSERT(pos==6); | |
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
815 TEST_ASSERT_SUCCESS(status); | |
816 TEST_ASSERT(numStatuses == 1); | |
817 TEST_ASSERT(statusVals[0] == 0); | |
818 | |
819 // | |
820 // Check buffer overflow error handling. Char == A | |
821 // | |
822 bi->first(); | |
823 pos = bi->next(); | |
824 TEST_ASSERT(pos==1); | |
825 memset(statusVals, -1, sizeof(statusVals)); | |
826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); | |
827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
828 TEST_ASSERT(numStatuses == 2); | |
829 TEST_ASSERT(statusVals[0] == -1); | |
830 | |
831 status = U_ZERO_ERROR; | |
832 memset(statusVals, -1, sizeof(statusVals)); | |
833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); | |
834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
835 TEST_ASSERT(numStatuses == 2); | |
836 TEST_ASSERT(statusVals[0] == 100); | |
837 TEST_ASSERT(statusVals[1] == -1); | |
838 | |
839 status = U_ZERO_ERROR; | |
840 memset(statusVals, -1, sizeof(statusVals)); | |
841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); | |
842 TEST_ASSERT_SUCCESS(status); | |
843 TEST_ASSERT(numStatuses == 2); | |
844 TEST_ASSERT(statusVals[0] == 100); | |
845 TEST_ASSERT(statusVals[1] == 300); | |
846 TEST_ASSERT(statusVals[2] == -1); | |
847 } | |
848 delete bi; | |
849 | |
850 } | |
851 | |
852 // | |
853 // Bug 2190 Regression test. Builder crash on rule consisting of only a | |
854 // $variable reference | |
855 void RBBIAPITest::TestBug2190() { | |
856 UnicodeString rulesString1 = "$aaa = abcd;\n" | |
857 "$bbb = $aaa;\n" | |
858 "$bbb;\n"; | |
859 UnicodeString testString1 = "abcdabcd"; | |
860 // 01234567890 | |
861 int32_t bounds1[] = {0, 4, 8}; | |
862 UErrorCode status=U_ZERO_ERROR; | |
863 UParseError parseError; | |
864 | |
865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); | |
866 if(U_FAILURE(status)) { | |
867 dataerrln("Fail : in construction - %s", u_errorName(status)); | |
868 } else { | |
869 bi->setText(testString1); | |
870 doBoundaryTest(*bi, testString1, bounds1); | |
871 } | |
872 delete bi; | |
873 } | |
874 | |
875 | |
876 void RBBIAPITest::TestRegistration() { | |
877 #if !UCONFIG_NO_SERVICE | |
878 UErrorCode status = U_ZERO_ERROR; | |
879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); | |
880 // ok to not delete these if we exit because of error? | |
881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", sta
tus); | |
882 BreakIterator* root_word = BreakIterator::createWordInstance("", status); | |
883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status
); | |
884 | |
885 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { | |
886 dataerrln("Error creating instances of break interactors - %s", u_errorN
ame(status)); | |
887 | |
888 delete ja_word; | |
889 delete ja_char; | |
890 delete root_word; | |
891 delete root_char; | |
892 | |
893 return; | |
894 } | |
895 | |
896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD,
status); | |
897 { | |
898 #if 0 // With a dictionary based word breaking, ja_word is identical to root. | |
899 if (ja_word && *ja_word == *root_word) { | |
900 errln("japan not different from root"); | |
901 } | |
902 #endif | |
903 } | |
904 | |
905 { | |
906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", statu
s); | |
907 UBool fail = TRUE; | |
908 if(result){ | |
909 fail = *result != *ja_word; | |
910 } | |
911 delete result; | |
912 if (fail) { | |
913 errln("bad result for xx_XX/word"); | |
914 } | |
915 } | |
916 | |
917 { | |
918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP",
status); | |
919 UBool fail = TRUE; | |
920 if(result){ | |
921 fail = *result != *ja_char; | |
922 } | |
923 delete result; | |
924 if (fail) { | |
925 errln("bad result for ja_JP/char"); | |
926 } | |
927 } | |
928 | |
929 { | |
930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX",
status); | |
931 UBool fail = TRUE; | |
932 if(result){ | |
933 fail = *result != *root_char; | |
934 } | |
935 delete result; | |
936 if (fail) { | |
937 errln("bad result for xx_XX/char"); | |
938 } | |
939 } | |
940 | |
941 { | |
942 StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
943 UBool found = FALSE; | |
944 const UnicodeString* p; | |
945 while ((p = avail->snext(status))) { | |
946 if (p->compare("xx") == 0) { | |
947 found = TRUE; | |
948 break; | |
949 } | |
950 } | |
951 delete avail; | |
952 if (!found) { | |
953 errln("did not find test locale"); | |
954 } | |
955 } | |
956 | |
957 { | |
958 UBool unreg = BreakIterator::unregister(key, status); | |
959 if (!unreg) { | |
960 errln("unable to unregister"); | |
961 } | |
962 } | |
963 | |
964 { | |
965 BreakIterator* result = BreakIterator::createWordInstance("en_US", statu
s); | |
966 BreakIterator* root = BreakIterator::createWordInstance("", status); | |
967 UBool fail = TRUE; | |
968 if(root){ | |
969 fail = *root != *result; | |
970 } | |
971 delete root; | |
972 delete result; | |
973 if (fail) { | |
974 errln("did not get root break"); | |
975 } | |
976 } | |
977 | |
978 { | |
979 StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
980 UBool found = FALSE; | |
981 const UnicodeString* p; | |
982 while ((p = avail->snext(status))) { | |
983 if (p->compare("xx") == 0) { | |
984 found = TRUE; | |
985 break; | |
986 } | |
987 } | |
988 delete avail; | |
989 if (found) { | |
990 errln("found test locale"); | |
991 } | |
992 } | |
993 | |
994 { | |
995 int32_t count; | |
996 UBool foundLocale = FALSE; | |
997 const Locale *avail = BreakIterator::getAvailableLocales(count); | |
998 for (int i=0; i<count; i++) { | |
999 if (avail[i] == Locale::getEnglish()) { | |
1000 foundLocale = TRUE; | |
1001 break; | |
1002 } | |
1003 } | |
1004 if (foundLocale == FALSE) { | |
1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN
."); | |
1006 } | |
1007 } | |
1008 | |
1009 | |
1010 // ja_word was adopted by factory | |
1011 delete ja_char; | |
1012 delete root_word; | |
1013 delete root_char; | |
1014 #endif | |
1015 } | |
1016 | |
1017 void RBBIAPITest::RoundtripRule(const char *dataFile) { | |
1018 UErrorCode status = U_ZERO_ERROR; | |
1019 UParseError parseError; | |
1020 parseError.line = 0; | |
1021 parseError.offset = 0; | |
1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &
status)); | |
1023 uint32_t length; | |
1024 const UChar *builtSource; | |
1025 const uint8_t *rbbiRules; | |
1026 const uint8_t *builtRules; | |
1027 | |
1028 if (U_FAILURE(status)) { | |
1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(statu
s)); | |
1030 return; | |
1031 } | |
1032 | |
1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); | |
1034 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fR
uleSource); | |
1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, par
seError, status); | |
1036 if (U_FAILURE(status)) { | |
1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, colum
n %d\n", | |
1038 u_errorName(status), parseError.line, parseError.offset); | |
1039 return; | |
1040 }; | |
1041 rbbiRules = brkItr->getBinaryRules(length); | |
1042 logln("Comparing \"%s\" len=%d", dataFile, length); | |
1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { | |
1044 errln("Built rules and rebuilt rules are different %s", dataFile); | |
1045 return; | |
1046 } | |
1047 delete brkItr; | |
1048 } | |
1049 | |
1050 void RBBIAPITest::TestRoundtripRules() { | |
1051 RoundtripRule("word"); | |
1052 RoundtripRule("title"); | |
1053 RoundtripRule("sent"); | |
1054 RoundtripRule("line"); | |
1055 RoundtripRule("char"); | |
1056 if (!quick) { | |
1057 RoundtripRule("word_POSIX"); | |
1058 } | |
1059 } | |
1060 | |
1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* | |
1062 // (these are protected so we access them via a local class RBBIWithProtectedFun
ctions). | |
1063 // This is just a sanity check, not a thorough test (e.g. we don't check that th
e | |
1064 // first delete actually frees rulesCopy). | |
1065 void RBBIAPITest::TestCreateFromRBBIData() { | |
1066 // Get some handy RBBIData | |
1067 const char *brkName = "word"; // or "sent", "line", "char", etc. | |
1068 UErrorCode status = U_ZERO_ERROR; | |
1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &s
tatus)); | |
1070 if ( U_SUCCESS(status) ) { | |
1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMem
ory(data.getAlias()); | |
1072 uint32_t length = builtRules->fLength; | |
1073 RBBIWithProtectedFunctions * brkItr; | |
1074 | |
1075 // Try the memory-adopting constructor, need to copy the data first | |
1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); | |
1077 if ( rulesCopy ) { | |
1078 uprv_memcpy( rulesCopy, builtRules, length ); | |
1079 | |
1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); | |
1081 if ( U_SUCCESS(status) ) { | |
1082 delete brkItr; // this should free rulesCopy | |
1083 } else { | |
1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): IC
U Error \"%s\"\n", u_errorName(status) ); | |
1085 status = U_ZERO_ERROR;// reset for the next test | |
1086 uprv_free( rulesCopy ); | |
1087 } | |
1088 } | |
1089 | |
1090 // Now try the non-adopting constructor | |
1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFun
ctions::kDontAdopt, status); | |
1092 if ( U_SUCCESS(status) ) { | |
1093 delete brkItr; // this should NOT attempt to free builtRules | |
1094 if (builtRules->fLength != length) { // sanity check | |
1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted)
: delete affects data\n" ); | |
1096 } | |
1097 } else { | |
1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): IC
U Error \"%s\"\n", u_errorName(status) ); | |
1099 } | |
1100 } | |
1101 | |
1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) | |
1103 // | |
1104 status = U_ZERO_ERROR; | |
1105 RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::create
WordInstance(Locale::getEnglish(), status); | |
1106 if (rb == NULL || U_FAILURE(status)) { | |
1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::g
etEnglish) - %s", u_errorName(status)); | |
1108 } else { | |
1109 uint32_t length; | |
1110 const uint8_t *rules = rb->getBinaryRules(length); | |
1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length,
status); | |
1112 TEST_ASSERT_SUCCESS(status); | |
1113 TEST_ASSERT(*rb == *rb2); | |
1114 UnicodeString words = "one two three "; | |
1115 rb2->setText(words); | |
1116 int wordCounter = 0; | |
1117 while (rb2->next() != UBRK_DONE) { | |
1118 wordCounter++; | |
1119 } | |
1120 TEST_ASSERT(wordCounter == 6); | |
1121 | |
1122 status = U_ZERO_ERROR; | |
1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1
, status); | |
1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | |
1125 | |
1126 delete rb; | |
1127 delete rb2; | |
1128 delete rb3; | |
1129 } | |
1130 } | |
1131 | |
1132 | |
1133 void RBBIAPITest::TestRefreshInputText() { | |
1134 /* | |
1135 * RefreshInput changes out the input of a Break Iterator without | |
1136 * changing anything else in the iterator's state. Used with Java JNI, | |
1137 * when Java moves the underlying string storage. This test | |
1138 * runs BreakIterator::next() repeatedly, moving the text in the middle o
f the sequence. | |
1139 * The right set of boundaries should still be found. | |
1140 */ | |
1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /
* = " A B C D" */ | |
1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; | |
1143 UErrorCode status = U_ZERO_ERROR; | |
1144 UText ut1 = UTEXT_INITIALIZER; | |
1145 UText ut2 = UTEXT_INITIALIZER; | |
1146 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::create
LineInstance(Locale::getEnglish(), status); | |
1147 TEST_ASSERT_SUCCESS(status); | |
1148 | |
1149 utext_openUChars(&ut1, testStr, -1, &status); | |
1150 TEST_ASSERT_SUCCESS(status); | |
1151 | |
1152 if (U_SUCCESS(status)) { | |
1153 bi->setText(&ut1, status); | |
1154 TEST_ASSERT_SUCCESS(status); | |
1155 | |
1156 /* Line boundaries will occur before each letter in the original string
*/ | |
1157 TEST_ASSERT(1 == bi->next()); | |
1158 TEST_ASSERT(3 == bi->next()); | |
1159 | |
1160 /* Move the string, kill the original string. */ | |
1161 u_strcpy(movedStr, testStr); | |
1162 u_memset(testStr, 0x20, u_strlen(testStr)); | |
1163 utext_openUChars(&ut2, movedStr, -1, &status); | |
1164 TEST_ASSERT_SUCCESS(status); | |
1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status)
; | |
1166 TEST_ASSERT_SUCCESS(status); | |
1167 TEST_ASSERT(bi == returnedBI); | |
1168 | |
1169 /* Find the following matches, now working in the moved string. */ | |
1170 TEST_ASSERT(5 == bi->next()); | |
1171 TEST_ASSERT(7 == bi->next()); | |
1172 TEST_ASSERT(8 == bi->next()); | |
1173 TEST_ASSERT(UBRK_DONE == bi->next()); | |
1174 | |
1175 utext_close(&ut1); | |
1176 utext_close(&ut2); | |
1177 } | |
1178 delete bi; | |
1179 | |
1180 } | |
1181 | |
1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BRE
AK_ITERATION | |
1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it)
{ | |
1184 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular b
rackets | |
1185 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); | |
1186 | |
1187 int32_t *pos = new int32_t[ustr.length()]; | |
1188 int32_t posCount = 0; | |
1189 | |
1190 // calculate breaks up front, so we can print out | |
1191 // sans any debugging | |
1192 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { | |
1193 pos[posCount++] = n; | |
1194 if(posCount>=ustr.length()) { | |
1195 it.errln("brk count exceeds string length!"); | |
1196 return; | |
1197 } | |
1198 } | |
1199 UnicodeString out; | |
1200 out.append((UChar)CHSTR); | |
1201 int32_t prev = 0; | |
1202 for(int32_t i=0;i<posCount;i++) { | |
1203 int32_t n=pos[i]; | |
1204 out.append(ustr.tempSubString(prev,n-prev)); | |
1205 out.append((UChar)PILCROW); | |
1206 prev=n; | |
1207 } | |
1208 out.append(ustr.tempSubString(prev,ustr.length()-prev)); | |
1209 out.append((UChar)CHEND); | |
1210 it.logln(out); | |
1211 | |
1212 out.remove(); | |
1213 for(int32_t i=0;i<posCount;i++) { | |
1214 char tmp[100]; | |
1215 sprintf(tmp,"%d ",pos[i]); | |
1216 out.append(UnicodeString(tmp)); | |
1217 } | |
1218 it.logln(out); | |
1219 delete [] pos; | |
1220 } | |
1221 #endif | |
1222 | |
1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() { | |
1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BRE
AK_ITERATION | |
1225 UErrorCode status = U_ZERO_ERROR; | |
1226 LocalPointer<FilteredBreakIteratorBuilder> builder; | |
1227 LocalPointer<BreakIterator> baseBI; | |
1228 LocalPointer<BreakIterator> filteredBI; | |
1229 LocalPointer<BreakIterator> frenchBI; | |
1230 | |
1231 const UnicodeString text("In the meantime Mr. Weston arrived with his small sh
ip, which he had now recovered. Capt. Gorges, who informed the Sgt. here that on
e purpose of his going east was to meet with Mr. Weston, took this opportunity t
o call him to account for some abuses he had to lay to his charge."); // (Willia
m Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - ed
ited. | |
1232 const UnicodeString ABBR_MR("Mr."); | |
1233 const UnicodeString ABBR_CAPT("Capt."); | |
1234 | |
1235 { | |
1236 logln("Constructing empty builder\n"); | |
1237 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); | |
1238 TEST_ASSERT_SUCCESS(status); | |
1239 | |
1240 logln("Constructing base BI\n"); | |
1241 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish
(), status)); | |
1242 TEST_ASSERT_SUCCESS(status); | |
1243 | |
1244 logln("Building new BI\n"); | |
1245 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
1246 TEST_ASSERT_SUCCESS(status); | |
1247 | |
1248 if (U_SUCCESS(status)) { | |
1249 logln("Testing:"); | |
1250 filteredBI->setText(text); | |
1251 TEST_ASSERT(20 == filteredBI->next()); // Mr. | |
1252 TEST_ASSERT(84 == filteredBI->next()); // recovered. | |
1253 TEST_ASSERT(90 == filteredBI->next()); // Capt. | |
1254 TEST_ASSERT(181 == filteredBI->next()); // Mr. | |
1255 TEST_ASSERT(278 == filteredBI->next()); // charge. | |
1256 filteredBI->first(); | |
1257 prtbrks(filteredBI.getAlias(), text, *this); | |
1258 } | |
1259 } | |
1260 | |
1261 { | |
1262 logln("Constructing empty builder\n"); | |
1263 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); | |
1264 TEST_ASSERT_SUCCESS(status); | |
1265 | |
1266 if (U_SUCCESS(status)) { | |
1267 logln("Adding Mr. as an exception\n"); | |
1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); | |
1269 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // a
lready have it | |
1270 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); | |
1271 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); //
already removed it | |
1272 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); | |
1273 TEST_ASSERT_SUCCESS(status); | |
1274 | |
1275 logln("Constructing base BI\n"); | |
1276 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng
lish(), status)); | |
1277 TEST_ASSERT_SUCCESS(status); | |
1278 | |
1279 logln("Building new BI\n"); | |
1280 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
1281 TEST_ASSERT_SUCCESS(status); | |
1282 | |
1283 logln("Testing:"); | |
1284 filteredBI->setText(text); | |
1285 TEST_ASSERT(84 == filteredBI->next()); | |
1286 TEST_ASSERT(90 == filteredBI->next());// Capt. | |
1287 TEST_ASSERT(278 == filteredBI->next()); | |
1288 filteredBI->first(); | |
1289 prtbrks(filteredBI.getAlias(), text, *this); | |
1290 } | |
1291 } | |
1292 | |
1293 | |
1294 { | |
1295 logln("Constructing empty builder\n"); | |
1296 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); | |
1297 TEST_ASSERT_SUCCESS(status); | |
1298 | |
1299 if (U_SUCCESS(status)) { | |
1300 logln("Adding Mr. and Capt as an exception\n"); | |
1301 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); | |
1302 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); | |
1303 TEST_ASSERT_SUCCESS(status); | |
1304 | |
1305 logln("Constructing base BI\n"); | |
1306 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng
lish(), status)); | |
1307 TEST_ASSERT_SUCCESS(status); | |
1308 | |
1309 logln("Building new BI\n"); | |
1310 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
1311 TEST_ASSERT_SUCCESS(status); | |
1312 | |
1313 logln("Testing:"); | |
1314 filteredBI->setText(text); | |
1315 TEST_ASSERT(84 == filteredBI->next()); | |
1316 TEST_ASSERT(278 == filteredBI->next()); | |
1317 filteredBI->first(); | |
1318 prtbrks(filteredBI.getAlias(), text, *this); | |
1319 } | |
1320 } | |
1321 | |
1322 | |
1323 { | |
1324 logln("Constructing English builder\n"); | |
1325 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge
tEnglish(), status)); | |
1326 TEST_ASSERT_SUCCESS(status); | |
1327 | |
1328 logln("Constructing base BI\n"); | |
1329 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish
(), status)); | |
1330 TEST_ASSERT_SUCCESS(status); | |
1331 | |
1332 if (U_SUCCESS(status)) { | |
1333 logln("unsuppressing 'Capt'"); | |
1334 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status)); | |
1335 | |
1336 logln("Building new BI\n"); | |
1337 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
1338 TEST_ASSERT_SUCCESS(status); | |
1339 | |
1340 if(filteredBI.isValid()) { | |
1341 logln("Testing:"); | |
1342 filteredBI->setText(text); | |
1343 TEST_ASSERT(84 == filteredBI->next()); | |
1344 TEST_ASSERT(90 == filteredBI->next()); | |
1345 TEST_ASSERT(278 == filteredBI->next()); | |
1346 filteredBI->first(); | |
1347 prtbrks(filteredBI.getAlias(), text, *this); | |
1348 } | |
1349 } | |
1350 } | |
1351 | |
1352 | |
1353 { | |
1354 logln("Constructing English builder\n"); | |
1355 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge
tEnglish(), status)); | |
1356 TEST_ASSERT_SUCCESS(status); | |
1357 | |
1358 logln("Constructing base BI\n"); | |
1359 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish
(), status)); | |
1360 TEST_ASSERT_SUCCESS(status); | |
1361 | |
1362 if (U_SUCCESS(status)) { | |
1363 logln("Building new BI\n"); | |
1364 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
1365 TEST_ASSERT_SUCCESS(status); | |
1366 | |
1367 if(filteredBI.isValid()) { | |
1368 logln("Testing:"); | |
1369 filteredBI->setText(text); | |
1370 TEST_ASSERT(84 == filteredBI->next()); | |
1371 TEST_ASSERT(278 == filteredBI->next()); | |
1372 filteredBI->first(); | |
1373 prtbrks(filteredBI.getAlias(), text, *this); | |
1374 } | |
1375 } | |
1376 } | |
1377 | |
1378 // reenable once french is in | |
1379 { | |
1380 logln("Constructing French builder"); | |
1381 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge
tFrench(), status)); | |
1382 TEST_ASSERT_SUCCESS(status); | |
1383 | |
1384 logln("Constructing base BI\n"); | |
1385 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(
), status)); | |
1386 TEST_ASSERT_SUCCESS(status); | |
1387 | |
1388 if (U_SUCCESS(status)) { | |
1389 logln("Building new BI\n"); | |
1390 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
1391 TEST_ASSERT_SUCCESS(status); | |
1392 } | |
1393 | |
1394 if(frenchBI.isValid()) { | |
1395 logln("Testing:"); | |
1396 UnicodeString frText("C'est MM. Duval."); | |
1397 frenchBI->setText(frText); | |
1398 TEST_ASSERT(16 == frenchBI->next()); | |
1399 TEST_ASSERT(BreakIterator::DONE == frenchBI->next()); | |
1400 frenchBI->first(); | |
1401 prtbrks(frenchBI.getAlias(), frText, *this); | |
1402 logln("Testing against English:"); | |
1403 filteredBI->setText(frText); | |
1404 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI i
s english. | |
1405 TEST_ASSERT(16 == filteredBI->next()); | |
1406 TEST_ASSERT(BreakIterator::DONE == filteredBI->next()); | |
1407 filteredBI->first(); | |
1408 prtbrks(filteredBI.getAlias(), frText, *this); | |
1409 | |
1410 // Verify == | |
1411 TEST_ASSERT_TRUE(*frenchBI == *frenchBI); | |
1412 TEST_ASSERT_TRUE(*filteredBI != *frenchBI); | |
1413 TEST_ASSERT_TRUE(*frenchBI != *filteredBI); | |
1414 } else { | |
1415 dataerrln("French BI: not valid."); | |
1416 } | |
1417 } | |
1418 | |
1419 #else | |
1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCON
FIG_NO_FILTERED_BREAK_ITERATION"); | |
1421 #endif | |
1422 } | |
1423 | |
1424 //--------------------------------------------- | |
1425 // runIndexedTest | |
1426 //--------------------------------------------- | |
1427 | |
1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
char* /*par*/ ) | |
1429 { | |
1430 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); | |
1431 switch (index) { | |
1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break
; | |
1433 #if !UCONFIG_NO_FILE_IO | |
1434 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; | |
1435 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; | |
1436 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; | |
1437 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText();
break; | |
1438 case 4: name = "TestIteration"; if (exec) TestIteration(); break; | |
1439 #else | |
1440 case 0: case 1: case 2: case 3: case 4: name = "skip"; break; | |
1441 #endif | |
1442 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; | |
1443 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); brea
k; | |
1444 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); brea
k; | |
1445 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; | |
1446 #if !UCONFIG_NO_FILE_IO | |
1447 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; | |
1448 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; | |
1449 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; | |
1450 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); br
eak; | |
1451 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIDa
ta(); break; | |
1452 #else | |
1453 case 9: case 10: case 11: case 12: case 13: name = "skip"; break; | |
1454 #endif | |
1455 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText()
; break; | |
1456 | |
1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING | |
1458 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBre
akIteratorBuilder(); break; | |
1459 #else | |
1460 case 15: name="skip"; break; | |
1461 #endif | |
1462 default: name = ""; break; // needed to end loop | |
1463 } | |
1464 } | |
1465 | |
1466 //--------------------------------------------- | |
1467 //Internal subroutines | |
1468 //--------------------------------------------- | |
1469 | |
1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t
*boundaries){ | |
1471 logln((UnicodeString)"testIsBoundary():"); | |
1472 int32_t p = 0; | |
1473 UBool isB; | |
1474 for (int32_t i = 0; i < text.length(); i++) { | |
1475 isB = bi.isBoundary(i); | |
1476 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); | |
1477 | |
1478 if (i == boundaries[p]) { | |
1479 if (!isB) | |
1480 errln((UnicodeString)"Wrong result from isBoundary() for " +
i + (UnicodeString)": expected true, got false"); | |
1481 p++; | |
1482 } | |
1483 else { | |
1484 if (isB) | |
1485 errln((UnicodeString)"Wrong result from isBoundary() for " +
i + (UnicodeString)": expected false, got true"); | |
1486 } | |
1487 } | |
1488 } | |
1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof
fset, int32_t expectedOffset, const char* expectedString){ | |
1490 UnicodeString selected; | |
1491 UnicodeString expected=CharsToUnicodeString(expectedString); | |
1492 | |
1493 if(gotoffset != expectedOffset) | |
1494 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeStrin
g)" instead of #" + expectedOffset); | |
1495 if(start <= gotoffset){ | |
1496 testString.extractBetween(start, gotoffset, selected); | |
1497 } | |
1498 else{ | |
1499 testString.extractBetween(gotoffset, start, selected); | |
1500 } | |
1501 if(selected.compare(expected) != 0) | |
1502 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\"
instead of \"" + expected + "\"")); | |
1503 else | |
1504 logln(prettify("****selected \"" + selected + "\"")); | |
1505 } | |
1506 | |
1507 //--------------------------------------------- | |
1508 //RBBIWithProtectedFunctions class functions | |
1509 //--------------------------------------------- | |
1510 | |
// Test-only constructor: passes data and status straight through to the
// RuleBasedBreakIterator(RBBIDataHeader*, UErrorCode&) constructor so that
// the test can reach it.  TestCreateFromRBBIData treats this as the
// memory-adopting form (it expects deleting the iterator to free data).
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
    : RuleBasedBreakIterator(data, status)
{
}
1515 | |
// Test-only constructor: forwards to the RuleBasedBreakIterator constructor
// taking (const RBBIDataHeader*, kDontAdopt, UErrorCode&).  The enum argument
// is unnamed and serves only to select this overload; the base class is
// always given RuleBasedBreakIterator::kDontAdopt.  TestCreateFromRBBIData
// expects deleting the iterator to leave data untouched.
RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
    : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
{
}
1520 | |
1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | |
OLD | NEW |