Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(364)

Side by Side Diff: source/test/intltest/rbbiapts.cpp

Issue 2435373002: Delete source/test (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/test/intltest/rbbiapts.h ('k') | source/test/intltest/rbbitst.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /********************************************************************
2 * Copyright (c) 1999-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/uchar.h"
15 #include "intltest.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
18 #include "rbbiapts.h"
19 #include "rbbidata.h"
20 #include "cstring.h"
21 #include "ubrkimpl.h"
22 #include "unicode/locid.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utext.h"
25 #include "cmemory.h"
26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
27 #include "unicode/filteredbrk.h"
28 #include <stdio.h> // for sprintf
29 #endif
30 /**
31 * API Test the RuleBasedBreakIterator class
32 */
33
34
// Reports a data error through dataerrln() when `status` holds a failing
// UErrorCode. The message carries the file, line and error name.
// Expands to a brace-enclosed block, so the usual trailing ';' is harmless.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
    dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Reports a test error through errln() when `expr` compares equal to FALSE.
// The text of the failing expression is stringified into the message.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
40
41 void RBBIAPITest::TestCloneEquals()
42 {
43
44 UErrorCode status=U_ZERO_ERROR;
45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);
46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);
47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);
48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createWordInstance(Locale::getDefault(), status);
49 if(U_FAILURE(status)){
50 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
51 return;
52 }
53
54
55 UnicodeString testString="Testing word break iterators's clone() and equals( )";
56 bi1->setText(testString);
57 bi2->setText(testString);
58 biequal->setText(testString);
59
60 bi3->setText("hello");
61
62 logln((UnicodeString)"Testing equals()");
63
64 logln((UnicodeString)"Testing == and !=");
65 UBool b = (*bi1 != *biequal);
66 b |= *bi1 == *bi2;
67 b |= *bi1 == *bi3;
68 if (b) {
69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
70 }
71
72 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
74
75
76 // Quick test of RulesBasedBreakIterator assignment -
77 // Check that
78 // two different iterators are !=
79 // they are == after assignment
80 // source and dest iterator produce the same next() after assignment.
81 // deleting one doesn't disable the other.
82 logln("Testing assignment");
83 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::creat eLineInstance(Locale::getDefault(), status);
84 if(U_FAILURE(status)){
85 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
86 return;
87 }
88
89 RuleBasedBreakIterator biDefault, biDefault2;
90 if(U_FAILURE(status)){
91 errln((UnicodeString)"FAIL : in construction of default iterator");
92 return;
93 }
94 if (biDefault == *bix) {
95 errln((UnicodeString)"ERROR: iterators should not compare ==");
96 return;
97 }
98 if (biDefault != biDefault2) {
99 errln((UnicodeString)"ERROR: iterators should compare ==");
100 return;
101 }
102
103
104 UnicodeString HelloString("Hello Kitty");
105 bix->setText(HelloString);
106 if (*bix == *bi2) {
107 errln(UnicodeString("ERROR: strings should not be equal before assignmen t."));
108 }
109 *bix = *bi2;
110 if (*bix != *bi2) {
111 errln(UnicodeString("ERROR: strings should be equal before assignment.") );
112 }
113
114 int bixnext = bix->next();
115 int bi2next = bi2->next();
116 if (! (bixnext == bi2next && bixnext == 7)) {
117 errln(UnicodeString("ERROR: iterators behaved differently after assignme nt."));
118 }
119 delete bix;
120 if (bi2->next() != 8) {
121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy." ));
122 }
123
124
125
126 logln((UnicodeString)"Testing clone()");
127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
129
130 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
131 *bi1clone == *bi3 || *bi1clone == *bi2)
132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
133
134 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
135 *bi2clone == *bi3 || *bi2clone != *bi2)
136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
137
138 if(bi1->getText() != bi1clone->getText() ||
139 bi2clone->getText() != bi2->getText() ||
140 *bi2clone == *bi1clone )
141 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
142
143 delete bi1clone;
144 delete bi2clone;
145 delete bi1;
146 delete bi3;
147 delete bi2;
148 delete biequal;
149 }
150
151 void RBBIAPITest::TestBoilerPlate()
152 {
153 UErrorCode status = U_ZERO_ERROR;
154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status) ;
156 if (U_FAILURE(status)) {
157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(s tatus));
158 return;
159 }
160 if(*a!=*b){
161 errln("Failed: boilerplate method operator!= does not return correct res ults");
162 }
163 // Japanese word break iterators are identical to root with
164 // a dictionary-based break iterator
165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),statu s);
166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),sta tus);
167 if(c && d){
168 if(*c!=*d){
169 errln("Failed: boilerplate method operator== does not return correct results");
170 }
171 }else{
172 errln("creation of break iterator failed");
173 }
174 delete a;
175 delete b;
176 delete c;
177 delete d;
178 }
179
180 void RBBIAPITest::TestgetRules()
181 {
182 UErrorCode status=U_ZERO_ERROR;
183
184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator: :createCharacterInstance(Locale::getDefault(), status);
185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator: :createWordInstance(Locale::getDefault(), status);
186 if(U_FAILURE(status)){
187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
188 delete bi1;
189 delete bi2;
190 return;
191 }
192
193
194
195 logln((UnicodeString)"Testing toString()");
196
197 bi1->setText((UnicodeString)"Hello there");
198
199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
200
201 UnicodeString temp=bi1->getRules();
202 UnicodeString temp2=bi2->getRules();
203 UnicodeString temp3=bi3->getRules();
204 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(tem p3) != 0)
205 errln((UnicodeString)"ERROR: error in getRules() method");
206
207 delete bi1;
208 delete bi2;
209 delete bi3;
210 }
211 void RBBIAPITest::TestHashCode()
212 {
213 UErrorCode status=U_ZERO_ERROR;
214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);
215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createCharacterInstance(Locale::getDefault(), status);
216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte rator::createWordInstance(Locale::getDefault(), status);
217 if(U_FAILURE(status)){
218 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
219 delete bi1;
220 delete bi2;
221 delete bi3;
222 return;
223 }
224
225
226 logln((UnicodeString)"Testing hashCode()");
227
228 bi1->setText((UnicodeString)"Hash code");
229 bi2->setText((UnicodeString)"Hash code");
230 bi3->setText((UnicodeString)"Hash code");
231
232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
234
235 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashC ode() ||
236 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone-> hashCode())
237 errln((UnicodeString)"ERROR: identical objects have different hashcodes" );
238
239 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
240 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
241 errln((UnicodeString)"ERROR: different objects have same hashcodes");
242
243 delete bi1clone;
244 delete bi2clone;
245 delete bi1;
246 delete bi2;
247 delete bi3;
248
249 }
250 void RBBIAPITest::TestGetSetAdoptText()
251 {
252 logln((UnicodeString)"Testing getText setText ");
253 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
254 UnicodeString str1="first string.";
255 UnicodeString str2="Second string.";
256 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)Rule BasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
257 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)Rule BasedBreakIterator::createWordInstance(Locale::getDefault(), status));
258 if(status.isFailure()){
259 errcheckln(status, "Fail : in construction - %s", status.errorName());
260 return;
261 }
262
263
264 CharacterIterator* text1= new StringCharacterIterator(str1);
265 CharacterIterator* text1Clone = text1->clone();
266 CharacterIterator* text2= new StringCharacterIterator(str2);
267 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // " ond str"
268
269 wordIter1->setText(str1);
270 CharacterIterator *tci = &wordIter1->getText();
271 UnicodeString tstr;
272 tci->getText(tstr);
273 TEST_ASSERT(tstr == str1);
274 if(wordIter1->current() != 0)
275 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\ n");
276
277 wordIter1->next(2);
278
279 wordIter1->setText(str2);
280 if(wordIter1->current() != 0)
281 errln((UnicodeString)"ERROR:2 setText did not reset the iteration positi on to the beginning of the text, it is" + wordIter1->current() + (UnicodeString) "\n");
282
283
284 charIter1->adoptText(text1Clone);
285 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
286 tci = &wordIter1->getText();
287 tci->getText(tstr);
288 TEST_ASSERT(tstr == str2);
289 tci = &charIter1->getText();
290 tci->getText(tstr);
291 TEST_ASSERT(tstr == str1);
292
293
294 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1-> clone());
295 rb->adoptText(text1);
296 if(rb->getText() != *text1)
297 errln((UnicodeString)"ERROR:1 error in adoptText ");
298 rb->adoptText(text2);
299 if(rb->getText() != *text2)
300 errln((UnicodeString)"ERROR:2 error in adoptText ");
301
302 // Adopt where iterator range is less than the entire orignal source string.
303 // (With the change of the break engine to working with UText internally,
304 // CharacterIterators starting at positions other than zero are not suppo rted)
305 rb->adoptText(text3);
306 TEST_ASSERT(rb->preceding(2) == 0);
307 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
308 //if(rb->preceding(2) != 3) {
309 // errln((UnicodeString)"ERROR:3 error in adoptText ");
310 //}
311 //if(rb->following(11) != BreakIterator::DONE) {
312 // errln((UnicodeString)"ERROR:4 error in adoptText ");
313 //}
314
315 // UText API
316 //
317 // Quick test to see if UText is working at all.
318 //
319 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello w orld" in UTF-8 */
320 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
321 // 012345678901
322
323 status.reset();
324 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
325 wordIter1->setText(ut.getAlias(), status);
326 TEST_ASSERT_SUCCESS(status);
327
328 int32_t pos;
329 pos = wordIter1->first();
330 TEST_ASSERT(pos==0);
331 pos = wordIter1->next();
332 TEST_ASSERT(pos==5);
333 pos = wordIter1->next();
334 TEST_ASSERT(pos==6);
335 pos = wordIter1->next();
336 TEST_ASSERT(pos==11);
337 pos = wordIter1->next();
338 TEST_ASSERT(pos==UBRK_DONE);
339
340 status.reset();
341 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
342 TEST_ASSERT_SUCCESS(status);
343 wordIter1->setText(ut2.getAlias(), status);
344 TEST_ASSERT_SUCCESS(status);
345
346 pos = wordIter1->first();
347 TEST_ASSERT(pos==0);
348 pos = wordIter1->next();
349 TEST_ASSERT(pos==3);
350 pos = wordIter1->next();
351 TEST_ASSERT(pos==4);
352
353 pos = wordIter1->last();
354 TEST_ASSERT(pos==6);
355 pos = wordIter1->previous();
356 TEST_ASSERT(pos==4);
357 pos = wordIter1->previous();
358 TEST_ASSERT(pos==3);
359 pos = wordIter1->previous();
360 TEST_ASSERT(pos==0);
361 pos = wordIter1->previous();
362 TEST_ASSERT(pos==UBRK_DONE);
363
364 status.reset();
365 UnicodeString sEmpty;
366 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
367 wordIter1->getUText(gut2.getAlias(), status);
368 TEST_ASSERT_SUCCESS(status);
369 status.reset();
370 }
371
372
373 void RBBIAPITest::TestIteration()
374 {
375 // This test just verifies that the API is present.
376 // Testing for correct operation of the break rules happens elsewhere.
377
378 UErrorCode status=U_ZERO_ERROR;
379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterato r::createCharacterInstance(Locale::getDefault(), status);
380 if (U_FAILURE(status) || bi == NULL) {
381 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
382 }
383 delete bi;
384
385 status=U_ZERO_ERROR;
386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Lo cale::getDefault(), status);
387 if (U_FAILURE(status) || bi == NULL) {
388 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
389 }
390 delete bi;
391
392 status=U_ZERO_ERROR;
393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Lo cale::getDefault(), status);
394 if (U_FAILURE(status) || bi == NULL) {
395 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
396 }
397 delete bi;
398
399 status=U_ZERO_ERROR;
400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstanc e(Locale::getDefault(), status);
401 if (U_FAILURE(status) || bi == NULL) {
402 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
403 }
404 delete bi;
405
406 status=U_ZERO_ERROR;
407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(L ocale::getDefault(), status);
408 if (U_FAILURE(status) || bi == NULL) {
409 errcheckln(status, "Failure creating Title break iterator. Status = %s" , u_errorName(status));
410 }
411 delete bi;
412
413 status=U_ZERO_ERROR;
414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstan ce(Locale::getDefault(), status);
415 if (U_FAILURE(status) || bi == NULL) {
416 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
417 return; // Skip the rest of these tests.
418 }
419
420
421 UnicodeString testString="0123456789";
422 bi->setText(testString);
423
424 int32_t i;
425 i = bi->first();
426 if (i != 0) {
427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
428 }
429
430 i = bi->last();
431 if (i != 10) {
432 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
433 }
434
435 //
436 // Previous
437 //
438 bi->last();
439 i = bi->previous();
440 if (i != 9) {
441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
442 }
443
444
445 bi->first();
446 i = bi->previous();
447 if (i != BreakIterator::DONE) {
448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, g ot %d", __LINE__, i);
449 }
450
451 //
452 // next()
453 //
454 bi->first();
455 i = bi->next();
456 if (i != 1) {
457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
458 }
459
460 bi->last();
461 i = bi->next();
462 if (i != BreakIterator::DONE) {
463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got % d", __LINE__, i);
464 }
465
466
467 //
468 // current()
469 //
470 bi->first();
471 i = bi->current();
472 if (i != 0) {
473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
474 }
475
476 bi->next();
477 i = bi->current();
478 if (i != 1) {
479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
480 }
481
482 bi->last();
483 bi->next();
484 i = bi->current();
485 if (i != 10) {
486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
487 }
488
489 bi->first();
490 bi->previous();
491 i = bi->current();
492 if (i != 0) {
493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
494 }
495
496
497 //
498 // Following()
499 //
500 i = bi->following(4);
501 if (i != 5) {
502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
503 }
504
505 i = bi->following(9);
506 if (i != 10) {
507 errln("Incorrect value from bi->following() at line %d. Expected 10, go t %d", __LINE__, i);
508 }
509
510 i = bi->following(10);
511 if (i != BreakIterator::DONE) {
512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
513 }
514
515
516 //
517 // Preceding
518 //
519 i = bi->preceding(4);
520 if (i != 3) {
521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
522 }
523
524 i = bi->preceding(10);
525 if (i != 9) {
526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
527 }
528
529 i = bi->preceding(1);
530 if (i != 0) {
531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
532 }
533
534 i = bi->preceding(0);
535 if (i != BreakIterator::DONE) {
536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
537 }
538
539
540 //
541 // isBoundary()
542 //
543 bi->first();
544 if (bi->isBoundary(3) != TRUE) {
545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
546 }
547 i = bi->current();
548 if (i != 3) {
549 errln("Incorrect value from bi->current() at line %d. Expected 3, got % d", __LINE__, i);
550 }
551
552
553 if (bi->isBoundary(11) != FALSE) {
554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
555 }
556 i = bi->current();
557 if (i != 10) {
558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
559 }
560
561 //
562 // next(n)
563 //
564 bi->first();
565 i = bi->next(4);
566 if (i != 4) {
567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
568 }
569
570 i = bi->next(6);
571 if (i != 10) {
572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d" , __LINE__, i);
573 }
574
575 bi->first();
576 i = bi->next(11);
577 if (i != BreakIterator::DONE) {
578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterat or::DONE, got %d", __LINE__, i);
579 }
580
581 delete bi;
582
583 }
584
585
586
587
588
589
590 void RBBIAPITest::TestBuilder() {
591 UnicodeString rulesString1 = "$Letters = [:L:];\n"
592 "$Numbers = [:N:];\n"
593 "$Letters+;\n"
594 "$Numbers+;\n"
595 "[^$Letters $Numbers];\n"
596 "!.*;\n";
597 UnicodeString testString1 = "abc123..abc";
598 // 01234567890
599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
600 UErrorCode status=U_ZERO_ERROR;
601 UParseError parseError;
602
603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse Error, status);
604 if(U_FAILURE(status)) {
605 dataerrln("Fail : in construction - %s", u_errorName(status));
606 } else {
607 bi->setText(testString1);
608 doBoundaryTest(*bi, testString1, bounds1);
609 }
610 delete bi;
611 }
612
613
614 //
615 // TestQuoteGrouping
616 // Single quotes within rules imply a grouping, so that a modifier
617 // following the quoted text (* or +) applies to all of the quoted chars.
618 //
619 void RBBIAPITest::TestQuoteGrouping() {
620 UnicodeString rulesString1 = "#Here comes the rule...\n"
621 "'$@!'*;\n" // (\$\@\!)*
622 ".;\n";
623
624 UnicodeString testString1 = "$@!$@!X$@!!X";
625 // 0123456789012
626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
627 UErrorCode status=U_ZERO_ERROR;
628 UParseError parseError;
629
630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse Error, status);
631 if(U_FAILURE(status)) {
632 dataerrln("Fail : in construction - %s", u_errorName(status));
633 } else {
634 bi->setText(testString1);
635 doBoundaryTest(*bi, testString1, bounds1);
636 }
637 delete bi;
638 }
639
640 //
641 // TestRuleStatus
642 // Test word break rule status constants.
643 //
644 void RBBIAPITest::TestRuleStatus() {
645 UChar str[30];
646 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
649 // 012345678901234567 8 9 0
650 // Katakana
651 str, 30);
652 UnicodeString testString1(str);
653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
656 UBRK_WORD_IDEO, UBRK_WORD_NONE};
657
658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WOR D_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WOR D_NONE_LIMIT,
660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
661
662 UErrorCode status=U_ZERO_ERROR;
663
664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
665 if(U_FAILURE(status)) {
666 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
667 } else {
668 bi->setText(testString1);
669 // First test that the breaks are in the right spots.
670 doBoundaryTest(*bi, testString1, bounds1);
671
672 // Then go back and check tag values
673 int32_t i = 0;
674 int32_t pos, tag;
675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i ++) {
676 if (pos != bounds1[i]) {
677 errln("FAIL: unexpected word break at postion %d", pos);
678 break;
679 }
680 tag = bi->getRuleStatus();
681 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
682 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
683 break;
684 }
685
686 // Check that we get the same tag values from getRuleStatusVec()
687 int32_t vec[10];
688 int t = bi->getRuleStatusVec(vec, 10, status);
689 TEST_ASSERT_SUCCESS(status);
690 TEST_ASSERT(t==1);
691 TEST_ASSERT(vec[0] == tag);
692 }
693 }
694 delete bi;
695
696 // Now test line break status. This test mostly is to confirm that the sta tus constants
697 // are correctly declared in the header.
698 testString1 = "test line. \n";
699 // break type s s h
700
701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
702 if(U_FAILURE(status)) {
703 errcheckln(status, "failed to create word break iterator. - %s", u_erro rName(status));
704 } else {
705 int32_t i = 0;
706 int32_t pos, tag;
707 UBool success;
708
709 bi->setText(testString1);
710 pos = bi->current();
711 tag = bi->getRuleStatus();
712 for (i=0; i<3; i++) {
713 switch (i) {
714 case 0:
715 success = pos==0 && tag==UBRK_LINE_SOFT; break;
716 case 1:
717 success = pos==5 && tag==UBRK_LINE_SOFT; break;
718 case 2:
719 success = pos==12 && tag==UBRK_LINE_HARD; break;
720 default:
721 success = FALSE; break;
722 }
723 if (success == FALSE) {
724 errln("Fail: incorrect word break status or position. i=%d, po s=%d, tag=%d",
725 i, pos, tag);
726 break;
727 }
728 pos = bi->next();
729 tag = bi->getRuleStatus();
730 }
731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT _LIMIT)) {
734 errln("UBRK_LINE_* constants from header are inconsistent.");
735 }
736 }
737 delete bi;
738
739 }
740
741
742 //
743 // TestRuleStatusVec
744 // Test the vector form of break rule status.
745 //
746 void RBBIAPITest::TestRuleStatusVec() {
747 UnicodeString rulesString( "[A-N]{100}; \n"
748 "[a-w]{200}; \n"
749 "[\\p{L}]{300}; \n"
750 "[\\p{N}]{400}; \n"
751 "[0-5]{500}; \n"
752 "!.*;\n", -1, US_INV);
753 UnicodeString testString1 = "Aapz5?";
754 int32_t statusVals[10];
755 int32_t numStatuses;
756 int32_t pos;
757
758 UErrorCode status=U_ZERO_ERROR;
759 UParseError parseError;
760
761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseE rror, status);
762 if (U_FAILURE(status)) {
763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__ , u_errorName(status));
764 } else {
765 bi->setText(testString1);
766
767 // A
768 pos = bi->next();
769 TEST_ASSERT(pos==1);
770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
771 TEST_ASSERT_SUCCESS(status);
772 TEST_ASSERT(numStatuses == 2);
773 TEST_ASSERT(statusVals[0] == 100);
774 TEST_ASSERT(statusVals[1] == 300);
775
776 // a
777 pos = bi->next();
778 TEST_ASSERT(pos==2);
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780 TEST_ASSERT_SUCCESS(status);
781 TEST_ASSERT(numStatuses == 2);
782 TEST_ASSERT(statusVals[0] == 200);
783 TEST_ASSERT(statusVals[1] == 300);
784
785 // p
786 pos = bi->next();
787 TEST_ASSERT(pos==3);
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT(numStatuses == 2);
791 TEST_ASSERT(statusVals[0] == 200);
792 TEST_ASSERT(statusVals[1] == 300);
793
794 // z
795 pos = bi->next();
796 TEST_ASSERT(pos==4);
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798 TEST_ASSERT_SUCCESS(status);
799 TEST_ASSERT(numStatuses == 1);
800 TEST_ASSERT(statusVals[0] == 300);
801
802 // 5
803 pos = bi->next();
804 TEST_ASSERT(pos==5);
805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
806 TEST_ASSERT_SUCCESS(status);
807 TEST_ASSERT(numStatuses == 2);
808 TEST_ASSERT(statusVals[0] == 400);
809 TEST_ASSERT(statusVals[1] == 500);
810
811 // ?
812 pos = bi->next();
813 TEST_ASSERT(pos==6);
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815 TEST_ASSERT_SUCCESS(status);
816 TEST_ASSERT(numStatuses == 1);
817 TEST_ASSERT(statusVals[0] == 0);
818
819 //
820 // Check buffer overflow error handling. Char == A
821 //
822 bi->first();
823 pos = bi->next();
824 TEST_ASSERT(pos==1);
825 memset(statusVals, -1, sizeof(statusVals));
826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
828 TEST_ASSERT(numStatuses == 2);
829 TEST_ASSERT(statusVals[0] == -1);
830
831 status = U_ZERO_ERROR;
832 memset(statusVals, -1, sizeof(statusVals));
833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
835 TEST_ASSERT(numStatuses == 2);
836 TEST_ASSERT(statusVals[0] == 100);
837 TEST_ASSERT(statusVals[1] == -1);
838
839 status = U_ZERO_ERROR;
840 memset(statusVals, -1, sizeof(statusVals));
841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
842 TEST_ASSERT_SUCCESS(status);
843 TEST_ASSERT(numStatuses == 2);
844 TEST_ASSERT(statusVals[0] == 100);
845 TEST_ASSERT(statusVals[1] == 300);
846 TEST_ASSERT(statusVals[2] == -1);
847 }
848 delete bi;
849
850 }
851
852 //
853 // Bug 2190 Regression test. Builder crash on rule consisting of only a
854 // $variable reference
855 void RBBIAPITest::TestBug2190() {
856 UnicodeString rulesString1 = "$aaa = abcd;\n"
857 "$bbb = $aaa;\n"
858 "$bbb;\n";
859 UnicodeString testString1 = "abcdabcd";
860 // 01234567890
861 int32_t bounds1[] = {0, 4, 8};
862 UErrorCode status=U_ZERO_ERROR;
863 UParseError parseError;
864
865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse Error, status);
866 if(U_FAILURE(status)) {
867 dataerrln("Fail : in construction - %s", u_errorName(status));
868 } else {
869 bi->setText(testString1);
870 doBoundaryTest(*bi, testString1, bounds1);
871 }
872 delete bi;
873 }
874
875
876 void RBBIAPITest::TestRegistration() {
877 #if !UCONFIG_NO_SERVICE
878 UErrorCode status = U_ZERO_ERROR;
879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
880 // ok to not delete these if we exit because of error?
881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", sta tus);
882 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status );
884
885 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
886 dataerrln("Error creating instances of break interactors - %s", u_errorN ame(status));
887
888 delete ja_word;
889 delete ja_char;
890 delete root_word;
891 delete root_char;
892
893 return;
894 }
895
896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
897 {
898 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
899 if (ja_word && *ja_word == *root_word) {
900 errln("japan not different from root");
901 }
902 #endif
903 }
904
905 {
906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", statu s);
907 UBool fail = TRUE;
908 if(result){
909 fail = *result != *ja_word;
910 }
911 delete result;
912 if (fail) {
913 errln("bad result for xx_XX/word");
914 }
915 }
916
917 {
918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
919 UBool fail = TRUE;
920 if(result){
921 fail = *result != *ja_char;
922 }
923 delete result;
924 if (fail) {
925 errln("bad result for ja_JP/char");
926 }
927 }
928
929 {
930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
931 UBool fail = TRUE;
932 if(result){
933 fail = *result != *root_char;
934 }
935 delete result;
936 if (fail) {
937 errln("bad result for xx_XX/char");
938 }
939 }
940
941 {
942 StringEnumeration* avail = BreakIterator::getAvailableLocales();
943 UBool found = FALSE;
944 const UnicodeString* p;
945 while ((p = avail->snext(status))) {
946 if (p->compare("xx") == 0) {
947 found = TRUE;
948 break;
949 }
950 }
951 delete avail;
952 if (!found) {
953 errln("did not find test locale");
954 }
955 }
956
957 {
958 UBool unreg = BreakIterator::unregister(key, status);
959 if (!unreg) {
960 errln("unable to unregister");
961 }
962 }
963
964 {
965 BreakIterator* result = BreakIterator::createWordInstance("en_US", statu s);
966 BreakIterator* root = BreakIterator::createWordInstance("", status);
967 UBool fail = TRUE;
968 if(root){
969 fail = *root != *result;
970 }
971 delete root;
972 delete result;
973 if (fail) {
974 errln("did not get root break");
975 }
976 }
977
978 {
979 StringEnumeration* avail = BreakIterator::getAvailableLocales();
980 UBool found = FALSE;
981 const UnicodeString* p;
982 while ((p = avail->snext(status))) {
983 if (p->compare("xx") == 0) {
984 found = TRUE;
985 break;
986 }
987 }
988 delete avail;
989 if (found) {
990 errln("found test locale");
991 }
992 }
993
994 {
995 int32_t count;
996 UBool foundLocale = FALSE;
997 const Locale *avail = BreakIterator::getAvailableLocales(count);
998 for (int i=0; i<count; i++) {
999 if (avail[i] == Locale::getEnglish()) {
1000 foundLocale = TRUE;
1001 break;
1002 }
1003 }
1004 if (foundLocale == FALSE) {
1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN .");
1006 }
1007 }
1008
1009
1010 // ja_word was adopted by factory
1011 delete ja_char;
1012 delete root_word;
1013 delete root_char;
1014 #endif
1015 }
1016
1017 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1018 UErrorCode status = U_ZERO_ERROR;
1019 UParseError parseError;
1020 parseError.line = 0;
1021 parseError.offset = 0;
1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, & status));
1023 uint32_t length;
1024 const UChar *builtSource;
1025 const uint8_t *rbbiRules;
1026 const uint8_t *builtRules;
1027
1028 if (U_FAILURE(status)) {
1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(statu s));
1030 return;
1031 }
1032
1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1034 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fR uleSource);
1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, par seError, status);
1036 if (U_FAILURE(status)) {
1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, colum n %d\n",
1038 u_errorName(status), parseError.line, parseError.offset);
1039 return;
1040 };
1041 rbbiRules = brkItr->getBinaryRules(length);
1042 logln("Comparing \"%s\" len=%d", dataFile, length);
1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1044 errln("Built rules and rebuilt rules are different %s", dataFile);
1045 return;
1046 }
1047 delete brkItr;
1048 }
1049
1050 void RBBIAPITest::TestRoundtripRules() {
1051 RoundtripRule("word");
1052 RoundtripRule("title");
1053 RoundtripRule("sent");
1054 RoundtripRule("line");
1055 RoundtripRule("char");
1056 if (!quick) {
1057 RoundtripRule("word_POSIX");
1058 }
1059 }
1060
1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1062 // (these are protected so we access them via a local class RBBIWithProtectedFun ctions).
1063 // This is just a sanity check, not a thorough test (e.g. we don't check that th e
1064 // first delete actually frees rulesCopy).
1065 void RBBIAPITest::TestCreateFromRBBIData() {
1066 // Get some handy RBBIData
1067 const char *brkName = "word"; // or "sent", "line", "char", etc.
1068 UErrorCode status = U_ZERO_ERROR;
1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &s tatus));
1070 if ( U_SUCCESS(status) ) {
1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMem ory(data.getAlias());
1072 uint32_t length = builtRules->fLength;
1073 RBBIWithProtectedFunctions * brkItr;
1074
1075 // Try the memory-adopting constructor, need to copy the data first
1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1077 if ( rulesCopy ) {
1078 uprv_memcpy( rulesCopy, builtRules, length );
1079
1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1081 if ( U_SUCCESS(status) ) {
1082 delete brkItr; // this should free rulesCopy
1083 } else {
1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): IC U Error \"%s\"\n", u_errorName(status) );
1085 status = U_ZERO_ERROR;// reset for the next test
1086 uprv_free( rulesCopy );
1087 }
1088 }
1089
1090 // Now try the non-adopting constructor
1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFun ctions::kDontAdopt, status);
1092 if ( U_SUCCESS(status) ) {
1093 delete brkItr; // this should NOT attempt to free builtRules
1094 if (builtRules->fLength != length) { // sanity check
1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted) : delete affects data\n" );
1096 }
1097 } else {
1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): IC U Error \"%s\"\n", u_errorName(status) );
1099 }
1100 }
1101
1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
1103 //
1104 status = U_ZERO_ERROR;
1105 RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::create WordInstance(Locale::getEnglish(), status);
1106 if (rb == NULL || U_FAILURE(status)) {
1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::g etEnglish) - %s", u_errorName(status));
1108 } else {
1109 uint32_t length;
1110 const uint8_t *rules = rb->getBinaryRules(length);
1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
1112 TEST_ASSERT_SUCCESS(status);
1113 TEST_ASSERT(*rb == *rb2);
1114 UnicodeString words = "one two three ";
1115 rb2->setText(words);
1116 int wordCounter = 0;
1117 while (rb2->next() != UBRK_DONE) {
1118 wordCounter++;
1119 }
1120 TEST_ASSERT(wordCounter == 6);
1121
1122 status = U_ZERO_ERROR;
1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1 , status);
1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1125
1126 delete rb;
1127 delete rb2;
1128 delete rb3;
1129 }
1130 }
1131
1132
1133 void RBBIAPITest::TestRefreshInputText() {
1134 /*
1135 * RefreshInput changes out the input of a Break Iterator without
1136 * changing anything else in the iterator's state. Used with Java JNI,
1137 * when Java moves the underlying string storage. This test
1138 * runs BreakIterator::next() repeatedly, moving the text in the middle o f the sequence.
1139 * The right set of boundaries should still be found.
1140 */
1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; / * = " A B C D" */
1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1143 UErrorCode status = U_ZERO_ERROR;
1144 UText ut1 = UTEXT_INITIALIZER;
1145 UText ut2 = UTEXT_INITIALIZER;
1146 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::create LineInstance(Locale::getEnglish(), status);
1147 TEST_ASSERT_SUCCESS(status);
1148
1149 utext_openUChars(&ut1, testStr, -1, &status);
1150 TEST_ASSERT_SUCCESS(status);
1151
1152 if (U_SUCCESS(status)) {
1153 bi->setText(&ut1, status);
1154 TEST_ASSERT_SUCCESS(status);
1155
1156 /* Line boundaries will occur before each letter in the original string */
1157 TEST_ASSERT(1 == bi->next());
1158 TEST_ASSERT(3 == bi->next());
1159
1160 /* Move the string, kill the original string. */
1161 u_strcpy(movedStr, testStr);
1162 u_memset(testStr, 0x20, u_strlen(testStr));
1163 utext_openUChars(&ut2, movedStr, -1, &status);
1164 TEST_ASSERT_SUCCESS(status);
1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status) ;
1166 TEST_ASSERT_SUCCESS(status);
1167 TEST_ASSERT(bi == returnedBI);
1168
1169 /* Find the following matches, now working in the moved string. */
1170 TEST_ASSERT(5 == bi->next());
1171 TEST_ASSERT(7 == bi->next());
1172 TEST_ASSERT(8 == bi->next());
1173 TEST_ASSERT(UBRK_DONE == bi->next());
1174
1175 utext_close(&ut1);
1176 utext_close(&ut2);
1177 }
1178 delete bi;
1179
1180 }
1181
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
/**
 * Debugging aid: log the boundaries that a break iterator reports for a string.
 *
 * Walks brk forward with next() until UBRK_DONE, then logs two lines through
 * the test object: the text wrapped in lenticular brackets with a pilcrow
 * inserted at each reported boundary, and the list of boundary offsets.
 *
 * @param brk   Iterator to walk. It is advanced from its current position;
 *              callers in this file call first() beforehand.
 * @param ustr  The text, used for display and to size the offset buffer.
 *              NOTE(review): presumably the same text that was set on brk.
 * @param it    Test object that receives the log and error output.
 */
static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
    static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
    it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));

    const int32_t capacity = ustr.length();
    int32_t *pos = new int32_t[capacity];
    int32_t posCount = 0;

    // calculate breaks up front, so we can print out
    // sans any debugging
    for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
        // Check before storing: this never writes past the buffer (including
        // the zero-length case) and allows a count equal to the string length.
        if(posCount>=capacity) {
            it.errln("brk count exceeds string length!");
            delete [] pos;   // do not leak the buffer on the error path
            return;
        }
        pos[posCount++] = n;
    }

    // First line: the text with a marker at every boundary.
    UnicodeString out;
    out.append((UChar)CHSTR);
    int32_t prev = 0;
    for(int32_t i=0;i<posCount;i++) {
        int32_t n=pos[i];
        out.append(ustr.tempSubString(prev,n-prev));
        out.append((UChar)PILCROW);
        prev=n;
    }
    out.append(ustr.tempSubString(prev,ustr.length()-prev));
    out.append((UChar)CHEND);
    it.logln(out);

    // Second line: the numeric offsets, space separated.
    out.remove();
    for(int32_t i=0;i<posCount;i++) {
        char tmp[100];
        sprintf(tmp,"%d ",pos[i]);
        out.append(UnicodeString(tmp));
    }
    it.logln(out);
    delete [] pos;
}
#endif
1222
1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BRE AK_ITERATION
1225 UErrorCode status = U_ZERO_ERROR;
1226 LocalPointer<FilteredBreakIteratorBuilder> builder;
1227 LocalPointer<BreakIterator> baseBI;
1228 LocalPointer<BreakIterator> filteredBI;
1229 LocalPointer<BreakIterator> frenchBI;
1230
1231 const UnicodeString text("In the meantime Mr. Weston arrived with his small sh ip, which he had now recovered. Capt. Gorges, who informed the Sgt. here that on e purpose of his going east was to meet with Mr. Weston, took this opportunity t o call him to account for some abuses he had to lay to his charge."); // (Willia m Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - ed ited.
1232 const UnicodeString ABBR_MR("Mr.");
1233 const UnicodeString ABBR_CAPT("Capt.");
1234
1235 {
1236 logln("Constructing empty builder\n");
1237 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1238 TEST_ASSERT_SUCCESS(status);
1239
1240 logln("Constructing base BI\n");
1241 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish (), status));
1242 TEST_ASSERT_SUCCESS(status);
1243
1244 logln("Building new BI\n");
1245 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1246 TEST_ASSERT_SUCCESS(status);
1247
1248 if (U_SUCCESS(status)) {
1249 logln("Testing:");
1250 filteredBI->setText(text);
1251 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1252 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1253 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1254 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1255 TEST_ASSERT(278 == filteredBI->next()); // charge.
1256 filteredBI->first();
1257 prtbrks(filteredBI.getAlias(), text, *this);
1258 }
1259 }
1260
1261 {
1262 logln("Constructing empty builder\n");
1263 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1264 TEST_ASSERT_SUCCESS(status);
1265
1266 if (U_SUCCESS(status)) {
1267 logln("Adding Mr. as an exception\n");
1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1269 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // a lready have it
1270 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1271 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1272 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1273 TEST_ASSERT_SUCCESS(status);
1274
1275 logln("Constructing base BI\n");
1276 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng lish(), status));
1277 TEST_ASSERT_SUCCESS(status);
1278
1279 logln("Building new BI\n");
1280 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1281 TEST_ASSERT_SUCCESS(status);
1282
1283 logln("Testing:");
1284 filteredBI->setText(text);
1285 TEST_ASSERT(84 == filteredBI->next());
1286 TEST_ASSERT(90 == filteredBI->next());// Capt.
1287 TEST_ASSERT(278 == filteredBI->next());
1288 filteredBI->first();
1289 prtbrks(filteredBI.getAlias(), text, *this);
1290 }
1291 }
1292
1293
1294 {
1295 logln("Constructing empty builder\n");
1296 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1297 TEST_ASSERT_SUCCESS(status);
1298
1299 if (U_SUCCESS(status)) {
1300 logln("Adding Mr. and Capt as an exception\n");
1301 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1302 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1303 TEST_ASSERT_SUCCESS(status);
1304
1305 logln("Constructing base BI\n");
1306 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng lish(), status));
1307 TEST_ASSERT_SUCCESS(status);
1308
1309 logln("Building new BI\n");
1310 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1311 TEST_ASSERT_SUCCESS(status);
1312
1313 logln("Testing:");
1314 filteredBI->setText(text);
1315 TEST_ASSERT(84 == filteredBI->next());
1316 TEST_ASSERT(278 == filteredBI->next());
1317 filteredBI->first();
1318 prtbrks(filteredBI.getAlias(), text, *this);
1319 }
1320 }
1321
1322
1323 {
1324 logln("Constructing English builder\n");
1325 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge tEnglish(), status));
1326 TEST_ASSERT_SUCCESS(status);
1327
1328 logln("Constructing base BI\n");
1329 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish (), status));
1330 TEST_ASSERT_SUCCESS(status);
1331
1332 if (U_SUCCESS(status)) {
1333 logln("unsuppressing 'Capt'");
1334 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1335
1336 logln("Building new BI\n");
1337 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1338 TEST_ASSERT_SUCCESS(status);
1339
1340 if(filteredBI.isValid()) {
1341 logln("Testing:");
1342 filteredBI->setText(text);
1343 TEST_ASSERT(84 == filteredBI->next());
1344 TEST_ASSERT(90 == filteredBI->next());
1345 TEST_ASSERT(278 == filteredBI->next());
1346 filteredBI->first();
1347 prtbrks(filteredBI.getAlias(), text, *this);
1348 }
1349 }
1350 }
1351
1352
1353 {
1354 logln("Constructing English builder\n");
1355 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge tEnglish(), status));
1356 TEST_ASSERT_SUCCESS(status);
1357
1358 logln("Constructing base BI\n");
1359 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish (), status));
1360 TEST_ASSERT_SUCCESS(status);
1361
1362 if (U_SUCCESS(status)) {
1363 logln("Building new BI\n");
1364 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1365 TEST_ASSERT_SUCCESS(status);
1366
1367 if(filteredBI.isValid()) {
1368 logln("Testing:");
1369 filteredBI->setText(text);
1370 TEST_ASSERT(84 == filteredBI->next());
1371 TEST_ASSERT(278 == filteredBI->next());
1372 filteredBI->first();
1373 prtbrks(filteredBI.getAlias(), text, *this);
1374 }
1375 }
1376 }
1377
1378 // reenable once french is in
1379 {
1380 logln("Constructing French builder");
1381 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge tFrench(), status));
1382 TEST_ASSERT_SUCCESS(status);
1383
1384 logln("Constructing base BI\n");
1385 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench( ), status));
1386 TEST_ASSERT_SUCCESS(status);
1387
1388 if (U_SUCCESS(status)) {
1389 logln("Building new BI\n");
1390 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1391 TEST_ASSERT_SUCCESS(status);
1392 }
1393
1394 if(frenchBI.isValid()) {
1395 logln("Testing:");
1396 UnicodeString frText("C'est MM. Duval.");
1397 frenchBI->setText(frText);
1398 TEST_ASSERT(16 == frenchBI->next());
1399 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1400 frenchBI->first();
1401 prtbrks(frenchBI.getAlias(), frText, *this);
1402 logln("Testing against English:");
1403 filteredBI->setText(frText);
1404 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI i s english.
1405 TEST_ASSERT(16 == filteredBI->next());
1406 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1407 filteredBI->first();
1408 prtbrks(filteredBI.getAlias(), frText, *this);
1409
1410 // Verify ==
1411 TEST_ASSERT_TRUE(*frenchBI == *frenchBI);
1412 TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1413 TEST_ASSERT_TRUE(*frenchBI != *filteredBI);
1414 } else {
1415 dataerrln("French BI: not valid.");
1416 }
1417 }
1418
1419 #else
1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCON FIG_NO_FILTERED_BREAK_ITERATION");
1421 #endif
1422 }
1423
1424 //---------------------------------------------
1425 // runIndexedTest
1426 //---------------------------------------------
1427
1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1429 {
1430 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1431 switch (index) {
1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break ;
1433 #if !UCONFIG_NO_FILE_IO
1434 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1435 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1436 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1437 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1438 case 4: name = "TestIteration"; if (exec) TestIteration(); break;
1439 #else
1440 case 0: case 1: case 2: case 3: case 4: name = "skip"; break;
1441 #endif
1442 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1443 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); brea k;
1444 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); brea k;
1445 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;
1446 #if !UCONFIG_NO_FILE_IO
1447 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;
1448 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1449 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1450 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); br eak;
1451 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIDa ta(); break;
1452 #else
1453 case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
1454 #endif
1455 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText() ; break;
1456
1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1458 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBre akIteratorBuilder(); break;
1459 #else
1460 case 15: name="skip"; break;
1461 #endif
1462 default: name = ""; break; // needed to end loop
1463 }
1464 }
1465
1466 //---------------------------------------------
1467 //Internal subroutines
1468 //---------------------------------------------
1469
1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1471 logln((UnicodeString)"testIsBoundary():");
1472 int32_t p = 0;
1473 UBool isB;
1474 for (int32_t i = 0; i < text.length(); i++) {
1475 isB = bi.isBoundary(i);
1476 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1477
1478 if (i == boundaries[p]) {
1479 if (!isB)
1480 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1481 p++;
1482 }
1483 else {
1484 if (isB)
1485 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1486 }
1487 }
1488 }
1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof fset, int32_t expectedOffset, const char* expectedString){
1490 UnicodeString selected;
1491 UnicodeString expected=CharsToUnicodeString(expectedString);
1492
1493 if(gotoffset != expectedOffset)
1494 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeStrin g)" instead of #" + expectedOffset);
1495 if(start <= gotoffset){
1496 testString.extractBetween(start, gotoffset, selected);
1497 }
1498 else{
1499 testString.extractBetween(gotoffset, start, selected);
1500 }
1501 if(selected.compare(expected) != 0)
1502 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1503 else
1504 logln(prettify("****selected \"" + selected + "\""));
1505 }
1506
1507 //---------------------------------------------
1508 //RBBIWithProtectedFunctions class functions
1509 //---------------------------------------------
1510
1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UEr rorCode &status)
1512 : RuleBasedBreakIterator(data, status)
1513 {
1514 }
1515
1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* dat a, enum EDontAdopt, UErrorCode &status)
1517 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1518 {
1519 }
1520
1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
OLDNEW
« no previous file with comments | « source/test/intltest/rbbiapts.h ('k') | source/test/intltest/rbbitst.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698