| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * Copyright (c) 1999-2014, International Business Machines | |
| 3 * Corporation and others. All Rights Reserved. | |
| 4 ******************************************************************** | |
| 5 * Date Name Description | |
| 6 * 12/14/99 Madhu Creation. | |
| 7 * 01/12/2000 Madhu updated for changed API | |
| 8 ********************************************************************/ | |
| 9 | |
| 10 #include "unicode/utypes.h" | |
| 11 | |
| 12 #if !UCONFIG_NO_BREAK_ITERATION | |
| 13 | |
| 14 #include "unicode/uchar.h" | |
| 15 #include "intltest.h" | |
| 16 #include "unicode/rbbi.h" | |
| 17 #include "unicode/schriter.h" | |
| 18 #include "rbbiapts.h" | |
| 19 #include "rbbidata.h" | |
| 20 #include "cstring.h" | |
| 21 #include "ubrkimpl.h" | |
| 22 #include "unicode/locid.h" | |
| 23 #include "unicode/ustring.h" | |
| 24 #include "unicode/utext.h" | |
| 25 #include "cmemory.h" | |
| 26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING | |
| 27 #include "unicode/filteredbrk.h" | |
| 28 #include <stdio.h> // for sprintf | |
| 29 #endif | |
| 30 /** | |
| 31 * API Test the RuleBasedBreakIterator class | |
| 32 */ | |
| 33 | |
| 34 | |
// Report a data error (via dataerrln) naming this file and line when the given
// UErrorCode holds a failure code. The macro does not return, so the calling
// test keeps running afterwards.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
    dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Report a test failure (via errln) when expr compares equal to FALSE. The
// message contains the file, the line and the stringified text of expr.
// NOTE(review): both macros expand to a bare brace block rather than
// do { } while (0), so they do not compose safely with an unbraced if/else.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
| 40 | |
| 41 void RBBIAPITest::TestCloneEquals() | |
| 42 { | |
| 43 | |
| 44 UErrorCode status=U_ZERO_ERROR; | |
| 45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
| 46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
| 47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
| 48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createWordInstance(Locale::getDefault(), status); | |
| 49 if(U_FAILURE(status)){ | |
| 50 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
| 51 return; | |
| 52 } | |
| 53 | |
| 54 | |
| 55 UnicodeString testString="Testing word break iterators's clone() and equals(
)"; | |
| 56 bi1->setText(testString); | |
| 57 bi2->setText(testString); | |
| 58 biequal->setText(testString); | |
| 59 | |
| 60 bi3->setText("hello"); | |
| 61 | |
| 62 logln((UnicodeString)"Testing equals()"); | |
| 63 | |
| 64 logln((UnicodeString)"Testing == and !="); | |
| 65 UBool b = (*bi1 != *biequal); | |
| 66 b |= *bi1 == *bi2; | |
| 67 b |= *bi1 == *bi3; | |
| 68 if (b) { | |
| 69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); | |
| 70 } | |
| 71 | |
| 72 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) | |
| 73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); | |
| 74 | |
| 75 | |
| 76 // Quick test of RulesBasedBreakIterator assignment - | |
| 77 // Check that | |
| 78 // two different iterators are != | |
| 79 // they are == after assignment | |
| 80 // source and dest iterator produce the same next() after assignment. | |
| 81 // deleting one doesn't disable the other. | |
| 82 logln("Testing assignment"); | |
| 83 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::creat
eLineInstance(Locale::getDefault(), status); | |
| 84 if(U_FAILURE(status)){ | |
| 85 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
| 86 return; | |
| 87 } | |
| 88 | |
| 89 RuleBasedBreakIterator biDefault, biDefault2; | |
| 90 if(U_FAILURE(status)){ | |
| 91 errln((UnicodeString)"FAIL : in construction of default iterator"); | |
| 92 return; | |
| 93 } | |
| 94 if (biDefault == *bix) { | |
| 95 errln((UnicodeString)"ERROR: iterators should not compare =="); | |
| 96 return; | |
| 97 } | |
| 98 if (biDefault != biDefault2) { | |
| 99 errln((UnicodeString)"ERROR: iterators should compare =="); | |
| 100 return; | |
| 101 } | |
| 102 | |
| 103 | |
| 104 UnicodeString HelloString("Hello Kitty"); | |
| 105 bix->setText(HelloString); | |
| 106 if (*bix == *bi2) { | |
| 107 errln(UnicodeString("ERROR: strings should not be equal before assignmen
t.")); | |
| 108 } | |
| 109 *bix = *bi2; | |
| 110 if (*bix != *bi2) { | |
| 111 errln(UnicodeString("ERROR: strings should be equal before assignment.")
); | |
| 112 } | |
| 113 | |
| 114 int bixnext = bix->next(); | |
| 115 int bi2next = bi2->next(); | |
| 116 if (! (bixnext == bi2next && bixnext == 7)) { | |
| 117 errln(UnicodeString("ERROR: iterators behaved differently after assignme
nt.")); | |
| 118 } | |
| 119 delete bix; | |
| 120 if (bi2->next() != 8) { | |
| 121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."
)); | |
| 122 } | |
| 123 | |
| 124 | |
| 125 | |
| 126 logln((UnicodeString)"Testing clone()"); | |
| 127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); | |
| 128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); | |
| 129 | |
| 130 if(*bi1clone != *bi1 || *bi1clone != *biequal || | |
| 131 *bi1clone == *bi3 || *bi1clone == *bi2) | |
| 132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); | |
| 133 | |
| 134 if(*bi2clone == *bi1 || *bi2clone == *biequal || | |
| 135 *bi2clone == *bi3 || *bi2clone != *bi2) | |
| 136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); | |
| 137 | |
| 138 if(bi1->getText() != bi1clone->getText() || | |
| 139 bi2clone->getText() != bi2->getText() || | |
| 140 *bi2clone == *bi1clone ) | |
| 141 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); | |
| 142 | |
| 143 delete bi1clone; | |
| 144 delete bi2clone; | |
| 145 delete bi1; | |
| 146 delete bi3; | |
| 147 delete bi2; | |
| 148 delete biequal; | |
| 149 } | |
| 150 | |
| 151 void RBBIAPITest::TestBoilerPlate() | |
| 152 { | |
| 153 UErrorCode status = U_ZERO_ERROR; | |
| 154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); | |
| 155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status)
; | |
| 156 if (U_FAILURE(status)) { | |
| 157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(s
tatus)); | |
| 158 return; | |
| 159 } | |
| 160 if(*a!=*b){ | |
| 161 errln("Failed: boilerplate method operator!= does not return correct res
ults"); | |
| 162 } | |
| 163 // Japanese word break iterators are identical to root with | |
| 164 // a dictionary-based break iterator | |
| 165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),statu
s); | |
| 166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),sta
tus); | |
| 167 if(c && d){ | |
| 168 if(*c!=*d){ | |
| 169 errln("Failed: boilerplate method operator== does not return correct
results"); | |
| 170 } | |
| 171 }else{ | |
| 172 errln("creation of break iterator failed"); | |
| 173 } | |
| 174 delete a; | |
| 175 delete b; | |
| 176 delete c; | |
| 177 delete d; | |
| 178 } | |
| 179 | |
| 180 void RBBIAPITest::TestgetRules() | |
| 181 { | |
| 182 UErrorCode status=U_ZERO_ERROR; | |
| 183 | |
| 184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator:
:createCharacterInstance(Locale::getDefault(), status); | |
| 185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator:
:createWordInstance(Locale::getDefault(), status); | |
| 186 if(U_FAILURE(status)){ | |
| 187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); | |
| 188 delete bi1; | |
| 189 delete bi2; | |
| 190 return; | |
| 191 } | |
| 192 | |
| 193 | |
| 194 | |
| 195 logln((UnicodeString)"Testing toString()"); | |
| 196 | |
| 197 bi1->setText((UnicodeString)"Hello there"); | |
| 198 | |
| 199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); | |
| 200 | |
| 201 UnicodeString temp=bi1->getRules(); | |
| 202 UnicodeString temp2=bi2->getRules(); | |
| 203 UnicodeString temp3=bi3->getRules(); | |
| 204 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(tem
p3) != 0) | |
| 205 errln((UnicodeString)"ERROR: error in getRules() method"); | |
| 206 | |
| 207 delete bi1; | |
| 208 delete bi2; | |
| 209 delete bi3; | |
| 210 } | |
| 211 void RBBIAPITest::TestHashCode() | |
| 212 { | |
| 213 UErrorCode status=U_ZERO_ERROR; | |
| 214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
| 215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); | |
| 216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createWordInstance(Locale::getDefault(), status); | |
| 217 if(U_FAILURE(status)){ | |
| 218 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
| 219 delete bi1; | |
| 220 delete bi2; | |
| 221 delete bi3; | |
| 222 return; | |
| 223 } | |
| 224 | |
| 225 | |
| 226 logln((UnicodeString)"Testing hashCode()"); | |
| 227 | |
| 228 bi1->setText((UnicodeString)"Hash code"); | |
| 229 bi2->setText((UnicodeString)"Hash code"); | |
| 230 bi3->setText((UnicodeString)"Hash code"); | |
| 231 | |
| 232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); | |
| 233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); | |
| 234 | |
| 235 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashC
ode() || | |
| 236 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->
hashCode()) | |
| 237 errln((UnicodeString)"ERROR: identical objects have different hashcodes"
); | |
| 238 | |
| 239 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode()
|| | |
| 240 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() ==
bi2->hashCode()) | |
| 241 errln((UnicodeString)"ERROR: different objects have same hashcodes"); | |
| 242 | |
| 243 delete bi1clone; | |
| 244 delete bi2clone; | |
| 245 delete bi1; | |
| 246 delete bi2; | |
| 247 delete bi3; | |
| 248 | |
| 249 } | |
//
//  TestGetSetAdoptText
//       Exercises the text accessors of RuleBasedBreakIterator:
//         - setText(UnicodeString) / getText(), including the reset of the
//           iteration position to 0,
//         - adoptText(CharacterIterator*),
//         - setText(UText*) / getUText(), by iterating word boundaries over
//           two UTF-8 strings.
//
//       NOTE(review): none of the CharacterIterators allocated with new or
//       clone() below are deleted in this function; presumably adoptText()
//       takes ownership of them - confirm against the BreakIterator API docs.
//
void RBBIAPITest::TestGetSetAdoptText()
{
    logln((UnicodeString)"Testing getText setText ");
    IcuTestErrorCode status(*this, "TestGetSetAdoptText");
    UnicodeString str1="first string.";
    UnicodeString str2="Second string.";
    LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
    LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
    if(status.isFailure()){
        errcheckln(status, "Fail : in construction - %s", status.errorName());
        return;
    }


    CharacterIterator* text1= new StringCharacterIterator(str1);
    CharacterIterator* text1Clone = text1->clone();
    CharacterIterator* text2= new StringCharacterIterator(str2);
    CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"

    // setText(UnicodeString): the text must read back unchanged and the
    // iterator must be positioned at 0.
    wordIter1->setText(str1);
    CharacterIterator *tci = &wordIter1->getText();
    UnicodeString tstr;
    tci->getText(tstr);
    TEST_ASSERT(tstr == str1);
    if(wordIter1->current() != 0)
        errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");

    // Move away from the start so that the next setText() has a position to reset.
    wordIter1->next(2);

    wordIter1->setText(str2);
    if(wordIter1->current() != 0)
        errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");


    // charIter1 now iterates over str1 while wordIter1 still iterates over str2.
    charIter1->adoptText(text1Clone);
    TEST_ASSERT(wordIter1->getText() != charIter1->getText());
    tci = &wordIter1->getText();
    tci->getText(tstr);
    TEST_ASSERT(tstr == str2);
    tci = &charIter1->getText();
    tci->getText(tstr);
    TEST_ASSERT(tstr == str1);


    // adoptText() on a clone: getText() must compare equal to each adopted iterator.
    LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
    rb->adoptText(text1);
    if(rb->getText() != *text1)
        errln((UnicodeString)"ERROR:1 error in adoptText ");
    rb->adoptText(text2);
    if(rb->getText() != *text2)
        errln((UnicodeString)"ERROR:2 error in adoptText ");

    // Adopt where iterator range is less than the entire original source string.
    //   (With the change of the break engine to working with UText internally,
    //    CharacterIterators starting at positions other than zero are not supported)
    rb->adoptText(text3);
    TEST_ASSERT(rb->preceding(2) == 0);
    TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
    //if(rb->preceding(2) != 3) {
    //    errln((UnicodeString)"ERROR:3 error in adoptText ");
    //}
    //if(rb->following(11) != BreakIterator::DONE) {
    //    errln((UnicodeString)"ERROR:4 error in adoptText ");
    //}

    // UText API
    //
    //   Quick test to see if UText is working at all.
    //
    const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
    const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
    //                012345678901

    status.reset();
    LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
    wordIter1->setText(ut.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);

    // Forward iteration over "hello world": boundaries at 0, 5, 6, 11, then done.
    int32_t pos;
    pos = wordIter1->first();
    TEST_ASSERT(pos==0);
    pos = wordIter1->next();
    TEST_ASSERT(pos==5);
    pos = wordIter1->next();
    TEST_ASSERT(pos==6);
    pos = wordIter1->next();
    TEST_ASSERT(pos==11);
    pos = wordIter1->next();
    TEST_ASSERT(pos==UBRK_DONE);

    status.reset();
    LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
    TEST_ASSERT_SUCCESS(status);
    wordIter1->setText(ut2.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);

    // Forward, then backward, iteration over "see ya": boundaries at 0, 3, 4, 6.
    pos = wordIter1->first();
    TEST_ASSERT(pos==0);
    pos = wordIter1->next();
    TEST_ASSERT(pos==3);
    pos = wordIter1->next();
    TEST_ASSERT(pos==4);

    pos = wordIter1->last();
    TEST_ASSERT(pos==6);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==4);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==3);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==0);
    pos = wordIter1->previous();
    TEST_ASSERT(pos==UBRK_DONE);

    // getUText(): only checked for completing without an error status.
    status.reset();
    UnicodeString sEmpty;
    LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
    wordIter1->getUText(gut2.getAlias(), status);
    TEST_ASSERT_SUCCESS(status);
    status.reset();
}
| 371 | |
| 372 | |
| 373 void RBBIAPITest::TestIteration() | |
| 374 { | |
| 375 // This test just verifies that the API is present. | |
| 376 // Testing for correct operation of the break rules happens elsewhere. | |
| 377 | |
| 378 UErrorCode status=U_ZERO_ERROR; | |
| 379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterato
r::createCharacterInstance(Locale::getDefault(), status); | |
| 380 if (U_FAILURE(status) || bi == NULL) { | |
| 381 errcheckln(status, "Failure creating character break iterator. Status =
%s", u_errorName(status)); | |
| 382 } | |
| 383 delete bi; | |
| 384 | |
| 385 status=U_ZERO_ERROR; | |
| 386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Lo
cale::getDefault(), status); | |
| 387 if (U_FAILURE(status) || bi == NULL) { | |
| 388 errcheckln(status, "Failure creating Word break iterator. Status = %s",
u_errorName(status)); | |
| 389 } | |
| 390 delete bi; | |
| 391 | |
| 392 status=U_ZERO_ERROR; | |
| 393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Lo
cale::getDefault(), status); | |
| 394 if (U_FAILURE(status) || bi == NULL) { | |
| 395 errcheckln(status, "Failure creating Line break iterator. Status = %s",
u_errorName(status)); | |
| 396 } | |
| 397 delete bi; | |
| 398 | |
| 399 status=U_ZERO_ERROR; | |
| 400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstanc
e(Locale::getDefault(), status); | |
| 401 if (U_FAILURE(status) || bi == NULL) { | |
| 402 errcheckln(status, "Failure creating Sentence break iterator. Status =
%s", u_errorName(status)); | |
| 403 } | |
| 404 delete bi; | |
| 405 | |
| 406 status=U_ZERO_ERROR; | |
| 407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(L
ocale::getDefault(), status); | |
| 408 if (U_FAILURE(status) || bi == NULL) { | |
| 409 errcheckln(status, "Failure creating Title break iterator. Status = %s"
, u_errorName(status)); | |
| 410 } | |
| 411 delete bi; | |
| 412 | |
| 413 status=U_ZERO_ERROR; | |
| 414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstan
ce(Locale::getDefault(), status); | |
| 415 if (U_FAILURE(status) || bi == NULL) { | |
| 416 errcheckln(status, "Failure creating character break iterator. Status =
%s", u_errorName(status)); | |
| 417 return; // Skip the rest of these tests. | |
| 418 } | |
| 419 | |
| 420 | |
| 421 UnicodeString testString="0123456789"; | |
| 422 bi->setText(testString); | |
| 423 | |
| 424 int32_t i; | |
| 425 i = bi->first(); | |
| 426 if (i != 0) { | |
| 427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); | |
| 428 } | |
| 429 | |
| 430 i = bi->last(); | |
| 431 if (i != 10) { | |
| 432 errln("Incorrect value from bi->last(). Expected 10, got %d", i); | |
| 433 } | |
| 434 | |
| 435 // | |
| 436 // Previous | |
| 437 // | |
| 438 bi->last(); | |
| 439 i = bi->previous(); | |
| 440 if (i != 9) { | |
| 441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d",
__LINE__, i); | |
| 442 } | |
| 443 | |
| 444 | |
| 445 bi->first(); | |
| 446 i = bi->previous(); | |
| 447 if (i != BreakIterator::DONE) { | |
| 448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, g
ot %d", __LINE__, i); | |
| 449 } | |
| 450 | |
| 451 // | |
| 452 // next() | |
| 453 // | |
| 454 bi->first(); | |
| 455 i = bi->next(); | |
| 456 if (i != 1) { | |
| 457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d",
__LINE__, i); | |
| 458 } | |
| 459 | |
| 460 bi->last(); | |
| 461 i = bi->next(); | |
| 462 if (i != BreakIterator::DONE) { | |
| 463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %
d", __LINE__, i); | |
| 464 } | |
| 465 | |
| 466 | |
| 467 // | |
| 468 // current() | |
| 469 // | |
| 470 bi->first(); | |
| 471 i = bi->current(); | |
| 472 if (i != 0) { | |
| 473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got
%d", __LINE__, i); | |
| 474 } | |
| 475 | |
| 476 bi->next(); | |
| 477 i = bi->current(); | |
| 478 if (i != 1) { | |
| 479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got
%d", __LINE__, i); | |
| 480 } | |
| 481 | |
| 482 bi->last(); | |
| 483 bi->next(); | |
| 484 i = bi->current(); | |
| 485 if (i != 10) { | |
| 486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got
%d", __LINE__, i); | |
| 487 } | |
| 488 | |
| 489 bi->first(); | |
| 490 bi->previous(); | |
| 491 i = bi->current(); | |
| 492 if (i != 0) { | |
| 493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got
%d", __LINE__, i); | |
| 494 } | |
| 495 | |
| 496 | |
| 497 // | |
| 498 // Following() | |
| 499 // | |
| 500 i = bi->following(4); | |
| 501 if (i != 5) { | |
| 502 errln("Incorrect value from bi->following() at line %d. Expected 5, got
%d", __LINE__, i); | |
| 503 } | |
| 504 | |
| 505 i = bi->following(9); | |
| 506 if (i != 10) { | |
| 507 errln("Incorrect value from bi->following() at line %d. Expected 10, go
t %d", __LINE__, i); | |
| 508 } | |
| 509 | |
| 510 i = bi->following(10); | |
| 511 if (i != BreakIterator::DONE) { | |
| 512 errln("Incorrect value from bi->following() at line %d. Expected DONE,
got %d", __LINE__, i); | |
| 513 } | |
| 514 | |
| 515 | |
| 516 // | |
| 517 // Preceding | |
| 518 // | |
| 519 i = bi->preceding(4); | |
| 520 if (i != 3) { | |
| 521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got
%d", __LINE__, i); | |
| 522 } | |
| 523 | |
| 524 i = bi->preceding(10); | |
| 525 if (i != 9) { | |
| 526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got
%d", __LINE__, i); | |
| 527 } | |
| 528 | |
| 529 i = bi->preceding(1); | |
| 530 if (i != 0) { | |
| 531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got
%d", __LINE__, i); | |
| 532 } | |
| 533 | |
| 534 i = bi->preceding(0); | |
| 535 if (i != BreakIterator::DONE) { | |
| 536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE,
got %d", __LINE__, i); | |
| 537 } | |
| 538 | |
| 539 | |
| 540 // | |
| 541 // isBoundary() | |
| 542 // | |
| 543 bi->first(); | |
| 544 if (bi->isBoundary(3) != TRUE) { | |
| 545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE,
got FALSE", __LINE__, i); | |
| 546 } | |
| 547 i = bi->current(); | |
| 548 if (i != 3) { | |
| 549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %
d", __LINE__, i); | |
| 550 } | |
| 551 | |
| 552 | |
| 553 if (bi->isBoundary(11) != FALSE) { | |
| 554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE,
got TRUE", __LINE__, i); | |
| 555 } | |
| 556 i = bi->current(); | |
| 557 if (i != 10) { | |
| 558 errln("Incorrect value from bi->current() at line %d. Expected 10, got
%d", __LINE__, i); | |
| 559 } | |
| 560 | |
| 561 // | |
| 562 // next(n) | |
| 563 // | |
| 564 bi->first(); | |
| 565 i = bi->next(4); | |
| 566 if (i != 4) { | |
| 567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d",
__LINE__, i); | |
| 568 } | |
| 569 | |
| 570 i = bi->next(6); | |
| 571 if (i != 10) { | |
| 572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d"
, __LINE__, i); | |
| 573 } | |
| 574 | |
| 575 bi->first(); | |
| 576 i = bi->next(11); | |
| 577 if (i != BreakIterator::DONE) { | |
| 578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterat
or::DONE, got %d", __LINE__, i); | |
| 579 } | |
| 580 | |
| 581 delete bi; | |
| 582 | |
| 583 } | |
| 584 | |
| 585 | |
| 586 | |
| 587 | |
| 588 | |
| 589 | |
| 590 void RBBIAPITest::TestBuilder() { | |
| 591 UnicodeString rulesString1 = "$Letters = [:L:];\n" | |
| 592 "$Numbers = [:N:];\n" | |
| 593 "$Letters+;\n" | |
| 594 "$Numbers+;\n" | |
| 595 "[^$Letters $Numbers];\n" | |
| 596 "!.*;\n"; | |
| 597 UnicodeString testString1 = "abc123..abc"; | |
| 598 // 01234567890 | |
| 599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; | |
| 600 UErrorCode status=U_ZERO_ERROR; | |
| 601 UParseError parseError; | |
| 602 | |
| 603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); | |
| 604 if(U_FAILURE(status)) { | |
| 605 dataerrln("Fail : in construction - %s", u_errorName(status)); | |
| 606 } else { | |
| 607 bi->setText(testString1); | |
| 608 doBoundaryTest(*bi, testString1, bounds1); | |
| 609 } | |
| 610 delete bi; | |
| 611 } | |
| 612 | |
| 613 | |
| 614 // | |
| 615 // TestQuoteGrouping | |
| 616 // Single quotes within rules imply a grouping, so that a modifier | |
| 617 // following the quoted text (* or +) applies to all of the quoted chars. | |
| 618 // | |
| 619 void RBBIAPITest::TestQuoteGrouping() { | |
| 620 UnicodeString rulesString1 = "#Here comes the rule...\n" | |
| 621 "'$@!'*;\n" // (\$\@\!)* | |
| 622 ".;\n"; | |
| 623 | |
| 624 UnicodeString testString1 = "$@!$@!X$@!!X"; | |
| 625 // 0123456789012 | |
| 626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; | |
| 627 UErrorCode status=U_ZERO_ERROR; | |
| 628 UParseError parseError; | |
| 629 | |
| 630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); | |
| 631 if(U_FAILURE(status)) { | |
| 632 dataerrln("Fail : in construction - %s", u_errorName(status)); | |
| 633 } else { | |
| 634 bi->setText(testString1); | |
| 635 doBoundaryTest(*bi, testString1, bounds1); | |
| 636 } | |
| 637 delete bi; | |
| 638 } | |
| 639 | |
| 640 // | |
| 641 // TestRuleStatus | |
| 642 // Test word break rule status constants. | |
| 643 // | |
| 644 void RBBIAPITest::TestRuleStatus() { | |
| 645 UChar str[30]; | |
| 646 //no longer test Han or hiragana breaking here: ruleStatusVec would return
nothing | |
| 647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO | |
| 648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ", | |
| 649 // 012345678901234567 8 9 0 | |
| 650 // Katakana | |
| 651 str, 30); | |
| 652 UnicodeString testString1(str); | |
| 653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; | |
| 654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE,
UBRK_WORD_LETTER, | |
| 655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, | |
| 656 UBRK_WORD_IDEO, UBRK_WORD_NONE}; | |
| 657 | |
| 658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WOR
D_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, | |
| 659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WOR
D_NONE_LIMIT, | |
| 660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; | |
| 661 | |
| 662 UErrorCode status=U_ZERO_ERROR; | |
| 663 | |
| 664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(),
status); | |
| 665 if(U_FAILURE(status)) { | |
| 666 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); | |
| 667 } else { | |
| 668 bi->setText(testString1); | |
| 669 // First test that the breaks are in the right spots. | |
| 670 doBoundaryTest(*bi, testString1, bounds1); | |
| 671 | |
| 672 // Then go back and check tag values | |
| 673 int32_t i = 0; | |
| 674 int32_t pos, tag; | |
| 675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i
++) { | |
| 676 if (pos != bounds1[i]) { | |
| 677 errln("FAIL: unexpected word break at postion %d", pos); | |
| 678 break; | |
| 679 } | |
| 680 tag = bi->getRuleStatus(); | |
| 681 if (tag < tag_lo[i] || tag >= tag_hi[i]) { | |
| 682 errln("FAIL: incorrect tag value %d at position %d", tag, pos); | |
| 683 break; | |
| 684 } | |
| 685 | |
| 686 // Check that we get the same tag values from getRuleStatusVec() | |
| 687 int32_t vec[10]; | |
| 688 int t = bi->getRuleStatusVec(vec, 10, status); | |
| 689 TEST_ASSERT_SUCCESS(status); | |
| 690 TEST_ASSERT(t==1); | |
| 691 TEST_ASSERT(vec[0] == tag); | |
| 692 } | |
| 693 } | |
| 694 delete bi; | |
| 695 | |
| 696 // Now test line break status. This test mostly is to confirm that the sta
tus constants | |
| 697 // are correctly declared in the header. | |
| 698 testString1 = "test line. \n"; | |
| 699 // break type s s h | |
| 700 | |
| 701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
| 702 if(U_FAILURE(status)) { | |
| 703 errcheckln(status, "failed to create word break iterator. - %s", u_erro
rName(status)); | |
| 704 } else { | |
| 705 int32_t i = 0; | |
| 706 int32_t pos, tag; | |
| 707 UBool success; | |
| 708 | |
| 709 bi->setText(testString1); | |
| 710 pos = bi->current(); | |
| 711 tag = bi->getRuleStatus(); | |
| 712 for (i=0; i<3; i++) { | |
| 713 switch (i) { | |
| 714 case 0: | |
| 715 success = pos==0 && tag==UBRK_LINE_SOFT; break; | |
| 716 case 1: | |
| 717 success = pos==5 && tag==UBRK_LINE_SOFT; break; | |
| 718 case 2: | |
| 719 success = pos==12 && tag==UBRK_LINE_HARD; break; | |
| 720 default: | |
| 721 success = FALSE; break; | |
| 722 } | |
| 723 if (success == FALSE) { | |
| 724 errln("Fail: incorrect word break status or position. i=%d, po
s=%d, tag=%d", | |
| 725 i, pos, tag); | |
| 726 break; | |
| 727 } | |
| 728 pos = bi->next(); | |
| 729 tag = bi->getRuleStatus(); | |
| 730 } | |
| 731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || | |
| 732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || | |
| 733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT
_LIMIT)) { | |
| 734 errln("UBRK_LINE_* constants from header are inconsistent."); | |
| 735 } | |
| 736 } | |
| 737 delete bi; | |
| 738 | |
| 739 } | |
| 740 | |
| 741 | |
| 742 // | |
| 743 // TestRuleStatusVec | |
| 744 // Test the vector form of break rule status. | |
| 745 // | |
| 746 void RBBIAPITest::TestRuleStatusVec() { | |
| 747 UnicodeString rulesString( "[A-N]{100}; \n" | |
| 748 "[a-w]{200}; \n" | |
| 749 "[\\p{L}]{300}; \n" | |
| 750 "[\\p{N}]{400}; \n" | |
| 751 "[0-5]{500}; \n" | |
| 752 "!.*;\n", -1, US_INV); | |
| 753 UnicodeString testString1 = "Aapz5?"; | |
| 754 int32_t statusVals[10]; | |
| 755 int32_t numStatuses; | |
| 756 int32_t pos; | |
| 757 | |
| 758 UErrorCode status=U_ZERO_ERROR; | |
| 759 UParseError parseError; | |
| 760 | |
| 761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseE
rror, status); | |
| 762 if (U_FAILURE(status)) { | |
| 763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__
, u_errorName(status)); | |
| 764 } else { | |
| 765 bi->setText(testString1); | |
| 766 | |
| 767 // A | |
| 768 pos = bi->next(); | |
| 769 TEST_ASSERT(pos==1); | |
| 770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
| 771 TEST_ASSERT_SUCCESS(status); | |
| 772 TEST_ASSERT(numStatuses == 2); | |
| 773 TEST_ASSERT(statusVals[0] == 100); | |
| 774 TEST_ASSERT(statusVals[1] == 300); | |
| 775 | |
| 776 // a | |
| 777 pos = bi->next(); | |
| 778 TEST_ASSERT(pos==2); | |
| 779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
| 780 TEST_ASSERT_SUCCESS(status); | |
| 781 TEST_ASSERT(numStatuses == 2); | |
| 782 TEST_ASSERT(statusVals[0] == 200); | |
| 783 TEST_ASSERT(statusVals[1] == 300); | |
| 784 | |
| 785 // p | |
| 786 pos = bi->next(); | |
| 787 TEST_ASSERT(pos==3); | |
| 788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
| 789 TEST_ASSERT_SUCCESS(status); | |
| 790 TEST_ASSERT(numStatuses == 2); | |
| 791 TEST_ASSERT(statusVals[0] == 200); | |
| 792 TEST_ASSERT(statusVals[1] == 300); | |
| 793 | |
| 794 // z | |
| 795 pos = bi->next(); | |
| 796 TEST_ASSERT(pos==4); | |
| 797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
| 798 TEST_ASSERT_SUCCESS(status); | |
| 799 TEST_ASSERT(numStatuses == 1); | |
| 800 TEST_ASSERT(statusVals[0] == 300); | |
| 801 | |
| 802 // 5 | |
| 803 pos = bi->next(); | |
| 804 TEST_ASSERT(pos==5); | |
| 805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
| 806 TEST_ASSERT_SUCCESS(status); | |
| 807 TEST_ASSERT(numStatuses == 2); | |
| 808 TEST_ASSERT(statusVals[0] == 400); | |
| 809 TEST_ASSERT(statusVals[1] == 500); | |
| 810 | |
| 811 // ? | |
| 812 pos = bi->next(); | |
| 813 TEST_ASSERT(pos==6); | |
| 814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
| 815 TEST_ASSERT_SUCCESS(status); | |
| 816 TEST_ASSERT(numStatuses == 1); | |
| 817 TEST_ASSERT(statusVals[0] == 0); | |
| 818 | |
| 819 // | |
| 820 // Check buffer overflow error handling. Char == A | |
| 821 // | |
| 822 bi->first(); | |
| 823 pos = bi->next(); | |
| 824 TEST_ASSERT(pos==1); | |
| 825 memset(statusVals, -1, sizeof(statusVals)); | |
| 826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); | |
| 827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
| 828 TEST_ASSERT(numStatuses == 2); | |
| 829 TEST_ASSERT(statusVals[0] == -1); | |
| 830 | |
| 831 status = U_ZERO_ERROR; | |
| 832 memset(statusVals, -1, sizeof(statusVals)); | |
| 833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); | |
| 834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
| 835 TEST_ASSERT(numStatuses == 2); | |
| 836 TEST_ASSERT(statusVals[0] == 100); | |
| 837 TEST_ASSERT(statusVals[1] == -1); | |
| 838 | |
| 839 status = U_ZERO_ERROR; | |
| 840 memset(statusVals, -1, sizeof(statusVals)); | |
| 841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); | |
| 842 TEST_ASSERT_SUCCESS(status); | |
| 843 TEST_ASSERT(numStatuses == 2); | |
| 844 TEST_ASSERT(statusVals[0] == 100); | |
| 845 TEST_ASSERT(statusVals[1] == 300); | |
| 846 TEST_ASSERT(statusVals[2] == -1); | |
| 847 } | |
| 848 delete bi; | |
| 849 | |
| 850 } | |
| 851 | |
| 852 // | |
| 853 // Bug 2190 Regression test. Builder crash on rule consisting of only a | |
| 854 // $variable reference | |
| 855 void RBBIAPITest::TestBug2190() { | |
| 856 UnicodeString rulesString1 = "$aaa = abcd;\n" | |
| 857 "$bbb = $aaa;\n" | |
| 858 "$bbb;\n"; | |
| 859 UnicodeString testString1 = "abcdabcd"; | |
| 860 // 01234567890 | |
| 861 int32_t bounds1[] = {0, 4, 8}; | |
| 862 UErrorCode status=U_ZERO_ERROR; | |
| 863 UParseError parseError; | |
| 864 | |
| 865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); | |
| 866 if(U_FAILURE(status)) { | |
| 867 dataerrln("Fail : in construction - %s", u_errorName(status)); | |
| 868 } else { | |
| 869 bi->setText(testString1); | |
| 870 doBoundaryTest(*bi, testString1, bounds1); | |
| 871 } | |
| 872 delete bi; | |
| 873 } | |
| 874 | |
| 875 | |
| 876 void RBBIAPITest::TestRegistration() { | |
| 877 #if !UCONFIG_NO_SERVICE | |
| 878 UErrorCode status = U_ZERO_ERROR; | |
| 879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); | |
| 880 // ok to not delete these if we exit because of error? | |
| 881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", sta
tus); | |
| 882 BreakIterator* root_word = BreakIterator::createWordInstance("", status); | |
| 883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status
); | |
| 884 | |
| 885 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { | |
| 886 dataerrln("Error creating instances of break interactors - %s", u_errorN
ame(status)); | |
| 887 | |
| 888 delete ja_word; | |
| 889 delete ja_char; | |
| 890 delete root_word; | |
| 891 delete root_char; | |
| 892 | |
| 893 return; | |
| 894 } | |
| 895 | |
| 896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD,
status); | |
| 897 { | |
| 898 #if 0 // With a dictionary based word breaking, ja_word is identical to root. | |
| 899 if (ja_word && *ja_word == *root_word) { | |
| 900 errln("japan not different from root"); | |
| 901 } | |
| 902 #endif | |
| 903 } | |
| 904 | |
| 905 { | |
| 906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", statu
s); | |
| 907 UBool fail = TRUE; | |
| 908 if(result){ | |
| 909 fail = *result != *ja_word; | |
| 910 } | |
| 911 delete result; | |
| 912 if (fail) { | |
| 913 errln("bad result for xx_XX/word"); | |
| 914 } | |
| 915 } | |
| 916 | |
| 917 { | |
| 918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP",
status); | |
| 919 UBool fail = TRUE; | |
| 920 if(result){ | |
| 921 fail = *result != *ja_char; | |
| 922 } | |
| 923 delete result; | |
| 924 if (fail) { | |
| 925 errln("bad result for ja_JP/char"); | |
| 926 } | |
| 927 } | |
| 928 | |
| 929 { | |
| 930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX",
status); | |
| 931 UBool fail = TRUE; | |
| 932 if(result){ | |
| 933 fail = *result != *root_char; | |
| 934 } | |
| 935 delete result; | |
| 936 if (fail) { | |
| 937 errln("bad result for xx_XX/char"); | |
| 938 } | |
| 939 } | |
| 940 | |
| 941 { | |
| 942 StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
| 943 UBool found = FALSE; | |
| 944 const UnicodeString* p; | |
| 945 while ((p = avail->snext(status))) { | |
| 946 if (p->compare("xx") == 0) { | |
| 947 found = TRUE; | |
| 948 break; | |
| 949 } | |
| 950 } | |
| 951 delete avail; | |
| 952 if (!found) { | |
| 953 errln("did not find test locale"); | |
| 954 } | |
| 955 } | |
| 956 | |
| 957 { | |
| 958 UBool unreg = BreakIterator::unregister(key, status); | |
| 959 if (!unreg) { | |
| 960 errln("unable to unregister"); | |
| 961 } | |
| 962 } | |
| 963 | |
| 964 { | |
| 965 BreakIterator* result = BreakIterator::createWordInstance("en_US", statu
s); | |
| 966 BreakIterator* root = BreakIterator::createWordInstance("", status); | |
| 967 UBool fail = TRUE; | |
| 968 if(root){ | |
| 969 fail = *root != *result; | |
| 970 } | |
| 971 delete root; | |
| 972 delete result; | |
| 973 if (fail) { | |
| 974 errln("did not get root break"); | |
| 975 } | |
| 976 } | |
| 977 | |
| 978 { | |
| 979 StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
| 980 UBool found = FALSE; | |
| 981 const UnicodeString* p; | |
| 982 while ((p = avail->snext(status))) { | |
| 983 if (p->compare("xx") == 0) { | |
| 984 found = TRUE; | |
| 985 break; | |
| 986 } | |
| 987 } | |
| 988 delete avail; | |
| 989 if (found) { | |
| 990 errln("found test locale"); | |
| 991 } | |
| 992 } | |
| 993 | |
| 994 { | |
| 995 int32_t count; | |
| 996 UBool foundLocale = FALSE; | |
| 997 const Locale *avail = BreakIterator::getAvailableLocales(count); | |
| 998 for (int i=0; i<count; i++) { | |
| 999 if (avail[i] == Locale::getEnglish()) { | |
| 1000 foundLocale = TRUE; | |
| 1001 break; | |
| 1002 } | |
| 1003 } | |
| 1004 if (foundLocale == FALSE) { | |
| 1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN
."); | |
| 1006 } | |
| 1007 } | |
| 1008 | |
| 1009 | |
| 1010 // ja_word was adopted by factory | |
| 1011 delete ja_char; | |
| 1012 delete root_word; | |
| 1013 delete root_char; | |
| 1014 #endif | |
| 1015 } | |
| 1016 | |
| 1017 void RBBIAPITest::RoundtripRule(const char *dataFile) { | |
| 1018 UErrorCode status = U_ZERO_ERROR; | |
| 1019 UParseError parseError; | |
| 1020 parseError.line = 0; | |
| 1021 parseError.offset = 0; | |
| 1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &
status)); | |
| 1023 uint32_t length; | |
| 1024 const UChar *builtSource; | |
| 1025 const uint8_t *rbbiRules; | |
| 1026 const uint8_t *builtRules; | |
| 1027 | |
| 1028 if (U_FAILURE(status)) { | |
| 1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(statu
s)); | |
| 1030 return; | |
| 1031 } | |
| 1032 | |
| 1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); | |
| 1034 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fR
uleSource); | |
| 1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, par
seError, status); | |
| 1036 if (U_FAILURE(status)) { | |
| 1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, colum
n %d\n", | |
| 1038 u_errorName(status), parseError.line, parseError.offset); | |
| 1039 return; | |
| 1040 }; | |
| 1041 rbbiRules = brkItr->getBinaryRules(length); | |
| 1042 logln("Comparing \"%s\" len=%d", dataFile, length); | |
| 1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { | |
| 1044 errln("Built rules and rebuilt rules are different %s", dataFile); | |
| 1045 return; | |
| 1046 } | |
| 1047 delete brkItr; | |
| 1048 } | |
| 1049 | |
| 1050 void RBBIAPITest::TestRoundtripRules() { | |
| 1051 RoundtripRule("word"); | |
| 1052 RoundtripRule("title"); | |
| 1053 RoundtripRule("sent"); | |
| 1054 RoundtripRule("line"); | |
| 1055 RoundtripRule("char"); | |
| 1056 if (!quick) { | |
| 1057 RoundtripRule("word_POSIX"); | |
| 1058 } | |
| 1059 } | |
| 1060 | |
| 1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* | |
| 1062 // (these are protected so we access them via a local class RBBIWithProtectedFun
ctions). | |
| 1063 // This is just a sanity check, not a thorough test (e.g. we don't check that th
e | |
| 1064 // first delete actually frees rulesCopy). | |
| 1065 void RBBIAPITest::TestCreateFromRBBIData() { | |
| 1066 // Get some handy RBBIData | |
| 1067 const char *brkName = "word"; // or "sent", "line", "char", etc. | |
| 1068 UErrorCode status = U_ZERO_ERROR; | |
| 1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &s
tatus)); | |
| 1070 if ( U_SUCCESS(status) ) { | |
| 1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMem
ory(data.getAlias()); | |
| 1072 uint32_t length = builtRules->fLength; | |
| 1073 RBBIWithProtectedFunctions * brkItr; | |
| 1074 | |
| 1075 // Try the memory-adopting constructor, need to copy the data first | |
| 1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); | |
| 1077 if ( rulesCopy ) { | |
| 1078 uprv_memcpy( rulesCopy, builtRules, length ); | |
| 1079 | |
| 1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); | |
| 1081 if ( U_SUCCESS(status) ) { | |
| 1082 delete brkItr; // this should free rulesCopy | |
| 1083 } else { | |
| 1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): IC
U Error \"%s\"\n", u_errorName(status) ); | |
| 1085 status = U_ZERO_ERROR;// reset for the next test | |
| 1086 uprv_free( rulesCopy ); | |
| 1087 } | |
| 1088 } | |
| 1089 | |
| 1090 // Now try the non-adopting constructor | |
| 1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFun
ctions::kDontAdopt, status); | |
| 1092 if ( U_SUCCESS(status) ) { | |
| 1093 delete brkItr; // this should NOT attempt to free builtRules | |
| 1094 if (builtRules->fLength != length) { // sanity check | |
| 1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted)
: delete affects data\n" ); | |
| 1096 } | |
| 1097 } else { | |
| 1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): IC
U Error \"%s\"\n", u_errorName(status) ); | |
| 1099 } | |
| 1100 } | |
| 1101 | |
| 1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) | |
| 1103 // | |
| 1104 status = U_ZERO_ERROR; | |
| 1105 RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::create
WordInstance(Locale::getEnglish(), status); | |
| 1106 if (rb == NULL || U_FAILURE(status)) { | |
| 1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::g
etEnglish) - %s", u_errorName(status)); | |
| 1108 } else { | |
| 1109 uint32_t length; | |
| 1110 const uint8_t *rules = rb->getBinaryRules(length); | |
| 1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length,
status); | |
| 1112 TEST_ASSERT_SUCCESS(status); | |
| 1113 TEST_ASSERT(*rb == *rb2); | |
| 1114 UnicodeString words = "one two three "; | |
| 1115 rb2->setText(words); | |
| 1116 int wordCounter = 0; | |
| 1117 while (rb2->next() != UBRK_DONE) { | |
| 1118 wordCounter++; | |
| 1119 } | |
| 1120 TEST_ASSERT(wordCounter == 6); | |
| 1121 | |
| 1122 status = U_ZERO_ERROR; | |
| 1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1
, status); | |
| 1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | |
| 1125 | |
| 1126 delete rb; | |
| 1127 delete rb2; | |
| 1128 delete rb3; | |
| 1129 } | |
| 1130 } | |
| 1131 | |
| 1132 | |
| 1133 void RBBIAPITest::TestRefreshInputText() { | |
| 1134 /* | |
| 1135 * RefreshInput changes out the input of a Break Iterator without | |
| 1136 * changing anything else in the iterator's state. Used with Java JNI, | |
| 1137 * when Java moves the underlying string storage. This test | |
| 1138 * runs BreakIterator::next() repeatedly, moving the text in the middle o
f the sequence. | |
| 1139 * The right set of boundaries should still be found. | |
| 1140 */ | |
| 1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /
* = " A B C D" */ | |
| 1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; | |
| 1143 UErrorCode status = U_ZERO_ERROR; | |
| 1144 UText ut1 = UTEXT_INITIALIZER; | |
| 1145 UText ut2 = UTEXT_INITIALIZER; | |
| 1146 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::create
LineInstance(Locale::getEnglish(), status); | |
| 1147 TEST_ASSERT_SUCCESS(status); | |
| 1148 | |
| 1149 utext_openUChars(&ut1, testStr, -1, &status); | |
| 1150 TEST_ASSERT_SUCCESS(status); | |
| 1151 | |
| 1152 if (U_SUCCESS(status)) { | |
| 1153 bi->setText(&ut1, status); | |
| 1154 TEST_ASSERT_SUCCESS(status); | |
| 1155 | |
| 1156 /* Line boundaries will occur before each letter in the original string
*/ | |
| 1157 TEST_ASSERT(1 == bi->next()); | |
| 1158 TEST_ASSERT(3 == bi->next()); | |
| 1159 | |
| 1160 /* Move the string, kill the original string. */ | |
| 1161 u_strcpy(movedStr, testStr); | |
| 1162 u_memset(testStr, 0x20, u_strlen(testStr)); | |
| 1163 utext_openUChars(&ut2, movedStr, -1, &status); | |
| 1164 TEST_ASSERT_SUCCESS(status); | |
| 1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status)
; | |
| 1166 TEST_ASSERT_SUCCESS(status); | |
| 1167 TEST_ASSERT(bi == returnedBI); | |
| 1168 | |
| 1169 /* Find the following matches, now working in the moved string. */ | |
| 1170 TEST_ASSERT(5 == bi->next()); | |
| 1171 TEST_ASSERT(7 == bi->next()); | |
| 1172 TEST_ASSERT(8 == bi->next()); | |
| 1173 TEST_ASSERT(UBRK_DONE == bi->next()); | |
| 1174 | |
| 1175 utext_close(&ut1); | |
| 1176 utext_close(&ut2); | |
| 1177 } | |
| 1178 delete bi; | |
| 1179 | |
| 1180 } | |
| 1181 | |
| 1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BRE
AK_ITERATION | |
| 1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it)
{ | |
| 1184 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular b
rackets | |
| 1185 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'")); | |
| 1186 | |
| 1187 int32_t *pos = new int32_t[ustr.length()]; | |
| 1188 int32_t posCount = 0; | |
| 1189 | |
| 1190 // calculate breaks up front, so we can print out | |
| 1191 // sans any debugging | |
| 1192 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) { | |
| 1193 pos[posCount++] = n; | |
| 1194 if(posCount>=ustr.length()) { | |
| 1195 it.errln("brk count exceeds string length!"); | |
| 1196 return; | |
| 1197 } | |
| 1198 } | |
| 1199 UnicodeString out; | |
| 1200 out.append((UChar)CHSTR); | |
| 1201 int32_t prev = 0; | |
| 1202 for(int32_t i=0;i<posCount;i++) { | |
| 1203 int32_t n=pos[i]; | |
| 1204 out.append(ustr.tempSubString(prev,n-prev)); | |
| 1205 out.append((UChar)PILCROW); | |
| 1206 prev=n; | |
| 1207 } | |
| 1208 out.append(ustr.tempSubString(prev,ustr.length()-prev)); | |
| 1209 out.append((UChar)CHEND); | |
| 1210 it.logln(out); | |
| 1211 | |
| 1212 out.remove(); | |
| 1213 for(int32_t i=0;i<posCount;i++) { | |
| 1214 char tmp[100]; | |
| 1215 sprintf(tmp,"%d ",pos[i]); | |
| 1216 out.append(UnicodeString(tmp)); | |
| 1217 } | |
| 1218 it.logln(out); | |
| 1219 delete [] pos; | |
| 1220 } | |
| 1221 #endif | |
| 1222 | |
| 1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() { | |
| 1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BRE
AK_ITERATION | |
| 1225 UErrorCode status = U_ZERO_ERROR; | |
| 1226 LocalPointer<FilteredBreakIteratorBuilder> builder; | |
| 1227 LocalPointer<BreakIterator> baseBI; | |
| 1228 LocalPointer<BreakIterator> filteredBI; | |
| 1229 LocalPointer<BreakIterator> frenchBI; | |
| 1230 | |
| 1231 const UnicodeString text("In the meantime Mr. Weston arrived with his small sh
ip, which he had now recovered. Capt. Gorges, who informed the Sgt. here that on
e purpose of his going east was to meet with Mr. Weston, took this opportunity t
o call him to account for some abuses he had to lay to his charge."); // (Willia
m Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - ed
ited. | |
| 1232 const UnicodeString ABBR_MR("Mr."); | |
| 1233 const UnicodeString ABBR_CAPT("Capt."); | |
| 1234 | |
| 1235 { | |
| 1236 logln("Constructing empty builder\n"); | |
| 1237 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); | |
| 1238 TEST_ASSERT_SUCCESS(status); | |
| 1239 | |
| 1240 logln("Constructing base BI\n"); | |
| 1241 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish
(), status)); | |
| 1242 TEST_ASSERT_SUCCESS(status); | |
| 1243 | |
| 1244 logln("Building new BI\n"); | |
| 1245 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
| 1246 TEST_ASSERT_SUCCESS(status); | |
| 1247 | |
| 1248 if (U_SUCCESS(status)) { | |
| 1249 logln("Testing:"); | |
| 1250 filteredBI->setText(text); | |
| 1251 TEST_ASSERT(20 == filteredBI->next()); // Mr. | |
| 1252 TEST_ASSERT(84 == filteredBI->next()); // recovered. | |
| 1253 TEST_ASSERT(90 == filteredBI->next()); // Capt. | |
| 1254 TEST_ASSERT(181 == filteredBI->next()); // Mr. | |
| 1255 TEST_ASSERT(278 == filteredBI->next()); // charge. | |
| 1256 filteredBI->first(); | |
| 1257 prtbrks(filteredBI.getAlias(), text, *this); | |
| 1258 } | |
| 1259 } | |
| 1260 | |
| 1261 { | |
| 1262 logln("Constructing empty builder\n"); | |
| 1263 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); | |
| 1264 TEST_ASSERT_SUCCESS(status); | |
| 1265 | |
| 1266 if (U_SUCCESS(status)) { | |
| 1267 logln("Adding Mr. as an exception\n"); | |
| 1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); | |
| 1269 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // a
lready have it | |
| 1270 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status)); | |
| 1271 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); //
already removed it | |
| 1272 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); | |
| 1273 TEST_ASSERT_SUCCESS(status); | |
| 1274 | |
| 1275 logln("Constructing base BI\n"); | |
| 1276 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng
lish(), status)); | |
| 1277 TEST_ASSERT_SUCCESS(status); | |
| 1278 | |
| 1279 logln("Building new BI\n"); | |
| 1280 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
| 1281 TEST_ASSERT_SUCCESS(status); | |
| 1282 | |
| 1283 logln("Testing:"); | |
| 1284 filteredBI->setText(text); | |
| 1285 TEST_ASSERT(84 == filteredBI->next()); | |
| 1286 TEST_ASSERT(90 == filteredBI->next());// Capt. | |
| 1287 TEST_ASSERT(278 == filteredBI->next()); | |
| 1288 filteredBI->first(); | |
| 1289 prtbrks(filteredBI.getAlias(), text, *this); | |
| 1290 } | |
| 1291 } | |
| 1292 | |
| 1293 | |
| 1294 { | |
| 1295 logln("Constructing empty builder\n"); | |
| 1296 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status)); | |
| 1297 TEST_ASSERT_SUCCESS(status); | |
| 1298 | |
| 1299 if (U_SUCCESS(status)) { | |
| 1300 logln("Adding Mr. and Capt as an exception\n"); | |
| 1301 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status)); | |
| 1302 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status)); | |
| 1303 TEST_ASSERT_SUCCESS(status); | |
| 1304 | |
| 1305 logln("Constructing base BI\n"); | |
| 1306 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEng
lish(), status)); | |
| 1307 TEST_ASSERT_SUCCESS(status); | |
| 1308 | |
| 1309 logln("Building new BI\n"); | |
| 1310 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
| 1311 TEST_ASSERT_SUCCESS(status); | |
| 1312 | |
| 1313 logln("Testing:"); | |
| 1314 filteredBI->setText(text); | |
| 1315 TEST_ASSERT(84 == filteredBI->next()); | |
| 1316 TEST_ASSERT(278 == filteredBI->next()); | |
| 1317 filteredBI->first(); | |
| 1318 prtbrks(filteredBI.getAlias(), text, *this); | |
| 1319 } | |
| 1320 } | |
| 1321 | |
| 1322 | |
| 1323 { | |
| 1324 logln("Constructing English builder\n"); | |
| 1325 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge
tEnglish(), status)); | |
| 1326 TEST_ASSERT_SUCCESS(status); | |
| 1327 | |
| 1328 logln("Constructing base BI\n"); | |
| 1329 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish
(), status)); | |
| 1330 TEST_ASSERT_SUCCESS(status); | |
| 1331 | |
| 1332 if (U_SUCCESS(status)) { | |
| 1333 logln("unsuppressing 'Capt'"); | |
| 1334 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status)); | |
| 1335 | |
| 1336 logln("Building new BI\n"); | |
| 1337 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
| 1338 TEST_ASSERT_SUCCESS(status); | |
| 1339 | |
| 1340 if(filteredBI.isValid()) { | |
| 1341 logln("Testing:"); | |
| 1342 filteredBI->setText(text); | |
| 1343 TEST_ASSERT(84 == filteredBI->next()); | |
| 1344 TEST_ASSERT(90 == filteredBI->next()); | |
| 1345 TEST_ASSERT(278 == filteredBI->next()); | |
| 1346 filteredBI->first(); | |
| 1347 prtbrks(filteredBI.getAlias(), text, *this); | |
| 1348 } | |
| 1349 } | |
| 1350 } | |
| 1351 | |
| 1352 | |
| 1353 { | |
| 1354 logln("Constructing English builder\n"); | |
| 1355 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge
tEnglish(), status)); | |
| 1356 TEST_ASSERT_SUCCESS(status); | |
| 1357 | |
| 1358 logln("Constructing base BI\n"); | |
| 1359 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish
(), status)); | |
| 1360 TEST_ASSERT_SUCCESS(status); | |
| 1361 | |
| 1362 if (U_SUCCESS(status)) { | |
| 1363 logln("Building new BI\n"); | |
| 1364 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
| 1365 TEST_ASSERT_SUCCESS(status); | |
| 1366 | |
| 1367 if(filteredBI.isValid()) { | |
| 1368 logln("Testing:"); | |
| 1369 filteredBI->setText(text); | |
| 1370 TEST_ASSERT(84 == filteredBI->next()); | |
| 1371 TEST_ASSERT(278 == filteredBI->next()); | |
| 1372 filteredBI->first(); | |
| 1373 prtbrks(filteredBI.getAlias(), text, *this); | |
| 1374 } | |
| 1375 } | |
| 1376 } | |
| 1377 | |
| 1378 // reenable once french is in | |
| 1379 { | |
| 1380 logln("Constructing French builder"); | |
| 1381 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::ge
tFrench(), status)); | |
| 1382 TEST_ASSERT_SUCCESS(status); | |
| 1383 | |
| 1384 logln("Constructing base BI\n"); | |
| 1385 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(
), status)); | |
| 1386 TEST_ASSERT_SUCCESS(status); | |
| 1387 | |
| 1388 if (U_SUCCESS(status)) { | |
| 1389 logln("Building new BI\n"); | |
| 1390 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status)); | |
| 1391 TEST_ASSERT_SUCCESS(status); | |
| 1392 } | |
| 1393 | |
| 1394 if(frenchBI.isValid()) { | |
| 1395 logln("Testing:"); | |
| 1396 UnicodeString frText("C'est MM. Duval."); | |
| 1397 frenchBI->setText(frText); | |
| 1398 TEST_ASSERT(16 == frenchBI->next()); | |
| 1399 TEST_ASSERT(BreakIterator::DONE == frenchBI->next()); | |
| 1400 frenchBI->first(); | |
| 1401 prtbrks(frenchBI.getAlias(), frText, *this); | |
| 1402 logln("Testing against English:"); | |
| 1403 filteredBI->setText(frText); | |
| 1404 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI i
s english. | |
| 1405 TEST_ASSERT(16 == filteredBI->next()); | |
| 1406 TEST_ASSERT(BreakIterator::DONE == filteredBI->next()); | |
| 1407 filteredBI->first(); | |
| 1408 prtbrks(filteredBI.getAlias(), frText, *this); | |
| 1409 | |
| 1410 // Verify == | |
| 1411 TEST_ASSERT_TRUE(*frenchBI == *frenchBI); | |
| 1412 TEST_ASSERT_TRUE(*filteredBI != *frenchBI); | |
| 1413 TEST_ASSERT_TRUE(*frenchBI != *filteredBI); | |
| 1414 } else { | |
| 1415 dataerrln("French BI: not valid."); | |
| 1416 } | |
| 1417 } | |
| 1418 | |
| 1419 #else | |
| 1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCON
FIG_NO_FILTERED_BREAK_ITERATION"); | |
| 1421 #endif | |
| 1422 } | |
| 1423 | |
| 1424 //--------------------------------------------- | |
| 1425 // runIndexedTest | |
| 1426 //--------------------------------------------- | |
| 1427 | |
| 1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
char* /*par*/ ) | |
| 1429 { | |
| 1430 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); | |
| 1431 switch (index) { | |
| 1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break
; | |
| 1433 #if !UCONFIG_NO_FILE_IO | |
| 1434 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; | |
| 1435 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; | |
| 1436 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; | |
| 1437 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText();
break; | |
| 1438 case 4: name = "TestIteration"; if (exec) TestIteration(); break; | |
| 1439 #else | |
| 1440 case 0: case 1: case 2: case 3: case 4: name = "skip"; break; | |
| 1441 #endif | |
| 1442 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; | |
| 1443 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); brea
k; | |
| 1444 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); brea
k; | |
| 1445 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; | |
| 1446 #if !UCONFIG_NO_FILE_IO | |
| 1447 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; | |
| 1448 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; | |
| 1449 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; | |
| 1450 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); br
eak; | |
| 1451 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIDa
ta(); break; | |
| 1452 #else | |
| 1453 case 9: case 10: case 11: case 12: case 13: name = "skip"; break; | |
| 1454 #endif | |
| 1455 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText()
; break; | |
| 1456 | |
| 1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING | |
| 1458 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBre
akIteratorBuilder(); break; | |
| 1459 #else | |
| 1460 case 15: name="skip"; break; | |
| 1461 #endif | |
| 1462 default: name = ""; break; // needed to end loop | |
| 1463 } | |
| 1464 } | |
| 1465 | |
| 1466 //--------------------------------------------- | |
| 1467 //Internal subroutines | |
| 1468 //--------------------------------------------- | |
| 1469 | |
| 1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t
*boundaries){ | |
| 1471 logln((UnicodeString)"testIsBoundary():"); | |
| 1472 int32_t p = 0; | |
| 1473 UBool isB; | |
| 1474 for (int32_t i = 0; i < text.length(); i++) { | |
| 1475 isB = bi.isBoundary(i); | |
| 1476 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); | |
| 1477 | |
| 1478 if (i == boundaries[p]) { | |
| 1479 if (!isB) | |
| 1480 errln((UnicodeString)"Wrong result from isBoundary() for " +
i + (UnicodeString)": expected true, got false"); | |
| 1481 p++; | |
| 1482 } | |
| 1483 else { | |
| 1484 if (isB) | |
| 1485 errln((UnicodeString)"Wrong result from isBoundary() for " +
i + (UnicodeString)": expected false, got true"); | |
| 1486 } | |
| 1487 } | |
| 1488 } | |
| 1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof
fset, int32_t expectedOffset, const char* expectedString){ | |
| 1490 UnicodeString selected; | |
| 1491 UnicodeString expected=CharsToUnicodeString(expectedString); | |
| 1492 | |
| 1493 if(gotoffset != expectedOffset) | |
| 1494 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeStrin
g)" instead of #" + expectedOffset); | |
| 1495 if(start <= gotoffset){ | |
| 1496 testString.extractBetween(start, gotoffset, selected); | |
| 1497 } | |
| 1498 else{ | |
| 1499 testString.extractBetween(gotoffset, start, selected); | |
| 1500 } | |
| 1501 if(selected.compare(expected) != 0) | |
| 1502 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\"
instead of \"" + expected + "\"")); | |
| 1503 else | |
| 1504 logln(prettify("****selected \"" + selected + "\"")); | |
| 1505 } | |
| 1506 | |
| 1507 //--------------------------------------------- | |
| 1508 //RBBIWithProtectedFunctions class functions | |
| 1509 //--------------------------------------------- | |
| 1510 | |
| 1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UEr
rorCode &status) | |
| 1512 : RuleBasedBreakIterator(data, status) | |
| 1513 { | |
| 1514 } | |
| 1515 | |
| 1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* dat
a, enum EDontAdopt, UErrorCode &status) | |
| 1517 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) | |
| 1518 { | |
| 1519 } | |
| 1520 | |
| 1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | |
| OLD | NEW |