OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * Copyright (c) 1999-2010, International Business Machines |
| 3 * Corporation and others. All Rights Reserved. |
| 4 ******************************************************************** |
| 5 * Date Name Description |
| 6 * 12/14/99 Madhu Creation. |
| 7 * 01/12/2000 Madhu updated for changed API |
| 8 ********************************************************************/ |
| 9 |
| 10 #include "unicode/utypes.h" |
| 11 |
| 12 #if !UCONFIG_NO_BREAK_ITERATION |
| 13 |
| 14 #include "unicode/uchar.h" |
| 15 #include "intltest.h" |
| 16 #include "unicode/rbbi.h" |
| 17 #include "unicode/schriter.h" |
| 18 #include "rbbiapts.h" |
| 19 #include "rbbidata.h" |
| 20 #include "cstring.h" |
| 21 #include "ubrkimpl.h" |
| 22 #include "unicode/ustring.h" |
| 23 #include "unicode/utext.h" |
| 24 #include "cmemory.h" |
| 25 |
| 26 /** |
| 27 * API Test the RuleBasedBreakIterator class |
| 28 */ |
| 29 |
| 30 |
// Reports a failure, tagged with the current file and line, when `status`
// holds an ICU failure code. Expands to a braced block that calls errln(),
// so it is only usable where errln() is in scope (the test methods here).
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errln("Failure at file %s, line %d, error = %s", \
              __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a failure, tagged with the current file and line, when `expr`
// compares equal to FALSE. `expr` is evaluated exactly once.
#define TEST_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("Test Failure at file %s, line %d", __FILE__, __LINE__); \
    } \
}
| 36 |
| 37 void RBBIAPITest::TestCloneEquals() |
| 38 { |
| 39 |
| 40 UErrorCode status=U_ZERO_ERROR; |
| 41 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); |
| 42 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); |
| 43 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); |
| 44 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createWordInstance(Locale::getDefault(), status); |
| 45 if(U_FAILURE(status)){ |
| 46 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
| 47 return; |
| 48 } |
| 49 |
| 50 |
| 51 UnicodeString testString="Testing word break iterators's clone() and equals(
)"; |
| 52 bi1->setText(testString); |
| 53 bi2->setText(testString); |
| 54 biequal->setText(testString); |
| 55 |
| 56 bi3->setText("hello"); |
| 57 |
| 58 logln((UnicodeString)"Testing equals()"); |
| 59 |
| 60 logln((UnicodeString)"Testing == and !="); |
| 61 UBool b = (*bi1 != *biequal); |
| 62 b |= *bi1 == *bi2; |
| 63 b |= *bi1 == *bi3; |
| 64 if (b) { |
| 65 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); |
| 66 } |
| 67 |
| 68 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) |
| 69 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); |
| 70 |
| 71 |
| 72 // Quick test of RulesBasedBreakIterator assignment - |
| 73 // Check that |
| 74 // two different iterators are != |
| 75 // they are == after assignment |
| 76 // source and dest iterator produce the same next() after assignment. |
| 77 // deleting one doesn't disable the other. |
| 78 logln("Testing assignment"); |
| 79 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::creat
eLineInstance(Locale::getDefault(), status); |
| 80 if(U_FAILURE(status)){ |
| 81 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
| 82 return; |
| 83 } |
| 84 |
| 85 RuleBasedBreakIterator biDefault, biDefault2; |
| 86 if(U_FAILURE(status)){ |
| 87 errln((UnicodeString)"FAIL : in construction of default iterator"); |
| 88 return; |
| 89 } |
| 90 if (biDefault == *bix) { |
| 91 errln((UnicodeString)"ERROR: iterators should not compare =="); |
| 92 return; |
| 93 } |
| 94 if (biDefault != biDefault2) { |
| 95 errln((UnicodeString)"ERROR: iterators should compare =="); |
| 96 return; |
| 97 } |
| 98 |
| 99 |
| 100 UnicodeString HelloString("Hello Kitty"); |
| 101 bix->setText(HelloString); |
| 102 if (*bix == *bi2) { |
| 103 errln(UnicodeString("ERROR: strings should not be equal before assignmen
t.")); |
| 104 } |
| 105 *bix = *bi2; |
| 106 if (*bix != *bi2) { |
| 107 errln(UnicodeString("ERROR: strings should be equal before assignment.")
); |
| 108 } |
| 109 |
| 110 int bixnext = bix->next(); |
| 111 int bi2next = bi2->next(); |
| 112 if (! (bixnext == bi2next && bixnext == 7)) { |
| 113 errln(UnicodeString("ERROR: iterators behaved differently after assignme
nt.")); |
| 114 } |
| 115 delete bix; |
| 116 if (bi2->next() != 8) { |
| 117 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."
)); |
| 118 } |
| 119 |
| 120 |
| 121 |
| 122 logln((UnicodeString)"Testing clone()"); |
| 123 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); |
| 124 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); |
| 125 |
| 126 if(*bi1clone != *bi1 || *bi1clone != *biequal || |
| 127 *bi1clone == *bi3 || *bi1clone == *bi2) |
| 128 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); |
| 129 |
| 130 if(*bi2clone == *bi1 || *bi2clone == *biequal || |
| 131 *bi2clone == *bi3 || *bi2clone != *bi2) |
| 132 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); |
| 133 |
| 134 if(bi1->getText() != bi1clone->getText() || |
| 135 bi2clone->getText() != bi2->getText() || |
| 136 *bi2clone == *bi1clone ) |
| 137 errln((UnicodeString)"ERROR: RBBI's clone() method failed"); |
| 138 |
| 139 delete bi1clone; |
| 140 delete bi2clone; |
| 141 delete bi1; |
| 142 delete bi3; |
| 143 delete bi2; |
| 144 delete biequal; |
| 145 } |
| 146 |
| 147 void RBBIAPITest::TestBoilerPlate() |
| 148 { |
| 149 UErrorCode status = U_ZERO_ERROR; |
| 150 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); |
| 151 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status)
; |
| 152 if (U_FAILURE(status)) { |
| 153 errcheckln(status, "Creation of break iterator failed %s", u_errorName(s
tatus)); |
| 154 return; |
| 155 } |
| 156 if(*a!=*b){ |
| 157 errln("Failed: boilerplate method operator!= does not return correct res
ults"); |
| 158 } |
| 159 BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status); |
| 160 if(a && c){ |
| 161 if(*c==*a){ |
| 162 errln("Failed: boilerplate method opertator== does not return correc
t results"); |
| 163 } |
| 164 }else{ |
| 165 errln("creation of break iterator failed"); |
| 166 } |
| 167 delete a; |
| 168 delete b; |
| 169 delete c; |
| 170 } |
| 171 |
| 172 void RBBIAPITest::TestgetRules() |
| 173 { |
| 174 UErrorCode status=U_ZERO_ERROR; |
| 175 |
| 176 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator:
:createCharacterInstance(Locale::getDefault(), status); |
| 177 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator:
:createWordInstance(Locale::getDefault(), status); |
| 178 if(U_FAILURE(status)){ |
| 179 errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); |
| 180 delete bi1; |
| 181 delete bi2; |
| 182 return; |
| 183 } |
| 184 |
| 185 |
| 186 |
| 187 logln((UnicodeString)"Testing toString()"); |
| 188 |
| 189 bi1->setText((UnicodeString)"Hello there"); |
| 190 |
| 191 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); |
| 192 |
| 193 UnicodeString temp=bi1->getRules(); |
| 194 UnicodeString temp2=bi2->getRules(); |
| 195 UnicodeString temp3=bi3->getRules(); |
| 196 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(tem
p3) != 0) |
| 197 errln((UnicodeString)"ERROR: error in getRules() method"); |
| 198 |
| 199 delete bi1; |
| 200 delete bi2; |
| 201 delete bi3; |
| 202 } |
| 203 void RBBIAPITest::TestHashCode() |
| 204 { |
| 205 UErrorCode status=U_ZERO_ERROR; |
| 206 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); |
| 207 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createCharacterInstance(Locale::getDefault(), status); |
| 208 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIte
rator::createWordInstance(Locale::getDefault(), status); |
| 209 if(U_FAILURE(status)){ |
| 210 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
| 211 delete bi1; |
| 212 delete bi2; |
| 213 delete bi3; |
| 214 return; |
| 215 } |
| 216 |
| 217 |
| 218 logln((UnicodeString)"Testing hashCode()"); |
| 219 |
| 220 bi1->setText((UnicodeString)"Hash code"); |
| 221 bi2->setText((UnicodeString)"Hash code"); |
| 222 bi3->setText((UnicodeString)"Hash code"); |
| 223 |
| 224 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); |
| 225 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); |
| 226 |
| 227 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashC
ode() || |
| 228 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->
hashCode()) |
| 229 errln((UnicodeString)"ERROR: identical objects have different hashcodes"
); |
| 230 |
| 231 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode()
|| |
| 232 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() ==
bi2->hashCode()) |
| 233 errln((UnicodeString)"ERROR: different objects have same hashcodes"); |
| 234 |
| 235 delete bi1clone; |
| 236 delete bi2clone; |
| 237 delete bi1; |
| 238 delete bi2; |
| 239 delete bi3; |
| 240 |
| 241 } |
| 242 void RBBIAPITest::TestGetSetAdoptText() |
| 243 { |
| 244 logln((UnicodeString)"Testing getText setText "); |
| 245 IcuTestErrorCode status(*this, "TestGetSetAdoptText"); |
| 246 UnicodeString str1="first string."; |
| 247 UnicodeString str2="Second string."; |
| 248 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)Rule
BasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); |
| 249 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)Rule
BasedBreakIterator::createWordInstance(Locale::getDefault(), status)); |
| 250 if(status.isFailure()){ |
| 251 errcheckln(status, "Fail : in construction - %s", status.errorName()); |
| 252 return; |
| 253 } |
| 254 |
| 255 |
| 256 CharacterIterator* text1= new StringCharacterIterator(str1); |
| 257 CharacterIterator* text1Clone = text1->clone(); |
| 258 CharacterIterator* text2= new StringCharacterIterator(str2); |
| 259 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "
ond str" |
| 260 |
| 261 wordIter1->setText(str1); |
| 262 CharacterIterator *tci = &wordIter1->getText(); |
| 263 UnicodeString tstr; |
| 264 tci->getText(tstr); |
| 265 TEST_ASSERT(tstr == str1); |
| 266 if(wordIter1->current() != 0) |
| 267 errln((UnicodeString)"ERROR:1 setText did not set the iteration position
to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\
n"); |
| 268 |
| 269 wordIter1->next(2); |
| 270 |
| 271 wordIter1->setText(str2); |
| 272 if(wordIter1->current() != 0) |
| 273 errln((UnicodeString)"ERROR:2 setText did not reset the iteration positi
on to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)
"\n"); |
| 274 |
| 275 |
| 276 charIter1->adoptText(text1Clone); |
| 277 TEST_ASSERT(wordIter1->getText() != charIter1->getText()); |
| 278 tci = &wordIter1->getText(); |
| 279 tci->getText(tstr); |
| 280 TEST_ASSERT(tstr == str2); |
| 281 tci = &charIter1->getText(); |
| 282 tci->getText(tstr); |
| 283 TEST_ASSERT(tstr == str1); |
| 284 |
| 285 |
| 286 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->
clone()); |
| 287 rb->adoptText(text1); |
| 288 if(rb->getText() != *text1) |
| 289 errln((UnicodeString)"ERROR:1 error in adoptText "); |
| 290 rb->adoptText(text2); |
| 291 if(rb->getText() != *text2) |
| 292 errln((UnicodeString)"ERROR:2 error in adoptText "); |
| 293 |
| 294 // Adopt where iterator range is less than the entire orignal source string. |
| 295 // (With the change of the break engine to working with UText internally, |
| 296 // CharacterIterators starting at positions other than zero are not suppo
rted) |
| 297 rb->adoptText(text3); |
| 298 TEST_ASSERT(rb->preceding(2) == 0); |
| 299 TEST_ASSERT(rb->following(11) == BreakIterator::DONE); |
| 300 //if(rb->preceding(2) != 3) { |
| 301 // errln((UnicodeString)"ERROR:3 error in adoptText "); |
| 302 //} |
| 303 //if(rb->following(11) != BreakIterator::DONE) { |
| 304 // errln((UnicodeString)"ERROR:4 error in adoptText "); |
| 305 //} |
| 306 |
| 307 // UText API |
| 308 // |
| 309 // Quick test to see if UText is working at all. |
| 310 // |
| 311 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello w
orld" in UTF-8 */ |
| 312 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ |
| 313 // 012345678901 |
| 314 |
| 315 status.reset(); |
| 316 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); |
| 317 wordIter1->setText(ut.getAlias(), status); |
| 318 TEST_ASSERT_SUCCESS(status); |
| 319 |
| 320 int32_t pos; |
| 321 pos = wordIter1->first(); |
| 322 TEST_ASSERT(pos==0); |
| 323 pos = wordIter1->next(); |
| 324 TEST_ASSERT(pos==5); |
| 325 pos = wordIter1->next(); |
| 326 TEST_ASSERT(pos==6); |
| 327 pos = wordIter1->next(); |
| 328 TEST_ASSERT(pos==11); |
| 329 pos = wordIter1->next(); |
| 330 TEST_ASSERT(pos==UBRK_DONE); |
| 331 |
| 332 status.reset(); |
| 333 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); |
| 334 TEST_ASSERT_SUCCESS(status); |
| 335 wordIter1->setText(ut2.getAlias(), status); |
| 336 TEST_ASSERT_SUCCESS(status); |
| 337 |
| 338 pos = wordIter1->first(); |
| 339 TEST_ASSERT(pos==0); |
| 340 pos = wordIter1->next(); |
| 341 TEST_ASSERT(pos==3); |
| 342 pos = wordIter1->next(); |
| 343 TEST_ASSERT(pos==4); |
| 344 |
| 345 pos = wordIter1->last(); |
| 346 TEST_ASSERT(pos==6); |
| 347 pos = wordIter1->previous(); |
| 348 TEST_ASSERT(pos==4); |
| 349 pos = wordIter1->previous(); |
| 350 TEST_ASSERT(pos==3); |
| 351 pos = wordIter1->previous(); |
| 352 TEST_ASSERT(pos==0); |
| 353 pos = wordIter1->previous(); |
| 354 TEST_ASSERT(pos==UBRK_DONE); |
| 355 |
| 356 status.reset(); |
| 357 UnicodeString sEmpty; |
| 358 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); |
| 359 wordIter1->getUText(gut2.getAlias(), status); |
| 360 TEST_ASSERT_SUCCESS(status); |
| 361 status.reset(); |
| 362 } |
| 363 |
| 364 |
| 365 void RBBIAPITest::TestIteration() |
| 366 { |
| 367 // This test just verifies that the API is present. |
| 368 // Testing for correct operation of the break rules happens elsewhere. |
| 369 |
| 370 UErrorCode status=U_ZERO_ERROR; |
| 371 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterato
r::createCharacterInstance(Locale::getDefault(), status); |
| 372 if (U_FAILURE(status) || bi == NULL) { |
| 373 errcheckln(status, "Failure creating character break iterator. Status =
%s", u_errorName(status)); |
| 374 } |
| 375 delete bi; |
| 376 |
| 377 status=U_ZERO_ERROR; |
| 378 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Lo
cale::getDefault(), status); |
| 379 if (U_FAILURE(status) || bi == NULL) { |
| 380 errcheckln(status, "Failure creating Word break iterator. Status = %s",
u_errorName(status)); |
| 381 } |
| 382 delete bi; |
| 383 |
| 384 status=U_ZERO_ERROR; |
| 385 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Lo
cale::getDefault(), status); |
| 386 if (U_FAILURE(status) || bi == NULL) { |
| 387 errcheckln(status, "Failure creating Line break iterator. Status = %s",
u_errorName(status)); |
| 388 } |
| 389 delete bi; |
| 390 |
| 391 status=U_ZERO_ERROR; |
| 392 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstanc
e(Locale::getDefault(), status); |
| 393 if (U_FAILURE(status) || bi == NULL) { |
| 394 errcheckln(status, "Failure creating Sentence break iterator. Status =
%s", u_errorName(status)); |
| 395 } |
| 396 delete bi; |
| 397 |
| 398 status=U_ZERO_ERROR; |
| 399 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(L
ocale::getDefault(), status); |
| 400 if (U_FAILURE(status) || bi == NULL) { |
| 401 errcheckln(status, "Failure creating Title break iterator. Status = %s"
, u_errorName(status)); |
| 402 } |
| 403 delete bi; |
| 404 |
| 405 status=U_ZERO_ERROR; |
| 406 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstan
ce(Locale::getDefault(), status); |
| 407 if (U_FAILURE(status) || bi == NULL) { |
| 408 errcheckln(status, "Failure creating character break iterator. Status =
%s", u_errorName(status)); |
| 409 return; // Skip the rest of these tests. |
| 410 } |
| 411 |
| 412 |
| 413 UnicodeString testString="0123456789"; |
| 414 bi->setText(testString); |
| 415 |
| 416 int32_t i; |
| 417 i = bi->first(); |
| 418 if (i != 0) { |
| 419 errln("Incorrect value from bi->first(). Expected 0, got %d.", i); |
| 420 } |
| 421 |
| 422 i = bi->last(); |
| 423 if (i != 10) { |
| 424 errln("Incorrect value from bi->last(). Expected 10, got %d", i); |
| 425 } |
| 426 |
| 427 // |
| 428 // Previous |
| 429 // |
| 430 bi->last(); |
| 431 i = bi->previous(); |
| 432 if (i != 9) { |
| 433 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d",
__LINE__, i); |
| 434 } |
| 435 |
| 436 |
| 437 bi->first(); |
| 438 i = bi->previous(); |
| 439 if (i != BreakIterator::DONE) { |
| 440 errln("Incorrect value from bi->previous() at line %d. Expected DONE, g
ot %d", __LINE__, i); |
| 441 } |
| 442 |
| 443 // |
| 444 // next() |
| 445 // |
| 446 bi->first(); |
| 447 i = bi->next(); |
| 448 if (i != 1) { |
| 449 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d",
__LINE__, i); |
| 450 } |
| 451 |
| 452 bi->last(); |
| 453 i = bi->next(); |
| 454 if (i != BreakIterator::DONE) { |
| 455 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %
d", __LINE__, i); |
| 456 } |
| 457 |
| 458 |
| 459 // |
| 460 // current() |
| 461 // |
| 462 bi->first(); |
| 463 i = bi->current(); |
| 464 if (i != 0) { |
| 465 errln("Incorrect value from bi->previous() at line %d. Expected 0, got
%d", __LINE__, i); |
| 466 } |
| 467 |
| 468 bi->next(); |
| 469 i = bi->current(); |
| 470 if (i != 1) { |
| 471 errln("Incorrect value from bi->previous() at line %d. Expected 1, got
%d", __LINE__, i); |
| 472 } |
| 473 |
| 474 bi->last(); |
| 475 bi->next(); |
| 476 i = bi->current(); |
| 477 if (i != 10) { |
| 478 errln("Incorrect value from bi->previous() at line %d. Expected 10, got
%d", __LINE__, i); |
| 479 } |
| 480 |
| 481 bi->first(); |
| 482 bi->previous(); |
| 483 i = bi->current(); |
| 484 if (i != 0) { |
| 485 errln("Incorrect value from bi->previous() at line %d. Expected 0, got
%d", __LINE__, i); |
| 486 } |
| 487 |
| 488 |
| 489 // |
| 490 // Following() |
| 491 // |
| 492 i = bi->following(4); |
| 493 if (i != 5) { |
| 494 errln("Incorrect value from bi->following() at line %d. Expected 5, got
%d", __LINE__, i); |
| 495 } |
| 496 |
| 497 i = bi->following(9); |
| 498 if (i != 10) { |
| 499 errln("Incorrect value from bi->following() at line %d. Expected 10, go
t %d", __LINE__, i); |
| 500 } |
| 501 |
| 502 i = bi->following(10); |
| 503 if (i != BreakIterator::DONE) { |
| 504 errln("Incorrect value from bi->following() at line %d. Expected DONE,
got %d", __LINE__, i); |
| 505 } |
| 506 |
| 507 |
| 508 // |
| 509 // Preceding |
| 510 // |
| 511 i = bi->preceding(4); |
| 512 if (i != 3) { |
| 513 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got
%d", __LINE__, i); |
| 514 } |
| 515 |
| 516 i = bi->preceding(10); |
| 517 if (i != 9) { |
| 518 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got
%d", __LINE__, i); |
| 519 } |
| 520 |
| 521 i = bi->preceding(1); |
| 522 if (i != 0) { |
| 523 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got
%d", __LINE__, i); |
| 524 } |
| 525 |
| 526 i = bi->preceding(0); |
| 527 if (i != BreakIterator::DONE) { |
| 528 errln("Incorrect value from bi->preceding() at line %d. Expected DONE,
got %d", __LINE__, i); |
| 529 } |
| 530 |
| 531 |
| 532 // |
| 533 // isBoundary() |
| 534 // |
| 535 bi->first(); |
| 536 if (bi->isBoundary(3) != TRUE) { |
| 537 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE,
got FALSE", __LINE__, i); |
| 538 } |
| 539 i = bi->current(); |
| 540 if (i != 3) { |
| 541 errln("Incorrect value from bi->current() at line %d. Expected 3, got %
d", __LINE__, i); |
| 542 } |
| 543 |
| 544 |
| 545 if (bi->isBoundary(11) != FALSE) { |
| 546 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE,
got TRUE", __LINE__, i); |
| 547 } |
| 548 i = bi->current(); |
| 549 if (i != 10) { |
| 550 errln("Incorrect value from bi->current() at line %d. Expected 10, got
%d", __LINE__, i); |
| 551 } |
| 552 |
| 553 // |
| 554 // next(n) |
| 555 // |
| 556 bi->first(); |
| 557 i = bi->next(4); |
| 558 if (i != 4) { |
| 559 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d",
__LINE__, i); |
| 560 } |
| 561 |
| 562 i = bi->next(6); |
| 563 if (i != 10) { |
| 564 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d"
, __LINE__, i); |
| 565 } |
| 566 |
| 567 bi->first(); |
| 568 i = bi->next(11); |
| 569 if (i != BreakIterator::DONE) { |
| 570 errln("Incorrect value from bi->next() at line %d. Expected BreakIterat
or::DONE, got %d", __LINE__, i); |
| 571 } |
| 572 |
| 573 delete bi; |
| 574 |
| 575 } |
| 576 |
| 577 |
| 578 |
| 579 |
| 580 |
| 581 |
| 582 void RBBIAPITest::TestBuilder() { |
| 583 UnicodeString rulesString1 = "$Letters = [:L:];\n" |
| 584 "$Numbers = [:N:];\n" |
| 585 "$Letters+;\n" |
| 586 "$Numbers+;\n" |
| 587 "[^$Letters $Numbers];\n" |
| 588 "!.*;\n"; |
| 589 UnicodeString testString1 = "abc123..abc"; |
| 590 // 01234567890 |
| 591 int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; |
| 592 UErrorCode status=U_ZERO_ERROR; |
| 593 UParseError parseError; |
| 594 |
| 595 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); |
| 596 if(U_FAILURE(status)) { |
| 597 dataerrln("Fail : in construction - %s", u_errorName(status)); |
| 598 } else { |
| 599 bi->setText(testString1); |
| 600 doBoundaryTest(*bi, testString1, bounds1); |
| 601 } |
| 602 delete bi; |
| 603 } |
| 604 |
| 605 |
| 606 // |
| 607 // TestQuoteGrouping |
| 608 // Single quotes within rules imply a grouping, so that a modifier |
| 609 // following the quoted text (* or +) applies to all of the quoted chars. |
| 610 // |
| 611 void RBBIAPITest::TestQuoteGrouping() { |
| 612 UnicodeString rulesString1 = "#Here comes the rule...\n" |
| 613 "'$@!'*;\n" // (\$\@\!)* |
| 614 ".;\n"; |
| 615 |
| 616 UnicodeString testString1 = "$@!$@!X$@!!X"; |
| 617 // 0123456789012 |
| 618 int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; |
| 619 UErrorCode status=U_ZERO_ERROR; |
| 620 UParseError parseError; |
| 621 |
| 622 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); |
| 623 if(U_FAILURE(status)) { |
| 624 dataerrln("Fail : in construction - %s", u_errorName(status)); |
| 625 } else { |
| 626 bi->setText(testString1); |
| 627 doBoundaryTest(*bi, testString1, bounds1); |
| 628 } |
| 629 delete bi; |
| 630 } |
| 631 |
| 632 // |
| 633 // TestRuleStatus |
| 634 // Test word break rule status constants. |
| 635 // |
| 636 void RBBIAPITest::TestRuleStatus() { |
| 637 UChar str[30]; |
| 638 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094"
, |
| 639 // 012345678901234567 8 9 0 1 2 3 4 5 6 |
| 640 // Ideographic Katakana Hiragana |
| 641 str, 30); |
| 642 UnicodeString testString1(str); |
| 643 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; |
| 644 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE,
UBRK_WORD_LETTER, |
| 645 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, |
| 646 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, |
| 647 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA,
UBRK_WORD_KANA}; |
| 648 |
| 649 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WOR
D_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, |
| 650 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WOR
D_NONE_LIMIT, |
| 651 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WOR
D_NONE_LIMIT, |
| 652 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WOR
D_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; |
| 653 |
| 654 UErrorCode status=U_ZERO_ERROR; |
| 655 |
| 656 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::creat
eWordInstance(Locale::getEnglish(), status); |
| 657 if(U_FAILURE(status)) { |
| 658 errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
| 659 } else { |
| 660 bi->setText(testString1); |
| 661 // First test that the breaks are in the right spots. |
| 662 doBoundaryTest(*bi, testString1, bounds1); |
| 663 |
| 664 // Then go back and check tag values |
| 665 int32_t i = 0; |
| 666 int32_t pos, tag; |
| 667 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i
++) { |
| 668 if (pos != bounds1[i]) { |
| 669 errln("FAIL: unexpected word break at postion %d", pos); |
| 670 break; |
| 671 } |
| 672 tag = bi->getRuleStatus(); |
| 673 if (tag < tag_lo[i] || tag >= tag_hi[i]) { |
| 674 errln("FAIL: incorrect tag value %d at position %d", tag, pos); |
| 675 break; |
| 676 } |
| 677 |
| 678 // Check that we get the same tag values from getRuleStatusVec() |
| 679 int32_t vec[10]; |
| 680 int t = bi->getRuleStatusVec(vec, 10, status); |
| 681 TEST_ASSERT_SUCCESS(status); |
| 682 TEST_ASSERT(t==1); |
| 683 TEST_ASSERT(vec[0] == tag); |
| 684 } |
| 685 } |
| 686 delete bi; |
| 687 |
| 688 // Now test line break status. This test mostly is to confirm that the sta
tus constants |
| 689 // are correctly declared in the header. |
| 690 testString1 = "test line. \n"; |
| 691 // break type s s h |
| 692 |
| 693 bi = (RuleBasedBreakIterator *) |
| 694 BreakIterator::createLineInstance(Locale::getEnglish(), status); |
| 695 if(U_FAILURE(status)) { |
| 696 errcheckln(status, "failed to create word break iterator. - %s", u_erro
rName(status)); |
| 697 } else { |
| 698 int32_t i = 0; |
| 699 int32_t pos, tag; |
| 700 UBool success; |
| 701 |
| 702 bi->setText(testString1); |
| 703 pos = bi->current(); |
| 704 tag = bi->getRuleStatus(); |
| 705 for (i=0; i<3; i++) { |
| 706 switch (i) { |
| 707 case 0: |
| 708 success = pos==0 && tag==UBRK_LINE_SOFT; break; |
| 709 case 1: |
| 710 success = pos==5 && tag==UBRK_LINE_SOFT; break; |
| 711 case 2: |
| 712 success = pos==12 && tag==UBRK_LINE_HARD; break; |
| 713 default: |
| 714 success = FALSE; break; |
| 715 } |
| 716 if (success == FALSE) { |
| 717 errln("Fail: incorrect word break status or position. i=%d, po
s=%d, tag=%d", |
| 718 i, pos, tag); |
| 719 break; |
| 720 } |
| 721 pos = bi->next(); |
| 722 tag = bi->getRuleStatus(); |
| 723 } |
| 724 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || |
| 725 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || |
| 726 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT
_LIMIT)) { |
| 727 errln("UBRK_LINE_* constants from header are inconsistent."); |
| 728 } |
| 729 } |
| 730 delete bi; |
| 731 |
| 732 } |
| 733 |
| 734 |
| 735 // |
| 736 // TestRuleStatusVec |
| 737 // Test the vector form of break rule status. |
| 738 // |
| 739 void RBBIAPITest::TestRuleStatusVec() { |
| 740 UnicodeString rulesString( "[A-N]{100}; \n" |
| 741 "[a-w]{200}; \n" |
| 742 "[\\p{L}]{300}; \n" |
| 743 "[\\p{N}]{400}; \n" |
| 744 "[0-5]{500}; \n" |
| 745 "!.*;\n", -1, US_INV); |
| 746 UnicodeString testString1 = "Aapz5?"; |
| 747 int32_t statusVals[10]; |
| 748 int32_t numStatuses; |
| 749 int32_t pos; |
| 750 |
| 751 UErrorCode status=U_ZERO_ERROR; |
| 752 UParseError parseError; |
| 753 |
| 754 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseE
rror, status); |
| 755 if (U_FAILURE(status)) { |
| 756 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__
, u_errorName(status)); |
| 757 } else { |
| 758 bi->setText(testString1); |
| 759 |
| 760 // A |
| 761 pos = bi->next(); |
| 762 TEST_ASSERT(pos==1); |
| 763 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); |
| 764 TEST_ASSERT_SUCCESS(status); |
| 765 TEST_ASSERT(numStatuses == 2); |
| 766 TEST_ASSERT(statusVals[0] == 100); |
| 767 TEST_ASSERT(statusVals[1] == 300); |
| 768 |
| 769 // a |
| 770 pos = bi->next(); |
| 771 TEST_ASSERT(pos==2); |
| 772 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); |
| 773 TEST_ASSERT_SUCCESS(status); |
| 774 TEST_ASSERT(numStatuses == 2); |
| 775 TEST_ASSERT(statusVals[0] == 200); |
| 776 TEST_ASSERT(statusVals[1] == 300); |
| 777 |
| 778 // p |
| 779 pos = bi->next(); |
| 780 TEST_ASSERT(pos==3); |
| 781 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); |
| 782 TEST_ASSERT_SUCCESS(status); |
| 783 TEST_ASSERT(numStatuses == 2); |
| 784 TEST_ASSERT(statusVals[0] == 200); |
| 785 TEST_ASSERT(statusVals[1] == 300); |
| 786 |
| 787 // z |
| 788 pos = bi->next(); |
| 789 TEST_ASSERT(pos==4); |
| 790 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); |
| 791 TEST_ASSERT_SUCCESS(status); |
| 792 TEST_ASSERT(numStatuses == 1); |
| 793 TEST_ASSERT(statusVals[0] == 300); |
| 794 |
| 795 // 5 |
| 796 pos = bi->next(); |
| 797 TEST_ASSERT(pos==5); |
| 798 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); |
| 799 TEST_ASSERT_SUCCESS(status); |
| 800 TEST_ASSERT(numStatuses == 2); |
| 801 TEST_ASSERT(statusVals[0] == 400); |
| 802 TEST_ASSERT(statusVals[1] == 500); |
| 803 |
| 804 // ? |
| 805 pos = bi->next(); |
| 806 TEST_ASSERT(pos==6); |
| 807 numStatuses = bi->getRuleStatusVec(statusVals, 10, status); |
| 808 TEST_ASSERT_SUCCESS(status); |
| 809 TEST_ASSERT(numStatuses == 1); |
| 810 TEST_ASSERT(statusVals[0] == 0); |
| 811 |
| 812 // |
| 813 // Check buffer overflow error handling. Char == A |
| 814 // |
| 815 bi->first(); |
| 816 pos = bi->next(); |
| 817 TEST_ASSERT(pos==1); |
| 818 memset(statusVals, -1, sizeof(statusVals)); |
| 819 numStatuses = bi->getRuleStatusVec(statusVals, 0, status); |
| 820 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 821 TEST_ASSERT(numStatuses == 2); |
| 822 TEST_ASSERT(statusVals[0] == -1); |
| 823 |
| 824 status = U_ZERO_ERROR; |
| 825 memset(statusVals, -1, sizeof(statusVals)); |
| 826 numStatuses = bi->getRuleStatusVec(statusVals, 1, status); |
| 827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); |
| 828 TEST_ASSERT(numStatuses == 2); |
| 829 TEST_ASSERT(statusVals[0] == 100); |
| 830 TEST_ASSERT(statusVals[1] == -1); |
| 831 |
| 832 status = U_ZERO_ERROR; |
| 833 memset(statusVals, -1, sizeof(statusVals)); |
| 834 numStatuses = bi->getRuleStatusVec(statusVals, 2, status); |
| 835 TEST_ASSERT_SUCCESS(status); |
| 836 TEST_ASSERT(numStatuses == 2); |
| 837 TEST_ASSERT(statusVals[0] == 100); |
| 838 TEST_ASSERT(statusVals[1] == 300); |
| 839 TEST_ASSERT(statusVals[2] == -1); |
| 840 } |
| 841 delete bi; |
| 842 |
| 843 } |
| 844 |
| 845 // |
| 846 // Bug 2190 Regression test. Builder crash on rule consisting of only a |
| 847 // $variable reference |
| 848 void RBBIAPITest::TestBug2190() { |
| 849 UnicodeString rulesString1 = "$aaa = abcd;\n" |
| 850 "$bbb = $aaa;\n" |
| 851 "$bbb;\n"; |
| 852 UnicodeString testString1 = "abcdabcd"; |
| 853 // 01234567890 |
| 854 int32_t bounds1[] = {0, 4, 8}; |
| 855 UErrorCode status=U_ZERO_ERROR; |
| 856 UParseError parseError; |
| 857 |
| 858 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parse
Error, status); |
| 859 if(U_FAILURE(status)) { |
| 860 dataerrln("Fail : in construction - %s", u_errorName(status)); |
| 861 } else { |
| 862 bi->setText(testString1); |
| 863 doBoundaryTest(*bi, testString1, bounds1); |
| 864 } |
| 865 delete bi; |
| 866 } |
| 867 |
| 868 |
| 869 void RBBIAPITest::TestRegistration() { |
| 870 #if !UCONFIG_NO_SERVICE |
| 871 UErrorCode status = U_ZERO_ERROR; |
| 872 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); |
| 873 |
| 874 // ok to not delete these if we exit because of error? |
| 875 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", sta
tus); |
| 876 BreakIterator* root_word = BreakIterator::createWordInstance("", status); |
| 877 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status
); |
| 878 |
| 879 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { |
| 880 dataerrln("Error creating instances of break interactors - %s", u_errorN
ame(status)); |
| 881 delete ja_word; |
| 882 delete ja_char; |
| 883 delete root_word; |
| 884 delete root_char; |
| 885 |
| 886 return; |
| 887 } |
| 888 |
| 889 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD,
status); |
| 890 { |
| 891 if (ja_word && *ja_word == *root_word) { |
| 892 errln("japan not different from root"); |
| 893 } |
| 894 } |
| 895 |
| 896 { |
| 897 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", statu
s); |
| 898 UBool fail = TRUE; |
| 899 if(result){ |
| 900 fail = *result != *ja_word; |
| 901 } |
| 902 delete result; |
| 903 if (fail) { |
| 904 errln("bad result for xx_XX/word"); |
| 905 } |
| 906 } |
| 907 |
| 908 { |
| 909 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP",
status); |
| 910 UBool fail = TRUE; |
| 911 if(result){ |
| 912 fail = *result != *ja_char; |
| 913 } |
| 914 delete result; |
| 915 if (fail) { |
| 916 errln("bad result for ja_JP/char"); |
| 917 } |
| 918 } |
| 919 |
| 920 { |
| 921 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX",
status); |
| 922 UBool fail = TRUE; |
| 923 if(result){ |
| 924 fail = *result != *root_char; |
| 925 } |
| 926 delete result; |
| 927 if (fail) { |
| 928 errln("bad result for xx_XX/char"); |
| 929 } |
| 930 } |
| 931 |
| 932 { |
| 933 StringEnumeration* avail = BreakIterator::getAvailableLocales(); |
| 934 UBool found = FALSE; |
| 935 const UnicodeString* p; |
| 936 while ((p = avail->snext(status))) { |
| 937 if (p->compare("xx") == 0) { |
| 938 found = TRUE; |
| 939 break; |
| 940 } |
| 941 } |
| 942 delete avail; |
| 943 if (!found) { |
| 944 errln("did not find test locale"); |
| 945 } |
| 946 } |
| 947 |
| 948 { |
| 949 UBool unreg = BreakIterator::unregister(key, status); |
| 950 if (!unreg) { |
| 951 errln("unable to unregister"); |
| 952 } |
| 953 } |
| 954 |
| 955 { |
| 956 BreakIterator* result = BreakIterator::createWordInstance("en_US", statu
s); |
| 957 BreakIterator* root = BreakIterator::createWordInstance("", status); |
| 958 UBool fail = TRUE; |
| 959 if(root){ |
| 960 fail = *root != *result; |
| 961 } |
| 962 delete root; |
| 963 delete result; |
| 964 if (fail) { |
| 965 errln("did not get root break"); |
| 966 } |
| 967 } |
| 968 |
| 969 { |
| 970 StringEnumeration* avail = BreakIterator::getAvailableLocales(); |
| 971 UBool found = FALSE; |
| 972 const UnicodeString* p; |
| 973 while ((p = avail->snext(status))) { |
| 974 if (p->compare("xx") == 0) { |
| 975 found = TRUE; |
| 976 break; |
| 977 } |
| 978 } |
| 979 delete avail; |
| 980 if (found) { |
| 981 errln("found test locale"); |
| 982 } |
| 983 } |
| 984 |
| 985 { |
| 986 int32_t count; |
| 987 UBool foundLocale = FALSE; |
| 988 const Locale *avail = BreakIterator::getAvailableLocales(count); |
| 989 for (int i=0; i<count; i++) { |
| 990 if (avail[i] == Locale::getEnglish()) { |
| 991 foundLocale = TRUE; |
| 992 break; |
| 993 } |
| 994 } |
| 995 if (foundLocale == FALSE) { |
| 996 errln("BreakIterator::getAvailableLocales(&count), failed to find EN
."); |
| 997 } |
| 998 } |
| 999 |
| 1000 |
| 1001 // ja_word was adopted by factory |
| 1002 delete ja_char; |
| 1003 delete root_word; |
| 1004 delete root_char; |
| 1005 #endif |
| 1006 } |
| 1007 |
| 1008 void RBBIAPITest::RoundtripRule(const char *dataFile) { |
| 1009 UErrorCode status = U_ZERO_ERROR; |
| 1010 UParseError parseError; |
| 1011 parseError.line = 0; |
| 1012 parseError.offset = 0; |
| 1013 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &
status)); |
| 1014 uint32_t length; |
| 1015 const UChar *builtSource; |
| 1016 const uint8_t *rbbiRules; |
| 1017 const uint8_t *builtRules; |
| 1018 |
| 1019 if (U_FAILURE(status)) { |
| 1020 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(statu
s)); |
| 1021 return; |
| 1022 } |
| 1023 |
| 1024 builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); |
| 1025 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fR
uleSource); |
| 1026 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, par
seError, status); |
| 1027 if (U_FAILURE(status)) { |
| 1028 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, colum
n %d\n", |
| 1029 u_errorName(status), parseError.line, parseError.offset); |
| 1030 return; |
| 1031 }; |
| 1032 rbbiRules = brkItr->getBinaryRules(length); |
| 1033 logln("Comparing \"%s\" len=%d", dataFile, length); |
| 1034 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { |
| 1035 errln("Built rules and rebuilt rules are different %s", dataFile); |
| 1036 return; |
| 1037 } |
| 1038 delete brkItr; |
| 1039 } |
| 1040 |
| 1041 void RBBIAPITest::TestRoundtripRules() { |
| 1042 RoundtripRule("word"); |
| 1043 RoundtripRule("title"); |
| 1044 RoundtripRule("sent"); |
| 1045 RoundtripRule("line"); |
| 1046 RoundtripRule("char"); |
| 1047 if (!quick) { |
| 1048 RoundtripRule("word_ja"); |
| 1049 RoundtripRule("word_POSIX"); |
| 1050 } |
| 1051 } |
| 1052 |
| 1053 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* |
| 1054 // (these are protected so we access them via a local class RBBIWithProtectedFun
ctions). |
| 1055 // This is just a sanity check, not a thorough test (e.g. we don't check that th
e |
| 1056 // first delete actually frees rulesCopy). |
| 1057 void RBBIAPITest::TestCreateFromRBBIData() { |
| 1058 // Get some handy RBBIData |
| 1059 const char *brkName = "word"; // or "sent", "line", "char", etc. |
| 1060 UErrorCode status = U_ZERO_ERROR; |
| 1061 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &s
tatus)); |
| 1062 if ( U_SUCCESS(status) ) { |
| 1063 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMem
ory(data.getAlias()); |
| 1064 uint32_t length = builtRules->fLength; |
| 1065 RBBIWithProtectedFunctions * brkItr; |
| 1066 |
| 1067 // Try the memory-adopting constructor, need to copy the data first |
| 1068 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); |
| 1069 if ( rulesCopy ) { |
| 1070 uprv_memcpy( rulesCopy, builtRules, length ); |
| 1071 |
| 1072 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); |
| 1073 if ( U_SUCCESS(status) ) { |
| 1074 delete brkItr; // this should free rulesCopy |
| 1075 } else { |
| 1076 errln("create RuleBasedBreakIterator from RBBIData (adopted): IC
U Error \"%s\"\n", u_errorName(status) ); |
| 1077 status = U_ZERO_ERROR;// reset for the next test |
| 1078 uprv_free( rulesCopy ); |
| 1079 } |
| 1080 } |
| 1081 |
| 1082 // Now try the non-adopting constructor |
| 1083 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFun
ctions::kDontAdopt, status); |
| 1084 if ( U_SUCCESS(status) ) { |
| 1085 delete brkItr; // this should NOT attempt to free builtRules |
| 1086 if (builtRules->fLength != length) { // sanity check |
| 1087 errln("create RuleBasedBreakIterator from RBBIData (non-adopted)
: delete affects data\n" ); |
| 1088 } |
| 1089 } else { |
| 1090 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): IC
U Error \"%s\"\n", u_errorName(status) ); |
| 1091 } |
| 1092 } |
| 1093 } |
| 1094 |
| 1095 //--------------------------------------------- |
| 1096 // runIndexedTest |
| 1097 //--------------------------------------------- |
| 1098 |
| 1099 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name,
char* /*par*/ ) |
| 1100 { |
| 1101 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); |
| 1102 switch (index) { |
| 1103 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break
; |
| 1104 #if !UCONFIG_NO_FILE_IO |
| 1105 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; |
| 1106 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; |
| 1107 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; |
| 1108 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText();
break; |
| 1109 case 4: name = "TestIteration"; if (exec) TestIteration(); break; |
| 1110 #else |
| 1111 case 0: case 1: case 2: case 3: case 4: name = "skip"; break; |
| 1112 #endif |
| 1113 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; |
| 1114 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); brea
k; |
| 1115 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); brea
k; |
| 1116 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; |
| 1117 #if !UCONFIG_NO_FILE_IO |
| 1118 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; |
| 1119 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; |
| 1120 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; |
| 1121 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); br
eak; |
| 1122 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIDa
ta(); break; |
| 1123 #else |
| 1124 case 9: case 10: case 11: case 12: case 13: name = "skip"; break; |
| 1125 #endif |
| 1126 |
| 1127 default: name = ""; break; // needed to end loop |
| 1128 } |
| 1129 } |
| 1130 |
| 1131 //--------------------------------------------- |
| 1132 //Internal subroutines |
| 1133 //--------------------------------------------- |
| 1134 |
| 1135 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text
, int32_t *boundaries){ |
| 1136 logln((UnicodeString)"testIsBoundary():"); |
| 1137 int32_t p = 0; |
| 1138 UBool isB; |
| 1139 for (int32_t i = 0; i < text.length(); i++) { |
| 1140 isB = bi.isBoundary(i); |
| 1141 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); |
| 1142 |
| 1143 if (i == boundaries[p]) { |
| 1144 if (!isB) |
| 1145 errln((UnicodeString)"Wrong result from isBoundary() for " +
i + (UnicodeString)": expected true, got false"); |
| 1146 p++; |
| 1147 } |
| 1148 else { |
| 1149 if (isB) |
| 1150 errln((UnicodeString)"Wrong result from isBoundary() for " +
i + (UnicodeString)": expected false, got true"); |
| 1151 } |
| 1152 } |
| 1153 } |
| 1154 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotof
fset, int32_t expectedOffset, const char* expectedString){ |
| 1155 UnicodeString selected; |
| 1156 UnicodeString expected=CharsToUnicodeString(expectedString); |
| 1157 |
| 1158 if(gotoffset != expectedOffset) |
| 1159 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeStrin
g)" instead of #" + expectedOffset); |
| 1160 if(start <= gotoffset){ |
| 1161 testString.extractBetween(start, gotoffset, selected); |
| 1162 } |
| 1163 else{ |
| 1164 testString.extractBetween(gotoffset, start, selected); |
| 1165 } |
| 1166 if(selected.compare(expected) != 0) |
| 1167 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\"
instead of \"" + expected + "\"")); |
| 1168 else |
| 1169 logln(prettify("****selected \"" + selected + "\"")); |
| 1170 } |
| 1171 |
| 1172 //--------------------------------------------- |
| 1173 //RBBIWithProtectedFunctions class functions |
| 1174 //--------------------------------------------- |
| 1175 |
| 1176 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UEr
rorCode &status) |
| 1177 : RuleBasedBreakIterator(data, status) |
| 1178 { |
| 1179 } |
| 1180 |
| 1181 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* dat
a, enum EDontAdopt, UErrorCode &status) |
| 1182 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) |
| 1183 { |
| 1184 } |
| 1185 |
| 1186 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |