OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 1997-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ |
| 6 |
| 7 #include "unicode/utypes.h" |
| 8 |
| 9 #if !UCONFIG_NO_COLLATION |
| 10 |
| 11 #include "unicode/coll.h" |
| 12 #include "unicode/tblcoll.h" |
| 13 #include "unicode/unistr.h" |
| 14 #include "unicode/sortkey.h" |
| 15 #include "regcoll.h" |
| 16 #include "sfwdchit.h" |
| 17 #include "testutil.h" |
| 18 #include "cmemory.h" |
| 19 |
// Number of elements in a true array (not a pointer), as an int32_t.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
| 21 |
| 22 CollationRegressionTest::CollationRegressionTest() |
| 23 { |
| 24 UErrorCode status = U_ZERO_ERROR; |
| 25 |
| 26 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), statu
s); |
| 27 if(U_FAILURE(status)) { |
| 28 delete en_us; |
| 29 en_us = 0; |
| 30 errcheckln(status, "Collator creation failed with %s", u_errorName(status)
); |
| 31 return; |
| 32 } |
| 33 } |
| 34 |
| 35 CollationRegressionTest::~CollationRegressionTest() |
| 36 { |
| 37 delete en_us; |
| 38 } |
| 39 |
| 40 |
| 41 // @bug 4048446 |
| 42 // |
| 43 // CollationElementIterator.reset() doesn't work |
| 44 // |
| 45 void CollationRegressionTest::Test4048446(/* char* par */) |
| 46 { |
| 47 const UnicodeString test1 = "XFILE What subset of all possible test cases ha
s the highest probability of detecting the most errors?"; |
| 48 const UnicodeString test2 = "Xf_ile What subset of all possible test cases h
as the lowest probability of detecting the least errors?"; |
| 49 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); |
| 50 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); |
| 51 UErrorCode status = U_ZERO_ERROR; |
| 52 |
| 53 if (i1 == NULL|| i2 == NULL) |
| 54 { |
| 55 errln("Could not create CollationElementIterator's"); |
| 56 delete i1; |
| 57 delete i2; |
| 58 return; |
| 59 } |
| 60 |
| 61 while (i1->next(status) != CollationElementIterator::NULLORDER) |
| 62 { |
| 63 if (U_FAILURE(status)) |
| 64 { |
| 65 errln("error calling next()"); |
| 66 |
| 67 delete i1; |
| 68 delete i2; |
| 69 return; |
| 70 } |
| 71 } |
| 72 |
| 73 i1->reset(); |
| 74 |
| 75 assertEqual(*i1, *i2); |
| 76 |
| 77 delete i1; |
| 78 delete i2; |
| 79 } |
| 80 |
| 81 // @bug 4051866 |
| 82 // |
| 83 // Collator -> rules -> Collator round-trip broken for expanding characters |
| 84 // |
| 85 void CollationRegressionTest::Test4051866(/* char* par */) |
| 86 { |
| 87 /* |
| 88 RuleBasedCollator c1 = new RuleBasedCollator("< o " |
| 89 +"& oe ,o\u3080" |
| 90 +"& oe ,\u1530 ,O" |
| 91 +"& OE ,O\u3080" |
| 92 +"& OE ,\u1520" |
| 93 +"< p ,P"); |
| 94 */ |
| 95 |
| 96 UnicodeString rules; |
| 97 UErrorCode status = U_ZERO_ERROR; |
| 98 |
| 99 rules += "< o "; |
| 100 rules += "& oe ,o"; |
| 101 rules += (UChar)0x3080; |
| 102 rules += "& oe ,"; |
| 103 rules += (UChar)0x1530; |
| 104 rules += " ,O"; |
| 105 rules += "& OE ,O"; |
| 106 rules += (UChar)0x3080; |
| 107 rules += "& OE ,"; |
| 108 rules += (UChar)0x1520; |
| 109 rules += "< p ,P"; |
| 110 |
| 111 // Build a collator containing expanding characters |
| 112 RuleBasedCollator *c1 = new RuleBasedCollator(rules, status); |
| 113 |
| 114 // Build another using the rules from the first |
| 115 RuleBasedCollator *c2 = new RuleBasedCollator(c1->getRules(), status); |
| 116 |
| 117 // Make sure they're the same |
| 118 if (!(c1->getRules() == c2->getRules())) |
| 119 { |
| 120 errln("Rules are not equal"); |
| 121 } |
| 122 |
| 123 delete c2; |
| 124 delete c1; |
| 125 } |
| 126 |
| 127 // @bug 4053636 |
| 128 // |
| 129 // Collator thinks "black-bird" == "black" |
| 130 // |
| 131 void CollationRegressionTest::Test4053636(/* char* par */) |
| 132 { |
| 133 if (en_us->equals("black_bird", "black")) |
| 134 { |
| 135 errln("black-bird == black"); |
| 136 } |
| 137 } |
| 138 |
| 139 // @bug 4054238 |
| 140 // |
| 141 // CollationElementIterator will not work correctly if the associated |
| 142 // Collator object's mode is changed |
| 143 // |
| 144 void CollationRegressionTest::Test4054238(/* char* par */) |
| 145 { |
| 146 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x
72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; |
| 147 const UnicodeString test3(chars3); |
| 148 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 149 |
| 150 // NOTE: The Java code uses en_us to create the CollationElementIterators |
| 151 // but I'm pretty sure that's wrong, so I've changed this to use c. |
| 152 UErrorCode status = U_ZERO_ERROR; |
| 153 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 154 CollationElementIterator *i1 = c->createCollationElementIterator(test3); |
| 155 delete i1; |
| 156 delete c; |
| 157 } |
| 158 |
| 159 // @bug 4054734 |
| 160 // |
| 161 // Collator::IDENTICAL documented but not implemented |
| 162 // |
| 163 void CollationRegressionTest::Test4054734(/* char* par */) |
| 164 { |
| 165 /* |
| 166 Here's the original Java: |
| 167 |
| 168 String[] decomp = { |
| 169 "\u0001", "<", "\u0002", |
| 170 "\u0001", "=", "\u0001", |
| 171 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compar
ed bitwise |
| 172 "\u00C0", "=", "A\u0300" // Decomp should make these equa
l |
| 173 }; |
| 174 |
| 175 String[] nodecomp = { |
| 176 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave |
| 177 }; |
| 178 */ |
| 179 |
| 180 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 181 { |
| 182 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, |
| 183 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, |
| 184 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, |
| 185 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} |
| 186 }; |
| 187 |
| 188 |
| 189 UErrorCode status = U_ZERO_ERROR; |
| 190 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 191 |
| 192 c->setStrength(Collator::IDENTICAL); |
| 193 |
| 194 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 195 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); |
| 196 |
| 197 delete c; |
| 198 } |
| 199 |
| 200 // @bug 4054736 |
| 201 // |
| 202 // Full Decomposition mode not implemented |
| 203 // |
| 204 void CollationRegressionTest::Test4054736(/* char* par */) |
| 205 { |
| 206 UErrorCode status = U_ZERO_ERROR; |
| 207 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 208 |
| 209 c->setStrength(Collator::SECONDARY); |
| 210 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 211 |
| 212 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 213 { |
| 214 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed |
| 215 }; |
| 216 |
| 217 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 218 |
| 219 delete c; |
| 220 } |
| 221 |
| 222 // @bug 4058613 |
| 223 // |
| 224 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korea
n |
| 225 // |
| 226 void CollationRegressionTest::Test4058613(/* char* par */) |
| 227 { |
| 228 // Creating a default collator doesn't work when Korean is the default |
| 229 // locale |
| 230 |
| 231 Locale oldDefault = Locale::getDefault(); |
| 232 UErrorCode status = U_ZERO_ERROR; |
| 233 |
| 234 Locale::setDefault(Locale::getKorean(), status); |
| 235 |
| 236 if (U_FAILURE(status)) |
| 237 { |
| 238 errln("Could not set default locale to Locale::KOREAN"); |
| 239 return; |
| 240 } |
| 241 |
| 242 Collator *c = NULL; |
| 243 |
| 244 c = Collator::createInstance("en_US", status); |
| 245 |
| 246 if (c == NULL || U_FAILURE(status)) |
| 247 { |
| 248 errln("Could not create a Korean collator"); |
| 249 Locale::setDefault(oldDefault, status); |
| 250 delete c; |
| 251 return; |
| 252 } |
| 253 |
| 254 // Since the fix to this bug was to turn off decomposition for Korean collat
ors, |
| 255 // ensure that's what we got |
| 256 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) |
| 257 { |
| 258 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); |
| 259 } |
| 260 |
| 261 delete c; |
| 262 |
| 263 Locale::setDefault(oldDefault, status); |
| 264 } |
| 265 |
| 266 // @bug 4059820 |
| 267 // |
| 268 // RuleBasedCollator.getRules does not return the exact pattern as input |
| 269 // for expanding character sequences |
| 270 // |
| 271 void CollationRegressionTest::Test4059820(/* char* par */) |
| 272 { |
| 273 UErrorCode status = U_ZERO_ERROR; |
| 274 |
| 275 RuleBasedCollator *c = NULL; |
| 276 UnicodeString rules = "< a < b , c/a < d < z"; |
| 277 |
| 278 c = new RuleBasedCollator(rules, status); |
| 279 |
| 280 if (c == NULL || U_FAILURE(status)) |
| 281 { |
| 282 errln("Failure building a collator."); |
| 283 delete c; |
| 284 return; |
| 285 } |
| 286 |
| 287 if ( c->getRules().indexOf("c/a") == -1) |
| 288 { |
| 289 errln("returned rules do not contain 'c/a'"); |
| 290 } |
| 291 |
| 292 delete c; |
| 293 } |
| 294 |
| 295 // @bug 4060154 |
| 296 // |
| 297 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" |
| 298 // |
| 299 void CollationRegressionTest::Test4060154(/* char* par */) |
| 300 { |
| 301 UErrorCode status = U_ZERO_ERROR; |
| 302 UnicodeString rules; |
| 303 |
| 304 rules += "< g, G < h, H < i, I < j, J"; |
| 305 rules += " & H < "; |
| 306 rules += (UChar)0x0131; |
| 307 rules += ", "; |
| 308 rules += (UChar)0x0130; |
| 309 rules += ", i, I"; |
| 310 |
| 311 RuleBasedCollator *c = NULL; |
| 312 |
| 313 c = new RuleBasedCollator(rules, status); |
| 314 |
| 315 if (c == NULL || U_FAILURE(status)) |
| 316 { |
| 317 errln("failure building collator."); |
| 318 delete c; |
| 319 return; |
| 320 } |
| 321 |
| 322 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 323 |
| 324 /* |
| 325 String[] tertiary = { |
| 326 "A", "<", "B", |
| 327 "H", "<", "\u0131", |
| 328 "H", "<", "I", |
| 329 "\u0131", "<", "\u0130", |
| 330 "\u0130", "<", "i", |
| 331 "\u0130", ">", "H", |
| 332 }; |
| 333 */ |
| 334 |
| 335 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 336 { |
| 337 {0x41, 0}, {0x3c, 0}, {0x42, 0}, |
| 338 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, |
| 339 {0x48, 0}, {0x3c, 0}, {0x49, 0}, |
| 340 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, |
| 341 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, |
| 342 {0x0130, 0}, {0x3e, 0}, {0x48, 0} |
| 343 }; |
| 344 |
| 345 c->setStrength(Collator::TERTIARY); |
| 346 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); |
| 347 |
| 348 /* |
| 349 String[] secondary = { |
| 350 "H", "<", "I", |
| 351 "\u0131", "=", "\u0130", |
| 352 }; |
| 353 */ |
| 354 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 355 { |
| 356 {0x48, 0}, {0x3c, 0}, {0x49, 0}, |
| 357 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} |
| 358 }; |
| 359 |
| 360 c->setStrength(Collator::PRIMARY); |
| 361 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); |
| 362 |
| 363 delete c; |
| 364 } |
| 365 |
| 366 // @bug 4062418 |
| 367 // |
| 368 // Secondary/Tertiary comparison incorrect in French Secondary |
| 369 // |
| 370 void CollationRegressionTest::Test4062418(/* char* par */) |
| 371 { |
| 372 UErrorCode status = U_ZERO_ERROR; |
| 373 |
| 374 RuleBasedCollator *c = NULL; |
| 375 |
| 376 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench()
, status); |
| 377 |
| 378 if (c == NULL || U_FAILURE(status)) |
| 379 { |
| 380 errln("Failed to create collator for Locale::getCanadaFrench()"); |
| 381 delete c; |
| 382 return; |
| 383 } |
| 384 |
| 385 c->setStrength(Collator::SECONDARY); |
| 386 |
| 387 /* |
| 388 String[] tests = { |
| 389 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents
from end, p\u00e9ch\u00e9 is greater |
| 390 }; |
| 391 */ |
| 392 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 393 { |
| 394 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x6
8, 0x00E9, 0} |
| 395 }; |
| 396 |
| 397 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 398 |
| 399 delete c; |
| 400 } |
| 401 |
| 402 // @bug 4065540 |
| 403 // |
| 404 // Collator::compare() method broken if either string contains spaces |
| 405 // |
| 406 void CollationRegressionTest::Test4065540(/* char* par */) |
| 407 { |
| 408 if (en_us->compare("abcd e", "abcd f") == 0) |
| 409 { |
| 410 errln("'abcd e' == 'abcd f'"); |
| 411 } |
| 412 } |
| 413 |
| 414 // @bug 4066189 |
| 415 // |
| 416 // Unicode characters need to be recursively decomposed to get the |
| 417 // correct result. For example, |
| 418 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. |
| 419 // |
| 420 void CollationRegressionTest::Test4066189(/* char* par */) |
| 421 { |
| 422 static const UChar chars1[] = {0x1EB1, 0}; |
| 423 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; |
| 424 const UnicodeString test1(chars1); |
| 425 const UnicodeString test2(chars2); |
| 426 UErrorCode status = U_ZERO_ERROR; |
| 427 |
| 428 // NOTE: The java code used en_us to create the |
| 429 // CollationElementIterator's. I'm pretty sure that |
| 430 // was wrong, so I've change the code to use c1 and c2 |
| 431 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); |
| 432 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 433 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); |
| 434 |
| 435 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); |
| 436 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); |
| 437 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); |
| 438 |
| 439 assertEqual(*i1, *i2); |
| 440 |
| 441 delete i2; |
| 442 delete c2; |
| 443 delete i1; |
| 444 delete c1; |
| 445 } |
| 446 |
| 447 // @bug 4066696 |
| 448 // |
| 449 // French secondary collation checking at the end of compare iteration fails |
| 450 // |
| 451 void CollationRegressionTest::Test4066696(/* char* par */) |
| 452 { |
| 453 UErrorCode status = U_ZERO_ERROR; |
| 454 RuleBasedCollator *c = NULL; |
| 455 |
| 456 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(),
status); |
| 457 |
| 458 if (c == NULL || U_FAILURE(status)) |
| 459 { |
| 460 errln("Failure creating collator for Locale::getCanadaFrench()"); |
| 461 delete c; |
| 462 return; |
| 463 } |
| 464 |
| 465 c->setStrength(Collator::SECONDARY); |
| 466 |
| 467 /* |
| 468 String[] tests = { |
| 469 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute |
| 470 }; |
| 471 |
| 472 should be: |
| 473 |
| 474 String[] tests = { |
| 475 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute |
| 476 }; |
| 477 |
| 478 */ |
| 479 |
| 480 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 481 { |
| 482 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} |
| 483 }; |
| 484 |
| 485 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 486 |
| 487 delete c; |
| 488 } |
| 489 |
| 490 // @bug 4076676 |
| 491 // |
| 492 // Bad canonicalization of same-class combining characters |
| 493 // |
| 494 void CollationRegressionTest::Test4076676(/* char* par */) |
| 495 { |
| 496 // These combining characters are all in the same class, so they should not |
| 497 // be reordered, and they should compare as unequal. |
| 498 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; |
| 499 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; |
| 500 |
| 501 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 502 c->setStrength(Collator::TERTIARY); |
| 503 |
| 504 if (c->compare(s1,s2) == 0) |
| 505 { |
| 506 errln("Same-class combining chars were reordered"); |
| 507 } |
| 508 |
| 509 delete c; |
| 510 } |
| 511 |
| 512 // @bug 4079231 |
| 513 // |
| 514 // RuleBasedCollator::operator==(NULL) throws NullPointerException |
| 515 // |
| 516 void CollationRegressionTest::Test4079231(/* char* par */) |
| 517 { |
| 518 // I don't think there's any way to write this test |
| 519 // in C++. The following is equivalent to the Java, |
| 520 // but doesn't compile 'cause NULL can't be converted |
| 521 // to Collator& |
| 522 // |
| 523 // if (en_us->operator==(NULL)) |
| 524 // { |
| 525 // errln("en_us->operator==(NULL) returned TRUE"); |
| 526 // } |
| 527 |
| 528 /* |
| 529 try { |
| 530 if (en_us->equals(null)) { |
| 531 errln("en_us->equals(null) returned true"); |
| 532 } |
| 533 } |
| 534 catch (Exception e) { |
| 535 errln("en_us->equals(null) threw " + e.toString()); |
| 536 } |
| 537 */ |
| 538 } |
| 539 |
| 540 // @bug 4078588 |
| 541 // |
| 542 // RuleBasedCollator breaks on "< a < bb" rule |
| 543 // |
| 544 void CollationRegressionTest::Test4078588(/* char *par */) |
| 545 { |
| 546 UErrorCode status = U_ZERO_ERROR; |
| 547 RuleBasedCollator *rbc = new RuleBasedCollator((UnicodeString)"< a < bb", st
atus); |
| 548 |
| 549 if (rbc == NULL || U_FAILURE(status)) |
| 550 { |
| 551 errln("Failed to create RuleBasedCollator."); |
| 552 delete rbc; |
| 553 return; |
| 554 } |
| 555 |
| 556 Collator::EComparisonResult result = rbc->compare("a","bb"); |
| 557 |
| 558 if (result != Collator::LESS) |
| 559 { |
| 560 errln((UnicodeString)"Compare(a,bb) returned " + (int)result |
| 561 + (UnicodeString)"; expected -1"); |
| 562 } |
| 563 |
| 564 delete rbc; |
| 565 } |
| 566 |
| 567 // @bug 4081866 |
| 568 // |
| 569 // Combining characters in different classes not reordered properly. |
| 570 // |
| 571 void CollationRegressionTest::Test4081866(/* char* par */) |
| 572 { |
| 573 // These combining characters are all in different classes, |
| 574 // so they should be reordered and the strings should compare as equal. |
| 575 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; |
| 576 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; |
| 577 |
| 578 UErrorCode status = U_ZERO_ERROR; |
| 579 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 580 c->setStrength(Collator::TERTIARY); |
| 581 |
| 582 // Now that the default collators are set to NO_DECOMPOSITION |
| 583 // (as a result of fixing bug 4114077), we must set it explicitly |
| 584 // when we're testing reordering behavior. -- lwerner, 5/5/98 |
| 585 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 586 |
| 587 if (c->compare(s1,s2) != 0) |
| 588 { |
| 589 errln("Combining chars were not reordered"); |
| 590 } |
| 591 |
| 592 delete c; |
| 593 } |
| 594 |
| 595 // @bug 4087241 |
| 596 // |
| 597 // string comparison errors in Scandinavian collators |
| 598 // |
| 599 void CollationRegressionTest::Test4087241(/* char* par */) |
| 600 { |
| 601 UErrorCode status = U_ZERO_ERROR; |
| 602 Locale da_DK("da", "DK"); |
| 603 RuleBasedCollator *c = NULL; |
| 604 |
| 605 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); |
| 606 |
| 607 if (c == NULL || U_FAILURE(status)) |
| 608 { |
| 609 errln("Failed to create collator for da_DK locale"); |
| 610 delete c; |
| 611 return; |
| 612 } |
| 613 |
| 614 c->setStrength(Collator::SECONDARY); |
| 615 |
| 616 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 617 { |
| 618 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae |
| 619 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-unlaut < a-ri
ng |
| 620 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-u
mlaut |
| 621 }; |
| 622 |
| 623 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 624 |
| 625 delete c; |
| 626 } |
| 627 |
| 628 // @bug 4087243 |
| 629 // |
| 630 // CollationKey takes ignorable strings into account when it shouldn't |
| 631 // |
| 632 void CollationRegressionTest::Test4087243(/* char* par */) |
| 633 { |
| 634 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 635 c->setStrength(Collator::TERTIARY); |
| 636 |
| 637 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 638 { |
| 639 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1
2 3 = 1 2 3 ctrl-A |
| 640 }; |
| 641 |
| 642 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 643 |
| 644 delete c; |
| 645 } |
| 646 |
| 647 // @bug 4092260 |
| 648 // |
| 649 // Mu/micro conflict |
| 650 // Micro symbol and greek lowercase letter Mu should sort identically |
| 651 // |
| 652 void CollationRegressionTest::Test4092260(/* char* par */) |
| 653 { |
| 654 UErrorCode status = U_ZERO_ERROR; |
| 655 Locale el("el", ""); |
| 656 Collator *c = NULL; |
| 657 |
| 658 c = Collator::createInstance(el, status); |
| 659 |
| 660 if (c == NULL || U_FAILURE(status)) |
| 661 { |
| 662 errln("Failed to create collator for el locale."); |
| 663 delete c; |
| 664 return; |
| 665 } |
| 666 |
| 667 // These now have tertiary differences in UCA |
| 668 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); |
| 669 |
| 670 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 671 { |
| 672 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} |
| 673 }; |
| 674 |
| 675 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 676 |
| 677 delete c; |
| 678 } |
| 679 |
| 680 // @bug 4095316 |
| 681 // |
| 682 void CollationRegressionTest::Test4095316(/* char* par */) |
| 683 { |
| 684 UErrorCode status = U_ZERO_ERROR; |
| 685 Locale el_GR("el", "GR"); |
| 686 Collator *c = Collator::createInstance(el_GR, status); |
| 687 |
| 688 if (c == NULL || U_FAILURE(status)) |
| 689 { |
| 690 errln("Failed to create collator for el_GR locale"); |
| 691 delete c; |
| 692 return; |
| 693 } |
| 694 // These now have tertiary differences in UCA |
| 695 //c->setStrength(Collator::TERTIARY); |
| 696 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); |
| 697 |
| 698 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 699 { |
| 700 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} |
| 701 }; |
| 702 |
| 703 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 704 |
| 705 delete c; |
| 706 } |
| 707 |
| 708 // @bug 4101940 |
| 709 // |
| 710 void CollationRegressionTest::Test4101940(/* char* par */) |
| 711 { |
| 712 UErrorCode status = U_ZERO_ERROR; |
| 713 RuleBasedCollator *c = NULL; |
| 714 UnicodeString rules = "< a < b"; |
| 715 UnicodeString nothing = ""; |
| 716 |
| 717 c = new RuleBasedCollator(rules, status); |
| 718 |
| 719 if (c == NULL || U_FAILURE(status)) |
| 720 { |
| 721 errln("Failed to create RuleBasedCollator"); |
| 722 delete c; |
| 723 return; |
| 724 } |
| 725 |
| 726 CollationElementIterator *i = c->createCollationElementIterator(nothing); |
| 727 i->reset(); |
| 728 |
| 729 if (i->next(status) != CollationElementIterator::NULLORDER) |
| 730 { |
| 731 errln("next did not return NULLORDER"); |
| 732 } |
| 733 |
| 734 delete i; |
| 735 delete c; |
| 736 } |
| 737 |
| 738 // @bug 4103436 |
| 739 // |
| 740 // Collator::compare not handling spaces properly |
| 741 // |
| 742 void CollationRegressionTest::Test4103436(/* char* par */) |
| 743 { |
| 744 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 745 c->setStrength(Collator::TERTIARY); |
| 746 |
| 747 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 748 { |
| 749 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0
x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, |
| 750 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0
x63, 0x63, 0x65, 0x73, 0x73, 0} |
| 751 }; |
| 752 |
| 753 compareArray(*c, tests, ARRAY_LENGTH(tests)); |
| 754 |
| 755 delete c; |
| 756 } |
| 757 |
| 758 // @bug 4114076 |
| 759 // |
| 760 // Collation not Unicode conformant with Hangul syllables |
| 761 // |
| 762 void CollationRegressionTest::Test4114076(/* char* par */) |
| 763 { |
| 764 UErrorCode status = U_ZERO_ERROR; |
| 765 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 766 c->setStrength(Collator::TERTIARY); |
| 767 |
| 768 // |
| 769 // With Canonical decomposition, Hangul syllables should get decomposed |
| 770 // into Jamo, but Jamo characters should not be decomposed into |
| 771 // conjoining Jamo |
| 772 // |
| 773 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 774 { |
| 775 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} |
| 776 }; |
| 777 |
| 778 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 779 compareArray(*c, test1, ARRAY_LENGTH(test1)); |
| 780 |
| 781 // From UTR #15: |
| 782 // *In earlier versions of Unicode, jamo characters like ksf |
| 783 // had compatibility mappings to kf + sf. These mappings were |
| 784 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.
) |
| 785 // That is, the following test is obsolete as of 2.1.9 |
| 786 |
| 787 //obsolete- // With Full decomposition, it should go all the way down to |
| 788 //obsolete- // conjoining Jamo characters. |
| 789 //obsolete- // |
| 790 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN
] = |
| 791 //obsolete- { |
| 792 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11
c2, 0} |
| 793 //obsolete- }; |
| 794 //obsolete- |
| 795 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); |
| 796 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); |
| 797 |
| 798 delete c; |
| 799 } |
| 800 |
| 801 |
| 802 // @bug 4124632 |
| 803 // |
| 804 // Collator::getCollationKey was hanging on certain character sequences |
| 805 // |
| 806 void CollationRegressionTest::Test4124632(/* char* par */) |
| 807 { |
| 808 UErrorCode status = U_ZERO_ERROR; |
| 809 Collator *coll = NULL; |
| 810 |
| 811 coll = Collator::createInstance(Locale::getJapan(), status); |
| 812 |
| 813 if (coll == NULL || U_FAILURE(status)) |
| 814 { |
| 815 errln("Failed to create collator for Locale::JAPAN"); |
| 816 delete coll; |
| 817 return; |
| 818 } |
| 819 |
| 820 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; |
| 821 CollationKey key; |
| 822 |
| 823 coll->getCollationKey(test, key, status); |
| 824 |
| 825 if (key.isBogus() || U_FAILURE(status)) |
| 826 { |
| 827 errln("CollationKey creation failed."); |
| 828 } |
| 829 |
| 830 delete coll; |
| 831 } |
| 832 |
| 833 // @bug 4132736 |
| 834 // |
| 835 // sort order of french words with multiple accents has errors |
| 836 // |
| 837 void CollationRegressionTest::Test4132736(/* char* par */) |
| 838 { |
| 839 UErrorCode status = U_ZERO_ERROR; |
| 840 |
| 841 Collator *c = NULL; |
| 842 |
| 843 c = Collator::createInstance(Locale::getCanadaFrench(), status); |
| 844 c->setStrength(Collator::TERTIARY); |
| 845 |
| 846 if (c == NULL || U_FAILURE(status)) |
| 847 { |
| 848 errln("Failed to create a collator for Locale::getCanadaFrench()"); |
| 849 delete c; |
| 850 return; |
| 851 } |
| 852 |
| 853 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 854 { |
| 855 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300,
0}, |
| 856 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} |
| 857 }; |
| 858 |
| 859 compareArray(*c, test1, ARRAY_LENGTH(test1)); |
| 860 |
| 861 delete c; |
| 862 } |
| 863 |
| 864 // @bug 4133509 |
| 865 // |
| 866 // The sorting using java.text.CollationKey is not in the exact order |
| 867 // |
| 868 void CollationRegressionTest::Test4133509(/* char* par */) |
| 869 { |
| 870 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 871 { |
| 872 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0
x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x6
9, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72,
0}, |
| 873 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x
47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f
, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, |
| 874 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x
53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} |
| 875 }; |
| 876 |
| 877 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); |
| 878 } |
| 879 |
| 880 // @bug 4114077 |
| 881 // |
| 882 // Collation with decomposition off doesn't work for Europe |
| 883 // |
| 884 void CollationRegressionTest::Test4114077(/* char* par */) |
| 885 { |
| 886 // Ensure that we get the same results with decomposition off |
| 887 // as we do with it on.... |
| 888 |
| 889 UErrorCode status = U_ZERO_ERROR; |
| 890 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); |
| 891 c->setStrength(Collator::TERTIARY); |
| 892 |
| 893 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 894 { |
| 895 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0},
// Should be equivalent |
| 896 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x6
8, 0x00e9, 0}, |
| 897 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, |
| 898 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},
// a-ring-acute -> a-ring, acute |
| 899 // -> a, ring, acute |
| 900 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}
// No reordering --> unequal |
| 901 }; |
| 902 |
| 903 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); |
| 904 compareArray(*c, test1, ARRAY_LENGTH(test1)); |
| 905 |
| 906 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = |
| 907 { |
| 908 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} //
Reordering --> equal |
| 909 }; |
| 910 |
| 911 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); |
| 912 compareArray(*c, test2, ARRAY_LENGTH(test2)); |
| 913 |
| 914 delete c; |
| 915 } |
| 916 |
| 917 // @bug 4141640 |
| 918 // |
| 919 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) |
| 920 // |
| 921 void CollationRegressionTest::Test4141640(/* char* par */) |
| 922 { |
| 923 // |
| 924 // Rather than just creating a Swedish collator, we might as well |
| 925 // try to instantiate one for every locale available on the system |
| 926 // in order to prevent this sort of bug from cropping up in the future |
| 927 // |
| 928 UErrorCode status = U_ZERO_ERROR; |
| 929 int32_t i, localeCount; |
| 930 const Locale *locales = Locale::getAvailableLocales(localeCount); |
| 931 |
| 932 for (i = 0; i < localeCount; i += 1) |
| 933 { |
| 934 Collator *c = NULL; |
| 935 |
| 936 status = U_ZERO_ERROR; |
| 937 c = Collator::createInstance(locales[i], status); |
| 938 |
| 939 if (c == NULL || U_FAILURE(status)) |
| 940 { |
| 941 UnicodeString msg, localeName; |
| 942 |
| 943 msg += "Could not create collator for locale "; |
| 944 msg += locales[i].getName(); |
| 945 |
| 946 errln(msg); |
| 947 } |
| 948 |
| 949 delete c; |
| 950 } |
| 951 } |
| 952 |
| 953 // @bug 4139572 |
| 954 // |
| 955 // getCollationKey throws exception for spanish text |
| 956 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 |
| 957 // |
| 958 void CollationRegressionTest::Test4139572(/* char* par */) |
| 959 { |
| 960 // |
| 961 // Code pasted straight from the bug report |
| 962 // (and then translated to C++ ;-) |
| 963 // |
| 964 // create spanish locale and collator |
| 965 UErrorCode status = U_ZERO_ERROR; |
| 966 Locale l("es", "es"); |
| 967 Collator *col = NULL; |
| 968 |
| 969 col = Collator::createInstance(l, status); |
| 970 |
| 971 if (col == NULL || U_FAILURE(status)) |
| 972 { |
| 973 errln("Failed to create a collator for es_es locale."); |
| 974 delete col; |
| 975 return; |
| 976 } |
| 977 |
| 978 CollationKey key; |
| 979 |
| 980 // this spanish phrase kills it! |
| 981 col->getCollationKey("Nombre De Objeto", key, status); |
| 982 |
| 983 if (key.isBogus() || U_FAILURE(status)) |
| 984 { |
| 985 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); |
| 986 } |
| 987 |
| 988 delete col; |
| 989 } |
| 990 /* HSYS : RuleBasedCollator::compare() performance enhancements |
| 991 compare() does not create CollationElementIterator() anymore.*/ |
| 992 |
| 993 class My4146160Collator : public RuleBasedCollator |
| 994 { |
| 995 public: |
| 996 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); |
| 997 ~My4146160Collator(); |
| 998 |
| 999 CollationElementIterator *createCollationElementIterator(const UnicodeString
&text) const; |
| 1000 |
| 1001 CollationElementIterator *createCollationElementIterator(const CharacterIter
ator &text) const; |
| 1002 |
| 1003 static int32_t count; |
| 1004 }; |
| 1005 |
| 1006 int32_t My4146160Collator::count = 0; |
| 1007 |
| 1008 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) |
| 1009 : RuleBasedCollator(rbc.getRules(), status) |
| 1010 { |
| 1011 } |
| 1012 |
| 1013 My4146160Collator::~My4146160Collator() |
| 1014 { |
| 1015 } |
| 1016 |
| 1017 CollationElementIterator *My4146160Collator::createCollationElementIterator(cons
t UnicodeString &text) const |
| 1018 { |
| 1019 count += 1; |
| 1020 return RuleBasedCollator::createCollationElementIterator(text); |
| 1021 } |
| 1022 |
| 1023 CollationElementIterator *My4146160Collator::createCollationElementIterator(cons
t CharacterIterator &text) const |
| 1024 { |
| 1025 count += 1; |
| 1026 return RuleBasedCollator::createCollationElementIterator(text); |
| 1027 } |
| 1028 |
| 1029 // @bug 4146160 |
| 1030 // |
| 1031 // RuleBasedCollator doesn't use createCollationElementIterator internally |
| 1032 // |
| 1033 void CollationRegressionTest::Test4146160(/* char* par */) |
| 1034 { |
| 1035 #if 0 |
| 1036 // |
| 1037 // Use a custom collator class whose createCollationElementIterator |
| 1038 // methods increment a count.... |
| 1039 // |
| 1040 UErrorCode status = U_ZERO_ERROR; |
| 1041 CollationKey key; |
| 1042 |
| 1043 My4146160Collator::count = 0; |
| 1044 My4146160Collator *mc = NULL; |
| 1045 |
| 1046 mc = new My4146160Collator(*en_us, status); |
| 1047 |
| 1048 if (mc == NULL || U_FAILURE(status)) |
| 1049 { |
| 1050 errln("Failed to create a My4146160Collator."); |
| 1051 delete mc; |
| 1052 return; |
| 1053 } |
| 1054 |
| 1055 mc->getCollationKey("1", key, status); |
| 1056 |
| 1057 if (key.isBogus() || U_FAILURE(status)) |
| 1058 { |
| 1059 errln("Failure to get a CollationKey from a My4146160Collator."); |
| 1060 delete mc; |
| 1061 return; |
| 1062 } |
| 1063 |
| 1064 if (My4146160Collator::count < 1) |
| 1065 { |
| 1066 errln("My4146160Collator::createCollationElementIterator not called for
getCollationKey"); |
| 1067 } |
| 1068 |
| 1069 My4146160Collator::count = 0; |
| 1070 mc->compare("1", "2"); |
| 1071 |
| 1072 if (My4146160Collator::count < 1) |
| 1073 { |
| 1074 errln("My4146160Collator::createtCollationElementIterator not called for
compare"); |
| 1075 } |
| 1076 |
| 1077 delete mc; |
| 1078 #endif |
| 1079 } |
| 1080 |
| 1081 // Ticket 7189 |
| 1082 // |
| 1083 // nextSortKeyPart incorrect for EO_S1 collation |
| 1084 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t le
n, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { |
| 1085 UCharIterator uiter; |
| 1086 uint32_t state[2] = { 0, 0 }; |
| 1087 int32_t keyLen; |
| 1088 int32_t count = 8; |
| 1089 |
| 1090 uiter_setString(&uiter, text, len); |
| 1091 keyLen = 0; |
| 1092 while (TRUE) { |
| 1093 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[k
eyLen], count, &status); |
| 1094 if (U_FAILURE(status)) { |
| 1095 return -1; |
| 1096 } |
| 1097 if (keyPartLen == 0) { |
| 1098 break; |
| 1099 } |
| 1100 keyLen += keyPartLen; |
| 1101 } |
| 1102 return keyLen; |
| 1103 } |
| 1104 |
| 1105 void CollationRegressionTest::TestT7189() { |
| 1106 UErrorCode status = U_ZERO_ERROR; |
| 1107 UCollator *coll; |
| 1108 uint32_t i; |
| 1109 |
| 1110 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { |
| 1111 // "Achter De Hoven" |
| 1112 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F
, 0x76, 0x65, 0x6E, 0x00 }, |
| 1113 // "ABC" |
| 1114 { 0x41, 0x42, 0x43, 0x00 }, |
| 1115 // "HELLO world!" |
| 1116 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21
, 0x00 } |
| 1117 }; |
| 1118 |
| 1119 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { |
| 1120 // "Achter de Hoven" |
| 1121 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F
, 0x76, 0x65, 0x6E, 0x00 }, |
| 1122 // "abc" |
| 1123 { 0x61, 0x62, 0x63, 0x00 }, |
| 1124 // "hello world!" |
| 1125 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21
, 0x00 } |
| 1126 }; |
| 1127 |
| 1128 // Open the collator |
| 1129 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); |
| 1130 if (U_FAILURE(status)) { |
| 1131 errln("Failed to create a collator for short string EO_S1"); |
| 1132 return; |
| 1133 } |
| 1134 |
| 1135 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * si
zeof(UChar)); i++) { |
| 1136 uint8_t key1[100], key2[100]; |
| 1137 int32_t len1, len2; |
| 1138 |
| 1139 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status
); |
| 1140 if (U_FAILURE(status)) { |
| 1141 errln(UnicodeString("Failed to get a partial collation key for ") +
text1[i]); |
| 1142 break; |
| 1143 } |
| 1144 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status
); |
| 1145 if (U_FAILURE(status)) { |
| 1146 errln(UnicodeString("Failed to get a partial collation key for ") +
text2[i]); |
| 1147 break; |
| 1148 } |
| 1149 |
| 1150 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { |
| 1151 errln(UnicodeString("Failed: Identical key\n") + " text1: " + tex
t1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::he
x(key1, len1)); |
| 1152 } else { |
| 1153 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i]
+ "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + t
ext2[i] + "\n" + " key2 : " |
| 1154 + TestUtility::hex(key2, len2)); |
| 1155 } |
| 1156 } |
| 1157 ucol_close(coll); |
| 1158 } |
| 1159 |
| 1160 void CollationRegressionTest::compareArray(Collator &c, |
| 1161 const UChar tests[][CollationRegressi
onTest::MAX_TOKEN_LEN], |
| 1162 int32_t testCount) |
| 1163 { |
| 1164 int32_t i; |
| 1165 Collator::EComparisonResult expectedResult = Collator::EQUAL; |
| 1166 |
| 1167 for (i = 0; i < testCount; i += 3) |
| 1168 { |
| 1169 UnicodeString source(tests[i]); |
| 1170 UnicodeString comparison(tests[i + 1]); |
| 1171 UnicodeString target(tests[i + 2]); |
| 1172 |
| 1173 if (comparison == "<") |
| 1174 { |
| 1175 expectedResult = Collator::LESS; |
| 1176 } |
| 1177 else if (comparison == ">") |
| 1178 { |
| 1179 expectedResult = Collator::GREATER; |
| 1180 } |
| 1181 else if (comparison == "=") |
| 1182 { |
| 1183 expectedResult = Collator::EQUAL; |
| 1184 } |
| 1185 else |
| 1186 { |
| 1187 UnicodeString bogus1("Bogus comparison string \""); |
| 1188 UnicodeString bogus2("\""); |
| 1189 errln(bogus1 + comparison + bogus2); |
| 1190 } |
| 1191 |
| 1192 Collator::EComparisonResult compareResult = c.compare(source, target); |
| 1193 |
| 1194 CollationKey sourceKey, targetKey; |
| 1195 UErrorCode status = U_ZERO_ERROR; |
| 1196 |
| 1197 c.getCollationKey(source, sourceKey, status); |
| 1198 |
| 1199 if (U_FAILURE(status)) |
| 1200 { |
| 1201 errln("Couldn't get collationKey for source"); |
| 1202 continue; |
| 1203 } |
| 1204 |
| 1205 c.getCollationKey(target, targetKey, status); |
| 1206 |
| 1207 if (U_FAILURE(status)) |
| 1208 { |
| 1209 errln("Couldn't get collationKey for target"); |
| 1210 continue; |
| 1211 } |
| 1212 |
| 1213 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); |
| 1214 |
| 1215 reportCResult( source, target, sourceKey, targetKey, compareResult, keyR
esult, compareResult, expectedResult ); |
| 1216 |
| 1217 } |
| 1218 } |
| 1219 |
| 1220 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, Collatio
nElementIterator &i2) |
| 1221 { |
| 1222 int32_t c1, c2, count = 0; |
| 1223 UErrorCode status = U_ZERO_ERROR; |
| 1224 |
| 1225 do |
| 1226 { |
| 1227 c1 = i1.next(status); |
| 1228 c2 = i2.next(status); |
| 1229 |
| 1230 if (c1 != c2) |
| 1231 { |
| 1232 UnicodeString msg, msg1(" "); |
| 1233 |
| 1234 msg += msg1 + count; |
| 1235 msg += ": strength(0x"; |
| 1236 appendHex(c1, 8, msg); |
| 1237 msg += ") != strength(0x"; |
| 1238 appendHex(c2, 8, msg); |
| 1239 msg += ")"; |
| 1240 |
| 1241 errln(msg); |
| 1242 break; |
| 1243 } |
| 1244 |
| 1245 count += 1; |
| 1246 } |
| 1247 while (c1 != CollationElementIterator::NULLORDER); |
| 1248 } |
| 1249 |
| 1250 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
ar* &name, char* /* par */) |
| 1251 { |
| 1252 if (exec) |
| 1253 { |
| 1254 logln("Collation Regression Tests: "); |
| 1255 } |
| 1256 |
| 1257 if(en_us) { |
| 1258 switch (index) |
| 1259 { |
| 1260 case 0: name = "Test4048446"; if (exec) Test4048446(/* par */); break
; |
| 1261 case 1: name = "Test4051866"; if (exec) Test4051866(/* par */); break
; |
| 1262 case 2: name = "Test4053636"; if (exec) Test4053636(/* par */); break
; |
| 1263 case 3: name = "Test4054238"; if (exec) Test4054238(/* par */); break
; |
| 1264 case 4: name = "Test4054734"; if (exec) Test4054734(/* par */); break
; |
| 1265 case 5: name = "Test4054736"; if (exec) Test4054736(/* par */); break
; |
| 1266 case 6: name = "Test4058613"; if (exec) Test4058613(/* par */); break
; |
| 1267 case 7: name = "Test4059820"; if (exec) Test4059820(/* par */); break
; |
| 1268 case 8: name = "Test4060154"; if (exec) Test4060154(/* par */); break
; |
| 1269 case 9: name = "Test4062418"; if (exec) Test4062418(/* par */); break
; |
| 1270 case 10: name = "Test4065540"; if (exec) Test4065540(/* par */); break
; |
| 1271 case 11: name = "Test4066189"; if (exec) Test4066189(/* par */); break
; |
| 1272 case 12: name = "Test4066696"; if (exec) Test4066696(/* par */); break
; |
| 1273 case 13: name = "Test4076676"; if (exec) Test4076676(/* par */); break
; |
| 1274 case 14: name = "Test4078588"; if (exec) Test4078588(/* par */); break
; |
| 1275 case 15: name = "Test4079231"; if (exec) Test4079231(/* par */); break
; |
| 1276 case 16: name = "Test4081866"; if (exec) Test4081866(/* par */); break
; |
| 1277 case 17: name = "Test4087241"; if (exec) Test4087241(/* par */); break
; |
| 1278 case 18: name = "Test4087243"; if (exec) Test4087243(/* par */); break
; |
| 1279 case 19: name = "Test4092260"; if (exec) Test4092260(/* par */); break
; |
| 1280 case 20: name = "Test4095316"; if (exec) Test4095316(/* par */); break
; |
| 1281 case 21: name = "Test4101940"; if (exec) Test4101940(/* par */); break
; |
| 1282 case 22: name = "Test4103436"; if (exec) Test4103436(/* par */); break
; |
| 1283 case 23: name = "Test4114076"; if (exec) Test4114076(/* par */); break
; |
| 1284 case 24: name = "Test4114077"; if (exec) Test4114077(/* par */); break
; |
| 1285 case 25: name = "Test4124632"; if (exec) Test4124632(/* par */); break
; |
| 1286 case 26: name = "Test4132736"; if (exec) Test4132736(/* par */); break
; |
| 1287 case 27: name = "Test4133509"; if (exec) Test4133509(/* par */); break
; |
| 1288 case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break
; |
| 1289 case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break
; |
| 1290 case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break
; |
| 1291 case 31: name = "TestT7189"; if (exec) TestT7189(); break; |
| 1292 default: name = ""; break; |
| 1293 } |
| 1294 } else { |
| 1295 dataerrln("Class collator not instantiated"); |
| 1296 name = ""; |
| 1297 } |
| 1298 } |
| 1299 |
| 1300 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |