| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (c) 1997-2014, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ********************************************************************/ | |
| 6 | |
| 7 #include "unicode/utypes.h" | |
| 8 | |
| 9 #if !UCONFIG_NO_COLLATION | |
| 10 | |
| 11 #include "unicode/coll.h" | |
| 12 #include "unicode/localpointer.h" | |
| 13 #include "unicode/tblcoll.h" | |
| 14 #include "unicode/unistr.h" | |
| 15 #include "unicode/sortkey.h" | |
| 16 #include "regcoll.h" | |
| 17 #include "sfwdchit.h" | |
| 18 #include "testutil.h" | |
| 19 #include "cmemory.h" | |
| 20 | |
// Number of elements in a true array (not a pointer), as an int32_t.
// The argument is parenthesized so that expression arguments expand correctly.
#define ARRAY_LENGTH(array) ((int32_t)(sizeof(array) / sizeof((array)[0])))
| 22 | |
| 23 CollationRegressionTest::CollationRegressionTest() | |
| 24 { | |
| 25 UErrorCode status = U_ZERO_ERROR; | |
| 26 | |
| 27 en_us = (RuleBasedCollator *)Collator::createInstance(Locale::getUS(), statu
s); | |
| 28 if(U_FAILURE(status)) { | |
| 29 delete en_us; | |
| 30 en_us = 0; | |
| 31 errcheckln(status, "Collator creation failed with %s", u_errorName(status)
); | |
| 32 return; | |
| 33 } | |
| 34 } | |
| 35 | |
| 36 CollationRegressionTest::~CollationRegressionTest() | |
| 37 { | |
| 38 delete en_us; | |
| 39 } | |
| 40 | |
| 41 | |
| 42 // @bug 4048446 | |
| 43 // | |
| 44 // CollationElementIterator.reset() doesn't work | |
| 45 // | |
| 46 void CollationRegressionTest::Test4048446(/* char* par */) | |
| 47 { | |
| 48 const UnicodeString test1 = "XFILE What subset of all possible test cases ha
s the highest probability of detecting the most errors?"; | |
| 49 const UnicodeString test2 = "Xf_ile What subset of all possible test cases h
as the lowest probability of detecting the least errors?"; | |
| 50 CollationElementIterator *i1 = en_us->createCollationElementIterator(test1); | |
| 51 CollationElementIterator *i2 = en_us->createCollationElementIterator(test1); | |
| 52 UErrorCode status = U_ZERO_ERROR; | |
| 53 | |
| 54 if (i1 == NULL|| i2 == NULL) | |
| 55 { | |
| 56 errln("Could not create CollationElementIterator's"); | |
| 57 delete i1; | |
| 58 delete i2; | |
| 59 return; | |
| 60 } | |
| 61 | |
| 62 while (i1->next(status) != CollationElementIterator::NULLORDER) | |
| 63 { | |
| 64 if (U_FAILURE(status)) | |
| 65 { | |
| 66 errln("error calling next()"); | |
| 67 | |
| 68 delete i1; | |
| 69 delete i2; | |
| 70 return; | |
| 71 } | |
| 72 } | |
| 73 | |
| 74 i1->reset(); | |
| 75 | |
| 76 assertEqual(*i1, *i2); | |
| 77 | |
| 78 delete i1; | |
| 79 delete i2; | |
| 80 } | |
| 81 | |
| 82 // @bug 4051866 | |
| 83 // | |
| 84 // Collator -> rules -> Collator round-trip broken for expanding characters | |
| 85 // | |
| 86 void CollationRegressionTest::Test4051866(/* char* par */) | |
| 87 { | |
| 88 UnicodeString rules; | |
| 89 UErrorCode status = U_ZERO_ERROR; | |
| 90 | |
| 91 rules += "&n < o "; | |
| 92 rules += "& oe ,o"; | |
| 93 rules += (UChar)0x3080; | |
| 94 rules += "& oe ,"; | |
| 95 rules += (UChar)0x1530; | |
| 96 rules += " ,O"; | |
| 97 rules += "& OE ,O"; | |
| 98 rules += (UChar)0x3080; | |
| 99 rules += "& OE ,"; | |
| 100 rules += (UChar)0x1520; | |
| 101 rules += "< p ,P"; | |
| 102 | |
| 103 // Build a collator containing expanding characters | |
| 104 LocalPointer<RuleBasedCollator> c1(new RuleBasedCollator(rules, status), sta
tus); | |
| 105 if (U_FAILURE(status)) { | |
| 106 errln("RuleBasedCollator(rule string) failed - %s", u_errorName(status))
; | |
| 107 return; | |
| 108 } | |
| 109 | |
| 110 // Build another using the rules from the first | |
| 111 LocalPointer<RuleBasedCollator> c2(new RuleBasedCollator(c1->getRules(), sta
tus), status); | |
| 112 if (U_FAILURE(status)) { | |
| 113 errln("RuleBasedCollator(rule string from other RBC) failed - %s", u_err
orName(status)); | |
| 114 return; | |
| 115 } | |
| 116 | |
| 117 // Make sure they're the same | |
| 118 if (!(c1->getRules() == c2->getRules())) | |
| 119 { | |
| 120 errln("Rules are not equal"); | |
| 121 } | |
| 122 } | |
| 123 | |
| 124 // @bug 4053636 | |
| 125 // | |
| 126 // Collator thinks "black-bird" == "black" | |
| 127 // | |
| 128 void CollationRegressionTest::Test4053636(/* char* par */) | |
| 129 { | |
| 130 if (en_us->equals("black_bird", "black")) | |
| 131 { | |
| 132 errln("black-bird == black"); | |
| 133 } | |
| 134 } | |
| 135 | |
| 136 // @bug 4054238 | |
| 137 // | |
| 138 // CollationElementIterator will not work correctly if the associated | |
| 139 // Collator object's mode is changed | |
| 140 // | |
| 141 void CollationRegressionTest::Test4054238(/* char* par */) | |
| 142 { | |
| 143 const UChar chars3[] = {0x61, 0x00FC, 0x62, 0x65, 0x63, 0x6b, 0x20, 0x47, 0x
72, 0x00F6, 0x00DF, 0x65, 0x20, 0x4c, 0x00FC, 0x62, 0x63, 0x6b, 0}; | |
| 144 const UnicodeString test3(chars3); | |
| 145 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 146 | |
| 147 // NOTE: The Java code uses en_us to create the CollationElementIterators | |
| 148 // but I'm pretty sure that's wrong, so I've changed this to use c. | |
| 149 UErrorCode status = U_ZERO_ERROR; | |
| 150 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 151 CollationElementIterator *i1 = c->createCollationElementIterator(test3); | |
| 152 delete i1; | |
| 153 delete c; | |
| 154 } | |
| 155 | |
| 156 // @bug 4054734 | |
| 157 // | |
| 158 // Collator::IDENTICAL documented but not implemented | |
| 159 // | |
| 160 void CollationRegressionTest::Test4054734(/* char* par */) | |
| 161 { | |
| 162 /* | |
| 163 Here's the original Java: | |
| 164 | |
| 165 String[] decomp = { | |
| 166 "\u0001", "<", "\u0002", | |
| 167 "\u0001", "=", "\u0001", | |
| 168 "A\u0001", ">", "~\u0002", // Ensure A and ~ are not compar
ed bitwise | |
| 169 "\u00C0", "=", "A\u0300" // Decomp should make these equa
l | |
| 170 }; | |
| 171 | |
| 172 String[] nodecomp = { | |
| 173 "\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave | |
| 174 }; | |
| 175 */ | |
| 176 | |
| 177 static const UChar decomp[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 178 { | |
| 179 {0x0001, 0}, {0x3c, 0}, {0x0002, 0}, | |
| 180 {0x0001, 0}, {0x3d, 0}, {0x0001, 0}, | |
| 181 {0x41, 0x0001, 0}, {0x3e, 0}, {0x7e, 0x0002, 0}, | |
| 182 {0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0} | |
| 183 }; | |
| 184 | |
| 185 | |
| 186 UErrorCode status = U_ZERO_ERROR; | |
| 187 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 188 | |
| 189 c->setStrength(Collator::IDENTICAL); | |
| 190 | |
| 191 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 192 compareArray(*c, decomp, ARRAY_LENGTH(decomp)); | |
| 193 | |
| 194 delete c; | |
| 195 } | |
| 196 | |
| 197 // @bug 4054736 | |
| 198 // | |
| 199 // Full Decomposition mode not implemented | |
| 200 // | |
| 201 void CollationRegressionTest::Test4054736(/* char* par */) | |
| 202 { | |
| 203 UErrorCode status = U_ZERO_ERROR; | |
| 204 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 205 | |
| 206 c->setStrength(Collator::SECONDARY); | |
| 207 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 208 | |
| 209 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 210 { | |
| 211 {0xFB4F, 0}, {0x3d, 0}, {0x05D0, 0x05DC} // Alef-Lamed vs. Alef, Lamed | |
| 212 }; | |
| 213 | |
| 214 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 215 | |
| 216 delete c; | |
| 217 } | |
| 218 | |
| 219 // @bug 4058613 | |
| 220 // | |
| 221 // Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korea
n | |
| 222 // | |
| 223 void CollationRegressionTest::Test4058613(/* char* par */) | |
| 224 { | |
| 225 // Creating a default collator doesn't work when Korean is the default | |
| 226 // locale | |
| 227 | |
| 228 Locale oldDefault = Locale::getDefault(); | |
| 229 UErrorCode status = U_ZERO_ERROR; | |
| 230 | |
| 231 Locale::setDefault(Locale::getKorean(), status); | |
| 232 | |
| 233 if (U_FAILURE(status)) | |
| 234 { | |
| 235 errln("Could not set default locale to Locale::KOREAN"); | |
| 236 return; | |
| 237 } | |
| 238 | |
| 239 Collator *c = NULL; | |
| 240 | |
| 241 c = Collator::createInstance("en_US", status); | |
| 242 | |
| 243 if (c == NULL || U_FAILURE(status)) | |
| 244 { | |
| 245 errln("Could not create a Korean collator"); | |
| 246 Locale::setDefault(oldDefault, status); | |
| 247 delete c; | |
| 248 return; | |
| 249 } | |
| 250 | |
| 251 // Since the fix to this bug was to turn off decomposition for Korean collat
ors, | |
| 252 // ensure that's what we got | |
| 253 if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF) | |
| 254 { | |
| 255 errln("Decomposition is not set to NO_DECOMPOSITION for Korean collator"); | |
| 256 } | |
| 257 | |
| 258 delete c; | |
| 259 | |
| 260 Locale::setDefault(oldDefault, status); | |
| 261 } | |
| 262 | |
| 263 // @bug 4059820 | |
| 264 // | |
| 265 // RuleBasedCollator.getRules does not return the exact pattern as input | |
| 266 // for expanding character sequences | |
| 267 // | |
| 268 void CollationRegressionTest::Test4059820(/* char* par */) | |
| 269 { | |
| 270 UErrorCode status = U_ZERO_ERROR; | |
| 271 | |
| 272 RuleBasedCollator *c = NULL; | |
| 273 UnicodeString rules = "&9 < a < b , c/a < d < z"; | |
| 274 | |
| 275 c = new RuleBasedCollator(rules, status); | |
| 276 | |
| 277 if (c == NULL || U_FAILURE(status)) | |
| 278 { | |
| 279 errln("Failure building a collator."); | |
| 280 delete c; | |
| 281 return; | |
| 282 } | |
| 283 | |
| 284 if ( c->getRules().indexOf("c/a") == -1) | |
| 285 { | |
| 286 errln("returned rules do not contain 'c/a'"); | |
| 287 } | |
| 288 | |
| 289 delete c; | |
| 290 } | |
| 291 | |
| 292 // @bug 4060154 | |
| 293 // | |
| 294 // MergeCollation::fixEntry broken for "& H < \u0131, \u0130, i, I" | |
| 295 // | |
| 296 void CollationRegressionTest::Test4060154(/* char* par */) | |
| 297 { | |
| 298 UErrorCode status = U_ZERO_ERROR; | |
| 299 UnicodeString rules; | |
| 300 | |
| 301 rules += "&f < g, G < h, H < i, I < j, J"; | |
| 302 rules += " & H < "; | |
| 303 rules += (UChar)0x0131; | |
| 304 rules += ", "; | |
| 305 rules += (UChar)0x0130; | |
| 306 rules += ", i, I"; | |
| 307 | |
| 308 RuleBasedCollator *c = NULL; | |
| 309 | |
| 310 c = new RuleBasedCollator(rules, status); | |
| 311 | |
| 312 if (c == NULL || U_FAILURE(status)) | |
| 313 { | |
| 314 errln("failure building collator."); | |
| 315 delete c; | |
| 316 return; | |
| 317 } | |
| 318 | |
| 319 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 320 | |
| 321 /* | |
| 322 String[] tertiary = { | |
| 323 "A", "<", "B", | |
| 324 "H", "<", "\u0131", | |
| 325 "H", "<", "I", | |
| 326 "\u0131", "<", "\u0130", | |
| 327 "\u0130", "<", "i", | |
| 328 "\u0130", ">", "H", | |
| 329 }; | |
| 330 */ | |
| 331 | |
| 332 static const UChar tertiary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 333 { | |
| 334 {0x41, 0}, {0x3c, 0}, {0x42, 0}, | |
| 335 {0x48, 0}, {0x3c, 0}, {0x0131, 0}, | |
| 336 {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
| 337 {0x0131, 0}, {0x3c, 0}, {0x0130, 0}, | |
| 338 {0x0130, 0}, {0x3c, 0}, {0x69, 0}, | |
| 339 {0x0130, 0}, {0x3e, 0}, {0x48, 0} | |
| 340 }; | |
| 341 | |
| 342 c->setStrength(Collator::TERTIARY); | |
| 343 compareArray(*c, tertiary, ARRAY_LENGTH(tertiary)); | |
| 344 | |
| 345 /* | |
| 346 String[] secondary = { | |
| 347 "H", "<", "I", | |
| 348 "\u0131", "=", "\u0130", | |
| 349 }; | |
| 350 */ | |
| 351 static const UChar secondary[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 352 { | |
| 353 {0x48, 0}, {0x3c, 0}, {0x49, 0}, | |
| 354 {0x0131, 0}, {0x3d, 0}, {0x0130, 0} | |
| 355 }; | |
| 356 | |
| 357 c->setStrength(Collator::PRIMARY); | |
| 358 compareArray(*c, secondary, ARRAY_LENGTH(secondary)); | |
| 359 | |
| 360 delete c; | |
| 361 } | |
| 362 | |
| 363 // @bug 4062418 | |
| 364 // | |
| 365 // Secondary/Tertiary comparison incorrect in French Secondary | |
| 366 // | |
| 367 void CollationRegressionTest::Test4062418(/* char* par */) | |
| 368 { | |
| 369 UErrorCode status = U_ZERO_ERROR; | |
| 370 | |
| 371 RuleBasedCollator *c = NULL; | |
| 372 | |
| 373 c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench()
, status); | |
| 374 | |
| 375 if (c == NULL || U_FAILURE(status)) | |
| 376 { | |
| 377 errln("Failed to create collator for Locale::getCanadaFrench()"); | |
| 378 delete c; | |
| 379 return; | |
| 380 } | |
| 381 | |
| 382 c->setStrength(Collator::SECONDARY); | |
| 383 | |
| 384 /* | |
| 385 String[] tests = { | |
| 386 "p\u00eache", "<", "p\u00e9ch\u00e9", // Comparing accents
from end, p\u00e9ch\u00e9 is greater | |
| 387 }; | |
| 388 */ | |
| 389 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 390 { | |
| 391 {0x70, 0x00EA, 0x63, 0x68, 0x65, 0}, {0x3c, 0}, {0x70, 0x00E9, 0x63, 0x6
8, 0x00E9, 0} | |
| 392 }; | |
| 393 | |
| 394 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 395 | |
| 396 delete c; | |
| 397 } | |
| 398 | |
| 399 // @bug 4065540 | |
| 400 // | |
| 401 // Collator::compare() method broken if either string contains spaces | |
| 402 // | |
| 403 void CollationRegressionTest::Test4065540(/* char* par */) | |
| 404 { | |
| 405 if (en_us->compare("abcd e", "abcd f") == 0) | |
| 406 { | |
| 407 errln("'abcd e' == 'abcd f'"); | |
| 408 } | |
| 409 } | |
| 410 | |
| 411 // @bug 4066189 | |
| 412 // | |
| 413 // Unicode characters need to be recursively decomposed to get the | |
| 414 // correct result. For example, | |
| 415 // u1EB1 -> \u0103 + \u0300 -> a + \u0306 + \u0300. | |
| 416 // | |
| 417 void CollationRegressionTest::Test4066189(/* char* par */) | |
| 418 { | |
| 419 static const UChar chars1[] = {0x1EB1, 0}; | |
| 420 static const UChar chars2[] = {0x61, 0x0306, 0x0300, 0}; | |
| 421 const UnicodeString test1(chars1); | |
| 422 const UnicodeString test2(chars2); | |
| 423 UErrorCode status = U_ZERO_ERROR; | |
| 424 | |
| 425 // NOTE: The java code used en_us to create the | |
| 426 // CollationElementIterator's. I'm pretty sure that | |
| 427 // was wrong, so I've change the code to use c1 and c2 | |
| 428 RuleBasedCollator *c1 = (RuleBasedCollator *) en_us->clone(); | |
| 429 c1->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 430 CollationElementIterator *i1 = c1->createCollationElementIterator(test1); | |
| 431 | |
| 432 RuleBasedCollator *c2 = (RuleBasedCollator *) en_us->clone(); | |
| 433 c2->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
| 434 CollationElementIterator *i2 = c2->createCollationElementIterator(test2); | |
| 435 | |
| 436 assertEqual(*i1, *i2); | |
| 437 | |
| 438 delete i2; | |
| 439 delete c2; | |
| 440 delete i1; | |
| 441 delete c1; | |
| 442 } | |
| 443 | |
| 444 // @bug 4066696 | |
| 445 // | |
| 446 // French secondary collation checking at the end of compare iteration fails | |
| 447 // | |
| 448 void CollationRegressionTest::Test4066696(/* char* par */) | |
| 449 { | |
| 450 UErrorCode status = U_ZERO_ERROR; | |
| 451 RuleBasedCollator *c = NULL; | |
| 452 | |
| 453 c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(),
status); | |
| 454 | |
| 455 if (c == NULL || U_FAILURE(status)) | |
| 456 { | |
| 457 errln("Failure creating collator for Locale::getCanadaFrench()"); | |
| 458 delete c; | |
| 459 return; | |
| 460 } | |
| 461 | |
| 462 c->setStrength(Collator::SECONDARY); | |
| 463 | |
| 464 /* | |
| 465 String[] tests = { | |
| 466 "\u00e0", "<", "\u01fa", // a-grave < A-ring-acute | |
| 467 }; | |
| 468 | |
| 469 should be: | |
| 470 | |
| 471 String[] tests = { | |
| 472 "\u00e0", ">", "\u01fa", // a-grave < A-ring-acute | |
| 473 }; | |
| 474 | |
| 475 */ | |
| 476 | |
| 477 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 478 { | |
| 479 {0x00E0, 0}, {0x3e, 0}, {0x01FA, 0} | |
| 480 }; | |
| 481 | |
| 482 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 483 | |
| 484 delete c; | |
| 485 } | |
| 486 | |
| 487 // @bug 4076676 | |
| 488 // | |
| 489 // Bad canonicalization of same-class combining characters | |
| 490 // | |
| 491 void CollationRegressionTest::Test4076676(/* char* par */) | |
| 492 { | |
| 493 // These combining characters are all in the same class, so they should not | |
| 494 // be reordered, and they should compare as unequal. | |
| 495 static const UChar s1[] = {0x41, 0x0301, 0x0302, 0x0300, 0}; | |
| 496 static const UChar s2[] = {0x41, 0x0302, 0x0300, 0x0301, 0}; | |
| 497 | |
| 498 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 499 c->setStrength(Collator::TERTIARY); | |
| 500 | |
| 501 if (c->compare(s1,s2) == 0) | |
| 502 { | |
| 503 errln("Same-class combining chars were reordered"); | |
| 504 } | |
| 505 | |
| 506 delete c; | |
| 507 } | |
| 508 | |
| 509 // @bug 4079231 | |
| 510 // | |
| 511 // RuleBasedCollator::operator==(NULL) throws NullPointerException | |
| 512 // | |
| 513 void CollationRegressionTest::Test4079231(/* char* par */) | |
| 514 { | |
| 515 // I don't think there's any way to write this test | |
| 516 // in C++. The following is equivalent to the Java, | |
| 517 // but doesn't compile 'cause NULL can't be converted | |
| 518 // to Collator& | |
| 519 // | |
| 520 // if (en_us->operator==(NULL)) | |
| 521 // { | |
| 522 // errln("en_us->operator==(NULL) returned TRUE"); | |
| 523 // } | |
| 524 | |
| 525 /* | |
| 526 try { | |
| 527 if (en_us->equals(null)) { | |
| 528 errln("en_us->equals(null) returned true"); | |
| 529 } | |
| 530 } | |
| 531 catch (Exception e) { | |
| 532 errln("en_us->equals(null) threw " + e.toString()); | |
| 533 } | |
| 534 */ | |
| 535 } | |
| 536 | |
| 537 // @bug 4078588 | |
| 538 // | |
| 539 // RuleBasedCollator breaks on "< a < bb" rule | |
| 540 // | |
| 541 void CollationRegressionTest::Test4078588(/* char *par */) | |
| 542 { | |
| 543 UErrorCode status = U_ZERO_ERROR; | |
| 544 RuleBasedCollator *rbc = new RuleBasedCollator("&9 < a < bb", status); | |
| 545 | |
| 546 if (rbc == NULL || U_FAILURE(status)) | |
| 547 { | |
| 548 errln("Failed to create RuleBasedCollator."); | |
| 549 delete rbc; | |
| 550 return; | |
| 551 } | |
| 552 | |
| 553 Collator::EComparisonResult result = rbc->compare("a","bb"); | |
| 554 | |
| 555 if (result != Collator::LESS) | |
| 556 { | |
| 557 errln((UnicodeString)"Compare(a,bb) returned " + (int)result | |
| 558 + (UnicodeString)"; expected -1"); | |
| 559 } | |
| 560 | |
| 561 delete rbc; | |
| 562 } | |
| 563 | |
| 564 // @bug 4081866 | |
| 565 // | |
| 566 // Combining characters in different classes not reordered properly. | |
| 567 // | |
| 568 void CollationRegressionTest::Test4081866(/* char* par */) | |
| 569 { | |
| 570 // These combining characters are all in different classes, | |
| 571 // so they should be reordered and the strings should compare as equal. | |
| 572 static const UChar s1[] = {0x41, 0x0300, 0x0316, 0x0327, 0x0315, 0}; | |
| 573 static const UChar s2[] = {0x41, 0x0327, 0x0316, 0x0315, 0x0300, 0}; | |
| 574 | |
| 575 UErrorCode status = U_ZERO_ERROR; | |
| 576 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 577 c->setStrength(Collator::TERTIARY); | |
| 578 | |
| 579 // Now that the default collators are set to NO_DECOMPOSITION | |
| 580 // (as a result of fixing bug 4114077), we must set it explicitly | |
| 581 // when we're testing reordering behavior. -- lwerner, 5/5/98 | |
| 582 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 583 | |
| 584 if (c->compare(s1,s2) != 0) | |
| 585 { | |
| 586 errln("Combining chars were not reordered"); | |
| 587 } | |
| 588 | |
| 589 delete c; | |
| 590 } | |
| 591 | |
| 592 // @bug 4087241 | |
| 593 // | |
| 594 // string comparison errors in Scandinavian collators | |
| 595 // | |
| 596 void CollationRegressionTest::Test4087241(/* char* par */) | |
| 597 { | |
| 598 UErrorCode status = U_ZERO_ERROR; | |
| 599 Locale da_DK("da", "DK"); | |
| 600 RuleBasedCollator *c = NULL; | |
| 601 | |
| 602 c = (RuleBasedCollator *) Collator::createInstance(da_DK, status); | |
| 603 | |
| 604 if (c == NULL || U_FAILURE(status)) | |
| 605 { | |
| 606 errln("Failed to create collator for da_DK locale"); | |
| 607 delete c; | |
| 608 return; | |
| 609 } | |
| 610 | |
| 611 c->setStrength(Collator::SECONDARY); | |
| 612 | |
| 613 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 614 { | |
| 615 {0x7a, 0}, {0x3c, 0}, {0x00E6, 0}, // z < ae | |
| 616 {0x61, 0x0308, 0}, {0x3c, 0}, {0x61, 0x030A, 0}, // a-umlaut < a-r
ing | |
| 617 {0x59, 0}, {0x3c, 0}, {0x75, 0x0308, 0}, // Y < u-u
mlaut | |
| 618 }; | |
| 619 | |
| 620 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 621 | |
| 622 delete c; | |
| 623 } | |
| 624 | |
| 625 // @bug 4087243 | |
| 626 // | |
| 627 // CollationKey takes ignorable strings into account when it shouldn't | |
| 628 // | |
| 629 void CollationRegressionTest::Test4087243(/* char* par */) | |
| 630 { | |
| 631 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 632 c->setStrength(Collator::TERTIARY); | |
| 633 | |
| 634 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 635 { | |
| 636 {0x31, 0x32, 0x33, 0}, {0x3d, 0}, {0x31, 0x32, 0x33, 0x0001, 0} // 1
2 3 = 1 2 3 ctrl-A | |
| 637 }; | |
| 638 | |
| 639 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 640 | |
| 641 delete c; | |
| 642 } | |
| 643 | |
| 644 // @bug 4092260 | |
| 645 // | |
| 646 // Mu/micro conflict | |
| 647 // Micro symbol and greek lowercase letter Mu should sort identically | |
| 648 // | |
| 649 void CollationRegressionTest::Test4092260(/* char* par */) | |
| 650 { | |
| 651 UErrorCode status = U_ZERO_ERROR; | |
| 652 Locale el("el", ""); | |
| 653 Collator *c = NULL; | |
| 654 | |
| 655 c = Collator::createInstance(el, status); | |
| 656 | |
| 657 if (c == NULL || U_FAILURE(status)) | |
| 658 { | |
| 659 errln("Failed to create collator for el locale."); | |
| 660 delete c; | |
| 661 return; | |
| 662 } | |
| 663 | |
| 664 // These now have tertiary differences in UCA | |
| 665 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
| 666 | |
| 667 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 668 { | |
| 669 {0x00B5, 0}, {0x3d, 0}, {0x03BC, 0} | |
| 670 }; | |
| 671 | |
| 672 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 673 | |
| 674 delete c; | |
| 675 } | |
| 676 | |
| 677 // @bug 4095316 | |
| 678 // | |
| 679 void CollationRegressionTest::Test4095316(/* char* par */) | |
| 680 { | |
| 681 UErrorCode status = U_ZERO_ERROR; | |
| 682 Locale el_GR("el", "GR"); | |
| 683 Collator *c = Collator::createInstance(el_GR, status); | |
| 684 | |
| 685 if (c == NULL || U_FAILURE(status)) | |
| 686 { | |
| 687 errln("Failed to create collator for el_GR locale"); | |
| 688 delete c; | |
| 689 return; | |
| 690 } | |
| 691 // These now have tertiary differences in UCA | |
| 692 //c->setStrength(Collator::TERTIARY); | |
| 693 c->setAttribute(UCOL_STRENGTH, UCOL_SECONDARY, status); | |
| 694 | |
| 695 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 696 { | |
| 697 {0x03D4, 0}, {0x3d, 0}, {0x03AB, 0} | |
| 698 }; | |
| 699 | |
| 700 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 701 | |
| 702 delete c; | |
| 703 } | |
| 704 | |
| 705 // @bug 4101940 | |
| 706 // | |
| 707 void CollationRegressionTest::Test4101940(/* char* par */) | |
| 708 { | |
| 709 UErrorCode status = U_ZERO_ERROR; | |
| 710 RuleBasedCollator *c = NULL; | |
| 711 UnicodeString rules = "&9 < a < b"; | |
| 712 UnicodeString nothing = ""; | |
| 713 | |
| 714 c = new RuleBasedCollator(rules, status); | |
| 715 | |
| 716 if (c == NULL || U_FAILURE(status)) | |
| 717 { | |
| 718 errln("Failed to create RuleBasedCollator"); | |
| 719 delete c; | |
| 720 return; | |
| 721 } | |
| 722 | |
| 723 CollationElementIterator *i = c->createCollationElementIterator(nothing); | |
| 724 i->reset(); | |
| 725 | |
| 726 if (i->next(status) != CollationElementIterator::NULLORDER) | |
| 727 { | |
| 728 errln("next did not return NULLORDER"); | |
| 729 } | |
| 730 | |
| 731 delete i; | |
| 732 delete c; | |
| 733 } | |
| 734 | |
| 735 // @bug 4103436 | |
| 736 // | |
| 737 // Collator::compare not handling spaces properly | |
| 738 // | |
| 739 void CollationRegressionTest::Test4103436(/* char* par */) | |
| 740 { | |
| 741 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 742 c->setStrength(Collator::TERTIARY); | |
| 743 | |
| 744 static const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 745 { | |
| 746 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x20, 0
x61, 0x63, 0x63, 0x65, 0x73, 0x73, 0}, | |
| 747 {0x66, 0x69, 0x6c, 0x65, 0}, {0x3c, 0}, {0x66, 0x69, 0x6c, 0x65, 0x61, 0
x63, 0x63, 0x65, 0x73, 0x73, 0} | |
| 748 }; | |
| 749 | |
| 750 compareArray(*c, tests, ARRAY_LENGTH(tests)); | |
| 751 | |
| 752 delete c; | |
| 753 } | |
| 754 | |
| 755 // @bug 4114076 | |
| 756 // | |
| 757 // Collation not Unicode conformant with Hangul syllables | |
| 758 // | |
| 759 void CollationRegressionTest::Test4114076(/* char* par */) | |
| 760 { | |
| 761 UErrorCode status = U_ZERO_ERROR; | |
| 762 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 763 c->setStrength(Collator::TERTIARY); | |
| 764 | |
| 765 // | |
| 766 // With Canonical decomposition, Hangul syllables should get decomposed | |
| 767 // into Jamo, but Jamo characters should not be decomposed into | |
| 768 // conjoining Jamo | |
| 769 // | |
| 770 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 771 { | |
| 772 {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x1171, 0x11b6, 0} | |
| 773 }; | |
| 774 | |
| 775 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 776 compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
| 777 | |
| 778 // From UTR #15: | |
| 779 // *In earlier versions of Unicode, jamo characters like ksf | |
| 780 // had compatibility mappings to kf + sf. These mappings were | |
| 781 // removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.
) | |
| 782 // That is, the following test is obsolete as of 2.1.9 | |
| 783 | |
| 784 //obsolete- // With Full decomposition, it should go all the way down to | |
| 785 //obsolete- // conjoining Jamo characters. | |
| 786 //obsolete- // | |
| 787 //obsolete- static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN
] = | |
| 788 //obsolete- { | |
| 789 //obsolete- {0xd4db, 0}, {0x3d, 0}, {0x1111, 0x116e, 0x1175, 0x11af, 0x11
c2, 0} | |
| 790 //obsolete- }; | |
| 791 //obsolete- | |
| 792 //obsolete- c->setDecomposition(Normalizer::DECOMP_COMPAT); | |
| 793 //obsolete- compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
| 794 | |
| 795 delete c; | |
| 796 } | |
| 797 | |
| 798 | |
| 799 // @bug 4124632 | |
| 800 // | |
| 801 // Collator::getCollationKey was hanging on certain character sequences | |
| 802 // | |
| 803 void CollationRegressionTest::Test4124632(/* char* par */) | |
| 804 { | |
| 805 UErrorCode status = U_ZERO_ERROR; | |
| 806 Collator *coll = NULL; | |
| 807 | |
| 808 coll = Collator::createInstance(Locale::getJapan(), status); | |
| 809 | |
| 810 if (coll == NULL || U_FAILURE(status)) | |
| 811 { | |
| 812 errln("Failed to create collator for Locale::JAPAN"); | |
| 813 delete coll; | |
| 814 return; | |
| 815 } | |
| 816 | |
| 817 static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0}; | |
| 818 CollationKey key; | |
| 819 | |
| 820 coll->getCollationKey(test, key, status); | |
| 821 | |
| 822 if (key.isBogus() || U_FAILURE(status)) | |
| 823 { | |
| 824 errln("CollationKey creation failed."); | |
| 825 } | |
| 826 | |
| 827 delete coll; | |
| 828 } | |
| 829 | |
| 830 // @bug 4132736 | |
| 831 // | |
| 832 // sort order of french words with multiple accents has errors | |
| 833 // | |
| 834 void CollationRegressionTest::Test4132736(/* char* par */) | |
| 835 { | |
| 836 UErrorCode status = U_ZERO_ERROR; | |
| 837 | |
| 838 Collator *c = NULL; | |
| 839 | |
| 840 c = Collator::createInstance(Locale::getCanadaFrench(), status); | |
| 841 c->setStrength(Collator::TERTIARY); | |
| 842 | |
| 843 if (c == NULL || U_FAILURE(status)) | |
| 844 { | |
| 845 errln("Failed to create a collator for Locale::getCanadaFrench()"); | |
| 846 delete c; | |
| 847 return; | |
| 848 } | |
| 849 | |
| 850 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 851 { | |
| 852 {0x65, 0x0300, 0x65, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x65, 0x0300,
0}, | |
| 853 {0x65, 0x0300, 0x0301, 0}, {0x3c, 0}, {0x65, 0x0301, 0x0300, 0} | |
| 854 }; | |
| 855 | |
| 856 compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
| 857 | |
| 858 delete c; | |
| 859 } | |
| 860 | |
| 861 // @bug 4133509 | |
| 862 // | |
| 863 // The sorting using java.text.CollationKey is not in the exact order | |
| 864 // | |
| 865 void CollationRegressionTest::Test4133509(/* char* par */) | |
| 866 { | |
| 867 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 868 { | |
| 869 {0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0}, {0x3c, 0}, {0
x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x49, 0x6e, 0x49, 0x6e, 0x6
9, 0x74, 0x69, 0x61, 0x6c, 0x69, 0x7a, 0x65, 0x72, 0x45, 0x72, 0x72, 0x6f, 0x72,
0}, | |
| 870 {0x47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0}, {0x3c, 0}, {0x
47, 0x72, 0x61, 0x70, 0x68, 0x69, 0x63, 0x73, 0x45, 0x6e, 0x76, 0x69, 0x72, 0x6f
, 0x6e, 0x6d, 0x65, 0x6e, 0x74, 0}, | |
| 871 {0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0}, {0x3c, 0}, {0x
53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0} | |
| 872 }; | |
| 873 | |
| 874 compareArray(*en_us, test1, ARRAY_LENGTH(test1)); | |
| 875 } | |
| 876 | |
| 877 // @bug 4114077 | |
| 878 // | |
| 879 // Collation with decomposition off doesn't work for Europe | |
| 880 // | |
| 881 void CollationRegressionTest::Test4114077(/* char* par */) | |
| 882 { | |
| 883 // Ensure that we get the same results with decomposition off | |
| 884 // as we do with it on.... | |
| 885 | |
| 886 UErrorCode status = U_ZERO_ERROR; | |
| 887 RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone(); | |
| 888 c->setStrength(Collator::TERTIARY); | |
| 889 | |
| 890 static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 891 { | |
| 892 {0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0},
// Should be equivalent | |
| 893 {0x70, 0x00ea, 0x63, 0x68, 0x65, 0}, {0x3e, 0}, {0x70, 0x00e9, 0x63, 0x6
8, 0x00e9, 0}, | |
| 894 {0x0204, 0}, {0x3d, 0}, {0x45, 0x030F, 0}, | |
| 895 {0x01fa, 0}, {0x3d, 0}, {0x41, 0x030a, 0x0301, 0},
// a-ring-acute -> a-ring, acute | |
| 896 // -> a, ring, acute | |
| 897 {0x41, 0x0300, 0x0316, 0}, {0x3c, 0}, {0x41, 0x0316, 0x0300, 0}
// No reordering --> unequal | |
| 898 }; | |
| 899 | |
| 900 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_OFF, status); | |
| 901 compareArray(*c, test1, ARRAY_LENGTH(test1)); | |
| 902 | |
| 903 static const UChar test2[][CollationRegressionTest::MAX_TOKEN_LEN] = | |
| 904 { | |
| 905 {0x41, 0x0300, 0x0316, 0}, {0x3d, 0}, {0x41, 0x0316, 0x0300, 0} //
Reordering --> equal | |
| 906 }; | |
| 907 | |
| 908 c->setAttribute(UCOL_NORMALIZATION_MODE, UCOL_ON, status); | |
| 909 compareArray(*c, test2, ARRAY_LENGTH(test2)); | |
| 910 | |
| 911 delete c; | |
| 912 } | |
| 913 | |
| 914 // @bug 4141640 | |
| 915 // | |
| 916 // Support for Swedish gone in 1.1.6 (Can't create Swedish collator) | |
| 917 // | |
| 918 void CollationRegressionTest::Test4141640(/* char* par */) | |
| 919 { | |
| 920 // | |
| 921 // Rather than just creating a Swedish collator, we might as well | |
| 922 // try to instantiate one for every locale available on the system | |
| 923 // in order to prevent this sort of bug from cropping up in the future | |
| 924 // | |
| 925 UErrorCode status = U_ZERO_ERROR; | |
| 926 int32_t i, localeCount; | |
| 927 const Locale *locales = Locale::getAvailableLocales(localeCount); | |
| 928 | |
| 929 for (i = 0; i < localeCount; i += 1) | |
| 930 { | |
| 931 Collator *c = NULL; | |
| 932 | |
| 933 status = U_ZERO_ERROR; | |
| 934 c = Collator::createInstance(locales[i], status); | |
| 935 | |
| 936 if (c == NULL || U_FAILURE(status)) | |
| 937 { | |
| 938 UnicodeString msg, localeName; | |
| 939 | |
| 940 msg += "Could not create collator for locale "; | |
| 941 msg += locales[i].getName(); | |
| 942 | |
| 943 errln(msg); | |
| 944 } | |
| 945 | |
| 946 delete c; | |
| 947 } | |
| 948 } | |
| 949 | |
| 950 // @bug 4139572 | |
| 951 // | |
| 952 // getCollationKey throws exception for spanish text | |
| 953 // Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6 | |
| 954 // | |
| 955 void CollationRegressionTest::Test4139572(/* char* par */) | |
| 956 { | |
| 957 // | |
| 958 // Code pasted straight from the bug report | |
| 959 // (and then translated to C++ ;-) | |
| 960 // | |
| 961 // create spanish locale and collator | |
| 962 UErrorCode status = U_ZERO_ERROR; | |
| 963 Locale l("es", "es"); | |
| 964 Collator *col = NULL; | |
| 965 | |
| 966 col = Collator::createInstance(l, status); | |
| 967 | |
| 968 if (col == NULL || U_FAILURE(status)) | |
| 969 { | |
| 970 errln("Failed to create a collator for es_es locale."); | |
| 971 delete col; | |
| 972 return; | |
| 973 } | |
| 974 | |
| 975 CollationKey key; | |
| 976 | |
| 977 // this spanish phrase kills it! | |
| 978 col->getCollationKey("Nombre De Objeto", key, status); | |
| 979 | |
| 980 if (key.isBogus() || U_FAILURE(status)) | |
| 981 { | |
| 982 errln("Error creating CollationKey for \"Nombre De Ojbeto\""); | |
| 983 } | |
| 984 | |
| 985 delete col; | |
| 986 } | |
| 987 /* HSYS : RuleBasedCollator::compare() performance enhancements | |
| 988 compare() does not create CollationElementIterator() anymore.*/ | |
| 989 | |
| 990 class My4146160Collator : public RuleBasedCollator | |
| 991 { | |
| 992 public: | |
| 993 My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status); | |
| 994 ~My4146160Collator(); | |
| 995 | |
| 996 CollationElementIterator *createCollationElementIterator(const UnicodeString
&text) const; | |
| 997 | |
| 998 CollationElementIterator *createCollationElementIterator(const CharacterIter
ator &text) const; | |
| 999 | |
| 1000 static int32_t count; | |
| 1001 }; | |
| 1002 | |
| 1003 int32_t My4146160Collator::count = 0; | |
| 1004 | |
| 1005 My4146160Collator::My4146160Collator(RuleBasedCollator &rbc, UErrorCode &status) | |
| 1006 : RuleBasedCollator(rbc.getRules(), status) | |
| 1007 { | |
| 1008 } | |
| 1009 | |
| 1010 My4146160Collator::~My4146160Collator() | |
| 1011 { | |
| 1012 } | |
| 1013 | |
| 1014 CollationElementIterator *My4146160Collator::createCollationElementIterator(cons
t UnicodeString &text) const | |
| 1015 { | |
| 1016 count += 1; | |
| 1017 return RuleBasedCollator::createCollationElementIterator(text); | |
| 1018 } | |
| 1019 | |
| 1020 CollationElementIterator *My4146160Collator::createCollationElementIterator(cons
t CharacterIterator &text) const | |
| 1021 { | |
| 1022 count += 1; | |
| 1023 return RuleBasedCollator::createCollationElementIterator(text); | |
| 1024 } | |
| 1025 | |
| 1026 // @bug 4146160 | |
| 1027 // | |
| 1028 // RuleBasedCollator doesn't use createCollationElementIterator internally | |
| 1029 // | |
| 1030 void CollationRegressionTest::Test4146160(/* char* par */) | |
| 1031 { | |
| 1032 #if 0 | |
| 1033 // | |
| 1034 // Use a custom collator class whose createCollationElementIterator | |
| 1035 // methods increment a count.... | |
| 1036 // | |
| 1037 UErrorCode status = U_ZERO_ERROR; | |
| 1038 CollationKey key; | |
| 1039 | |
| 1040 My4146160Collator::count = 0; | |
| 1041 My4146160Collator *mc = NULL; | |
| 1042 | |
| 1043 mc = new My4146160Collator(*en_us, status); | |
| 1044 | |
| 1045 if (mc == NULL || U_FAILURE(status)) | |
| 1046 { | |
| 1047 errln("Failed to create a My4146160Collator."); | |
| 1048 delete mc; | |
| 1049 return; | |
| 1050 } | |
| 1051 | |
| 1052 mc->getCollationKey("1", key, status); | |
| 1053 | |
| 1054 if (key.isBogus() || U_FAILURE(status)) | |
| 1055 { | |
| 1056 errln("Failure to get a CollationKey from a My4146160Collator."); | |
| 1057 delete mc; | |
| 1058 return; | |
| 1059 } | |
| 1060 | |
| 1061 if (My4146160Collator::count < 1) | |
| 1062 { | |
| 1063 errln("My4146160Collator::createCollationElementIterator not called for
getCollationKey"); | |
| 1064 } | |
| 1065 | |
| 1066 My4146160Collator::count = 0; | |
| 1067 mc->compare("1", "2"); | |
| 1068 | |
| 1069 if (My4146160Collator::count < 1) | |
| 1070 { | |
| 1071 errln("My4146160Collator::createtCollationElementIterator not called for
compare"); | |
| 1072 } | |
| 1073 | |
| 1074 delete mc; | |
| 1075 #endif | |
| 1076 } | |
| 1077 | |
| 1078 void CollationRegressionTest::Test4179216() { | |
| 1079 // you can position a CollationElementIterator in the middle of | |
| 1080 // a contracting character sequence, yielding a bogus collation | |
| 1081 // element | |
| 1082 IcuTestErrorCode errorCode(*this, "Test4179216"); | |
| 1083 RuleBasedCollator coll(en_us->getRules() + " & C < ch , cH , Ch , CH < cat <
crunchy", errorCode); | |
| 1084 UnicodeString testText = "church church catcatcher runcrunchynchy"; | |
| 1085 CollationElementIterator *iter = coll.createCollationElementIterator(testTex
t); | |
| 1086 | |
| 1087 // test that the "ch" combination works properly | |
| 1088 iter->setOffset(4, errorCode); | |
| 1089 int32_t elt4 = CollationElementIterator::primaryOrder(iter->next(errorCode))
; | |
| 1090 | |
| 1091 iter->reset(); | |
| 1092 int32_t elt0 = CollationElementIterator::primaryOrder(iter->next(errorCode))
; | |
| 1093 | |
| 1094 iter->setOffset(5, errorCode); | |
| 1095 int32_t elt5 = CollationElementIterator::primaryOrder(iter->next(errorCode))
; | |
| 1096 | |
| 1097 // Compares and prints only 16-bit primary weights. | |
| 1098 if (elt4 != elt0 || elt5 != elt0) { | |
| 1099 errln("The collation elements at positions 0 (0x%04x), " | |
| 1100 "4 (0x%04x), and 5 (0x%04x) don't match.", | |
| 1101 elt0, elt4, elt5); | |
| 1102 } | |
| 1103 | |
| 1104 // test that the "cat" combination works properly | |
| 1105 iter->setOffset(14, errorCode); | |
| 1106 int32_t elt14 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
| 1107 | |
| 1108 iter->setOffset(15, errorCode); | |
| 1109 int32_t elt15 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
| 1110 | |
| 1111 iter->setOffset(16, errorCode); | |
| 1112 int32_t elt16 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
| 1113 | |
| 1114 iter->setOffset(17, errorCode); | |
| 1115 int32_t elt17 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
| 1116 | |
| 1117 iter->setOffset(18, errorCode); | |
| 1118 int32_t elt18 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
| 1119 | |
| 1120 iter->setOffset(19, errorCode); | |
| 1121 int32_t elt19 = CollationElementIterator::primaryOrder(iter->next(errorCode)
); | |
| 1122 | |
| 1123 // Compares and prints only 16-bit primary weights. | |
| 1124 if (elt14 != elt15 || elt14 != elt16 || elt14 != elt17 | |
| 1125 || elt14 != elt18 || elt14 != elt19) { | |
| 1126 errln("\"cat\" elements don't match: elt14 = 0x%04x, " | |
| 1127 "elt15 = 0x%04x, elt16 = 0x%04x, elt17 = 0x%04x, " | |
| 1128 "elt18 = 0x%04x, elt19 = 0x%04x", | |
| 1129 elt14, elt15, elt16, elt17, elt18, elt19); | |
| 1130 } | |
| 1131 | |
| 1132 // now generate a complete list of the collation elements, | |
| 1133 // first using next() and then using setOffset(), and | |
| 1134 // make sure both interfaces return the same set of elements | |
| 1135 iter->reset(); | |
| 1136 | |
| 1137 int32_t elt = iter->next(errorCode); | |
| 1138 int32_t count = 0; | |
| 1139 while (elt != CollationElementIterator::NULLORDER) { | |
| 1140 ++count; | |
| 1141 elt = iter->next(errorCode); | |
| 1142 } | |
| 1143 | |
| 1144 LocalArray<UnicodeString> nextElements(new UnicodeString[count]); | |
| 1145 LocalArray<UnicodeString> setOffsetElements(new UnicodeString[count]); | |
| 1146 int32_t lastPos = 0; | |
| 1147 | |
| 1148 iter->reset(); | |
| 1149 elt = iter->next(errorCode); | |
| 1150 count = 0; | |
| 1151 while (elt != CollationElementIterator::NULLORDER) { | |
| 1152 nextElements[count++] = testText.tempSubStringBetween(lastPos, iter->get
Offset()); | |
| 1153 lastPos = iter->getOffset(); | |
| 1154 elt = iter->next(errorCode); | |
| 1155 } | |
| 1156 int32_t nextElementsLength = count; | |
| 1157 count = 0; | |
| 1158 for (int32_t i = 0; i < testText.length(); ) { | |
| 1159 iter->setOffset(i, errorCode); | |
| 1160 lastPos = iter->getOffset(); | |
| 1161 elt = iter->next(errorCode); | |
| 1162 setOffsetElements[count++] = testText.tempSubStringBetween(lastPos, iter
->getOffset()); | |
| 1163 i = iter->getOffset(); | |
| 1164 } | |
| 1165 for (int32_t i = 0; i < nextElementsLength; i++) { | |
| 1166 if (nextElements[i] == setOffsetElements[i]) { | |
| 1167 logln(nextElements[i]); | |
| 1168 } else { | |
| 1169 errln(UnicodeString("Error: next() yielded ") + nextElements[i] + | |
| 1170 ", but setOffset() yielded " + setOffsetElements[i]); | |
| 1171 } | |
| 1172 } | |
| 1173 delete iter; | |
| 1174 } | |
| 1175 | |
| 1176 // Ticket 7189 | |
| 1177 // | |
| 1178 // nextSortKeyPart incorrect for EO_S1 collation | |
| 1179 static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t le
n, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { | |
| 1180 UCharIterator uiter; | |
| 1181 uint32_t state[2] = { 0, 0 }; | |
| 1182 int32_t keyLen; | |
| 1183 int32_t count = 8; | |
| 1184 | |
| 1185 uiter_setString(&uiter, text, len); | |
| 1186 keyLen = 0; | |
| 1187 while (TRUE) { | |
| 1188 int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[k
eyLen], count, &status); | |
| 1189 if (U_FAILURE(status)) { | |
| 1190 return -1; | |
| 1191 } | |
| 1192 if (keyPartLen == 0) { | |
| 1193 break; | |
| 1194 } | |
| 1195 keyLen += keyPartLen; | |
| 1196 } | |
| 1197 return keyLen; | |
| 1198 } | |
| 1199 | |
| 1200 void CollationRegressionTest::TestT7189() { | |
| 1201 UErrorCode status = U_ZERO_ERROR; | |
| 1202 UCollator *coll; | |
| 1203 uint32_t i; | |
| 1204 | |
| 1205 static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
| 1206 // "Achter De Hoven" | |
| 1207 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F
, 0x76, 0x65, 0x6E, 0x00 }, | |
| 1208 // "ABC" | |
| 1209 { 0x41, 0x42, 0x43, 0x00 }, | |
| 1210 // "HELLO world!" | |
| 1211 { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21
, 0x00 } | |
| 1212 }; | |
| 1213 | |
| 1214 static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { | |
| 1215 // "Achter de Hoven" | |
| 1216 { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F
, 0x76, 0x65, 0x6E, 0x00 }, | |
| 1217 // "abc" | |
| 1218 { 0x61, 0x62, 0x63, 0x00 }, | |
| 1219 // "hello world!" | |
| 1220 { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21
, 0x00 } | |
| 1221 }; | |
| 1222 | |
| 1223 // Open the collator | |
| 1224 coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); | |
| 1225 if (U_FAILURE(status)) { | |
| 1226 errln("Failed to create a collator for short string EO_S1"); | |
| 1227 return; | |
| 1228 } | |
| 1229 | |
| 1230 for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * si
zeof(UChar)); i++) { | |
| 1231 uint8_t key1[100], key2[100]; | |
| 1232 int32_t len1, len2; | |
| 1233 | |
| 1234 len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status
); | |
| 1235 if (U_FAILURE(status)) { | |
| 1236 errln(UnicodeString("Failed to get a partial collation key for ") +
text1[i]); | |
| 1237 break; | |
| 1238 } | |
| 1239 len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status
); | |
| 1240 if (U_FAILURE(status)) { | |
| 1241 errln(UnicodeString("Failed to get a partial collation key for ") +
text2[i]); | |
| 1242 break; | |
| 1243 } | |
| 1244 | |
| 1245 if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { | |
| 1246 errln(UnicodeString("Failed: Identical key\n") + " text1: " + tex
t1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::he
x(key1, len1)); | |
| 1247 } else { | |
| 1248 logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i]
+ "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + t
ext2[i] + "\n" + " key2 : " | |
| 1249 + TestUtility::hex(key2, len2)); | |
| 1250 } | |
| 1251 } | |
| 1252 ucol_close(coll); | |
| 1253 } | |
| 1254 | |
| 1255 void CollationRegressionTest::TestCaseFirstCompression() { | |
| 1256 RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone(); | |
| 1257 UErrorCode status = U_ZERO_ERROR; | |
| 1258 | |
| 1259 // default | |
| 1260 caseFirstCompressionSub(col, "default"); | |
| 1261 | |
| 1262 // Upper first | |
| 1263 col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status); | |
| 1264 if (U_FAILURE(status)) { | |
| 1265 errln("Failed to set UCOL_UPPER_FIRST"); | |
| 1266 return; | |
| 1267 } | |
| 1268 caseFirstCompressionSub(col, "upper first"); | |
| 1269 | |
| 1270 // Lower first | |
| 1271 col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status); | |
| 1272 if (U_FAILURE(status)) { | |
| 1273 errln("Failed to set UCOL_LOWER_FIRST"); | |
| 1274 return; | |
| 1275 } | |
| 1276 caseFirstCompressionSub(col, "lower first"); | |
| 1277 | |
| 1278 delete col; | |
| 1279 } | |
| 1280 | |
| 1281 void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeStri
ng opt) { | |
| 1282 const int32_t maxLength = 50; | |
| 1283 | |
| 1284 UChar str1[maxLength]; | |
| 1285 UChar str2[maxLength]; | |
| 1286 | |
| 1287 CollationKey key1, key2; | |
| 1288 | |
| 1289 for (int32_t len = 1; len <= maxLength; len++) { | |
| 1290 int32_t i = 0; | |
| 1291 for (; i < len - 1; i++) { | |
| 1292 str1[i] = str2[i] = (UChar)0x61; // 'a' | |
| 1293 } | |
| 1294 str1[i] = (UChar)0x41; // 'A' | |
| 1295 str2[i] = (UChar)0x61; // 'a' | |
| 1296 | |
| 1297 UErrorCode status = U_ZERO_ERROR; | |
| 1298 col->getCollationKey(str1, len, key1, status); | |
| 1299 col->getCollationKey(str2, len, key2, status); | |
| 1300 | |
| 1301 UCollationResult cmpKey = key1.compareTo(key2, status); | |
| 1302 UCollationResult cmpCol = col->compare(str1, len, str2, len, status); | |
| 1303 | |
| 1304 if (U_FAILURE(status)) { | |
| 1305 errln("Error in caseFirstCompressionSub"); | |
| 1306 } else if (cmpKey != cmpCol) { | |
| 1307 errln((UnicodeString)"Inconsistent comparison(" + opt | |
| 1308 + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeStr
ing(str2, len) | |
| 1309 + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol); | |
| 1310 } | |
| 1311 } | |
| 1312 } | |
| 1313 | |
| 1314 void CollationRegressionTest::TestTrailingComment() { | |
| 1315 // ICU ticket #8070: | |
| 1316 // Check that the rule parser handles a comment without terminating end-of-l
ine. | |
| 1317 IcuTestErrorCode errorCode(*this, "TestTrailingComment"); | |
| 1318 RuleBasedCollator coll(UNICODE_STRING_SIMPLE("&c<b#comment1\n<a#comment2"),
errorCode); | |
| 1319 UnicodeString a((UChar)0x61), b((UChar)0x62), c((UChar)0x63); | |
| 1320 assertTrue("c<b", coll.compare(c, b) < 0); | |
| 1321 assertTrue("b<a", coll.compare(b, a) < 0); | |
| 1322 } | |
| 1323 | |
| 1324 void CollationRegressionTest::TestBeforeWithTooStrongAfter() { | |
| 1325 // ICU ticket #9959: | |
| 1326 // Forbid rules with a before-reset followed by a stronger relation. | |
| 1327 IcuTestErrorCode errorCode(*this, "TestBeforeWithTooStrongAfter"); | |
| 1328 RuleBasedCollator before2(UNICODE_STRING_SIMPLE("&[before 2]x<<q<p"), errorC
ode); | |
| 1329 if(errorCode.isSuccess()) { | |
| 1330 errln("should forbid before-2-reset followed by primary relation"); | |
| 1331 } else { | |
| 1332 errorCode.reset(); | |
| 1333 } | |
| 1334 RuleBasedCollator before3(UNICODE_STRING_SIMPLE("&[before 3]x<<<q<<s<p"), er
rorCode); | |
| 1335 if(errorCode.isSuccess()) { | |
| 1336 errln("should forbid before-3-reset followed by primary or secondary rel
ation"); | |
| 1337 } else { | |
| 1338 errorCode.reset(); | |
| 1339 } | |
| 1340 } | |
| 1341 | |
| 1342 void CollationRegressionTest::compareArray(Collator &c, | |
| 1343 const UChar tests[][CollationRegressi
onTest::MAX_TOKEN_LEN], | |
| 1344 int32_t testCount) | |
| 1345 { | |
| 1346 int32_t i; | |
| 1347 Collator::EComparisonResult expectedResult = Collator::EQUAL; | |
| 1348 | |
| 1349 for (i = 0; i < testCount; i += 3) | |
| 1350 { | |
| 1351 UnicodeString source(tests[i]); | |
| 1352 UnicodeString comparison(tests[i + 1]); | |
| 1353 UnicodeString target(tests[i + 2]); | |
| 1354 | |
| 1355 if (comparison == "<") | |
| 1356 { | |
| 1357 expectedResult = Collator::LESS; | |
| 1358 } | |
| 1359 else if (comparison == ">") | |
| 1360 { | |
| 1361 expectedResult = Collator::GREATER; | |
| 1362 } | |
| 1363 else if (comparison == "=") | |
| 1364 { | |
| 1365 expectedResult = Collator::EQUAL; | |
| 1366 } | |
| 1367 else | |
| 1368 { | |
| 1369 UnicodeString bogus1("Bogus comparison string \""); | |
| 1370 UnicodeString bogus2("\""); | |
| 1371 errln(bogus1 + comparison + bogus2); | |
| 1372 } | |
| 1373 | |
| 1374 Collator::EComparisonResult compareResult = c.compare(source, target); | |
| 1375 | |
| 1376 CollationKey sourceKey, targetKey; | |
| 1377 UErrorCode status = U_ZERO_ERROR; | |
| 1378 | |
| 1379 c.getCollationKey(source, sourceKey, status); | |
| 1380 | |
| 1381 if (U_FAILURE(status)) | |
| 1382 { | |
| 1383 errln("Couldn't get collationKey for source"); | |
| 1384 continue; | |
| 1385 } | |
| 1386 | |
| 1387 c.getCollationKey(target, targetKey, status); | |
| 1388 | |
| 1389 if (U_FAILURE(status)) | |
| 1390 { | |
| 1391 errln("Couldn't get collationKey for target"); | |
| 1392 continue; | |
| 1393 } | |
| 1394 | |
| 1395 Collator::EComparisonResult keyResult = sourceKey.compareTo(targetKey); | |
| 1396 | |
| 1397 reportCResult( source, target, sourceKey, targetKey, compareResult, keyR
esult, compareResult, expectedResult ); | |
| 1398 | |
| 1399 } | |
| 1400 } | |
| 1401 | |
| 1402 void CollationRegressionTest::assertEqual(CollationElementIterator &i1, Collatio
nElementIterator &i2) | |
| 1403 { | |
| 1404 int32_t c1, c2, count = 0; | |
| 1405 UErrorCode status = U_ZERO_ERROR; | |
| 1406 | |
| 1407 do | |
| 1408 { | |
| 1409 c1 = i1.next(status); | |
| 1410 c2 = i2.next(status); | |
| 1411 | |
| 1412 if (c1 != c2) | |
| 1413 { | |
| 1414 UnicodeString msg, msg1(" "); | |
| 1415 | |
| 1416 msg += msg1 + count; | |
| 1417 msg += ": strength(0x"; | |
| 1418 appendHex(c1, 8, msg); | |
| 1419 msg += ") != strength(0x"; | |
| 1420 appendHex(c2, 8, msg); | |
| 1421 msg += ")"; | |
| 1422 | |
| 1423 errln(msg); | |
| 1424 break; | |
| 1425 } | |
| 1426 | |
| 1427 count += 1; | |
| 1428 } | |
| 1429 while (c1 != CollationElementIterator::NULLORDER); | |
| 1430 } | |
| 1431 | |
| 1432 void CollationRegressionTest::runIndexedTest(int32_t index, UBool exec, const ch
ar* &name, char* /* par */) | |
| 1433 { | |
| 1434 if (exec) | |
| 1435 { | |
| 1436 logln("Collation Regression Tests: "); | |
| 1437 } | |
| 1438 | |
| 1439 if(en_us == NULL) { | |
| 1440 dataerrln("Class collator not instantiated"); | |
| 1441 name = ""; | |
| 1442 return; | |
| 1443 } | |
| 1444 TESTCASE_AUTO_BEGIN; | |
| 1445 TESTCASE_AUTO(Test4048446); | |
| 1446 TESTCASE_AUTO(Test4051866); | |
| 1447 TESTCASE_AUTO(Test4053636); | |
| 1448 TESTCASE_AUTO(Test4054238); | |
| 1449 TESTCASE_AUTO(Test4054734); | |
| 1450 TESTCASE_AUTO(Test4054736); | |
| 1451 TESTCASE_AUTO(Test4058613); | |
| 1452 TESTCASE_AUTO(Test4059820); | |
| 1453 TESTCASE_AUTO(Test4060154); | |
| 1454 TESTCASE_AUTO(Test4062418); | |
| 1455 TESTCASE_AUTO(Test4065540); | |
| 1456 TESTCASE_AUTO(Test4066189); | |
| 1457 TESTCASE_AUTO(Test4066696); | |
| 1458 TESTCASE_AUTO(Test4076676); | |
| 1459 TESTCASE_AUTO(Test4078588); | |
| 1460 TESTCASE_AUTO(Test4079231); | |
| 1461 TESTCASE_AUTO(Test4081866); | |
| 1462 TESTCASE_AUTO(Test4087241); | |
| 1463 TESTCASE_AUTO(Test4087243); | |
| 1464 TESTCASE_AUTO(Test4092260); | |
| 1465 TESTCASE_AUTO(Test4095316); | |
| 1466 TESTCASE_AUTO(Test4101940); | |
| 1467 TESTCASE_AUTO(Test4103436); | |
| 1468 TESTCASE_AUTO(Test4114076); | |
| 1469 TESTCASE_AUTO(Test4114077); | |
| 1470 TESTCASE_AUTO(Test4124632); | |
| 1471 TESTCASE_AUTO(Test4132736); | |
| 1472 TESTCASE_AUTO(Test4133509); | |
| 1473 TESTCASE_AUTO(Test4139572); | |
| 1474 TESTCASE_AUTO(Test4141640); | |
| 1475 TESTCASE_AUTO(Test4146160); | |
| 1476 TESTCASE_AUTO(Test4179216); | |
| 1477 TESTCASE_AUTO(TestT7189); | |
| 1478 TESTCASE_AUTO(TestCaseFirstCompression); | |
| 1479 TESTCASE_AUTO(TestTrailingComment); | |
| 1480 TESTCASE_AUTO(TestBeforeWithTooStrongAfter); | |
| 1481 TESTCASE_AUTO_END; | |
| 1482 } | |
| 1483 | |
| 1484 #endif /* #if !UCONFIG_NO_COLLATION */ | |
| OLD | NEW |