OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ****************************************************************************** |
| 3 * Copyright (C) 1996-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ****************************************************************************** |
| 6 */ |
| 7 |
| 8 /** |
| 9 * File tblcoll.cpp |
| 10 * |
| 11 * Created by: Helena Shih |
| 12 * |
| 13 * Modification History: |
| 14 * |
| 15 * Date Name Description |
| 16 * 2/5/97 aliu Added streamIn and streamOut methods. Added |
| 17 * constructor which reads RuleBasedCollator object fro
m |
| 18 * a binary file. Added writeToFile method which strea
ms |
| 19 * RuleBasedCollator out to a binary file. The streamI
n |
| 20 * and streamOut methods use istream and ostream object
s |
| 21 * in binary mode. |
| 22 * 2/11/97 aliu Moved declarations out of for loop initializer. |
| 23 * Added Mac compatibility #ifdef for ios::nocreate. |
| 24 * 2/12/97 aliu Modified to use TableCollationData sub-object to |
| 25 * hold invariant data. |
| 26 * 2/13/97 aliu Moved several methods into this class from Collation
. |
| 27 * Added a private RuleBasedCollator(Locale&) construct
or, |
| 28 * to be used by Collator::getInstance(). General |
| 29 * clean up. Made use of UErrorCode variables consiste
nt. |
| 30 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and
copy |
| 31 * constructor and getDynamicClassID. |
| 32 * 3/5/97 aliu Changed compaction cycle to improve performance. We |
| 33 * use the maximum allowable value which is kBlockCount
. |
| 34 * Modified getRules() to load rules dynamically. Chan
ged |
| 35 * constructFromFile() call to accommodate this (added |
| 36 * parameter to specify whether binary loading is to |
| 37 * take place). |
| 38 * 05/06/97 helena Added memory allocation error check. |
| 39 * 6/20/97 helena Java class name change. |
| 40 * 6/23/97 helena Adding comments to make code more readable. |
| 41 * 09/03/97 helena Added createCollationKeyValues(). |
| 42 * 06/26/98 erm Changes for CollationKeys using byte arrays. |
| 43 * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java |
| 44 * 04/23/99 stephen Removed EDecompositionMode, merged with |
| 45 * Normalizer::EMode |
| 46 * 06/14/99 stephen Removed kResourceBundleSuffix |
| 47 * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx |
| 48 * files are no longer used. |
| 49 * 11/02/99 helena Collator performance enhancements. Special case |
| 50 * for NO_OP situations. |
| 51 * 11/17/99 srl More performance enhancements. Inlined some internal
functions. |
| 52 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIt
erator |
| 53 * to implementation file. |
| 54 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) |
| 55 */ |
| 56 |
| 57 #include <typeinfo> // for 'typeid' to work |
| 58 |
| 59 #include "unicode/utypes.h" |
| 60 |
| 61 #if !UCONFIG_NO_COLLATION |
| 62 |
| 63 #include "unicode/tblcoll.h" |
| 64 #include "unicode/coleitr.h" |
| 65 #include "unicode/ures.h" |
| 66 #include "unicode/uset.h" |
| 67 #include "ucol_imp.h" |
| 68 #include "uresimp.h" |
| 69 #include "uhash.h" |
| 70 #include "cmemory.h" |
| 71 #include "cstring.h" |
| 72 #include "putilimp.h" |
| 73 |
| 74 /* public RuleBasedCollator constructor ---------------------------------- */ |
| 75 |
| 76 U_NAMESPACE_BEGIN |
| 77 |
| 78 /** |
| 79 * Copy constructor, aliasing, not write-through |
| 80 */ |
| 81 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) |
| 82 : Collator(that) |
| 83 , dataIsOwned(FALSE) |
| 84 , isWriteThroughAlias(FALSE) |
| 85 , ucollator(NULL) |
| 86 { |
| 87 RuleBasedCollator::operator=(that); |
| 88 } |
| 89 |
| 90 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
| 91 UErrorCode& status) : |
| 92 dataIsOwned(FALSE) |
| 93 { |
| 94 construct(rules, |
| 95 UCOL_DEFAULT_STRENGTH, |
| 96 UCOL_DEFAULT, |
| 97 status); |
| 98 } |
| 99 |
| 100 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
| 101 ECollationStrength collationStrength, |
| 102 UErrorCode& status) : dataIsOwned(FALSE) |
| 103 { |
| 104 construct(rules, |
| 105 getUCollationStrength(collationStrength), |
| 106 UCOL_DEFAULT, |
| 107 status); |
| 108 } |
| 109 |
| 110 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
| 111 UColAttributeValue decompositionMode, |
| 112 UErrorCode& status) : |
| 113 dataIsOwned(FALSE) |
| 114 { |
| 115 construct(rules, |
| 116 UCOL_DEFAULT_STRENGTH, |
| 117 decompositionMode, |
| 118 status); |
| 119 } |
| 120 |
| 121 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, |
| 122 ECollationStrength collationStrength, |
| 123 UColAttributeValue decompositionMode, |
| 124 UErrorCode& status) : dataIsOwned(FALSE) |
| 125 { |
| 126 construct(rules, |
| 127 getUCollationStrength(collationStrength), |
| 128 decompositionMode, |
| 129 status); |
| 130 } |
| 131 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
| 132 const RuleBasedCollator *base, |
| 133 UErrorCode &status) : |
| 134 dataIsOwned(TRUE), |
| 135 isWriteThroughAlias(FALSE) |
| 136 { |
| 137 ucollator = ucol_openBinary(bin, length, base->ucollator, &status); |
| 138 } |
| 139 |
| 140 void |
| 141 RuleBasedCollator::setRuleStringFromCollator() |
| 142 { |
| 143 int32_t length; |
| 144 const UChar *r = ucol_getRules(ucollator, &length); |
| 145 |
| 146 if (r && length > 0) { |
| 147 // alias the rules string |
| 148 urulestring.setTo(TRUE, r, length); |
| 149 } |
| 150 else { |
| 151 urulestring.truncate(0); // Clear string. |
| 152 } |
| 153 } |
| 154 |
| 155 // not aliasing, not write-through |
| 156 void |
| 157 RuleBasedCollator::construct(const UnicodeString& rules, |
| 158 UColAttributeValue collationStrength, |
| 159 UColAttributeValue decompositionMode, |
| 160 UErrorCode& status) |
| 161 { |
| 162 ucollator = ucol_openRules(rules.getBuffer(), rules.length(), |
| 163 decompositionMode, collationStrength, |
| 164 NULL, &status); |
| 165 |
| 166 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it |
| 167 isWriteThroughAlias = FALSE; |
| 168 |
| 169 if(ucollator == NULL) { |
| 170 if(U_SUCCESS(status)) { |
| 171 status = U_MEMORY_ALLOCATION_ERROR; |
| 172 } |
| 173 return; // Failure |
| 174 } |
| 175 |
| 176 setRuleStringFromCollator(); |
| 177 } |
| 178 |
| 179 /* RuleBasedCollator public destructor ----------------------------------- */ |
| 180 |
| 181 RuleBasedCollator::~RuleBasedCollator() |
| 182 { |
| 183 if (dataIsOwned) |
| 184 { |
| 185 ucol_close(ucollator); |
| 186 } |
| 187 ucollator = 0; |
| 188 } |
| 189 |
| 190 /* RuleBasedCollator public methods -------------------------------------- */ |
| 191 |
| 192 UBool RuleBasedCollator::operator==(const Collator& that) const |
| 193 { |
| 194 /* only checks for address equals here */ |
| 195 if (Collator::operator==(that)) |
| 196 return TRUE; |
| 197 |
| 198 if (typeid(*this) != typeid(that)) |
| 199 return FALSE; /* not the same class */ |
| 200 |
| 201 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; |
| 202 |
| 203 // weiv: use C function, commented code below is wrong |
| 204 return ucol_equals(this->ucollator, thatAlias.ucollator); |
| 205 /* |
| 206 synwee : orginal code does not check for data compatibility |
| 207 */ |
| 208 /* |
| 209 if (ucollator != thatAlias.ucollator) |
| 210 return FALSE; |
| 211 |
| 212 return TRUE; |
| 213 */ |
| 214 } |
| 215 |
| 216 UBool RuleBasedCollator::operator!=(const Collator& other) const |
| 217 { |
| 218 return !(*this == other); |
| 219 } |
| 220 |
| 221 // aliasing, not write-through |
| 222 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) |
| 223 { |
| 224 if (this != &that) |
| 225 { |
| 226 if (dataIsOwned) |
| 227 { |
| 228 ucol_close(ucollator); |
| 229 } |
| 230 |
| 231 urulestring.truncate(0); // empty the rule string |
| 232 dataIsOwned = TRUE; |
| 233 isWriteThroughAlias = FALSE; |
| 234 |
| 235 UErrorCode intStatus = U_ZERO_ERROR; |
| 236 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; |
| 237 ucollator = ucol_safeClone(that.ucollator, NULL, &buffersize, |
| 238 &intStatus); |
| 239 if (U_SUCCESS(intStatus)) { |
| 240 setRuleStringFromCollator(); |
| 241 } |
| 242 } |
| 243 return *this; |
| 244 } |
| 245 |
| 246 // aliasing, not write-through |
| 247 Collator* RuleBasedCollator::clone() const |
| 248 { |
| 249 return new RuleBasedCollator(*this); |
| 250 } |
| 251 |
| 252 CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
| 253 (const UnicodeString& source) const |
| 254 { |
| 255 UErrorCode status = U_ZERO_ERROR; |
| 256 CollationElementIterator *result = new CollationElementIterator(source, this
, |
| 257 status); |
| 258 if (U_FAILURE(status)) { |
| 259 delete result; |
| 260 return NULL; |
| 261 } |
| 262 |
| 263 return result; |
| 264 } |
| 265 |
| 266 /** |
| 267 * Create a CollationElementIterator object that will iterate over the |
| 268 * elements in a string, using the collation rules defined in this |
| 269 * RuleBasedCollator |
| 270 */ |
| 271 CollationElementIterator* RuleBasedCollator::createCollationElementIterator |
| 272 (const CharacterIterator& source) const |
| 273 { |
| 274 UErrorCode status = U_ZERO_ERROR; |
| 275 CollationElementIterator *result = new CollationElementIterator(source, this
, |
| 276 status); |
| 277 |
| 278 if (U_FAILURE(status)) { |
| 279 delete result; |
| 280 return NULL; |
| 281 } |
| 282 |
| 283 return result; |
| 284 } |
| 285 |
| 286 /** |
| 287 * Return a string representation of this collator's rules. The string can |
| 288 * later be passed to the constructor that takes a UnicodeString argument, |
| 289 * which will construct a collator that's functionally identical to this one. |
| 290 * You can also allow users to edit the string in order to change the collation |
| 291 * data, or you can print it out for inspection, or whatever. |
| 292 */ |
| 293 const UnicodeString& RuleBasedCollator::getRules() const |
| 294 { |
| 295 return urulestring; |
| 296 } |
| 297 |
| 298 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) |
| 299 { |
| 300 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); |
| 301 |
| 302 if (rulesize > 0) { |
| 303 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); |
| 304 if(rules != NULL) { |
| 305 ucol_getRulesEx(ucollator, delta, rules, rulesize); |
| 306 buffer.setTo(rules, rulesize); |
| 307 uprv_free(rules); |
| 308 } else { // couldn't allocate |
| 309 buffer.remove(); |
| 310 } |
| 311 } |
| 312 else { |
| 313 buffer.remove(); |
| 314 } |
| 315 } |
| 316 |
| 317 UnicodeSet * |
| 318 RuleBasedCollator::getTailoredSet(UErrorCode &status) const |
| 319 { |
| 320 if(U_FAILURE(status)) { |
| 321 return NULL; |
| 322 } |
| 323 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); |
| 324 } |
| 325 |
| 326 |
| 327 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const |
| 328 { |
| 329 if (versionInfo!=NULL){ |
| 330 ucol_getVersion(ucollator, versionInfo); |
| 331 } |
| 332 } |
| 333 |
| 334 Collator::EComparisonResult RuleBasedCollator::compare( |
| 335 const UnicodeString& source, |
| 336 const UnicodeString& target, |
| 337 int32_t length) const |
| 338 { |
| 339 UErrorCode status = U_ZERO_ERROR; |
| 340 return getEComparisonResult(compare(source.getBuffer(), uprv_min(length,sour
ce.length()), target.getBuffer(), uprv_min(length,target.length()), status)); |
| 341 } |
| 342 |
| 343 UCollationResult RuleBasedCollator::compare( |
| 344 const UnicodeString& source, |
| 345 const UnicodeString& target, |
| 346 int32_t length, |
| 347 UErrorCode &status) const |
| 348 { |
| 349 return compare(source.getBuffer(), uprv_min(length,source.length()), target.
getBuffer(), uprv_min(length,target.length()), status); |
| 350 } |
| 351 |
| 352 Collator::EComparisonResult RuleBasedCollator::compare(const UChar* source, |
| 353 int32_t sourceLength, |
| 354 const UChar* target, |
| 355 int32_t targetLength) |
| 356 const |
| 357 { |
| 358 return getEComparisonResult(ucol_strcoll(ucollator, source, sourceLength, |
| 359 target, targetLength)); |
| 360 } |
| 361 |
| 362 UCollationResult RuleBasedCollator::compare(const UChar* source, |
| 363 int32_t sourceLength, |
| 364 const UChar* target, |
| 365 int32_t targetLength, |
| 366 UErrorCode &status) const |
| 367 { |
| 368 if(U_SUCCESS(status)) { |
| 369 return ucol_strcoll(ucollator, source, sourceLength, target, targetLeng
th); |
| 370 } else { |
| 371 return UCOL_EQUAL; |
| 372 } |
| 373 } |
| 374 |
| 375 /** |
| 376 * Compare two strings using this collator |
| 377 */ |
| 378 Collator::EComparisonResult RuleBasedCollator::compare( |
| 379 const UnicodeString& source, |
| 380 const UnicodeString& target) const |
| 381 { |
| 382 return getEComparisonResult(ucol_strcoll(ucollator, source.getBuffer(), sour
ce.length(), |
| 383 target.getBuffer(), targ
et.length())); |
| 384 } |
| 385 |
| 386 UCollationResult RuleBasedCollator::compare( |
| 387 const UnicodeString& source, |
| 388 const UnicodeString& target, |
| 389 UErrorCode &status) const |
| 390 { |
| 391 if(U_SUCCESS(status)) { |
| 392 return ucol_strcoll(ucollator, source.getBuffer(), source.length(), |
| 393 target.getBuffer(), target.length()); |
| 394 } else { |
| 395 return UCOL_EQUAL; |
| 396 } |
| 397 } |
| 398 |
| 399 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, |
| 400 UCharIterator &tIter, |
| 401 UErrorCode &status) const { |
| 402 if(U_SUCCESS(status)) { |
| 403 return ucol_strcollIter(ucollator, &sIter, &tIter, &status); |
| 404 } else { |
| 405 return UCOL_EQUAL; |
| 406 } |
| 407 } |
| 408 |
| 409 /** |
| 410 * Retrieve a collation key for the specified string. The key can be compared |
| 411 * with other collation keys using a bitwise comparison (e.g. memcmp) to find |
| 412 * the ordering of their respective source strings. This is handy when doing a |
| 413 * sort, where each sort key must be compared many times. |
| 414 * |
| 415 * The basic algorithm here is to find all of the collation elements for each |
| 416 * character in the source string, convert them to an ASCII representation, and |
| 417 * put them into the collation key. But it's trickier than that. Each |
| 418 * collation element in a string has three components: primary ('A' vs 'B'), |
| 419 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary differen
ce |
| 420 * at the end of a string takes precedence over a secondary or tertiary |
| 421 * difference earlier in the string. |
| 422 * |
| 423 * To account for this, we put all of the primary orders at the beginning of |
| 424 * the string, followed by the secondary and tertiary orders. Each set of |
| 425 * orders is terminated by nulls so that a key for a string which is a initial |
| 426 * substring of another key will compare less without any special case. |
| 427 * |
| 428 * Here's a hypothetical example, with the collation element represented as a |
| 429 * three-digit number, one digit for primary, one for secondary, etc. |
| 430 * |
| 431 * String: A a B \u00C9 |
| 432 * Collation Elements: 101 100 201 511 |
| 433 * Collation Key: 1125<null>0001<null>1011<null> |
| 434 * |
| 435 * To make things even trickier, secondary differences (accent marks) are |
| 436 * compared starting at the *end* of the string in languages with French |
| 437 * secondary ordering. But when comparing the accent marks on a single base |
| 438 * character, they are compared from the beginning. To handle this, we reverse |
| 439 * all of the accents that belong to each base character, then we reverse the |
| 440 * entire string of secondary orderings at the end. |
| 441 */ |
| 442 CollationKey& RuleBasedCollator::getCollationKey( |
| 443 const UnicodeString& source, |
| 444 CollationKey& sortkey, |
| 445 UErrorCode& status) const |
| 446 { |
| 447 return getCollationKey(source.getBuffer(), source.length(), sortkey, status)
; |
| 448 } |
| 449 |
| 450 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, |
| 451 int32_t sourceLen, |
| 452 CollationKey& sortkey, |
| 453 UErrorCode& status) const |
| 454 { |
| 455 if (U_FAILURE(status)) |
| 456 { |
| 457 return sortkey.setToBogus(); |
| 458 } |
| 459 |
| 460 if ((!source) || (sourceLen == 0)) { |
| 461 return sortkey.reset(); |
| 462 } |
| 463 |
| 464 uint8_t *result; |
| 465 int32_t resultLen = ucol_getSortKeyWithAllocation(ucollator, |
| 466 source, sourceLen, |
| 467 &result, |
| 468 &status); |
| 469 sortkey.adopt(result, resultLen); |
| 470 return sortkey; |
| 471 } |
| 472 |
| 473 /** |
| 474 * Return the maximum length of any expansion sequences that end with the |
| 475 * specified comparison order. |
| 476 * @param order a collation order returned by previous or next. |
| 477 * @return the maximum length of any expansion seuences ending with the |
| 478 * specified order or 1 if collation order does not occur at the end of
any |
| 479 * expansion sequence. |
| 480 * @see CollationElementIterator#getMaxExpansion |
| 481 */ |
| 482 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const |
| 483 { |
| 484 uint8_t result; |
| 485 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); |
| 486 return result; |
| 487 } |
| 488 |
| 489 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, |
| 490 UErrorCode &status) |
| 491 { |
| 492 return ucol_cloneRuleData(ucollator, &length, &status); |
| 493 } |
| 494 |
| 495 |
| 496 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UError
Code &status) |
| 497 { |
| 498 return ucol_cloneBinary(ucollator, buffer, capacity, &status); |
| 499 } |
| 500 |
| 501 void RuleBasedCollator::setAttribute(UColAttribute attr, |
| 502 UColAttributeValue value, |
| 503 UErrorCode &status) |
| 504 { |
| 505 if (U_FAILURE(status)) |
| 506 return; |
| 507 checkOwned(); |
| 508 ucol_setAttribute(ucollator, attr, value, &status); |
| 509 } |
| 510 |
| 511 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, |
| 512 UErrorCode &status) |
| 513 { |
| 514 if (U_FAILURE(status)) |
| 515 return UCOL_DEFAULT; |
| 516 return ucol_getAttribute(ucollator, attr, &status); |
| 517 } |
| 518 |
| 519 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UEr
rorCode &status) { |
| 520 checkOwned(); |
| 521 return ucol_setVariableTop(ucollator, varTop, len, &status); |
| 522 } |
| 523 |
| 524 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString varTop, UErrorCod
e &status) { |
| 525 checkOwned(); |
| 526 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &
status); |
| 527 } |
| 528 |
| 529 void RuleBasedCollator::setVariableTop(const uint32_t varTop, UErrorCode &status
) { |
| 530 checkOwned(); |
| 531 ucol_restoreVariableTop(ucollator, varTop, &status); |
| 532 } |
| 533 |
| 534 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { |
| 535 return ucol_getVariableTop(ucollator, &status); |
| 536 } |
| 537 |
| 538 Collator* RuleBasedCollator::safeClone(void) |
| 539 { |
| 540 UErrorCode intStatus = U_ZERO_ERROR; |
| 541 int32_t buffersize = U_COL_SAFECLONE_BUFFERSIZE; |
| 542 UCollator *ucol = ucol_safeClone(ucollator, NULL, &buffersize, |
| 543 &intStatus); |
| 544 if (U_FAILURE(intStatus)) { |
| 545 return NULL; |
| 546 } |
| 547 |
| 548 RuleBasedCollator *result = new RuleBasedCollator(); |
| 549 // Null pointer check |
| 550 if (result != NULL) { |
| 551 result->ucollator = ucol; |
| 552 result->dataIsOwned = TRUE; |
| 553 result->isWriteThroughAlias = FALSE; |
| 554 setRuleStringFromCollator(); |
| 555 } |
| 556 |
| 557 return result; |
| 558 } |
| 559 |
| 560 |
| 561 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, |
| 562 uint8_t *result, int32_t resultLength) |
| 563 const |
| 564 { |
| 565 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), resul
t, resultLength); |
| 566 } |
| 567 |
| 568 int32_t RuleBasedCollator::getSortKey(const UChar *source, |
| 569 int32_t sourceLength, uint8_t *result, |
| 570 int32_t resultLength) const |
| 571 { |
| 572 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength
); |
| 573 } |
| 574 |
| 575 Collator::ECollationStrength RuleBasedCollator::getStrength(void) const |
| 576 { |
| 577 UErrorCode intStatus = U_ZERO_ERROR; |
| 578 return getECollationStrength(ucol_getAttribute(ucollator, UCOL_STRENGTH, |
| 579 &intStatus)); |
| 580 } |
| 581 |
| 582 void RuleBasedCollator::setStrength(ECollationStrength newStrength) |
| 583 { |
| 584 checkOwned(); |
| 585 UErrorCode intStatus = U_ZERO_ERROR; |
| 586 UCollationStrength strength = getUCollationStrength(newStrength); |
| 587 ucol_setAttribute(ucollator, UCOL_STRENGTH, strength, &intStatus); |
| 588 } |
| 589 |
| 590 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, |
| 591 int32_t destCapacity, |
| 592 UErrorCode& status) const |
| 593 { |
| 594 return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); |
| 595 } |
| 596 |
| 597 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, |
| 598 int32_t reorderCodesLength, |
| 599 UErrorCode& status) |
| 600 { |
| 601 ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); |
| 602 } |
| 603 |
| 604 |
| 605 /** |
| 606 * Create a hash code for this collation. Just hash the main rule table -- that |
| 607 * should be good enough for almost any use. |
| 608 */ |
| 609 int32_t RuleBasedCollator::hashCode() const |
| 610 { |
| 611 int32_t length; |
| 612 const UChar *rules = ucol_getRules(ucollator, &length); |
| 613 return uhash_hashUCharsN(rules, length); |
| 614 } |
| 615 |
| 616 /** |
| 617 * return the locale of this collator |
| 618 */ |
| 619 const Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &s
tatus) const { |
| 620 const char *result = ucol_getLocaleByType(ucollator, type, &status); |
| 621 if(result == NULL) { |
| 622 Locale res(""); |
| 623 res.setToBogus(); |
| 624 return res; |
| 625 } else { |
| 626 return Locale(result); |
| 627 } |
| 628 } |
| 629 |
| 630 void |
| 631 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& valid
Locale, const Locale& actualLocale) { |
| 632 checkOwned(); |
| 633 char* rloc = uprv_strdup(requestedLocale.getName()); |
| 634 if (rloc) { |
| 635 char* vloc = uprv_strdup(validLocale.getName()); |
| 636 if (vloc) { |
| 637 char* aloc = uprv_strdup(actualLocale.getName()); |
| 638 if (aloc) { |
| 639 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); |
| 640 return; |
| 641 } |
| 642 uprv_free(vloc); |
| 643 } |
| 644 uprv_free(rloc); |
| 645 } |
| 646 } |
| 647 |
| 648 // RuleBasedCollator private constructors --------------------------------- |
| 649 |
| 650 RuleBasedCollator::RuleBasedCollator() |
| 651 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
| 652 { |
| 653 } |
| 654 |
| 655 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, |
| 656 UErrorCode& status) |
| 657 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) |
| 658 { |
| 659 if (U_FAILURE(status)) |
| 660 return; |
| 661 |
| 662 /* |
| 663 Try to load, in order: |
| 664 1. The desired locale's collation. |
| 665 2. A fallback of the desired locale. |
| 666 3. The default locale's collation. |
| 667 4. A fallback of the default locale. |
| 668 5. The default collation rules, which contains en_US collation rules. |
| 669 |
| 670 To reiterate, we try: |
| 671 Specific: |
| 672 language+country+variant |
| 673 language+country |
| 674 language |
| 675 Default: |
| 676 language+country+variant |
| 677 language+country |
| 678 language |
| 679 Root: (aka DEFAULTRULES) |
| 680 steps 1-5 are handled by resource bundle fallback mechanism. |
| 681 however, in a very unprobable situation that no resource bundle |
| 682 data exists, step 5 is repeated with hardcoded default rules. |
| 683 */ |
| 684 |
| 685 setUCollator(desiredLocale, status); |
| 686 |
| 687 if (U_FAILURE(status)) |
| 688 { |
| 689 status = U_ZERO_ERROR; |
| 690 |
| 691 setUCollator(kRootLocaleName, status); |
| 692 if (status == U_ZERO_ERROR) { |
| 693 status = U_USING_DEFAULT_WARNING; |
| 694 } |
| 695 } |
| 696 |
| 697 if (U_SUCCESS(status)) |
| 698 { |
| 699 setRuleStringFromCollator(); |
| 700 } |
| 701 } |
| 702 |
| 703 void |
| 704 RuleBasedCollator::setUCollator(const char *locale, |
| 705 UErrorCode &status) |
| 706 { |
| 707 if (U_FAILURE(status)) |
| 708 return; |
| 709 if (ucollator && dataIsOwned) |
| 710 ucol_close(ucollator); |
| 711 ucollator = ucol_open_internal(locale, &status); |
| 712 dataIsOwned = TRUE; |
| 713 isWriteThroughAlias = FALSE; |
| 714 } |
| 715 |
| 716 |
| 717 void |
| 718 RuleBasedCollator::checkOwned() { |
| 719 if (!(dataIsOwned || isWriteThroughAlias)) { |
| 720 UErrorCode status = U_ZERO_ERROR; |
| 721 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); |
| 722 setRuleStringFromCollator(); |
| 723 dataIsOwned = TRUE; |
| 724 isWriteThroughAlias = FALSE; |
| 725 } |
| 726 } |
| 727 |
| 728 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
| 729 |
| 730 U_NAMESPACE_END |
| 731 |
| 732 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |