| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ****************************************************************************** | |
| 3 * Copyright (C) 1996-2013, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ****************************************************************************** | |
| 6 */ | |
| 7 | |
| 8 /** | |
| 9 * File tblcoll.cpp | |
| 10 * | |
| 11 * Created by: Helena Shih | |
| 12 * | |
| 13 * Modification History: | |
| 14 * | |
| 15 * Date Name Description | |
| 16 * 2/5/97 aliu Added streamIn and streamOut methods. Added | |
| 17 * constructor which reads RuleBasedCollator object fro
m | |
| 18 * a binary file. Added writeToFile method which strea
ms | |
| 19 * RuleBasedCollator out to a binary file. The streamI
n | |
| 20 * and streamOut methods use istream and ostream object
s | |
| 21 * in binary mode. | |
| 22 * 2/11/97 aliu Moved declarations out of for loop initializer. | |
| 23 * Added Mac compatibility #ifdef for ios::nocreate. | |
| 24 * 2/12/97 aliu Modified to use TableCollationData sub-object to | |
| 25 * hold invariant data. | |
| 26 * 2/13/97 aliu Moved several methods into this class from Collation
. | |
| 27 * Added a private RuleBasedCollator(Locale&) construct
or, | |
| 28 * to be used by Collator::getInstance(). General | |
| 29 * clean up. Made use of UErrorCode variables consiste
nt. | |
| 30 * 2/20/97 helena Added clone, operator==, operator!=, operator=, and
copy | |
| 31 * constructor and getDynamicClassID. | |
| 32 * 3/5/97 aliu Changed compaction cycle to improve performance. We | |
| 33 * use the maximum allowable value which is kBlockCount
. | |
| 34 * Modified getRules() to load rules dynamically. Chan
ged | |
| 35 * constructFromFile() call to accommodate this (added | |
| 36 * parameter to specify whether binary loading is to | |
| 37 * take place). | |
| 38 * 05/06/97 helena Added memory allocation error check. | |
| 39 * 6/20/97 helena Java class name change. | |
| 40 * 6/23/97 helena Adding comments to make code more readable. | |
| 41 * 09/03/97 helena Added createCollationKeyValues(). | |
| 42 * 06/26/98 erm Changes for CollationKeys using byte arrays. | |
| 43 * 08/10/98 erm Synched with 1.2 version of RuleBasedCollator.java | |
| 44 * 04/23/99 stephen Removed EDecompositionMode, merged with | |
| 45 * Normalizer::EMode | |
| 46 * 06/14/99 stephen Removed kResourceBundleSuffix | |
| 47 * 06/22/99 stephen Fixed logic in constructFromFile() since .ctx | |
| 48 * files are no longer used. | |
| 49 * 11/02/99 helena Collator performance enhancements. Special case | |
| 50 * for NO_OP situations. | |
| 51 * 11/17/99 srl More performance enhancements. Inlined some internal
functions. | |
| 52 * 12/15/99 aliu Update to support Thai collation. Move NormalizerIt
erator | |
| 53 * to implementation file. | |
| 54 * 01/29/01 synwee Modified into a C++ wrapper calling C APIs (ucol.h) | |
| 55 */ | |
| 56 | |
| 57 #include "unicode/utypes.h" | |
| 58 | |
| 59 #if !UCONFIG_NO_COLLATION | |
| 60 | |
| 61 #include "unicode/tblcoll.h" | |
| 62 #include "unicode/coleitr.h" | |
| 63 #include "unicode/ures.h" | |
| 64 #include "unicode/uset.h" | |
| 65 #include "ucol_imp.h" | |
| 66 #include "uresimp.h" | |
| 67 #include "uhash.h" | |
| 68 #include "cmemory.h" | |
| 69 #include "cstring.h" | |
| 70 #include "putilimp.h" | |
| 71 #include "ustr_imp.h" | |
| 72 | |
| 73 /* public RuleBasedCollator constructor ---------------------------------- */ | |
| 74 | |
| 75 U_NAMESPACE_BEGIN | |
| 76 | |
| 77 /** | |
| 78 * Copy constructor, aliasing, not write-through | |
| 79 */ | |
| 80 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator& that) | |
| 81 : Collator(that) | |
| 82 , dataIsOwned(FALSE) | |
| 83 , isWriteThroughAlias(FALSE) | |
| 84 , ucollator(NULL) | |
| 85 { | |
| 86 RuleBasedCollator::operator=(that); | |
| 87 } | |
| 88 | |
| 89 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
| 90 UErrorCode& status) : | |
| 91 dataIsOwned(FALSE) | |
| 92 { | |
| 93 construct(rules, | |
| 94 UCOL_DEFAULT_STRENGTH, | |
| 95 UCOL_DEFAULT, | |
| 96 status); | |
| 97 } | |
| 98 | |
| 99 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
| 100 ECollationStrength collationStrength, | |
| 101 UErrorCode& status) : dataIsOwned(FALSE) | |
| 102 { | |
| 103 construct(rules, | |
| 104 (UColAttributeValue)collationStrength, | |
| 105 UCOL_DEFAULT, | |
| 106 status); | |
| 107 } | |
| 108 | |
| 109 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
| 110 UColAttributeValue decompositionMode, | |
| 111 UErrorCode& status) : | |
| 112 dataIsOwned(FALSE) | |
| 113 { | |
| 114 construct(rules, | |
| 115 UCOL_DEFAULT_STRENGTH, | |
| 116 decompositionMode, | |
| 117 status); | |
| 118 } | |
| 119 | |
| 120 RuleBasedCollator::RuleBasedCollator(const UnicodeString& rules, | |
| 121 ECollationStrength collationStrength, | |
| 122 UColAttributeValue decompositionMode, | |
| 123 UErrorCode& status) : dataIsOwned(FALSE) | |
| 124 { | |
| 125 construct(rules, | |
| 126 (UColAttributeValue)collationStrength, | |
| 127 decompositionMode, | |
| 128 status); | |
| 129 } | |
| 130 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, | |
| 131 const RuleBasedCollator *base, | |
| 132 UErrorCode &status) : | |
| 133 dataIsOwned(TRUE), | |
| 134 isWriteThroughAlias(FALSE) | |
| 135 { | |
| 136 ucollator = ucol_openBinary(bin, length, base->ucollator, &status); | |
| 137 } | |
| 138 | |
| 139 void | |
| 140 RuleBasedCollator::setRuleStringFromCollator() | |
| 141 { | |
| 142 int32_t length; | |
| 143 const UChar *r = ucol_getRules(ucollator, &length); | |
| 144 | |
| 145 if (r && length > 0) { | |
| 146 // alias the rules string | |
| 147 urulestring.setTo(TRUE, r, length); | |
| 148 } | |
| 149 else { | |
| 150 urulestring.truncate(0); // Clear string. | |
| 151 } | |
| 152 } | |
| 153 | |
| 154 // not aliasing, not write-through | |
| 155 void | |
| 156 RuleBasedCollator::construct(const UnicodeString& rules, | |
| 157 UColAttributeValue collationStrength, | |
| 158 UColAttributeValue decompositionMode, | |
| 159 UErrorCode& status) | |
| 160 { | |
| 161 ucollator = ucol_openRules(rules.getBuffer(), rules.length(), | |
| 162 decompositionMode, collationStrength, | |
| 163 NULL, &status); | |
| 164 | |
| 165 dataIsOwned = TRUE; // since we own a collator now, we need to get rid of it | |
| 166 isWriteThroughAlias = FALSE; | |
| 167 | |
| 168 if(ucollator == NULL) { | |
| 169 if(U_SUCCESS(status)) { | |
| 170 status = U_MEMORY_ALLOCATION_ERROR; | |
| 171 } | |
| 172 return; // Failure | |
| 173 } | |
| 174 | |
| 175 setRuleStringFromCollator(); | |
| 176 } | |
| 177 | |
| 178 /* RuleBasedCollator public destructor ----------------------------------- */ | |
| 179 | |
| 180 RuleBasedCollator::~RuleBasedCollator() | |
| 181 { | |
| 182 if (dataIsOwned) | |
| 183 { | |
| 184 ucol_close(ucollator); | |
| 185 } | |
| 186 ucollator = 0; | |
| 187 } | |
| 188 | |
| 189 /* RuleBasedCollator public methods -------------------------------------- */ | |
| 190 | |
| 191 UBool RuleBasedCollator::operator==(const Collator& that) const | |
| 192 { | |
| 193 /* only checks for address equals here */ | |
| 194 if (this == &that) { | |
| 195 return TRUE; | |
| 196 } | |
| 197 if (!Collator::operator==(that)) { | |
| 198 return FALSE; /* not the same class */ | |
| 199 } | |
| 200 | |
| 201 RuleBasedCollator& thatAlias = (RuleBasedCollator&)that; | |
| 202 | |
| 203 return ucol_equals(this->ucollator, thatAlias.ucollator); | |
| 204 } | |
| 205 | |
| 206 // aliasing, not write-through | |
| 207 RuleBasedCollator& RuleBasedCollator::operator=(const RuleBasedCollator& that) | |
| 208 { | |
| 209 if (this == &that) { return *this; } | |
| 210 | |
| 211 UErrorCode intStatus = U_ZERO_ERROR; | |
| 212 UCollator *ucol = ucol_safeClone(that.ucollator, NULL, NULL, &intStatus); | |
| 213 if (U_FAILURE(intStatus)) { return *this; } | |
| 214 | |
| 215 if (dataIsOwned) { | |
| 216 ucol_close(ucollator); | |
| 217 } | |
| 218 ucollator = ucol; | |
| 219 dataIsOwned = TRUE; | |
| 220 isWriteThroughAlias = FALSE; | |
| 221 setRuleStringFromCollator(); | |
| 222 return *this; | |
| 223 } | |
| 224 | |
| 225 // aliasing, not write-through | |
| 226 Collator* RuleBasedCollator::clone() const | |
| 227 { | |
| 228 RuleBasedCollator* coll = new RuleBasedCollator(*this); | |
| 229 // There is a small chance that the internal ucol_safeClone() call fails. | |
| 230 if (coll != NULL && coll->ucollator == NULL) { | |
| 231 delete coll; | |
| 232 return NULL; | |
| 233 } | |
| 234 return coll; | |
| 235 } | |
| 236 | |
| 237 | |
| 238 CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
| 239 (const UnicodeString& source) const | |
| 240 { | |
| 241 UErrorCode status = U_ZERO_ERROR; | |
| 242 CollationElementIterator *result = new CollationElementIterator(source, this
, | |
| 243 status); | |
| 244 if (U_FAILURE(status)) { | |
| 245 delete result; | |
| 246 return NULL; | |
| 247 } | |
| 248 | |
| 249 return result; | |
| 250 } | |
| 251 | |
| 252 /** | |
| 253 * Create a CollationElementIterator object that will iterate over the | |
| 254 * elements in a string, using the collation rules defined in this | |
| 255 * RuleBasedCollator | |
| 256 */ | |
| 257 CollationElementIterator* RuleBasedCollator::createCollationElementIterator | |
| 258 (const CharacterIterator& source) const | |
| 259 { | |
| 260 UErrorCode status = U_ZERO_ERROR; | |
| 261 CollationElementIterator *result = new CollationElementIterator(source, this
, | |
| 262 status); | |
| 263 | |
| 264 if (U_FAILURE(status)) { | |
| 265 delete result; | |
| 266 return NULL; | |
| 267 } | |
| 268 | |
| 269 return result; | |
| 270 } | |
| 271 | |
| 272 /** | |
| 273 * Return a string representation of this collator's rules. The string can | |
| 274 * later be passed to the constructor that takes a UnicodeString argument, | |
| 275 * which will construct a collator that's functionally identical to this one. | |
| 276 * You can also allow users to edit the string in order to change the collation | |
| 277 * data, or you can print it out for inspection, or whatever. | |
| 278 */ | |
| 279 const UnicodeString& RuleBasedCollator::getRules() const | |
| 280 { | |
| 281 return urulestring; | |
| 282 } | |
| 283 | |
| 284 void RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) | |
| 285 { | |
| 286 int32_t rulesize = ucol_getRulesEx(ucollator, delta, NULL, -1); | |
| 287 | |
| 288 if (rulesize > 0) { | |
| 289 UChar *rules = (UChar*) uprv_malloc( sizeof(UChar) * (rulesize) ); | |
| 290 if(rules != NULL) { | |
| 291 ucol_getRulesEx(ucollator, delta, rules, rulesize); | |
| 292 buffer.setTo(rules, rulesize); | |
| 293 uprv_free(rules); | |
| 294 } else { // couldn't allocate | |
| 295 buffer.remove(); | |
| 296 } | |
| 297 } | |
| 298 else { | |
| 299 buffer.remove(); | |
| 300 } | |
| 301 } | |
| 302 | |
| 303 UnicodeSet * | |
| 304 RuleBasedCollator::getTailoredSet(UErrorCode &status) const | |
| 305 { | |
| 306 if(U_FAILURE(status)) { | |
| 307 return NULL; | |
| 308 } | |
| 309 return (UnicodeSet *)ucol_getTailoredSet(this->ucollator, &status); | |
| 310 } | |
| 311 | |
| 312 | |
| 313 void RuleBasedCollator::getVersion(UVersionInfo versionInfo) const | |
| 314 { | |
| 315 if (versionInfo!=NULL){ | |
| 316 ucol_getVersion(ucollator, versionInfo); | |
| 317 } | |
| 318 } | |
| 319 | |
| 320 /** | |
| 321 * Compare two strings using this collator | |
| 322 */ | |
| 323 UCollationResult RuleBasedCollator::compare( | |
| 324 const UnicodeString& source, | |
| 325 const UnicodeString& target, | |
| 326 int32_t length, | |
| 327 UErrorCode &status) const | |
| 328 { | |
| 329 return compare(source.getBuffer(), uprv_min(length,source.length()), target.
getBuffer(), uprv_min(length,target.length()), status); | |
| 330 } | |
| 331 | |
| 332 UCollationResult RuleBasedCollator::compare(const UChar* source, | |
| 333 int32_t sourceLength, | |
| 334 const UChar* target, | |
| 335 int32_t targetLength, | |
| 336 UErrorCode &status) const | |
| 337 { | |
| 338 if(U_SUCCESS(status)) { | |
| 339 return ucol_strcoll(ucollator, source, sourceLength, target, targetLeng
th); | |
| 340 } else { | |
| 341 return UCOL_EQUAL; | |
| 342 } | |
| 343 } | |
| 344 | |
| 345 UCollationResult RuleBasedCollator::compare( | |
| 346 const UnicodeString& source, | |
| 347 const UnicodeString& target, | |
| 348 UErrorCode &status) const | |
| 349 { | |
| 350 if(U_SUCCESS(status)) { | |
| 351 return ucol_strcoll(ucollator, source.getBuffer(), source.length(), | |
| 352 target.getBuffer(), target.length()); | |
| 353 } else { | |
| 354 return UCOL_EQUAL; | |
| 355 } | |
| 356 } | |
| 357 | |
| 358 UCollationResult RuleBasedCollator::compare(UCharIterator &sIter, | |
| 359 UCharIterator &tIter, | |
| 360 UErrorCode &status) const { | |
| 361 if(U_SUCCESS(status)) { | |
| 362 return ucol_strcollIter(ucollator, &sIter, &tIter, &status); | |
| 363 } else { | |
| 364 return UCOL_EQUAL; | |
| 365 } | |
| 366 } | |
| 367 | |
| 368 /** | |
| 369 * Retrieve a collation key for the specified string. The key can be compared | |
| 370 * with other collation keys using a bitwise comparison (e.g. memcmp) to find | |
| 371 * the ordering of their respective source strings. This is handy when doing a | |
| 372 * sort, where each sort key must be compared many times. | |
| 373 * | |
| 374 * The basic algorithm here is to find all of the collation elements for each | |
| 375 * character in the source string, convert them to an ASCII representation, and | |
| 376 * put them into the collation key. But it's trickier than that. Each | |
| 377 * collation element in a string has three components: primary ('A' vs 'B'), | |
| 378 * secondary ('u' vs '\u00FC'), and tertiary ('A' vs 'a'), and a primary differen
ce | |
| 379 * at the end of a string takes precedence over a secondary or tertiary | |
| 380 * difference earlier in the string. | |
| 381 * | |
| 382 * To account for this, we put all of the primary orders at the beginning of | |
| 383 * the string, followed by the secondary and tertiary orders. Each set of | |
| 384 * orders is terminated by nulls so that a key for a string which is an initial | |
| 385 * substring of another key will compare less without any special case. | |
| 386 * | |
| 387 * Here's a hypothetical example, with the collation element represented as a | |
| 388 * three-digit number, one digit for primary, one for secondary, etc. | |
| 389 * | |
| 390 * String: A a B \u00C9 | |
| 391 * Collation Elements: 101 100 201 511 | |
| 392 * Collation Key: 1125<null>0001<null>1011<null> | |
| 393 * | |
| 394 * To make things even trickier, secondary differences (accent marks) are | |
| 395 * compared starting at the *end* of the string in languages with French | |
| 396 * secondary ordering. But when comparing the accent marks on a single base | |
| 397 * character, they are compared from the beginning. To handle this, we reverse | |
| 398 * all of the accents that belong to each base character, then we reverse the | |
| 399 * entire string of secondary orderings at the end. | |
| 400 */ | |
| 401 CollationKey& RuleBasedCollator::getCollationKey( | |
| 402 const UnicodeString& source, | |
| 403 CollationKey& sortkey, | |
| 404 UErrorCode& status) const | |
| 405 { | |
| 406 return getCollationKey(source.getBuffer(), source.length(), sortkey, status)
; | |
| 407 } | |
| 408 | |
| 409 CollationKey& RuleBasedCollator::getCollationKey(const UChar* source, | |
| 410 int32_t sourceLen, | |
| 411 CollationKey& sortkey, | |
| 412 UErrorCode& status) const | |
| 413 { | |
| 414 if (U_FAILURE(status)) { | |
| 415 return sortkey.setToBogus(); | |
| 416 } | |
| 417 if (sourceLen < -1 || (source == NULL && sourceLen != 0)) { | |
| 418 status = U_ILLEGAL_ARGUMENT_ERROR; | |
| 419 return sortkey.setToBogus(); | |
| 420 } | |
| 421 | |
| 422 if (sourceLen < 0) { | |
| 423 sourceLen = u_strlen(source); | |
| 424 } | |
| 425 if (sourceLen == 0) { | |
| 426 return sortkey.reset(); | |
| 427 } | |
| 428 | |
| 429 int32_t resultLen = ucol_getCollationKey(ucollator, source, sourceLen, sortk
ey, status); | |
| 430 | |
| 431 if (U_SUCCESS(status)) { | |
| 432 sortkey.setLength(resultLen); | |
| 433 } else { | |
| 434 sortkey.setToBogus(); | |
| 435 } | |
| 436 return sortkey; | |
| 437 } | |
| 438 | |
| 439 /** | |
| 440 * Return the maximum length of any expansion sequences that end with the | |
| 441 * specified comparison order. | |
| 442 * @param order a collation order returned by previous or next. | |
| 443 * @return the maximum length of any expansion seuences ending with the | |
| 444 * specified order or 1 if collation order does not occur at the end of
any | |
| 445 * expansion sequence. | |
| 446 * @see CollationElementIterator#getMaxExpansion | |
| 447 */ | |
| 448 int32_t RuleBasedCollator::getMaxExpansion(int32_t order) const | |
| 449 { | |
| 450 uint8_t result; | |
| 451 UCOL_GETMAXEXPANSION(ucollator, (uint32_t)order, result); | |
| 452 return result; | |
| 453 } | |
| 454 | |
| 455 uint8_t* RuleBasedCollator::cloneRuleData(int32_t &length, | |
| 456 UErrorCode &status) | |
| 457 { | |
| 458 if (U_FAILURE(status)) { return NULL; } | |
| 459 LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000)); | |
| 460 if (buffer.isNull()) { | |
| 461 status = U_MEMORY_ALLOCATION_ERROR; | |
| 462 return NULL; | |
| 463 } | |
| 464 length = cloneBinary(buffer.getAlias(), 20000, status); | |
| 465 if (status == U_BUFFER_OVERFLOW_ERROR) { | |
| 466 if (buffer.allocateInsteadAndCopy(length, 0) == NULL) { | |
| 467 status = U_MEMORY_ALLOCATION_ERROR; | |
| 468 return NULL; | |
| 469 } | |
| 470 status = U_ZERO_ERROR; | |
| 471 length = cloneBinary(buffer.getAlias(), length, status); | |
| 472 } | |
| 473 if (U_FAILURE(status)) { return NULL; } | |
| 474 return buffer.orphan(); | |
| 475 } | |
| 476 | |
| 477 | |
| 478 int32_t RuleBasedCollator::cloneBinary(uint8_t *buffer, int32_t capacity, UError
Code &status) | |
| 479 { | |
| 480 return ucol_cloneBinary(ucollator, buffer, capacity, &status); | |
| 481 } | |
| 482 | |
| 483 void RuleBasedCollator::setAttribute(UColAttribute attr, | |
| 484 UColAttributeValue value, | |
| 485 UErrorCode &status) | |
| 486 { | |
| 487 if (U_FAILURE(status)) | |
| 488 return; | |
| 489 checkOwned(); | |
| 490 ucol_setAttribute(ucollator, attr, value, &status); | |
| 491 } | |
| 492 | |
| 493 UColAttributeValue RuleBasedCollator::getAttribute(UColAttribute attr, | |
| 494 UErrorCode &status) const | |
| 495 { | |
| 496 if (U_FAILURE(status)) | |
| 497 return UCOL_DEFAULT; | |
| 498 return ucol_getAttribute(ucollator, attr, &status); | |
| 499 } | |
| 500 | |
| 501 uint32_t RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UEr
rorCode &status) { | |
| 502 checkOwned(); | |
| 503 return ucol_setVariableTop(ucollator, varTop, len, &status); | |
| 504 } | |
| 505 | |
| 506 uint32_t RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCo
de &status) { | |
| 507 checkOwned(); | |
| 508 return ucol_setVariableTop(ucollator, varTop.getBuffer(), varTop.length(), &
status); | |
| 509 } | |
| 510 | |
| 511 void RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &status) { | |
| 512 checkOwned(); | |
| 513 ucol_restoreVariableTop(ucollator, varTop, &status); | |
| 514 } | |
| 515 | |
| 516 uint32_t RuleBasedCollator::getVariableTop(UErrorCode &status) const { | |
| 517 return ucol_getVariableTop(ucollator, &status); | |
| 518 } | |
| 519 | |
| 520 int32_t RuleBasedCollator::getSortKey(const UnicodeString& source, | |
| 521 uint8_t *result, int32_t resultLength) | |
| 522 const | |
| 523 { | |
| 524 return ucol_getSortKey(ucollator, source.getBuffer(), source.length(), resul
t, resultLength); | |
| 525 } | |
| 526 | |
| 527 int32_t RuleBasedCollator::getSortKey(const UChar *source, | |
| 528 int32_t sourceLength, uint8_t *result, | |
| 529 int32_t resultLength) const | |
| 530 { | |
| 531 return ucol_getSortKey(ucollator, source, sourceLength, result, resultLength
); | |
| 532 } | |
| 533 | |
| 534 int32_t RuleBasedCollator::getReorderCodes(int32_t *dest, | |
| 535 int32_t destCapacity, | |
| 536 UErrorCode& status) const | |
| 537 { | |
| 538 return ucol_getReorderCodes(ucollator, dest, destCapacity, &status); | |
| 539 } | |
| 540 | |
| 541 void RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, | |
| 542 int32_t reorderCodesLength, | |
| 543 UErrorCode& status) | |
| 544 { | |
| 545 checkOwned(); | |
| 546 ucol_setReorderCodes(ucollator, reorderCodes, reorderCodesLength, &status); | |
| 547 } | |
| 548 | |
| 549 int32_t RuleBasedCollator::getEquivalentReorderCodes(int32_t reorderCode, | |
| 550 int32_t* dest, | |
| 551 int32_t destCapacity, | |
| 552 UErrorCode& status) | |
| 553 { | |
| 554 return ucol_getEquivalentReorderCodes(reorderCode, dest, destCapacity, &stat
us); | |
| 555 } | |
| 556 | |
| 557 /** | |
| 558 * Create a hash code for this collation. Just hash the main rule table -- that | |
| 559 * should be good enough for almost any use. | |
| 560 */ | |
| 561 int32_t RuleBasedCollator::hashCode() const | |
| 562 { | |
| 563 int32_t length; | |
| 564 const UChar *rules = ucol_getRules(ucollator, &length); | |
| 565 return ustr_hashUCharsN(rules, length); | |
| 566 } | |
| 567 | |
| 568 /** | |
| 569 * return the locale of this collator | |
| 570 */ | |
| 571 Locale RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode &status)
const { | |
| 572 const char *result = ucol_getLocaleByType(ucollator, type, &status); | |
| 573 if(result == NULL) { | |
| 574 Locale res(""); | |
| 575 res.setToBogus(); | |
| 576 return res; | |
| 577 } else { | |
| 578 return Locale(result); | |
| 579 } | |
| 580 } | |
| 581 | |
| 582 void | |
| 583 RuleBasedCollator::setLocales(const Locale& requestedLocale, const Locale& valid
Locale, const Locale& actualLocale) { | |
| 584 checkOwned(); | |
| 585 char* rloc = uprv_strdup(requestedLocale.getName()); | |
| 586 if (rloc) { | |
| 587 char* vloc = uprv_strdup(validLocale.getName()); | |
| 588 if (vloc) { | |
| 589 char* aloc = uprv_strdup(actualLocale.getName()); | |
| 590 if (aloc) { | |
| 591 ucol_setReqValidLocales(ucollator, rloc, vloc, aloc); | |
| 592 return; | |
| 593 } | |
| 594 uprv_free(vloc); | |
| 595 } | |
| 596 uprv_free(rloc); | |
| 597 } | |
| 598 } | |
| 599 | |
| 600 // RuleBaseCollatorNew private constructor ---------------------------------- | |
| 601 | |
| 602 RuleBasedCollator::RuleBasedCollator() | |
| 603 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) | |
| 604 { | |
| 605 } | |
| 606 | |
| 607 RuleBasedCollator::RuleBasedCollator(const Locale& desiredLocale, | |
| 608 UErrorCode& status) | |
| 609 : dataIsOwned(FALSE), isWriteThroughAlias(FALSE), ucollator(NULL) | |
| 610 { | |
| 611 if (U_FAILURE(status)) | |
| 612 return; | |
| 613 | |
| 614 /* | |
| 615 Try to load, in order: | |
| 616 1. The desired locale's collation. | |
| 617 2. A fallback of the desired locale. | |
| 618 3. The default locale's collation. | |
| 619 4. A fallback of the default locale. | |
| 620 5. The default collation rules, which contains en_US collation rules. | |
| 621 | |
| 622 To reiterate, we try: | |
| 623 Specific: | |
| 624 language+country+variant | |
| 625 language+country | |
| 626 language | |
| 627 Default: | |
| 628 language+country+variant | |
| 629 language+country | |
| 630 language | |
| 631 Root: (aka DEFAULTRULES) | |
| 632 steps 1-5 are handled by resource bundle fallback mechanism. | |
| 633 however, in a very unprobable situation that no resource bundle | |
| 634 data exists, step 5 is repeated with hardcoded default rules. | |
| 635 */ | |
| 636 | |
| 637 setUCollator(desiredLocale, status); | |
| 638 | |
| 639 if (U_FAILURE(status)) | |
| 640 { | |
| 641 status = U_ZERO_ERROR; | |
| 642 | |
| 643 setUCollator(kRootLocaleName, status); | |
| 644 if (status == U_ZERO_ERROR) { | |
| 645 status = U_USING_DEFAULT_WARNING; | |
| 646 } | |
| 647 } | |
| 648 | |
| 649 if (U_SUCCESS(status)) | |
| 650 { | |
| 651 setRuleStringFromCollator(); | |
| 652 } | |
| 653 } | |
| 654 | |
| 655 void | |
| 656 RuleBasedCollator::setUCollator(const char *locale, | |
| 657 UErrorCode &status) | |
| 658 { | |
| 659 if (U_FAILURE(status)) { | |
| 660 return; | |
| 661 } | |
| 662 if (ucollator && dataIsOwned) | |
| 663 ucol_close(ucollator); | |
| 664 ucollator = ucol_open_internal(locale, &status); | |
| 665 dataIsOwned = TRUE; | |
| 666 isWriteThroughAlias = FALSE; | |
| 667 } | |
| 668 | |
| 669 | |
| 670 void | |
| 671 RuleBasedCollator::checkOwned() { | |
| 672 if (!(dataIsOwned || isWriteThroughAlias)) { | |
| 673 UErrorCode status = U_ZERO_ERROR; | |
| 674 ucollator = ucol_safeClone(ucollator, NULL, NULL, &status); | |
| 675 setRuleStringFromCollator(); | |
| 676 dataIsOwned = TRUE; | |
| 677 isWriteThroughAlias = FALSE; | |
| 678 } | |
| 679 } | |
| 680 | |
| 681 | |
| 682 int32_t RuleBasedCollator::internalGetShortDefinitionString(const char *locale, | |
| 683 char *buff
er, | |
| 684 int32_t ca
pacity, | |
| 685 UErrorCode
&status) const { | |
| 686 /* simply delegate */ | |
| 687 return ucol_getShortDefinitionString(ucollator, locale, buffer, capacity, &sta
tus); | |
| 688 } | |
| 689 | |
| 690 | |
| 691 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) | |
| 692 | |
| 693 U_NAMESPACE_END | |
| 694 | |
| 695 #endif /* #if !UCONFIG_NO_COLLATION */ | |
| OLD | NEW |