OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 1996-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * rulebasedcollator.cpp |
| 7 * |
| 8 * (replaced the former tblcoll.cpp) |
| 9 * |
| 10 * created on: 2012feb14 with new and old collation code |
| 11 * created by: Markus W. Scherer |
| 12 */ |
| 13 |
| 14 #include "unicode/utypes.h" |
| 15 |
| 16 #if !UCONFIG_NO_COLLATION |
| 17 |
| 18 #include "unicode/coll.h" |
| 19 #include "unicode/coleitr.h" |
| 20 #include "unicode/localpointer.h" |
| 21 #include "unicode/locid.h" |
| 22 #include "unicode/sortkey.h" |
| 23 #include "unicode/tblcoll.h" |
| 24 #include "unicode/ucol.h" |
| 25 #include "unicode/uiter.h" |
| 26 #include "unicode/uloc.h" |
| 27 #include "unicode/uniset.h" |
| 28 #include "unicode/unistr.h" |
| 29 #include "unicode/usetiter.h" |
| 30 #include "unicode/utf8.h" |
| 31 #include "unicode/uversion.h" |
| 32 #include "bocsu.h" |
| 33 #include "charstr.h" |
| 34 #include "cmemory.h" |
| 35 #include "collation.h" |
| 36 #include "collationcompare.h" |
| 37 #include "collationdata.h" |
| 38 #include "collationdatareader.h" |
| 39 #include "collationfastlatin.h" |
| 40 #include "collationiterator.h" |
| 41 #include "collationkeys.h" |
| 42 #include "collationroot.h" |
| 43 #include "collationsets.h" |
| 44 #include "collationsettings.h" |
| 45 #include "collationtailoring.h" |
| 46 #include "cstring.h" |
| 47 #include "uassert.h" |
| 48 #include "ucol_imp.h" |
| 49 #include "uhash.h" |
| 50 #include "uitercollationiterator.h" |
| 51 #include "ustr_imp.h" |
| 52 #include "utf16collationiterator.h" |
| 53 #include "utf8collationiterator.h" |
| 54 #include "uvectr64.h" |
| 55 |
| 56 U_NAMESPACE_BEGIN |
| 57 |
| 58 namespace { |
| 59 |
| 60 class FixedSortKeyByteSink : public SortKeyByteSink { |
| 61 public: |
| 62 FixedSortKeyByteSink(char *dest, int32_t destCapacity) |
| 63 : SortKeyByteSink(dest, destCapacity) {} |
| 64 virtual ~FixedSortKeyByteSink(); |
| 65 |
| 66 private: |
| 67 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t leng
th); |
| 68 virtual UBool Resize(int32_t appendCapacity, int32_t length); |
| 69 }; |
| 70 |
| 71 FixedSortKeyByteSink::~FixedSortKeyByteSink() {} |
| 72 |
| 73 void |
| 74 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int
32_t length) { |
| 75 // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_ |
| 76 // Fill the buffer completely. |
| 77 int32_t available = capacity_ - length; |
| 78 if (available > 0) { |
| 79 uprv_memcpy(buffer_ + length, bytes, available); |
| 80 } |
| 81 } |
| 82 |
| 83 UBool |
| 84 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) { |
| 85 return FALSE; |
| 86 } |
| 87 |
| 88 } // namespace |
| 89 |
| 90 // Not in an anonymous namespace, so that it can be a friend of CollationKey. |
| 91 class CollationKeyByteSink : public SortKeyByteSink { |
| 92 public: |
| 93 CollationKeyByteSink(CollationKey &key) |
| 94 : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getC
apacity()), |
| 95 key_(key) {} |
| 96 virtual ~CollationKeyByteSink(); |
| 97 |
| 98 private: |
| 99 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t leng
th); |
| 100 virtual UBool Resize(int32_t appendCapacity, int32_t length); |
| 101 |
| 102 CollationKey &key_; |
| 103 }; |
| 104 |
| 105 CollationKeyByteSink::~CollationKeyByteSink() {} |
| 106 |
| 107 void |
| 108 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t
length) { |
| 109 // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_ |
| 110 if (Resize(n, length)) { |
| 111 uprv_memcpy(buffer_ + length, bytes, n); |
| 112 } |
| 113 } |
| 114 |
| 115 UBool |
| 116 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) { |
| 117 if (buffer_ == NULL) { |
| 118 return FALSE; // allocation failed before already |
| 119 } |
| 120 int32_t newCapacity = 2 * capacity_; |
| 121 int32_t altCapacity = length + 2 * appendCapacity; |
| 122 if (newCapacity < altCapacity) { |
| 123 newCapacity = altCapacity; |
| 124 } |
| 125 if (newCapacity < 200) { |
| 126 newCapacity = 200; |
| 127 } |
| 128 uint8_t *newBuffer = key_.reallocate(newCapacity, length); |
| 129 if (newBuffer == NULL) { |
| 130 SetNotOk(); |
| 131 return FALSE; |
| 132 } |
| 133 buffer_ = reinterpret_cast<char *>(newBuffer); |
| 134 capacity_ = newCapacity; |
| 135 return TRUE; |
| 136 } |
| 137 |
| 138 RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other) |
| 139 : Collator(other), |
| 140 data(other.data), |
| 141 settings(other.settings), |
| 142 tailoring(other.tailoring), |
| 143 cacheEntry(other.cacheEntry), |
| 144 validLocale(other.validLocale), |
| 145 explicitlySetAttributes(other.explicitlySetAttributes), |
| 146 actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) { |
| 147 settings->addRef(); |
| 148 cacheEntry->addRef(); |
| 149 } |
| 150 |
| 151 RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length, |
| 152 const RuleBasedCollator *base, UErrorCode &
errorCode) |
| 153 : data(NULL), |
| 154 settings(NULL), |
| 155 tailoring(NULL), |
| 156 cacheEntry(NULL), |
| 157 validLocale(""), |
| 158 explicitlySetAttributes(0), |
| 159 actualLocaleIsSameAsValid(FALSE) { |
| 160 if(U_FAILURE(errorCode)) { return; } |
| 161 if(bin == NULL || length == 0 || base == NULL) { |
| 162 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 163 return; |
| 164 } |
| 165 const CollationTailoring *root = CollationRoot::getRoot(errorCode); |
| 166 if(U_FAILURE(errorCode)) { return; } |
| 167 if(base->tailoring != root) { |
| 168 errorCode = U_UNSUPPORTED_ERROR; |
| 169 return; |
| 170 } |
| 171 LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->s
ettings)); |
| 172 if(t.isNull() || t->isBogus()) { |
| 173 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 174 return; |
| 175 } |
| 176 CollationDataReader::read(base->tailoring, bin, length, *t, errorCode); |
| 177 if(U_FAILURE(errorCode)) { return; } |
| 178 t->actualLocale.setToBogus(); |
| 179 adoptTailoring(t.orphan(), errorCode); |
| 180 } |
| 181 |
| 182 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry) |
| 183 : data(entry->tailoring->data), |
| 184 settings(entry->tailoring->settings), |
| 185 tailoring(entry->tailoring), |
| 186 cacheEntry(entry), |
| 187 validLocale(entry->validLocale), |
| 188 explicitlySetAttributes(0), |
| 189 actualLocaleIsSameAsValid(FALSE) { |
| 190 settings->addRef(); |
| 191 cacheEntry->addRef(); |
| 192 } |
| 193 |
| 194 RuleBasedCollator::~RuleBasedCollator() { |
| 195 SharedObject::clearPtr(settings); |
| 196 SharedObject::clearPtr(cacheEntry); |
| 197 } |
| 198 |
| 199 void |
| 200 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode)
{ |
| 201 if(U_FAILURE(errorCode)) { |
| 202 t->deleteIfZeroRefCount(); |
| 203 return; |
| 204 } |
| 205 U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry
== NULL); |
| 206 cacheEntry = new CollationCacheEntry(t->actualLocale, t); |
| 207 if(cacheEntry == NULL) { |
| 208 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 209 t->deleteIfZeroRefCount(); |
| 210 return; |
| 211 } |
| 212 data = t->data; |
| 213 settings = t->settings; |
| 214 settings->addRef(); |
| 215 tailoring = t; |
| 216 cacheEntry->addRef(); |
| 217 validLocale = t->actualLocale; |
| 218 actualLocaleIsSameAsValid = FALSE; |
| 219 } |
| 220 |
| 221 Collator * |
| 222 RuleBasedCollator::clone() const { |
| 223 return new RuleBasedCollator(*this); |
| 224 } |
| 225 |
| 226 RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other)
{ |
| 227 if(this == &other) { return *this; } |
| 228 SharedObject::copyPtr(other.settings, settings); |
| 229 tailoring = other.tailoring; |
| 230 SharedObject::copyPtr(other.cacheEntry, cacheEntry); |
| 231 data = tailoring->data; |
| 232 validLocale = other.validLocale; |
| 233 explicitlySetAttributes = other.explicitlySetAttributes; |
| 234 actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid; |
| 235 return *this; |
| 236 } |
| 237 |
| 238 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator) |
| 239 |
| 240 UBool |
| 241 RuleBasedCollator::operator==(const Collator& other) const { |
| 242 if(this == &other) { return TRUE; } |
| 243 if(!Collator::operator==(other)) { return FALSE; } |
| 244 const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other); |
| 245 if(*settings != *o.settings) { return FALSE; } |
| 246 if(data == o.data) { return TRUE; } |
| 247 UBool thisIsRoot = data->base == NULL; |
| 248 UBool otherIsRoot = o.data->base == NULL; |
| 249 U_ASSERT(!thisIsRoot || !otherIsRoot); // otherwise their data pointers sho
uld be == |
| 250 if(thisIsRoot != otherIsRoot) { return FALSE; } |
| 251 if((thisIsRoot || !tailoring->rules.isEmpty()) && |
| 252 (otherIsRoot || !o.tailoring->rules.isEmpty())) { |
| 253 // Shortcut: If both collators have valid rule strings, then compare tho
se. |
| 254 if(tailoring->rules == o.tailoring->rules) { return TRUE; } |
| 255 } |
| 256 // Different rule strings can result in the same or equivalent tailoring. |
| 257 // The rule strings are optional in ICU resource bundles, although included
by default. |
| 258 // cloneBinary() drops the rule string. |
| 259 UErrorCode errorCode = U_ZERO_ERROR; |
| 260 LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode)); |
| 261 LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode)); |
| 262 if(U_FAILURE(errorCode)) { return FALSE; } |
| 263 if(*thisTailored != *otherTailored) { return FALSE; } |
| 264 // For completeness, we should compare all of the mappings; |
| 265 // or we should create a list of strings, sort it with one collator, |
| 266 // and check if both collators compare adjacent strings the same |
| 267 // (order & strength, down to quaternary); or similar. |
| 268 // Testing equality of collators seems unusual. |
| 269 return TRUE; |
| 270 } |
| 271 |
| 272 int32_t |
| 273 RuleBasedCollator::hashCode() const { |
| 274 int32_t h = settings->hashCode(); |
| 275 if(data->base == NULL) { return h; } // root collator |
| 276 // Do not rely on the rule string, see comments in operator==(). |
| 277 UErrorCode errorCode = U_ZERO_ERROR; |
| 278 LocalPointer<UnicodeSet> set(getTailoredSet(errorCode)); |
| 279 if(U_FAILURE(errorCode)) { return 0; } |
| 280 UnicodeSetIterator iter(*set); |
| 281 while(iter.next() && !iter.isString()) { |
| 282 h ^= data->getCE32(iter.getCodepoint()); |
| 283 } |
| 284 return h; |
| 285 } |
| 286 |
| 287 void |
| 288 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid, |
| 289 const Locale &actual) { |
| 290 if(actual == tailoring->actualLocale) { |
| 291 actualLocaleIsSameAsValid = FALSE; |
| 292 } else { |
| 293 U_ASSERT(actual == valid); |
| 294 actualLocaleIsSameAsValid = TRUE; |
| 295 } |
| 296 // Do not modify tailoring.actualLocale: |
| 297 // We cannot be sure that that would be thread-safe. |
| 298 validLocale = valid; |
| 299 (void)requested; // Ignore, see also ticket #10477. |
| 300 } |
| 301 |
| 302 Locale |
| 303 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) con
st { |
| 304 if(U_FAILURE(errorCode)) { |
| 305 return Locale::getRoot(); |
| 306 } |
| 307 switch(type) { |
| 308 case ULOC_ACTUAL_LOCALE: |
| 309 return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale
; |
| 310 case ULOC_VALID_LOCALE: |
| 311 return validLocale; |
| 312 case ULOC_REQUESTED_LOCALE: |
| 313 default: |
| 314 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 315 return Locale::getRoot(); |
| 316 } |
| 317 } |
| 318 |
| 319 const char * |
| 320 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &erro
rCode) const { |
| 321 if(U_FAILURE(errorCode)) { |
| 322 return NULL; |
| 323 } |
| 324 const Locale *result; |
| 325 switch(type) { |
| 326 case ULOC_ACTUAL_LOCALE: |
| 327 result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLo
cale; |
| 328 break; |
| 329 case ULOC_VALID_LOCALE: |
| 330 result = &validLocale; |
| 331 break; |
| 332 case ULOC_REQUESTED_LOCALE: |
| 333 default: |
| 334 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 335 return NULL; |
| 336 } |
| 337 if(result->isBogus()) { return NULL; } |
| 338 const char *id = result->getName(); |
| 339 return id[0] == 0 ? "root" : id; |
| 340 } |
| 341 |
| 342 const UnicodeString& |
| 343 RuleBasedCollator::getRules() const { |
| 344 return tailoring->rules; |
| 345 } |
| 346 |
| 347 void |
| 348 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const { |
| 349 if(delta == UCOL_TAILORING_ONLY) { |
| 350 buffer = tailoring->rules; |
| 351 return; |
| 352 } |
| 353 // UCOL_FULL_RULES |
| 354 buffer.remove(); |
| 355 CollationLoader::appendRootRules(buffer); |
| 356 buffer.append(tailoring->rules).getTerminatedBuffer(); |
| 357 } |
| 358 |
| 359 void |
| 360 RuleBasedCollator::getVersion(UVersionInfo version) const { |
| 361 uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH); |
| 362 version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4); |
| 363 } |
| 364 |
| 365 UnicodeSet * |
| 366 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const { |
| 367 if(U_FAILURE(errorCode)) { return NULL; } |
| 368 UnicodeSet *tailored = new UnicodeSet(); |
| 369 if(tailored == NULL) { |
| 370 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 371 return NULL; |
| 372 } |
| 373 if(data->base != NULL) { |
| 374 TailoredSet(tailored).forData(data, errorCode); |
| 375 if(U_FAILURE(errorCode)) { |
| 376 delete tailored; |
| 377 return NULL; |
| 378 } |
| 379 } |
| 380 return tailored; |
| 381 } |
| 382 |
| 383 void |
| 384 RuleBasedCollator::internalGetContractionsAndExpansions( |
| 385 UnicodeSet *contractions, UnicodeSet *expansions, |
| 386 UBool addPrefixes, UErrorCode &errorCode) const { |
| 387 if(U_FAILURE(errorCode)) { return; } |
| 388 if(contractions != NULL) { |
| 389 contractions->clear(); |
| 390 } |
| 391 if(expansions != NULL) { |
| 392 expansions->clear(); |
| 393 } |
| 394 ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forDa
ta(data, errorCode); |
| 395 } |
| 396 |
| 397 void |
| 398 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCod
e &errorCode) const { |
| 399 if(U_FAILURE(errorCode)) { return; } |
| 400 ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, err
orCode); |
| 401 } |
| 402 |
| 403 const CollationSettings & |
| 404 RuleBasedCollator::getDefaultSettings() const { |
| 405 return *tailoring->settings; |
| 406 } |
| 407 |
| 408 UColAttributeValue |
| 409 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const
{ |
| 410 if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; } |
| 411 int32_t option; |
| 412 switch(attr) { |
| 413 case UCOL_FRENCH_COLLATION: |
| 414 option = CollationSettings::BACKWARD_SECONDARY; |
| 415 break; |
| 416 case UCOL_ALTERNATE_HANDLING: |
| 417 return settings->getAlternateHandling(); |
| 418 case UCOL_CASE_FIRST: |
| 419 return settings->getCaseFirst(); |
| 420 case UCOL_CASE_LEVEL: |
| 421 option = CollationSettings::CASE_LEVEL; |
| 422 break; |
| 423 case UCOL_NORMALIZATION_MODE: |
| 424 option = CollationSettings::CHECK_FCD; |
| 425 break; |
| 426 case UCOL_STRENGTH: |
| 427 return (UColAttributeValue)settings->getStrength(); |
| 428 case UCOL_HIRAGANA_QUATERNARY_MODE: |
| 429 // Deprecated attribute, unsettable. |
| 430 return UCOL_OFF; |
| 431 case UCOL_NUMERIC_COLLATION: |
| 432 option = CollationSettings::NUMERIC; |
| 433 break; |
| 434 default: |
| 435 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 436 return UCOL_DEFAULT; |
| 437 } |
| 438 return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON; |
| 439 } |
| 440 |
| 441 void |
| 442 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value, |
| 443 UErrorCode &errorCode) { |
| 444 UColAttributeValue oldValue = getAttribute(attr, errorCode); |
| 445 if(U_FAILURE(errorCode)) { return; } |
| 446 if(value == oldValue) { |
| 447 setAttributeExplicitly(attr); |
| 448 return; |
| 449 } |
| 450 const CollationSettings &defaultSettings = getDefaultSettings(); |
| 451 if(settings == &defaultSettings) { |
| 452 if(value == UCOL_DEFAULT) { |
| 453 setAttributeDefault(attr); |
| 454 return; |
| 455 } |
| 456 } |
| 457 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings); |
| 458 if(ownedSettings == NULL) { |
| 459 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 460 return; |
| 461 } |
| 462 |
| 463 switch(attr) { |
| 464 case UCOL_FRENCH_COLLATION: |
| 465 ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value, |
| 466 defaultSettings.options, errorCode); |
| 467 break; |
| 468 case UCOL_ALTERNATE_HANDLING: |
| 469 ownedSettings->setAlternateHandling(value, defaultSettings.options, erro
rCode); |
| 470 break; |
| 471 case UCOL_CASE_FIRST: |
| 472 ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode); |
| 473 break; |
| 474 case UCOL_CASE_LEVEL: |
| 475 ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value, |
| 476 defaultSettings.options, errorCode); |
| 477 break; |
| 478 case UCOL_NORMALIZATION_MODE: |
| 479 ownedSettings->setFlag(CollationSettings::CHECK_FCD, value, |
| 480 defaultSettings.options, errorCode); |
| 481 break; |
| 482 case UCOL_STRENGTH: |
| 483 ownedSettings->setStrength(value, defaultSettings.options, errorCode); |
| 484 break; |
| 485 case UCOL_HIRAGANA_QUATERNARY_MODE: |
| 486 // Deprecated attribute. Check for valid values but do not change anythi
ng. |
| 487 if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) { |
| 488 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 489 } |
| 490 break; |
| 491 case UCOL_NUMERIC_COLLATION: |
| 492 ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSetting
s.options, errorCode); |
| 493 break; |
| 494 default: |
| 495 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 496 break; |
| 497 } |
| 498 if(U_FAILURE(errorCode)) { return; } |
| 499 setFastLatinOptions(*ownedSettings); |
| 500 if(value == UCOL_DEFAULT) { |
| 501 setAttributeDefault(attr); |
| 502 } else { |
| 503 setAttributeExplicitly(attr); |
| 504 } |
| 505 } |
| 506 |
| 507 Collator & |
| 508 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
{ |
| 509 if(U_FAILURE(errorCode)) { return *this; } |
| 510 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1. |
| 511 int32_t value; |
| 512 if(group == UCOL_REORDER_CODE_DEFAULT) { |
| 513 value = UCOL_DEFAULT; |
| 514 } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CUR
RENCY) { |
| 515 value = group - UCOL_REORDER_CODE_FIRST; |
| 516 } else { |
| 517 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 518 return *this; |
| 519 } |
| 520 CollationSettings::MaxVariable oldValue = settings->getMaxVariable(); |
| 521 if(value == oldValue) { |
| 522 setAttributeExplicitly(ATTR_VARIABLE_TOP); |
| 523 return *this; |
| 524 } |
| 525 const CollationSettings &defaultSettings = getDefaultSettings(); |
| 526 if(settings == &defaultSettings) { |
| 527 if(value == UCOL_DEFAULT) { |
| 528 setAttributeDefault(ATTR_VARIABLE_TOP); |
| 529 return *this; |
| 530 } |
| 531 } |
| 532 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings); |
| 533 if(ownedSettings == NULL) { |
| 534 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 535 return *this; |
| 536 } |
| 537 |
| 538 if(group == UCOL_REORDER_CODE_DEFAULT) { |
| 539 group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getM
axVariable()); |
| 540 } |
| 541 uint32_t varTop = data->getLastPrimaryForGroup(group); |
| 542 U_ASSERT(varTop != 0); |
| 543 ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode); |
| 544 if(U_FAILURE(errorCode)) { return *this; } |
| 545 ownedSettings->variableTop = varTop; |
| 546 setFastLatinOptions(*ownedSettings); |
| 547 if(value == UCOL_DEFAULT) { |
| 548 setAttributeDefault(ATTR_VARIABLE_TOP); |
| 549 } else { |
| 550 setAttributeExplicitly(ATTR_VARIABLE_TOP); |
| 551 } |
| 552 return *this; |
| 553 } |
| 554 |
| 555 UColReorderCode |
| 556 RuleBasedCollator::getMaxVariable() const { |
| 557 return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable(
)); |
| 558 } |
| 559 |
| 560 uint32_t |
| 561 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const { |
| 562 return settings->variableTop; |
| 563 } |
| 564 |
| 565 uint32_t |
| 566 RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &
errorCode) { |
| 567 if(U_FAILURE(errorCode)) { return 0; } |
| 568 if(varTop == NULL && len !=0) { |
| 569 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 570 return 0; |
| 571 } |
| 572 if(len < 0) { len = u_strlen(varTop); } |
| 573 if(len == 0) { |
| 574 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 575 return 0; |
| 576 } |
| 577 UBool numeric = settings->isNumeric(); |
| 578 int64_t ce1, ce2; |
| 579 if(settings->dontCheckFCD()) { |
| 580 UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len); |
| 581 ce1 = ci.nextCE(errorCode); |
| 582 ce2 = ci.nextCE(errorCode); |
| 583 } else { |
| 584 FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len
); |
| 585 ce1 = ci.nextCE(errorCode); |
| 586 ce2 = ci.nextCE(errorCode); |
| 587 } |
| 588 if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) { |
| 589 errorCode = U_CE_NOT_FOUND_ERROR; |
| 590 return 0; |
| 591 } |
| 592 setVariableTop((uint32_t)(ce1 >> 32), errorCode); |
| 593 return settings->variableTop; |
| 594 } |
| 595 |
| 596 uint32_t |
| 597 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &error
Code) { |
| 598 return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode); |
| 599 } |
| 600 |
| 601 void |
| 602 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) { |
| 603 if(U_FAILURE(errorCode)) { return; } |
| 604 if(varTop != settings->variableTop) { |
| 605 // Pin the variable top to the end of the reordering group which contain
s it. |
| 606 // Only a few special groups are supported. |
| 607 int32_t group = data->getGroupForPrimary(varTop); |
| 608 if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group
) { |
| 609 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 610 return; |
| 611 } |
| 612 uint32_t v = data->getLastPrimaryForGroup(group); |
| 613 U_ASSERT(v != 0 && v >= varTop); |
| 614 varTop = v; |
| 615 if(varTop != settings->variableTop) { |
| 616 CollationSettings *ownedSettings = SharedObject::copyOnWrite(setting
s); |
| 617 if(ownedSettings == NULL) { |
| 618 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 619 return; |
| 620 } |
| 621 ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST, |
| 622 getDefaultSettings().options, errorCod
e); |
| 623 if(U_FAILURE(errorCode)) { return; } |
| 624 ownedSettings->variableTop = varTop; |
| 625 setFastLatinOptions(*ownedSettings); |
| 626 } |
| 627 } |
| 628 if(varTop == getDefaultSettings().variableTop) { |
| 629 setAttributeDefault(ATTR_VARIABLE_TOP); |
| 630 } else { |
| 631 setAttributeExplicitly(ATTR_VARIABLE_TOP); |
| 632 } |
| 633 } |
| 634 |
| 635 int32_t |
| 636 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity, |
| 637 UErrorCode &errorCode) const { |
| 638 if(U_FAILURE(errorCode)) { return 0; } |
| 639 if(capacity < 0 || (dest == NULL && capacity > 0)) { |
| 640 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 641 return 0; |
| 642 } |
| 643 int32_t length = settings->reorderCodesLength; |
| 644 if(length == 0) { return 0; } |
| 645 if(length > capacity) { |
| 646 errorCode = U_BUFFER_OVERFLOW_ERROR; |
| 647 return length; |
| 648 } |
| 649 uprv_memcpy(dest, settings->reorderCodes, length * 4); |
| 650 return length; |
| 651 } |
| 652 |
| 653 void |
| 654 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length, |
| 655 UErrorCode &errorCode) { |
| 656 if(U_FAILURE(errorCode)) { return; } |
| 657 if(length < 0 || (reorderCodes == NULL && length > 0)) { |
| 658 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 659 return; |
| 660 } |
| 661 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) { |
| 662 length = 0; |
| 663 } |
| 664 if(length == settings->reorderCodesLength && |
| 665 uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0)
{ |
| 666 return; |
| 667 } |
| 668 const CollationSettings &defaultSettings = getDefaultSettings(); |
| 669 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) { |
| 670 if(settings != &defaultSettings) { |
| 671 CollationSettings *ownedSettings = SharedObject::copyOnWrite(setting
s); |
| 672 if(ownedSettings == NULL) { |
| 673 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 674 return; |
| 675 } |
| 676 ownedSettings->aliasReordering(defaultSettings.reorderCodes, |
| 677 defaultSettings.reorderCodesLength, |
| 678 defaultSettings.reorderTable); |
| 679 setFastLatinOptions(*ownedSettings); |
| 680 } |
| 681 return; |
| 682 } |
| 683 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings); |
| 684 if(ownedSettings == NULL) { |
| 685 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 686 return; |
| 687 } |
| 688 if(length == 0) { |
| 689 ownedSettings->resetReordering(); |
| 690 } else { |
| 691 uint8_t reorderTable[256]; |
| 692 data->makeReorderTable(reorderCodes, length, reorderTable, errorCode); |
| 693 if(U_FAILURE(errorCode)) { return; } |
| 694 if(!ownedSettings->setReordering(reorderCodes, length, reorderTable)) { |
| 695 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 696 return; |
| 697 } |
| 698 } |
| 699 setFastLatinOptions(*ownedSettings); |
| 700 } |
| 701 |
| 702 void |
| 703 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const { |
| 704 ownedSettings.fastLatinOptions = CollationFastLatin::getOptions( |
| 705 data, ownedSettings, |
| 706 ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLa
tinPrimaries)); |
| 707 } |
| 708 |
| 709 UCollationResult |
| 710 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right
, |
| 711 UErrorCode &errorCode) const { |
| 712 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } |
| 713 return doCompare(left.getBuffer(), left.length(), |
| 714 right.getBuffer(), right.length(), errorCode); |
| 715 } |
| 716 |
| 717 UCollationResult |
| 718 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right
, |
| 719 int32_t length, UErrorCode &errorCode) const { |
| 720 if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; } |
| 721 if(length < 0) { |
| 722 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 723 return UCOL_EQUAL; |
| 724 } |
| 725 int32_t leftLength = left.length(); |
| 726 int32_t rightLength = right.length(); |
| 727 if(leftLength > length) { leftLength = length; } |
| 728 if(rightLength > length) { rightLength = length; } |
| 729 return doCompare(left.getBuffer(), leftLength, |
| 730 right.getBuffer(), rightLength, errorCode); |
| 731 } |
| 732 |
| 733 UCollationResult |
| 734 RuleBasedCollator::compare(const UChar *left, int32_t leftLength, |
| 735 const UChar *right, int32_t rightLength, |
| 736 UErrorCode &errorCode) const { |
| 737 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } |
| 738 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0))
{ |
| 739 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 740 return UCOL_EQUAL; |
| 741 } |
| 742 // Make sure both or neither strings have a known length. |
| 743 // We do not optimize for mixed length/termination. |
| 744 if(leftLength >= 0) { |
| 745 if(rightLength < 0) { rightLength = u_strlen(right); } |
| 746 } else { |
| 747 if(rightLength >= 0) { leftLength = u_strlen(left); } |
| 748 } |
| 749 return doCompare(left, leftLength, right, rightLength, errorCode); |
| 750 } |
| 751 |
| 752 UCollationResult |
| 753 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right
, |
| 754 UErrorCode &errorCode) const { |
| 755 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } |
| 756 const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data()); |
| 757 const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data()); |
| 758 if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.emp
ty())) { |
| 759 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 760 return UCOL_EQUAL; |
| 761 } |
| 762 return doCompare(leftBytes, left.length(), rightBytes, right.length(), error
Code); |
| 763 } |
| 764 |
| 765 UCollationResult |
| 766 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength, |
| 767 const char *right, int32_t rightLength, |
| 768 UErrorCode &errorCode) const { |
| 769 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } |
| 770 if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0))
{ |
| 771 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 772 return UCOL_EQUAL; |
| 773 } |
| 774 // Make sure both or neither strings have a known length. |
| 775 // We do not optimize for mixed length/termination. |
| 776 if(leftLength >= 0) { |
| 777 if(rightLength < 0) { rightLength = uprv_strlen(right); } |
| 778 } else { |
| 779 if(rightLength >= 0) { leftLength = uprv_strlen(left); } |
| 780 } |
| 781 return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength, |
| 782 reinterpret_cast<const uint8_t *>(right), rightLength, erro
rCode); |
| 783 } |
| 784 |
| 785 namespace { |
| 786 |
| 787 /** |
| 788 * Abstract iterator for identical-level string comparisons. |
| 789 * Returns FCD code points and handles temporary switching to NFD. |
| 790 */ |
| 791 class NFDIterator : public UObject { |
| 792 public: |
| 793 NFDIterator() : index(-1), length(0) {} |
| 794 virtual ~NFDIterator() {} |
| 795 /** |
| 796 * Returns the next code point from the internal normalization buffer, |
| 797 * or else the next text code point. |
| 798 * Returns -1 at the end of the text. |
| 799 */ |
| 800 UChar32 nextCodePoint() { |
| 801 if(index >= 0) { |
| 802 if(index == length) { |
| 803 index = -1; |
| 804 } else { |
| 805 UChar32 c; |
| 806 U16_NEXT_UNSAFE(decomp, index, c); |
| 807 return c; |
| 808 } |
| 809 } |
| 810 return nextRawCodePoint(); |
| 811 } |
| 812 /** |
| 813 * @param nfcImpl |
| 814 * @param c the last code point returned by nextCodePoint() or nextDecompose
dCodePoint() |
| 815 * @return the first code point in c's decomposition, |
| 816 * or c itself if it was decomposed already or if it does not decomp
ose |
| 817 */ |
| 818 UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) { |
| 819 if(index >= 0) { return c; } |
| 820 decomp = nfcImpl.getDecomposition(c, buffer, length); |
| 821 if(decomp == NULL) { return c; } |
| 822 index = 0; |
| 823 U16_NEXT_UNSAFE(decomp, index, c); |
| 824 return c; |
| 825 } |
| 826 protected: |
| 827 /** |
| 828 * Returns the next text code point in FCD order. |
| 829 * Returns -1 at the end of the text. |
| 830 */ |
| 831 virtual UChar32 nextRawCodePoint() = 0; |
| 832 private: |
| 833 const UChar *decomp; |
| 834 UChar buffer[4]; |
| 835 int32_t index; |
| 836 int32_t length; |
| 837 }; |
| 838 |
| 839 class UTF16NFDIterator : public NFDIterator { |
| 840 public: |
| 841 UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit
(textLimit) {} |
| 842 protected: |
| 843 virtual UChar32 nextRawCodePoint() { |
| 844 if(s == limit) { return U_SENTINEL; } |
| 845 UChar32 c = *s++; |
| 846 if(limit == NULL && c == 0) { |
| 847 s = NULL; |
| 848 return U_SENTINEL; |
| 849 } |
| 850 UChar trail; |
| 851 if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) { |
| 852 ++s; |
| 853 c = U16_GET_SUPPLEMENTARY(c, trail); |
| 854 } |
| 855 return c; |
| 856 } |
| 857 |
| 858 const UChar *s; |
| 859 const UChar *limit; |
| 860 }; |
| 861 |
| 862 class FCDUTF16NFDIterator : public UTF16NFDIterator { |
| 863 public: |
| 864 FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const
UChar *textLimit) |
| 865 : UTF16NFDIterator(NULL, NULL) { |
| 866 UErrorCode errorCode = U_ZERO_ERROR; |
| 867 const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCod
e); |
| 868 if(U_FAILURE(errorCode)) { return; } |
| 869 if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) { |
| 870 s = text; |
| 871 limit = spanLimit; |
| 872 } else { |
| 873 str.setTo(text, (int32_t)(spanLimit - text)); |
| 874 { |
| 875 ReorderingBuffer buffer(nfcImpl, str); |
| 876 if(buffer.init(str.length(), errorCode)) { |
| 877 nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode); |
| 878 } |
| 879 } |
| 880 if(U_SUCCESS(errorCode)) { |
| 881 s = str.getBuffer(); |
| 882 limit = s + str.length(); |
| 883 } |
| 884 } |
| 885 } |
| 886 private: |
| 887 UnicodeString str; |
| 888 }; |
| 889 |
| 890 class UTF8NFDIterator : public NFDIterator { |
| 891 public: |
| 892 UTF8NFDIterator(const uint8_t *text, int32_t textLength) |
| 893 : s(text), pos(0), length(textLength) {} |
| 894 protected: |
| 895 virtual UChar32 nextRawCodePoint() { |
| 896 if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; } |
| 897 UChar32 c; |
| 898 U8_NEXT_OR_FFFD(s, pos, length, c); |
| 899 return c; |
| 900 } |
| 901 |
| 902 const uint8_t *s; |
| 903 int32_t pos; |
| 904 int32_t length; |
| 905 }; |
| 906 |
| 907 class FCDUTF8NFDIterator : public NFDIterator { |
| 908 public: |
| 909 FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t t
extLength) |
| 910 : u8ci(data, FALSE, text, 0, textLength) {} |
| 911 protected: |
| 912 virtual UChar32 nextRawCodePoint() { |
| 913 UErrorCode errorCode = U_ZERO_ERROR; |
| 914 return u8ci.nextCodePoint(errorCode); |
| 915 } |
| 916 private: |
| 917 FCDUTF8CollationIterator u8ci; |
| 918 }; |
| 919 |
| 920 class UIterNFDIterator : public NFDIterator { |
| 921 public: |
| 922 UIterNFDIterator(UCharIterator &it) : iter(it) {} |
| 923 protected: |
| 924 virtual UChar32 nextRawCodePoint() { |
| 925 return uiter_next32(&iter); |
| 926 } |
| 927 private: |
| 928 UCharIterator &iter; |
| 929 }; |
| 930 |
| 931 class FCDUIterNFDIterator : public NFDIterator { |
| 932 public: |
| 933 FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t st
artIndex) |
| 934 : uici(data, FALSE, it, startIndex) {} |
| 935 protected: |
| 936 virtual UChar32 nextRawCodePoint() { |
| 937 UErrorCode errorCode = U_ZERO_ERROR; |
| 938 return uici.nextCodePoint(errorCode); |
| 939 } |
| 940 private: |
| 941 FCDUIterCollationIterator uici; |
| 942 }; |
| 943 |
| 944 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl, |
| 945 NFDIterator &left, NFDIterator &right) { |
| 946 for(;;) { |
| 947 // Fetch the next FCD code point from each string. |
| 948 UChar32 leftCp = left.nextCodePoint(); |
| 949 UChar32 rightCp = right.nextCodePoint(); |
| 950 if(leftCp == rightCp) { |
| 951 if(leftCp < 0) { break; } |
| 952 continue; |
| 953 } |
| 954 // If they are different, then decompose each and compare again. |
| 955 if(leftCp < 0) { |
| 956 leftCp = -2; // end of string |
| 957 } else if(leftCp == 0xfffe) { |
| 958 leftCp = -1; // U+FFFE: merge separator |
| 959 } else { |
| 960 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp); |
| 961 } |
| 962 if(rightCp < 0) { |
| 963 rightCp = -2; // end of string |
| 964 } else if(rightCp == 0xfffe) { |
| 965 rightCp = -1; // U+FFFE: merge separator |
| 966 } else { |
| 967 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp); |
| 968 } |
| 969 if(leftCp < rightCp) { return UCOL_LESS; } |
| 970 if(leftCp > rightCp) { return UCOL_GREATER; } |
| 971 } |
| 972 return UCOL_EQUAL; |
| 973 } |
| 974 |
| 975 } // namespace |
| 976 |
| 977 UCollationResult |
| 978 RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength, |
| 979 const UChar *right, int32_t rightLength, |
| 980 UErrorCode &errorCode) const { |
| 981 // U_FAILURE(errorCode) checked by caller. |
| 982 if(left == right && leftLength == rightLength) { |
| 983 return UCOL_EQUAL; |
| 984 } |
| 985 |
| 986 // Identical-prefix test. |
| 987 const UChar *leftLimit; |
| 988 const UChar *rightLimit; |
| 989 int32_t equalPrefixLength = 0; |
| 990 if(leftLength < 0) { |
| 991 leftLimit = NULL; |
| 992 rightLimit = NULL; |
| 993 UChar c; |
| 994 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) { |
| 995 if(c == 0) { return UCOL_EQUAL; } |
| 996 ++equalPrefixLength; |
| 997 } |
| 998 } else { |
| 999 leftLimit = left + leftLength; |
| 1000 rightLimit = right + rightLength; |
| 1001 for(;;) { |
| 1002 if(equalPrefixLength == leftLength) { |
| 1003 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; } |
| 1004 break; |
| 1005 } else if(equalPrefixLength == rightLength || |
| 1006 left[equalPrefixLength] != right[equalPrefixLength]) { |
| 1007 break; |
| 1008 } |
| 1009 ++equalPrefixLength; |
| 1010 } |
| 1011 } |
| 1012 |
| 1013 UBool numeric = settings->isNumeric(); |
| 1014 if(equalPrefixLength > 0) { |
| 1015 if((equalPrefixLength != leftLength && |
| 1016 data->isUnsafeBackward(left[equalPrefixLength], numeric)) || |
| 1017 (equalPrefixLength != rightLength && |
| 1018 data->isUnsafeBackward(right[equalPrefixLength], numeric)))
{ |
| 1019 // Identical prefix: Back up to the start of a contraction or reorde
ring sequence. |
| 1020 while(--equalPrefixLength > 0 && |
| 1021 data->isUnsafeBackward(left[equalPrefixLength], numeric)) {} |
| 1022 } |
| 1023 // Notes: |
| 1024 // - A longer string can compare equal to a prefix of it if only ignorab
les follow. |
| 1025 // - With a backward level, a longer string can compare less-than a pref
ix of it. |
| 1026 |
| 1027 // Pass the actual start of each string into the CollationIterators, |
| 1028 // plus the equalPrefixLength position, |
| 1029 // so that prefix matches back into the equal prefix work. |
| 1030 } |
| 1031 |
| 1032 int32_t result; |
| 1033 int32_t fastLatinOptions = settings->fastLatinOptions; |
| 1034 if(fastLatinOptions >= 0 && |
| 1035 (equalPrefixLength == leftLength || |
| 1036 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) && |
| 1037 (equalPrefixLength == rightLength || |
| 1038 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) { |
| 1039 if(leftLength >= 0) { |
| 1040 result = CollationFastLatin::compareUTF16(data->fastLatinTable, |
| 1041 settings->fastLatinPrimari
es, |
| 1042 fastLatinOptions, |
| 1043 left + equalPrefixLength, |
| 1044 leftLength - equalPrefixLe
ngth, |
| 1045 right + equalPrefixLength, |
| 1046 rightLength - equalPrefixL
ength); |
| 1047 } else { |
| 1048 result = CollationFastLatin::compareUTF16(data->fastLatinTable, |
| 1049 settings->fastLatinPrimari
es, |
| 1050 fastLatinOptions, |
| 1051 left + equalPrefixLength,
-1, |
| 1052 right + equalPrefixLength,
-1); |
| 1053 } |
| 1054 } else { |
| 1055 result = CollationFastLatin::BAIL_OUT_RESULT; |
| 1056 } |
| 1057 |
| 1058 if(result == CollationFastLatin::BAIL_OUT_RESULT) { |
| 1059 if(settings->dontCheckFCD()) { |
| 1060 UTF16CollationIterator leftIter(data, numeric, |
| 1061 left, left + equalPrefixLength, left
Limit); |
| 1062 UTF16CollationIterator rightIter(data, numeric, |
| 1063 right, right + equalPrefixLength, ri
ghtLimit); |
| 1064 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter
, *settings, errorCode); |
| 1065 } else { |
| 1066 FCDUTF16CollationIterator leftIter(data, numeric, |
| 1067 left, left + equalPrefixLength, le
ftLimit); |
| 1068 FCDUTF16CollationIterator rightIter(data, numeric, |
| 1069 right, right + equalPrefixLength
, rightLimit); |
| 1070 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter
, *settings, errorCode); |
| 1071 } |
| 1072 } |
| 1073 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAI
LURE(errorCode)) { |
| 1074 return (UCollationResult)result; |
| 1075 } |
| 1076 |
| 1077 // Note: If NUL-terminated, we could get the actual limits from the iterator
s now. |
| 1078 // That would complicate the iterators a bit, NUL-terminated strings are onl
y a C convenience, |
| 1079 // and the benefit seems unlikely to be measurable. |
| 1080 |
| 1081 // Compare identical level. |
| 1082 const Normalizer2Impl &nfcImpl = data->nfcImpl; |
| 1083 left += equalPrefixLength; |
| 1084 right += equalPrefixLength; |
| 1085 if(settings->dontCheckFCD()) { |
| 1086 UTF16NFDIterator leftIter(left, leftLimit); |
| 1087 UTF16NFDIterator rightIter(right, rightLimit); |
| 1088 return compareNFDIter(nfcImpl, leftIter, rightIter); |
| 1089 } else { |
| 1090 FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit); |
| 1091 FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit); |
| 1092 return compareNFDIter(nfcImpl, leftIter, rightIter); |
| 1093 } |
| 1094 } |
| 1095 |
| 1096 UCollationResult |
| 1097 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength, |
| 1098 const uint8_t *right, int32_t rightLength, |
| 1099 UErrorCode &errorCode) const { |
| 1100 // U_FAILURE(errorCode) checked by caller. |
| 1101 if(left == right && leftLength == rightLength) { |
| 1102 return UCOL_EQUAL; |
| 1103 } |
| 1104 |
| 1105 // Identical-prefix test. |
| 1106 int32_t equalPrefixLength = 0; |
| 1107 if(leftLength < 0) { |
| 1108 uint8_t c; |
| 1109 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) { |
| 1110 if(c == 0) { return UCOL_EQUAL; } |
| 1111 ++equalPrefixLength; |
| 1112 } |
| 1113 } else { |
| 1114 for(;;) { |
| 1115 if(equalPrefixLength == leftLength) { |
| 1116 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; } |
| 1117 break; |
| 1118 } else if(equalPrefixLength == rightLength || |
| 1119 left[equalPrefixLength] != right[equalPrefixLength]) { |
| 1120 break; |
| 1121 } |
| 1122 ++equalPrefixLength; |
| 1123 } |
| 1124 } |
| 1125 // Back up to the start of a partially-equal code point. |
| 1126 if(equalPrefixLength > 0 && |
| 1127 ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLen
gth])) || |
| 1128 (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLe
ngth])))) { |
| 1129 while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {
} |
| 1130 } |
| 1131 |
| 1132 UBool numeric = settings->isNumeric(); |
| 1133 if(equalPrefixLength > 0) { |
| 1134 UBool unsafe = FALSE; |
| 1135 if(equalPrefixLength != leftLength) { |
| 1136 int32_t i = equalPrefixLength; |
| 1137 UChar32 c; |
| 1138 U8_NEXT_OR_FFFD(left, i, leftLength, c); |
| 1139 unsafe = data->isUnsafeBackward(c, numeric); |
| 1140 } |
| 1141 if(!unsafe && equalPrefixLength != rightLength) { |
| 1142 int32_t i = equalPrefixLength; |
| 1143 UChar32 c; |
| 1144 U8_NEXT_OR_FFFD(right, i, rightLength, c); |
| 1145 unsafe = data->isUnsafeBackward(c, numeric); |
| 1146 } |
| 1147 if(unsafe) { |
| 1148 // Identical prefix: Back up to the start of a contraction or reorde
ring sequence. |
| 1149 UChar32 c; |
| 1150 do { |
| 1151 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c); |
| 1152 } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric))
; |
| 1153 } |
| 1154 // See the notes in the UTF-16 version. |
| 1155 |
| 1156 // Pass the actual start of each string into the CollationIterators, |
| 1157 // plus the equalPrefixLength position, |
| 1158 // so that prefix matches back into the equal prefix work. |
| 1159 } |
| 1160 |
| 1161 int32_t result; |
| 1162 int32_t fastLatinOptions = settings->fastLatinOptions; |
| 1163 if(fastLatinOptions >= 0 && |
| 1164 (equalPrefixLength == leftLength || |
| 1165 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LE
AD) && |
| 1166 (equalPrefixLength == rightLength || |
| 1167 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_L
EAD)) { |
| 1168 if(leftLength >= 0) { |
| 1169 result = CollationFastLatin::compareUTF8(data->fastLatinTable, |
| 1170 settings->fastLatinPrimarie
s, |
| 1171 fastLatinOptions, |
| 1172 left + equalPrefixLength, |
| 1173 leftLength - equalPrefixLen
gth, |
| 1174 right + equalPrefixLength, |
| 1175 rightLength - equalPrefixLe
ngth); |
| 1176 } else { |
| 1177 result = CollationFastLatin::compareUTF8(data->fastLatinTable, |
| 1178 settings->fastLatinPrimarie
s, |
| 1179 fastLatinOptions, |
| 1180 left + equalPrefixLength, -
1, |
| 1181 right + equalPrefixLength,
-1); |
| 1182 } |
| 1183 } else { |
| 1184 result = CollationFastLatin::BAIL_OUT_RESULT; |
| 1185 } |
| 1186 |
| 1187 if(result == CollationFastLatin::BAIL_OUT_RESULT) { |
| 1188 if(settings->dontCheckFCD()) { |
| 1189 UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLengt
h, leftLength); |
| 1190 UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLen
gth, rightLength); |
| 1191 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter
, *settings, errorCode); |
| 1192 } else { |
| 1193 FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLe
ngth, leftLength); |
| 1194 FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefix
Length, rightLength); |
| 1195 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter
, *settings, errorCode); |
| 1196 } |
| 1197 } |
| 1198 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAI
LURE(errorCode)) { |
| 1199 return (UCollationResult)result; |
| 1200 } |
| 1201 |
| 1202 // Note: If NUL-terminated, we could get the actual limits from the iterator
s now. |
| 1203 // That would complicate the iterators a bit, NUL-terminated strings are onl
y a C convenience, |
| 1204 // and the benefit seems unlikely to be measurable. |
| 1205 |
| 1206 // Compare identical level. |
| 1207 const Normalizer2Impl &nfcImpl = data->nfcImpl; |
| 1208 left += equalPrefixLength; |
| 1209 right += equalPrefixLength; |
| 1210 if(leftLength > 0) { |
| 1211 leftLength -= equalPrefixLength; |
| 1212 rightLength -= equalPrefixLength; |
| 1213 } |
| 1214 if(settings->dontCheckFCD()) { |
| 1215 UTF8NFDIterator leftIter(left, leftLength); |
| 1216 UTF8NFDIterator rightIter(right, rightLength); |
| 1217 return compareNFDIter(nfcImpl, leftIter, rightIter); |
| 1218 } else { |
| 1219 FCDUTF8NFDIterator leftIter(data, left, leftLength); |
| 1220 FCDUTF8NFDIterator rightIter(data, right, rightLength); |
| 1221 return compareNFDIter(nfcImpl, leftIter, rightIter); |
| 1222 } |
| 1223 } |
| 1224 |
| 1225 UCollationResult |
| 1226 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right, |
| 1227 UErrorCode &errorCode) const { |
| 1228 if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; } |
| 1229 UBool numeric = settings->isNumeric(); |
| 1230 |
| 1231 // Identical-prefix test. |
| 1232 int32_t equalPrefixLength = 0; |
| 1233 { |
| 1234 UChar32 leftUnit; |
| 1235 UChar32 rightUnit; |
| 1236 while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right)))
{ |
| 1237 if(leftUnit < 0) { return UCOL_EQUAL; } |
| 1238 ++equalPrefixLength; |
| 1239 } |
| 1240 |
| 1241 // Back out the code units that differed, for the real collation compari
son. |
| 1242 if(leftUnit >= 0) { left.previous(&left); } |
| 1243 if(rightUnit >= 0) { right.previous(&right); } |
| 1244 |
| 1245 if(equalPrefixLength > 0) { |
| 1246 if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) || |
| 1247 (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric
))) { |
| 1248 // Identical prefix: Back up to the start of a contraction or re
ordering sequence. |
| 1249 do { |
| 1250 --equalPrefixLength; |
| 1251 leftUnit = left.previous(&left); |
| 1252 right.previous(&right); |
| 1253 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit
, numeric)); |
| 1254 } |
| 1255 // See the notes in the UTF-16 version. |
| 1256 } |
| 1257 } |
| 1258 |
| 1259 UCollationResult result; |
| 1260 if(settings->dontCheckFCD()) { |
| 1261 UIterCollationIterator leftIter(data, numeric, left); |
| 1262 UIterCollationIterator rightIter(data, numeric, right); |
| 1263 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *s
ettings, errorCode); |
| 1264 } else { |
| 1265 FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLengt
h); |
| 1266 FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLen
gth); |
| 1267 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *s
ettings, errorCode); |
| 1268 } |
| 1269 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAI
LURE(errorCode)) { |
| 1270 return result; |
| 1271 } |
| 1272 |
| 1273 // Compare identical level. |
| 1274 left.move(&left, equalPrefixLength, UITER_ZERO); |
| 1275 right.move(&right, equalPrefixLength, UITER_ZERO); |
| 1276 const Normalizer2Impl &nfcImpl = data->nfcImpl; |
| 1277 if(settings->dontCheckFCD()) { |
| 1278 UIterNFDIterator leftIter(left); |
| 1279 UIterNFDIterator rightIter(right); |
| 1280 return compareNFDIter(nfcImpl, leftIter, rightIter); |
| 1281 } else { |
| 1282 FCDUIterNFDIterator leftIter(data, left, equalPrefixLength); |
| 1283 FCDUIterNFDIterator rightIter(data, right, equalPrefixLength); |
| 1284 return compareNFDIter(nfcImpl, leftIter, rightIter); |
| 1285 } |
| 1286 } |
| 1287 |
| 1288 CollationKey & |
| 1289 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key, |
| 1290 UErrorCode &errorCode) const { |
| 1291 return getCollationKey(s.getBuffer(), s.length(), key, errorCode); |
| 1292 } |
| 1293 |
| 1294 CollationKey & |
| 1295 RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey&
key, |
| 1296 UErrorCode &errorCode) const { |
| 1297 if(U_FAILURE(errorCode)) { |
| 1298 return key.setToBogus(); |
| 1299 } |
| 1300 if(s == NULL && length != 0) { |
| 1301 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 1302 return key.setToBogus(); |
| 1303 } |
| 1304 key.reset(); // resets the "bogus" state |
| 1305 CollationKeyByteSink sink(key); |
| 1306 writeSortKey(s, length, sink, errorCode); |
| 1307 if(U_FAILURE(errorCode)) { |
| 1308 key.setToBogus(); |
| 1309 } else if(key.isBogus()) { |
| 1310 errorCode = U_MEMORY_ALLOCATION_ERROR; |
| 1311 } else { |
| 1312 key.setLength(sink.NumberOfBytesAppended()); |
| 1313 } |
| 1314 return key; |
| 1315 } |
| 1316 |
| 1317 int32_t |
| 1318 RuleBasedCollator::getSortKey(const UnicodeString &s, |
| 1319 uint8_t *dest, int32_t capacity) const { |
| 1320 return getSortKey(s.getBuffer(), s.length(), dest, capacity); |
| 1321 } |
| 1322 |
| 1323 int32_t |
| 1324 RuleBasedCollator::getSortKey(const UChar *s, int32_t length, |
| 1325 uint8_t *dest, int32_t capacity) const { |
| 1326 if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity >
0)) { |
| 1327 return 0; |
| 1328 } |
| 1329 uint8_t noDest[1] = { 0 }; |
| 1330 if(dest == NULL) { |
| 1331 // Distinguish pure preflighting from an allocation error. |
| 1332 dest = noDest; |
| 1333 capacity = 0; |
| 1334 } |
| 1335 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity); |
| 1336 UErrorCode errorCode = U_ZERO_ERROR; |
| 1337 writeSortKey(s, length, sink, errorCode); |
| 1338 return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0; |
| 1339 } |
| 1340 |
| 1341 void |
| 1342 RuleBasedCollator::writeSortKey(const UChar *s, int32_t length, |
| 1343 SortKeyByteSink &sink, UErrorCode &errorCode) co
nst { |
| 1344 if(U_FAILURE(errorCode)) { return; } |
| 1345 const UChar *limit = (length >= 0) ? s + length : NULL; |
| 1346 UBool numeric = settings->isNumeric(); |
| 1347 CollationKeys::LevelCallback callback; |
| 1348 if(settings->dontCheckFCD()) { |
| 1349 UTF16CollationIterator iter(data, numeric, s, s, limit); |
| 1350 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes,
*settings, |
| 1351 sink, Collation::PRIMARY_LEVEL
, |
| 1352 callback, TRUE, errorCode); |
| 1353 } else { |
| 1354 FCDUTF16CollationIterator iter(data, numeric, s, s, limit); |
| 1355 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes,
*settings, |
| 1356 sink, Collation::PRIMARY_LEVEL
, |
| 1357 callback, TRUE, errorCode); |
| 1358 } |
| 1359 if(settings->getStrength() == UCOL_IDENTICAL) { |
| 1360 writeIdenticalLevel(s, limit, sink, errorCode); |
| 1361 } |
| 1362 static const char terminator = 0; // TERMINATOR_BYTE |
| 1363 sink.Append(&terminator, 1); |
| 1364 } |
| 1365 |
| 1366 void |
| 1367 RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit, |
| 1368 SortKeyByteSink &sink, UErrorCode &errorC
ode) const { |
| 1369 // NFD quick check |
| 1370 const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCo
de); |
| 1371 if(U_FAILURE(errorCode)) { return; } |
| 1372 sink.Append(Collation::LEVEL_SEPARATOR_BYTE); |
| 1373 UChar32 prev = 0; |
| 1374 if(nfdQCYesLimit != s) { |
| 1375 prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), s
ink); |
| 1376 } |
| 1377 // Is there non-NFD text? |
| 1378 int32_t destLengthEstimate; |
| 1379 if(limit != NULL) { |
| 1380 if(nfdQCYesLimit == limit) { return; } |
| 1381 destLengthEstimate = (int32_t)(limit - nfdQCYesLimit); |
| 1382 } else { |
| 1383 // s is NUL-terminated |
| 1384 if(*nfdQCYesLimit == 0) { return; } |
| 1385 destLengthEstimate = -1; |
| 1386 } |
| 1387 UnicodeString nfd; |
| 1388 data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, error
Code); |
| 1389 u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink); |
| 1390 } |
| 1391 |
| 1392 namespace { |
| 1393 |
| 1394 /** |
| 1395 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary() |
| 1396 * with an instance of this callback class. |
| 1397 * When another level is about to be written, the callback |
| 1398 * records the level and the number of bytes that will be written until |
| 1399 * the sink (which is actually a FixedSortKeyByteSink) fills up. |
| 1400 * |
| 1401 * When internalNextSortKeyPart() is called again, it restarts with the last lev
el |
| 1402 * and ignores as many bytes as were written previously for that level. |
| 1403 */ |
| 1404 class PartLevelCallback : public CollationKeys::LevelCallback { |
| 1405 public: |
| 1406 PartLevelCallback(const SortKeyByteSink &s) |
| 1407 : sink(s), level(Collation::PRIMARY_LEVEL) { |
| 1408 levelCapacity = sink.GetRemainingCapacity(); |
| 1409 } |
| 1410 virtual ~PartLevelCallback() {} |
| 1411 virtual UBool needToWrite(Collation::Level l) { |
| 1412 if(!sink.Overflowed()) { |
| 1413 // Remember a level that will be at least partially written. |
| 1414 level = l; |
| 1415 levelCapacity = sink.GetRemainingCapacity(); |
| 1416 return TRUE; |
| 1417 } else { |
| 1418 return FALSE; |
| 1419 } |
| 1420 } |
| 1421 Collation::Level getLevel() const { return level; } |
| 1422 int32_t getLevelCapacity() const { return levelCapacity; } |
| 1423 |
| 1424 private: |
| 1425 const SortKeyByteSink &sink; |
| 1426 Collation::Level level; |
| 1427 int32_t levelCapacity; |
| 1428 }; |
| 1429 |
| 1430 } // namespace |
| 1431 |
| 1432 int32_t |
| 1433 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2
], |
| 1434 uint8_t *dest, int32_t count, UErrorC
ode &errorCode) const { |
| 1435 if(U_FAILURE(errorCode)) { return 0; } |
| 1436 if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)
) { |
| 1437 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 1438 return 0; |
| 1439 } |
| 1440 if(count == 0) { return 0; } |
| 1441 |
| 1442 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count); |
| 1443 sink.IgnoreBytes((int32_t)state[1]); |
| 1444 iter->move(iter, 0, UITER_START); |
| 1445 |
| 1446 Collation::Level level = (Collation::Level)state[0]; |
| 1447 if(level <= Collation::QUATERNARY_LEVEL) { |
| 1448 UBool numeric = settings->isNumeric(); |
| 1449 PartLevelCallback callback(sink); |
| 1450 if(settings->dontCheckFCD()) { |
| 1451 UIterCollationIterator ci(data, numeric, *iter); |
| 1452 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleByte
s, *settings, |
| 1453 sink, level, callback, FAL
SE, errorCode); |
| 1454 } else { |
| 1455 FCDUIterCollationIterator ci(data, numeric, *iter, 0); |
| 1456 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleByte
s, *settings, |
| 1457 sink, level, callback, FAL
SE, errorCode); |
| 1458 } |
| 1459 if(U_FAILURE(errorCode)) { return 0; } |
| 1460 if(sink.NumberOfBytesAppended() > count) { |
| 1461 state[0] = (uint32_t)callback.getLevel(); |
| 1462 state[1] = (uint32_t)callback.getLevelCapacity(); |
| 1463 return count; |
| 1464 } |
| 1465 // All of the normal levels are done. |
| 1466 if(settings->getStrength() == UCOL_IDENTICAL) { |
| 1467 level = Collation::IDENTICAL_LEVEL; |
| 1468 iter->move(iter, 0, UITER_START); |
| 1469 } |
| 1470 // else fall through to setting ZERO_LEVEL |
| 1471 } |
| 1472 |
| 1473 if(level == Collation::IDENTICAL_LEVEL) { |
| 1474 int32_t levelCapacity = sink.GetRemainingCapacity(); |
| 1475 UnicodeString s; |
| 1476 for(;;) { |
| 1477 UChar32 c = iter->next(iter); |
| 1478 if(c < 0) { break; } |
| 1479 s.append((UChar)c); |
| 1480 } |
| 1481 const UChar *sArray = s.getBuffer(); |
| 1482 writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode); |
| 1483 if(U_FAILURE(errorCode)) { return 0; } |
| 1484 if(sink.NumberOfBytesAppended() > count) { |
| 1485 state[0] = (uint32_t)level; |
| 1486 state[1] = (uint32_t)levelCapacity; |
| 1487 return count; |
| 1488 } |
| 1489 } |
| 1490 |
| 1491 // ZERO_LEVEL: Fill the remainder of dest with 00 bytes. |
| 1492 state[0] = (uint32_t)Collation::ZERO_LEVEL; |
| 1493 state[1] = 0; |
| 1494 int32_t length = sink.NumberOfBytesAppended(); |
| 1495 int32_t i = length; |
| 1496 while(i < count) { dest[i++] = 0; } |
| 1497 return length; |
| 1498 } |
| 1499 |
| 1500 void |
| 1501 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces, |
| 1502 UErrorCode &errorCode) const { |
| 1503 if(U_FAILURE(errorCode)) { return; } |
| 1504 const UChar *s = str.getBuffer(); |
| 1505 const UChar *limit = s + str.length(); |
| 1506 UBool numeric = settings->isNumeric(); |
| 1507 if(settings->dontCheckFCD()) { |
| 1508 UTF16CollationIterator iter(data, numeric, s, s, limit); |
| 1509 int64_t ce; |
| 1510 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) { |
| 1511 ces.addElement(ce, errorCode); |
| 1512 } |
| 1513 } else { |
| 1514 FCDUTF16CollationIterator iter(data, numeric, s, s, limit); |
| 1515 int64_t ce; |
| 1516 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) { |
| 1517 ces.addElement(ce, errorCode); |
| 1518 } |
| 1519 } |
| 1520 } |
| 1521 |
| 1522 namespace { |
| 1523 |
| 1524 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length
, |
| 1525 UErrorCode &errorCode) { |
| 1526 if(U_FAILURE(errorCode) || length == 0) { return; } |
| 1527 if(!s.isEmpty()) { |
| 1528 s.append('_', errorCode); |
| 1529 } |
| 1530 s.append(letter, errorCode); |
| 1531 for(int32_t i = 0; i < length; ++i) { |
| 1532 s.append(uprv_toupper(subtag[i]), errorCode); |
| 1533 } |
| 1534 } |
| 1535 |
| 1536 void appendAttribute(CharString &s, char letter, UColAttributeValue value, |
| 1537 UErrorCode &errorCode) { |
| 1538 if(U_FAILURE(errorCode)) { return; } |
| 1539 if(!s.isEmpty()) { |
| 1540 s.append('_', errorCode); |
| 1541 } |
| 1542 static const char *valueChars = "1234...........IXO..SN..LU......"; |
| 1543 s.append(letter, errorCode); |
| 1544 s.append(valueChars[value], errorCode); |
| 1545 } |
| 1546 |
| 1547 } // namespace |
| 1548 |
| 1549 int32_t |
| 1550 RuleBasedCollator::internalGetShortDefinitionString(const char *locale, |
| 1551 char *buffer, int32_t capaci
ty, |
| 1552 UErrorCode &errorCode) const
{ |
| 1553 if(U_FAILURE(errorCode)) { return 0; } |
| 1554 if(buffer == NULL ? capacity != 0 : capacity < 0) { |
| 1555 errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 1556 return 0; |
| 1557 } |
| 1558 if(locale == NULL) { |
| 1559 locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode); |
| 1560 } |
| 1561 |
| 1562 char resultLocale[ULOC_FULLNAME_CAPACITY + 1]; |
| 1563 int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CA
PACITY, |
| 1564 "collation", locale, |
| 1565 NULL, &errorCode); |
| 1566 if(U_FAILURE(errorCode)) { return 0; } |
| 1567 if(length == 0) { |
| 1568 uprv_strcpy(resultLocale, "root"); |
| 1569 } else { |
| 1570 resultLocale[length] = 0; |
| 1571 } |
| 1572 |
| 1573 // Append items in alphabetic order of their short definition letters. |
| 1574 CharString result; |
| 1575 char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
| 1576 |
| 1577 if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) { |
| 1578 appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, error
Code), errorCode); |
| 1579 } |
| 1580 // ATTR_VARIABLE_TOP not supported because 'B' was broken. |
| 1581 // See ICU tickets #10372 and #10386. |
| 1582 if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) { |
| 1583 appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), e
rrorCode); |
| 1584 } |
| 1585 if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) { |
| 1586 appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorC
ode), errorCode); |
| 1587 } |
| 1588 if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) { |
| 1589 appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), e
rrorCode); |
| 1590 } |
| 1591 if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) { |
| 1592 appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCo
de), errorCode); |
| 1593 } |
| 1594 // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away
from default. |
| 1595 length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTH
OF(subtag), &errorCode); |
| 1596 appendSubtag(result, 'K', subtag, length, errorCode); |
| 1597 length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &erro
rCode); |
| 1598 appendSubtag(result, 'L', subtag, length, errorCode); |
| 1599 if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) { |
| 1600 appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, error
Code), errorCode); |
| 1601 } |
| 1602 length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &error
Code); |
| 1603 appendSubtag(result, 'R', subtag, length, errorCode); |
| 1604 if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) { |
| 1605 appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), err
orCode); |
| 1606 } |
| 1607 length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &error
Code); |
| 1608 appendSubtag(result, 'V', subtag, length, errorCode); |
| 1609 length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorC
ode); |
| 1610 appendSubtag(result, 'Z', subtag, length, errorCode); |
| 1611 |
| 1612 if(U_FAILURE(errorCode)) { return 0; } |
| 1613 if(result.length() <= capacity) { |
| 1614 uprv_memcpy(buffer, result.data(), result.length()); |
| 1615 } |
| 1616 return u_terminateChars(buffer, capacity, result.length(), &errorCode); |
| 1617 } |
| 1618 |
| 1619 UBool |
| 1620 RuleBasedCollator::isUnsafe(UChar32 c) const { |
| 1621 return data->isUnsafeBackward(c, settings->isNumeric()); |
| 1622 } |
| 1623 |
| 1624 void |
| 1625 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode
&errorCode) { |
| 1626 t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, e
rrorCode); |
| 1627 } |
| 1628 |
| 1629 UBool |
| 1630 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const { |
| 1631 umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailor
ing, errorCode); |
| 1632 return U_SUCCESS(errorCode); |
| 1633 } |
| 1634 |
| 1635 CollationElementIterator * |
| 1636 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) c
onst { |
| 1637 UErrorCode errorCode = U_ZERO_ERROR; |
| 1638 if(!initMaxExpansions(errorCode)) { return NULL; } |
| 1639 CollationElementIterator *cei = new CollationElementIterator(source, this, e
rrorCode); |
| 1640 if(U_FAILURE(errorCode)) { |
| 1641 delete cei; |
| 1642 return NULL; |
| 1643 } |
| 1644 return cei; |
| 1645 } |
| 1646 |
| 1647 CollationElementIterator * |
| 1648 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& sourc
e) const { |
| 1649 UErrorCode errorCode = U_ZERO_ERROR; |
| 1650 if(!initMaxExpansions(errorCode)) { return NULL; } |
| 1651 CollationElementIterator *cei = new CollationElementIterator(source, this, e
rrorCode); |
| 1652 if(U_FAILURE(errorCode)) { |
| 1653 delete cei; |
| 1654 return NULL; |
| 1655 } |
| 1656 return cei; |
| 1657 } |
| 1658 |
| 1659 int32_t |
| 1660 RuleBasedCollator::getMaxExpansion(int32_t order) const { |
| 1661 UErrorCode errorCode = U_ZERO_ERROR; |
| 1662 (void)initMaxExpansions(errorCode); |
| 1663 return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, o
rder); |
| 1664 } |
| 1665 |
| 1666 U_NAMESPACE_END |
| 1667 |
| 1668 #endif // !UCONFIG_NO_COLLATION |
OLD | NEW |