OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 1996-2006, International Business Machines Corporation and * |
| 4 * others. All Rights Reserved. * |
| 5 ******************************************************************************* |
| 6 */ |
| 7 //==============================================================================
= |
| 8 // |
| 9 // File sortkey.cpp |
| 10 // |
| 11 // |
| 12 // |
| 13 // Created by: Helena Shih |
| 14 // |
| 15 // Modification History: |
| 16 // |
| 17 // Date Name Description |
| 18 // |
| 19 // 6/20/97 helena Java class name change. |
| 20 // 6/23/97 helena Added comments to make code more readable. |
| 21 // 6/26/98 erm Canged to use byte arrays instead of UnicodeStrin
g |
| 22 // 7/31/98 erm hashCode: minimum inc should be 2 not 1, |
| 23 // Cleaned up operator= |
| 24 // 07/12/99 helena HPUX 11 CC port. |
| 25 // 03/06/01 synwee Modified compareTo, to handle the result of |
| 26 // 2 string similar in contents, but one is longer |
| 27 // than the other |
| 28 //==============================================================================
= |
| 29 |
| 30 #include "unicode/utypes.h" |
| 31 |
| 32 #if !UCONFIG_NO_COLLATION |
| 33 |
| 34 #include "unicode/sortkey.h" |
| 35 #include "cmemory.h" |
| 36 #include "uhash.h" |
| 37 |
| 38 U_NAMESPACE_BEGIN |
| 39 |
| 40 // A hash code of kInvalidHashCode indicates that the has code needs |
| 41 // to be computed. A hash code of kEmptyHashCode is used for empty keys |
| 42 // and for any key whose computed hash code is kInvalidHashCode. |
| 43 #define kInvalidHashCode ((int32_t)0) |
| 44 #define kEmptyHashCode ((int32_t)1) |
| 45 |
| 46 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
| 47 |
| 48 CollationKey::CollationKey() |
| 49 : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), |
| 50 fHashCode(kEmptyHashCode), fBytes(NULL) |
| 51 { |
| 52 } |
| 53 |
| 54 // Create a collation key from a bit array. |
| 55 CollationKey::CollationKey(const uint8_t* newValues, int32_t count) |
| 56 : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), |
| 57 fHashCode(kInvalidHashCode) |
| 58 { |
| 59 fBytes = (uint8_t *)uprv_malloc(count); |
| 60 |
| 61 if (fBytes == NULL) |
| 62 { |
| 63 setToBogus(); |
| 64 return; |
| 65 } |
| 66 |
| 67 uprv_memcpy(fBytes, newValues, fCount); |
| 68 } |
| 69 |
| 70 CollationKey::CollationKey(const CollationKey& other) |
| 71 : UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity
), |
| 72 fHashCode(other.fHashCode), fBytes(NULL) |
| 73 { |
| 74 if (other.fBogus) |
| 75 { |
| 76 setToBogus(); |
| 77 return; |
| 78 } |
| 79 |
| 80 fBytes = (uint8_t *)uprv_malloc(fCapacity); |
| 81 |
| 82 if (fBytes == NULL) |
| 83 { |
| 84 setToBogus(); |
| 85 return; |
| 86 } |
| 87 |
| 88 uprv_memcpy(fBytes, other.fBytes, other.fCount); |
| 89 if(fCapacity>fCount) { |
| 90 uprv_memset(fBytes+fCount, 0, fCapacity-fCount); |
| 91 } |
| 92 } |
| 93 |
| 94 CollationKey::~CollationKey() |
| 95 { |
| 96 uprv_free(fBytes); |
| 97 } |
| 98 |
| 99 void CollationKey::adopt(uint8_t *values, int32_t count) { |
| 100 if(fBytes != NULL) { |
| 101 uprv_free(fBytes); |
| 102 } |
| 103 fBogus = FALSE; |
| 104 fBytes = values; |
| 105 fCount = count; |
| 106 fCapacity = count; |
| 107 fHashCode = kInvalidHashCode; |
| 108 } |
| 109 |
| 110 // set the key to an empty state |
| 111 CollationKey& |
| 112 CollationKey::reset() |
| 113 { |
| 114 fCount = 0; |
| 115 fBogus = FALSE; |
| 116 fHashCode = kEmptyHashCode; |
| 117 |
| 118 return *this; |
| 119 } |
| 120 |
| 121 // set the key to a "bogus" or invalid state |
| 122 CollationKey& |
| 123 CollationKey::setToBogus() |
| 124 { |
| 125 uprv_free(fBytes); |
| 126 fBytes = NULL; |
| 127 |
| 128 fCapacity = 0; |
| 129 fCount = 0; |
| 130 fHashCode = kInvalidHashCode; |
| 131 |
| 132 return *this; |
| 133 } |
| 134 |
| 135 UBool |
| 136 CollationKey::operator==(const CollationKey& source) const |
| 137 { |
| 138 return (this->fCount == source.fCount && |
| 139 (this->fBytes == source.fBytes || |
| 140 uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); |
| 141 } |
| 142 |
| 143 const CollationKey& |
| 144 CollationKey::operator=(const CollationKey& other) |
| 145 { |
| 146 if (this != &other) |
| 147 { |
| 148 if (other.isBogus()) |
| 149 { |
| 150 return setToBogus(); |
| 151 } |
| 152 |
| 153 if (other.fBytes != NULL) |
| 154 { |
| 155 ensureCapacity(other.fCount); |
| 156 |
| 157 if (isBogus()) |
| 158 { |
| 159 return *this; |
| 160 } |
| 161 |
| 162 fHashCode = other.fHashCode; |
| 163 uprv_memcpy(fBytes, other.fBytes, fCount); |
| 164 } |
| 165 else |
| 166 { |
| 167 fCount = 0; |
| 168 fBogus = FALSE; |
| 169 fHashCode = kEmptyHashCode; |
| 170 } |
| 171 } |
| 172 |
| 173 return *this; |
| 174 } |
| 175 |
| 176 // Bitwise comparison for the collation keys. |
| 177 // NOTE: this is somewhat messy 'cause we can't count |
| 178 // on memcmp returning the exact values which match |
| 179 // Collator::EComparisonResult |
| 180 Collator::EComparisonResult |
| 181 CollationKey::compareTo(const CollationKey& target) const |
| 182 { |
| 183 uint8_t *src = this->fBytes; |
| 184 uint8_t *tgt = target.fBytes; |
| 185 |
| 186 // are we comparing the same string |
| 187 if (src == tgt) |
| 188 return Collator::EQUAL; |
| 189 |
| 190 /* |
| 191 int count = (this->fCount < target.fCount) ? this->fCount : target.fCoun
t; |
| 192 if (count == 0) |
| 193 { |
| 194 // If count is 0, at least one of the keys is empty. |
| 195 // An empty key is always LESS than a non-empty one |
| 196 // and EQUAL to another empty |
| 197 if (this->fCount < target.fCount) |
| 198 { |
| 199 return Collator::LESS; |
| 200 } |
| 201 |
| 202 if (this->fCount > target.fCount) |
| 203 { |
| 204 return Collator::GREATER; |
| 205 } |
| 206 return Collator::EQUAL; |
| 207 } |
| 208 */ |
| 209 |
| 210 int minLength; |
| 211 Collator::EComparisonResult result; |
| 212 |
| 213 // are we comparing different lengths? |
| 214 if (this->fCount != target.fCount) { |
| 215 if (this->fCount < target.fCount) { |
| 216 minLength = this->fCount; |
| 217 result = Collator::LESS; |
| 218 } |
| 219 else { |
| 220 minLength = target.fCount; |
| 221 result = Collator::GREATER; |
| 222 } |
| 223 } |
| 224 else { |
| 225 minLength = target.fCount; |
| 226 result = Collator::EQUAL; |
| 227 } |
| 228 |
| 229 if (minLength > 0) { |
| 230 int diff = uprv_memcmp(src, tgt, minLength); |
| 231 if (diff > 0) { |
| 232 return Collator::GREATER; |
| 233 } |
| 234 else |
| 235 if (diff < 0) { |
| 236 return Collator::LESS; |
| 237 } |
| 238 } |
| 239 |
| 240 return result; |
| 241 /* |
| 242 if (result < 0) |
| 243 { |
| 244 return Collator::LESS; |
| 245 } |
| 246 |
| 247 if (result > 0) |
| 248 { |
| 249 return Collator::GREATER; |
| 250 } |
| 251 return Collator::EQUAL; |
| 252 */ |
| 253 } |
| 254 |
| 255 // Bitwise comparison for the collation keys. |
| 256 UCollationResult |
| 257 CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const |
| 258 { |
| 259 if(U_SUCCESS(status)) { |
| 260 uint8_t *src = this->fBytes; |
| 261 uint8_t *tgt = target.fBytes; |
| 262 |
| 263 // are we comparing the same string |
| 264 if (src == tgt) |
| 265 return UCOL_EQUAL; |
| 266 |
| 267 int minLength; |
| 268 UCollationResult result; |
| 269 |
| 270 // are we comparing different lengths? |
| 271 if (this->fCount != target.fCount) { |
| 272 if (this->fCount < target.fCount) { |
| 273 minLength = this->fCount; |
| 274 result = UCOL_LESS; |
| 275 } |
| 276 else { |
| 277 minLength = target.fCount; |
| 278 result = UCOL_GREATER; |
| 279 } |
| 280 } |
| 281 else { |
| 282 minLength = target.fCount; |
| 283 result = UCOL_EQUAL; |
| 284 } |
| 285 |
| 286 if (minLength > 0) { |
| 287 int diff = uprv_memcmp(src, tgt, minLength); |
| 288 if (diff > 0) { |
| 289 return UCOL_GREATER; |
| 290 } |
| 291 else |
| 292 if (diff < 0) { |
| 293 return UCOL_LESS; |
| 294 } |
| 295 } |
| 296 |
| 297 return result; |
| 298 } else { |
| 299 return UCOL_EQUAL; |
| 300 } |
| 301 } |
| 302 |
| 303 CollationKey& |
| 304 CollationKey::ensureCapacity(int32_t newSize) |
| 305 { |
| 306 if (fCapacity < newSize) |
| 307 { |
| 308 uprv_free(fBytes); |
| 309 |
| 310 fBytes = (uint8_t *)uprv_malloc(newSize); |
| 311 |
| 312 if (fBytes == NULL) |
| 313 { |
| 314 return setToBogus(); |
| 315 } |
| 316 |
| 317 uprv_memset(fBytes, 0, fCapacity); |
| 318 fCapacity = newSize; |
| 319 } |
| 320 |
| 321 fBogus = FALSE; |
| 322 fCount = newSize; |
| 323 fHashCode = kInvalidHashCode; |
| 324 |
| 325 return *this; |
| 326 } |
| 327 |
| 328 #ifdef U_USE_COLLATION_KEY_DEPRECATES |
| 329 // Create a copy of the byte array. |
| 330 uint8_t* |
| 331 CollationKey::toByteArray(int32_t& count) const |
| 332 { |
| 333 uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); |
| 334 |
| 335 if (result == NULL) |
| 336 { |
| 337 count = 0; |
| 338 } |
| 339 else |
| 340 { |
| 341 count = fCount; |
| 342 uprv_memcpy(result, fBytes, fCount); |
| 343 } |
| 344 |
| 345 return result; |
| 346 } |
| 347 #endif |
| 348 |
| 349 int32_t |
| 350 CollationKey::hashCode() const |
| 351 { |
| 352 // (Cribbed from UnicodeString) |
| 353 // We cache the hashCode; when it becomes invalid, due to any change to the |
| 354 // string, we note this by setting it to kInvalidHashCode. [LIU] |
| 355 |
| 356 // Note: This method is semantically const, but physically non-const. |
| 357 |
| 358 if (fHashCode == kInvalidHashCode) |
| 359 { |
| 360 UHashTok key; |
| 361 key.pointer = fBytes; |
| 362 ((CollationKey *)this)->fHashCode = uhash_hashChars(key); |
| 363 #if 0 |
| 364 // We compute the hash by iterating sparsely over 64 (at most) character
s |
| 365 // spaced evenly through the string. For each character, we multiply th
e |
| 366 // previous hash value by a prime number and add the new character in, |
| 367 // in the manner of a additive linear congruential random number generat
or, |
| 368 // thus producing a pseudorandom deterministic value which should be wel
l |
| 369 // distributed over the output range. [LIU] |
| 370 const uint8_t *p = fBytes, *limit = fBytes + fCount; |
| 371 int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSi
ze/64, 1); |
| 372 int32_t hash = 0; |
| 373 |
| 374 while (p < limit) |
| 375 { |
| 376 hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); |
| 377 p += inc; |
| 378 } |
| 379 |
| 380 // If we happened to get kInvalidHashCode, replace it with kEmptyHashCod
e |
| 381 if (hash == kInvalidHashCode) |
| 382 { |
| 383 hash = kEmptyHashCode; |
| 384 } |
| 385 |
| 386 ((CollationKey *)this)->fHashCode = hash; // cast away const |
| 387 #endif |
| 388 } |
| 389 |
| 390 return fHashCode; |
| 391 } |
| 392 |
| 393 U_NAMESPACE_END |
| 394 |
| 395 U_CAPI int32_t U_EXPORT2 |
| 396 ucol_keyHashCode(const uint8_t *key, |
| 397 int32_t length) |
| 398 { |
| 399 U_NAMESPACE_QUALIFIER CollationKey newKey(key, length); |
| 400 return newKey.hashCode(); |
| 401 } |
| 402 |
| 403 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |