Index: icu46/source/i18n/sortkey.cpp |
=================================================================== |
--- icu46/source/i18n/sortkey.cpp (revision 0) |
+++ icu46/source/i18n/sortkey.cpp (revision 0) |
@@ -0,0 +1,403 @@ |
+/* |
+******************************************************************************* |
+* Copyright (C) 1996-2006, International Business Machines Corporation and * |
+* others. All Rights Reserved. * |
+******************************************************************************* |
+*/ |
+//=============================================================================== |
+// |
+// File sortkey.cpp |
+// |
+// |
+// |
+// Created by: Helena Shih |
+// |
+// Modification History: |
+// |
+// Date Name Description |
+// |
+// 6/20/97 helena Java class name change. |
+// 6/23/97 helena Added comments to make code more readable. |
+// 6/26/98 erm Canged to use byte arrays instead of UnicodeString |
+// 7/31/98 erm hashCode: minimum inc should be 2 not 1, |
+// Cleaned up operator= |
+// 07/12/99 helena HPUX 11 CC port. |
+// 03/06/01 synwee Modified compareTo, to handle the result of |
+// 2 string similar in contents, but one is longer |
+// than the other |
+//=============================================================================== |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "unicode/sortkey.h" |
+#include "cmemory.h" |
+#include "uhash.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+// A hash code of kInvalidHashCode indicates that the has code needs |
+// to be computed. A hash code of kEmptyHashCode is used for empty keys |
+// and for any key whose computed hash code is kInvalidHashCode. |
+#define kInvalidHashCode ((int32_t)0) |
+#define kEmptyHashCode ((int32_t)1) |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey) |
+ |
+CollationKey::CollationKey() |
+ : UObject(), fBogus(FALSE), fCount(0), fCapacity(0), |
+ fHashCode(kEmptyHashCode), fBytes(NULL) |
+{ |
+} |
+ |
+// Create a collation key from a bit array. |
+CollationKey::CollationKey(const uint8_t* newValues, int32_t count) |
+ : UObject(), fBogus(FALSE), fCount(count), fCapacity(count), |
+ fHashCode(kInvalidHashCode) |
+{ |
+ fBytes = (uint8_t *)uprv_malloc(count); |
+ |
+ if (fBytes == NULL) |
+ { |
+ setToBogus(); |
+ return; |
+ } |
+ |
+ uprv_memcpy(fBytes, newValues, fCount); |
+} |
+ |
+CollationKey::CollationKey(const CollationKey& other) |
+: UObject(other), fBogus(FALSE), fCount(other.fCount), fCapacity(other.fCapacity), |
+ fHashCode(other.fHashCode), fBytes(NULL) |
+{ |
+ if (other.fBogus) |
+ { |
+ setToBogus(); |
+ return; |
+ } |
+ |
+ fBytes = (uint8_t *)uprv_malloc(fCapacity); |
+ |
+ if (fBytes == NULL) |
+ { |
+ setToBogus(); |
+ return; |
+ } |
+ |
+ uprv_memcpy(fBytes, other.fBytes, other.fCount); |
+ if(fCapacity>fCount) { |
+ uprv_memset(fBytes+fCount, 0, fCapacity-fCount); |
+ } |
+} |
+ |
+CollationKey::~CollationKey() |
+{ |
+ uprv_free(fBytes); |
+} |
+ |
+void CollationKey::adopt(uint8_t *values, int32_t count) { |
+ if(fBytes != NULL) { |
+ uprv_free(fBytes); |
+ } |
+ fBogus = FALSE; |
+ fBytes = values; |
+ fCount = count; |
+ fCapacity = count; |
+ fHashCode = kInvalidHashCode; |
+} |
+ |
+// set the key to an empty state |
+CollationKey& |
+CollationKey::reset() |
+{ |
+ fCount = 0; |
+ fBogus = FALSE; |
+ fHashCode = kEmptyHashCode; |
+ |
+ return *this; |
+} |
+ |
+// set the key to a "bogus" or invalid state |
+CollationKey& |
+CollationKey::setToBogus() |
+{ |
+ uprv_free(fBytes); |
+ fBytes = NULL; |
+ |
+ fCapacity = 0; |
+ fCount = 0; |
+ fHashCode = kInvalidHashCode; |
+ |
+ return *this; |
+} |
+ |
+UBool |
+CollationKey::operator==(const CollationKey& source) const |
+{ |
+ return (this->fCount == source.fCount && |
+ (this->fBytes == source.fBytes || |
+ uprv_memcmp(this->fBytes, source.fBytes, this->fCount) == 0)); |
+} |
+ |
+const CollationKey& |
+CollationKey::operator=(const CollationKey& other) |
+{ |
+ if (this != &other) |
+ { |
+ if (other.isBogus()) |
+ { |
+ return setToBogus(); |
+ } |
+ |
+ if (other.fBytes != NULL) |
+ { |
+ ensureCapacity(other.fCount); |
+ |
+ if (isBogus()) |
+ { |
+ return *this; |
+ } |
+ |
+ fHashCode = other.fHashCode; |
+ uprv_memcpy(fBytes, other.fBytes, fCount); |
+ } |
+ else |
+ { |
+ fCount = 0; |
+ fBogus = FALSE; |
+ fHashCode = kEmptyHashCode; |
+ } |
+ } |
+ |
+ return *this; |
+} |
+ |
+// Bitwise comparison for the collation keys. |
+// NOTE: this is somewhat messy 'cause we can't count |
+// on memcmp returning the exact values which match |
+// Collator::EComparisonResult |
+Collator::EComparisonResult |
+CollationKey::compareTo(const CollationKey& target) const |
+{ |
+ uint8_t *src = this->fBytes; |
+ uint8_t *tgt = target.fBytes; |
+ |
+ // are we comparing the same string |
+ if (src == tgt) |
+ return Collator::EQUAL; |
+ |
+ /* |
+ int count = (this->fCount < target.fCount) ? this->fCount : target.fCount; |
+ if (count == 0) |
+ { |
+ // If count is 0, at least one of the keys is empty. |
+ // An empty key is always LESS than a non-empty one |
+ // and EQUAL to another empty |
+ if (this->fCount < target.fCount) |
+ { |
+ return Collator::LESS; |
+ } |
+ |
+ if (this->fCount > target.fCount) |
+ { |
+ return Collator::GREATER; |
+ } |
+ return Collator::EQUAL; |
+ } |
+ */ |
+ |
+ int minLength; |
+ Collator::EComparisonResult result; |
+ |
+ // are we comparing different lengths? |
+ if (this->fCount != target.fCount) { |
+ if (this->fCount < target.fCount) { |
+ minLength = this->fCount; |
+ result = Collator::LESS; |
+ } |
+ else { |
+ minLength = target.fCount; |
+ result = Collator::GREATER; |
+ } |
+ } |
+ else { |
+ minLength = target.fCount; |
+ result = Collator::EQUAL; |
+ } |
+ |
+ if (minLength > 0) { |
+ int diff = uprv_memcmp(src, tgt, minLength); |
+ if (diff > 0) { |
+ return Collator::GREATER; |
+ } |
+ else |
+ if (diff < 0) { |
+ return Collator::LESS; |
+ } |
+ } |
+ |
+ return result; |
+ /* |
+ if (result < 0) |
+ { |
+ return Collator::LESS; |
+ } |
+ |
+ if (result > 0) |
+ { |
+ return Collator::GREATER; |
+ } |
+ return Collator::EQUAL; |
+ */ |
+} |
+ |
+// Bitwise comparison for the collation keys. |
+UCollationResult |
+CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const |
+{ |
+ if(U_SUCCESS(status)) { |
+ uint8_t *src = this->fBytes; |
+ uint8_t *tgt = target.fBytes; |
+ |
+ // are we comparing the same string |
+ if (src == tgt) |
+ return UCOL_EQUAL; |
+ |
+ int minLength; |
+ UCollationResult result; |
+ |
+ // are we comparing different lengths? |
+ if (this->fCount != target.fCount) { |
+ if (this->fCount < target.fCount) { |
+ minLength = this->fCount; |
+ result = UCOL_LESS; |
+ } |
+ else { |
+ minLength = target.fCount; |
+ result = UCOL_GREATER; |
+ } |
+ } |
+ else { |
+ minLength = target.fCount; |
+ result = UCOL_EQUAL; |
+ } |
+ |
+ if (minLength > 0) { |
+ int diff = uprv_memcmp(src, tgt, minLength); |
+ if (diff > 0) { |
+ return UCOL_GREATER; |
+ } |
+ else |
+ if (diff < 0) { |
+ return UCOL_LESS; |
+ } |
+ } |
+ |
+ return result; |
+ } else { |
+ return UCOL_EQUAL; |
+ } |
+} |
+ |
+CollationKey& |
+CollationKey::ensureCapacity(int32_t newSize) |
+{ |
+ if (fCapacity < newSize) |
+ { |
+ uprv_free(fBytes); |
+ |
+ fBytes = (uint8_t *)uprv_malloc(newSize); |
+ |
+ if (fBytes == NULL) |
+ { |
+ return setToBogus(); |
+ } |
+ |
+ uprv_memset(fBytes, 0, fCapacity); |
+ fCapacity = newSize; |
+ } |
+ |
+ fBogus = FALSE; |
+ fCount = newSize; |
+ fHashCode = kInvalidHashCode; |
+ |
+ return *this; |
+} |
+ |
+#ifdef U_USE_COLLATION_KEY_DEPRECATES |
+// Create a copy of the byte array. |
+uint8_t* |
+CollationKey::toByteArray(int32_t& count) const |
+{ |
+ uint8_t *result = (uint8_t*) uprv_malloc( sizeof(uint8_t) * fCount ); |
+ |
+ if (result == NULL) |
+ { |
+ count = 0; |
+ } |
+ else |
+ { |
+ count = fCount; |
+ uprv_memcpy(result, fBytes, fCount); |
+ } |
+ |
+ return result; |
+} |
+#endif |
+ |
+int32_t |
+CollationKey::hashCode() const |
+{ |
+ // (Cribbed from UnicodeString) |
+ // We cache the hashCode; when it becomes invalid, due to any change to the |
+ // string, we note this by setting it to kInvalidHashCode. [LIU] |
+ |
+ // Note: This method is semantically const, but physically non-const. |
+ |
+ if (fHashCode == kInvalidHashCode) |
+ { |
+ UHashTok key; |
+ key.pointer = fBytes; |
+ ((CollationKey *)this)->fHashCode = uhash_hashChars(key); |
+#if 0 |
+ // We compute the hash by iterating sparsely over 64 (at most) characters |
+ // spaced evenly through the string. For each character, we multiply the |
+ // previous hash value by a prime number and add the new character in, |
+ // in the manner of a additive linear congruential random number generator, |
+ // thus producing a pseudorandom deterministic value which should be well |
+ // distributed over the output range. [LIU] |
+ const uint8_t *p = fBytes, *limit = fBytes + fCount; |
+ int32_t inc = (fCount >= 256) ? fCount/128 : 2; // inc = max(fSize/64, 1); |
+ int32_t hash = 0; |
+ |
+ while (p < limit) |
+ { |
+ hash = ( hash * 37 ) + ((p[0] << 8) + p[1]); |
+ p += inc; |
+ } |
+ |
+ // If we happened to get kInvalidHashCode, replace it with kEmptyHashCode |
+ if (hash == kInvalidHashCode) |
+ { |
+ hash = kEmptyHashCode; |
+ } |
+ |
+ ((CollationKey *)this)->fHashCode = hash; // cast away const |
+#endif |
+ } |
+ |
+ return fHashCode; |
+} |
+ |
+U_NAMESPACE_END |
+ |
+U_CAPI int32_t U_EXPORT2 |
+ucol_keyHashCode(const uint8_t *key, |
+ int32_t length) |
+{ |
+ U_NAMESPACE_QUALIFIER CollationKey newKey(key, length); |
+ return newKey.hashCode(); |
+} |
+ |
+#endif /* #if !UCONFIG_NO_COLLATION */ |
Property changes on: icu46/source/i18n/sortkey.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |