Index: icu46/source/i18n/colldata.cpp |
=================================================================== |
--- icu46/source/i18n/colldata.cpp (revision 0) |
+++ icu46/source/i18n/colldata.cpp (revision 0) |
@@ -0,0 +1,1100 @@ |
+/* |
+ ****************************************************************************** |
+ * Copyright (C) 1996-2009, International Business Machines * |
+ * Corporation and others. All Rights Reserved. * |
+ ****************************************************************************** |
+ */ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION |
+ |
+#include "unicode/unistr.h" |
+#include "unicode/putil.h" |
+#include "unicode/usearch.h" |
+ |
+#include "cmemory.h" |
+#include "unicode/coll.h" |
+#include "unicode/tblcoll.h" |
+#include "unicode/coleitr.h" |
+#include "unicode/ucoleitr.h" |
+ |
+#include "unicode/regex.h" // TODO: make conditional on regexp being built. |
+ |
+#include "unicode/uniset.h" |
+#include "unicode/uset.h" |
+#include "unicode/ustring.h" |
+#include "hash.h" |
+#include "uhash.h" |
+#include "ucln_in.h" |
+#include "ucol_imp.h" |
+#include "umutex.h" |
+ |
+#include "unicode/colldata.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+// Element count of a statically sized array (sizeof-based, so not meaningful for pointers). |
+#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0])) |
+// Raw storage for `count` objects of `type` from uprv_malloc; no constructors are run. |
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type)) |
+// Hands the storage to uprv_free; no destructors are run. |
+#define DELETE_ARRAY(array) uprv_free((void *) (array)) |
+// Bytewise copy of `count` elements via uprv_memcpy; the element size is taken from `src`. |
+#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList) |
+ |
+// Optional bookkeeping, compiled only when INSTRUMENT_CELIST is defined: |
+// _active is adjusted by the CEList constructor and destructor, and |
+// _histogram is bumped on construction and whenever add() grows the list. |
+#ifdef INSTRUMENT_CELIST |
+int32_t CEList::_active = 0; |
+int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; |
+#endif |
+ |
+/** |
+ * Collects the collation elements (CEs) that `coll` produces for `string`. |
+ * |
+ * Each CE is masked down to the collator's strength. When alternate handling |
+ * is "shifted", CEs below the variable top are reduced to their primary |
+ * weight (quaternary strength and above) or dropped. Completely ignorable |
+ * CEs are skipped, and the continuation marker is restored on CEs that |
+ * carried it before masking. |
+ * |
+ * @param coll   collator used to generate the CEs |
+ * @param string text to convert |
+ * @param status in/out error code; on failure the list is left empty |
+ */ |
+CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status) |
+    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0) |
+{ |
+#ifdef INSTRUMENT_CELIST |
+    // Counted before any early return so the decrement in the destructor always balances. |
+    _active += 1; |
+    _histogram[0] += 1; |
+#endif |
+ |
+    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status); |
+    UCollationStrength strength = ucol_getStrength(coll); |
+    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; |
+    uint32_t variableTop = ucol_getVariableTop(coll, &status); |
+    uint32_t strengthMask = 0; |
+    int32_t order; |
+ |
+    if (U_FAILURE(status)) { |
+        // The iterator may have been opened before a later query failed; do not leak it. |
+        if (elems != NULL) { |
+            ucol_closeElements(elems); |
+        } |
+ |
+        return; |
+    } |
+ |
+    // **** only set flag if string has Han(gul) **** |
+    ucol_forceHanImplicit(elems, &status); |
+ |
+    // Build the mask cumulatively: every strength includes the weaker ones. |
+    switch (strength) |
+    { |
+    default: |
+        strengthMask |= UCOL_TERTIARYORDERMASK; |
+        /* fall through */ |
+ |
+    case UCOL_SECONDARY: |
+        strengthMask |= UCOL_SECONDARYORDERMASK; |
+        /* fall through */ |
+ |
+    case UCOL_PRIMARY: |
+        strengthMask |= UCOL_PRIMARYORDERMASK; |
+    } |
+ |
+    // Start in the in-object buffer; add() moves to the heap once it is full. |
+    ces = ceBuffer; |
+ |
+    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) { |
+        // Remember the continuation flag now; the strength mask below clears it. |
+        UBool cont = isContinuation(order); |
+ |
+        order &= strengthMask; |
+ |
+        if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) { |
+            if (strength >= UCOL_QUATERNARY) { |
+                order &= UCOL_PRIMARYORDERMASK; |
+            } else { |
+                order = UCOL_IGNORABLE; |
+            } |
+        } |
+ |
+        if (order == UCOL_IGNORABLE) { |
+            continue; |
+        } |
+ |
+        if (cont) { |
+            order |= UCOL_CONTINUATION_MARKER; |
+        } |
+ |
+        add(order, status); |
+    } |
+ |
+    ucol_closeElements(elems); |
+} |
+ |
+/** Frees the CE storage, but only if it is not the in-object buffer. */ |
+CEList::~CEList() |
+{ |
+#ifdef INSTRUMENT_CELIST |
+    _active -= 1; |
+#endif |
+ |
+    const UBool separateStorage = (ces != ceBuffer); |
+ |
+    if (separateStorage) { |
+        DELETE_ARRAY(ces); |
+    } |
+} |
+ |
+/** |
+ * Appends one CE, growing the storage by CELIST_BUFFER_SIZE entries when full. |
+ * |
+ * @param ce     collation element to append |
+ * @param status in/out error code; nothing happens if it already signals |
+ *               failure, and it is set to U_MEMORY_ALLOCATION_ERROR when the |
+ *               list cannot grow |
+ */ |
+void CEList::add(uint32_t ce, UErrorCode &status) |
+{ |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+ |
+    if (listSize >= listMax) { |
+        int32_t newMax = listMax + CELIST_BUFFER_SIZE; |
+ |
+#ifdef INSTRUMENT_CELIST |
+        // The histogram has a fixed number of buckets; count every larger |
+        // list in the last one instead of writing past the array. |
+        int32_t bucket = listSize / CELIST_BUFFER_SIZE; |
+        const int32_t lastBucket = (int32_t) ARRAY_SIZE(_histogram) - 1; |
+ |
+        if (bucket > lastBucket) { |
+            bucket = lastBucket; |
+        } |
+ |
+        _histogram[bucket] += 1; |
+#endif |
+ |
+        uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax); |
+ |
+        if (newCEs == NULL) { |
+            status = U_MEMORY_ALLOCATION_ERROR; |
+            return; |
+        } |
+ |
+        uprv_memcpy(newCEs, ces, listSize * sizeof(uint32_t)); |
+ |
+        // The initial storage is the in-object buffer and must not be freed. |
+        if (ces != ceBuffer) { |
+            DELETE_ARRAY(ces); |
+        } |
+ |
+        ces = newCEs; |
+        listMax = newMax; |
+    } |
+ |
+    ces[listSize++] = ce; |
+} |
+ |
+/** |
+ * Range-checked element access. |
+ * |
+ * @return the CE at `index`, or UCOL_NULLORDER when `index` is outside the list |
+ */ |
+uint32_t CEList::get(int32_t index) const |
+{ |
+    if (index < 0 || index >= listSize) { |
+        return UCOL_NULLORDER; |
+    } |
+ |
+    return ces[index]; |
+} |
+ |
+/** Unchecked element access; the caller must pass an index inside the list. */ |
+uint32_t &CEList::operator[](int32_t index) const |
+{ |
+    return *(ces + index); |
+} |
+ |
+/** |
+ * Tests whether the CEs of `other` occur in this list starting at `offset`. |
+ * |
+ * @param offset index in this list where the comparison starts |
+ * @param other  list to look for; NULL never matches |
+ * @return TRUE if every CE of `other` equals the corresponding CE here; |
+ *         FALSE otherwise, including when `offset` is negative or too few |
+ *         CEs remain after `offset` |
+ */ |
+UBool CEList::matchesAt(int32_t offset, const CEList *other) const |
+{ |
+    // A negative offset would pass the length test below and read before the array. |
+    if (other == NULL || offset < 0 || listSize - offset < other->size()) { |
+        return FALSE; |
+    } |
+ |
+    const int32_t count = other->size(); |
+ |
+    for (int32_t j = 0; j < count; j += 1) { |
+        if (ces[offset + j] != (*other)[j]) { |
+            return FALSE; |
+        } |
+    } |
+ |
+    return TRUE; |
+} |
+ |
+/** @return the number of CEs currently stored */ |
+int32_t CEList::size() const |
+{ |
+    const int32_t count = listSize; |
+ |
+    return count; |
+} |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList) |
+ |
+// Optional bookkeeping, compiled only when INSTRUMENT_STRING_LIST is defined: |
+// _lists counts successful constructions, _strings counts add() calls, and |
+// _histogram is bumped on construction and whenever add() grows the list. |
+#ifdef INSTRUMENT_STRING_LIST |
+int32_t StringList::_lists = 0; |
+int32_t StringList::_strings = 0; |
+int32_t StringList::_histogram[101] = {0}; |
+#endif |
+ |
+/** |
+ * Creates an empty list with room for STRING_LIST_BUFFER_SIZE strings. |
+ * |
+ * @param status in/out error code; nothing is allocated if it already signals |
+ *               failure, and it is set to U_MEMORY_ALLOCATION_ERROR when the |
+ *               string array cannot be allocated |
+ */ |
+StringList::StringList(UErrorCode &status) |
+    : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0) |
+{ |
+    if (U_SUCCESS(status)) { |
+        strings = new UnicodeString [listMax]; |
+ |
+        if (strings != NULL) { |
+#ifdef INSTRUMENT_STRING_LIST |
+            _lists += 1; |
+            _histogram[0] += 1; |
+#endif |
+        } else { |
+            status = U_MEMORY_ALLOCATION_ERROR; |
+        } |
+    } |
+} |
+ |
+/** Destroys every string in the list together with the array that holds them. */ |
+StringList::~StringList() |
+{ |
+    UnicodeString *owned = strings; |
+ |
+    delete[] owned; |
+} |
+ |
+/** |
+ * Appends a copy of `string`, growing the array by STRING_LIST_BUFFER_SIZE |
+ * slots when it is full. |
+ * |
+ * @param string string to copy; NULL yields U_ILLEGAL_ARGUMENT_ERROR |
+ * @param status in/out error code; nothing happens if it already signals |
+ *               failure, and it is set to U_MEMORY_ALLOCATION_ERROR when the |
+ *               list has no storage or cannot grow |
+ */ |
+void StringList::add(const UnicodeString *string, UErrorCode &status) |
+{ |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+ |
+    if (string == NULL) { |
+        status = U_ILLEGAL_ARGUMENT_ERROR; |
+        return; |
+    } |
+ |
+    // The constructor leaves `strings` NULL when its allocation failed. |
+    if (strings == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+        return; |
+    } |
+ |
+#ifdef INSTRUMENT_STRING_LIST |
+    _strings += 1; |
+#endif |
+ |
+    if (listSize >= listMax) { |
+        int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE; |
+ |
+        UnicodeString *newStrings = new UnicodeString[newMax]; |
+ |
+        if (newStrings == NULL) { |
+            status = U_MEMORY_ALLOCATION_ERROR; |
+            return; |
+        } |
+ |
+        // Copy element by element. A bytewise copy would duplicate the |
+        // internal buffer pointers of the old strings, and delete[] below |
+        // would then free storage the new array still points at. |
+        for (int32_t i = 0; i < listSize; i += 1) { |
+            newStrings[i] = strings[i]; |
+        } |
+ |
+#ifdef INSTRUMENT_STRING_LIST |
+        int32_t _h = listSize / STRING_LIST_BUFFER_SIZE; |
+ |
+        if (_h > 100) { |
+            _h = 100; |
+        } |
+ |
+        _histogram[_h] += 1; |
+#endif |
+ |
+        delete[] strings; |
+        strings = newStrings; |
+        listMax = newMax; |
+    } |
+ |
+    // Unused slots hold default-constructed (empty) strings, so plain |
+    // assignment stores exactly the source string. |
+    strings[listSize++] = *string; |
+} |
+ |
+/** |
+ * Convenience overload: wraps `count` UChars starting at `chars` in a |
+ * temporary UnicodeString and appends a copy of it. |
+ * |
+ * @param status in/out error code, forwarded to the UnicodeString overload |
+ */ |
+void StringList::add(const UChar *chars, int32_t count, UErrorCode &status) |
+{ |
+    const UnicodeString temporary(chars, count); |
+    const UnicodeString *source = &temporary; |
+ |
+    add(source, status); |
+} |
+ |
+/** |
+ * Range-checked element access. |
+ * |
+ * @return pointer to the string at `index`, or NULL when `index` is outside the list |
+ */ |
+const UnicodeString *StringList::get(int32_t index) const |
+{ |
+    if (index < 0 || index >= listSize) { |
+        return NULL; |
+    } |
+ |
+    return strings + index; |
+} |
+ |
+/** @return the number of strings currently stored */ |
+int32_t StringList::size() const |
+{ |
+    const int32_t count = listSize; |
+ |
+    return count; |
+} |
+ |
+ |
+// Value deleter installed on the CEToStringsMap hashtable (defined below). |
+U_CFUNC void deleteStringList(void *obj); |
+ |
+// Maps a CE value to the StringList of strings that were registered under it. |
+// The lists are deleted through deleteStringList, which the constructor |
+// installs as the hashtable's value deleter. |
+class CEToStringsMap : public UMemory |
+{ |
+public: |
+ |
+ CEToStringsMap(UErrorCode &status); |
+ ~CEToStringsMap(); |
+ |
+ // Appends `string` to the list stored under `ce`, creating the list on first use. |
+ void put(uint32_t ce, UnicodeString *string, UErrorCode &status); |
+ // Returns the list stored under `ce` (result of uhash_iget). |
+ StringList *getStringList(uint32_t ce) const; |
+ |
+private: |
+ |
+ // Stores `stringList` under `ce` via uhash_iput. |
+ void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status); |
+ UHashtable *map; |
+}; |
+ |
+/** |
+ * Opens the integer-keyed hashtable and installs deleteStringList as its |
+ * value deleter. |
+ * |
+ * @param status in/out error code; `map` stays NULL if it already signals failure |
+ */ |
+CEToStringsMap::CEToStringsMap(UErrorCode &status) |
+    : map(NULL) |
+{ |
+    if (U_SUCCESS(status)) { |
+        map = uhash_open(uhash_hashLong, uhash_compareLong, |
+                         uhash_compareCaselessUnicodeString, |
+                         &status); |
+ |
+        if (U_SUCCESS(status)) { |
+            uhash_setValueDeleter(map, deleteStringList); |
+        } |
+    } |
+} |
+ |
+/** Closes the hashtable. */ |
+CEToStringsMap::~CEToStringsMap() |
+{ |
+    UHashtable *table = map; |
+ |
+    uhash_close(table); |
+} |
+ |
+/** |
+ * Appends a copy of `string` to the list registered under `ce`, creating and |
+ * registering that list on first use. |
+ * |
+ * @param ce     collation element used as the key |
+ * @param string string to record; the list stores its own copy |
+ * @param status in/out error code; nothing happens if it already signals |
+ *               failure, and an existing error code is never overwritten |
+ */ |
+void CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status) |
+{ |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+ |
+    StringList *strings = getStringList(ce); |
+ |
+    if (strings == NULL) { |
+        strings = new StringList(status); |
+ |
+        if (strings == NULL) { |
+            status = U_MEMORY_ALLOCATION_ERROR; |
+            return; |
+        } |
+ |
+        if (U_FAILURE(status)) { |
+            // The list is not in the map yet, so nobody else will free it. |
+            delete strings; |
+            return; |
+        } |
+ |
+        putStringList(ce, strings, status); |
+ |
+        if (U_FAILURE(status)) { |
+            // The table adopts the value and is expected to have run the |
+            // value deleter on a failed put, so `strings` must not be |
+            // touched or deleted again here. |
+            return; |
+        } |
+    } |
+ |
+    strings->add(string, status); |
+} |
+ |
+/** @return the value uhash_iget finds for `ce`, interpreted as a StringList */ |
+StringList *CEToStringsMap::getStringList(uint32_t ce) const |
+{ |
+    void *found = uhash_iget(map, ce); |
+ |
+    return (StringList *) found; |
+} |
+ |
+/** Stores `stringList` under the key `ce`; errors are reported through `status`. */ |
+void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status) |
+{ |
+    void *value = (void *) stringList; |
+ |
+    uhash_iput(map, ce, value, &status); |
+} |
+ |
+/** Hashtable value deleter: destroys the StringList behind `obj`. */ |
+U_CFUNC void deleteStringList(void *obj) |
+{ |
+    delete static_cast<StringList *>(obj); |
+} |
+ |
+// Value and key deleters installed on the StringToCEsMap hashtable (defined below). |
+U_CFUNC void deleteCEList(void *obj); |
+U_CFUNC void deleteUnicodeStringKey(void *obj); |
+ |
+// Maps a UnicodeString to its CEList. The constructor installs the deleters |
+// above, so the hashtable deletes both the keys and the values it holds. |
+class StringToCEsMap : public UMemory |
+{ |
+public: |
+ StringToCEsMap(UErrorCode &status); |
+ ~StringToCEsMap(); |
+ |
+ // Stores `ces` under `string` via uhash_put. |
+ void put(const UnicodeString *string, const CEList *ces, UErrorCode &status); |
+ // Returns the list stored under `string` (result of uhash_get). |
+ const CEList *get(const UnicodeString *string); |
+ // NOTE(review): no definition of free() is visible in this file - confirm it is unused. |
+ void free(const CEList *list); |
+ |
+private: |
+ |
+ |
+ UHashtable *map; |
+}; |
+ |
+/** |
+ * Opens the string-keyed hashtable and installs the deleters for its keys |
+ * and values. |
+ * |
+ * @param status in/out error code; `map` stays NULL if it already signals failure |
+ */ |
+StringToCEsMap::StringToCEsMap(UErrorCode &status) |
+    : map(NULL) |
+{ |
+    if (U_SUCCESS(status)) { |
+        map = uhash_open(uhash_hashUnicodeString, |
+                         uhash_compareUnicodeString, |
+                         uhash_compareLong, |
+                         &status); |
+ |
+        if (U_SUCCESS(status)) { |
+            uhash_setValueDeleter(map, deleteCEList); |
+            uhash_setKeyDeleter(map, deleteUnicodeStringKey); |
+        } |
+    } |
+} |
+ |
+/** Closes the hashtable. */ |
+StringToCEsMap::~StringToCEsMap() |
+{ |
+    UHashtable *table = map; |
+ |
+    uhash_close(table); |
+} |
+ |
+/** Stores `ces` under the key `string`; errors are reported through `status`. */ |
+void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status) |
+{ |
+    // The hashtable API takes untyped, non-const pointers. |
+    void *keyPointer = const_cast<UnicodeString *>(string); |
+    void *valuePointer = const_cast<CEList *>(ces); |
+ |
+    uhash_put(map, keyPointer, valuePointer, &status); |
+} |
+ |
+/** @return the value uhash_get finds for `string`, interpreted as a CEList */ |
+const CEList *StringToCEsMap::get(const UnicodeString *string) |
+{ |
+    void *found = uhash_get(map, string); |
+ |
+    return (const CEList *) found; |
+} |
+ |
+/** Hashtable value deleter: destroys the CEList behind `obj`. */ |
+U_CFUNC void deleteCEList(void *obj) |
+{ |
+    delete static_cast<CEList *>(obj); |
+} |
+ |
+/** Hashtable key deleter: destroys the UnicodeString behind `obj`. */ |
+U_CFUNC void deleteUnicodeStringKey(void *obj) |
+{ |
+    delete static_cast<UnicodeString *>(obj); |
+} |
+ |
+// One cache slot: a CollData plus a reference count. The count starts at 1 |
+// and is adjusted by CollDataCache::get and CollDataCache::unref. The |
+// destructor deletes `data`. |
+class CollDataCacheEntry : public UMemory |
+{ |
+public: |
+ CollDataCacheEntry(CollData *theData); |
+ ~CollDataCacheEntry(); |
+ |
+ CollData *data; |
+ int32_t refCount; |
+}; |
+ |
+/** Wraps `theData` in an entry whose reference count starts at one. */ |
+CollDataCacheEntry::CollDataCacheEntry(CollData *theData) |
+    : data(theData), |
+      refCount(1) |
+{ |
+    // Everything is set up by the initializer list. |
+} |
+ |
+/** Deletes the wrapped CollData; the reference count is not consulted. */ |
+CollDataCacheEntry::~CollDataCacheEntry() |
+{ |
+    // check refCount? |
+    CollData *owned = data; |
+ |
+    delete owned; |
+} |
+ |
+// Cache of CollData objects, keyed by the collator's short definition string. |
+// get(), unref() and flush() take `lock` around their accesses to `cache`. |
+class CollDataCache : public UMemory |
+{ |
+public: |
+ CollDataCache(UErrorCode &status); |
+ ~CollDataCache(); |
+ |
+ // Returns the CollData for `collator`, creating and caching it if needed, |
+ // and counts one more reference on its entry. |
+ CollData *get(UCollator *collator, UErrorCode &status); |
+ // Drops one reference from the entry that holds `collData`. |
+ void unref(CollData *collData); |
+ |
+ // Removes every entry whose reference count is zero or less. |
+ void flush(); |
+ |
+private: |
+ // Builds the key for `collator`. The result is either `keyBuffer` or a |
+ // heap buffer that the caller releases with deleteKey(). |
+ static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength); |
+ static void deleteKey(char *key); |
+ |
+ UMTX lock; |
+ UHashtable *cache; |
+}; |
+ |
+/** |
+ * Hashtable key deleter for the CollData cache. Deliberately does nothing: |
+ * the key strings are owned by the CollData objects and are released by them. |
+ */ |
+U_CFUNC void deleteChars(void * /*obj*/) |
+{ |
+    // Intentionally empty; the keys must not be freed here. |
+} |
+ |
+/** Hashtable value deleter: destroys the CollDataCacheEntry behind `obj`. */ |
+U_CFUNC void deleteCollDataCacheEntry(void *obj) |
+{ |
+    delete static_cast<CollDataCacheEntry *>(obj); |
+} |
+ |
+/** |
+ * Opens the hashtable keyed by C strings and installs the deleters for its |
+ * keys and values. |
+ * |
+ * @param status in/out error code; `cache` stays NULL if it already signals failure |
+ */ |
+CollDataCache::CollDataCache(UErrorCode &status) |
+    : lock(0), cache(NULL) |
+{ |
+    if (U_SUCCESS(status)) { |
+        cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status); |
+ |
+        if (U_SUCCESS(status)) { |
+            uhash_setValueDeleter(cache, deleteCollDataCacheEntry); |
+            uhash_setKeyDeleter(cache, deleteChars); |
+        } |
+    } |
+} |
+ |
+/** Closes the hashtable while holding the lock, then destroys the lock. */ |
+CollDataCache::~CollDataCache() |
+{ |
+    umtx_lock(&lock); |
+ |
+    UHashtable *table = cache; |
+ |
+    cache = NULL; |
+    uhash_close(table); |
+ |
+    umtx_unlock(&lock); |
+ |
+    umtx_destroy(&lock); |
+} |
+ |
+/** |
+ * Returns the cached CollData for `collator`, creating and caching it when no |
+ * entry exists yet. |
+ * |
+ * The CollData is built without holding the lock, so another thread may |
+ * insert an entry for the same key in the meantime. In that case the freshly |
+ * built object is discarded and the cached one is returned with its reference |
+ * count incremented. |
+ * |
+ * @param collator collator whose short definition string is the cache key |
+ * @param status   in/out error code; the code reported by a failing step is |
+ *                 preserved |
+ * @return the CollData, or NULL on failure |
+ */ |
+CollData *CollDataCache::get(UCollator *collator, UErrorCode &status) |
+{ |
+    if (U_FAILURE(status)) { |
+        return NULL; |
+    } |
+ |
+    char keyBuffer[KEY_BUFFER_SIZE]; |
+    int32_t keyLength = KEY_BUFFER_SIZE; |
+    char *key = getKey(collator, keyBuffer, &keyLength); |
+ |
+    if (key == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+        return NULL; |
+    } |
+ |
+    CollData *result = NULL; |
+    CollDataCacheEntry *entry = NULL, *newEntry = NULL; |
+ |
+    umtx_lock(&lock); |
+    entry = (CollDataCacheEntry *) uhash_get(cache, key); |
+ |
+    if (entry != NULL) { |
+        result = entry->data; |
+        entry->refCount += 1; |
+    } |
+    umtx_unlock(&lock); |
+ |
+    if (entry == NULL) { |
+        CollData *newData = new CollData(collator, key, keyLength, status); |
+ |
+        if (newData == NULL) { |
+            status = U_MEMORY_ALLOCATION_ERROR; |
+        } else if (U_FAILURE(status)) { |
+            delete newData; |
+        } else { |
+            newEntry = new CollDataCacheEntry(newData); |
+ |
+            if (newEntry == NULL) { |
+                status = U_MEMORY_ALLOCATION_ERROR; |
+                delete newData; |
+            } |
+        } |
+ |
+        // From here on newEntry owns newData: deleting the entry deletes the |
+        // data as well, so newData is never deleted separately. |
+        if (newEntry != NULL) { |
+            umtx_lock(&lock); |
+            entry = (CollDataCacheEntry *) uhash_get(cache, key); |
+ |
+            if (entry == NULL) { |
+                // The table is keyed by the copy of the key stored inside |
+                // the CollData, which lives as long as the entry does. |
+                uhash_put(cache, newData->key, newEntry, &status); |
+ |
+                if (U_SUCCESS(status)) { |
+                    result = newData; |
+                } |
+ |
+                // The table has adopted the entry. On a failed put it is |
+                // expected to have run the value deleter already, so the |
+                // entry must not be deleted again here. |
+                newEntry = NULL; |
+            } else { |
+                // Another thread won the race; use its entry. |
+                result = entry->data; |
+                entry->refCount += 1; |
+            } |
+            umtx_unlock(&lock); |
+ |
+            // Non-NULL only if the race was lost; also deletes newData. |
+            delete newEntry; |
+        } |
+    } |
+ |
+    // The lookup key is a temporary on every path. |
+    if (key != keyBuffer) { |
+        deleteKey(key); |
+    } |
+ |
+    return result; |
+} |
+ |
+/** |
+ * Drops one reference from the cache entry that holds `collData`. Nothing is |
+ * freed here; entries without references are removed by flush(). |
+ * |
+ * @param collData object previously returned by get(); NULL is ignored |
+ */ |
+void CollDataCache::unref(CollData *collData) |
+{ |
+    if (collData == NULL) { |
+        return; |
+    } |
+ |
+    CollDataCacheEntry *entry = NULL; |
+ |
+    umtx_lock(&lock); |
+    entry = (CollDataCacheEntry *) uhash_get(cache, collData->key); |
+ |
+    if (entry != NULL) { |
+        entry->refCount -= 1; |
+    } |
+    umtx_unlock(&lock); |
+} |
+ |
+/** |
+ * Builds the cache key for `collator`: its short definition string, |
+ * NUL-terminated. |
+ * |
+ * @param collator        collator to describe |
+ * @param keyBuffer       caller-supplied buffer that is tried first |
+ * @param keyBufferLength in: capacity of `keyBuffer`; out: capacity of the |
+ *                        buffer that is returned |
+ * @return `keyBuffer` if the string fits, otherwise a heap buffer that the |
+ *         caller releases with deleteKey(); NULL if that buffer cannot be |
+ *         allocated |
+ */ |
+char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength) |
+{ |
+    UErrorCode status = U_ZERO_ERROR; |
+    int32_t len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status); |
+ |
+    if (len >= *keyBufferLength) { |
+        *keyBufferLength = (len + 2) & ~1;  // round to even length, leaving room for terminating null |
+        keyBuffer = NEW_ARRAY(char, *keyBufferLength); |
+ |
+        if (keyBuffer == NULL) { |
+            return NULL; |
+        } |
+ |
+        status = U_ZERO_ERROR; |
+ |
+        len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status); |
+    } |
+ |
+    // Keep the terminator inside the buffer whatever length was reported. |
+    if (len < 0) { |
+        len = 0; |
+    } else if (len >= *keyBufferLength) { |
+        len = *keyBufferLength - 1; |
+    } |
+ |
+    keyBuffer[len] = '\0'; |
+ |
+    return keyBuffer; |
+} |
+ |
+/** |
+ * Removes every entry whose reference count has dropped to zero or below. |
+ * The whole scan runs with the cache lock held. |
+ */ |
+void CollDataCache::flush() |
+{ |
+    int32_t pos = -1; |
+ |
+    umtx_lock(&lock); |
+ |
+    for (const UHashElement *element = uhash_nextElement(cache, &pos); |
+         element != NULL; |
+         element = uhash_nextElement(cache, &pos)) { |
+        const CollDataCacheEntry *entry = (const CollDataCacheEntry *) element->value.pointer; |
+ |
+        if (entry->refCount > 0) { |
+            continue; |
+        } |
+ |
+        uhash_removeElement(cache, element); |
+    } |
+ |
+    umtx_unlock(&lock); |
+} |
+ |
+/** Releases a key buffer that getKey() allocated on the heap. */ |
+void CollDataCache::deleteKey(char *key) |
+{ |
+    char *heapKey = key; |
+ |
+    DELETE_ARRAY(heapKey); |
+} |
+ |
+U_CDECL_BEGIN |
+/** |
+ * Cleanup callback registered by CollData::getCollDataCache(); frees the |
+ * global CollData cache and always reports success. |
+ */ |
+static UBool coll_data_cleanup(void) |
+{ |
+    const UBool done = TRUE; |
+ |
+    CollData::freeCollDataCache(); |
+ |
+    return done; |
+} |
+U_CDECL_END |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData) |
+ |
+/** |
+ * Default constructor. The pointer members are set to NULL because the |
+ * destructor closes the collator, frees the key and deletes the maps; |
+ * leaving them uninitialized would make it release garbage pointers. |
+ */ |
+CollData::CollData() |
+    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL) |
+{ |
+    // nothing else to do |
+} |
+ |
+#define CLONE_COLLATOR |
+ |
+//#define CACHE_CELISTS |
+/** |
+ * Records `st` in `ceToChars` under the first CE that `coll` generates for it. |
+ * |
+ * Takes ownership of `st`. When CACHE_CELISTS is defined, the string and its |
+ * CE list are handed on to `charsToCEs`; otherwise both are deleted before |
+ * returning. A NULL `st` is reported as U_MEMORY_ALLOCATION_ERROR so callers |
+ * can pass the result of `new` directly. |
+ */ |
+static void addStartingCE(UCollator *coll, CEToStringsMap *ceToChars, StringToCEsMap *charsToCEs, |
+                          UnicodeString *st, UErrorCode &status) |
+{ |
+    if (st == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+        return; |
+    } |
+ |
+    CEList *ceList = new CEList(coll, *st, status); |
+ |
+    if (ceList == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+        delete st; |
+        return; |
+    } |
+ |
+    if (U_SUCCESS(status)) { |
+        ceToChars->put(ceList->get(0), st, status); |
+    } |
+ |
+#ifdef CACHE_CELISTS |
+    charsToCEs->put(st, ceList, status); |
+#else |
+    (void) charsToCEs;  // only used when the CE lists are cached |
+    delete ceList; |
+    delete st; |
+#endif |
+} |
+ |
+/** |
+ * Builds the lookup data for `collator`. |
+ * |
+ * For every assigned, non-"other" character outside the implicitly handled |
+ * Han/Hangul ranges, plus every contraction and expansion of the collator, |
+ * the string is recorded under the first CE it generates. Afterwards the CE |
+ * ranges of Han characters and of the Jamo blocks are computed, so that |
+ * minLengthInChars() can handle those characters without table entries. |
+ * |
+ * @param collator       collator to analyze; cloned when CLONE_COLLATOR is defined |
+ * @param cacheKey       key under which the object is cached; copied |
+ * @param cacheKeyLength number of bytes to copy from `cacheKey` |
+ * @param status         in/out error code; on failure the object is only |
+ *                       partially set up but can still be destroyed |
+ */ |
+CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status) |
+    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL) |
+{ |
+    // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]] |
+    // i.e. other, control, private use, format, surrogate |
+    U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20); |
+    U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20); |
+    USet *charsToTest = uset_openPattern(test_pattern, 20, &status); |
+ |
+    // Han ext. A, Han, Jamo, Hangul, Han Ext. B |
+    // i.e. all the characters we handle implicitly |
+    U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70); |
+    U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70); |
+    USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status); |
+ |
+    if (U_FAILURE(status)) { |
+        // One of the two sets may have been opened before the other failed. |
+        if (charsToRemove != NULL) { |
+            uset_close(charsToRemove); |
+        } |
+ |
+        if (charsToTest != NULL) { |
+            uset_close(charsToTest); |
+        } |
+ |
+        return; |
+    } |
+ |
+    USet *expansions = uset_openEmpty(); |
+    USet *contractions = uset_openEmpty(); |
+    int32_t itemCount;  // declared up front so that "goto bail" does not skip an initialization |
+ |
+    if (expansions == NULL || contractions == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+        goto bail; |
+    } |
+ |
+#ifdef CACHE_CELISTS |
+    charsToCEList = new StringToCEsMap(status); |
+ |
+    if (charsToCEList == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+    } |
+ |
+    if (U_FAILURE(status)) { |
+        goto bail; |
+    } |
+#else |
+    charsToCEList = NULL; |
+#endif |
+ |
+    ceToCharsStartingWith = new CEToStringsMap(status); |
+ |
+    if (ceToCharsStartingWith == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+    } |
+ |
+    if (U_FAILURE(status)) { |
+        goto bail; |
+    } |
+ |
+    // Keys that fit are stored in the in-object buffer, longer ones on the heap. |
+    if (cacheKeyLength > KEY_BUFFER_SIZE) { |
+        key = NEW_ARRAY(char, cacheKeyLength); |
+ |
+        if (key == NULL) { |
+            status = U_MEMORY_ALLOCATION_ERROR; |
+            goto bail; |
+        } |
+    } else { |
+        key = keyBuffer; |
+    } |
+ |
+    ARRAY_COPY(key, cacheKey, cacheKeyLength); |
+ |
+#ifdef CLONE_COLLATOR |
+    coll = ucol_safeClone(collator, NULL, NULL, &status); |
+ |
+    if (U_FAILURE(status)) { |
+        goto bail; |
+    } |
+#else |
+    coll = collator; |
+#endif |
+ |
+    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); |
+ |
+    if (U_FAILURE(status)) { |
+        goto bail; |
+    } |
+ |
+    uset_addAll(charsToTest, contractions); |
+    uset_addAll(charsToTest, expansions); |
+    uset_removeAll(charsToTest, charsToRemove); |
+ |
+    itemCount = uset_getItemCount(charsToTest); |
+    for (int32_t item = 0; item < itemCount && U_SUCCESS(status); item += 1) { |
+        UChar32 start = 0, end = 0; |
+        UChar buffer[16]; |
+        int32_t len = uset_getItem(charsToTest, item, &start, &end, |
+                                   buffer, 16, &status); |
+ |
+        if (U_FAILURE(status)) { |
+            // Includes strings that do not fit into `buffer`: the reported |
+            // length would then exceed what the buffer actually holds. |
+            break; |
+        } |
+ |
+        if (len == 0) { |
+            // The item is a range of code points. |
+            for (UChar32 ch = start; ch <= end && U_SUCCESS(status); ch += 1) { |
+                addStartingCE(coll, ceToCharsStartingWith, charsToCEList, new UnicodeString(ch), status); |
+            } |
+        } else if (len > 0) { |
+            // The item is a string (a contraction or an expansion). |
+            addStartingCE(coll, ceToCharsStartingWith, charsToCEList, new UnicodeString(buffer, len), status); |
+        } else { |
+            // shouldn't happen... |
+        } |
+    } |
+ |
+bail: |
+    uset_close(contractions); |
+    uset_close(expansions); |
+    uset_close(charsToRemove); |
+    uset_close(charsToTest); |
+ |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+ |
+    UChar32 hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A, |
+                           UCOL_FIRST_HAN_B, UCOL_LAST_HAN_B}; |
+    UChar  jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_JAMO, UCOL_LAST_T_JAMO}; |
+    UnicodeString hanString = UnicodeString::fromUTF32(hanRanges, ARRAY_SIZE(hanRanges)); |
+    UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges)); |
+    CEList hanList(coll, hanString, status); |
+    CEList jamoList(coll, jamoString, status); |
+    int32_t j = 0; |
+ |
+    if (U_FAILURE(status)) { |
+        return; |
+    } |
+ |
+    // One limit per Jamo boundary character; continuation CEs are skipped. |
+    // Only four limits are used, so never store more than that even if a |
+    // tailoring makes the boundary characters generate extra CEs. |
+    for (int32_t c = 0; c < jamoList.size(); c += 1) { |
+        uint32_t jce = jamoList[c]; |
+ |
+        if (! isContinuation(jce) && j < 4) { |
+            jamoLimits[j++] = jce; |
+        } |
+    } |
+ |
+    // Make the last limit exclusive. |
+    jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT); |
+ |
+    minHan = 0xFFFFFFFF; |
+    maxHan = 0; |
+ |
+    // Han characters generate two CEs each; only the first of each pair is examined. |
+    for (int32_t h = 0; h < hanList.size(); h += 2) { |
+        uint32_t han = (uint32_t) hanList[h]; |
+ |
+        if (han < minHan) { |
+            minHan = han; |
+        } |
+ |
+        if (han > maxHan) { |
+            maxHan = han; |
+        } |
+    } |
+ |
+    // Make the upper bound exclusive. |
+    maxHan += (1 << UCOL_PRIMARYORDERSHIFT); |
+} |
+ |
+/** |
+ * Releases the cloned collator (when CLONE_COLLATOR is defined), the key if |
+ * it is not the in-object buffer, and the lookup maps. |
+ */ |
+CollData::~CollData() |
+{ |
+#ifdef CLONE_COLLATOR |
+    ucol_close(coll); |
+#endif |
+ |
+    const UBool keyIsSeparate = (key != keyBuffer); |
+ |
+    if (keyIsSeparate) { |
+        DELETE_ARRAY(key); |
+    } |
+ |
+    delete ceToCharsStartingWith; |
+ |
+#ifdef CACHE_CELISTS |
+    delete charsToCEList; |
+#endif |
+} |
+ |
+/** @return the collator stored in this object */ |
+UCollator *CollData::getCollator() const |
+{ |
+    UCollator *collator = coll; |
+ |
+    return collator; |
+} |
+ |
+/** @return the list of strings recorded under `ce`, as found by the CE-to-strings map */ |
+const StringList *CollData::getStringList(int32_t ce) const |
+{ |
+    const StringList *list = ceToCharsStartingWith->getStringList(ce); |
+ |
+    return list; |
+} |
+ |
+/** |
+ * Returns the CE list for `string`. |
+ * |
+ * With CACHE_CELISTS the list comes from the cache. Otherwise a new list is |
+ * built on every call and must be handed back through freeCEList(). |
+ * |
+ * @return the list, or NULL if it could not be built |
+ */ |
+const CEList *CollData::getCEList(const UnicodeString *string) const |
+{ |
+#ifdef CACHE_CELISTS |
+    return charsToCEList->get(string); |
+#else |
+    UErrorCode status = U_ZERO_ERROR; |
+    const CEList *list = new CEList(coll, *string, status); |
+ |
+    if (U_SUCCESS(status)) { |
+        return list; |
+    } |
+ |
+    delete list; |
+ |
+    return NULL; |
+#endif |
+} |
+ |
+/** |
+ * Counterpart of getCEList(): deletes `list` unless CACHE_CELISTS is defined, |
+ * in which case the call does nothing. |
+ */ |
+void CollData::freeCEList(const CEList *list) |
+{ |
+#ifndef CACHE_CELISTS |
+    const CEList *doomed = list; |
+ |
+    delete doomed; |
+#endif |
+} |
+ |
+// Recursive, memoized helper: computes the length in UChars of the shortest |
+// string whose CEs match ceList from `offset` to its end. |
+// |
+// ceList  - the CEs to cover |
+// offset  - index of the first CE to cover; it indexes `history` without a |
+//           range check |
+// history - one slot per CE; a value >= 0 is a result computed earlier for |
+//           that offset and is returned as is |
+// |
+// Returns the length, or -1 if no combination of known strings and |
+// implicitly handled Han/Jamo CEs covers the CEs. Only successful results |
+// are stored in `history`; a -1 result is not cached. |
+int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const |
+{ |
+ // find out shortest string for the longest sequence of ces. |
+ // this can probably be folded with the minLengthCache... |
+ |
+ if (history[offset] >= 0) { |
+ return history[offset]; |
+ } |
+ |
+ uint32_t ce = ceList->get(offset); |
+ int32_t maxOffset = ceList->size(); |
+ int32_t shortestLength = INT32_MAX; |
+ const StringList *strings = ceToCharsStartingWith->getStringList(ce); |
+ |
+ // First try every known string whose CE list starts with this CE. |
+ if (strings != NULL) { |
+ int32_t stringCount = strings->size(); |
+ |
+ for (int32_t s = 0; s < stringCount; s += 1) { |
+ const UnicodeString *string = strings->get(s); |
+#ifdef CACHE_CELISTS |
+ const CEList *ceList2 = charsToCEList->get(string); |
+#else |
+ UErrorCode status = U_ZERO_ERROR; |
+ const CEList *ceList2 = new CEList(coll, *string, status); |
+ |
+ if (U_FAILURE(status)) { |
+ delete ceList2; |
+ ceList2 = NULL; |
+ } |
+#endif |
+ |
+ // matchesAt() returns FALSE for a NULL list, so a failed build is simply skipped. |
+ if (ceList->matchesAt(offset, ceList2)) { |
+ int32_t clength = ceList2->size(); |
+ int32_t slength = string->length(); |
+ int32_t roffset = offset + clength; |
+ int32_t rlength = 0; |
+ |
+ // CEs remain after this string: recurse for the rest. |
+ if (roffset < maxOffset) { |
+ rlength = minLengthInChars(ceList, roffset, history); |
+ |
+ if (rlength <= 0) { |
+ // delete before continue to avoid memory leak. |
+#ifndef CACHE_CELISTS |
+ delete ceList2; |
+#endif |
+ // ignore any dead ends |
+ continue; |
+ } |
+ } |
+ |
+ if (shortestLength > slength + rlength) { |
+ shortestLength = slength + rlength; |
+ } |
+ } |
+ |
+#ifndef CACHE_CELISTS |
+ delete ceList2; |
+#endif |
+ } |
+ } |
+ |
+ if (shortestLength == INT32_MAX) { |
+ // No matching strings at this offset. See if |
+ // the CE is in a range we can handle manually. |
+ if (ce >= minHan && ce < maxHan) { |
+ // all han have implicit orders which |
+ // generate two CEs. |
+ int32_t roffset = offset + 2; |
+ int32_t rlength = 0; |
+ |
+ //history[roffset++] = -1; |
+ //history[roffset++] = 1; |
+ |
+ if (roffset < maxOffset) { |
+ rlength = minLengthInChars(ceList, roffset, history); |
+ } |
+ |
+ if (rlength < 0) { |
+ return -1; |
+ } |
+ |
+ // One Han character plus whatever the remaining CEs need. |
+ shortestLength = 1 + rlength; |
+ goto have_shortest; |
+ } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) { |
+ int32_t roffset = offset; |
+ int32_t rlength = 0; |
+ |
+ // **** this loop may not handle archaic Hangul correctly **** |
+ // NOTE(review): j also advances when a continuation CE is skipped - confirm this is intended. |
+ for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset += 1) { |
+ uint32_t jce = ceList->get(roffset); |
+ |
+ // Some Jamo have 24-bit primary order; skip the |
+ // 2nd CE. This should always be OK because if |
+ // we're still in the loop all we've seen are |
+ // a series of Jamo in LVT order. |
+ if (isContinuation(jce)) { |
+ continue; |
+ } |
+ |
+ if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) { |
+ break; |
+ } |
+ } |
+ |
+ if (roffset == offset) { |
+ // we started with a non-L Jamo... |
+ // just say it comes from a single character |
+ roffset += 1; |
+ |
+ // See if the single Jamo has a 24-bit order. |
+ if (roffset < maxOffset && isContinuation(ceList->get(roffset))) { |
+ roffset += 1; |
+ } |
+ } |
+ |
+ if (roffset < maxOffset) { |
+ rlength = minLengthInChars(ceList, roffset, history); |
+ } |
+ |
+ if (rlength < 0) { |
+ return -1; |
+ } |
+ |
+ // The Jamo CEs consumed above are counted as one character. |
+ shortestLength = 1 + rlength; |
+ goto have_shortest; |
+ } |
+ |
+ // Can't handle it manually either. Just move on. |
+ return -1; |
+ } |
+ |
+have_shortest: |
+ // Remember the result so later calls for this offset return immediately. |
+ history[offset] = shortestLength; |
+ |
+ return shortestLength; |
+} |
+ |
+/** |
+ * Computes the length in UChars of the shortest string whose CEs match |
+ * `ceList` from `offset` to its end. |
+ * |
+ * @param ceList CEs to cover |
+ * @param offset index of the first CE to cover |
+ * @return the minimum length; 0 when `offset` equals the list size (nothing |
+ *         left to cover); -1 when `ceList` is NULL, `offset` is outside the |
+ *         list, the scratch array cannot be allocated, or the CEs cannot be |
+ *         covered |
+ */ |
+int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const |
+{ |
+    if (ceList == NULL) { |
+        return -1; |
+    } |
+ |
+    int32_t clength = ceList->size(); |
+ |
+    // The memo array below has exactly clength slots and is indexed by |
+    // offset, so offsets outside [0, clength) must never reach the helper. |
+    if (offset < 0 || offset > clength) { |
+        return -1; |
+    } |
+ |
+    if (offset == clength) { |
+        return 0; |
+    } |
+ |
+    int32_t *history = NEW_ARRAY(int32_t, clength); |
+ |
+    if (history == NULL) { |
+        return -1; |
+    } |
+ |
+    // -1 marks "not computed yet". |
+    for (int32_t i = 0; i < clength; i += 1) { |
+        history[i] = -1; |
+    } |
+ |
+    int32_t minLength = minLengthInChars(ceList, offset, history); |
+ |
+    DELETE_ARRAY(history); |
+ |
+    return minLength; |
+} |
+ |
+/** |
+ * Returns the shared CollData for `collator`, creating it on first use. |
+ * Every successful call should be balanced by a call to close(). |
+ * |
+ * @param collator collator to describe; NULL yields U_ILLEGAL_ARGUMENT_ERROR |
+ * @param status   in/out error code; set to U_MEMORY_ALLOCATION_ERROR when |
+ *                 the global cache cannot be created |
+ * @return the CollData, or NULL on failure |
+ */ |
+CollData *CollData::open(UCollator *collator, UErrorCode &status) |
+{ |
+    if (U_FAILURE(status)) { |
+        return NULL; |
+    } |
+ |
+    if (collator == NULL) { |
+        status = U_ILLEGAL_ARGUMENT_ERROR; |
+        return NULL; |
+    } |
+ |
+    CollDataCache *cache = getCollDataCache(); |
+ |
+    // getCollDataCache() returns NULL when the cache could not be created. |
+    if (cache == NULL) { |
+        status = U_MEMORY_ALLOCATION_ERROR; |
+        return NULL; |
+    } |
+ |
+    return cache->get(collator, status); |
+} |
+ |
+/** |
+ * Releases one reference obtained through open(). The object stays in the |
+ * cache until the cache is flushed or freed. |
+ * |
+ * @param collData object returned by open(); NULL is ignored |
+ */ |
+void CollData::close(CollData *collData) |
+{ |
+    if (collData == NULL) { |
+        return; |
+    } |
+ |
+    CollDataCache *cache = getCollDataCache(); |
+ |
+    // getCollDataCache() returns NULL when the cache could not be created. |
+    if (cache != NULL) { |
+        cache->unref(collData); |
+    } |
+} |
+ |
+CollDataCache *CollData::collDataCache = NULL; |
+ |
+/** |
+ * Returns the global CollData cache, creating it on first use and registering |
+ * coll_data_cleanup so that it is freed at library cleanup. |
+ * |
+ * The cache is built without holding the global mutex. If another thread |
+ * installs a cache in the meantime, the one built here is discarded and the |
+ * installed one is returned. |
+ * |
+ * @return the cache, or NULL if it could not be created |
+ */ |
+CollDataCache *CollData::getCollDataCache() |
+{ |
+    UErrorCode status = U_ZERO_ERROR; |
+    CollDataCache *cache = NULL; |
+ |
+    UMTX_CHECK(NULL, collDataCache, cache); |
+ |
+    if (cache == NULL) { |
+        CollDataCache *newCache = new CollDataCache(status); |
+ |
+        if (newCache == NULL || U_FAILURE(status)) { |
+            delete newCache; |
+            return NULL; |
+        } |
+ |
+        umtx_lock(NULL); |
+        if (collDataCache == NULL) { |
+            collDataCache = newCache; |
+            newCache = NULL; |
+ |
+            ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup); |
+        } |
+ |
+        // Read the winner while still holding the mutex. |
+        cache = collDataCache; |
+        umtx_unlock(NULL); |
+ |
+        // Non-NULL only if another thread installed its cache first. |
+        delete newCache; |
+    } |
+ |
+    return cache; |
+} |
+ |
+/** |
+ * Detaches the global cache and deletes it. If the global pointer has already |
+ * been cleared by the time the mutex is taken, nothing is deleted. |
+ */ |
+void CollData::freeCollDataCache() |
+{ |
+    CollDataCache *doomed = NULL; |
+ |
+    UMTX_CHECK(NULL, collDataCache, doomed); |
+ |
+    if (doomed == NULL) { |
+        return; |
+    } |
+ |
+    umtx_lock(NULL); |
+    if (collDataCache == NULL) { |
+        // Somebody else detached it first; leave the deletion to them. |
+        doomed = NULL; |
+    } else { |
+        collDataCache = NULL; |
+    } |
+    umtx_unlock(NULL); |
+ |
+    delete doomed; |
+} |
+ |
+/** Flushes the global cache, if one exists. */ |
+void CollData::flushCollDataCache() |
+{ |
+    CollDataCache *cache = NULL; |
+ |
+    UMTX_CHECK(NULL, collDataCache, cache); |
+ |
+    if (cache == NULL) { |
+        return; |
+    } |
+ |
+    // **** this will fail if another thread **** |
+    // **** deletes the cache at this point   **** |
+    cache->flush(); |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif // #if !UCONFIG_NO_COLLATION |
Property changes on: icu46/source/i18n/colldata.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |