Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(370)

Unified Diff: icu46/source/i18n/colldata.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « icu46/source/i18n/coll.cpp ('k') | icu46/source/i18n/coptccal.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: icu46/source/i18n/colldata.cpp
===================================================================
--- icu46/source/i18n/colldata.cpp (revision 0)
+++ icu46/source/i18n/colldata.cpp (revision 0)
@@ -0,0 +1,1100 @@
+/*
+ ******************************************************************************
+ * Copyright (C) 1996-2009, International Business Machines *
+ * Corporation and others. All Rights Reserved. *
+ ******************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/unistr.h"
+#include "unicode/putil.h"
+#include "unicode/usearch.h"
+
+#include "cmemory.h"
+#include "unicode/coll.h"
+#include "unicode/tblcoll.h"
+#include "unicode/coleitr.h"
+#include "unicode/ucoleitr.h"
+
+#include "unicode/regex.h" // TODO: make conditional on regexp being built.
+
+#include "unicode/uniset.h"
+#include "unicode/uset.h"
+#include "unicode/ustring.h"
+#include "hash.h"
+#include "uhash.h"
+#include "ucln_in.h"
+#include "ucol_imp.h"
+#include "umutex.h"
+
+#include "unicode/colldata.h"
+
+U_NAMESPACE_BEGIN
+
+// Number of elements in a true array (not a pointer).
+#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
+// Allocates raw storage for count objects of type via uprv_malloc; no constructors are run.
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
+// Releases storage obtained from NEW_ARRAY via uprv_free.
+#define DELETE_ARRAY(array) uprv_free((void *) (array))
+// Byte-wise copy of count elements from src to dst; the element size is taken from src.
+#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CEList)
+
+// Optional instrumentation counters, compiled in only when INSTRUMENT_CELIST is defined.
+#ifdef INSTRUMENT_CELIST
+int32_t CEList::_active = 0;
+int32_t CEList::_histogram[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+#endif
+
+/**
+ * Builds the list of collation elements (CEs) that coll produces for string.
+ *
+ * Each CE is masked down to the collator's strength. When alternate handling
+ * is UCOL_SHIFTED, CEs below the variable top are reduced to their primary
+ * weight (quaternary strength and above) or dropped. Ignorable CEs are
+ * skipped, and the continuation marker is restored on continuation CEs.
+ *
+ * @param coll   collator used to generate the CEs
+ * @param string text to convert
+ * @param status in/out error code; on failure the list is left empty
+ */
+CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
+    : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
+{
+    UCollationElements *elems = ucol_openElements(coll, string.getBuffer(), string.length(), &status);
+    UCollationStrength strength = ucol_getStrength(coll);
+    UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
+    uint32_t variableTop = ucol_getVariableTop(coll, &status);
+    uint32_t strengthMask = 0;
+    int32_t order;
+
+    if (U_FAILURE(status)) {
+        // The iterator may have been opened successfully before one of the
+        // later calls failed; close it so this early return does not leak it.
+        if (elems != NULL) {
+            ucol_closeElements(elems);
+        }
+
+        return;
+    }
+
+    // **** only set flag if string has Han(gul) ****
+    ucol_forceHanImplicit(elems, &status);
+
+    // Accumulate the mask from the strongest level down; each case
+    // deliberately falls through to add the weaker levels.
+    switch (strength)
+    {
+    default:
+        strengthMask |= UCOL_TERTIARYORDERMASK;
+        /* fall through */
+
+    case UCOL_SECONDARY:
+        strengthMask |= UCOL_SECONDARYORDERMASK;
+        /* fall through */
+
+    case UCOL_PRIMARY:
+        strengthMask |= UCOL_PRIMARYORDERMASK;
+    }
+
+#ifdef INSTRUMENT_CELIST
+    _active += 1;
+    _histogram[0] += 1;
+#endif
+
+    // Start with the fixed buffer; add() switches to a heap array when it fills up.
+    ces = ceBuffer;
+
+    while ((order = ucol_next(elems, &status)) != UCOL_NULLORDER) {
+        // Remember the continuation flag now: the strength mask below clears it.
+        UBool cont = isContinuation(order);
+
+        order &= strengthMask;
+
+        if (toShift && variableTop > (uint32_t)order && (order & UCOL_PRIMARYORDERMASK) != 0) {
+            if (strength >= UCOL_QUATERNARY) {
+                order &= UCOL_PRIMARYORDERMASK;
+            } else {
+                order = UCOL_IGNORABLE;
+            }
+        }
+
+        if (order == UCOL_IGNORABLE) {
+            continue;
+        }
+
+        if (cont) {
+            order |= UCOL_CONTINUATION_MARKER;
+        }
+
+        add(order, status);
+    }
+
+    ucol_closeElements(elems);
+}
+
+/**
+ * Releases the CE array, unless it is still ceBuffer (add() replaces ces
+ * with a NEW_ARRAY allocation once the list outgrows its capacity).
+ */
+CEList::~CEList()
+{
+#ifdef INSTRUMENT_CELIST
+    _active -= 1;
+#endif
+
+    if (ces == ceBuffer) {
+        return;
+    }
+
+    DELETE_ARRAY(ces);
+}
+
+/**
+ * Appends one CE to the list, growing the backing array by
+ * CELIST_BUFFER_SIZE entries when it is full.
+ *
+ * @param ce     collation element to append
+ * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the
+ *               array cannot be grown. Nothing is done if it already
+ *               indicates failure.
+ */
+void CEList::add(uint32_t ce, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    if (listSize >= listMax) {
+        int32_t newMax = listMax + CELIST_BUFFER_SIZE;
+
+#ifdef INSTRUMENT_CELIST
+        // Clamp the bucket so long lists cannot write past the end of the
+        // histogram (StringList::add clamps its histogram the same way).
+        int32_t _h = listSize / CELIST_BUFFER_SIZE;
+
+        if (_h >= (int32_t) ARRAY_SIZE(_histogram)) {
+            _h = (int32_t) ARRAY_SIZE(_histogram) - 1;
+        }
+
+        _histogram[_h] += 1;
+#endif
+
+        uint32_t *newCEs = NEW_ARRAY(uint32_t, newMax);
+
+        if (newCEs == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        uprv_memcpy(newCEs, ces, listSize * sizeof(uint32_t));
+
+        // Only a previously heap-allocated array is released; ceBuffer is not.
+        if (ces != ceBuffer) {
+            DELETE_ARRAY(ces);
+        }
+
+        ces = newCEs;
+        listMax = newMax;
+    }
+
+    ces[listSize++] = ce;
+}
+
+/**
+ * Bounds-checked element access.
+ *
+ * @param index position in the list
+ * @return the CE at index, or UCOL_NULLORDER when index is out of range
+ */
+uint32_t CEList::get(int32_t index) const
+{
+    if (index < 0 || index >= listSize) {
+        return UCOL_NULLORDER;
+    }
+
+    return ces[index];
+}
+
+/**
+ * Unchecked element access; the caller must pass a valid index
+ * (use get() for a bounds-checked read).
+ *
+ * @param index position in the list
+ * @return reference to the CE stored at index
+ */
+uint32_t &CEList::operator[](int32_t index) const
+{
+    uint32_t *slot = ces + index;
+
+    return *slot;
+}
+
+/**
+ * Tests whether the CEs of other occur in this list starting at offset.
+ *
+ * @param offset start position in this list
+ * @param other  list to look for; may be NULL
+ * @return TRUE if every CE of other equals the corresponding CE of this
+ *         list; FALSE if other is NULL, offset is negative, or fewer than
+ *         other->size() CEs remain from offset
+ */
+UBool CEList::matchesAt(int32_t offset, const CEList *other) const
+{
+    // A negative offset would pass the remaining-size test below and then
+    // read before the start of ces, so reject it up front.
+    if (other == NULL || offset < 0 || listSize - offset < other->size()) {
+        return FALSE;
+    }
+
+    const int32_t otherSize = other->size();
+
+    for (int32_t j = 0; j < otherSize; j += 1) {
+        if (ces[offset + j] != (*other)[j]) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+/** @return the number of CEs currently stored in the list */
+int32_t CEList::size() const
+{
+    const int32_t count = listSize;
+
+    return count;
+}
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringList)
+
+// Optional instrumentation counters, compiled in only when INSTRUMENT_STRING_LIST is defined.
+#ifdef INSTRUMENT_STRING_LIST
+int32_t StringList::_lists = 0;
+int32_t StringList::_strings = 0;
+int32_t StringList::_histogram[101] = {0};
+#endif
+
+/**
+ * Creates an empty list with room for STRING_LIST_BUFFER_SIZE strings.
+ *
+ * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the
+ *               string array cannot be allocated. Nothing is allocated if
+ *               it already indicates failure.
+ */
+StringList::StringList(UErrorCode &status)
+    : strings(NULL), listMax(STRING_LIST_BUFFER_SIZE), listSize(0)
+{
+    if (! U_FAILURE(status)) {
+        strings = new UnicodeString [listMax];
+
+        if (strings == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        } else {
+#ifdef INSTRUMENT_STRING_LIST
+            _lists += 1;
+            _histogram[0] += 1;
+#endif
+        }
+    }
+}
+
+/** Destroys every string in the array and releases the array itself. */
+StringList::~StringList()
+{
+    UnicodeString *owned = strings;
+
+    delete[] owned;
+}
+
+/**
+ * Appends a copy of *string to the list, growing the array by
+ * STRING_LIST_BUFFER_SIZE entries when it is full.
+ *
+ * @param string string to copy into the list
+ * @param status in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the
+ *               array cannot be grown. Nothing is done if it already
+ *               indicates failure.
+ */
+void StringList::add(const UnicodeString *string, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+#ifdef INSTRUMENT_STRING_LIST
+    _strings += 1;
+#endif
+
+    if (listSize >= listMax) {
+        int32_t newMax = listMax + STRING_LIST_BUFFER_SIZE;
+
+        UnicodeString *newStrings = new UnicodeString[newMax];
+
+        if (newStrings == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        // Copy element by element with UnicodeString assignment. A raw
+        // memcpy would duplicate the objects' internal buffer pointers,
+        // and the delete[] below would then free buffers that the new
+        // array still refers to.
+        for (int32_t i = 0; i < listSize; i += 1) {
+            newStrings[i] = strings[i];
+        }
+
+#ifdef INSTRUMENT_STRING_LIST
+        int32_t _h = listSize / STRING_LIST_BUFFER_SIZE;
+
+        if (_h > 100) {
+            _h = 100;
+        }
+
+        _histogram[_h] += 1;
+#endif
+
+        delete[] strings;
+        strings = newStrings;
+        listMax = newMax;
+    }
+
+    // The ctor initialized all the strings in
+    // the array to empty strings, so this
+    // is the same as copying the source string.
+    strings[listSize++].append(*string);
+}
+
+/**
+ * Appends a string built from count UChars starting at chars.
+ *
+ * @param chars  source characters
+ * @param count  number of UChars to take from chars
+ * @param status in/out error code, passed through to add(const UnicodeString *, UErrorCode &)
+ */
+void StringList::add(const UChar *chars, int32_t count, UErrorCode &status)
+{
+    // Wrap the characters in a temporary and reuse the UnicodeString overload.
+    const UnicodeString wrapped(chars, count);
+    const UnicodeString *source = &wrapped;
+
+    add(source, status);
+}
+
+/**
+ * Bounds-checked element access.
+ *
+ * @param index position in the list
+ * @return pointer to the string at index, or NULL when index is out of range
+ */
+const UnicodeString *StringList::get(int32_t index) const
+{
+    if (index < 0 || index >= listSize) {
+        return NULL;
+    }
+
+    return &strings[index];
+}
+
+/** @return the number of strings currently stored in the list */
+int32_t StringList::size() const
+{
+    const int32_t count = listSize;
+
+    return count;
+}
+
+
+U_CFUNC void deleteStringList(void *obj);
+
+// Maps a 32-bit CE value to the StringList registered under it.
+// Backed by a UHashtable with integer keys; the table deletes the
+// StringList values through deleteStringList.
+class CEToStringsMap : public UMemory
+{
+public:
+
+ CEToStringsMap(UErrorCode &status);
+ ~CEToStringsMap();
+
+ // Appends string to the list stored under ce, creating the list on first use.
+ void put(uint32_t ce, UnicodeString *string, UErrorCode &status);
+ // Returns the list stored under ce, or NULL if there is none.
+ StringList *getStringList(uint32_t ce) const;
+
+private:
+
+ // Stores stringList in the table under ce.
+ void putStringList(uint32_t ce, StringList *stringList, UErrorCode &status);
+ UHashtable *map;
+};
+
+/**
+ * Opens the integer-keyed hash table and installs deleteStringList as its
+ * value deleter.
+ *
+ * @param status in/out error code; the table is left NULL if it already
+ *               indicates failure
+ */
+CEToStringsMap::CEToStringsMap(UErrorCode &status)
+    : map(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    map = uhash_open(uhash_hashLong, uhash_compareLong,
+                     uhash_compareCaselessUnicodeString,
+                     &status);
+
+    if (! U_FAILURE(status)) {
+        uhash_setValueDeleter(map, deleteStringList);
+    }
+}
+
+/** Closes the hash table; its value deleter disposes of the stored lists. */
+CEToStringsMap::~CEToStringsMap()
+{
+    UHashtable *table = map;
+
+    uhash_close(table);
+}
+
+/**
+ * Appends string to the list stored under ce, creating and registering a
+ * new list if none exists yet.
+ *
+ * @param ce     CE value used as the key
+ * @param string string to copy into the list (the caller keeps ownership)
+ * @param status in/out error code; nothing is done if it already indicates
+ *               failure. Set to U_MEMORY_ALLOCATION_ERROR if a new list
+ *               cannot be allocated.
+ */
+void CEToStringsMap::put(uint32_t ce, UnicodeString *string, UErrorCode &status)
+{
+    // Bail out before touching the table: after a failed constructor
+    // map is NULL.
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    StringList *strings = getStringList(ce);
+
+    if (strings == NULL) {
+        strings = new StringList(status);
+
+        if (strings == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        if (U_FAILURE(status)) {
+            // The list object exists but could not allocate its array;
+            // dispose of it here instead of leaking it.
+            delete strings;
+            return;
+        }
+
+        putStringList(ce, strings, status);
+
+        if (U_FAILURE(status)) {
+            // The table has a value deleter, so a failed put has already
+            // disposed of the list; it must not be used any more.
+            return;
+        }
+    }
+
+    strings->add(string, status);
+}
+
+/**
+ * Looks up the list stored under ce.
+ *
+ * @param ce CE value used as the key
+ * @return the value uhash_iget finds for ce, cast to StringList *
+ */
+StringList *CEToStringsMap::getStringList(uint32_t ce) const
+{
+    void *found = uhash_iget(map, ce);
+
+    return (StringList *) found;
+}
+
+/**
+ * Stores stringList in the table under ce.
+ *
+ * @param ce         CE value used as the key
+ * @param stringList list to store
+ * @param status     in/out error code, passed through to uhash_iput
+ */
+void CEToStringsMap::putStringList(uint32_t ce, StringList *stringList, UErrorCode &status)
+{
+    void *value = (void *) stringList;
+
+    uhash_iput(map, ce, value, &status);
+}
+
+/** Hash-table value deleter: obj is a StringList created with new. */
+U_CFUNC void deleteStringList(void *obj)
+{
+    delete (StringList *) obj;
+}
+
+U_CFUNC void deleteCEList(void *obj);
+U_CFUNC void deleteUnicodeStringKey(void *obj);
+
+// Maps a UnicodeString to its CEList. Backed by a UHashtable that deletes
+// both its keys (deleteUnicodeStringKey) and its values (deleteCEList).
+class StringToCEsMap : public UMemory
+{
+public:
+ StringToCEsMap(UErrorCode &status);
+ ~StringToCEsMap();
+
+ // Stores ces under string.
+ void put(const UnicodeString *string, const CEList *ces, UErrorCode &status);
+ // Returns the list stored under string, or NULL if there is none.
+ const CEList *get(const UnicodeString *string);
+ // NOTE(review): no definition of free() appears in this file -- confirm it is unused.
+ void free(const CEList *list);
+
+private:
+
+
+ UHashtable *map;
+};
+
+/**
+ * Opens the UnicodeString-keyed hash table and installs deleters for both
+ * its values (deleteCEList) and its keys (deleteUnicodeStringKey).
+ *
+ * @param status in/out error code; the table is left NULL if it already
+ *               indicates failure
+ */
+StringToCEsMap::StringToCEsMap(UErrorCode &status)
+    : map(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    map = uhash_open(uhash_hashUnicodeString,
+                     uhash_compareUnicodeString,
+                     uhash_compareLong,
+                     &status);
+
+    if (! U_FAILURE(status)) {
+        uhash_setValueDeleter(map, deleteCEList);
+        uhash_setKeyDeleter(map, deleteUnicodeStringKey);
+    }
+}
+
+/** Closes the hash table; its deleters dispose of the stored keys and values. */
+StringToCEsMap::~StringToCEsMap()
+{
+    UHashtable *table = map;
+
+    uhash_close(table);
+}
+
+/**
+ * Stores ces in the table under string.
+ *
+ * @param string key
+ * @param ces    value
+ * @param status in/out error code, passed through to uhash_put
+ */
+void StringToCEsMap::put(const UnicodeString *string, const CEList *ces, UErrorCode &status)
+{
+    void *keyPtr   = (void *) string;
+    void *valuePtr = (void *) ces;
+
+    uhash_put(map, keyPtr, valuePtr, &status);
+}
+
+/**
+ * Looks up the CE list stored under string.
+ *
+ * @param string key to look up
+ * @return the value uhash_get finds for string, cast to const CEList *
+ */
+const CEList *StringToCEsMap::get(const UnicodeString *string)
+{
+    void *found = uhash_get(map, string);
+
+    return (const CEList *) found;
+}
+
+/** Hash-table value deleter: obj is a CEList created with new. */
+U_CFUNC void deleteCEList(void *obj)
+{
+    delete (CEList *) obj;
+}
+
+/** Hash-table key deleter: obj is a UnicodeString created with new. */
+U_CFUNC void deleteUnicodeStringKey(void *obj)
+{
+    delete (UnicodeString *) obj;
+}
+
+// One cache slot: a CollData plus the number of outstanding references to it.
+// The entry deletes its CollData when it is destroyed.
+class CollDataCacheEntry : public UMemory
+{
+public:
+ CollDataCacheEntry(CollData *theData);
+ ~CollDataCacheEntry();
+
+ CollData *data;
+ // Starts at 1; incremented by CollDataCache::get, decremented by CollDataCache::unref.
+ int32_t refCount;
+};
+
+/**
+ * Wraps theData in a cache entry holding one reference.
+ *
+ * @param theData the data this entry holds; deleted by the destructor
+ */
+CollDataCacheEntry::CollDataCacheEntry(CollData *theData)
+    : data(theData),
+      refCount(1)
+{
+}
+
+/** Deletes the held CollData; the reference count is not consulted. */
+CollDataCacheEntry::~CollDataCacheEntry()
+{
+    // check refCount?
+    CollData *owned = data;
+
+    delete owned;
+}
+
+// Cache of CollData objects keyed by a character string derived from the
+// collator (see getKey). Accesses to the hash table are guarded by lock.
+class CollDataCache : public UMemory
+{
+public:
+ CollDataCache(UErrorCode &status);
+ ~CollDataCache();
+
+ // Returns the cached CollData for collator, creating it if necessary.
+ CollData *get(UCollator *collator, UErrorCode &status);
+ // Drops one reference to collData's cache entry.
+ void unref(CollData *collData);
+
+ // Removes every entry whose reference count is zero or less.
+ void flush();
+
+private:
+ // Builds the key for collator; may return a heap buffer instead of keyBuffer.
+ static char *getKey(UCollator *collator, char *keyBuffer, int32_t *charBufferLength);
+ // Frees a heap key returned by getKey.
+ static void deleteKey(char *key);
+
+ UMTX lock;
+ UHashtable *cache;
+};
+
+// Key deleter installed on the CollDataCache hash table. Intentionally a
+// no-op, for the reason given in the body.
+U_CFUNC void deleteChars(void * /*obj*/)
+{
+ // char *chars = (char *) obj;
+ // All the key strings are owned by the
+ // CollData objects and don't need to
+ // be freed here.
+ //DELETE_ARRAY(chars);
+}
+
+/** Hash-table value deleter: obj is a CollDataCacheEntry created with new. */
+U_CFUNC void deleteCollDataCacheEntry(void *obj)
+{
+    delete (CollDataCacheEntry *) obj;
+}
+
+/**
+ * Opens the string-keyed cache table and installs its deleters: entries
+ * are deleted with deleteCollDataCacheEntry, keys with the no-op deleteChars.
+ *
+ * @param status in/out error code; the table is left NULL if it already
+ *               indicates failure
+ */
+CollDataCache::CollDataCache(UErrorCode &status)
+    : lock(0), cache(NULL)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    cache = uhash_open(uhash_hashChars, uhash_compareChars, uhash_compareLong, &status);
+
+    if (! U_FAILURE(status)) {
+        uhash_setValueDeleter(cache, deleteCollDataCacheEntry);
+        uhash_setKeyDeleter(cache, deleteChars);
+    }
+}
+
+/**
+ * Closes the cache table while holding the lock, then destroys the lock.
+ */
+CollDataCache::~CollDataCache()
+{
+    umtx_lock(&lock);
+    {
+        UHashtable *table = cache;
+
+        uhash_close(table);
+        cache = NULL;
+    }
+    umtx_unlock(&lock);
+
+    umtx_destroy(&lock);
+}
+
+/**
+ * Returns the CollData for collator, creating and caching it on first use.
+ *
+ * The CollData is built outside the lock. If another thread inserted an
+ * entry for the same key in the meantime, the freshly built object is
+ * discarded and the cached one is returned with its reference count bumped.
+ *
+ * @param collator collator whose data is wanted
+ * @param status   in/out error code; on failure it keeps the code reported
+ *                 by the failing step
+ * @return the cached or newly created CollData, or NULL on failure
+ */
+CollData *CollDataCache::get(UCollator *collator, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    char keyBuffer[KEY_BUFFER_SIZE];
+    int32_t keyLength = KEY_BUFFER_SIZE;
+    char *key = getKey(collator, keyBuffer, &keyLength);
+    CollData *result = NULL, *newData = NULL;
+    CollDataCacheEntry *entry = NULL, *newEntry = NULL;
+
+    if (key == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    umtx_lock(&lock);
+    entry = (CollDataCacheEntry *) uhash_get(cache, key);
+
+    if (entry == NULL) {
+        umtx_unlock(&lock);
+
+        newData = new CollData(collator, key, keyLength, status);
+
+        if (newData == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        } else if (! U_FAILURE(status)) {
+            newEntry = new CollDataCacheEntry(newData);
+
+            if (newEntry == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+            }
+        }
+
+        if (U_FAILURE(status)) {
+            // No entry exists on any failure path here, so newData (if it
+            // was created) is still ours to delete.
+            delete newData;
+
+            if (key != keyBuffer) {
+                deleteKey(key);
+            }
+
+            return NULL;
+        }
+
+        umtx_lock(&lock);
+        entry = (CollDataCacheEntry *) uhash_get(cache, key);
+
+        if (entry == NULL) {
+            // The table is keyed on the CollData's own copy of the key,
+            // so the local key buffer can be released afterwards.
+            uhash_put(cache, newData->key, newEntry, &status);
+            umtx_unlock(&lock);
+
+            if (key != keyBuffer) {
+                deleteKey(key);
+            }
+
+            if (U_FAILURE(status)) {
+                // A value deleter is installed, so the failed put has
+                // already deleted newEntry, which in turn deleted newData.
+                // Deleting either again would be a double free.
+                return NULL;
+            }
+
+            return newData;
+        }
+    }
+
+    result = entry->data;
+    entry->refCount += 1;
+    umtx_unlock(&lock);
+
+    if (key != keyBuffer) {
+        deleteKey(key);
+    }
+
+    // Another thread won the race: drop our copy. The entry's destructor
+    // deletes newData, so newData must not be deleted separately.
+    delete newEntry;
+
+    return result;
+}
+
+/**
+ * Drops one reference to the cache entry stored under collData's key.
+ * Does nothing if the table has no entry for that key.
+ *
+ * @param collData data previously returned by get()
+ */
+void CollDataCache::unref(CollData *collData)
+{
+    umtx_lock(&lock);
+
+    CollDataCacheEntry *found = (CollDataCacheEntry *) uhash_get(cache, collData->key);
+
+    if (found != NULL) {
+        found->refCount -= 1;
+    }
+
+    umtx_unlock(&lock);
+}
+
+/**
+ * Builds the cache key for collator from its short definition string.
+ *
+ * @param collator        collator to describe
+ * @param keyBuffer       caller-supplied buffer tried first
+ * @param keyBufferLength in: capacity of keyBuffer; out: capacity of the
+ *                        returned buffer if a larger one was allocated
+ * @return keyBuffer if the key fits; otherwise a NEW_ARRAY buffer that the
+ *         caller must release with deleteKey(); NULL if that buffer cannot
+ *         be allocated
+ */
+char *CollDataCache::getKey(UCollator *collator, char *keyBuffer, int32_t *keyBufferLength)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
+
+    if (len >= *keyBufferLength) {
+        // round to even length, leaving room for terminating null
+        int32_t newLength = (len + 2) & ~1;
+        char *newBuffer = NEW_ARRAY(char, newLength);
+
+        if (newBuffer == NULL) {
+            // Report the failure instead of writing through a NULL pointer.
+            return NULL;
+        }
+
+        *keyBufferLength = newLength;
+        keyBuffer = newBuffer;
+        status = U_ZERO_ERROR;
+
+        len = ucol_getShortDefinitionString(collator, NULL, keyBuffer, *keyBufferLength, &status);
+    }
+
+    keyBuffer[len] = '\0';
+
+    return keyBuffer;
+}
+
+/**
+ * Removes every cache entry whose reference count is zero or less,
+ * holding the lock for the whole scan.
+ */
+void CollDataCache::flush()
+{
+    int32_t pos = -1;
+
+    umtx_lock(&lock);
+
+    for (const UHashElement *element = uhash_nextElement(cache, &pos);
+         element != NULL;
+         element = uhash_nextElement(cache, &pos)) {
+        CollDataCacheEntry *entry = (CollDataCacheEntry *) element->value.pointer;
+
+        if (entry->refCount > 0) {
+            continue;
+        }
+
+        uhash_removeElement(cache, element);
+    }
+
+    umtx_unlock(&lock);
+}
+
+/**
+ * Releases a key buffer that getKey() allocated with NEW_ARRAY.
+ *
+ * @param key buffer to free
+ */
+void CollDataCache::deleteKey(char *key)
+{
+    char *buffer = key;
+
+    DELETE_ARRAY(buffer);
+}
+
+U_CDECL_BEGIN
+/**
+ * Cleanup callback registered by CollData::getCollDataCache(): frees the
+ * global CollData cache.
+ *
+ * @return always TRUE
+ */
+static UBool coll_data_cleanup(void)
+{
+    const UBool done = TRUE;
+
+    CollData::freeCollDataCache();
+
+    return done;
+}
+U_CDECL_END
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollData)
+
+/**
+ * Default constructor. Sets the pointers that the destructor releases to
+ * NULL, so destroying a default-constructed object does not close or free
+ * uninitialized pointers.
+ */
+CollData::CollData()
+    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
+{
+    // nothing
+}
+
+#define CLONE_COLLATOR
+
+//#define CACHE_CELISTS
+/**
+ * Builds the collation data for collator.
+ *
+ * For every character or string in the test set (assigned, non-"other"
+ * characters plus the collator's contractions and expansions, minus the
+ * Han/Jamo/Hangul ranges handled implicitly) the CE list is computed and
+ * the string is registered under its first CE. The CE limits for Jamo and
+ * Han are then derived for the implicit handling in minLengthInChars().
+ *
+ * @param collator       collator to analyze (cloned when CLONE_COLLATOR is defined)
+ * @param cacheKey       cache key for this collator; copied into the object
+ * @param cacheKeyLength number of bytes to copy from cacheKey
+ * @param status         in/out error code
+ */
+CollData::CollData(UCollator *collator, char *cacheKey, int32_t cacheKeyLength, UErrorCode &status)
+    : coll(NULL), charsToCEList(NULL), ceToCharsStartingWith(NULL), key(NULL)
+{
+    // [:c:] == [[:cn:][:cc:][:co:][:cf:][:cs:]]
+    // i.e. other, control, private use, format, surrogate
+    U_STRING_DECL(test_pattern, "[[:assigned:]-[:c:]]", 20);
+    U_STRING_INIT(test_pattern, "[[:assigned:]-[:c:]]", 20);
+    USet *charsToTest = uset_openPattern(test_pattern, 20, &status);
+
+    // Han ext. A, Han, Jamo, Hangul, Han Ext. B
+    // i.e. all the characters we handle implicitly
+    U_STRING_DECL(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
+    U_STRING_INIT(remove_pattern, "[[\\u3400-\\u9FFF][\\u1100-\\u11F9][\\uAC00-\\uD7AF][\\U00020000-\\U0002A6DF]]", 70);
+    USet *charsToRemove = uset_openPattern(remove_pattern, 70, &status);
+
+    if (U_FAILURE(status)) {
+        // The first set may have been opened before the second pattern
+        // failed; close whatever exists so it is not leaked.
+        if (charsToRemove != NULL) {
+            uset_close(charsToRemove);
+        }
+
+        if (charsToTest != NULL) {
+            uset_close(charsToTest);
+        }
+
+        return;
+    }
+
+    USet *expansions = uset_openEmpty();
+    USet *contractions = uset_openEmpty();
+    int32_t itemCount;
+
+#ifdef CACHE_CELISTS
+    charsToCEList = new StringToCEsMap(status);
+
+    if (charsToCEList == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        goto bail;
+    }
+#else
+    charsToCEList = NULL;
+#endif
+
+    ceToCharsStartingWith = new CEToStringsMap(status);
+
+    if (ceToCharsStartingWith == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+
+    if (U_FAILURE(status)) {
+        goto bail;
+    }
+
+    // Keys that fit use the object's keyBuffer; longer ones get their own array.
+    if (cacheKeyLength > KEY_BUFFER_SIZE) {
+        key = NEW_ARRAY(char, cacheKeyLength);
+
+        if (key == NULL) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto bail;
+        }
+    } else {
+        key = keyBuffer;
+    }
+
+    ARRAY_COPY(key, cacheKey, cacheKeyLength);
+
+#ifdef CLONE_COLLATOR
+    coll = ucol_safeClone(collator, NULL, NULL, &status);
+
+    if (U_FAILURE(status)) {
+        goto bail;
+    }
+#else
+    coll = collator;
+#endif
+
+    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);
+
+    uset_addAll(charsToTest, contractions);
+    uset_addAll(charsToTest, expansions);
+    uset_removeAll(charsToTest, charsToRemove);
+
+    itemCount = uset_getItemCount(charsToTest);
+    for(int32_t item = 0; item < itemCount; item += 1) {
+        UChar32 start = 0, end = 0;
+        UChar buffer[16];
+        int32_t len = uset_getItem(charsToTest, item, &start, &end,
+                                   buffer, 16, &status);
+
+        if (len == 0) {
+            // The item is a code point range: register each character.
+            for (UChar32 ch = start; ch <= end; ch += 1) {
+                UnicodeString *st = new UnicodeString(ch);
+
+                if (st == NULL) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
+
+                CEList *ceList = new CEList(coll, *st, status);
+
+                if (ceList == NULL) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    delete st;
+                    break;
+                }
+
+                ceToCharsStartingWith->put(ceList->get(0), st, status);
+
+#ifdef CACHE_CELISTS
+                charsToCEList->put(st, ceList, status);
+#else
+                delete ceList;
+                delete st;
+#endif
+
+                // Stop walking the range once something has failed.
+                if (U_FAILURE(status)) {
+                    break;
+                }
+            }
+        } else if (len > 0) {
+            // The item is a multi-character string.
+            UnicodeString *st = new UnicodeString(buffer, len);
+
+            if (st == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                break;
+            }
+
+            CEList *ceList = new CEList(coll, *st, status);
+
+            if (ceList == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                delete st;
+                break;
+            }
+
+            ceToCharsStartingWith->put(ceList->get(0), st, status);
+
+#ifdef CACHE_CELISTS
+            charsToCEList->put(st, ceList, status);
+#else
+            delete ceList;
+            delete st;
+#endif
+        } else {
+            // shouldn't happen...
+        }
+
+        if (U_FAILURE(status)) {
+            break;
+        }
+    }
+
+bail:
+    uset_close(contractions);
+    uset_close(expansions);
+    uset_close(charsToRemove);
+    uset_close(charsToTest);
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    UChar32 hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A,
+                           UCOL_FIRST_HAN_B, UCOL_LAST_HAN_B};
+    UChar jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_JAMO, UCOL_LAST_T_JAMO};
+    UnicodeString hanString = UnicodeString::fromUTF32(hanRanges, ARRAY_SIZE(hanRanges));
+    UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges));
+    CEList hanList(coll, hanString, status);
+    CEList jamoList(coll, jamoString, status);
+    int32_t j = 0;
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    // Record the first (non-continuation) CE of each Jamo boundary character.
+    for (int32_t c = 0; c < jamoList.size(); c += 1) {
+        uint32_t jce = jamoList[c];
+
+        if (! isContinuation(jce)) {
+            jamoLimits[j++] = jce;
+        }
+    }
+
+    // Make the last limit exclusive by bumping its primary weight by one.
+    jamoLimits[3] += (1 << UCOL_PRIMARYORDERSHIFT);
+
+    minHan = 0xFFFFFFFF;
+    maxHan = 0;
+
+    // Step by two: only every other CE of the Han list is examined.
+    for(int32_t h = 0; h < hanList.size(); h += 2) {
+        uint32_t han = (uint32_t) hanList[h];
+
+        if (han < minHan) {
+            minHan = han;
+        }
+
+        if (han > maxHan) {
+            maxHan = han;
+        }
+    }
+
+    // Make maxHan an exclusive upper bound, as for the Jamo limit above.
+    maxHan += (1 << UCOL_PRIMARYORDERSHIFT);
+}
+
+/**
+ * Closes the collator (when CLONE_COLLATOR is defined), frees the key
+ * unless it is keyBuffer, and deletes the lookup maps.
+ */
+CollData::~CollData()
+{
+#ifdef CLONE_COLLATOR
+    ucol_close(coll);
+#endif
+
+    const UBool keyIsSeparate = (key != keyBuffer);
+
+    if (keyIsSeparate) {
+        DELETE_ARRAY(key);
+    }
+
+    delete ceToCharsStartingWith;
+
+#ifdef CACHE_CELISTS
+    delete charsToCEList;
+#endif
+}
+
+/** @return the collator held by this object */
+UCollator *CollData::getCollator() const
+{
+    UCollator *held = coll;
+
+    return held;
+}
+
+/**
+ * Looks up the strings registered under ce.
+ *
+ * @param ce CE value to look up
+ * @return the list stored under ce in ceToCharsStartingWith
+ */
+const StringList *CollData::getStringList(int32_t ce) const
+{
+    const StringList *found = ceToCharsStartingWith->getStringList(ce);
+
+    return found;
+}
+
+/**
+ * Returns the CE list for string. Without CACHE_CELISTS a new list is
+ * created on each call and must be released with freeCEList().
+ *
+ * @param string text to convert
+ * @return the CE list, or NULL if building it failed
+ */
+const CEList *CollData::getCEList(const UnicodeString *string) const
+{
+#ifdef CACHE_CELISTS
+    return charsToCEList->get(string);
+#else
+    UErrorCode status = U_ZERO_ERROR;
+    const CEList *list = new CEList(coll, *string, status);
+
+    if (! U_FAILURE(status)) {
+        return list;
+    }
+
+    delete list;
+
+    return NULL;
+#endif
+}
+
+/**
+ * Releases a list obtained from getCEList(). With CACHE_CELISTS defined
+ * this does nothing.
+ *
+ * @param list list to release
+ */
+void CollData::freeCEList(const CEList *list)
+{
+#ifdef CACHE_CELISTS
+    // nothing to release
+#else
+    delete list;
+#endif
+}
+
+// Recursive worker: computes the length, in UChars, of the shortest string
+// whose CEs match ceList from offset to the end.
+//
+// history is a memo table indexed by offset; an entry >= 0 is a result
+// already computed for that offset. Results are stored only on success;
+// a -1 ("no match possible") return is not recorded.
+//
+// Returns the shortest length, or -1 if the CEs at offset can be matched
+// neither by a registered string nor by the implicit Han/Jamo handling.
+int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset, int32_t *history) const
+{
+ // find out shortest string for the longest sequence of ces.
+ // this can probably be folded with the minLengthCache...
+
+ if (history[offset] >= 0) {
+ return history[offset];
+ }
+
+ uint32_t ce = ceList->get(offset);
+ int32_t maxOffset = ceList->size();
+ int32_t shortestLength = INT32_MAX;
+ const StringList *strings = ceToCharsStartingWith->getStringList(ce);
+
+ // First try every registered string whose CE list starts with this CE.
+ if (strings != NULL) {
+ int32_t stringCount = strings->size();
+
+ for (int32_t s = 0; s < stringCount; s += 1) {
+ const UnicodeString *string = strings->get(s);
+#ifdef CACHE_CELISTS
+ const CEList *ceList2 = charsToCEList->get(string);
+#else
+ UErrorCode status = U_ZERO_ERROR;
+ const CEList *ceList2 = new CEList(coll, *string, status);
+
+ if (U_FAILURE(status)) {
+ delete ceList2;
+ ceList2 = NULL;
+ }
+#endif
+
+ // matchesAt returns FALSE for a NULL list, so a failed CEList is skipped here.
+ if (ceList->matchesAt(offset, ceList2)) {
+ int32_t clength = ceList2->size();
+ int32_t slength = string->length();
+ int32_t roffset = offset + clength;
+ int32_t rlength = 0;
+
+ // If CEs remain after this string, recurse for the rest.
+ if (roffset < maxOffset) {
+ rlength = minLengthInChars(ceList, roffset, history);
+
+ if (rlength <= 0) {
+ // delete before continue to avoid memory leak.
+#ifndef CACHE_CELISTS
+ delete ceList2;
+#endif
+ // ignore any dead ends
+ continue;
+ }
+ }
+
+ if (shortestLength > slength + rlength) {
+ shortestLength = slength + rlength;
+ }
+ }
+
+#ifndef CACHE_CELISTS
+ delete ceList2;
+#endif
+ }
+ }
+
+ if (shortestLength == INT32_MAX) {
+ // No matching strings at this offset. See if
+ // the CE is in a range we can handle manually.
+ if (ce >= minHan && ce < maxHan) {
+ // all han have implicit orders which
+ // generate two CEs.
+ int32_t roffset = offset + 2;
+ int32_t rlength = 0;
+
+ //history[roffset++] = -1;
+ //history[roffset++] = 1;
+
+ if (roffset < maxOffset) {
+ rlength = minLengthInChars(ceList, roffset, history);
+ }
+
+ if (rlength < 0) {
+ return -1;
+ }
+
+ // One character for the Han plus whatever the remainder needs.
+ shortestLength = 1 + rlength;
+ goto have_shortest;
+ } else if (ce >= jamoLimits[0] && ce < jamoLimits[3]) {
+ int32_t roffset = offset;
+ int32_t rlength = 0;
+
+ // Consume consecutive CEs while each falls in the next jamoLimits
+ // interval; the whole run is counted as one character below.
+ // **** this loop may not handle archaic Hangul correctly ****
+ for (int32_t j = 0; roffset < maxOffset && j < 4; j += 1, roffset += 1) {
+ uint32_t jce = ceList->get(roffset);
+
+ // Some Jamo have 24-bit primary order; skip the
+ // 2nd CE. This should always be OK because if
+ // we're still in the loop all we've seen are
+ // a series of Jamo in LVT order.
+ if (isContinuation(jce)) {
+ continue;
+ }
+
+ if (j >= 3 || jce < jamoLimits[j] || jce >= jamoLimits[j + 1]) {
+ break;
+ }
+ }
+
+ if (roffset == offset) {
+ // we started with a non-L Jamo...
+ // just say it comes from a single character
+ roffset += 1;
+
+ // See if the single Jamo has a 24-bit order.
+ if (roffset < maxOffset && isContinuation(ceList->get(roffset))) {
+ roffset += 1;
+ }
+ }
+
+ if (roffset < maxOffset) {
+ rlength = minLengthInChars(ceList, roffset, history);
+ }
+
+ if (rlength < 0) {
+ return -1;
+ }
+
+ shortestLength = 1 + rlength;
+ goto have_shortest;
+ }
+
+ // Can't handle it manually either. Just move on.
+ return -1;
+ }
+
+have_shortest:
+ // Memoize the result for this offset.
+ history[offset] = shortestLength;
+
+ return shortestLength;
+}
+
+/**
+ * Computes the length, in UChars, of the shortest string whose CEs match
+ * ceList from offset to the end.
+ *
+ * @param ceList CE list to analyze
+ * @param offset index of the first CE to consider
+ * @return the shortest length, or -1 if no match is possible, offset is
+ *         outside the list, or the scratch table cannot be allocated
+ */
+int32_t CollData::minLengthInChars(const CEList *ceList, int32_t offset) const
+{
+    int32_t clength = ceList->size();
+
+    // The memo table is indexed by offset, so an offset outside the list
+    // (including any offset into an empty list) must be rejected before
+    // it is used as an index.
+    if (offset < 0 || offset >= clength) {
+        return -1;
+    }
+
+    int32_t *history = NEW_ARRAY(int32_t, clength);
+
+    if (history == NULL) {
+        return -1;
+    }
+
+    // -1 marks "not computed yet" for the recursive worker.
+    for (int32_t i = 0; i < clength; i += 1) {
+        history[i] = -1;
+    }
+
+    int32_t minLength = minLengthInChars(ceList, offset, history);
+
+    DELETE_ARRAY(history);
+
+    return minLength;
+}
+
+/**
+ * Returns the shared CollData for collator, creating it on first use.
+ * Release it with CollData::close().
+ *
+ * @param collator collator whose data is wanted
+ * @param status   in/out error code; set to U_MEMORY_ALLOCATION_ERROR if
+ *                 the global cache cannot be created
+ * @return the CollData, or NULL on failure
+ */
+CollData *CollData::open(UCollator *collator, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return NULL;
+    }
+
+    CollDataCache *cache = getCollDataCache();
+
+    // getCollDataCache() returns NULL when the cache could not be created.
+    if (cache == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return NULL;
+    }
+
+    return cache->get(collator, status);
+}
+
+/**
+ * Drops one reference to collData in the global cache. The object is not
+ * deleted here; flushCollDataCache() removes unreferenced entries.
+ *
+ * @param collData data obtained from open(); NULL is ignored
+ */
+void CollData::close(CollData *collData)
+{
+    // unref() dereferences collData, so there is nothing to do for NULL.
+    if (collData == NULL) {
+        return;
+    }
+
+    CollDataCache *cache = getCollDataCache();
+
+    // getCollDataCache() returns NULL when the cache could not be created.
+    if (cache != NULL) {
+        cache->unref(collData);
+    }
+}
+
+CollDataCache *CollData::collDataCache = NULL;
+
+/**
+ * Returns the global CollDataCache, creating it and registering
+ * coll_data_cleanup on first use.
+ *
+ * A candidate cache is built outside the global mutex and installed under
+ * it; if another thread installed one first, the candidate is discarded.
+ *
+ * @return the global cache, or NULL if it could not be created
+ */
+CollDataCache *CollData::getCollDataCache()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    CollDataCache *cache = NULL;
+
+    UMTX_CHECK(NULL, collDataCache, cache);
+
+    if (cache == NULL) {
+        CollDataCache *newCache = new CollDataCache(status);
+
+        if (newCache == NULL) {
+            return NULL;
+        }
+
+        if (U_FAILURE(status)) {
+            delete newCache;
+            return NULL;
+        }
+
+        umtx_lock(NULL);
+        if (collDataCache == NULL) {
+            collDataCache = newCache;
+            newCache = NULL;
+
+            ucln_i18n_registerCleanup(UCLN_I18N_COLL_DATA, coll_data_cleanup);
+        }
+
+        // Read the winner while still holding the mutex rather than
+        // comparing against the global after releasing it.
+        cache = collDataCache;
+        umtx_unlock(NULL);
+
+        // Still non-NULL only if another thread installed its cache first.
+        delete newCache;
+    }
+
+    return cache;
+}
+
+/**
+ * Detaches the global cache pointer under the global mutex and deletes
+ * the cache. Does nothing if no cache exists, or if the global pointer
+ * was already cleared by the time the mutex is taken.
+ */
+void CollData::freeCollDataCache()
+{
+    CollDataCache *doomed = NULL;
+
+    UMTX_CHECK(NULL, collDataCache, doomed);
+
+    if (doomed == NULL) {
+        return;
+    }
+
+    umtx_lock(NULL);
+    if (collDataCache == NULL) {
+        // Already cleared elsewhere; leave nothing for us to delete.
+        doomed = NULL;
+    } else {
+        collDataCache = NULL;
+    }
+    umtx_unlock(NULL);
+
+    delete doomed;
+}
+
+/**
+ * Removes unreferenced entries from the global cache, if one exists.
+ */
+void CollData::flushCollDataCache()
+{
+    CollDataCache *current = NULL;
+
+    UMTX_CHECK(NULL, collDataCache, current);
+
+    if (current == NULL) {
+        return;
+    }
+
+    // **** this will fail if another thread ****
+    // **** deletes the cache at this point  ****
+    current->flush();
+}
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_COLLATION
Property changes on: icu46/source/i18n/colldata.cpp
___________________________________________________________________
Added: svn:eol-style
+ LF
« no previous file with comments | « icu46/source/i18n/coll.cpp ('k') | icu46/source/i18n/coptccal.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698