Index: source/i18n/collationsettings.cpp |
diff --git a/source/i18n/collationsettings.cpp b/source/i18n/collationsettings.cpp |
index b63d6bf3a604f320d669f53ec7bcb63ba2d1ae49..e3e76a476588b99262715f6b2310184886e27e8f 100644 |
--- a/source/i18n/collationsettings.cpp |
+++ b/source/i18n/collationsettings.cpp |
@@ -1,6 +1,6 @@ |
/* |
******************************************************************************* |
-* Copyright (C) 2013-2014, International Business Machines |
+* Copyright (C) 2013-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
******************************************************************************* |
* collationsettings.cpp |
@@ -16,10 +16,12 @@ |
#include "unicode/ucol.h" |
#include "cmemory.h" |
#include "collation.h" |
+#include "collationdata.h" |
#include "collationsettings.h" |
#include "sharedobject.h" |
#include "uassert.h" |
#include "umutex.h" |
+#include "uvectr32.h" |
U_NAMESPACE_BEGIN |
@@ -27,19 +29,12 @@ CollationSettings::CollationSettings(const CollationSettings &other) |
: SharedObject(other), |
options(other.options), variableTop(other.variableTop), |
reorderTable(NULL), |
+ minHighNoReorder(other.minHighNoReorder), |
+ reorderRanges(NULL), reorderRangesLength(0), |
reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), |
fastLatinOptions(other.fastLatinOptions) { |
- int32_t length = other.reorderCodesLength; |
- if(length == 0) { |
- U_ASSERT(other.reorderTable == NULL); |
- } else { |
- U_ASSERT(other.reorderTable != NULL); |
- if(other.reorderCodesCapacity == 0) { |
- aliasReordering(other.reorderCodes, length, other.reorderTable); |
- } else { |
- setReordering(other.reorderCodes, length, other.reorderTable); |
- } |
- } |
+ UErrorCode errorCode = U_ZERO_ERROR; |
+ copyReorderingFrom(other, errorCode); |
if(fastLatinOptions >= 0) { |
uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); |
} |
@@ -79,14 +74,22 @@ CollationSettings::resetReordering() { |
// rather than a no-op permutation. |
// Keep the memory via reorderCodes and its capacity. |
reorderTable = NULL; |
+ minHighNoReorder = 0; |
+ reorderRangesLength = 0; |
reorderCodesLength = 0; |
} |
void |
-CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table) { |
- if(length == 0) { |
- resetReordering(); |
- } else { |
+CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, |
+ const uint32_t *ranges, int32_t rangesLength, |
+ const uint8_t *table, UErrorCode &errorCode) { |
+ if(U_FAILURE(errorCode)) { return; } |
+ if(table != NULL && |
+ (rangesLength == 0 ? |
+ !reorderTableHasSplitBytes(table) : |
+ rangesLength >= 2 && |
+ // The first offset must be 0. The last offset must not be 0. |
+ (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { |
// We need to release the memory before setting the alias pointer. |
if(reorderCodesCapacity != 0) { |
uprv_free(const_cast<int32_t *>(reorderCodes)); |
@@ -95,36 +98,170 @@ CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const u |
reorderTable = table; |
reorderCodes = codes; |
reorderCodesLength = length; |
+ // Drop ranges before the first split byte. They are reordered by the table. |
+ // This then speeds up reordering of the remaining ranges. |
+ int32_t firstSplitByteRangeIndex = 0; |
+ while(firstSplitByteRangeIndex < rangesLength && |
+ (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { |
+ // The second byte of the primary limit is 0. |
+ ++firstSplitByteRangeIndex; |
+ } |
+ if(firstSplitByteRangeIndex == rangesLength) { |
+ U_ASSERT(!reorderTableHasSplitBytes(table)); |
+ minHighNoReorder = 0; |
+ reorderRanges = NULL; |
+ reorderRangesLength = 0; |
+ } else { |
+ U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); |
+ minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; |
+ reorderRanges = ranges + firstSplitByteRangeIndex; |
+ reorderRangesLength = rangesLength - firstSplitByteRangeIndex; |
+ } |
+ return; |
} |
+ // Regenerate missing data. |
+ setReordering(data, codes, length, errorCode); |
} |
-UBool |
-CollationSettings::setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]) { |
- if(length == 0) { |
+void |
+CollationSettings::setReordering(const CollationData &data, |
+ const int32_t *codes, int32_t codesLength, |
+ UErrorCode &errorCode) { |
+ if(U_FAILURE(errorCode)) { return; } |
+ if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { |
resetReordering(); |
- } else { |
- uint8_t *ownedTable; |
- int32_t *ownedCodes; |
- if(length <= reorderCodesCapacity) { |
- ownedTable = const_cast<uint8_t *>(reorderTable); |
- ownedCodes = const_cast<int32_t *>(reorderCodes); |
- } else { |
- // Allocate one memory block for the codes and the 16-aligned table. |
- int32_t capacity = (length + 3) & ~3; // round up to a multiple of 4 ints |
- uint8_t *bytes = (uint8_t *)uprv_malloc(256 + capacity * 4); |
- if(bytes == NULL) { return FALSE; } |
- if(reorderCodesCapacity != 0) { |
- uprv_free(const_cast<int32_t *>(reorderCodes)); |
+ return; |
+ } |
+ UVector32 rangesList(errorCode); |
+ data.makeReorderRanges(codes, codesLength, rangesList, errorCode); |
+ if(U_FAILURE(errorCode)) { return; } |
+ int32_t rangesLength = rangesList.size(); |
+ if(rangesLength == 0) { |
+ resetReordering(); |
+ return; |
+ } |
+ const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); |
+ // ranges[] contains at least two (limit, offset) pairs. |
+ // The first offset must be 0. The last offset must not be 0. |
+ // Separators (at the low end) and trailing weights (at the high end) |
+ // are never reordered. |
+ U_ASSERT(rangesLength >= 2); |
+ U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); |
+ minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; |
+ |
+ // Write the lead byte permutation table. |
+ // Set a 0 for each lead byte that has a range boundary in the middle. |
+ uint8_t table[256]; |
+ int32_t b = 0; |
+ int32_t firstSplitByteRangeIndex = -1; |
+ for(int32_t i = 0; i < rangesLength; ++i) { |
+ uint32_t pair = ranges[i]; |
+ int32_t limit1 = (int32_t)(pair >> 24); |
+ while(b < limit1) { |
+ table[b] = (uint8_t)(b + pair); |
+ ++b; |
+ } |
+ // Check the second byte of the limit. |
+ if((pair & 0xff0000) != 0) { |
+ table[limit1] = 0; |
+ b = limit1 + 1; |
+ if(firstSplitByteRangeIndex < 0) { |
+ firstSplitByteRangeIndex = i; |
} |
- reorderTable = ownedTable = bytes + capacity * 4; |
- reorderCodes = ownedCodes = (int32_t *)bytes; |
- reorderCodesCapacity = capacity; |
} |
- uprv_memcpy(ownedTable, table, 256); |
- uprv_memcpy(ownedCodes, codes, length * 4); |
- reorderCodesLength = length; |
} |
- return TRUE; |
+ while(b <= 0xff) { |
+ table[b] = (uint8_t)b; |
+ ++b; |
+ } |
+ if(firstSplitByteRangeIndex < 0) { |
+ // The lead byte permutation table alone suffices for reordering. |
+ rangesLength = 0; |
+ } else { |
+ // Remove the ranges below the first split byte. |
+ ranges += firstSplitByteRangeIndex; |
+ rangesLength -= firstSplitByteRangeIndex; |
+ } |
+ setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); |
+} |
+ |
+void |
+CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, |
+ const uint32_t *ranges, int32_t rangesLength, |
+ const uint8_t *table, UErrorCode &errorCode) { |
+ if(U_FAILURE(errorCode)) { return; } |
+ int32_t *ownedCodes; |
+ int32_t totalLength = codesLength + rangesLength; |
+ U_ASSERT(totalLength > 0); |
+ if(totalLength <= reorderCodesCapacity) { |
+ ownedCodes = const_cast<int32_t *>(reorderCodes); |
+ } else { |
+ // Allocate one memory block for the codes, the ranges, and the 16-aligned table. |
+ int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints |
+ ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); |
+ if(ownedCodes == NULL) { |
+ resetReordering(); |
+ errorCode = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } |
+ if(reorderCodesCapacity != 0) { |
+ uprv_free(const_cast<int32_t *>(reorderCodes)); |
+ } |
+ reorderCodes = ownedCodes; |
+ reorderCodesCapacity = capacity; |
+ } |
+ uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); |
+ uprv_memcpy(ownedCodes, codes, codesLength * 4); |
+ uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); |
+ reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); |
+ reorderCodesLength = codesLength; |
+ reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; |
+ reorderRangesLength = rangesLength; |
+} |
+ |
+void |
+CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { |
+ if(U_FAILURE(errorCode)) { return; } |
+ if(!other.hasReordering()) { |
+ resetReordering(); |
+ return; |
+ } |
+ minHighNoReorder = other.minHighNoReorder; |
+ if(other.reorderCodesCapacity == 0) { |
+ // The reorder arrays are aliased to memory-mapped data. |
+ reorderTable = other.reorderTable; |
+ reorderRanges = other.reorderRanges; |
+ reorderRangesLength = other.reorderRangesLength; |
+ reorderCodes = other.reorderCodes; |
+ reorderCodesLength = other.reorderCodesLength; |
+ } else { |
+ setReorderArrays(other.reorderCodes, other.reorderCodesLength, |
+ other.reorderRanges, other.reorderRangesLength, |
+ other.reorderTable, errorCode); |
+ } |
+} |
+ |
+UBool |
+CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { |
+ U_ASSERT(table[0] == 0); |
+ for(int32_t i = 1; i < 256; ++i) { |
+ if(table[i] == 0) { |
+ return TRUE; |
+ } |
+ } |
+ return FALSE; |
+} |
+ |
+uint32_t |
+CollationSettings::reorderEx(uint32_t p) const { |
+ if(p >= minHighNoReorder) { return p; } |
+ // Round up p so that its lower 16 bits are >= any offset bits. |
+ // Then compare q directly with (limit, offset) pairs. |
+ uint32_t q = p | 0xffff; |
+ uint32_t r; |
+ const uint32_t *ranges = reorderRanges; |
+ while(q >= (r = *ranges)) { ++ranges; } |
+ return p + (r << 24); |
} |
void |