| Index: source/i18n/collationsettings.cpp
|
| diff --git a/source/i18n/collationsettings.cpp b/source/i18n/collationsettings.cpp
|
| index b63d6bf3a604f320d669f53ec7bcb63ba2d1ae49..e3e76a476588b99262715f6b2310184886e27e8f 100644
|
| --- a/source/i18n/collationsettings.cpp
|
| +++ b/source/i18n/collationsettings.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 2013-2014, International Business Machines
|
| +* Copyright (C) 2013-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *******************************************************************************
|
| * collationsettings.cpp
|
| @@ -16,10 +16,12 @@
|
| #include "unicode/ucol.h"
|
| #include "cmemory.h"
|
| #include "collation.h"
|
| +#include "collationdata.h"
|
| #include "collationsettings.h"
|
| #include "sharedobject.h"
|
| #include "uassert.h"
|
| #include "umutex.h"
|
| +#include "uvectr32.h"
|
|
|
| U_NAMESPACE_BEGIN
|
|
|
| @@ -27,19 +29,12 @@ CollationSettings::CollationSettings(const CollationSettings &other)
|
| : SharedObject(other),
|
| options(other.options), variableTop(other.variableTop),
|
| reorderTable(NULL),
|
| + minHighNoReorder(other.minHighNoReorder),
|
| + reorderRanges(NULL), reorderRangesLength(0),
|
| reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
|
| fastLatinOptions(other.fastLatinOptions) {
|
| - int32_t length = other.reorderCodesLength;
|
| - if(length == 0) {
|
| - U_ASSERT(other.reorderTable == NULL);
|
| - } else {
|
| - U_ASSERT(other.reorderTable != NULL);
|
| - if(other.reorderCodesCapacity == 0) {
|
| - aliasReordering(other.reorderCodes, length, other.reorderTable);
|
| - } else {
|
| - setReordering(other.reorderCodes, length, other.reorderTable);
|
| - }
|
| - }
|
| + UErrorCode errorCode = U_ZERO_ERROR;
|
| + copyReorderingFrom(other, errorCode);
|
| if(fastLatinOptions >= 0) {
|
| uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
|
| }
|
| @@ -79,14 +74,22 @@ CollationSettings::resetReordering() {
|
| // rather than a no-op permutation.
|
| // Keep the memory via reorderCodes and its capacity.
|
| reorderTable = NULL;
|
| + minHighNoReorder = 0;
|
| + reorderRangesLength = 0;
|
| reorderCodesLength = 0;
|
| }
|
|
|
| void
|
| -CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table) {
|
| - if(length == 0) {
|
| - resetReordering();
|
| - } else {
|
| +CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
|
| + const uint32_t *ranges, int32_t rangesLength,
|
| + const uint8_t *table, UErrorCode &errorCode) {
|
| + if(U_FAILURE(errorCode)) { return; }
|
| + if(table != NULL &&
|
| + (rangesLength == 0 ?
|
| + !reorderTableHasSplitBytes(table) :
|
| + rangesLength >= 2 &&
|
| + // The first offset must be 0. The last offset must not be 0.
|
| + (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
|
| // We need to release the memory before setting the alias pointer.
|
| if(reorderCodesCapacity != 0) {
|
| uprv_free(const_cast<int32_t *>(reorderCodes));
|
| @@ -95,36 +98,170 @@ CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const u
|
| reorderTable = table;
|
| reorderCodes = codes;
|
| reorderCodesLength = length;
|
| + // Drop ranges before the first split byte. They are reordered by the table.
|
| + // This then speeds up reordering of the remaining ranges.
|
| + int32_t firstSplitByteRangeIndex = 0;
|
| + while(firstSplitByteRangeIndex < rangesLength &&
|
| + (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
|
| + // The second byte of the primary limit is 0.
|
| + ++firstSplitByteRangeIndex;
|
| + }
|
| + if(firstSplitByteRangeIndex == rangesLength) {
|
| + U_ASSERT(!reorderTableHasSplitBytes(table));
|
| + minHighNoReorder = 0;
|
| + reorderRanges = NULL;
|
| + reorderRangesLength = 0;
|
| + } else {
|
| + U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
|
| + minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
|
| + reorderRanges = ranges + firstSplitByteRangeIndex;
|
| + reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
|
| + }
|
| + return;
|
| }
|
| + // Regenerate missing data.
|
| + setReordering(data, codes, length, errorCode);
|
| }
|
|
|
| -UBool
|
| -CollationSettings::setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]) {
|
| - if(length == 0) {
|
| +void
|
| +CollationSettings::setReordering(const CollationData &data,
|
| + const int32_t *codes, int32_t codesLength,
|
| + UErrorCode &errorCode) {
|
| + if(U_FAILURE(errorCode)) { return; }
|
| + if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
|
| resetReordering();
|
| - } else {
|
| - uint8_t *ownedTable;
|
| - int32_t *ownedCodes;
|
| - if(length <= reorderCodesCapacity) {
|
| - ownedTable = const_cast<uint8_t *>(reorderTable);
|
| - ownedCodes = const_cast<int32_t *>(reorderCodes);
|
| - } else {
|
| - // Allocate one memory block for the codes and the 16-aligned table.
|
| - int32_t capacity = (length + 3) & ~3; // round up to a multiple of 4 ints
|
| - uint8_t *bytes = (uint8_t *)uprv_malloc(256 + capacity * 4);
|
| - if(bytes == NULL) { return FALSE; }
|
| - if(reorderCodesCapacity != 0) {
|
| - uprv_free(const_cast<int32_t *>(reorderCodes));
|
| + return;
|
| + }
|
| + UVector32 rangesList(errorCode);
|
| + data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
|
| + if(U_FAILURE(errorCode)) { return; }
|
| + int32_t rangesLength = rangesList.size();
|
| + if(rangesLength == 0) {
|
| + resetReordering();
|
| + return;
|
| + }
|
| + const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
|
| + // ranges[] contains at least two (limit, offset) pairs.
|
| + // The first offset must be 0. The last offset must not be 0.
|
| + // Separators (at the low end) and trailing weights (at the high end)
|
| + // are never reordered.
|
| + U_ASSERT(rangesLength >= 2);
|
| + U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
|
| + minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
|
| +
|
| + // Write the lead byte permutation table.
|
| + // Set a 0 for each lead byte that has a range boundary in the middle.
|
| + uint8_t table[256];
|
| + int32_t b = 0;
|
| + int32_t firstSplitByteRangeIndex = -1;
|
| + for(int32_t i = 0; i < rangesLength; ++i) {
|
| + uint32_t pair = ranges[i];
|
| + int32_t limit1 = (int32_t)(pair >> 24);
|
| + while(b < limit1) {
|
| + table[b] = (uint8_t)(b + pair);
|
| + ++b;
|
| + }
|
| + // Check the second byte of the limit.
|
| + if((pair & 0xff0000) != 0) {
|
| + table[limit1] = 0;
|
| + b = limit1 + 1;
|
| + if(firstSplitByteRangeIndex < 0) {
|
| + firstSplitByteRangeIndex = i;
|
| }
|
| - reorderTable = ownedTable = bytes + capacity * 4;
|
| - reorderCodes = ownedCodes = (int32_t *)bytes;
|
| - reorderCodesCapacity = capacity;
|
| }
|
| - uprv_memcpy(ownedTable, table, 256);
|
| - uprv_memcpy(ownedCodes, codes, length * 4);
|
| - reorderCodesLength = length;
|
| }
|
| - return TRUE;
|
| + while(b <= 0xff) {
|
| + table[b] = (uint8_t)b;
|
| + ++b;
|
| + }
|
| + if(firstSplitByteRangeIndex < 0) {
|
| + // The lead byte permutation table alone suffices for reordering.
|
| + rangesLength = 0;
|
| + } else {
|
| + // Remove the ranges below the first split byte.
|
| + ranges += firstSplitByteRangeIndex;
|
| + rangesLength -= firstSplitByteRangeIndex;
|
| + }
|
| + setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
|
| +}
|
| +
|
| +void
|
| +CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
|
| + const uint32_t *ranges, int32_t rangesLength,
|
| + const uint8_t *table, UErrorCode &errorCode) {
|
| + if(U_FAILURE(errorCode)) { return; }
|
| + int32_t *ownedCodes;
|
| + int32_t totalLength = codesLength + rangesLength;
|
| + U_ASSERT(totalLength > 0);
|
| + if(totalLength <= reorderCodesCapacity) {
|
| + ownedCodes = const_cast<int32_t *>(reorderCodes);
|
| + } else {
|
| + // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
|
| + int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
|
| + ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
|
| + if(ownedCodes == NULL) {
|
| + resetReordering();
|
| + errorCode = U_MEMORY_ALLOCATION_ERROR;
|
| + return;
|
| + }
|
| + if(reorderCodesCapacity != 0) {
|
| + uprv_free(const_cast<int32_t *>(reorderCodes));
|
| + }
|
| + reorderCodes = ownedCodes;
|
| + reorderCodesCapacity = capacity;
|
| + }
|
| + uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
|
| + uprv_memcpy(ownedCodes, codes, codesLength * 4);
|
| + uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
|
| + reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
|
| + reorderCodesLength = codesLength;
|
| + reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
|
| + reorderRangesLength = rangesLength;
|
| +}
|
| +
|
| +void
|
| +CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
|
| + if(U_FAILURE(errorCode)) { return; }
|
| + if(!other.hasReordering()) {
|
| + resetReordering();
|
| + return;
|
| + }
|
| + minHighNoReorder = other.minHighNoReorder;
|
| + if(other.reorderCodesCapacity == 0) {
|
| + // The reorder arrays are aliased to memory-mapped data.
|
| + reorderTable = other.reorderTable;
|
| + reorderRanges = other.reorderRanges;
|
| + reorderRangesLength = other.reorderRangesLength;
|
| + reorderCodes = other.reorderCodes;
|
| + reorderCodesLength = other.reorderCodesLength;
|
| + } else {
|
| + setReorderArrays(other.reorderCodes, other.reorderCodesLength,
|
| + other.reorderRanges, other.reorderRangesLength,
|
| + other.reorderTable, errorCode);
|
| + }
|
| +}
|
| +
|
| +UBool
|
| +CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
|
| + U_ASSERT(table[0] == 0);
|
| + for(int32_t i = 1; i < 256; ++i) {
|
| + if(table[i] == 0) {
|
| + return TRUE;
|
| + }
|
| + }
|
| + return FALSE;
|
| +}
|
| +
|
| +uint32_t
|
| +CollationSettings::reorderEx(uint32_t p) const {
|
| + if(p >= minHighNoReorder) { return p; }
|
| + // Round up p so that its lower 16 bits are >= any offset bits.
|
| + // Then compare q directly with (limit, offset) pairs.
|
| + uint32_t q = p | 0xffff;
|
| + uint32_t r;
|
| + const uint32_t *ranges = reorderRanges;
|
| + while(q >= (r = *ranges)) { ++ranges; }
|
| + return p + (r << 24);
|
| }
|
|
|
| void
|
|
|