Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Unified Diff: source/i18n/collationsettings.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/i18n/collationsettings.h ('k') | source/i18n/collationtailoring.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/i18n/collationsettings.cpp
diff --git a/source/i18n/collationsettings.cpp b/source/i18n/collationsettings.cpp
index b63d6bf3a604f320d669f53ec7bcb63ba2d1ae49..e3e76a476588b99262715f6b2310184886e27e8f 100644
--- a/source/i18n/collationsettings.cpp
+++ b/source/i18n/collationsettings.cpp
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (C) 2013-2014, International Business Machines
+* Copyright (C) 2013-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationsettings.cpp
@@ -16,10 +16,12 @@
#include "unicode/ucol.h"
#include "cmemory.h"
#include "collation.h"
+#include "collationdata.h"
#include "collationsettings.h"
#include "sharedobject.h"
#include "uassert.h"
#include "umutex.h"
+#include "uvectr32.h"
U_NAMESPACE_BEGIN
@@ -27,19 +29,12 @@ CollationSettings::CollationSettings(const CollationSettings &other)
: SharedObject(other),
options(other.options), variableTop(other.variableTop),
reorderTable(NULL),
+ minHighNoReorder(other.minHighNoReorder),
+ reorderRanges(NULL), reorderRangesLength(0),
reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
fastLatinOptions(other.fastLatinOptions) {
- int32_t length = other.reorderCodesLength;
- if(length == 0) {
- U_ASSERT(other.reorderTable == NULL);
- } else {
- U_ASSERT(other.reorderTable != NULL);
- if(other.reorderCodesCapacity == 0) {
- aliasReordering(other.reorderCodes, length, other.reorderTable);
- } else {
- setReordering(other.reorderCodes, length, other.reorderTable);
- }
- }
+ UErrorCode errorCode = U_ZERO_ERROR;
+ copyReorderingFrom(other, errorCode);
if(fastLatinOptions >= 0) {
uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}
@@ -79,14 +74,22 @@ CollationSettings::resetReordering() {
// rather than a no-op permutation.
// Keep the memory via reorderCodes and its capacity.
reorderTable = NULL;
+ minHighNoReorder = 0;
+ reorderRangesLength = 0;
reorderCodesLength = 0;
}
void
-CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table) {
- if(length == 0) {
- resetReordering();
- } else {
+CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
+ const uint32_t *ranges, int32_t rangesLength,
+ const uint8_t *table, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
+ if(table != NULL &&
+ (rangesLength == 0 ?
+ !reorderTableHasSplitBytes(table) :
+ rangesLength >= 2 &&
+ // The first offset must be 0. The last offset must not be 0.
+ (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
// We need to release the memory before setting the alias pointer.
if(reorderCodesCapacity != 0) {
uprv_free(const_cast<int32_t *>(reorderCodes));
@@ -95,36 +98,170 @@ CollationSettings::aliasReordering(const int32_t *codes, int32_t length, const u
reorderTable = table;
reorderCodes = codes;
reorderCodesLength = length;
+ // Drop ranges before the first split byte. They are reordered by the table.
+ // This then speeds up reordering of the remaining ranges.
+ int32_t firstSplitByteRangeIndex = 0;
+ while(firstSplitByteRangeIndex < rangesLength &&
+ (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
+ // The second byte of the primary limit is 0.
+ ++firstSplitByteRangeIndex;
+ }
+ if(firstSplitByteRangeIndex == rangesLength) {
+ U_ASSERT(!reorderTableHasSplitBytes(table));
+ minHighNoReorder = 0;
+ reorderRanges = NULL;
+ reorderRangesLength = 0;
+ } else {
+ U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
+ minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
+ reorderRanges = ranges + firstSplitByteRangeIndex;
+ reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
+ }
+ return;
}
+ // Regenerate missing data.
+ setReordering(data, codes, length, errorCode);
}
-UBool
-CollationSettings::setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]) {
- if(length == 0) {
+void
+CollationSettings::setReordering(const CollationData &data,
+ const int32_t *codes, int32_t codesLength,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
+ if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
resetReordering();
- } else {
- uint8_t *ownedTable;
- int32_t *ownedCodes;
- if(length <= reorderCodesCapacity) {
- ownedTable = const_cast<uint8_t *>(reorderTable);
- ownedCodes = const_cast<int32_t *>(reorderCodes);
- } else {
- // Allocate one memory block for the codes and the 16-aligned table.
- int32_t capacity = (length + 3) & ~3; // round up to a multiple of 4 ints
- uint8_t *bytes = (uint8_t *)uprv_malloc(256 + capacity * 4);
- if(bytes == NULL) { return FALSE; }
- if(reorderCodesCapacity != 0) {
- uprv_free(const_cast<int32_t *>(reorderCodes));
+ return;
+ }
+ UVector32 rangesList(errorCode);
+ data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
+ if(U_FAILURE(errorCode)) { return; }
+ int32_t rangesLength = rangesList.size();
+ if(rangesLength == 0) {
+ resetReordering();
+ return;
+ }
+ const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
+ // ranges[] contains at least two (limit, offset) pairs.
+ // The first offset must be 0. The last offset must not be 0.
+ // Separators (at the low end) and trailing weights (at the high end)
+ // are never reordered.
+ U_ASSERT(rangesLength >= 2);
+ U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
+ minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
+
+ // Write the lead byte permutation table.
+ // Set a 0 for each lead byte that has a range boundary in the middle.
+ uint8_t table[256];
+ int32_t b = 0;
+ int32_t firstSplitByteRangeIndex = -1;
+ for(int32_t i = 0; i < rangesLength; ++i) {
+ uint32_t pair = ranges[i];
+ int32_t limit1 = (int32_t)(pair >> 24);
+ while(b < limit1) {
+ table[b] = (uint8_t)(b + pair);
+ ++b;
+ }
+ // Check the second byte of the limit.
+ if((pair & 0xff0000) != 0) {
+ table[limit1] = 0;
+ b = limit1 + 1;
+ if(firstSplitByteRangeIndex < 0) {
+ firstSplitByteRangeIndex = i;
}
- reorderTable = ownedTable = bytes + capacity * 4;
- reorderCodes = ownedCodes = (int32_t *)bytes;
- reorderCodesCapacity = capacity;
}
- uprv_memcpy(ownedTable, table, 256);
- uprv_memcpy(ownedCodes, codes, length * 4);
- reorderCodesLength = length;
}
- return TRUE;
+ while(b <= 0xff) {
+ table[b] = (uint8_t)b;
+ ++b;
+ }
+ if(firstSplitByteRangeIndex < 0) {
+ // The lead byte permutation table alone suffices for reordering.
+ rangesLength = 0;
+ } else {
+ // Remove the ranges below the first split byte.
+ ranges += firstSplitByteRangeIndex;
+ rangesLength -= firstSplitByteRangeIndex;
+ }
+ setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
+}
+
+void
+CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
+ const uint32_t *ranges, int32_t rangesLength,
+ const uint8_t *table, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
+ int32_t *ownedCodes;
+ int32_t totalLength = codesLength + rangesLength;
+ U_ASSERT(totalLength > 0);
+ if(totalLength <= reorderCodesCapacity) {
+ ownedCodes = const_cast<int32_t *>(reorderCodes);
+ } else {
+ // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
+ int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
+ ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
+ if(ownedCodes == NULL) {
+ resetReordering();
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ if(reorderCodesCapacity != 0) {
+ uprv_free(const_cast<int32_t *>(reorderCodes));
+ }
+ reorderCodes = ownedCodes;
+ reorderCodesCapacity = capacity;
+ }
+ uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
+ uprv_memcpy(ownedCodes, codes, codesLength * 4);
+ uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
+ reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
+ reorderCodesLength = codesLength;
+ reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
+ reorderRangesLength = rangesLength;
+}
+
+void
+CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return; }
+ if(!other.hasReordering()) {
+ resetReordering();
+ return;
+ }
+ minHighNoReorder = other.minHighNoReorder;
+ if(other.reorderCodesCapacity == 0) {
+ // The reorder arrays are aliased to memory-mapped data.
+ reorderTable = other.reorderTable;
+ reorderRanges = other.reorderRanges;
+ reorderRangesLength = other.reorderRangesLength;
+ reorderCodes = other.reorderCodes;
+ reorderCodesLength = other.reorderCodesLength;
+ } else {
+ setReorderArrays(other.reorderCodes, other.reorderCodesLength,
+ other.reorderRanges, other.reorderRangesLength,
+ other.reorderTable, errorCode);
+ }
+}
+
+UBool
+CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
+ U_ASSERT(table[0] == 0);
+ for(int32_t i = 1; i < 256; ++i) {
+ if(table[i] == 0) {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+uint32_t
+CollationSettings::reorderEx(uint32_t p) const {
+ if(p >= minHighNoReorder) { return p; }
+ // Round up p so that its lower 16 bits are >= any offset bits.
+ // Then compare q directly with (limit, offset) pairs.
+ uint32_t q = p | 0xffff;
+ uint32_t r;
+ const uint32_t *ranges = reorderRanges;
+ while(q >= (r = *ranges)) { ++ranges; }
+ return p + (r << 24);
}
void
« no previous file with comments | « source/i18n/collationsettings.h ('k') | source/i18n/collationtailoring.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698