| Index: source/i18n/collationdatareader.cpp
|
| diff --git a/source/i18n/collationdatareader.cpp b/source/i18n/collationdatareader.cpp
|
| index 519b5422ef6588dfc237323592245912d6955012..f7098f984199dda286b5843270e3d9c0f5554247 100644
|
| --- a/source/i18n/collationdatareader.cpp
|
| +++ b/source/i18n/collationdatareader.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 2013-2014, International Business Machines
|
| +* Copyright (C) 2013-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *******************************************************************************
|
| * collationdatareader.cpp
|
| @@ -25,6 +25,7 @@
|
| #include "collationrootelements.h"
|
| #include "collationsettings.h"
|
| #include "collationtailoring.h"
|
| +#include "collunsafe.h"
|
| #include "normalizer2impl.h"
|
| #include "uassert.h"
|
| #include "ucmndata.h"
|
| @@ -102,6 +103,8 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
| const CollationData *baseData = base == NULL ? NULL : base->data;
|
| const int32_t *reorderCodes = NULL;
|
| int32_t reorderCodesLength = 0;
|
| + const uint32_t *reorderRanges = NULL;
|
| + int32_t reorderRangesLength = 0;
|
| index = IX_REORDER_CODES_OFFSET;
|
| offset = getIndex(inIndexes, indexesLength, index);
|
| length = getIndex(inIndexes, indexesLength, index + 1) - offset;
|
| @@ -114,6 +117,20 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
| }
|
| reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset);
|
| reorderCodesLength = length / 4;
|
| +
|
| + // The reorderRanges (if any) are the trailing reorderCodes entries.
|
| + // Split the array at the boundary.
|
| + // Script or reorder codes do not exceed 16-bit values.
|
| + // Range limits are stored in the upper 16 bits, and are never 0.
|
| + while(reorderRangesLength < reorderCodesLength &&
|
| + (reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) {
|
| + ++reorderRangesLength;
|
| + }
|
| + U_ASSERT(reorderRangesLength < reorderCodesLength);
|
| + if(reorderRangesLength != 0) {
|
| + reorderCodesLength -= reorderRangesLength;
|
| + reorderRanges = reinterpret_cast<const uint32_t *>(reorderCodes + reorderCodesLength);
|
| + }
|
| }
|
|
|
| // There should be a reorder table only if there are reorder codes.
|
| @@ -246,6 +263,15 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
| return;
|
| }
|
| if(baseData == NULL) {
|
| +#if defined(COLLUNSAFE_COLL_VERSION) && defined (COLLUNSAFE_SERIALIZE)
|
| + tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);
|
| + if(tailoring.unsafeBackwardSet == NULL) {
|
| + errorCode = U_MEMORY_ALLOCATION_ERROR;
|
| + return;
|
| + } else if (U_FAILURE(errorCode)) {
|
| + return;
|
| + }
|
| +#else
|
| // Create the unsafe-backward set for the root collator.
|
| // Include all non-zero combining marks and trail surrogates.
|
| // We do this at load time, rather than at build time,
|
| @@ -263,6 +289,7 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
| return;
|
| }
|
| data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);
|
| +#endif // !COLLUNSAFE_SERIALIZE || !COLLUNSAFE_COLL_VERSION
|
| } else {
|
| // Clone the root collator's set contents.
|
| tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(
|
| @@ -337,13 +364,32 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
| errorCode = U_INVALID_FORMAT_ERROR;
|
| return;
|
| }
|
| - data->scripts = reinterpret_cast<const uint16_t *>(inBytes + offset);
|
| - data->scriptsLength = length / 2;
|
| + const uint16_t *scripts = reinterpret_cast<const uint16_t *>(inBytes + offset);
|
| + int32_t scriptsLength = length / 2;
|
| + data->numScripts = scripts[0];
|
| + // There must be enough entries for both arrays, including more than two range starts.
|
| + data->scriptStartsLength = scriptsLength - (1 + data->numScripts + 16);
|
| + if(data->scriptStartsLength <= 2 ||
|
| + CollationData::MAX_NUM_SCRIPT_RANGES < data->scriptStartsLength) {
|
| + errorCode = U_INVALID_FORMAT_ERROR;
|
| + return;
|
| + }
|
| + data->scriptsIndex = scripts + 1;
|
| + data->scriptStarts = scripts + 1 + data->numScripts + 16;
|
| + if(!(data->scriptStarts[0] == 0 &&
|
| + data->scriptStarts[1] == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8) &&
|
| + data->scriptStarts[data->scriptStartsLength - 1] ==
|
| + (Collation::TRAIL_WEIGHT_BYTE << 8))) {
|
| + errorCode = U_INVALID_FORMAT_ERROR;
|
| + return;
|
| + }
|
| } else if(data == NULL) {
|
| // Nothing to do.
|
| } else if(baseData != NULL) {
|
| - data->scripts = baseData->scripts;
|
| - data->scriptsLength = baseData->scriptsLength;
|
| + data->numScripts = baseData->numScripts;
|
| + data->scriptsIndex = baseData->scriptsIndex;
|
| + data->scriptStarts = baseData->scriptStarts;
|
| + data->scriptStartsLength = baseData->scriptStartsLength;
|
| }
|
|
|
| index = IX_COMPRESSIBLE_BYTES_OFFSET;
|
| @@ -393,16 +439,10 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes
|
| return;
|
| }
|
|
|
| - if(reorderCodesLength == 0 || reorderTable != NULL) {
|
| - settings->aliasReordering(reorderCodes, reorderCodesLength, reorderTable);
|
| - } else {
|
| - uint8_t table[256];
|
| - baseData->makeReorderTable(reorderCodes, reorderCodesLength, table, errorCode);
|
| - if(U_FAILURE(errorCode)) { return; }
|
| - if(!settings->setReordering(reorderCodes, reorderCodesLength,table)) {
|
| - errorCode = U_MEMORY_ALLOCATION_ERROR;
|
| - return;
|
| - }
|
| + if(reorderCodesLength != 0) {
|
| + settings->aliasReordering(*baseData, reorderCodes, reorderCodesLength,
|
| + reorderRanges, reorderRangesLength,
|
| + reorderTable, errorCode);
|
| }
|
|
|
| settings->fastLatinOptions = CollationFastLatin::getOptions(
|
| @@ -422,7 +462,7 @@ CollationDataReader::isAcceptable(void *context,
|
| pInfo->dataFormat[1] == 0x43 &&
|
| pInfo->dataFormat[2] == 0x6f &&
|
| pInfo->dataFormat[3] == 0x6c &&
|
| - pInfo->formatVersion[0] == 4
|
| + pInfo->formatVersion[0] == 5
|
| ) {
|
| UVersionInfo *version = static_cast<UVersionInfo *>(context);
|
| if(version != NULL) {
|
|
|