Index: source/i18n/collationdatareader.cpp |
diff --git a/source/i18n/collationdatareader.cpp b/source/i18n/collationdatareader.cpp |
index 519b5422ef6588dfc237323592245912d6955012..f7098f984199dda286b5843270e3d9c0f5554247 100644 |
--- a/source/i18n/collationdatareader.cpp |
+++ b/source/i18n/collationdatareader.cpp |
@@ -1,6 +1,6 @@ |
/* |
******************************************************************************* |
-* Copyright (C) 2013-2014, International Business Machines |
+* Copyright (C) 2013-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
******************************************************************************* |
* collationdatareader.cpp |
@@ -25,6 +25,7 @@ |
#include "collationrootelements.h" |
#include "collationsettings.h" |
#include "collationtailoring.h" |
+#include "collunsafe.h" |
#include "normalizer2impl.h" |
#include "uassert.h" |
#include "ucmndata.h" |
@@ -102,6 +103,8 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes |
const CollationData *baseData = base == NULL ? NULL : base->data; |
const int32_t *reorderCodes = NULL; |
int32_t reorderCodesLength = 0; |
+ const uint32_t *reorderRanges = NULL; |
+ int32_t reorderRangesLength = 0; |
index = IX_REORDER_CODES_OFFSET; |
offset = getIndex(inIndexes, indexesLength, index); |
length = getIndex(inIndexes, indexesLength, index + 1) - offset; |
@@ -114,6 +117,20 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes |
} |
reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset); |
reorderCodesLength = length / 4; |
+ |
+ // The reorderRanges (if any) are the trailing reorderCodes entries. |
+ // Split the array at the boundary. |
+ // Script or reorder codes do not exceed 16-bit values, while range limits are |
+ // stored in the upper 16 bits and are never 0, so nonzero upper bits mark a range. |
+ while(reorderRangesLength < reorderCodesLength && |
+ (reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) { |
+ ++reorderRangesLength; |
+ } |
+ U_ASSERT(reorderRangesLength < reorderCodesLength); |
+ if(reorderRangesLength != 0) { |
+ reorderCodesLength -= reorderRangesLength; |
+ reorderRanges = reinterpret_cast<const uint32_t *>(reorderCodes + reorderCodesLength); |
+ } |
} |
// There should be a reorder table only if there are reorder codes. |
@@ -246,6 +263,15 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes |
return; |
} |
if(baseData == NULL) { |
+#if defined(COLLUNSAFE_COLL_VERSION) && defined (COLLUNSAFE_SERIALIZE) |
+ tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode); |
+ if(tailoring.unsafeBackwardSet == NULL) { |
+ errorCode = U_MEMORY_ALLOCATION_ERROR; |
+ return; |
+ } else if (U_FAILURE(errorCode)) { |
+ return; |
+ } |
+#else |
// Create the unsafe-backward set for the root collator. |
// Include all non-zero combining marks and trail surrogates. |
// We do this at load time, rather than at build time, |
@@ -263,6 +289,7 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes |
return; |
} |
data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet); |
+#endif // !COLLUNSAFE_SERIALIZE || !COLLUNSAFE_COLL_VERSION |
} else { |
// Clone the root collator's set contents. |
tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>( |
@@ -337,13 +364,32 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes |
errorCode = U_INVALID_FORMAT_ERROR; |
return; |
} |
- data->scripts = reinterpret_cast<const uint16_t *>(inBytes + offset); |
- data->scriptsLength = length / 2; |
+ const uint16_t *scripts = reinterpret_cast<const uint16_t *>(inBytes + offset); |
+ int32_t scriptsLength = length / 2; |
+ data->numScripts = scripts[0]; |
+ // There must be enough entries for both arrays, including more than two range starts. |
+ data->scriptStartsLength = scriptsLength - (1 + data->numScripts + 16); |
+ if(data->scriptStartsLength <= 2 || |
+ CollationData::MAX_NUM_SCRIPT_RANGES < data->scriptStartsLength) { |
+ errorCode = U_INVALID_FORMAT_ERROR; |
+ return; |
+ } |
+ data->scriptsIndex = scripts + 1; |
+ data->scriptStarts = scripts + 1 + data->numScripts + 16; |
+ if(!(data->scriptStarts[0] == 0 && |
+ data->scriptStarts[1] == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8) && |
+ data->scriptStarts[data->scriptStartsLength - 1] == |
+ (Collation::TRAIL_WEIGHT_BYTE << 8))) { |
+ errorCode = U_INVALID_FORMAT_ERROR; |
+ return; |
+ } |
} else if(data == NULL) { |
// Nothing to do. |
} else if(baseData != NULL) { |
- data->scripts = baseData->scripts; |
- data->scriptsLength = baseData->scriptsLength; |
+ data->numScripts = baseData->numScripts; |
+ data->scriptsIndex = baseData->scriptsIndex; |
+ data->scriptStarts = baseData->scriptStarts; |
+ data->scriptStartsLength = baseData->scriptStartsLength; |
} |
index = IX_COMPRESSIBLE_BYTES_OFFSET; |
@@ -393,16 +439,10 @@ CollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes |
return; |
} |
- if(reorderCodesLength == 0 || reorderTable != NULL) { |
- settings->aliasReordering(reorderCodes, reorderCodesLength, reorderTable); |
- } else { |
- uint8_t table[256]; |
- baseData->makeReorderTable(reorderCodes, reorderCodesLength, table, errorCode); |
- if(U_FAILURE(errorCode)) { return; } |
- if(!settings->setReordering(reorderCodes, reorderCodesLength,table)) { |
- errorCode = U_MEMORY_ALLOCATION_ERROR; |
- return; |
- } |
+ if(reorderCodesLength != 0) { |
+ settings->aliasReordering(*baseData, reorderCodes, reorderCodesLength, |
+ reorderRanges, reorderRangesLength, |
+ reorderTable, errorCode); |
} |
settings->fastLatinOptions = CollationFastLatin::getOptions( |
@@ -422,7 +462,7 @@ CollationDataReader::isAcceptable(void *context, |
pInfo->dataFormat[1] == 0x43 && |
pInfo->dataFormat[2] == 0x6f && |
pInfo->dataFormat[3] == 0x6c && |
- pInfo->formatVersion[0] == 4 |
+ pInfo->formatVersion[0] == 5 |
) { |
UVersionInfo *version = static_cast<UVersionInfo *>(context); |
if(version != NULL) { |