| Index: source/common/ucol_swp.cpp
|
| diff --git a/source/common/ucol_swp.cpp b/source/common/ucol_swp.cpp
|
| index b33268c8fef578c319634fb9c18d4869d90d36a2..0625dd35a8aa1a2d8aa0d442e302a57d774a961c 100644
|
| --- a/source/common/ucol_swp.cpp
|
| +++ b/source/common/ucol_swp.cpp
|
| @@ -1,7 +1,7 @@
|
| /*
|
| *******************************************************************************
|
| *
|
| -* Copyright (C) 2003-2012, International Business Machines
|
| +* Copyright (C) 2003-2014, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *
|
| *******************************************************************************
|
| @@ -18,6 +18,7 @@
|
|
|
| #include "unicode/udata.h" /* UDataInfo */
|
| #include "utrie.h"
|
| +#include "utrie2.h"
|
| #include "udataswp.h"
|
| #include "cmemory.h"
|
| #include "ucol_data.h"
|
| @@ -102,18 +103,28 @@ utrie_swap(const UDataSwapper *ds,
|
|
|
| #if !UCONFIG_NO_COLLATION
|
|
|
| -/* Modified copy of the beginning of ucol_swapBinary(). */
|
| U_CAPI UBool U_EXPORT2
|
| ucol_looksLikeCollationBinary(const UDataSwapper *ds,
|
| const void *inData, int32_t length) {
|
| - const UCATableHeader *inHeader;
|
| - UCATableHeader header;
|
| -
|
| if(ds==NULL || inData==NULL || length<-1) {
|
| return FALSE;
|
| }
|
|
|
| - inHeader=(const UCATableHeader *)inData;
|
| + // First check for format version 4+ which has a standard data header.
|
| + UErrorCode errorCode=U_ZERO_ERROR;
|
| + (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode);
|
| + if(U_SUCCESS(errorCode)) {
|
| + const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
|
| + if(info.dataFormat[0]==0x55 && // dataFormat="UCol"
|
| + info.dataFormat[1]==0x43 &&
|
| + info.dataFormat[2]==0x6f &&
|
| + info.dataFormat[3]==0x6c) {
|
| + return TRUE;
|
| + }
|
| + }
|
| +
|
| + // Else check for format version 3.
|
| + const UCATableHeader *inHeader=(const UCATableHeader *)inData;
|
|
|
| /*
|
| * The collation binary must contain at least the UCATableHeader,
|
| @@ -121,6 +132,7 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,
|
| * sizeof(UCATableHeader)==42*4 in ICU 2.8
|
| * check the length against the header size before reading the size field
|
| */
|
| + UCATableHeader header;
|
| uprv_memset(&header, 0, sizeof(header));
|
| if(length<0) {
|
| header.size=udata_readInt32(ds, inHeader->size);
|
| @@ -144,11 +156,13 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,
|
| return TRUE;
|
| }
|
|
|
| -/* swap a header-less collation binary, inside a resource bundle or ucadata.icu */
|
| -U_CAPI int32_t U_EXPORT2
|
| -ucol_swapBinary(const UDataSwapper *ds,
|
| - const void *inData, int32_t length, void *outData,
|
| - UErrorCode *pErrorCode) {
|
| +namespace {
|
| +
|
| +/* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */
|
| +int32_t
|
| +swapFormatVersion3(const UDataSwapper *ds,
|
| + const void *inData, int32_t length, void *outData,
|
| + UErrorCode *pErrorCode) {
|
| const uint8_t *inBytes;
|
| uint8_t *outBytes;
|
|
|
| @@ -159,7 +173,7 @@ ucol_swapBinary(const UDataSwapper *ds,
|
| uint32_t count;
|
|
|
| /* argument checking in case we were not called from ucol_swap() */
|
| - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
| + if(U_FAILURE(*pErrorCode)) {
|
| return 0;
|
| }
|
| if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
|
| @@ -183,7 +197,7 @@ ucol_swapBinary(const UDataSwapper *ds,
|
| if(length<0) {
|
| header.size=udata_readInt32(ds, inHeader->size);
|
| } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {
|
| - udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",
|
| + udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",
|
| length);
|
| *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
| return 0;
|
| @@ -195,7 +209,7 @@ ucol_swapBinary(const UDataSwapper *ds,
|
| inHeader->formatVersion[0]==3 /*&&
|
| inHeader->formatVersion[1]>=0*/
|
| )) {
|
| - udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
|
| + udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",
|
| header.magic,
|
| inHeader->formatVersion[0], inHeader->formatVersion[1]);
|
| *pErrorCode=U_UNSUPPORTED_ERROR;
|
| @@ -203,7 +217,7 @@ ucol_swapBinary(const UDataSwapper *ds,
|
| }
|
|
|
| if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {
|
| - udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",
|
| + udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",
|
| inHeader->isBigEndian, inHeader->charSetFamily);
|
| *pErrorCode=U_INVALID_FORMAT_ERROR;
|
| return 0;
|
| @@ -293,7 +307,6 @@ ucol_swapBinary(const UDataSwapper *ds,
|
| * if UCAConsts!=0 then contractionUCACombos because we are swapping
|
| * the UCA data file, and we know that the UCA contains contractions
|
| */
|
| - count=header.contractionUCACombos-header.UCAConsts;
|
| ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,
|
| outBytes+header.UCAConsts, pErrorCode);
|
| }
|
| @@ -327,44 +340,252 @@ ucol_swapBinary(const UDataSwapper *ds,
|
| return header.size;
|
| }
|
|
|
| +// swap formatVersion 4 ---------------------------------------------------- ***
|
| +
|
| +// The following constants are copied from CollationDataReader. We accept an awkward copy
|
| +// in order to avoid an awkward relocation of the i18n collationdatareader.h file into
|
| +// the common library. Keep them in sync!
|
| +
|
| +enum {
|
| +    IX_INDEXES_LENGTH = 0,  // indexes[0]: number of int32_t entries in indexes[]
|
| +    IX_OPTIONS = 1,
|
| +    IX_RESERVED2 = 2,
|
| +    IX_RESERVED3 = 3,
|
| +
|
| +    IX_JAMO_CE32S_START = 4,
|
| +    IX_REORDER_CODES_OFFSET = 5,  // start of a part that is swapped as 32-bit units
|
| +    IX_REORDER_TABLE_OFFSET = 6,  // start of a byte array (not swapped)
|
| +    IX_TRIE_OFFSET = 7,  // start of the part that is swapped with utrie2_swap()
|
| +
|
| +    IX_RESERVED8_OFFSET = 8,  // this part must be empty
|
| +    IX_CES_OFFSET = 9,  // start of a part that is swapped as 64-bit units
|
| +    IX_RESERVED10_OFFSET = 10,  // this part must be empty
|
| +    IX_CE32S_OFFSET = 11,  // start of a part that is swapped as 32-bit units
|
| +
|
| +    IX_ROOT_ELEMENTS_OFFSET = 12,  // start of a part that is swapped as 32-bit units
|
| +    IX_CONTEXTS_OFFSET = 13,  // start of a part that is swapped as 16-bit units
|
| +    IX_UNSAFE_BWD_OFFSET = 14,  // start of a part that is swapped as 16-bit units
|
| +    IX_FAST_LATIN_TABLE_OFFSET = 15,  // start of a part that is swapped as 16-bit units
|
| +
|
| +    IX_SCRIPTS_OFFSET = 16,  // start of a part that is swapped as 16-bit units
|
| +    IX_COMPRESSIBLE_BYTES_OFFSET = 17,  // start of a byte array (not swapped)
|
| +    IX_RESERVED18_OFFSET = 18,  // this part must be empty
|
| +    IX_TOTAL_SIZE = 19  // slot with the total size of the data in bytes
|
| +};
|
| +
|
| +/**
|
| + * Swaps collation data with formatVersion=4, that is, the bytes after the standard data header.
|
| + *
|
| + * The data starts with int32_t indexes[]; indexes[0] is the number of indexes.
|
| + * Each swapped part of the data extends from the byte offset in one indexes[] slot
|
| + * to the byte offset in the next slot. Slots that are not present in the data are treated as -1.
|
| + *
|
| + * @param ds the swapper for the input data
|
| + * @param inData the start of the indexes[]
|
| + * @param length the number of input bytes; if negative, then nothing is swapped
|
| + *               and only the size is returned (preflighting)
|
| + * @param outData the output buffer; may be the same as inData
|
| + * @param errorCode in/out error code; nothing is done if it already indicates a failure
|
| + * @return the total size of the data in bytes, or 0 if this function rejects the data
|
| + */
|
| +int32_t
|
| +swapFormatVersion4(const UDataSwapper *ds,
|
| +                   const void *inData, int32_t length, void *outData,
|
| +                   UErrorCode &errorCode) {
|
| +    if(U_FAILURE(errorCode)) { return 0; }
|
| +
|
| +    const uint8_t *inBytes=(const uint8_t *)inData;
|
| +    uint8_t *outBytes=(uint8_t *)outData;
|
| +
|
| +    const int32_t *inIndexes=(const int32_t *)inBytes;
|
| +    int32_t indexes[IX_TOTAL_SIZE+1];
|
| +
|
| +    // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
|
| +    if(0<=length && length<8) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
|
| +                         "(%d after header) for collation data\n",
|
| +                         length);
|
| +        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
|
| +    // A negative count cannot be valid. Reject it here because it is used below
|
| +    // as the start index for filling indexes[] and for computing the size.
|
| +    if(indexesLength<0) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): negative indexes length %d\n",
|
| +                         indexesLength);
|
| +        errorCode=U_INVALID_FORMAT_ERROR;
|
| +        return 0;
|
| +    }
|
| +    // Compare in units of int32_t because indexesLength*4 could overflow.
|
| +    if(0<=length && (length/4)<indexesLength) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
|
| +                         "(%d after header) for collation data\n",
|
| +                         length);
|
| +        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    // Read the indexes that are present, and mark the missing ones with -1.
|
| +    for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
|
| +        indexes[i]=udata_readInt32(ds, inIndexes[i]);
|
| +    }
|
| +    for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
|
| +        indexes[i]=-1;
|
| +    }
|
| +    inIndexes=NULL;  // Make sure we do not accidentally use these instead of indexes[].
|
| +
|
| +    // Get the total length of the data.
|
| +    int32_t size;
|
| +    if(indexesLength>IX_TOTAL_SIZE) {
|
| +        size=indexes[IX_TOTAL_SIZE];
|
| +    } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
|
| +        size=indexes[indexesLength-1];
|
| +    } else {
|
| +        size=indexesLength*4;
|
| +    }
|
| +    // The size may come from the data. A negative value must not be returned
|
| +    // for preflighting, nor be passed to uprv_memcpy() below.
|
| +    if(size<0) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): negative total size %d\n", size);
|
| +        errorCode=U_INVALID_FORMAT_ERROR;
|
| +        return 0;
|
| +    }
|
| +    if(length<0) { return size; }
|
| +
|
| +    if(length<size) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
|
| +                         "(%d after header) for collation data\n",
|
| +                         length);
|
| +        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    // Copy the data for inaccessible bytes and arrays of bytes.
|
| +    if(inBytes!=outBytes) {
|
| +        uprv_memcpy(outBytes, inBytes, size);
|
| +    }
|
| +
|
| +    // Swap the int32_t indexes[].
|
| +    ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
|
| +
|
| +    // The following is a modified version of CollationDataReader::read().
|
| +    // Here we use indexes[] not inIndexes[] because
|
| +    // the inIndexes[] may not be in this machine's endianness.
|
| +    // From here on, the length parameter is reused for
|
| +    // the number of bytes in the current index part.
|
| +    int32_t index;  // one of the indexes[] slots
|
| +    int32_t offset;  // byte offset for the index part
|
| +
|
| +    // 32-bit units
|
| +    index = IX_REORDER_CODES_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // Skip the IX_REORDER_TABLE_OFFSET byte array.
|
| +
|
| +    index = IX_TRIE_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // Reserved parts must be empty: we would not know how to swap their contents.
|
| +    index = IX_RESERVED8_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n");
|
| +        errorCode = U_UNSUPPORTED_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    // 64-bit units
|
| +    index = IX_CES_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    index = IX_RESERVED10_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n");
|
| +        errorCode = U_UNSUPPORTED_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    // 32-bit units
|
| +    index = IX_CE32S_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // 32-bit units
|
| +    index = IX_ROOT_ELEMENTS_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // 16-bit units
|
| +    index = IX_CONTEXTS_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // 16-bit units
|
| +    index = IX_UNSAFE_BWD_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // 16-bit units
|
| +    index = IX_FAST_LATIN_TABLE_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // 16-bit units
|
| +    index = IX_SCRIPTS_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode);
|
| +    }
|
| +
|
| +    // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
|
| +
|
| +    index = IX_RESERVED18_OFFSET;
|
| +    offset = indexes[index];
|
| +    length = indexes[index + 1] - offset;
|
| +    if(length > 0) {
|
| +        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n");
|
| +        errorCode = U_UNSUPPORTED_ERROR;
|
| +        return 0;
|
| +    }
|
| +
|
| +    return size;
|
| +}
|
| +
|
| +} // namespace
|
| +
|
| /* swap ICU collation data like ucadata.icu */
|
| U_CAPI int32_t U_EXPORT2
|
| ucol_swap(const UDataSwapper *ds,
|
| const void *inData, int32_t length, void *outData,
|
| UErrorCode *pErrorCode) {
|
| -
|
| - const UDataInfo *pInfo;
|
| - int32_t headerSize, collationSize;
|
| + if(U_FAILURE(*pErrorCode)) { return 0; }
|
|
|
| /* udata_swapDataHeader checks the arguments */
|
| - headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
|
| - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
|
| - return 0;
|
| + int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
|
| + if(U_FAILURE(*pErrorCode)) {
|
| + // Try to swap the old format version which did not have a standard data header.
|
| + *pErrorCode=U_ZERO_ERROR;
|
| + return swapFormatVersion3(ds, inData, length, outData, pErrorCode);
|
| }
|
|
|
| /* check data format and format version */
|
| - pInfo=(const UDataInfo *)((const char *)inData+4);
|
| + const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);
|
| if(!(
|
| - pInfo->dataFormat[0]==0x55 && /* dataFormat="UCol" */
|
| - pInfo->dataFormat[1]==0x43 &&
|
| - pInfo->dataFormat[2]==0x6f &&
|
| - pInfo->dataFormat[3]==0x6c &&
|
| - pInfo->formatVersion[0]==3 /*&&
|
| - pInfo->formatVersion[1]>=0*/
|
| + info.dataFormat[0]==0x55 && // dataFormat="UCol"
|
| + info.dataFormat[1]==0x43 &&
|
| + info.dataFormat[2]==0x6f &&
|
| + info.dataFormat[3]==0x6c &&
|
| + (info.formatVersion[0]==3 || info.formatVersion[0]==4)
|
| )) {
|
| - udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",
|
| - pInfo->dataFormat[0], pInfo->dataFormat[1],
|
| - pInfo->dataFormat[2], pInfo->dataFormat[3],
|
| - pInfo->formatVersion[0], pInfo->formatVersion[1]);
|
| + udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "
|
| + "(format version %02x.%02x) is not recognized as collation data\n",
|
| + info.dataFormat[0], info.dataFormat[1],
|
| + info.dataFormat[2], info.dataFormat[3],
|
| + info.formatVersion[0], info.formatVersion[1]);
|
| *pErrorCode=U_UNSUPPORTED_ERROR;
|
| return 0;
|
| }
|
|
|
| - collationSize=ucol_swapBinary(ds,
|
| - (const char *)inData+headerSize,
|
| - length>=0 ? length-headerSize : -1,
|
| - (char *)outData+headerSize,
|
| - pErrorCode);
|
| + inData=(const char *)inData+headerSize;
|
| + if(length>=0) { length-=headerSize; }
|
| + outData=(char *)outData+headerSize;
|
| + int32_t collationSize;
|
| + if(info.formatVersion[0]>=4) {
|
| + collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);
|
| + } else {
|
| + collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);
|
| + }
|
| if(U_SUCCESS(*pErrorCode)) {
|
| return headerSize+collationSize;
|
| } else {
|
|
|