source/common/ucol_swp.cpp - Issue 845603002: Update ICU to 54.1 step 1

Unified Diff: source/common/ucol_swp.cpp

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/common/ucol_swp.cpp

diff --git a/source/common/ucol_swp.cpp b/source/common/ucol_swp.cpp

index b33268c8fef578c319634fb9c18d4869d90d36a2..0625dd35a8aa1a2d8aa0d442e302a57d774a961c 100644

--- a/source/common/ucol_swp.cpp

+++ b/source/common/ucol_swp.cpp

@@ -1,7 +1,7 @@

*******************************************************************************

@@ -18,6 +18,7 @@

#include "unicode/udata.h" /* UDataInfo */

#include "utrie.h"

+#include "utrie2.h"

#include "udataswp.h"

#include "cmemory.h"

#include "ucol_data.h"

@@ -102,18 +103,28 @@ utrie_swap(const UDataSwapper *ds,

#if !UCONFIG_NO_COLLATION

-/* Modified copy of the beginning of ucol_swapBinary(). */

U_CAPI UBool U_EXPORT2

ucol_looksLikeCollationBinary(const UDataSwapper *ds,

const void *inData, int32_t length) {

- const UCATableHeader *inHeader;

- UCATableHeader header;

if(ds==NULL || inData==NULL || length<-1) {

return FALSE;

}

- inHeader=(const UCATableHeader *)inData;

+ // First check for format version 4+ which has a standard data header.

+ UErrorCode errorCode=U_ZERO_ERROR;

+ (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode);

+ if(U_SUCCESS(errorCode)) {

+ const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);

+ if(info.dataFormat[0]==0x55 && // dataFormat="UCol"

+ info.dataFormat[1]==0x43 &&

+ info.dataFormat[2]==0x6f &&

+ info.dataFormat[3]==0x6c) {

+ return TRUE;

+ }

+ // Else check for format version 3.

+ const UCATableHeader *inHeader=(const UCATableHeader *)inData;

* The collation binary must contain at least the UCATableHeader,

@@ -121,6 +132,7 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,

* sizeof(UCATableHeader)==42*4 in ICU 2.8

* check the length against the header size before reading the size field

+ UCATableHeader header;

uprv_memset(&header, 0, sizeof(header));

if(length<0) {

header.size=udata_readInt32(ds, inHeader->size);

@@ -144,11 +156,13 @@ ucol_looksLikeCollationBinary(const UDataSwapper *ds,

return TRUE;

}

-/* swap a header-less collation binary, inside a resource bundle or ucadata.icu */

-U_CAPI int32_t U_EXPORT2

-ucol_swapBinary(const UDataSwapper *ds,

- const void *inData, int32_t length, void *outData,

- UErrorCode *pErrorCode) {

+namespace {

+/* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */

+int32_t

+swapFormatVersion3(const UDataSwapper *ds,

+ const void *inData, int32_t length, void *outData,

+ UErrorCode *pErrorCode) {

const uint8_t *inBytes;

uint8_t *outBytes;

@@ -159,7 +173,7 @@ ucol_swapBinary(const UDataSwapper *ds,

uint32_t count;

/* argument checking in case we were not called from ucol_swap() */

- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

+ if(U_FAILURE(*pErrorCode)) {

return 0;

}

if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {

@@ -183,7 +197,7 @@ ucol_swapBinary(const UDataSwapper *ds,

if(length<0) {

header.size=udata_readInt32(ds, inHeader->size);

} else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {

- udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",

+ udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n",

length);

*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

return 0;

@@ -195,7 +209,7 @@ ucol_swapBinary(const UDataSwapper *ds,

inHeader->formatVersion[0]==3 /*&&

inHeader->formatVersion[1]>=0*/

)) {

- udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",

+ udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n",

header.magic,

inHeader->formatVersion[0], inHeader->formatVersion[1]);

*pErrorCode=U_UNSUPPORTED_ERROR;

@@ -203,7 +217,7 @@ ucol_swapBinary(const UDataSwapper *ds,

}

if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {

- udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",

+ udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n",

inHeader->isBigEndian, inHeader->charSetFamily);

*pErrorCode=U_INVALID_FORMAT_ERROR;

return 0;

@@ -293,7 +307,6 @@ ucol_swapBinary(const UDataSwapper *ds,

* if UCAConsts!=0 then contractionUCACombos because we are swapping

* the UCA data file, and we know that the UCA contains contractions

- count=header.contractionUCACombos-header.UCAConsts;

ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,

outBytes+header.UCAConsts, pErrorCode);

}

@@ -327,44 +340,252 @@ ucol_swapBinary(const UDataSwapper *ds,

return header.size;

}

+// swap formatVersion 4 ---------------------------------------------------- ***

+// The following are copied from CollationDataReader, trading an awkward copy of constants

+// for an awkward relocation of the i18n collationdatareader.h file into the common library.

+// Keep them in sync!

+enum {  // Slots of the int32_t indexes[] array at the start of formatVersion 4 data, as read by swapFormatVersion4().

+ IX_INDEXES_LENGTH, // 0: number of int32_t slots in indexes[]; swapFormatVersion4() swaps indexesLength*4 bytes as 32-bit units

+ IX_OPTIONS,  // must be present: swapFormatVersion4() requires at least 8 bytes (this slot and the previous one)

+ IX_RESERVED2,

+ IX_RESERVED3,

+ IX_JAMO_CE32S_START, // 4

+ IX_REORDER_CODES_OFFSET,  // byte offset of a part swapped as 32-bit units; each part's byte length is the next slot minus this one

+ IX_REORDER_TABLE_OFFSET,  // byte array: copied, not swapped

+ IX_TRIE_OFFSET,  // byte offset of the part handed to utrie2_swap()

+ IX_RESERVED8_OFFSET, // 8: part must be empty, otherwise swapping fails with U_UNSUPPORTED_ERROR

+ IX_CES_OFFSET,  // part swapped as 64-bit units

+ IX_RESERVED10_OFFSET,  // part must be empty, otherwise swapping fails with U_UNSUPPORTED_ERROR

+ IX_CE32S_OFFSET,  // part swapped as 32-bit units

+ IX_ROOT_ELEMENTS_OFFSET, // 12: part swapped as 32-bit units

+ IX_CONTEXTS_OFFSET,  // part swapped as 16-bit units

+ IX_UNSAFE_BWD_OFFSET,  // part swapped as 16-bit units

+ IX_FAST_LATIN_TABLE_OFFSET,  // part swapped as 16-bit units

+ IX_SCRIPTS_OFFSET, // 16: part swapped as 16-bit units

+ IX_COMPRESSIBLE_BYTES_OFFSET,  // byte array: copied, not swapped

+ IX_RESERVED18_OFFSET,  // part must be empty, otherwise swapping fails with U_UNSUPPORTED_ERROR

+ IX_TOTAL_SIZE  // 19: total byte size of the data when indexes[] has more than IX_TOTAL_SIZE slots; also the end of the last part

+};

+/**
+ * Swaps the byte order of a collation formatVersion=4 binary (the data that
+ * follows the standard data header).
+ *
+ * @param ds        swapper that supplies the read/swap callbacks
+ * @param inData    start of the collation data (the int32_t indexes[] array)
+ * @param length    number of input bytes, or a negative value to only
+ *                  preflight the size without writing anything
+ * @param outData   destination buffer; may be the same as inData
+ * @param errorCode in/out error code; nothing is done if it already indicates
+ *                  a failure
+ * @return the total size in bytes of the collation data, or 0 on failure
+ *
+ * Failures: U_INDEX_OUTOFBOUNDS_ERROR when length is too small,
+ * U_INVALID_FORMAT_ERROR when the indexes length or the total size read from
+ * the data is negative, U_UNSUPPORTED_ERROR when a reserved part is not empty.
+ */
+int32_t
+swapFormatVersion4(const UDataSwapper *ds,
+                   const void *inData, int32_t length, void *outData,
+                   UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return 0; }
+
+    const uint8_t *inBytes=(const uint8_t *)inData;
+    uint8_t *outBytes=(uint8_t *)outData;
+
+    const int32_t *inIndexes=(const int32_t *)inBytes;
+    int32_t indexes[IX_TOTAL_SIZE+1];
+
+    // Need at least IX_INDEXES_LENGTH and IX_OPTIONS.
+    if(0<=length && length<8) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
+                         "(%d after header) for collation data\n",
+                         length);
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]);
+    // A negative count would make the fill loop below write before indexes[0].
+    if(indexesLength<0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): negative indexes length %d\n",
+                         indexesLength);
+        errorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+    // Same test as length<indexesLength*4, written so that a huge
+    // indexesLength cannot overflow the multiplication.
+    if(0<=length && (length/4)<indexesLength) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
+                         "(%d after header) for collation data\n",
+                         length);
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Read the indexes that are present; mark the missing ones with -1.
+    for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) {
+        indexes[i]=udata_readInt32(ds, inIndexes[i]);
+    }
+    for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) {
+        indexes[i]=-1;
+    }
+    inIndexes=NULL;  // Make sure we do not accidentally use these instead of indexes[].
+
+    // Get the total length of the data.
+    int32_t size;
+    if(indexesLength>IX_TOTAL_SIZE) {
+        size=indexes[IX_TOTAL_SIZE];
+    } else if(indexesLength>IX_REORDER_CODES_OFFSET) {
+        size=indexes[indexesLength-1];
+    } else {
+        size=indexesLength*4;
+    }
+    // A negative size must never reach uprv_memcpy() or the caller.
+    if(size<0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): negative total size %d\n",
+                         size);
+        errorCode=U_INVALID_FORMAT_ERROR;
+        return 0;
+    }
+    if(length<0) { return size; }  // preflighting: report the size only
+
+    if(length<size) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes "
+                         "(%d after header) for collation data\n",
+                         length);
+        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Copy the data for inaccessible bytes and arrays of bytes.
+    if(inBytes!=outBytes) {
+        uprv_memcpy(outBytes, inBytes, size);
+    }
+
+    // Swap the int32_t indexes[].
+    ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode);
+
+    // The following is a modified version of CollationDataReader::read().
+    // Here we use indexes[] not inIndexes[] because
+    // the inIndexes[] may not be in this machine's endianness.
+    int32_t index;       // one of the indexes[] slots
+    int32_t offset;      // byte offset for the index part
+    int32_t partLength;  // number of bytes in the index part
+
+    index = IX_REORDER_CODES_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray32(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    // Skip the IX_REORDER_TABLE_OFFSET byte array.
+
+    index = IX_TRIE_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        utrie2_swap(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_RESERVED8_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n");
+        errorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    index = IX_CES_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray64(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_RESERVED10_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n");
+        errorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    index = IX_CE32S_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray32(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_ROOT_ELEMENTS_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray32(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_CONTEXTS_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_UNSAFE_BWD_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_FAST_LATIN_TABLE_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    index = IX_SCRIPTS_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        ds->swapArray16(ds, inBytes + offset, partLength, outBytes + offset, &errorCode);
+    }
+
+    // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array.
+
+    index = IX_RESERVED18_OFFSET;
+    offset = indexes[index];
+    partLength = indexes[index + 1] - offset;
+    if(partLength > 0) {
+        udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n");
+        errorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    return size;
+}
+

+} // namespace

/* swap ICU collation data like ucadata.icu */

U_CAPI int32_t U_EXPORT2

ucol_swap(const UDataSwapper *ds,

const void *inData, int32_t length, void *outData,

UErrorCode *pErrorCode) {

- const UDataInfo *pInfo;

- int32_t headerSize, collationSize;

+ if(U_FAILURE(*pErrorCode)) { return 0; }

/* udata_swapDataHeader checks the arguments */

- headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

- if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

- return 0;

+ int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

+ if(U_FAILURE(*pErrorCode)) {

+ // Try to swap the old format version which did not have a standard data header.

+ *pErrorCode=U_ZERO_ERROR;

+ return swapFormatVersion3(ds, inData, length, outData, pErrorCode);

}

/* check data format and format version */

- pInfo=(const UDataInfo *)((const char *)inData+4);

+ const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4);

if(!(

- pInfo->dataFormat[0]==0x55 && /* dataFormat="UCol" */

- pInfo->dataFormat[1]==0x43 &&

- pInfo->dataFormat[2]==0x6f &&

- pInfo->dataFormat[3]==0x6c &&

- pInfo->formatVersion[0]==3 /*&&

- pInfo->formatVersion[1]>=0*/

+ info.dataFormat[0]==0x55 && // dataFormat="UCol"

+ info.dataFormat[1]==0x43 &&

+ info.dataFormat[2]==0x6f &&

+ info.dataFormat[3]==0x6c &&

+ (info.formatVersion[0]==3 || info.formatVersion[0]==4)

)) {

- udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",

- pInfo->dataFormat[0], pInfo->dataFormat[1],

- pInfo->dataFormat[2], pInfo->dataFormat[3],

- pInfo->formatVersion[0], pInfo->formatVersion[1]);

+ udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x "

+ "(format version %02x.%02x) is not recognized as collation data\n",

+ info.dataFormat[0], info.dataFormat[1],

+ info.dataFormat[2], info.dataFormat[3],

+ info.formatVersion[0], info.formatVersion[1]);

*pErrorCode=U_UNSUPPORTED_ERROR;

return 0;

}

- collationSize=ucol_swapBinary(ds,

- (const char *)inData+headerSize,

- length>=0 ? length-headerSize : -1,

- (char *)outData+headerSize,

- pErrorCode);

+ inData=(const char *)inData+headerSize;

+ if(length>=0) { length-=headerSize; }

+ outData=(char *)outData+headerSize;

+ int32_t collationSize;

+ if(info.formatVersion[0]>=4) {

+ collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode);

+ } else {

+ collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode);

+ }

if(U_SUCCESS(*pErrorCode)) {

return headerSize+collationSize;

} else {

« no previous file with comments | « source/common/ucol_swp.h ('k') | source/common/udata.cpp » ('j') | no next file with comments »