icu46/source/common/ucol_swp.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/common/ucol_swp.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2003-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: ucol_swp.c

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created on: 2003sep10

	14 * created by: Markus W. Scherer

	15 *

	16 * Swap collation binaries.

	17 */

	18

	19 #include "unicode/udata.h" /* UDataInfo */

	20 #include "utrie.h"

	21 #include "udataswp.h"

	22 #include "cmemory.h"

	23 #include "ucol_imp.h"

	24 #include "ucol_swp.h"

	25

	26 /* swapping ----------------------------------------------------------------- */

	27

	28 /*

	29 * This performs data swapping for a folded trie (see utrie.c for details).

	30 */

	31

	32 U_CAPI int32_t U_EXPORT2

	33 utrie_swap(const UDataSwapper *ds,

	34 const void *inData, int32_t length, void *outData,

	35 UErrorCode *pErrorCode) {

	36 const UTrieHeader *inTrie;

	37 UTrieHeader trie;

	38 int32_t size;

	39 UBool dataIs32;

	40

	41 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

	42 return 0;

	43 }

	44 if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {

	45 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	46 return 0;

	47 }

	48

	49 /* setup and swapping */

	50 if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {

	51 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	52 return 0;

	53 }

	54

	55 inTrie=(const UTrieHeader *)inData;

	56 trie.signature=ds->readUInt32(inTrie->signature);

	57 trie.options=ds->readUInt32(inTrie->options);

	58 trie.indexLength=udata_readInt32(ds, inTrie->indexLength);

	59 trie.dataLength=udata_readInt32(ds, inTrie->dataLength);

	60

	61 if( trie.signature!=0x54726965 ||

	62 (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||

	63 ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||

	64 trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||

	65 (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||

	66 trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||

	67 (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||

	68 ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))

	69 ) {

	70 *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */

	71 return 0;

	72 }

	73

	74 dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0);

	75 size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);

	76

	77 if(length>=0) {

	78 UTrieHeader *outTrie;

	79

	80 if(length<size) {

	81 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	82 return 0;

	83 }

	84

	85 outTrie=(UTrieHeader *)outData;

	86

	87 /* swap the header */

	88 ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);

	89

	90 /* swap the index and the data */

	91 if(dataIs32) {

	92 ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);

	93 ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,

	94 (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);

	95 } else {

	96 ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);

	97 }

	98 }

	99

	100 return size;

	101 }

	102

	103 #if !UCONFIG_NO_COLLATION

	104

	105 /* Modified copy of the beginning of ucol_swapBinary(). */

	106 U_CAPI UBool U_EXPORT2

	107 ucol_looksLikeCollationBinary(const UDataSwapper *ds,

	108 const void *inData, int32_t length) {

	109 const uint8_t *inBytes;

	110 const UCATableHeader *inHeader;

	111 UCATableHeader header;

	112

	113 if(ds==NULL || inData==NULL || length<-1) {

	114 return FALSE;

	115 }

	116

	117 inBytes=(const uint8_t *)inData;

	118 inHeader=(const UCATableHeader *)inData;

	119

	120 /*

	121 * The collation binary must contain at least the UCATableHeader,

	122 * starting with its size field.

	123 * sizeof(UCATableHeader)==42*4 in ICU 2.8

	124 * check the length against the header size before reading the size field

	125 */

	126 uprv_memset(&header, 0, sizeof(header));

	127 if(length<0) {

	128 header.size=udata_readInt32(ds, inHeader->size);

	129 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {

	130 return FALSE;

	131 }

	132

	133 header.magic=ds->readUInt32(inHeader->magic);

	134 if(!(

	135 header.magic==UCOL_HEADER_MAGIC &&

	136 inHeader->formatVersion[0]==3 /*&&

	137 inHeader->formatVersion[1]>=0*/

	138 )) {

	139 return FALSE;

	140 }

	141

	142 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {

	143 return FALSE;

	144 }

	145

	146 return TRUE;

	147 }

	148

	149 /* swap a header-less collation binary, inside a resource bundle or ucadata.icu */

	150 U_CAPI int32_t U_EXPORT2

	151 ucol_swapBinary(const UDataSwapper *ds,

	152 const void *inData, int32_t length, void *outData,

	153 UErrorCode *pErrorCode) {

	154 const uint8_t *inBytes;

	155 uint8_t *outBytes;

	156

	157 const UCATableHeader *inHeader;

	158 UCATableHeader *outHeader;

	159 UCATableHeader header;

	160

	161 uint32_t count;

	162

	163 /* argument checking in case we were not called from ucol_swap() */

	164 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

	165 return 0;

	166 }

	167 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {

	168 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;

	169 return 0;

	170 }

	171

	172 inBytes=(const uint8_t *)inData;

	173 outBytes=(uint8_t *)outData;

	174

	175 inHeader=(const UCATableHeader *)inData;

	176 outHeader=(UCATableHeader *)outData;

	177

	178 /*

	179 * The collation binary must contain at least the UCATableHeader,

	180 * starting with its size field.

	181 * sizeof(UCATableHeader)==42*4 in ICU 2.8

	182 * check the length against the header size before reading the size field

	183 */

	184 uprv_memset(&header, 0, sizeof(header));

	185 if(length<0) {

	186 header.size=udata_readInt32(ds, inHeader->size);

	187 } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) {

	188 udata_printError(ds, "ucol_swapBinary(): too few bytes (%d after header) for collation data\n",

	189 length);

	190 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	191 return 0;

	192 }

	193

	194 header.magic=ds->readUInt32(inHeader->magic);

	195 if(!(

	196 header.magic==UCOL_HEADER_MAGIC &&

	197 inHeader->formatVersion[0]==3 /*&&

	198 inHeader->formatVersion[1]>=0*/

	199 )) {

	200 udata_printError(ds, "ucol_swapBinary(): magic 0x%08x or format version %02x.%02x is not a collation binary\n",

	201 header.magic,

	202 inHeader->formatVersion[0], inHeader->formatVersion[1]);

	203 *pErrorCode=U_UNSUPPORTED_ERROR;

	204 return 0;

	205 }

	206

	207 if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) {

	208 udata_printError(ds, "ucol_swapBinary(): endianness %d or charset %d does not match the swapper\n",

	209 inHeader->isBigEndian, inHeader->charSetFamily);

	210 *pErrorCode=U_INVALID_FORMAT_ERROR;

	211 return 0;

	212 }

	213

	214 if(length>=0) {

	215 /* copy everything, takes care of data that needs no swapping */

	216 if(inBytes!=outBytes) {

	217 uprv_memcpy(outBytes, inBytes, header.size);

	218 }

	219

	220 /* swap the necessary pieces in the order of their occurrence in the data */

	221

	222 /* read more of the UCATableHeader (the size field was read above) */

	223 header.options= ds->readUInt32(inHeader->options);

	224 header.UCAConsts= ds->readUInt32(inHeader->UCAConsts);

	225 header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos);

	226 header.mappingPosition= ds->readUInt32(inHeader->mappingPosition);

	227 header.expansion= ds->readUInt32(inHeader->expansion);

	228 header.contractionIndex= ds->readUInt32(inHeader->contractionIndex);

	229 header.contractionCEs= ds->readUInt32(inHeader->contractionCEs);

	230 header.contractionSize= ds->readUInt32(inHeader->contractionSize);

	231 header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE);

	232 header.expansionCESize= ds->readUInt32(inHeader->expansionCESize);

	233 header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount);

	234 header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize);

	235 header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte);

	236 header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript);

	237

	238 /* swap the 32-bit integers in the header */

	239 ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader),

	240 outHeader, pErrorCode);

	241 ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript),

	242 &(outHeader->scriptToLeadByte), pErrorCode);

	243 /* set the output platform properties */

	244 outHeader->isBigEndian=ds->outIsBigEndian;

	245 outHeader->charSetFamily=ds->outCharset;

	246

	247 /* swap the options */

	248 if(header.options!=0) {

	249 ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options,

	250 outBytes+header.options, pErrorCode);

	251 }

	252

	253 /* swap the expansions */

	254 if(header.mappingPosition!=0 && header.expansion!=0) {

	255 if(header.contractionIndex!=0) {

	256 /* expansions bounded by contractions */

	257 count=header.contractionIndex-header.expansion;

	258 } else {

	259 /* no contractions: expansions bounded by the main trie */

	260 count=header.mappingPosition-header.expansion;

	261 }

	262 ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count,

	263 outBytes+header.expansion, pErrorCode);

	264 }

	265

	266 /* swap the contractions */

	267 if(header.contractionSize!=0) {

	268 /* contractionIndex: UChar[] */

	269 ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2,

	270 outBytes+header.contractionIndex, pErrorCode);

	271

	272 /* contractionCEs: CEs[] */

	273 ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4,

	274 outBytes+header.contractionCEs, pErrorCode);

	275 }

	276

	277 /* swap the main trie */

	278 if(header.mappingPosition!=0) {

	279 count=header.endExpansionCE-header.mappingPosition;

	280 utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count,

	281 outBytes+header.mappingPosition, pErrorCode);

	282 }

	283

	284 /* swap the max expansion table */

	285 if(header.endExpansionCECount!=0) {

	286 ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4,

	287 outBytes+header.endExpansionCE, pErrorCode);

	288 }

	289

	290 /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */

	291

	292 /* swap UCA constants */

	293 if(header.UCAConsts!=0) {

	294 /*

	295 * if UCAConsts!=0 then contractionUCACombos because we are swapping

	296 * the UCA data file, and we know that the UCA contains contractions

	297 */

	298 count=header.contractionUCACombos-header.UCAConsts;

	299 ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts,

	300 outBytes+header.UCAConsts, pErrorCode);

	301 }

	302

	303 /* swap UCA contractions */

	304 if(header.contractionUCACombosSize!=0) {

	305 count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR;

	306 ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count,

	307 outBytes+header.contractionUCACombos, pErrorCode);

	308 }

	309

	310 /* swap the script to lead bytes */

	311 if(header.scriptToLeadByte!=0) {

	312 int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16

	313 int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16

	314 ds->swapArray16(ds, inBytes+header.scriptToLeadByte,

	315 4 + (4 * indexCount) + (2 * dataCount),

	316 outBytes+header.scriptToLeadByte, pErrorCode);

	317 }

	318

	319 /* swap the lead byte to scripts */

	320 if(header.leadByteToScript!=0) {

	321 int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16

	322 int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16

	323 ds->swapArray16(ds, inBytes+header.leadByteToScript,

	324 4 + (2 * indexCount) + (2 * dataCount),

	325 outBytes+header.leadByteToScript, pErrorCode);

	326 }

	327 }

	328

	329 return header.size;

	330 }

	331

	332 /* swap ICU collation data like ucadata.icu */

	333 U_CAPI int32_t U_EXPORT2

	334 ucol_swap(const UDataSwapper *ds,

	335 const void *inData, int32_t length, void *outData,

	336 UErrorCode *pErrorCode) {

	337

	338 const UDataInfo *pInfo;

	339 int32_t headerSize, collationSize;

	340

	341 /* udata_swapDataHeader checks the arguments */

	342 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

	343 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

	344 return 0;

	345 }

	346

	347 /* check data format and format version */

	348 pInfo=(const UDataInfo *)((const char *)inData+4);

	349 if(!(

	350 pInfo->dataFormat[0]==0x55 && /* dataFormat="UCol" */

	351 pInfo->dataFormat[1]==0x43 &&

	352 pInfo->dataFormat[2]==0x6f &&

	353 pInfo->dataFormat[3]==0x6c &&

	354 pInfo->formatVersion[0]==3 /*&&

	355 pInfo->formatVersion[1]>=0*/

	356 )) {

	357 udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a collation file\n",

	358 pInfo->dataFormat[0], pInfo->dataFormat[1],

	359 pInfo->dataFormat[2], pInfo->dataFormat[3],

	360 pInfo->formatVersion[0], pInfo->formatVersion[1]);

	361 *pErrorCode=U_UNSUPPORTED_ERROR;

	362 return 0;

	363 }

	364

	365 collationSize=ucol_swapBinary(ds,

	366 (const char *)inData+headerSize,

	367 length>=0 ? length-headerSize : -1,

	368 (char *)outData+headerSize,

	369 pErrorCode);

	370 if(U_SUCCESS(*pErrorCode)) {

	371 return headerSize+collationSize;

	372 } else {

	373 return 0;

	374 }

	375 }

	376

	377 /* swap inverse UCA collation data (invuca.icu) */

	378 U_CAPI int32_t U_EXPORT2

	379 ucol_swapInverseUCA(const UDataSwapper *ds,

	380 const void *inData, int32_t length, void *outData,

	381 UErrorCode *pErrorCode) {

	382 const UDataInfo *pInfo;

	383 int32_t headerSize;

	384

	385 const uint8_t *inBytes;

	386 uint8_t *outBytes;

	387

	388 const InverseUCATableHeader *inHeader;

	389 InverseUCATableHeader *outHeader;

	390 InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} };

	391

	392 /* udata_swapDataHeader checks the arguments */

	393 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);

	394 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {

	395 return 0;

	396 }

	397

	398 /* check data format and format version */

	399 pInfo=(const UDataInfo *)((const char *)inData+4);

	400 if(!(

	401 pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */

	402 pInfo->dataFormat[1]==0x6e &&

	403 pInfo->dataFormat[2]==0x76 &&

	404 pInfo->dataFormat[3]==0x43 &&

	405 pInfo->formatVersion[0]==2 &&

	406 pInfo->formatVersion[1]>=1

	407 )) {

	408 udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n",

	409 pInfo->dataFormat[0], pInfo->dataFormat[1],

	410 pInfo->dataFormat[2], pInfo->dataFormat[3],

	411 pInfo->formatVersion[0], pInfo->formatVersion[1]);

	412 *pErrorCode=U_UNSUPPORTED_ERROR;

	413 return 0;

	414 }

	415

	416 inBytes=(const uint8_t *)inData+headerSize;

	417 outBytes=(uint8_t *)outData+headerSize;

	418

	419 inHeader=(const InverseUCATableHeader *)inBytes;

	420 outHeader=(InverseUCATableHeader *)outBytes;

	421

	422 /*

	423 * The inverse UCA collation binary must contain at least the InverseUCATableHeader,

	424 * starting with its size field.

	425 * sizeof(UCATableHeader)==8*4 in ICU 2.8

	426 * check the length against the header size before reading the size field

	427 */

	428 if(length<0) {

	429 header.byteSize=udata_readInt32(ds, inHeader->byteSize);

	430 } else if(

	431 ((length-headerSize)<(8*4) ||

	432 (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize)))

	433 ) {

	434 udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n",

	435 length);

	436 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;

	437 return 0;

	438 }

	439

	440 if(length>=0) {

	441 /* copy everything, takes care of data that needs no swapping */

	442 if(inBytes!=outBytes) {

	443 uprv_memcpy(outBytes, inBytes, header.byteSize);

	444 }

	445

	446 /* swap the necessary pieces in the order of their occurrence in the data */

	447

	448 /* read more of the InverseUCATableHeader (the byteSize field was read above) */

	449 header.tableSize= ds->readUInt32(inHeader->tableSize);

	450 header.contsSize= ds->readUInt32(inHeader->contsSize);

	451 header.table= ds->readUInt32(inHeader->table);

	452 header.conts= ds->readUInt32(inHeader->conts);

	453

	454 /* swap the 32-bit integers in the header */

	455 ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode);

	456

	457 /* swap the inverse table; tableSize counts uint32_t[3] rows */

	458 ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4,

	459 outBytes+header.table, pErrorCode);

	460

	461 /* swap the continuation table; contsSize counts UChars */

	462 ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR,

	463 outBytes+header.conts, pErrorCode);

	464 }

	465

	466 return headerSize+header.byteSize;

	467 }

	468

	469 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « icu46/source/common/ucol_swp.h ('k') | icu46/source/common/udata.cpp » ('j') | no next file with comments »