source/i18n/collationdatareader.cpp - Issue 1621843002: ICU 56 update step 1

Side by Side Diff: source/i18n/collationdatareader.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 * Copyright (C) 2013-2014, International Business Machines	3 * Copyright (C) 2013-2015, International Business Machines

4 * Corporation and others. All Rights Reserved.	4 * Corporation and others. All Rights Reserved.

5 *******************************************************************************	5 *******************************************************************************

6 * collationdatareader.cpp	6 * collationdatareader.cpp

7 *	7 *

8 * created on: 2013feb07	8 * created on: 2013feb07

9 * created by: Markus W. Scherer	9 * created by: Markus W. Scherer

10 */	10 */

11	11

12 #include "unicode/utypes.h"	12 #include "unicode/utypes.h"

13	13

14 #if !UCONFIG_NO_COLLATION	14 #if !UCONFIG_NO_COLLATION

15	15

16 #include "unicode/ucol.h"	16 #include "unicode/ucol.h"

17 #include "unicode/udata.h"	17 #include "unicode/udata.h"

18 #include "unicode/uscript.h"	18 #include "unicode/uscript.h"

19 #include "cmemory.h"	19 #include "cmemory.h"

20 #include "collation.h"	20 #include "collation.h"

21 #include "collationdata.h"	21 #include "collationdata.h"

22 #include "collationdatareader.h"	22 #include "collationdatareader.h"

23 #include "collationfastlatin.h"	23 #include "collationfastlatin.h"

24 #include "collationkeys.h"	24 #include "collationkeys.h"

25 #include "collationrootelements.h"	25 #include "collationrootelements.h"

26 #include "collationsettings.h"	26 #include "collationsettings.h"

27 #include "collationtailoring.h"	27 #include "collationtailoring.h"

	28 #include "collunsafe.h"

28 #include "normalizer2impl.h"	29 #include "normalizer2impl.h"

29 #include "uassert.h"	30 #include "uassert.h"

30 #include "ucmndata.h"	31 #include "ucmndata.h"

31 #include "utrie2.h"	32 #include "utrie2.h"

32	33

33 U_NAMESPACE_BEGIN	34 U_NAMESPACE_BEGIN

34	35

35 namespace {	36 namespace {

36	37

37 int32_t getIndex(const int32_t *indexes, int32_t length, int32_t i) {	38 int32_t getIndex(const int32_t *indexes, int32_t length, int32_t i) {

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
95 length = 0; // only indexes, and inLength was already checked for them	96 length = 0; // only indexes, and inLength was already checked for them

96 }	97 }

97 if(0 <= inLength && inLength < length) {	98 if(0 <= inLength && inLength < length) {

98 errorCode = U_INVALID_FORMAT_ERROR;	99 errorCode = U_INVALID_FORMAT_ERROR;

99 return;	100 return;

100 }	101 }

101	102

102 const CollationData *baseData = base == NULL ? NULL : base->data;	103 const CollationData *baseData = base == NULL ? NULL : base->data;

103 const int32_t *reorderCodes = NULL;	104 const int32_t *reorderCodes = NULL;

104 int32_t reorderCodesLength = 0;	105 int32_t reorderCodesLength = 0;

	106 const uint32_t *reorderRanges = NULL;

	107 int32_t reorderRangesLength = 0;

105 index = IX_REORDER_CODES_OFFSET;	108 index = IX_REORDER_CODES_OFFSET;

106 offset = getIndex(inIndexes, indexesLength, index);	109 offset = getIndex(inIndexes, indexesLength, index);

107 length = getIndex(inIndexes, indexesLength, index + 1) - offset;	110 length = getIndex(inIndexes, indexesLength, index + 1) - offset;

108 if(length >= 4) {	111 if(length >= 4) {

109 if(baseData == NULL) {	112 if(baseData == NULL) {

110 // We assume for collation settings that	113 // We assume for collation settings that

111 // the base data does not have a reordering.	114 // the base data does not have a reordering.

112 errorCode = U_INVALID_FORMAT_ERROR;	115 errorCode = U_INVALID_FORMAT_ERROR;

113 return;	116 return;

114 }	117 }

115 reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset);	118 reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset);

116 reorderCodesLength = length / 4;	119 reorderCodesLength = length / 4;

	120

	121 // The reorderRanges (if any) are the trailing reorderCodes entries.

	122 // Split the array at the boundary.

	123 // Script or reorder codes do not exceed 16-bit values.

	124 // Range limits are stored in the upper 16 bits, and are never 0.

	125 while(reorderRangesLength < reorderCodesLength &&

	126 (reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) {

	127 ++reorderRangesLength;

	128 }

	129 U_ASSERT(reorderRangesLength < reorderCodesLength);

	130 if(reorderRangesLength != 0) {

	131 reorderCodesLength -= reorderRangesLength;

	132 reorderRanges = reinterpret_cast<const uint32_t *>(reorderCodes + reorderCodesLength);

	133 }

117 }	134 }

118	135

119 // There should be a reorder table only if there are reorder codes.	136 // There should be a reorder table only if there are reorder codes.

120 // However, when there are reorder codes the reorder table may be omitted to reduce	137 // However, when there are reorder codes the reorder table may be omitted to reduce

121 // the data size.	138 // the data size.

122 const uint8_t *reorderTable = NULL;	139 const uint8_t *reorderTable = NULL;

123 index = IX_REORDER_TABLE_OFFSET;	140 index = IX_REORDER_TABLE_OFFSET;

124 offset = getIndex(inIndexes, indexesLength, index);	141 offset = getIndex(inIndexes, indexesLength, index);

125 length = getIndex(inIndexes, indexesLength, index + 1) - offset;	142 length = getIndex(inIndexes, indexesLength, index + 1) - offset;

126 if(length >= 256) {	143 if(length >= 256) {

(...skipping 112 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
239	256

240 index = IX_UNSAFE_BWD_OFFSET;	257 index = IX_UNSAFE_BWD_OFFSET;

241 offset = getIndex(inIndexes, indexesLength, index);	258 offset = getIndex(inIndexes, indexesLength, index);

242 length = getIndex(inIndexes, indexesLength, index + 1) - offset;	259 length = getIndex(inIndexes, indexesLength, index + 1) - offset;

243 if(length >= 2) {	260 if(length >= 2) {

244 if(data == NULL) {	261 if(data == NULL) {

245 errorCode = U_INVALID_FORMAT_ERROR;	262 errorCode = U_INVALID_FORMAT_ERROR;

246 return;	263 return;

247 }	264 }

248 if(baseData == NULL) {	265 if(baseData == NULL) {

	266 #if defined(COLLUNSAFE_COLL_VERSION) && defined (COLLUNSAFE_SERIALIZE)

	267 tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);

	268 if(tailoring.unsafeBackwardSet == NULL) {

	269 errorCode = U_MEMORY_ALLOCATION_ERROR;

	270 return;

	271 } else if (U_FAILURE(errorCode)) {

	272 return;

	273 }

	274 #else

249 // Create the unsafe-backward set for the root collator.	275 // Create the unsafe-backward set for the root collator.

250 // Include all non-zero combining marks and trail surrogates.	276 // Include all non-zero combining marks and trail surrogates.

251 // We do this at load time, rather than at build time,	277 // We do this at load time, rather than at build time,

252 // to simplify Unicode version bootstrapping:	278 // to simplify Unicode version bootstrapping:

253 // The root data builder only needs the new FractionalUCA.txt data,	279 // The root data builder only needs the new FractionalUCA.txt data,

254 // but it need not be built with a version of ICU already updated to	280 // but it need not be built with a version of ICU already updated to

255 // the corresponding new Unicode Character Database.	281 // the corresponding new Unicode Character Database.

256 //	282 //

257 // The following is an optimized version of	283 // The following is an optimized version of

258 // new UnicodeSet("[[:^lccc=0:][\\udc00-\\udfff]]").	284 // new UnicodeSet("[[:^lccc=0:][\\udc00-\\udfff]]").

259 // It is faster and requires fewer code dependencies.	285 // It is faster and requires fewer code dependencies.

260 tailoring.unsafeBackwardSet = new UnicodeSet(0xdc00, 0xdfff); // trail surrogates	286 tailoring.unsafeBackwardSet = new UnicodeSet(0xdc00, 0xdfff); // trail surrogates

261 if(tailoring.unsafeBackwardSet == NULL) {	287 if(tailoring.unsafeBackwardSet == NULL) {

262 errorCode = U_MEMORY_ALLOCATION_ERROR;	288 errorCode = U_MEMORY_ALLOCATION_ERROR;

263 return;	289 return;

264 }	290 }

265 data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);	291 data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);

	292 #endif // !COLLUNSAFE_SERIALIZE || !COLLUNSAFE_COLL_VERSION

266 } else {	293 } else {

267 // Clone the root collator's set contents.	294 // Clone the root collator's set contents.

268 tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(	295 tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(

269 baseData->unsafeBackwardSet->cloneAsThawed());	296 baseData->unsafeBackwardSet->cloneAsThawed());

270 if(tailoring.unsafeBackwardSet == NULL) {	297 if(tailoring.unsafeBackwardSet == NULL) {

271 errorCode = U_MEMORY_ALLOCATION_ERROR;	298 errorCode = U_MEMORY_ALLOCATION_ERROR;

272 return;	299 return;

273 }	300 }

274 }	301 }

275 // Add the ranges from the data file to the unsafe-backward set.	302 // Add the ranges from the data file to the unsafe-backward set.

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
330 }	357 }

331	358

332 index = IX_SCRIPTS_OFFSET;	359 index = IX_SCRIPTS_OFFSET;

333 offset = getIndex(inIndexes, indexesLength, index);	360 offset = getIndex(inIndexes, indexesLength, index);

334 length = getIndex(inIndexes, indexesLength, index + 1) - offset;	361 length = getIndex(inIndexes, indexesLength, index + 1) - offset;

335 if(length >= 2) {	362 if(length >= 2) {

336 if(data == NULL) {	363 if(data == NULL) {

337 errorCode = U_INVALID_FORMAT_ERROR;	364 errorCode = U_INVALID_FORMAT_ERROR;

338 return;	365 return;

339 }	366 }

340 data->scripts = reinterpret_cast<const uint16_t *>(inBytes + offset);	367 const uint16_t *scripts = reinterpret_cast<const uint16_t *>(inBytes + offset);

341 data->scriptsLength = length / 2;	368 int32_t scriptsLength = length / 2;

	369 data->numScripts = scripts[0];

	370 // There must be enough entries for both arrays, including more than two range starts.

	371 data->scriptStartsLength = scriptsLength - (1 + data->numScripts + 16);

	372 if(data->scriptStartsLength <= 2 ||

	373 CollationData::MAX_NUM_SCRIPT_RANGES < data->scriptStartsLength) {

	374 errorCode = U_INVALID_FORMAT_ERROR;

	375 return;

	376 }

	377 data->scriptsIndex = scripts + 1;

	378 data->scriptStarts = scripts + 1 + data->numScripts + 16;

	379 if(!(data->scriptStarts[0] == 0 &&

	380 data->scriptStarts[1] == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8) &&

	381 data->scriptStarts[data->scriptStartsLength - 1] ==

	382 (Collation::TRAIL_WEIGHT_BYTE << 8))) {

	383 errorCode = U_INVALID_FORMAT_ERROR;

	384 return;

	385 }

342 } else if(data == NULL) {	386 } else if(data == NULL) {

343 // Nothing to do.	387 // Nothing to do.

344 } else if(baseData != NULL) {	388 } else if(baseData != NULL) {

345 data->scripts = baseData->scripts;	389 data->numScripts = baseData->numScripts;

346 data->scriptsLength = baseData->scriptsLength;	390 data->scriptsIndex = baseData->scriptsIndex;

	391 data->scriptStarts = baseData->scriptStarts;

	392 data->scriptStartsLength = baseData->scriptStartsLength;

347 }	393 }

348	394

349 index = IX_COMPRESSIBLE_BYTES_OFFSET;	395 index = IX_COMPRESSIBLE_BYTES_OFFSET;

350 offset = getIndex(inIndexes, indexesLength, index);	396 offset = getIndex(inIndexes, indexesLength, index);

351 length = getIndex(inIndexes, indexesLength, index + 1) - offset;	397 length = getIndex(inIndexes, indexesLength, index + 1) - offset;

352 if(length >= 256) {	398 if(length >= 256) {

353 if(data == NULL) {	399 if(data == NULL) {

354 errorCode = U_INVALID_FORMAT_ERROR;	400 errorCode = U_INVALID_FORMAT_ERROR;

355 return;	401 return;

356 }	402 }

(...skipping 29 matching lines...) Expand all Loading...
386 }	432 }

387 settings->options = options;	433 settings->options = options;

388 // Set variableTop from options and scripts data.	434 // Set variableTop from options and scripts data.

389 settings->variableTop = tailoring.data->getLastPrimaryForGroup(	435 settings->variableTop = tailoring.data->getLastPrimaryForGroup(

390 UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());	436 UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());

391 if(settings->variableTop == 0) {	437 if(settings->variableTop == 0) {

392 errorCode = U_INVALID_FORMAT_ERROR;	438 errorCode = U_INVALID_FORMAT_ERROR;

393 return;	439 return;

394 }	440 }

395	441

396 if(reorderCodesLength == 0 || reorderTable != NULL) {	442 if(reorderCodesLength != 0) {

397 settings->aliasReordering(reorderCodes, reorderCodesLength, reorderTable);	443 settings->aliasReordering(*baseData, reorderCodes, reorderCodesLength,

398 } else {	444 reorderRanges, reorderRangesLength,

399 uint8_t table[256];	445 reorderTable, errorCode);

400 baseData->makeReorderTable(reorderCodes, reorderCodesLength, table, errorCode);

401 if(U_FAILURE(errorCode)) { return; }

402 if(!settings->setReordering(reorderCodes, reorderCodesLength,table)) {

403 errorCode = U_MEMORY_ALLOCATION_ERROR;

404 return;

405 }

406 }	446 }

407	447

408 settings->fastLatinOptions = CollationFastLatin::getOptions(	448 settings->fastLatinOptions = CollationFastLatin::getOptions(

409 tailoring.data, *settings,	449 tailoring.data, *settings,

410 settings->fastLatinPrimaries, UPRV_LENGTHOF(settings->fastLatinPrimaries));	450 settings->fastLatinPrimaries, UPRV_LENGTHOF(settings->fastLatinPrimaries));

411 }	451 }

412	452

413 UBool U_CALLCONV	453 UBool U_CALLCONV

414 CollationDataReader::isAcceptable(void *context,	454 CollationDataReader::isAcceptable(void *context,

415 const char * /* type */, const char * /*name*/,	455 const char * /* type */, const char * /*name*/,

416 const UDataInfo *pInfo) {	456 const UDataInfo *pInfo) {

417 if(	457 if(

418 pInfo->size >= 20 &&	458 pInfo->size >= 20 &&

419 pInfo->isBigEndian == U_IS_BIG_ENDIAN &&	459 pInfo->isBigEndian == U_IS_BIG_ENDIAN &&

420 pInfo->charsetFamily == U_CHARSET_FAMILY &&	460 pInfo->charsetFamily == U_CHARSET_FAMILY &&

421 pInfo->dataFormat[0] == 0x55 && // dataFormat="UCol"	461 pInfo->dataFormat[0] == 0x55 && // dataFormat="UCol"

422 pInfo->dataFormat[1] == 0x43 &&	462 pInfo->dataFormat[1] == 0x43 &&

423 pInfo->dataFormat[2] == 0x6f &&	463 pInfo->dataFormat[2] == 0x6f &&

424 pInfo->dataFormat[3] == 0x6c &&	464 pInfo->dataFormat[3] == 0x6c &&

425 pInfo->formatVersion[0] == 4	465 pInfo->formatVersion[0] == 5

426 ) {	466 ) {

427 UVersionInfo *version = static_cast<UVersionInfo *>(context);	467 UVersionInfo *version = static_cast<UVersionInfo *>(context);

428 if(version != NULL) {	468 if(version != NULL) {

429 uprv_memcpy(version, pInfo->dataVersion, 4);	469 uprv_memcpy(version, pInfo->dataVersion, 4);

430 }	470 }

431 return TRUE;	471 return TRUE;

432 } else {	472 } else {

433 return FALSE;	473 return FALSE;

434 }	474 }

435 }	475 }

436	476

437 U_NAMESPACE_END	477 U_NAMESPACE_END

438	478

439 #endif // !UCONFIG_NO_COLLATION	479 #endif // !UCONFIG_NO_COLLATION

OLD	NEW

« no previous file with comments | « source/i18n/collationdatareader.h ('k') | source/i18n/collationdatawriter.cpp » ('j') | no next file with comments »