icu46/source/i18n/ucol.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/ucol.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 * Copyright (C) 1996-2010, International Business Machines

	4 * Corporation and others. All Rights Reserved.

	5 *******************************************************************************

	6 * file name: ucol.cpp

	7 * encoding: US-ASCII

	8 * tab size: 8 (not used)

	9 * indentation:4

	10 *

	11 * Modification history

	12 * Date Name Comments

	13 * 1996-1999 various members of ICU team maintained C API for collation framework

	14 * 02/16/2001 synwee Added internal method getPrevSpecialCE

	15 * 03/01/2001 synwee Added maxexpansion functionality.

	16 * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant

	17 */

	18

	19 #include "unicode/utypes.h"

	20

	21 #if !UCONFIG_NO_COLLATION

	22

	23 #include "unicode/coleitr.h"

	24 #include "unicode/unorm.h"

	25 #include "unicode/udata.h"

	26 #include "unicode/ustring.h"

	27

	28 #include "ucol_imp.h"

	29 #include "bocsu.h"

	30

	31 #include "normalizer2impl.h"

	32 #include "unorm_it.h"

	33 #include "umutex.h"

	34 #include "cmemory.h"

	35 #include "ucln_in.h"

	36 #include "cstring.h"

	37 #include "utracimp.h"

	38 #include "putilimp.h"

	39 #include "uassert.h"

	40

	41 #ifdef UCOL_DEBUG

	42 #include <stdio.h>

	43 #endif

	44

	45 U_NAMESPACE_USE

	46

	47 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

	48

	49 #define LAST_BYTE_MASK_ 0xFF

	50 #define SECOND_LAST_BYTE_SHIFT_ 8

	51

	52 #define ZERO_CC_LIMIT_ 0xC0

	53

	54 // this is static pointer to the normalizer fcdTrieIndex

	55 // it is always the same between calls to u_cleanup

	56 // and therefore writing to it is not synchronized.

	57 // It is cleaned in ucol_cleanup

	58 static const uint16_t *fcdTrieIndex=NULL;

	59 // Code points at fcdHighStart and above have a zero FCD value.

	60 static UChar32 fcdHighStart = 0;

	61

	62 // These are values from UCA required for

	63 // implicit generation and suppressing sort key compression

	64 // they should regularly be in the UCA, but if one

	65 // is running without UCA, it could be a problem

	66 static const int32_t maxRegularPrimary = 0x7A;

	67 static const int32_t minImplicitPrimary = 0xE0;

	68 static const int32_t maxImplicitPrimary = 0xE4;

	69

	70 U_CDECL_BEGIN

	71 static UBool U_CALLCONV

	72 ucol_cleanup(void)

	73 {

	74 fcdTrieIndex = NULL;

	75 return TRUE;

	76 }

	77

	78 static int32_t U_CALLCONV

	79 _getFoldingOffset(uint32_t data) {

	80 return (int32_t)(data&0xFFFFFF);

	81 }

	82

	83 U_CDECL_END

	84

	85 // init FCD data

	86 static inline

	87 UBool initializeFCD(UErrorCode *status) {

	88 if (fcdTrieIndex != NULL) {

	89 return TRUE;

	90 } else {

	91 // The result is constant, until the library is reloaded.

	92 fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);

	93 ucln_i18n_registerCleanup(UCLN_I18N_UCOL, ucol_cleanup);

	94 return U_SUCCESS(*status);

	95 }

	96 }

	97

	98 static

	99 inline void IInit_collIterate(const UCollator collator, const UChar sourceStri ng,

	100 int32_t sourceLen, collIterate *s,

	101 UErrorCode *status)

	102 {

	103 (s)->string = (s)->pos = sourceString;

	104 (s)->origFlags = 0;

	105 (s)->flags = 0;

	106 if (sourceLen >= 0) {

	107 s->flags \|= UCOL_ITER_HASLEN;

	108 (s)->endp = (UChar *)sourceString+sourceLen;

	109 }

	110 else {

	111 /* change to enable easier checking for end of string for fcdpositon */

	112 (s)->endp = NULL;

	113 }

	114 (s)->extendCEs = NULL;

	115 (s)->extendCEsSize = 0;

	116 (s)->CEpos = (s)->toReturn = (s)->CEs;

	117 (s)->offsetBuffer = NULL;

	118 (s)->offsetBufferSize = 0;

	119 (s)->offsetReturn = (s)->offsetStore = NULL;

	120 (s)->offsetRepeatCount = (s)->offsetRepeatValue = 0;

	121 (s)->coll = (collator);

	122 (s)->nfd = Normalizer2Factory::getNFDInstance(*status);

	123 (s)->fcdPosition = 0;

	124 if(collator->normalizationMode == UCOL_ON) {

	125 (s)->flags \|= UCOL_ITER_NORM;

	126 }

	127 if(collator->hiraganaQ == UCOL_ON && collator->strength >= UCOL_QUATERNARY) {

	128 (s)->flags \|= UCOL_HIRAGANA_Q;

	129 }

	130 (s)->iterator = NULL;

	131 //(s)->iteratorIndex = 0;

	132 }

	133

	134 U_CAPI void U_EXPORT2

	135 uprv_init_collIterate(const UCollator collator, const UChar sourceString,

	136 int32_t sourceLen, collIterate *s,

	137 UErrorCode *status) {

	138 /* Out-of-line version for use from other files. */

	139 IInit_collIterate(collator, sourceString, sourceLen, s, status);

	140 }

	141

	142 U_CAPI collIterate * U_EXPORT2

	143 uprv_new_collIterate(UErrorCode *status) {

	144 if(U_FAILURE(*status)) {

	145 return NULL;

	146 }

	147 collIterate *s = new collIterate;

	148 if(s == NULL) {

	149 *status = U_MEMORY_ALLOCATION_ERROR;

	150 return NULL;

	151 }

	152 return s;

	153 }

	154

	155 U_CAPI void U_EXPORT2

	156 uprv_delete_collIterate(collIterate *s) {

	157 delete s;

	158 }

	159

	160 U_CAPI UBool U_EXPORT2

	161 uprv_collIterateAtEnd(collIterate *s) {

	162 return s == NULL \|\| s->pos == s->endp;

	163 }

	164

	165 /**

	166 * Backup the state of the collIterate struct data

	167 * @param data collIterate to backup

	168 * @param backup storage

	169 */

	170 static

	171 inline void backupState(const collIterate data, collIterateState backup)

	172 {

	173 backup->fcdPosition = data->fcdPosition;

	174 backup->flags = data->flags;

	175 backup->origFlags = data->origFlags;

	176 backup->pos = data->pos;

	177 backup->bufferaddress = data->writableBuffer.getBuffer();

	178 backup->buffersize = data->writableBuffer.length();

	179 backup->iteratorMove = 0;

	180 backup->iteratorIndex = 0;

	181 if(data->iterator != NULL) {

	182 //backup->iteratorIndex = data->iterator->getIndex(data->iterator, UITER _CURRENT);

	183 backup->iteratorIndex = data->iterator->getState(data->iterator);

	184 // no we try to fixup if we're using a normalizing iterator and we get U ITER_NO_STATE

	185 if(backup->iteratorIndex == UITER_NO_STATE) {

	186 while((backup->iteratorIndex = data->iterator->getState(data->iterat or)) == UITER_NO_STATE) {

	187 backup->iteratorMove++;

	188 data->iterator->move(data->iterator, -1, UITER_CURRENT);

	189 }

	190 data->iterator->move(data->iterator, backup->iteratorMove, UITER_CUR RENT);

	191 }

	192 }

	193 }

	194

	195 /**

	196 * Loads the state into the collIterate struct data

	197 * @param data collIterate to backup

	198 * @param backup storage

	199 * @param forwards boolean to indicate if forwards iteration is used,

	200 * false indicates backwards iteration

	201 */

	202 static

	203 inline void loadState(collIterate data, const collIterateState backup,

	204 UBool forwards)

	205 {

	206 UErrorCode status = U_ZERO_ERROR;

	207 data->flags = backup->flags;

	208 data->origFlags = backup->origFlags;

	209 if(data->iterator != NULL) {

	210 //data->iterator->move(data->iterator, backup->iteratorIndex, UITER_ZERO );

	211 data->iterator->setState(data->iterator, backup->iteratorIndex, &status) ;

	212 if(backup->iteratorMove != 0) {

	213 data->iterator->move(data->iterator, backup->iteratorMove, UITER_CUR RENT);

	214 }

	215 }

	216 data->pos = backup->pos;

	217

	218 if ((data->flags & UCOL_ITER_INNORMBUF) &&

	219 data->writableBuffer.getBuffer() != backup->bufferaddress) {

	220 /*

	221 this is when a new buffer has been reallocated and we'll have to

	222 calculate the new position.

	223 note the new buffer has to contain the contents of the old buffer.

	224 */

	225 if (forwards) {

	226 data->pos = data->writableBuffer.getTerminatedBuffer() +

	227 (data->pos - backup->bufferaddress);

	228 }

	229 else {

	230 /* backwards direction */

	231 int32_t temp = backup->buffersize -

	232 (int32_t)(data->pos - backup->bufferaddress);

	233 data->pos = data->writableBuffer.getTerminatedBuffer() + (data->writ ableBuffer.length() - temp);

	234 }

	235 }

	236 if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {

	237 /*

	238 this is alittle tricky.

	239 if we are initially not in the normalization buffer, even if we

	240 normalize in the later stage, the data in the buffer will be

	241 ignored, since we skip back up to the data string.

	242 however if we are already in the normalization buffer, any

	243 further normalization will pull data into the normalization

	244 buffer and modify the fcdPosition.

	245 since we are keeping the data in the buffer for use, the

	246 fcdPosition can not be reverted back.

	247 arrgghh....

	248 */

	249 data->fcdPosition = backup->fcdPosition;

	250 }

	251 }

	252

	253 static UBool

	254 reallocCEs(collIterate *data, int32_t newCapacity) {

	255 uint32_t *oldCEs = data->extendCEs;

	256 if(oldCEs == NULL) {

	257 oldCEs = data->CEs;

	258 }

	259 int32_t length = data->CEpos - oldCEs;

	260 uint32_t newCEs = (uint32_t )uprv_malloc(newCapacity * 4);

	261 if(newCEs == NULL) {

	262 return FALSE;

	263 }

	264 uprv_memcpy(newCEs, oldCEs, length * 4);

	265 uprv_free(data->extendCEs);

	266 data->extendCEs = newCEs;

	267 data->extendCEsSize = newCapacity;

	268 data->CEpos = newCEs + length;

	269 return TRUE;

	270 }

	271

	272 static UBool

	273 increaseCEsCapacity(collIterate *data) {

	274 int32_t oldCapacity;

	275 if(data->extendCEs != NULL) {

	276 oldCapacity = data->extendCEsSize;

	277 } else {

	278 oldCapacity = LENGTHOF(data->CEs);

	279 }

	280 return reallocCEs(data, 2 * oldCapacity);

	281 }

	282

	283 static UBool

	284 ensureCEsCapacity(collIterate *data, int32_t minCapacity) {

	285 int32_t oldCapacity;

	286 if(data->extendCEs != NULL) {

	287 oldCapacity = data->extendCEsSize;

	288 } else {

	289 oldCapacity = LENGTHOF(data->CEs);

	290 }

	291 if(minCapacity <= oldCapacity) {

	292 return TRUE;

	293 }

	294 oldCapacity *= 2;

	295 return reallocCEs(data, minCapacity > oldCapacity ? minCapacity : oldCapacit y);

	296 }

	297

	298 void collIterate::appendOffset(int32_t offset, UErrorCode &errorCode) {

	299 if(U_FAILURE(errorCode)) {

	300 return;

	301 }

	302 int32_t length = offsetStore == NULL ? 0 : (int32_t)(offsetStore - offsetBuf fer);

	303 if(length >= offsetBufferSize) {

	304 int32_t newCapacity = 2 * offsetBufferSize + UCOL_EXPAND_CE_BUFFER_SIZE;

	305 int32_t newBuffer = reinterpret_cast<int32_t >(uprv_malloc(newCapacity * 4));

	306 if(newBuffer == NULL) {

	307 errorCode = U_MEMORY_ALLOCATION_ERROR;

	308 return;

	309 }

	310 if(length > 0) {

	311 uprv_memcpy(newBuffer, offsetBuffer, length * 4);

	312 }

	313 uprv_free(offsetBuffer);

	314 offsetBuffer = newBuffer;

	315 offsetStore = offsetBuffer + length;

	316 offsetBufferSize = newCapacity;

	317 }

	318 *offsetStore++ = offset;

	319 }

	320

	321 /*

	322 * collIter_eos()

	323 * Checks for a collIterate being positioned at the end of

	324 * its source string.

	325 *

	326 */

	327 static

	328 inline UBool collIter_eos(collIterate *s) {

	329 if(s->flags & UCOL_USE_ITERATOR) {

	330 return !(s->iterator->hasNext(s->iterator));

	331 }

	332 if ((s->flags & UCOL_ITER_HASLEN) == 0 && *s->pos != 0) {

	333 // Null terminated string, but not at null, so not at end.

	334 // Whether in main or normalization buffer doesn't matter.

	335 return FALSE;

	336 }

	337

	338 // String with length. Can't be in normalization buffer, which is always

	339 // null termintated.

	340 if (s->flags & UCOL_ITER_HASLEN) {

	341 return (s->pos == s->endp);

	342 }

	343

	344 // We are at a null termination, could be either normalization buffer or mai n string.

	345 if ((s->flags & UCOL_ITER_INNORMBUF) == 0) {

	346 // At null at end of main string.

	347 return TRUE;

	348 }

	349

	350 // At null at end of normalization buffer. Need to check whether there ther e are

	351 // any characters left in the main buffer.

	352 if(s->origFlags & UCOL_USE_ITERATOR) {

	353 return !(s->iterator->hasNext(s->iterator));

	354 } else if ((s->origFlags & UCOL_ITER_HASLEN) == 0) {

	355 // Null terminated main string. fcdPosition is the 'return' position in to main buf.

	356 return (*s->fcdPosition == 0);

	357 }

	358 else {

	359 // Main string with an end pointer.

	360 return s->fcdPosition == s->endp;

	361 }

	362 }

	363

	364 /*

	365 * collIter_bos()

	366 * Checks for a collIterate being positioned at the start of

	367 * its source string.

	368 *

	369 */

	370 static

	371 inline UBool collIter_bos(collIterate *source) {

	372 // if we're going backwards, we need to know whether there is more in the

	373 // iterator, even if we are in the side buffer

	374 if(source->flags & UCOL_USE_ITERATOR \|\| source->origFlags & UCOL_USE_ITERATOR) {

	375 return !source->iterator->hasPrevious(source->iterator);

	376 }

	377 if (source->pos <= source->string \|\|

	378 ((source->flags & UCOL_ITER_INNORMBUF) &&

	379 *(source->pos - 1) == 0 && source->fcdPosition == NULL)) {

	380 return TRUE;

	381 }

	382 return FALSE;

	383 }

	384

	385 /*static

	386 inline UBool collIter_SimpleBos(collIterate *source) {

	387 // if we're going backwards, we need to know whether there is more in the

	388 // iterator, even if we are in the side buffer

	389 if(source->flags & UCOL_USE_ITERATOR \|\| source->origFlags & UCOL_USE_ITERATOR) {

	390 return !source->iterator->hasPrevious(source->iterator);

	391 }

	392 if (source->pos == source->string) {

	393 return TRUE;

	394 }

	395 return FALSE;

	396 }*/

	397 //return (data->pos == data->string) \|\|

	398

	399

	400 /****************************************************************************/

	401 /* Following are the open/close functions */

	402 /* */

	403 /****************************************************************************/

	404

	405 static UCollator*

	406 ucol_initFromBinary(const uint8_t *bin, int32_t length,

	407 const UCollator *base,

	408 UCollator *fillIn,

	409 UErrorCode *status)

	410 {

	411 UCollator *result = fillIn;

	412 if(U_FAILURE(*status)) {

	413 return NULL;

	414 }

	415 /*

	416 if(base == NULL) {

	417 // we don't support null base yet

	418 *status = U_ILLEGAL_ARGUMENT_ERROR;

	419 return NULL;

	420 }

	421 */

	422 // We need these and we could be running without UCA

	423 uprv_uca_initImplicitConstants(status);

	424 UCATableHeader colData = (UCATableHeader )bin;

	425 // do we want version check here? We're trying to figure out whether collato rs are compatible

	426 if((base && (uprv_memcmp(colData->UCAVersion, base->image->UCAVersion, sizeo f(UVersionInfo)) != 0 \|\|

	427 uprv_memcmp(colData->UCDVersion, base->image->UCDVersion, sizeof(UVersio nInfo)) != 0)) \|\|

	428 colData->version[0] != UCOL_BUILDER_VERSION)

	429 {

	430 *status = U_COLLATOR_VERSION_MISMATCH;

	431 return NULL;

	432 }

	433 else {

	434 if((uint32_t)length > (paddedsize(sizeof(UCATableHeader)) + paddedsize(s izeof(UColOptionSet)))) {

	435 result = ucol_initCollator((const UCATableHeader *)bin, result, base , status);

	436 if(U_FAILURE(*status)){

	437 return NULL;

	438 }

	439 result->hasRealData = TRUE;

	440 }

	441 else {

	442 if(base) {

	443 result = ucol_initCollator(base->image, result, base, status);

	444 ucol_setOptionsFromHeader(result, (UColOptionSet )(bin+((const UCATableHeader )bin)->options), status);

	445 if(U_FAILURE(*status)){

	446 return NULL;

	447 }

	448 result->hasRealData = FALSE;

	449 }

	450 else {

	451 *status = U_USELESS_COLLATOR_ERROR;

	452 return NULL;

	453 }

	454 }

	455 result->freeImageOnClose = FALSE;

	456 }

	457 result->actualLocale = NULL;

	458 result->validLocale = NULL;

	459 result->requestedLocale = NULL;

	460 result->rules = NULL;

	461 result->rulesLength = 0;

	462 result->freeRulesOnClose = FALSE;

	463 result->ucaRules = NULL;

	464 return result;

	465 }

	466

	467 U_CAPI UCollator* U_EXPORT2

	468 ucol_openBinary(const uint8_t *bin, int32_t length,

	469 const UCollator *base,

	470 UErrorCode *status)

	471 {

	472 return ucol_initFromBinary(bin, length, base, NULL, status);

	473 }

	474

	475 U_CAPI int32_t U_EXPORT2

	476 ucol_cloneBinary(const UCollator *coll,

	477 uint8_t *buffer, int32_t capacity,

	478 UErrorCode *status)

	479 {

	480 int32_t length = 0;

	481 if(U_FAILURE(*status)) {

	482 return length;

	483 }

	484 if(capacity < 0) {

	485 *status = U_ILLEGAL_ARGUMENT_ERROR;

	486 return length;

	487 }

	488 if(coll->hasRealData == TRUE) {

	489 length = coll->image->size;

	490 if(length <= capacity) {

	491 uprv_memcpy(buffer, coll->image, length);

	492 } else {

	493 *status = U_BUFFER_OVERFLOW_ERROR;

	494 }

	495 } else {

	496 length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof( UColOptionSet)));

	497 if(length <= capacity) {

	498 /* build the UCATableHeader with minimal entries */

	499 /* do not copy the header from the UCA file because its values are w rong! */

	500 /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */

	501

	502 /* reset everything */

	503 uprv_memset(buffer, 0, length);

	504

	505 /* set the tailoring-specific values */

	506 UCATableHeader myData = (UCATableHeader )buffer;

	507 myData->size = length;

	508

	509 /* offset for the options, the only part of the data that is present after the header */

	510 myData->options = sizeof(UCATableHeader);

	511

	512 /* need to always set the expansion value for an upper bound of the options */

	513 myData->expansion = myData->options + sizeof(UColOptionSet);

	514

	515 myData->magic = UCOL_HEADER_MAGIC;

	516 myData->isBigEndian = U_IS_BIG_ENDIAN;

	517 myData->charSetFamily = U_CHARSET_FAMILY;

	518

	519 /* copy UCA's version; genrb will override all but the builder versi on with tailoring data */

	520 uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionIn fo));

	521

	522 uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVer sionInfo));

	523 uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVer sionInfo));

	524 uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeo f(UVersionInfo));

	525 myData->jamoSpecial = coll->image->jamoSpecial;

	526

	527 /* copy the collator options */

	528 uprv_memcpy(buffer+paddedsize(sizeof(UCATableHeader)), coll->options , sizeof(UColOptionSet));

	529 } else {

	530 *status = U_BUFFER_OVERFLOW_ERROR;

	531 }

	532 }

	533 return length;

	534 }

	535

	536 U_CAPI UCollator* U_EXPORT2

	537 ucol_safeClone(const UCollator coll, void stackBuffer, int32_t * pBufferSize, UErrorCode *status)

	538 {

	539 UCollator * localCollator;

	540 int32_t bufferSizeNeeded = (int32_t)sizeof(UCollator);

	541 char stackBufferChars = (char )stackBuffer;

	542 int32_t imageSize = 0;

	543 int32_t rulesSize = 0;

	544 int32_t rulesPadding = 0;

	545 uint8_t *image;

	546 UChar *rules;

	547 UBool colAllocated = FALSE;

	548 UBool imageAllocated = FALSE;

	549

	550 if (status == NULL \|\| U_FAILURE(*status)){

	551 return 0;

	552 }

	553 if ((stackBuffer && !pBufferSize) \|\| !coll){

	554 *status = U_ILLEGAL_ARGUMENT_ERROR;

	555 return 0;

	556 }

	557 if (coll->rules && coll->freeRulesOnClose) {

	558 rulesSize = (int32_t)(coll->rulesLength + 1)*sizeof(UChar);

	559 rulesPadding = (int32_t)(bufferSizeNeeded % sizeof(UChar));

	560 bufferSizeNeeded += rulesSize + rulesPadding;

	561 }

	562

	563 if (stackBuffer && pBufferSize <= 0){ / 'preflighting' request - set neede d size into pBufferSize /

	564 *pBufferSize = bufferSizeNeeded;

	565 return 0;

	566 }

	567

	568 /* Pointers on 64-bit platforms need to be aligned

	569 * on a 64-bit boundry in memory.

	570 */

	571 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {

	572 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);

	573 if (*pBufferSize > offsetUp) {

	574 *pBufferSize -= offsetUp;

	575 stackBufferChars += offsetUp;

	576 }

	577 else {

	578 /* prevent using the stack buffer but keep the size > 0 so that we d o not just preflight */

	579 *pBufferSize = 1;

	580 }

	581 }

	582 stackBuffer = (void *)stackBufferChars;

	583

	584 if (stackBuffer == NULL \|\| *pBufferSize < bufferSizeNeeded) {

	585 /* allocate one here...*/

	586 stackBufferChars = (char *)uprv_malloc(bufferSizeNeeded);

	587 // Null pointer check.

	588 if (stackBufferChars == NULL) {

	589 *status = U_MEMORY_ALLOCATION_ERROR;

	590 return NULL;

	591 }

	592 colAllocated = TRUE;

	593 if (U_SUCCESS(*status)) {

	594 *status = U_SAFECLONE_ALLOCATED_WARNING;

	595 }

	596 }

	597 localCollator = (UCollator *)stackBufferChars;

	598 rules = (UChar *)(stackBufferChars + sizeof(UCollator) + rulesPadding);

	599 {

	600 UErrorCode tempStatus = U_ZERO_ERROR;

	601 imageSize = ucol_cloneBinary(coll, NULL, 0, &tempStatus);

	602 }

	603 if (coll->freeImageOnClose) {

	604 image = (uint8_t *)uprv_malloc(imageSize);

	605 // Null pointer check

	606 if (image == NULL) {

	607 *status = U_MEMORY_ALLOCATION_ERROR;

	608 return NULL;

	609 }

	610 ucol_cloneBinary(coll, image, imageSize, status);

	611 imageAllocated = TRUE;

	612 }

	613 else {

	614 image = (uint8_t *)coll->image;

	615 }

	616 localCollator = ucol_initFromBinary(image, imageSize, coll->UCA, localCollat or, status);

	617 if (U_FAILURE(*status)) {

	618 return NULL;

	619 }

	620

	621 if (coll->rules) {

	622 if (coll->freeRulesOnClose) {

	623 localCollator->rules = u_strcpy(rules, coll->rules);

	624 //bufferEnd += rulesSize;

	625 }

	626 else {

	627 localCollator->rules = coll->rules;

	628 }

	629 localCollator->freeRulesOnClose = FALSE;

	630 localCollator->rulesLength = coll->rulesLength;

	631 }

	632

	633 int32_t i;

	634 for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {

	635 ucol_setAttribute(localCollator, (UColAttribute)i, ucol_getAttribute(col l, (UColAttribute)i, status), status);

	636 }

	637 // zero copies of pointers

	638 localCollator->actualLocale = NULL;

	639 localCollator->validLocale = NULL;

	640 localCollator->requestedLocale = NULL;

	641 localCollator->ucaRules = coll->ucaRules; // There should only be one copy h ere.

	642 localCollator->freeOnClose = colAllocated;

	643 localCollator->freeImageOnClose = imageAllocated;

	644 return localCollator;

	645 }

	646

	647 U_CAPI void U_EXPORT2

	648 ucol_close(UCollator *coll)

	649 {

	650 UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);

	651 UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);

	652 if(coll != NULL) {

	653 // these are always owned by each UCollator struct,

	654 // so we always free them

	655 if(coll->validLocale != NULL) {

	656 uprv_free(coll->validLocale);

	657 }

	658 if(coll->actualLocale != NULL) {

	659 uprv_free(coll->actualLocale);

	660 }

	661 if(coll->requestedLocale != NULL) {

	662 uprv_free(coll->requestedLocale);

	663 }

	664 if(coll->latinOneCEs != NULL) {

	665 uprv_free(coll->latinOneCEs);

	666 }

	667 if(coll->options != NULL && coll->freeOptionsOnClose) {

	668 uprv_free(coll->options);

	669 }

	670 if(coll->rules != NULL && coll->freeRulesOnClose) {

	671 uprv_free((UChar *)coll->rules);

	672 }

	673 if(coll->image != NULL && coll->freeImageOnClose) {

	674 uprv_free((UCATableHeader *)coll->image);

	675 }

	676 if(coll->leadBytePermutationTable != NULL) {

	677 uprv_free(coll->leadBytePermutationTable);

	678 }

	679 if(coll->reorderCodes != NULL) {

	680 uprv_free(coll->reorderCodes);

	681 }

	682

	683 /* Here, it would be advisable to close: */

	684 /* - UData for UCA (unless we stuff it in the root resb */

	685 /* Again, do we need additional housekeeping... HMMM! */

	686 UTRACE_DATA1(UTRACE_INFO, "coll->freeOnClose: %d", coll->freeOnClose);

	687 if(coll->freeOnClose){

	688 /* for safeClone, if freeOnClose is FALSE,

	689 don't free the other instance data */

	690 uprv_free(coll);

	691 }

	692 }

	693 UTRACE_EXIT();

	694 }

	695

	696 /* This one is currently used by genrb & tests. After constructing from rules (t ailoring),*/

	697 /* you should be able to get the binary chunk to write out... Doesn't look very full now */

	698 U_CFUNC uint8_t* U_EXPORT2

	699 ucol_cloneRuleData(const UCollator coll, int32_t length, UErrorCode *status)

	700 {

	701 uint8_t *result = NULL;

	702 if(U_FAILURE(*status)) {

	703 return NULL;

	704 }

	705 if(coll->hasRealData == TRUE) {

	706 *length = coll->image->size;

	707 result = (uint8_t )uprv_malloc(length);

	708 /* test for NULL */

	709 if (result == NULL) {

	710 *status = U_MEMORY_ALLOCATION_ERROR;

	711 return NULL;

	712 }

	713 uprv_memcpy(result, coll->image, *length);

	714 } else {

	715 *length = (int32_t)(paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof (UColOptionSet)));

	716 result = (uint8_t )uprv_malloc(length);

	717 /* test for NULL */

	718 if (result == NULL) {

	719 *status = U_MEMORY_ALLOCATION_ERROR;

	720 return NULL;

	721 }

	722

	723 /* build the UCATableHeader with minimal entries */

	724 /* do not copy the header from the UCA file because its values are wrong ! */

	725 /* uprv_memcpy(result, UCA->image, sizeof(UCATableHeader)); */

	726

	727 /* reset everything */

	728 uprv_memset(result, 0, *length);

	729

	730 /* set the tailoring-specific values */

	731 UCATableHeader myData = (UCATableHeader )result;

	732 myData->size = *length;

	733

	734 /* offset for the options, the only part of the data that is present aft er the header */

	735 myData->options = sizeof(UCATableHeader);

	736

	737 /* need to always set the expansion value for an upper bound of the opti ons */

	738 myData->expansion = myData->options + sizeof(UColOptionSet);

	739

	740 myData->magic = UCOL_HEADER_MAGIC;

	741 myData->isBigEndian = U_IS_BIG_ENDIAN;

	742 myData->charSetFamily = U_CHARSET_FAMILY;

	743

	744 /* copy UCA's version; genrb will override all but the builder version w ith tailoring data */

	745 uprv_memcpy(myData->version, coll->image->version, sizeof(UVersionInfo)) ;

	746

	747 uprv_memcpy(myData->UCAVersion, coll->image->UCAVersion, sizeof(UVersion Info));

	748 uprv_memcpy(myData->UCDVersion, coll->image->UCDVersion, sizeof(UVersion Info));

	749 uprv_memcpy(myData->formatVersion, coll->image->formatVersion, sizeof(UV ersionInfo));

	750 myData->jamoSpecial = coll->image->jamoSpecial;

	751

	752 /* copy the collator options */

	753 uprv_memcpy(result+paddedsize(sizeof(UCATableHeader)), coll->options, si zeof(UColOptionSet));

	754 }

	755 return result;

	756 }

	757

	758 void ucol_setOptionsFromHeader(UCollator* result, UColOptionSet * opts, UErrorCo de *status) {

	759 if(U_FAILURE(*status)) {

	760 return;

	761 }

	762 result->caseFirst = (UColAttributeValue)opts->caseFirst;

	763 result->caseLevel = (UColAttributeValue)opts->caseLevel;

	764 result->frenchCollation = (UColAttributeValue)opts->frenchCollation;

	765 result->normalizationMode = (UColAttributeValue)opts->normalizationMode;

	766 if(result->normalizationMode == UCOL_ON && !initializeFCD(status)) {

	767 return;

	768 }

	769 result->strength = (UColAttributeValue)opts->strength;

	770 result->variableTopValue = opts->variableTopValue;

	771 result->alternateHandling = (UColAttributeValue)opts->alternateHandling;

	772 result->hiraganaQ = (UColAttributeValue)opts->hiraganaQ;

	773 result->numericCollation = (UColAttributeValue)opts->numericCollation;

	774 result->caseFirstisDefault = TRUE;

	775 result->caseLevelisDefault = TRUE;

	776 result->frenchCollationisDefault = TRUE;

	777 result->normalizationModeisDefault = TRUE;

	778 result->strengthisDefault = TRUE;

	779 result->variableTopValueisDefault = TRUE;

	780 result->alternateHandlingisDefault = TRUE;

	781 result->hiraganaQisDefault = TRUE;

	782 result->numericCollationisDefault = TRUE;

	783

	784 ucol_updateInternalState(result, status);

	785

	786 result->options = opts;

	787 }

	788

	789

	790 /**

	791 * Approximate determination if a character is at a contraction end.

	792 * Guaranteed to be TRUE if a character is at the end of a contraction,

	793 * otherwise it is not deterministic.

	794 * @param c character to be determined

	795 * @param coll collator

	796 */

	797 static

	798 inline UBool ucol_contractionEndCP(UChar c, const UCollator *coll) {

	799 if (c < coll->minContrEndCP) {

	800 return FALSE;

	801 }

	802

	803 int32_t hash = c;

	804 uint8_t htbyte;

	805 if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) {

	806 if (U16_IS_TRAIL(c)) {

	807 return TRUE;

	808 }

	809 hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256;

	810 }

	811 htbyte = coll->contrEndCP[hash>>3];

	812 return (((htbyte >> (hash & 7)) & 1) == 1);

	813 }

	814

	815

	816

	817 /*

	818 * i_getCombiningClass()

	819 * A fast, at least partly inline version of u_getCombiningClass()

	820 * This is a candidate for further optimization. Used heavily

	821 * in contraction processing.

	822 */

	823 static

	824 inline uint8_t i_getCombiningClass(UChar32 c, const UCollator *coll) {

	825 uint8_t sCC = 0;

	826 if ((c >= 0x300 && ucol_unsafeCP(c, coll)) \|\| c > 0xFFFF) {

	827 sCC = u_getCombiningClass(c);

	828 }

	829 return sCC;

	830 }

	831

	832 UCollator* ucol_initCollator(const UCATableHeader image, UCollator fillIn, con st UCollator UCA, UErrorCode status) {

	833 UChar c;

	834 UCollator *result = fillIn;

	835 if(U_FAILURE(*status) \|\| image == NULL) {

	836 return NULL;

	837 }

	838

	839 if(result == NULL) {

	840 result = (UCollator *)uprv_malloc(sizeof(UCollator));

	841 if(result == NULL) {

	842 *status = U_MEMORY_ALLOCATION_ERROR;

	843 return result;

	844 }

	845 result->freeOnClose = TRUE;

	846 } else {

	847 result->freeOnClose = FALSE;

	848 }

	849

	850 result->image = image;

	851 result->mapping.getFoldingOffset = _getFoldingOffset;

	852 const uint8_t mapping = (uint8_t)result->image+result->image->mappingPosit ion;

	853 utrie_unserialize(&result->mapping, mapping, result->image->endExpansionCE - result->image->mappingPosition, status);

	854 if(U_FAILURE(*status)) {

	855 if(result->freeOnClose == TRUE) {

	856 uprv_free(result);

	857 result = NULL;

	858 }

	859 return result;

	860 }

	861

	862 result->latinOneMapping = UTRIE_GET32_LATIN1(&result->mapping);

	863 result->contractionCEs = (uint32_t)((uint8_t)result->image+result->image-> contractionCEs);

	864 result->contractionIndex = (UChar)((uint8_t)result->image+result->image->c ontractionIndex);

	865 result->expansion = (uint32_t)((uint8_t)result->image+result->image->expan sion);

	866 result->rules = NULL;

	867 result->rulesLength = 0;

	868 result->freeRulesOnClose = FALSE;

	869 result->reorderCodes = NULL;

	870 result->reorderCodesLength = 0;

	871 result->leadBytePermutationTable = NULL;

	872

	873 /* get the version info from UCATableHeader and populate the Collator struct */

	874 result->dataVersion[0] = result->image->version[0]; /* UCA Builder version*/

	875 result->dataVersion[1] = result->image->version[1]; /* UCA Tailoring rules v ersion*/

	876 result->dataVersion[2] = 0;

	877 result->dataVersion[3] = 0;

	878

	879 result->unsafeCP = (uint8_t *)result->image + result->image->unsafeCP;

	880 result->minUnsafeCP = 0;

	881 for (c=0; c<0x300; c++) { // Find the smallest unsafe char.

	882 if (ucol_unsafeCP(c, result)) break;

	883 }

	884 result->minUnsafeCP = c;

	885

	886 result->contrEndCP = (uint8_t *)result->image + result->image->contrEndCP;

	887 result->minContrEndCP = 0;

	888 for (c=0; c<0x300; c++) { // Find the Contraction-ending char.

	889 if (ucol_contractionEndCP(c, result)) break;

	890 }

	891 result->minContrEndCP = c;

	892

	893 /* max expansion tables */

	894 result->endExpansionCE = (uint32_t)((uint8_t)result->image +

	895 result->image->endExpansionCE);

	896 result->lastEndExpansionCE = result->endExpansionCE +

	897 result->image->endExpansionCECount - 1;

	898 result->expansionCESize = (uint8_t*)result->image +

	899 result->image->expansionCESize;

	900

	901

	902 //result->errorCode = *status;

	903

	904 result->latinOneCEs = NULL;

	905

	906 result->latinOneRegenTable = FALSE;

	907 result->latinOneFailed = FALSE;

	908 result->UCA = UCA;

	909

	910 /* Normally these will be set correctly later. This is the default if you us e UCA or the default. */

	911 result->ucaRules = NULL;

	912 result->actualLocale = NULL;

	913 result->validLocale = NULL;

	914 result->requestedLocale = NULL;

	915 result->hasRealData = FALSE; // real data lives in .dat file...

	916 result->freeImageOnClose = FALSE;

	917

	918 /* set attributes */

	919 ucol_setOptionsFromHeader(

	920 result,

	921 (UColOptionSet)((uint8_t)result->image+result->image->options),

	922 status);

	923 result->freeOptionsOnClose = FALSE;

	924

	925 return result;

	926 }

	927

	928 /* new Mark's code */

	929

	930 /**

	931 * For generation of Implicit CEs

	932 * @author Davis

	933 *

	934 * Cleaned up so that changes can be made more easily.

	935 * Old values:

	936 # First Implicit: E26A792D

	937 # Last Implicit: E3DC70C0

	938 # First CJK: E0030300

	939 # Last CJK: E0A9DD00

	940 # First CJK_A: E0A9DF00

	941 # Last CJK_A: E0DE3100

	942 */

	943 /* Following is a port of Mark's code for new treatment of implicits.

	944 * It is positioned here, since ucol_initUCA need to initialize the

	945 * variables below according to the data in the fractional UCA.

	946 */

	947

	948 /**

	949 * Function used to:

	950 * a) collapse the 2 different Han ranges from UCA into one (in the right order) , and

	951 * b) bump any non-CJK characters by 10FFFF.

	952 * The relevant blocks are:

	953 * A: 4E00..9FFF; CJK Unified Ideographs

	954 * F900..FAFF; CJK Compatibility Ideographs

	955 * B: 3400..4DBF; CJK Unified Ideographs Extension A

	956 * 20000..XX; CJK Unified Ideographs Extension B (and others later on)

	957 * As long as

	958 * no new B characters are allocated between 4E00 and FAFF, and

	959 * no new A characters are outside of this range,

	960 * (very high probability) this simple code will work.

	961 * The reordered blocks are:

	962 * Block1 is CJK

	963 * Block2 is CJK_COMPAT_USED

	964 * Block3 is CJK_A

	965 * (all contiguous)

	966 * Any other CJK gets its normal code point

	967 * Any non-CJK gets +10FFFF

	968 * When we reorder Block1, we make sure that it is at the very start,

	969 * so that it will use a 3-byte form.

	970 * Warning: we only pick up the compatibility characters that are

	971 * NOT decomposed, so that block is smaller!

	972 */

	973

	974 // CONSTANTS

	975 static const UChar32

	976 NON_CJK_OFFSET = 0x110000,

	977 UCOL_MAX_INPUT = 0x220001; // 2 * Unicode range + 2

	978

	979 /**

	980 * Precomputed by initImplicitConstants()

	981 */

	// All of these start at zero and are filled in by initImplicitConstants().
	static int32_t final3Multiplier = 0;   // gap3 + 1: spacing of final bytes, 3-byte form
	static int32_t final4Multiplier = 0;   // gap4 + 1: spacing of final bytes, 4-byte form
	static int32_t final3Count = 0;        // number of final-byte values, 3-byte form
	static int32_t final4Count = 0;        // number of final-byte values, 4-byte form
	static int32_t medialCount = 0;        // maxTrail - minTrail + 1
	static int32_t min3Primary = 0;        // first lead byte (3-byte forms start here)
	static int32_t min4Primary = 0;        // first lead byte of the 4-byte forms
	static int32_t max4Primary = 0;        // last lead byte
	static int32_t minTrail = 0;           // lowest trailing byte value
	static int32_t maxTrail = 0;           // highest trailing byte value
	static int32_t max3Trail = 0;          // highest final byte actually used, 3-byte form
	static int32_t max4Trail = 0;          // highest final byte actually used, 4-byte form
	static int32_t min4Boundary = 0;       // first raw value encoded with 4 bytes

	996

	997 static const UChar32

	998 // 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;;

	999 // 9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;;

	1000 CJK_BASE = 0x4E00,

	1001 CJK_LIMIT = 0x9FCB+1,

	1002 // Unified CJK ideographs in the compatibility ideographs block.

	1003 CJK_COMPAT_USED_BASE = 0xFA0E,

	1004 CJK_COMPAT_USED_LIMIT = 0xFA2F+1,

	1005 // 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;

	1006 // 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;

	1007 CJK_A_BASE = 0x3400,

	1008 CJK_A_LIMIT = 0x4DB5+1,

	1009 // 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;;

	1010 // 2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;;

	1011 CJK_B_BASE = 0x20000,

	1012 CJK_B_LIMIT = 0x2A6D6+1,

	1013 // 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;;

	1014 // 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;;

	1015 CJK_C_BASE = 0x2A700,

	1016 CJK_C_LIMIT = 0x2B734+1,

	1017 // 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;;

	1018 // 2B81D;<CJK Ideograph Extension D, Last>;Lo;0;L;;;;;N;;;;;

	1019 CJK_D_BASE = 0x2B740,

	1020 CJK_D_LIMIT = 0x2B81D+1;

	1021 // when adding to this list, look for all occurrences (in project)

	1022 // of CJK_C_BASE and CJK_C_LIMIT, etc. to check for code that needs changing !!!!

	1023

	1024 static UChar32 swapCJK(UChar32 i) {

	1025 if (i < CJK_A_BASE) {

	1026 // non-CJK

	1027 } else if (i < CJK_A_LIMIT) {

	1028 // Extension A has lower code points than the original Unihan+compat

	1029 // but sorts higher.

	1030 return i - CJK_A_BASE

	1031 + (CJK_LIMIT - CJK_BASE)

	1032 + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE);

	1033 } else if (i < CJK_BASE) {

	1034 // non-CJK

	1035 } else if (i < CJK_LIMIT) {

	1036 return i - CJK_BASE;

	1037 } else if (i < CJK_COMPAT_USED_BASE) {

	1038 // non-CJK

	1039 } else if (i < CJK_COMPAT_USED_LIMIT) {

	1040 return i - CJK_COMPAT_USED_BASE

	1041 + (CJK_LIMIT - CJK_BASE);

	1042 } else if (i < CJK_B_BASE) {

	1043 // non-CJK

	1044 } else if (i < CJK_B_LIMIT) {

	1045 return i; // non-BMP-CJK

	1046 } else if (i < CJK_C_BASE) {

	1047 // non-CJK

	1048 } else if (i < CJK_C_LIMIT) {

	1049 return i; // non-BMP-CJK

	1050 } else if (i < CJK_D_BASE) {

	1051 // non-CJK

	1052 } else if (i < CJK_D_LIMIT) {

	1053 return i; // non-BMP-CJK

	1054 }

	1055 return i + NON_CJK_OFFSET; // non-CJK

	1056 }

	1057

	1058 U_CAPI UChar32 U_EXPORT2

	1059 uprv_uca_getRawFromCodePoint(UChar32 i) {

	1060 return swapCJK(i)+1;

	1061 }

	1062

	1063 U_CAPI UChar32 U_EXPORT2

	1064 uprv_uca_getCodePointFromRaw(UChar32 i) {

	1065 i--;

	1066 UChar32 result = 0;

	1067 if(i >= NON_CJK_OFFSET) {

	1068 result = i - NON_CJK_OFFSET;

	1069 } else if(i >= CJK_B_BASE) {

	1070 result = i;

	1071 } else if(i < CJK_A_LIMIT + (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMPAT_USED_BASE)) { // rest of CJKs, compacted

	1072 if(i < CJK_LIMIT - CJK_BASE) {

	1073 result = i + CJK_BASE;

	1074 } else if(i < (CJK_LIMIT - CJK_BASE) + (CJK_COMPAT_USED_LIMIT - CJK_COMP AT_USED_BASE)) {

	1075 result = i + CJK_COMPAT_USED_BASE - (CJK_LIMIT - CJK_BASE);

	1076 } else {

	1077 result = i + CJK_A_BASE - (CJK_LIMIT - CJK_BASE) - (CJK_COMPAT_USED_ LIMIT - CJK_COMPAT_USED_BASE);

	1078 }

	1079 } else {

	1080 result = -1;

	1081 }

	1082 return result;

	1083 }

	1084

	1085 // GET IMPLICIT PRIMARY WEIGHTS

	1086 // Return value is left justified primary key

	1087 U_CAPI uint32_t U_EXPORT2

	1088 uprv_uca_getImplicitFromRaw(UChar32 cp) {

	1089 /*

	1090 if (cp < 0 \|\| cp > UCOL_MAX_INPUT) {

	1091 throw new IllegalArgumentException("Code point out of range " + Utility. hex(cp));

	1092 }

	1093 */

	1094 int32_t last0 = cp - min4Boundary;

	1095 if (last0 < 0) {

	1096 int32_t last1 = cp / final3Count;

	1097 last0 = cp % final3Count;

	1098

	1099 int32_t last2 = last1 / medialCount;

	1100 last1 %= medialCount;

	1101

	1102 last0 = minTrail + last0*final3Multiplier; // spread out, leaving gap at start

	1103 last1 = minTrail + last1; // offset

	1104 last2 = min3Primary + last2; // offset

	1105 /*

	1106 if (last2 >= min4Primary) {

	1107 throw new IllegalArgumentException("4-byte out of range: " + Utility .hex(cp) + ", " + Utility.hex(last2));

	1108 }

	1109 */

	1110 return (last2 << 24) + (last1 << 16) + (last0 << 8);

	1111 } else {

	1112 int32_t last1 = last0 / final4Count;

	1113 last0 %= final4Count;

	1114

	1115 int32_t last2 = last1 / medialCount;

	1116 last1 %= medialCount;

	1117

	1118 int32_t last3 = last2 / medialCount;

	1119 last2 %= medialCount;

	1120

	1121 last0 = minTrail + last0*final4Multiplier; // spread out, leaving gap at start

	1122 last1 = minTrail + last1; // offset

	1123 last2 = minTrail + last2; // offset

	1124 last3 = min4Primary + last3; // offset

	1125 /*

	1126 if (last3 > max4Primary) {

	1127 throw new IllegalArgumentException("4-byte out of range: " + Utility .hex(cp) + ", " + Utility.hex(last3));

	1128 }

	1129 */

	1130 return (last3 << 24) + (last2 << 16) + (last1 << 8) + last0;

	1131 }

	1132 }

	1133

	1134 static uint32_t U_EXPORT2

	1135 uprv_uca_getImplicitPrimary(UChar32 cp) {

	1136 //fprintf(stdout, "Incoming: %04x\n", cp);

	1137 //if (DEBUG) System.out.println("Incoming: " + Utility.hex(cp));

	1138

	1139 cp = swapCJK(cp);

	1140 cp++;

	1141 // we now have a range of numbers from 0 to 21FFFF.

	1142

	1143 //if (DEBUG) System.out.println("CJK swapped: " + Utility.hex(cp));

	1144 //fprintf(stdout, "CJK swapped: %04x\n", cp);

	1145

	1146 return uprv_uca_getImplicitFromRaw(cp);

	1147 }

	1148

	1149 /**

	1150 * Converts implicit CE into raw integer ("code point")

	1151 * @param implicit

	1152 * @return -1 if illegal format

	1153 */

	1154 U_CAPI UChar32 U_EXPORT2

	1155 uprv_uca_getRawFromImplicit(uint32_t implicit) {

	1156 UChar32 result;

	1157 UChar32 b3 = implicit & 0xFF;

	1158 UChar32 b2 = (implicit >> 8) & 0xFF;

	1159 UChar32 b1 = (implicit >> 16) & 0xFF;

	1160 UChar32 b0 = (implicit >> 24) & 0xFF;

	1161

	1162 // simple parameter checks

	1163 if (b0 < min3Primary \|\| b0 > max4Primary

	1164 \|\| b1 < minTrail \|\| b1 > maxTrail)

	1165 return -1;

	1166 // normal offsets

	1167 b1 -= minTrail;

	1168

	1169 // take care of the final values, and compose

	1170 if (b0 < min4Primary) {

	1171 if (b2 < minTrail \|\| b2 > max3Trail \|\| b3 != 0)

	1172 return -1;

	1173 b2 -= minTrail;

	1174 UChar32 remainder = b2 % final3Multiplier;

	1175 if (remainder != 0)

	1176 return -1;

	1177 b0 -= min3Primary;

	1178 b2 /= final3Multiplier;

	1179 result = ((b0 * medialCount) + b1) * final3Count + b2;

	1180 } else {

	1181 if (b2 < minTrail \|\| b2 > maxTrail

	1182 \|\| b3 < minTrail \|\| b3 > max4Trail)

	1183 return -1;

	1184 b2 -= minTrail;

	1185 b3 -= minTrail;

	1186 UChar32 remainder = b3 % final4Multiplier;

	1187 if (remainder != 0)

	1188 return -1;

	1189 b3 /= final4Multiplier;

	1190 b0 -= min4Primary;

	1191 result = (((b0 * medialCount) + b1) * medialCount + b2) * final4Count + b3 + min4Boundary;

	1192 }

	1193 // final check

	1194 if (result < 0 \|\| result > UCOL_MAX_INPUT)

	1195 return -1;

	1196 return result;

	1197 }

	1198

	1199

	/* Ceiling of a/b for positive a and b, computed as 1 + (a-1)/b. */
	static inline int32_t divideAndRoundUp(int a, int b) {
	    const int wholeStepsBelow = (a - 1) / b;
	    return wholeStepsBelow + 1;
	}

	1203

	1204 /* this function is either called from initUCA or from genUCA before

	1205 * doing canonical closure for the UCA.

	1206 */

	1207

	1208 /**

	1209 * Set up to generate implicits.

	1210 * Maintenance Note: this function may end up being called more than once, due

	1211 * to threading races during initialization. Make sure that

	1212 * none of the Constants is ever transiently assigned an

	1213 * incorrect value.

	1214 * @param minPrimary

	1215 * @param maxPrimary

	1216 * @param minTrail final byte

	1217 * @param maxTrail final byte

	1218 * @param gap3 the gap we leave for tailoring for 3-byte forms

	1219 * @param gap4 the gap we leave for tailoring for 4-byte forms

	1220 */

	1221 static void initImplicitConstants(int minPrimary, int maxPrimary,

	1222 int minTrailIn, int maxTrailIn,

	1223 int gap3, int primaries3count,

	1224 UErrorCode *status) {

	1225 // some simple parameter checks

	1226 if ((minPrimary < 0 \|\| minPrimary >= maxPrimary \|\| maxPrimary > 0xFF)

	1227 \|\| (minTrailIn < 0 \|\| minTrailIn >= maxTrailIn \|\| maxTrailIn > 0xFF)

	1228 \|\| (primaries3count < 1))

	1229 {

	1230 *status = U_ILLEGAL_ARGUMENT_ERROR;

	1231 return;

	1232 };

	1233

	1234 minTrail = minTrailIn;

	1235 maxTrail = maxTrailIn;

	1236

	1237 min3Primary = minPrimary;

	1238 max4Primary = maxPrimary;

	1239 // compute constants for use later.

	1240 // number of values we can use in trailing bytes

	1241 // leave room for empty values between AND above, e.g. if gap = 2

	1242 // range 3..7 => +3 -4 -5 -6 -7: so 1 value

	1243 // range 3..8 => +3 -4 -5 +6 -7 -8: so 2 values

	1244 // range 3..9 => +3 -4 -5 +6 -7 -8 -9: so 2 values

	1245 final3Multiplier = gap3 + 1;

	1246 final3Count = (maxTrail - minTrail + 1) / final3Multiplier;

	1247 max3Trail = minTrail + (final3Count - 1) * final3Multiplier;

	1248

	1249 // medials can use full range

	1250 medialCount = (maxTrail - minTrail + 1);

	1251 // find out how many values fit in each form

	1252 int32_t threeByteCount = medialCount * final3Count;

	1253 // now determine where the 3/4 boundary is.

	1254 // we use 3 bytes below the boundary, and 4 above

	1255 int32_t primariesAvailable = maxPrimary - minPrimary + 1;

	1256 int32_t primaries4count = primariesAvailable - primaries3count;

	1257

	1258

	1259 int32_t min3ByteCoverage = primaries3count * threeByteCount;

	1260 min4Primary = minPrimary + primaries3count;

	1261 min4Boundary = min3ByteCoverage;

	1262 // Now expand out the multiplier for the 4 bytes, and redo.

	1263

	1264 int32_t totalNeeded = UCOL_MAX_INPUT - min4Boundary;

	1265 int32_t neededPerPrimaryByte = divideAndRoundUp(totalNeeded, primaries4count );

	1266 int32_t neededPerFinalByte = divideAndRoundUp(neededPerPrimaryByte, medialCo unt * medialCount);

	1267 int32_t gap4 = (maxTrail - minTrail - 1) / neededPerFinalByte;

	1268 if (gap4 < 1) {

	1269 *status = U_ILLEGAL_ARGUMENT_ERROR;

	1270 return;

	1271 }

	1272 final4Multiplier = gap4 + 1;

	1273 final4Count = neededPerFinalByte;

	1274 max4Trail = minTrail + (final4Count - 1) * final4Multiplier;

	1275 }

	1276

	1277 /**

	1278 * Supply parameters for generating implicit CEs

	1279 */

	1280 U_CAPI void U_EXPORT2

	1281 uprv_uca_initImplicitConstants(UErrorCode *status) {

	1282 // 13 is the largest 4-byte gap we can use without getting 2 four-byte forms .

	1283 //initImplicitConstants(minPrimary, maxPrimary, 0x04, 0xFE, 1, 1, status);

	1284 initImplicitConstants(minImplicitPrimary, maxImplicitPrimary, 0x04, 0xFE, 1, 1, status);

	1285 }

	1286

	1287

	1288 /* collIterNormalize Incremental Normalization happens here. */

	1289 /* pick up the range of chars identifed by FCD, */

	1290 /* normalize it into the collIterate's writable buffer, */

	1291 /* switch the collIterate's state to use the writable b uffer. */

	1292 /* */

	1293 static

	1294 void collIterNormalize(collIterate *collationSource)

	1295 {

	1296 UErrorCode status = U_ZERO_ERROR;

	1297 const UChar srcP = collationSource->pos - 1; / Start of chars to nor malize */

	1298 const UChar endP = collationSource->fcdPosition; / End of region to norma lize+1 */

	1299

	1300 collationSource->nfd->normalize(UnicodeString(FALSE, srcP, (int32_t)(endP - srcP)),

	1301 collationSource->writableBuffer,

	1302 status);

	1303 if (U_FAILURE(status)) {

	1304 #ifdef UCOL_DEBUG

	1305 fprintf(stderr, "collIterNormalize(), NFD failed, status = %s\n", u_erro rName(status));

	1306 #endif

	1307 return;

	1308 }

	1309

	1310 collationSource->pos = collationSource->writableBuffer.getTerminatedB uffer();

	1311 collationSource->origFlags = collationSource->flags;

	1312 collationSource->flags \|= UCOL_ITER_INNORMBUF;

	1313 collationSource->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN \| UCOL_USE _ITERATOR);

	1314 }

	1315

	1316

	1317 // This function takes the iterator and extracts normalized stuff up to the next boundary

	1318 // It is similar in the end results to the collIterNormalize, but for the cases when we

	1319 // use an iterator

	1320 /*static

	1321 inline void normalizeIterator(collIterate *collationSource) {

	1322 UErrorCode status = U_ZERO_ERROR;

	1323 UBool wasNormalized = FALSE;

	1324 //int32_t iterIndex = collationSource->iterator->getIndex(collationSource->ite rator, UITER_CURRENT);

	1325 uint32_t iterIndex = collationSource->iterator->getState(collationSource->iter ator);

	1326 int32_t normLen = unorm_next(collationSource->iterator, collationSource->writa bleBuffer,

	1327 (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalize d, &status);

	1328 if(status == U_BUFFER_OVERFLOW_ERROR \|\| normLen == (int32_t)collationSource->w ritableBufSize) {

	1329 // reallocate and terminate

	1330 if(!u_growBufferFromStatic(collationSource->stackWritableBuffer,

	1331 &collationSource->writableBuffer,

	1332 (int32_t *)&collationSource->writableBufSize, nor mLen + 1,

	1333 0)

	1334 ) {

	1335 #ifdef UCOL_DEBUG

	1336 fprintf(stderr, "normalizeIterator(), out of memory\n");

	1337 #endif

	1338 return;

	1339 }

	1340 status = U_ZERO_ERROR;

	1341 //collationSource->iterator->move(collationSource->iterator, iterIndex, UITE R_ZERO);

	1342 collationSource->iterator->setState(collationSource->iterator, iterIndex, &s tatus);

	1343 normLen = unorm_next(collationSource->iterator, collationSource->writableBuf fer,

	1344 (int32_t)collationSource->writableBufSize, UNORM_FCD, 0, TRUE, &wasNormalize d, &status);

	1345 }

	1346 // Terminate the buffer - we already checked that it is big enough

	1347 collationSource->writableBuffer[normLen] = 0;

	1348 if(collationSource->writableBuffer != collationSource->stackWritableBuffer) {

	1349 collationSource->flags \|= UCOL_ITER_ALLOCATED;

	1350 }

	1351 collationSource->pos = collationSource->writableBuffer;

	1352 collationSource->origFlags = collationSource->flags;

	1353 collationSource->flags \|= UCOL_ITER_INNORMBUF;

	1354 collationSource->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN \| UCOL_USE_I TERATOR);

	1355 }*/

	1356

	1357

	1358 /* Incremental FCD check and normalize */

	1359 /* Called from getNextCE when normalization state is suspect. */

	1360 /* When entering, the state is known to be this: */

	1361 /* o We are working in the main buffer of the collIterate, not the side */

	1362 /* writable buffer. When in the side buffer, normalization mode is alw ays off, */

	1363 /* so we won't get here. */

	1364 /* o The leading combining class from the current character is 0 or */

	1365 /* the trailing combining class of the previous char was zero. */

	1366 /* True because the previous call to this function will have always exi ted */

	1367 /* that way, and we get called for every char where cc might be non-zer o. */

	1368 static

	1369 inline UBool collIterFCD(collIterate *collationSource) {

	1370 const UChar srcP, endP;

	1371 uint8_t leadingCC;

	1372 uint8_t prevTrailingCC = 0;

	1373 uint16_t fcd;

	1374 UBool needNormalize = FALSE;

	1375

	1376 srcP = collationSource->pos-1;

	1377

	1378 if (collationSource->flags & UCOL_ITER_HASLEN) {

	1379 endP = collationSource->endp;

	1380 } else {

	1381 endP = NULL;

	1382 }

	1383

	1384 // Get the trailing combining class of the current character. If it's zero,

	1385 // we are OK.

	1386 /* trie access */

	1387 fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);

	1388 if (fcd != 0) {

	1389 prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);

	1390

	1391 if (prevTrailingCC != 0) {

	1392 // The current char has a non-zero trailing CC. Scan forward until we find

	1393 // a char with a leading cc of zero.

	1394 while (endP == NULL \|\| srcP != endP)

	1395 {

	1396 const UChar *savedSrcP = srcP;

	1397

	1398 /* trie access */

	1399 fcd = unorm_nextFCD16(fcdTrieIndex, fcdHighStart, srcP, endP);

	1400 leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);

	1401 if (leadingCC == 0) {

	1402 srcP = savedSrcP; // Hit char that is not part of combi ning sequence.

	1403 // back up over it. (Could be surr ogate pair!)

	1404 break;

	1405 }

	1406

	1407 if (leadingCC < prevTrailingCC) {

	1408 needNormalize = TRUE;

	1409 }

	1410

	1411 prevTrailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);

	1412 }

	1413 }

	1414 }

	1415

	1416 collationSource->fcdPosition = (UChar *)srcP;

	1417

	1418 return needNormalize;

	1419 }

	1420

	1421 /****************************************************************************/

	1422 /* Following are the CE retrieval functions */

	1423 /* */

	1424 /****************************************************************************/

	1425

	1426 static uint32_t getImplicit(UChar32 cp, collIterate *collationSource);

	1427 static uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource);

	1428

	1429 /* there should be a macro version of this function in the header file */

	1430 /* This is the first function that tries to fetch a collation element */

	1431 /* If it's not succesfull or it encounters a more difficult situation */

	1432 /* some more sofisticated and slower functions are invoked */

	1433 static

	1434 inline uint32_t ucol_IGetNextCE(const UCollator coll, collIterate collationSou rce, UErrorCode *status) {

	1435 uint32_t order = 0;

	1436 if (collationSource->CEpos > collationSource->toReturn) { /* Are there any CEs from previous expansions? */

	1437 order = (collationSource->toReturn++); / if so , return them */

	1438 if(collationSource->CEpos == collationSource->toReturn) {

	1439 collationSource->CEpos = collationSource->toReturn = collationSource ->extendCEs ? collationSource->extendCEs : collationSource->CEs;

	1440 }

	1441 return order;

	1442 }

	1443

	1444 UChar ch = 0;

	1445 collationSource->offsetReturn = NULL;

	1446

	1447 for (;;) /* Loop handles case when incremental nor malize switches */

	1448 { /* to or from the side buffer / origina l string, and we */

	1449 /* need to start again to get the next character. */

	1450

	1451 if ((collationSource->flags & (UCOL_ITER_HASLEN \| UCOL_ITER_INNORMBUF \| UCOL_ITER_NORM \| UCOL_HIRAGANA_Q \| UCOL_USE_ITERATOR)) == 0)

	1452 {

	1453 // The source string is null terminated and we're not working from t he side buffer,

	1454 // and we're not normalizing. This is the fast path.

	1455 // (We can be in the side buffer for Thai pre-vowel reordering eve n when not normalizing.)

	1456 ch = *collationSource->pos++;

	1457 if (ch != 0) {

	1458 break;

	1459 }

	1460 else {

	1461 return UCOL_NO_MORE_CES;

	1462 }

	1463 }

	1464

	1465 if (collationSource->flags & UCOL_ITER_HASLEN) {

	1466 // Normal path for strings when length is specified.

	1467 // (We can't be in side buffer because it is always null terminate d.)

	1468 if (collationSource->pos >= collationSource->endp) {

	1469 // Ran off of the end of the main source string. We're done.

	1470 return UCOL_NO_MORE_CES;

	1471 }

	1472 ch = *collationSource->pos++;

	1473 }

	1474 else if(collationSource->flags & UCOL_USE_ITERATOR) {

	1475 UChar32 iterCh = collationSource->iterator->next(collationSource->it erator);

	1476 if(iterCh == U_SENTINEL) {

	1477 return UCOL_NO_MORE_CES;

	1478 }

	1479 ch = (UChar)iterCh;

	1480 }

	1481 else

	1482 {

	1483 // Null terminated string.

	1484 ch = *collationSource->pos++;

	1485 if (ch == 0) {

	1486 // Ran off end of buffer.

	1487 if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {

	1488 // Ran off end of main string. backing up one character.

	1489 collationSource->pos--;

	1490 return UCOL_NO_MORE_CES;

	1491 }

	1492 else

	1493 {

	1494 // Hit null in the normalize side buffer.

	1495 // Usually this means the end of the normalized data,

	1496 // except for one odd case: a null followed by combining cha rs,

	1497 // which is the case if we are at the start of the buffer.

	1498 if (collationSource->pos == collationSource->writableBuffer. getBuffer()+1) {

	1499 break;

	1500 }

	1501

	1502 // Null marked end of side buffer.

	1503 // Revert to the main string and

	1504 // loop back to top to try again to get a character.

	1505 collationSource->pos = collationSource->fcdPosition;

	1506 collationSource->flags = collationSource->origFlags;

	1507 continue;

	1508 }

	1509 }

	1510 }

	1511

	1512 if(collationSource->flags&UCOL_HIRAGANA_Q) {

	1513 /* Codepoints \u3099-\u309C are both Hiragana and Katakana. Set the flag

	1514 * based on whether the previous codepoint was Hiragana or Katakana.

	1515 */

	1516 if(((ch>=0x3040 && ch<=0x3096) \|\| (ch >= 0x309d && ch <= 0x309f)) \|\|

	1517 ((collationSource->flags & UCOL_WAS_HIRAGANA) && (ch >= 0x30 99 && ch <= 0x309C))) {

	1518 collationSource->flags \|= UCOL_WAS_HIRAGANA;

	1519 } else {

	1520 collationSource->flags &= ~UCOL_WAS_HIRAGANA;

	1521 }

	1522 }

	1523

	1524 // We've got a character. See if there's any fcd and/or normalization s tuff to do.

	1525 // Note that UCOL_ITER_NORM flag is always zero when we are in the si de buffer.

	1526 if ((collationSource->flags & UCOL_ITER_NORM) == 0) {

	1527 break;

	1528 }

	1529

	1530 if (collationSource->fcdPosition >= collationSource->pos) {

	1531 // An earlier FCD check has already covered the current character.

	1532 // We can go ahead and process this char.

	1533 break;

	1534 }

	1535

	1536 if (ch < ZERO_CC_LIMIT_ ) {

	1537 // Fast fcd safe path. Trailing combining class == 0. This char is OK.

	1538 break;

	1539 }

	1540

	1541 if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {

	1542 // We need to peek at the next character in order to tell if we are FCD

	1543 if ((collationSource->flags & UCOL_ITER_HASLEN) && collationSource-> pos >= collationSource->endp) {

	1544 // We are at the last char of source string.

	1545 // It is always OK for FCD check.

	1546 break;

	1547 }

	1548

	1549 // Not at last char of source string (or we'll check against termina ting null). Do the FCD fast test

	1550 if (*collationSource->pos < NFC_ZERO_CC_BLOCK_LIMIT_) {

	1551 break;

	1552 }

	1553 }

	1554

	1555

	1556 // Need a more complete FCD check and possible normalization.

	1557 if (collIterFCD(collationSource)) {

	1558 collIterNormalize(collationSource);

	1559 }

	1560 if ((collationSource->flags & UCOL_ITER_INNORMBUF) == 0) {

	1561 // No normalization was needed. Go ahead and process the char we a lready had.

	1562 break;

	1563 }

	1564

	1565 // Some normalization happened. Next loop iteration will pick up a char

	1566 // from the normalization buffer.

	1567

	1568 } // end for (;;)

	1569

	1570

	1571 if (ch <= 0xFF) {

	1572 /* For latin-1 characters we never need to fall back to the UCA table */

	1573 /* because all of the UCA data is replicated in the latinOneMapping a rray */

	1574 order = coll->latinOneMapping[ch];

	1575 if (order > UCOL_NOT_FOUND) {

	1576 order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, stat us);

	1577 }

	1578 }

	1579 else

	1580 {

	1581 // Always use UCA for Han, Hangul

	1582 // (Han extension A is before main Han block)

	1583 // ** Han compatibility chars ?? **

	1584 if ((collationSource->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&

	1585 (ch >= UCOL_FIRST_HAN_A && ch <= UCOL_LAST_HANGUL)) {

	1586 if (ch > UCOL_LAST_HAN && ch < UCOL_FIRST_HANGUL) {

	1587 // between the two target ranges; do normal lookup

	1588 // ** this range is YI, Modifier tone letters, **

	1589 // ** Latin-D, Syloti Nagari, Phagas-pa. **

	1590 // ** Latin-D might be tailored, so we need to **

	1591 // ** do the normal lookup for these guys. **

	1592 order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

	1593 } else {

	1594 // in one of the target ranges; use UCA

	1595 order = UCOL_NOT_FOUND;

	1596 }

	1597 } else {

	1598 order = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

	1599 }

	1600

	1601 if(order > UCOL_NOT_FOUND) { /* if a CE is special */

	1602 order = ucol_prv_getSpecialCE(coll, ch, order, collationSource, stat us); /* and try to get the special CE */

	1603 }

	1604

	1605 if(order == UCOL_NOT_FOUND && coll->UCA) { /* We couldn't find a good CE in the tailoring */

	1606 /* if we got here, the codepoint MUST be over 0xFF - so we look dire ctly in the trie */

	1607 order = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

	1608

	1609 if(order > UCOL_NOT_FOUND) { /* UCA also gives us a special CE */

	1610 order = ucol_prv_getSpecialCE(coll->UCA, ch, order, collationSou rce, status);

	1611 }

	1612 }

	1613 }

	1614 if(order == UCOL_NOT_FOUND) {

	1615 order = getImplicit(ch, collationSource);

	1616 }

	1617 return order; /* return the CE */

	1618 }

	1619

	1620 /* ucol_getNextCE, out-of-line version for use from other files. */

	1621 U_CAPI uint32_t U_EXPORT2

	1622 ucol_getNextCE(const UCollator coll, collIterate collationSource, UErrorCode * status) {

	1623 return ucol_IGetNextCE(coll, collationSource, status);

	1624 }

	1625

	1626

	1627 /**

	1628 * Incremental previous normalization happens here. Pick up the range of chars

	1629 * identifed by FCD, normalize it into the collIterate's writable buffer,

	1630 * switch the collIterate's state to use the writable buffer.

	1631 * @param data collation iterator data

	1632 */

	1633 static

	1634 void collPrevIterNormalize(collIterate *data)

	1635 {

	1636 UErrorCode status = U_ZERO_ERROR;

	1637 const UChar pEnd = data->pos; / End normalize + 1 */

	1638 const UChar *pStart;

	1639

	1640 /* Start normalize */

	1641 if (data->fcdPosition == NULL) {

	1642 pStart = data->string;

	1643 }

	1644 else {

	1645 pStart = data->fcdPosition + 1;

	1646 }

	1647

	1648 int32_t normLen =

	1649 data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)((pEnd - pSta rt) + 1)),

	1650 data->writableBuffer,

	1651 status).

	1652 length();

	1653 if(U_FAILURE(status)) {

	1654 return;

	1655 }

	1656 /*

	1657 this puts the null termination infront of the normalized string instead

	1658 of the end

	1659 */

	1660 data->writableBuffer.insert(0, (UChar)0);

	1661

	1662 /*

	1663 * The usual case at this point is that we've got a base

	1664 * character followed by marks that were normalized. If

	1665 * fcdPosition is NULL, that means that we backed up to

	1666 * the beginning of the string and there's no base character.

	1667 *

	1668 * Forward processing will usually normalize when it sees

	1669 * the first mark, so that mark will get it's natural offset

	1670 * and the rest will get the offset of the character following

	1671 * the marks. The base character will also get its natural offset.

	1672 *

	1673 * We write the offset of the base character, if there is one,

	1674 * followed by the offset of the first mark and then the offsets

	1675 * of the rest of the marks.

	1676 */

	1677 int32_t firstMarkOffset = 0;

	1678 int32_t trailOffset = (int32_t)(data->pos - data->string + 1);

	1679 int32_t trailCount = normLen - 1;

	1680

	1681 if (data->fcdPosition != NULL) {

	1682 int32_t baseOffset = (int32_t)(data->fcdPosition - data->string);

	1683 UChar baseChar = *data->fcdPosition;

	1684

	1685 firstMarkOffset = baseOffset + 1;

	1686

	1687 /*

	1688 * If the base character is the start of a contraction, forward processi ng

	1689 * will normalize the marks while checking for the contraction, which me ans

	1690 * that the offset of the first mark will the same as the other marks.

	1691 *

	1692 * ** THIS IS PROBABLY NOT A COMPLETE TEST **

	1693 */

	1694 if (baseChar >= 0x100) {

	1695 uint32_t baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->mapping, bas eChar);

	1696

	1697 if (baseOrder == UCOL_NOT_FOUND && data->coll->UCA) {

	1698 baseOrder = UTRIE_GET32_FROM_LEAD(&data->coll->UCA->mapping, bas eChar);

	1699 }

	1700

	1701 if (baseOrder > UCOL_NOT_FOUND && getCETag(baseOrder) == CONTRACTION _TAG) {

	1702 firstMarkOffset = trailOffset;

	1703 }

	1704 }

	1705

	1706 data->appendOffset(baseOffset, status);

	1707 }

	1708

	1709 data->appendOffset(firstMarkOffset, status);

	1710

	1711 for (int32_t i = 0; i < trailCount; i += 1) {

	1712 data->appendOffset(trailOffset, status);

	1713 }

	1714

	1715 data->offsetRepeatValue = trailOffset;

	1716

	1717 data->offsetReturn = data->offsetStore - 1;

	1718 if (data->offsetReturn == data->offsetBuffer) {

	1719 data->offsetStore = data->offsetBuffer;

	1720 }

	1721

	1722 data->pos = data->writableBuffer.getTerminatedBuffer() + 1 + normLen;

	1723 data->origFlags = data->flags;

	1724 data->flags \|= UCOL_ITER_INNORMBUF;

	1725 data->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);

	1726 }

	1727

	1728

	1729 /**

	1730 * Incremental FCD check for previous iteration and normalize. Called from

	1731 * getPrevCE when normalization state is suspect.

	1732 * When entering, the state is known to be this:

	1733 * o We are working in the main buffer of the collIterate, not the side

	1734 * writable buffer. When in the side buffer, normalization mode is always

	1735 * off, so we won't get here.

	1736 * o The leading combining class from the current character is 0 or the

	1737 * trailing combining class of the previous char was zero.

	1738 * True because the previous call to this function will have always exited

	1739 * that way, and we get called for every char where cc might be non-zero.

	1740 * @param data collation iterate struct

	1741 * @return normalization status, TRUE for normalization to be done, FALSE

	1742 * otherwise

	1743 */

	1744 static

	1745 inline UBool collPrevIterFCD(collIterate *data)

	1746 {

	1747 const UChar src, start;

	1748 uint8_t leadingCC;

	1749 uint8_t trailingCC = 0;

	1750 uint16_t fcd;

	1751 UBool result = FALSE;

	1752

	1753 start = data->string;

	1754 src = data->pos + 1;

	1755

	1756 /* Get the trailing combining class of the current character. */

	1757 fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);

	1758

	1759 leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);

	1760

	1761 if (leadingCC != 0) {

	1762 /*

	1763 The current char has a non-zero leading combining class.

	1764 Scan backward until we find a char with a trailing cc of zero.

	1765 */

	1766 for (;;)

	1767 {

	1768 if (start == src) {

	1769 data->fcdPosition = NULL;

	1770 return result;

	1771 }

	1772

	1773 fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, start, src);

	1774

	1775 trailingCC = (uint8_t)(fcd & LAST_BYTE_MASK_);

	1776

	1777 if (trailingCC == 0) {

	1778 break;

	1779 }

	1780

	1781 if (leadingCC < trailingCC) {

	1782 result = TRUE;

	1783 }

	1784

	1785 leadingCC = (uint8_t)(fcd >> SECOND_LAST_BYTE_SHIFT_);

	1786 }

	1787 }

	1788

	1789 data->fcdPosition = (UChar *)src;

	1790

	1791 return result;

	1792 }

	1793

	1794 /** gets a code unit from the string at a given offset

	1795 * Handles both normal and iterative cases.

	1796 * No error checking - caller beware!

	1797 */

	1798 static inline

	1799 UChar peekCodeUnit(collIterate *source, int32_t offset) {

	1800 if(source->pos != NULL) {

	1801 return *(source->pos + offset);

	1802 } else if(source->iterator != NULL) {

	1803 UChar32 c;

	1804 if(offset != 0) {

	1805 source->iterator->move(source->iterator, offset, UITER_CURRENT);

	1806 c = source->iterator->next(source->iterator);

	1807 source->iterator->move(source->iterator, -offset-1, UITER_CURRENT);

	1808 } else {

	1809 c = source->iterator->current(source->iterator);

	1810 }

	1811 return c >= 0 ? (UChar)c : 0xfffd; // If the caller works properly, we should never see c<0.

	1812 } else {

	1813 return 0xfffd;

	1814 }

	1815 }

	1816

	1817 // Code point version. Treats the offset as a _code point_ delta.

	1818 // We cannot use U16_FWD_1_UNSAFE and similar because we might not have well-for med UTF-16.

	1819 // We cannot use U16_FWD_1 and similar because we do not know the start and limi t of the buffer.

	1820 static inline

	1821 UChar32 peekCodePoint(collIterate *source, int32_t offset) {

	1822 UChar32 c;

	1823 if(source->pos != NULL) {

	1824 const UChar *p = source->pos;

	1825 if(offset >= 0) {

	1826 // Skip forward over (offset-1) code points.

	1827 while(--offset >= 0) {

	1828 if(U16_IS_LEAD(p++) && U16_IS_TRAIL(p)) {

	1829 ++p;

	1830 }

	1831 }

	1832 // Read the code point there.

	1833 c = *p++;

	1834 UChar trail;

	1835 if(U16_IS_LEAD(c) && U16_IS_TRAIL(trail = *p)) {

	1836 c = U16_GET_SUPPLEMENTARY(c, trail);

	1837 }

	1838 } else /* offset<0 */ {

	1839 // Skip backward over (offset-1) code points.

	1840 while(++offset < 0) {

	1841 if(U16_IS_TRAIL(--p) && U16_IS_LEAD((p - 1))) {

	1842 --p;

	1843 }

	1844 }

	1845 // Read the code point before that.

	1846 c = *--p;

	1847 UChar lead;

	1848 if(U16_IS_TRAIL(c) && U16_IS_LEAD(lead = *(p - 1))) {

	1849 c = U16_GET_SUPPLEMENTARY(lead, c);

	1850 }

	1851 }

	1852 } else if(source->iterator != NULL) {

	1853 if(offset >= 0) {

	1854 // Skip forward over (offset-1) code points.

	1855 int32_t fwd = offset;

	1856 while(fwd-- > 0) {

	1857 uiter_next32(source->iterator);

	1858 }

	1859 // Read the code point there.

	1860 c = uiter_current32(source->iterator);

	1861 // Return to the starting point, skipping backward over (offset-1) c ode points.

	1862 while(offset-- > 0) {

	1863 uiter_previous32(source->iterator);

	1864 }

	1865 } else /* offset<0 */ {

	1866 // Read backward, reading offset code points, remember only the last -read one.

	1867 int32_t back = offset;

	1868 do {

	1869 c = uiter_previous32(source->iterator);

	1870 } while(++back < 0);

	1871 // Return to the starting position, skipping forward over offset cod e points.

	1872 do {

	1873 uiter_next32(source->iterator);

	1874 } while(++offset < 0);

	1875 }

	1876 } else {

	1877 c = U_SENTINEL;

	1878 }

	1879 return c;

	1880 }

	1881

	1882 /**

	1883 * Determines if we are at the start of the data string in the backwards

	1884 * collation iterator

	1885 * @param data collation iterator

	1886 * @return TRUE if we are at the start

	1887 */

	1888 static

	1889 inline UBool isAtStartPrevIterate(collIterate *data) {

	1890 if(data->pos == NULL && data->iterator != NULL) {

	1891 return !data->iterator->hasPrevious(data->iterator);

	1892 }

	1893 //return (collIter_bos(data)) \|\|

	1894 return (data->pos == data->string) \|\|

	1895 ((data->flags & UCOL_ITER_INNORMBUF) &&

	1896 *(data->pos - 1) == 0 && data->fcdPosition == NULL);

	1897 }

	1898

	1899 static

	1900 inline void goBackOne(collIterate *data) {

	1901 # if 0

	1902 // somehow, it looks like we need to keep iterator synced up

	1903 // at all times, as above.

	1904 if(data->pos) {

	1905 data->pos--;

	1906 }

	1907 if(data->iterator) {

	1908 data->iterator->previous(data->iterator);

	1909 }

	1910 #endif

	1911 if(data->iterator && (data->flags & UCOL_USE_ITERATOR)) {

	1912 data->iterator->previous(data->iterator);

	1913 }

	1914 if(data->pos) {

	1915 data->pos --;

	1916 }

	1917 }

	1918

	1919 /**

	1920 * Inline function that gets a simple CE.

	1921 * So what it does is that it will first check the expansion buffer. If the

	1922 * expansion buffer is not empty, ie the end pointer to the expansion buffer

	1923 * is different from the string pointer, we return the collation element at the

	1924 * return pointer and decrement it.

	1925 * For more complicated CEs it resorts to getComplicatedCE.

	1926 * @param coll collator data

	1927 * @param data collation iterator struct

	1928 * @param status error status

	1929 */

	1930 static

	1931 inline uint32_t ucol_IGetPrevCE(const UCollator coll, collIterate data,

	1932 UErrorCode *status)

	1933 {

	1934 uint32_t result = (uint32_t)UCOL_NULLORDER;

	1935

	1936 if (data->offsetReturn != NULL) {

	1937 if (data->offsetRepeatCount > 0) {

	1938 data->offsetRepeatCount -= 1;

	1939 } else {

	1940 if (data->offsetReturn == data->offsetBuffer) {

	1941 data->offsetReturn = NULL;

	1942 data->offsetStore = data->offsetBuffer;

	1943 } else {

	1944 data->offsetReturn -= 1;

	1945 }

	1946 }

	1947 }

	1948

	1949 if ((data->extendCEs && data->toReturn > data->extendCEs) \|\|

	1950 (!data->extendCEs && data->toReturn > data->CEs))

	1951 {

	1952 data->toReturn -= 1;

	1953 result = *(data->toReturn);

	1954 if (data->CEs == data->toReturn \|\| data->extendCEs == data->toReturn) {

	1955 data->CEpos = data->toReturn;

	1956 }

	1957 }

	1958 else {

	1959 UChar ch = 0;

	1960

	1961 /*

	1962 Loop handles case when incremental normalize switches to or from the

	1963 side buffer / original string, and we need to start again to get the

	1964 next character.

	1965 */

	1966 for (;;) {

	1967 if (data->flags & UCOL_ITER_HASLEN) {

	1968 /*

	1969 Normal path for strings when length is specified.

	1970 Not in side buffer because it is always null terminated.

	1971 */

	1972 if (data->pos <= data->string) {

	1973 /* End of the main source string */

	1974 return UCOL_NO_MORE_CES;

	1975 }

	1976 data->pos --;

	1977 ch = *data->pos;

	1978 }

	1979 // we are using an iterator to go back. Pray for us!

	1980 else if (data->flags & UCOL_USE_ITERATOR) {

	1981 UChar32 iterCh = data->iterator->previous(data->iterator);

	1982 if(iterCh == U_SENTINEL) {

	1983 return UCOL_NO_MORE_CES;

	1984 } else {

	1985 ch = (UChar)iterCh;

	1986 }

	1987 }

	1988 else {

	1989 data->pos --;

	1990 ch = *data->pos;

	1991 /* we are in the side buffer. */

	1992 if (ch == 0) {

	1993 /*

	1994 At the start of the normalize side buffer.

	1995 Go back to string.

	1996 Because pointer points to the last accessed character,

	1997 hence we have to increment it by one here.

	1998 */

	1999 data->flags = data->origFlags;

	2000 data->offsetRepeatValue = 0;

	2001

	2002 if (data->fcdPosition == NULL) {

	2003 data->pos = data->string;

	2004 return UCOL_NO_MORE_CES;

	2005 }

	2006 else {

	2007 data->pos = data->fcdPosition + 1;

	2008 }

	2009

	2010 continue;

	2011 }

	2012 }

	2013

	2014 if(data->flags&UCOL_HIRAGANA_Q) {

	2015 if(ch>=0x3040 && ch<=0x309f) {

	2016 data->flags \|= UCOL_WAS_HIRAGANA;

	2017 } else {

	2018 data->flags &= ~UCOL_WAS_HIRAGANA;

	2019 }

	2020 }

	2021

	2022 /*

	2023 * got a character to determine if there's fcd and/or normalization

	2024 * stuff to do.

	2025 * if the current character is not fcd.

	2026 * if current character is at the start of the string

	2027 * Trailing combining class == 0.

	2028 * Note if pos is in the writablebuffer, norm is always 0

	2029 */

	2030 if (ch < ZERO_CC_LIMIT_ \|\|

	2031 // this should propel us out of the loop in the iterator case

	2032 (data->flags & UCOL_ITER_NORM) == 0 \|\|

	2033 (data->fcdPosition != NULL && data->fcdPosition <= data->pos)

	2034 \|\| data->string == data->pos) {

	2035 break;

	2036 }

	2037

	2038 if (ch < NFC_ZERO_CC_BLOCK_LIMIT_) {

	2039 /* if next character is FCD */

	2040 if (data->pos == data->string) {

	2041 /* First char of string is always OK for FCD check */

	2042 break;

	2043 }

	2044

	2045 /* Not first char of string, do the FCD fast test */

	2046 if (*(data->pos - 1) < NFC_ZERO_CC_BLOCK_LIMIT_) {

	2047 break;

	2048 }

	2049 }

	2050

	2051 /* Need a more complete FCD check and possible normalization. */

	2052 if (collPrevIterFCD(data)) {

	2053 collPrevIterNormalize(data);

	2054 }

	2055

	2056 if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {

	2057 /* No normalization. Go ahead and process the char. */

	2058 break;

	2059 }

	2060

	2061 /*

	2062 Some normalization happened.

	2063 Next loop picks up a char from the normalization buffer.

	2064 */

	2065 }

	2066

	2067 /* attempt to handle contractions, after removal of the backwards

	2068 contraction

	2069 */

	2070 if (ucol_contractionEndCP(ch, coll) && !isAtStartPrevIterate(data)) {

	2071 result = ucol_prv_getSpecialPrevCE(coll, ch, UCOL_CONTRACTION, data, status);

	2072 } else {

	2073 if (ch <= 0xFF) {

	2074 result = coll->latinOneMapping[ch];

	2075 }

	2076 else {

	2077 // Always use UCA for [3400..9FFF], [AC00..D7AF]

	2078 // ** [FA0E..FA2F] ?? **

	2079 if ((data->flags & UCOL_FORCE_HAN_IMPLICIT) != 0 &&

	2080 (ch >= 0x3400 && ch <= 0xD7AF)) {

	2081 if (ch > 0x9FFF && ch < 0xAC00) {

	2082 // between the two target ranges; do normal lookup

	2083 // ** this range is YI, Modifier tone letters, **

	2084 // ** Latin-D, Syloti Nagari, Phagas-pa. **

	2085 // ** Latin-D might be tailored, so we need to **

	2086 // ** do the normal lookup for these guys. **

	2087 result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

	2088 } else {

	2089 result = UCOL_NOT_FOUND;

	2090 }

	2091 } else {

	2092 result = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

	2093 }

	2094 }

	2095 if (result > UCOL_NOT_FOUND) {

	2096 result = ucol_prv_getSpecialPrevCE(coll, ch, result, data, statu s);

	2097 }

	2098 if (result == UCOL_NOT_FOUND) { // Not found in master list

	2099 if (!isAtStartPrevIterate(data) &&

	2100 ucol_contractionEndCP(ch, data->coll))

	2101 {

	2102 result = UCOL_CONTRACTION;

	2103 } else {

	2104 if(coll->UCA) {

	2105 result = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

	2106 }

	2107 }

	2108

	2109 if (result > UCOL_NOT_FOUND) {

	2110 if(coll->UCA) {

	2111 result = ucol_prv_getSpecialPrevCE(coll->UCA, ch, result , data, status);

	2112 }

	2113 }

	2114 }

	2115 }

	2116

	2117 if(result == UCOL_NOT_FOUND) {

	2118 result = getPrevImplicit(ch, data);

	2119 }

	2120 }

	2121

	2122 return result;

	2123 }

	2124

	2125

	2126 /* ucol_getPrevCE, out-of-line version for use from other files. */

	2127 U_CFUNC uint32_t U_EXPORT2

	2128 ucol_getPrevCE(const UCollator coll, collIterate data,

	2129 UErrorCode *status) {

	2130 return ucol_IGetPrevCE(coll, data, status);

	2131 }

	2132

	2133

	2134 /* this should be connected to special Jamo handling */

	2135 U_CFUNC uint32_t U_EXPORT2

	2136 ucol_getFirstCE(const UCollator coll, UChar u, UErrorCode status) {

	2137 collIterate colIt;

	2138 IInit_collIterate(coll, &u, 1, &colIt, status);

	2139 if(U_FAILURE(*status)) {

	2140 return 0;

	2141 }

	2142 return ucol_IGetNextCE(coll, &colIt, status);

	2143 }

	2144

	2145 /**

	2146 * Inserts the argument character into the end of the buffer pushing back the

	2147 * null terminator.

	2148 * @param data collIterate struct data

	2149 * @param ch character to be appended

	2150 * @return the position of the new addition

	2151 */

	2152 static

	2153 inline const UChar * insertBufferEnd(collIterate *data, UChar ch)

	2154 {

	2155 int32_t oldLength = data->writableBuffer.length();

	2156 return data->writableBuffer.append(ch).getTerminatedBuffer() + oldLength;

	2157 }

	2158

	2159 /**

	2160 * Inserts the argument string into the end of the buffer pushing back the

	2161 * null terminator.

	2162 * @param data collIterate struct data

	2163 * @param string to be appended

	2164 * @param length of the string to be appended

	2165 * @return the position of the new addition

	2166 */

	2167 static

	2168 inline const UChar * insertBufferEnd(collIterate data, const UChar str, int32_ t length)

	2169 {

	2170 int32_t oldLength = data->writableBuffer.length();

	2171 return data->writableBuffer.append(str, length).getTerminatedBuffer() + oldL ength;

	2172 }

	2173

	2174 /**

	2175 * Special normalization function for contraction in the forwards iterator.

	2176 * This normalization sequence will place the current character at source->pos

	2177 * and its following normalized sequence into the buffer.

	2178 * The fcd position, pos will be changed.

	2179 * pos will now point to positions in the buffer.

	2180 * Flags will be changed accordingly.

	2181 * @param data collation iterator data

	2182 */

	2183 static

	2184 inline void normalizeNextContraction(collIterate *data)

	2185 {

	2186 int32_t strsize;

	2187 UErrorCode status = U_ZERO_ERROR;

	2188 /* because the pointer points to the next character */

	2189 const UChar *pStart = data->pos - 1;

	2190 const UChar *pEnd;

	2191

	2192 if ((data->flags & UCOL_ITER_INNORMBUF) == 0) {

	2193 data->writableBuffer.setTo(*(pStart - 1));

	2194 strsize = 1;

	2195 }

	2196 else {

	2197 strsize = data->writableBuffer.length();

	2198 }

	2199

	2200 pEnd = data->fcdPosition;

	2201

	2202 data->writableBuffer.append(

	2203 data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStar t)), status));

	2204 if(U_FAILURE(status)) {

	2205 return;

	2206 }

	2207

	2208 data->pos = data->writableBuffer.getTerminatedBuffer() + strsize;

	2209 data->origFlags = data->flags;

	2210 data->flags \|= UCOL_ITER_INNORMBUF;

	2211 data->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);

	2212 }

	2213

	2214 /**

	2215 * Contraction character management function that returns the next character

	2216 * for the forwards iterator.

	2217 * Does nothing if the next character is in buffer and not the first character

	2218 * in it.

	2219 * Else it checks next character in data string to see if it is normalizable.

	2220 * If it is not, the character is simply copied into the buffer, else

	2221 * the whole normalized substring is copied into the buffer, including the

	2222 * current character.

	2223 * @param data collation element iterator data

	2224 * @return next character

	2225 */

	2226 static

	2227 inline UChar getNextNormalizedChar(collIterate *data)

	2228 {

	2229 UChar nextch;

	2230 UChar ch;

	2231 // Here we need to add the iterator code. One problem is the way

	2232 // end of string is handled. If we just return next char, it could

	2233 // be the sentinel. Most of the cases already check for this, but we

	2234 // need to be sure.

	2235 if ((data->flags & (UCOL_ITER_NORM \| UCOL_ITER_INNORMBUF)) == 0 ) {

	2236 /* if no normalization and not in buffer. */

	2237 if(data->flags & UCOL_USE_ITERATOR) {

	2238 return (UChar)data->iterator->next(data->iterator);

	2239 } else {

	2240 return *(data->pos ++);

	2241 }

	2242 }

	2243

	2244 //if (data->flags & UCOL_ITER_NORM && data->flags & UCOL_USE_ITERATOR) {

	2245 //normalizeIterator(data);

	2246 //}

	2247

	2248 UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);

	2249 if ((innormbuf && *data->pos != 0) \|\|

	2250 (data->fcdPosition != NULL && !innormbuf &&

	2251 data->pos < data->fcdPosition)) {

	2252 /*

	2253 if next character is in normalized buffer, no further normalization

	2254 is required

	2255 */

	2256 return *(data->pos ++);

	2257 }

	2258

	2259 if (data->flags & UCOL_ITER_HASLEN) {

	2260 /* in data string */

	2261 if (data->pos + 1 == data->endp) {

	2262 return *(data->pos ++);

	2263 }

	2264 }

	2265 else {

	2266 if (innormbuf) {

	2267 // inside the normalization buffer, but at the end

	2268 // (since we encountered zero). This means, in the

	2269 // case we're using char iterator, that we need to

	2270 // do another round of normalization.

	2271 //if(data->origFlags & UCOL_USE_ITERATOR) {

	2272 // we need to restore original flags,

	2273 // otherwise, we'll lose them

	2274 //data->flags = data->origFlags;

	2275 //normalizeIterator(data);

	2276 //return *(data->pos++);

	2277 //} else {

	2278 /*

	2279 in writable buffer, at this point fcdPosition can not be

	2280 pointing to the end of the data string. see contracting tag.

	2281 */

	2282 if(data->fcdPosition) {

	2283 if (*(data->fcdPosition + 1) == 0 \|\|

	2284 data->fcdPosition + 1 == data->endp) {

	2285 /* at the end of the string, dump it into the normalizer */

	2286 data->pos = insertBufferEnd(data, *(data->fcdPosition)) + 1;

	2287 // Check if data->pos received a null pointer

	2288 if (data->pos == NULL) {

	2289 return (UChar)-1; // Return to indicate error.

	2290 }

	2291 return *(data->fcdPosition ++);

	2292 }

	2293 data->pos = data->fcdPosition;

	2294 } else if(data->origFlags & UCOL_USE_ITERATOR) {

	2295 // if we are here, we're using a normalizing iterator.

	2296 // we should just continue further.

	2297 data->flags = data->origFlags;

	2298 data->pos = NULL;

	2299 return (UChar)data->iterator->next(data->iterator);

	2300 }

	2301 //}

	2302 }

	2303 else {

	2304 if (*(data->pos + 1) == 0) {

	2305 return *(data->pos ++);

	2306 }

	2307 }

	2308 }

	2309

	2310 ch = *data->pos ++;

	2311 nextch = *data->pos;

	2312

	2313 /*

	2314 * if the current character is not fcd.

	2315 * Trailing combining class == 0.

	2316 */

	2317 if ((data->fcdPosition == NULL \|\| data->fcdPosition < data->pos) &&

	2318 (nextch >= NFC_ZERO_CC_BLOCK_LIMIT_ \|\|

	2319 ch >= NFC_ZERO_CC_BLOCK_LIMIT_)) {

	2320 /*

	2321 Need a more complete FCD check and possible normalization.

	2322 normalize substring will be appended to buffer

	2323 */

	2324 if (collIterFCD(data)) {

	2325 normalizeNextContraction(data);

	2326 return *(data->pos ++);

	2327 }

	2328 else if (innormbuf) {

	2329 /* fcdposition shifted even when there's no normalization, if we

	2330 don't input the rest into this, we'll get the wrong position when

	2331 we reach the end of the writableBuffer */

	2332 int32_t length = (int32_t)(data->fcdPosition - data->pos + 1);

	2333 data->pos = insertBufferEnd(data, data->pos - 1, length);

	2334 // Check if data->pos received a null pointer

	2335 if (data->pos == NULL) {

	2336 return (UChar)-1; // Return to indicate error.

	2337 }

	2338 return *(data->pos ++);

	2339 }

	2340 }

	2341

	2342 if (innormbuf) {

	2343 /*

	2344 no normalization is to be done hence only one character will be

	2345 appended to the buffer.

	2346 */

	2347 data->pos = insertBufferEnd(data, ch) + 1;

	2348 // Check if data->pos received a null pointer

	2349 if (data->pos == NULL) {

	2350 return (UChar)-1; // Return to indicate error.

	2351 }

	2352 }

	2353

	2354 /* points back to the pos in string */

	2355 return ch;

	2356 }

	2357

	2358

	2359

	2360 /**

	2361 * Function to copy the buffer into writableBuffer and sets the fcd position to

	2362 * the correct position

	2363 * @param source data string source

	2364 * @param buffer character buffer

	2365 */

	2366 static

	2367 inline void setDiscontiguosAttribute(collIterate *source, const UnicodeString &b uffer)

	2368 {

	2369 /* okay confusing part here. to ensure that the skipped characters are

	2370 considered later, we need to place it in the appropriate position in the

	2371 normalization buffer and reassign the pos pointer. simple case if pos

	2372 reside in string, simply copy to normalization buffer and

	2373 fcdposition = pos, pos = start of normalization buffer. if pos in

	2374 normalization buffer, we'll insert the copy infront of pos and point pos

	2375 to the start of the normalization buffer. why am i doing these copies?

	2376 well, so that the whole chunk of codes in the getNextCE, ucol_prv_getSpecial CE does

	2377 not require any changes, which be really painful. */

	2378 if (source->flags & UCOL_ITER_INNORMBUF) {

	2379 int32_t replaceLength = source->pos - source->writableBuffer.getBuffer() ;

	2380 source->writableBuffer.replace(0, replaceLength, buffer);

	2381 }

	2382 else {

	2383 source->fcdPosition = source->pos;

	2384 source->origFlags = source->flags;

	2385 source->flags \|= UCOL_ITER_INNORMBUF;

	2386 source->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN \| UCOL_USE_IT ERATOR);

	2387 source->writableBuffer = buffer;

	2388 }

	2389

	2390 source->pos = source->writableBuffer.getTerminatedBuffer();

	2391 }

	2392

	2393 /**

	2394 * Function to get the discontiguos collation element within the source.

	2395 * Note this function will set the position to the appropriate places.

	2396 * @param coll current collator used

	2397 * @param source data string source

	2398 * @param constart index to the start character in the contraction table

	2399 * @return discontiguos collation element offset

	2400 */

	2401 static

	2402 uint32_t getDiscontiguous(const UCollator coll, collIterate source,

	2403 const UChar *constart)

	2404 {

	2405 /* source->pos currently points to the second combining character after

	2406 the start character */

	2407 const UChar *temppos = source->pos;

	2408 UnicodeString buffer;

	2409 const UChar *tempconstart = constart;

	2410 uint8_t tempflags = source->flags;

	2411 UBool multicontraction = FALSE;

	2412 collIterateState discState;

	2413

	2414 backupState(source, &discState);

	2415

	2416 buffer.setTo(peekCodePoint(source, -1));

	2417 for (;;) {

	2418 UChar *UCharOffset;

	2419 UChar schar,

	2420 tchar;

	2421 uint32_t result;

	2422

	2423 if (((source->flags & UCOL_ITER_HASLEN) && source->pos >= source->endp)

	2424 \|\| (peekCodeUnit(source, 0) == 0 &&

	2425 //\|\| (*source->pos == 0 &&

	2426 ((source->flags & UCOL_ITER_INNORMBUF) == 0 \|\|

	2427 source->fcdPosition == NULL \|\|

	2428 source->fcdPosition == source->endp \|\|

	2429 *(source->fcdPosition) == 0 \|\|

	2430 u_getCombiningClass(*(source->fcdPosition)) == 0)) \|\|

	2431 /* end of string in null terminated string or stopped by a

	2432 null character, note fcd does not always point to a base

	2433 character after the discontiguos change */

	2434 u_getCombiningClass(peekCodePoint(source, 0)) == 0) {

	2435 //u_getCombiningClass(*(source->pos)) == 0) {

	2436 //constart = (UChar *)coll->image + getContractOffset(CE);

	2437 if (multicontraction) {

	2438 source->pos = temppos - 1;

	2439 setDiscontiguosAttribute(source, buffer);

	2440 return *(coll->contractionCEs +

	2441 (tempconstart - coll->contractionIndex));

	2442 }

	2443 constart = tempconstart;

	2444 break;

	2445 }

	2446

	2447 UCharOffset = (UChar )(tempconstart + 1); / skip the backward offset*/

	2448 schar = getNextNormalizedChar(source);

	2449

	2450 while (schar > (tchar = *UCharOffset)) {

	2451 UCharOffset++;

	2452 }

	2453

	2454 if (schar != tchar) {

	2455 /* not the correct codepoint. we stuff the current codepoint into

	2456 the discontiguos buffer and try the next character */

	2457 buffer.append(schar);

	2458 continue;

	2459 }

	2460 else {

	2461 if (u_getCombiningClass(schar) ==

	2462 u_getCombiningClass(peekCodePoint(source, -2))) {

	2463 buffer.append(schar);

	2464 continue;

	2465 }

	2466 result = *(coll->contractionCEs +

	2467 (UCharOffset - coll->contractionIndex));

	2468 }

	2469

	2470 if (result == UCOL_NOT_FOUND) {

	2471 break;

	2472 } else if (isContraction(result)) {

	2473 /* this is a multi-contraction*/

	2474 tempconstart = (UChar *)coll->image + getContractOffset(result);

	2475 if (*(coll->contractionCEs + (constart - coll->contractionIndex))

	2476 != UCOL_NOT_FOUND) {

	2477 multicontraction = TRUE;

	2478 temppos = source->pos + 1;

	2479 }

	2480 } else {

	2481 setDiscontiguosAttribute(source, buffer);

	2482 return result;

	2483 }

	2484 }

	2485

	2486 /* no problems simply reverting just like that,

	2487 if we are in string before getting into this function, points back to

	2488 string hence no problem.

	2489 if we are in normalization buffer before getting into this function,

	2490 since we'll never use another normalization within this function, we

	2491 know that fcdposition points to a base character. the normalization buffer

	2492 never change, hence this revert works. */

	2493 loadState(source, &discState, TRUE);

	2494 goBackOne(source);

	2495

	2496 //source->pos = temppos - 1;

	2497 source->flags = tempflags;

	2498 return *(coll->contractionCEs + (constart - coll->contractionIndex));

	2499 }

	2500

	2501 /* now uses Mark's getImplicitPrimary code */

	2502 static

	2503 inline uint32_t getImplicit(UChar32 cp, collIterate *collationSource) {

	2504 uint32_t r = uprv_uca_getImplicitPrimary(cp);

	2505 *(collationSource->CEpos++) = ((r & 0x0000FFFF)<<16) \| 0x000000C0;

	2506 collationSource->offsetRepeatCount += 1;

	2507 return (r & UCOL_PRIMARYMASK) \| 0x00000505; // This was 'order'

	2508 }

	2509

	2510 /**

	2511 * Inserts the argument character into the front of the buffer replacing the

	2512 * front null terminator.

	2513 * @param data collation element iterator data

	2514 * @param ch character to be appended

	2515 */

	2516 static

	2517 inline void insertBufferFront(collIterate *data, UChar ch)

	2518 {

	2519 data->pos = data->writableBuffer.setCharAt(0, ch).insert(0, (UChar)0).getTer minatedBuffer() + 2;

	2520 }

	2521

	2522 /**

	2523 * Special normalization function for contraction in the previous iterator.

	2524 * This normalization sequence will place the current character at source->pos

	2525 * and its following normalized sequence into the buffer.

	2526 * The fcd position, pos will be changed.

	2527 * pos will now point to positions in the buffer.

	2528 * Flags will be changed accordingly.

	2529 * @param data collation iterator data

	2530 */

	2531 static

	2532 inline void normalizePrevContraction(collIterate data, UErrorCode status)

	2533 {

	2534 const UChar pEnd = data->pos + 1; / End normalize + 1 */

	2535 const UChar *pStart;

	2536

	2537 UnicodeString endOfBuffer;

	2538 if (data->flags & UCOL_ITER_HASLEN) {

	2539 /*

	2540 normalization buffer not used yet, we'll pull down the next

	2541 character into the end of the buffer

	2542 */

	2543 endOfBuffer.setTo(*pEnd);

	2544 }

	2545 else {

	2546 endOfBuffer.setTo(data->writableBuffer, 1); // after the leading NUL

	2547 }

	2548

	2549 if (data->fcdPosition == NULL) {

	2550 pStart = data->string;

	2551 }

	2552 else {

	2553 pStart = data->fcdPosition + 1;

	2554 }

	2555 int32_t normLen =

	2556 data->nfd->normalize(UnicodeString(FALSE, pStart, (int32_t)(pEnd - pStar t)),

	2557 data->writableBuffer,

	2558 *status).

	2559 length();

	2560 if(U_FAILURE(*status)) {

	2561 return;

	2562 }

	2563 /*

	2564 this puts the null termination infront of the normalized string instead

	2565 of the end

	2566 */

	2567 data->pos =

	2568 data->writableBuffer.insert(0, (UChar)0).append(endOfBuffer).getTerminat edBuffer() +

	2569 1 + normLen;

	2570 data->origFlags = data->flags;

	2571 data->flags \|= UCOL_ITER_INNORMBUF;

	2572 data->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);

	2573 }

	2574

	2575 /**

	2576 * Contraction character management function that returns the previous character

	2577 * for the backwards iterator.

	2578 * Does nothing if the previous character is in buffer and not the first

	2579 * character in it.

	2580 * Else it checks previous character in data string to see if it is

	2581 * normalizable.

	2582 * If it is not, the character is simply copied into the buffer, else

	2583 * the whole normalized substring is copied into the buffer, including the

	2584 * current character.

	2585 * @param data collation element iterator data

	2586 * @return previous character

	2587 */

	2588 static

	2589 inline UChar getPrevNormalizedChar(collIterate data, UErrorCode status)

	2590 {

	2591 UChar prevch;

	2592 UChar ch;

	2593 const UChar *start;

	2594 UBool innormbuf = (UBool)(data->flags & UCOL_ITER_INNORMBUF);

	2595 if ((data->flags & (UCOL_ITER_NORM \| UCOL_ITER_INNORMBUF)) == 0 \|\|

	2596 (innormbuf && *(data->pos - 1) != 0)) {

	2597 /*

	2598 if no normalization.

	2599 if previous character is in normalized buffer, no further normalization

	2600 is required

	2601 */

	2602 if(data->flags & UCOL_USE_ITERATOR) {

	2603 data->iterator->move(data->iterator, -1, UITER_CURRENT);

	2604 return (UChar)data->iterator->next(data->iterator);

	2605 } else {

	2606 return *(data->pos - 1);

	2607 }

	2608 }

	2609

	2610 start = data->pos;

	2611 if ((data->fcdPosition==NULL)\|\|(data->flags & UCOL_ITER_HASLEN)) {

	2612 /* in data string */

	2613 if ((start - 1) == data->string) {

	2614 return *(start - 1);

	2615 }

	2616 start --;

	2617 ch = *start;

	2618 prevch = *(start - 1);

	2619 }

	2620 else {

	2621 /*

	2622 in writable buffer, at this point fcdPosition can not be NULL.

	2623 see contracting tag.

	2624 */

	2625 if (data->fcdPosition == data->string) {

	2626 /* at the start of the string, just dump it into the normalizer */

	2627 insertBufferFront(data, *(data->fcdPosition));

	2628 data->fcdPosition = NULL;

	2629 return *(data->pos - 1);

	2630 }

	2631 start = data->fcdPosition;

	2632 ch = *start;

	2633 prevch = *(start - 1);

	2634 }

	2635 /*

	2636 * if the current character is not fcd.

	2637 * Trailing combining class == 0.

	2638 */

	2639 if (data->fcdPosition > start &&

	2640 (ch >= NFC_ZERO_CC_BLOCK_LIMIT_ \|\| prevch >= NFC_ZERO_CC_BLOCK_LIMIT_))

	2641 {

	2642 /*

	2643 Need a more complete FCD check and possible normalization.

	2644 normalize substring will be appended to buffer

	2645 */

	2646 const UChar *backuppos = data->pos;

	2647 data->pos = start;

	2648 if (collPrevIterFCD(data)) {

	2649 normalizePrevContraction(data, status);

	2650 return *(data->pos - 1);

	2651 }

	2652 data->pos = backuppos;

	2653 data->fcdPosition ++;

	2654 }

	2655

	2656 if (innormbuf) {

	2657 /*

	2658 no normalization is to be done hence only one character will be

	2659 appended to the buffer.

	2660 */

	2661 insertBufferFront(data, ch);

	2662 data->fcdPosition --;

	2663 }

	2664

	2665 return ch;

	2666 }

	2667

	2668 /* This function handles the special CEs like contractions, expansions, surrogates, Thai */

	2669 /* It is called by getNextCE */

	2670

	2671 /* The following should be even */

	2672 #define UCOL_MAX_DIGITS_FOR_NUMBER 254

	2673

	2674 uint32_t ucol_prv_getSpecialCE(const UCollator coll, UChar ch, uint32_t CE, col lIterate source, UErrorCode *status) {

	2675 collIterateState entryState;

	2676 backupState(source, &entryState);

	2677 UChar32 cp = ch;

	2678

	2679 for (;;) {

	2680 // This loop will repeat only in the case of contractions, and only when a contraction

	2681 // is found and the first CE resulting from that contraction is itself a special

	2682 // (an expansion, for example.) All other special CE types are fully handled the

	2683 // first time through, and the loop exits.

	2684

	2685 const uint32_t *CEOffset = NULL;

	2686 switch(getCETag(CE)) {

	2687 case NOT_FOUND_TAG:

	2688 /* This one is not found, and we'll let somebody else bother about i t... no more games */

	2689 return CE;

	2690 case SPEC_PROC_TAG:

	2691 {

	2692 // Special processing is getting a CE that is preceded by a cert ain prefix

	2693 // Currently this is only needed for optimizing Japanese length and iteration marks.

	2694 // When we encouter a special processing tag, we go backwards an d try to see if

	2695 // we have a match.

	2696 // Contraction tables are used - so the whole process is not unl ike contraction.

	2697 // prefix data is stored backwards in the table.

	2698 const UChar *UCharOffset;

	2699 UChar schar, tchar;

	2700 collIterateState prefixState;

	2701 backupState(source, &prefixState);

	2702 loadState(source, &entryState, TRUE);

	2703 goBackOne(source); // We want to look at the point where we ente red - actually one

	2704 // before that...

	2705

	2706 for(;;) {

	2707 // This loop will run once per source string character, for as long as we

	2708 // are matching a potential contraction sequence

	2709

	2710 // First we position ourselves at the begining of contractio n sequence

	2711 const UChar ContractionStart = UCharOffset = (UChar )coll- >image+getContractOffset(CE);

	2712 if (collIter_bos(source)) {

	2713 CE = *(coll->contractionCEs + (UCharOffset - coll->contr actionIndex));

	2714 break;

	2715 }

	2716 schar = getPrevNormalizedChar(source, status);

	2717 goBackOne(source);

	2718

	2719 while(schar > (tchar = UCharOffset)) { / since the contrac tion codepoints should be ordered, we skip all that are smaller */

	2720 UCharOffset++;

	2721 }

	2722

	2723 if (schar == tchar) {

	2724 // Found the source string char in the table.

	2725 // Pick up the corresponding CE from the table.

	2726 CE = *(coll->contractionCEs +

	2727 (UCharOffset - coll->contractionIndex));

	2728 }

	2729 else

	2730 {

	2731 // Source string char was not in the table.

	2732 // We have not found the prefix.

	2733 CE = *(coll->contractionCEs +

	2734 (ContractionStart - coll->contractionIndex));

	2735 }

	2736

	2737 if(!isPrefix(CE)) {

	2738 // The source string char was in the contraction table, and the corresponding

	2739 // CE is not a prefix CE. We found the prefix, break

	2740 // out of loop, this CE will end up being returned. T his is the normal

	2741 // way out of prefix handling when the source actually contained

	2742 // the prefix.

	2743 break;

	2744 }

	2745 }

	2746 if(CE != UCOL_NOT_FOUND) { // we found something and we can meri lly continue

	2747 loadState(source, &prefixState, TRUE);

	2748 if(source->origFlags & UCOL_USE_ITERATOR) {

	2749 source->flags = source->origFlags;

	2750 }

	2751 } else { // prefix search was a failure, we have to backup all t he way to the start

	2752 loadState(source, &entryState, TRUE);

	2753 }

	2754 break;

	2755 }

	2756 case CONTRACTION_TAG:

	2757 {

	2758 /* This should handle contractions */

	2759 collIterateState state;

	2760 backupState(source, &state);

	2761 uint32_t firstCE = (coll->contractionCEs + ((UChar )coll->imag e+getContractOffset(CE) - coll->contractionIndex)); //UCOL_NOT_FOUND;

	2762 const UChar *UCharOffset;

	2763 UChar schar, tchar;

	2764

	2765 for (;;) {

	2766 /* This loop will run once per source string character, for as long as we */

	2767 /* are matching a potential contraction sequence */

	2768

	2769 /* First we position ourselves at the begining of contractio n sequence */

	2770 const UChar ContractionStart = UCharOffset = (UChar )coll- >image+getContractOffset(CE);

	2771

	2772 if (collIter_eos(source)) {

	2773 // Ran off the end of the source string.

	2774 CE = *(coll->contractionCEs + (UCharOffset - coll->contr actionIndex));

	2775 // So we'll pick whatever we have at the point...

	2776 if (CE == UCOL_NOT_FOUND) {

	2777 // back up the source over all the chars we scanned going into this contraction.

	2778 CE = firstCE;

	2779 loadState(source, &state, TRUE);

	2780 if(source->origFlags & UCOL_USE_ITERATOR) {

	2781 source->flags = source->origFlags;

	2782 }

	2783 }

	2784 break;

	2785 }

	2786

	2787 uint8_t maxCC = (uint8_t)((UCharOffset)&0xFF); /get the di scontiguos stuff / / skip the backward offset, see above */

	2788 uint8_t allSame = (uint8_t)(*(UCharOffset++)>>8);

	2789

	2790 schar = getNextNormalizedChar(source);

	2791 while(schar > (tchar = UCharOffset)) { / since the contrac tion codepoints should be ordered, we skip all that are smaller */

	2792 UCharOffset++;

	2793 }

	2794

	2795 if (schar == tchar) {

	2796 // Found the source string char in the contraction table .

	2797 // Pick up the corresponding CE from the table.

	2798 CE = *(coll->contractionCEs +

	2799 (UCharOffset - coll->contractionIndex));

	2800 }

	2801 else

	2802 {

	2803 // Source string char was not in contraction table.

	2804 // Unless we have a discontiguous contraction, we have finished

	2805 // with this contraction.

	2806 // in order to do the proper detection, we

	2807 // need to see if we're dealing with a supplementary

	2808 /* We test whether the next two char are surrogate pairs .

	2809 * This test is done if the iterator is not NULL.

	2810 * If there is no surrogate pair, the iterator

	2811 * goes back one if needed. */

	2812 UChar32 miss = schar;

	2813 if (source->iterator) {

	2814 UChar32 surrNextChar; /* the next char in the iterat ion to test */

	2815 int32_t prevPos; /* holds the previous position befo re move forward of the source iterator */

	2816 if(U16_IS_LEAD(schar) && source->iterator->hasNext(s ource->iterator)) {

	2817 prevPos = source->iterator->index;

	2818 surrNextChar = getNextNormalizedChar(source);

	2819 if (U16_IS_TRAIL(surrNextChar)) {

	2820 miss = U16_GET_SUPPLEMENTARY(schar, surrNext Char);

	2821 } else if (prevPos < source->iterator->index){

	2822 goBackOne(source);

	2823 }

	2824 }

	2825 } else if (U16_IS_LEAD(schar)) {

	2826 miss = U16_GET_SUPPLEMENTARY(schar, getNextNormalize dChar(source));

	2827 }

	2828

	2829 uint8_t sCC;

	2830 if (miss < 0x300 \|\|

	2831 maxCC == 0 \|\|

	2832 (sCC = i_getCombiningClass(miss, coll)) == 0 \|\|

	2833 sCC>maxCC \|\|

	2834 (allSame != 0 && sCC == maxCC) \|\|

	2835 collIter_eos(source))

	2836 {

	2837 // Contraction can not be discontiguous.

	2838 goBackOne(source); // back up the source string by one,

	2839 // because the character we just looked at was

	2840 // not part of the contraction. */

	2841 if(U_IS_SUPPLEMENTARY(miss)) {

	2842 goBackOne(source);

	2843 }

	2844 CE = *(coll->contractionCEs +

	2845 (ContractionStart - coll->contractionIndex));

	2846 } else {

	2847 //

	2848 // Contraction is possibly discontiguous.

	2849 // Scan more of source string looking for a match

	2850 //

	2851 UChar tempchar;

	2852 /* find the next character if schar is not a base ch aracter

	2853 and we are not yet at the end of the string */

	2854 tempchar = getNextNormalizedChar(source);

	2855 // probably need another supplementary thingie here

	2856 goBackOne(source);

	2857 if (i_getCombiningClass(tempchar, coll) == 0) {

	2858 goBackOne(source);

	2859 if(U_IS_SUPPLEMENTARY(miss)) {

	2860 goBackOne(source);

	2861 }

	2862 /* Spit out the last char of the string, wasn't tasty enough */

	2863 CE = *(coll->contractionCEs +

	2864 (ContractionStart - coll->contractionIndex)) ;

	2865 } else {

	2866 CE = getDiscontiguous(coll, source, ContractionS tart);

	2867 }

	2868 }

	2869 } // else after if(schar == tchar)

	2870

	2871 if(CE == UCOL_NOT_FOUND) {

	2872 /* The Source string did not match the contraction that we were checking. */

	2873 /* Back up the source position to undo the effects of h aving partially */

	2874 /* scanned through what ultimately proved to not be a contraction. */

	2875 loadState(source, &state, TRUE);

	2876 CE = firstCE;

	2877 break;

	2878 }

	2879

	2880 if(!isContraction(CE)) {

	2881 // The source string char was in the contraction table, and the corresponding

	2882 // CE is not a contraction CE. We completed the contr action, break

	2883 // out of loop, this CE will end up being returned. T his is the normal

	2884 // way out of contraction handling when the source act ually contained

	2885 // the contraction.

	2886 break;

	2887 }

	2888

	2889

	2890 // The source string char was in the contraction table, and the corresponding

	2891 // CE is IS a contraction CE. We will continue looping t o check the source

	2892 // string for the remaining chars in the contraction.

	2893 uint32_t tempCE = *(coll->contractionCEs + (ContractionStart - coll->contractionIndex));

	2894 if(tempCE != UCOL_NOT_FOUND) {

	2895 // We have scanned a a section of source string for whic h there is a

	2896 // CE from the contraction table. Remember the CE and scan position, so

	2897 // that we can return to this point if further scanning fails to

	2898 // match a longer contraction sequence.

	2899 firstCE = tempCE;

	2900

	2901 goBackOne(source);

	2902 backupState(source, &state);

	2903 getNextNormalizedChar(source);

	2904

	2905 // Another way to do this is:

	2906 //collIterateState tempState;

	2907 //backupState(source, &tempState);

	2908 //goBackOne(source);

	2909 //backupState(source, &state);

	2910 //loadState(source, &tempState, TRUE);

	2911

	2912 // The problem is that for incomplete contractions we ha ve to remember the previous

	2913 // position. Before, the only thing I needed to do was s tate.pos--;

	2914 // After iterator introduction and especially after intr oduction of normalizing

	2915 // iterators, it became much more difficult to decrease the saved state.

	2916 // I'm not yet sure which of the two methods above is fa ster.

	2917 }

	2918 } // for(;;)

	2919 break;

	2920 } // case CONTRACTION_TAG:

	2921 case LONG_PRIMARY_TAG:

	2922 {

	2923 *(source->CEpos++) = ((CE & 0xFF)<<24)\|UCOL_CONTINUATION_MARKER;

	2924 CE = ((CE & 0xFFFF00) << 8) \| (UCOL_BYTE_COMMON << 8) \| UCOL_BYT E_COMMON;

	2925 source->offsetRepeatCount += 1;

	2926 return CE;

	2927 }

	2928 case EXPANSION_TAG:

	2929 {

	2930 /* This should handle expansion. */

	2931 /* NOTE: we can encounter both continuations and expansions in a n expansion! */

	2932 /* I have to decide where continuations are going to be dealt wi th */

	2933 uint32_t size;

	2934 uint32_t i; /* general counter */

	2935

	2936 CEOffset = (uint32_t )coll->image+getExpansionOffset(CE); / fi nd the offset to expansion table */

	2937 size = getExpansionCount(CE);

	2938 CE = *CEOffset++;

	2939 //source->offsetRepeatCount = -1;

	2940

	2941 if(size != 0) { /* if there are less than 16 elements in expansi on, we don't terminate */

	2942 for(i = 1; i<size; i++) {

	2943 (source->CEpos++) = CEOffset++;

	2944 source->offsetRepeatCount += 1;

	2945 }

	2946 } else { /* else, we do */

	2947 while(*CEOffset != 0) {

	2948 (source->CEpos++) = CEOffset++;

	2949 source->offsetRepeatCount += 1;

	2950 }

	2951 }

	2952

	2953 return CE;

	2954 }

	2955 case DIGIT_TAG:

	2956 {

	2957 /*

	2958 We do a check to see if we want to collate digits as numbers; if so we generate

	2959 a custom collation key. Otherwise we pull out the value stored i n the expansion table.

	2960 */

	2961 //uint32_t size;

	2962 uint32_t i; /* general counter */

	2963

	2964 if (source->coll->numericCollation == UCOL_ON){

	2965 collIterateState digitState = {0,0,0,0,0,0,0,0,0};

	2966 UChar32 char32 = 0;

	2967 int32_t digVal = 0;

	2968

	2969 uint32_t digIndx = 0;

	2970 uint32_t endIndex = 0;

	2971 uint32_t trailingZeroIndex = 0;

	2972

	2973 uint8_t collateVal = 0;

	2974

	2975 UBool nonZeroValReached = FALSE;

	2976

	2977 uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3]; // I j ust need a temporary place to store my generated CEs.

	2978 /*

	2979 We parse the source string until we hit a char that's N OT a digit.

	2980 Use this u_charDigitValue. This might be slow because we have to

	2981 handle surrogates...

	2982 */

	2983 /*

	2984 if (U16_IS_LEAD(ch)){

	2985 if (!collIter_eos(source)) {

	2986 backupState(source, &digitState);

	2987 UChar trail = getNextNormalizedChar(source);

	2988 if(U16_IS_TRAIL(trail)) {

	2989 char32 = U16_GET_SUPPLEMENTARY(ch, trail);

	2990 } else {

	2991 loadState(source, &digitState, TRUE);

	2992 char32 = ch;

	2993 }

	2994 } else {

	2995 char32 = ch;

	2996 }

	2997 } else {

	2998 char32 = ch;

	2999 }

	3000 digVal = u_charDigitValue(char32);

	3001 */

	3002 digVal = u_charDigitValue(cp); // if we have arrived here, w e have

	3003 // already processed possible supplementaries that trigered the digit tag -

	3004 // all supplementaries are marked in the UCA.

	3005 /*

	3006 We pad a zero in front of the first element anyways. Th is takes

	3007 care of the (probably) most common case where people are sorting things followed

	3008 by a single digit

	3009 */

	3010 digIndx++;

	3011 for(;;){

	3012 // Make sure we have enough space. No longer needed;

	3013 // at this point digIndx now has a max value of UCOL_MAX _DIGITS_FOR_NUMBER

	3014 // (it has been pre-incremented) so we just ensure that numTempBuf is big enough

	3015 // (UCOL_MAX_DIGITS_FOR_NUMBER/2 + 3).

	3016

	3017 // Skipping over leading zeroes.

	3018 if (digVal != 0) {

	3019 nonZeroValReached = TRUE;

	3020 }

	3021 if (nonZeroValReached) {

	3022 /*

	3023 We parse the digit string into base 100 numbers (thi s fits into a byte).

	3024 We only add to the buffer in twos, thus if we are pa rsing an odd character,

	3025 that serves as the 'tens' digit while the if we are parsing an even one, that

	3026 is the 'ones' digit. We dumped the parsed base 100 v alue (collateVal) into

	3027 a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid

	3028 overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less

	3029 than all the other bytes.

	3030 */

	3031

	3032 if (digIndx % 2 == 1){

	3033 collateVal += (uint8_t)digVal;

	3034

	3035 // We don't enter the low-order-digit case unles s we've already seen

	3036 // the high order, or for the first digit, which is always non-zero.

	3037 if (collateVal != 0)

	3038 trailingZeroIndex = 0;

	3039

	3040 numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;

	3041 collateVal = 0;

	3042 }

	3043 else{

	3044 // We drop the collation value into the buffer s o if we need to do

	3045 // a "front patch" we don't have to check to see if we're hitting the

	3046 // last element.

	3047 collateVal = (uint8_t)(digVal * 10);

	3048

	3049 // Check for trailing zeroes.

	3050 if (collateVal == 0)

	3051 {

	3052 if (!trailingZeroIndex)

	3053 trailingZeroIndex = (digIndx/2) + 2;

	3054 }

	3055 else

	3056 trailingZeroIndex = 0;

	3057

	3058 numTempBuf[(digIndx/2) + 2] = collateVal*2 + 6;

	3059 }

	3060 digIndx++;

	3061 }

	3062

	3063 // Get next character.

	3064 if (!collIter_eos(source)){

	3065 ch = getNextNormalizedChar(source);

	3066 if (U16_IS_LEAD(ch)){

	3067 if (!collIter_eos(source)) {

	3068 backupState(source, &digitState);

	3069 UChar trail = getNextNormalizedChar(source);

	3070 if(U16_IS_TRAIL(trail)) {

	3071 char32 = U16_GET_SUPPLEMENTARY(ch, trail );

	3072 } else {

	3073 loadState(source, &digitState, TRUE);

	3074 char32 = ch;

	3075 }

	3076 }

	3077 } else {

	3078 char32 = ch;

	3079 }

	3080

	3081 if ((digVal = u_charDigitValue(char32)) == -1 \|\| dig Indx > UCOL_MAX_DIGITS_FOR_NUMBER){

	3082 // Resetting position to point to the next unpro cessed char. We

	3083 // overshot it when doing our test/set for numbe rs.

	3084 if (char32 > 0xFFFF) { // For surrogates.

	3085 loadState(source, &digitState, TRUE);

	3086 //goBackOne(source);

	3087 }

	3088 goBackOne(source);

	3089 break;

	3090 }

	3091 } else {

	3092 break;

	3093 }

	3094 }

	3095

	3096 if (nonZeroValReached == FALSE){

	3097 digIndx = 2;

	3098 numTempBuf[2] = 6;

	3099 }

	3100

	3101 endIndex = trailingZeroIndex ? trailingZeroIndex : ((digIndx /2) + 2) ;

	3102 if (digIndx % 2 != 0){

	3103 /*

	3104 We missed a value. Since digIndx isn't even, stuck too m any values into the buffer (this is what

	3105 we get for padding the first byte with a zero). "Front-p atch" now by pushing all nybbles forward.

	3106 Doing it this way ensures that at least 50% of the time (statistically speaking) we'll only be doing a

	3107 single pass and optimizes for strings with single digits . I'm just assuming that's the more common case.

	3108 */

	3109

	3110 for(i = 2; i < endIndex; i++){

	3111 numTempBuf[i] = (((((numTempBuf[i] - 6)/2) % 10) * 10) +

	3112 (((numTempBuf[i+1])-6)/2) / 10) * 2 + 6;

	3113 }

	3114 --digIndx;

	3115 }

	3116

	3117 // Subtract one off of the last byte.

	3118 numTempBuf[endIndex-1] -= 1;

	3119

	3120 /*

	3121 We want to skip over the first two slots in the buffer. The first slot

	3122 is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the

	3123 sign/exponent byte: 0x80 + (decimalPos/2) & 7f.

	3124 */

	3125 numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;

	3126 numTempBuf[1] = (uint8_t)(0x80 + ((digIndx/2) & 0x7F));

	3127

	3128 // Now transfer the collation key to our collIterate struct.

	3129 // The total size for our collation key is endIndx bumped up to the next largest even value divided by two.

	3130 //size = ((endIndex+1) & ~1)/2;

	3131 CE = (((numTempBuf[0] << 8) \| numTempBuf[1]) << UCOL_PRIMARY ORDERSHIFT) \| //Primary weight

	3132 (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) \| // Seco ndary weight

	3133 UCOL_BYTE_COMMON; // Tertiary weight.

	3134 i = 2; // Reset the index into the buffer.

	3135 while(i < endIndex)

	3136 {

	3137 uint32_t primWeight = numTempBuf[i++] << 8;

	3138 if ( i < endIndex)

	3139 primWeight \|= numTempBuf[i++];

	3140 *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHI FT) \| UCOL_CONTINUATION_MARKER;

	3141 }

	3142

	3143 } else {

	3144 // no numeric mode, we'll just switch to whatever we stashed and continue

	3145 CEOffset = (uint32_t )coll->image+getExpansionOffset(CE); / find the offset to expansion table */

	3146 CE = *CEOffset++;

	3147 break;

	3148 }

	3149 return CE;

	3150 }

	3151 /* various implicits optimization */

	3152 case IMPLICIT_TAG: /* everything that is not defined otherwise */

	3153 /* UCA is filled with these. Tailorings are NOT_FOUND */

	3154 return getImplicit(cp, source);

	3155 case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D */

	3156 // TODO: remove CJK_IMPLICIT_TAG completely - handled by the getImpl icit

	3157 return getImplicit(cp, source);

	3158 case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/

	3159 {

	3160 static const uint32_t

	3161 SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11 A7;

	3162 //const uint32_t LCount = 19;

	3163 static const uint32_t VCount = 21;

	3164 static const uint32_t TCount = 28;

	3165 //const uint32_t NCount = VCount * TCount; // 588

	3166 //const uint32_t SCount = LCount * NCount; // 11172

	3167 uint32_t L = ch - SBase;

	3168

	3169 // divide into pieces

	3170

	3171 uint32_t T = L % TCount; // we do it in this order since some co mpilers can do % and / in one operation

	3172 L /= TCount;

	3173 uint32_t V = L % VCount;

	3174 L /= VCount;

	3175

	3176 // offset them

	3177

	3178 L += LBase;

	3179 V += VBase;

	3180 T += TBase;

	3181

	3182 // return the first CE, but first put the rest into the expansio n buffer

	3183 if (!source->coll->image->jamoSpecial) { // FAST PATH

	3184

	3185 *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V );

	3186 if (T != TBase) {

	3187 *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mappin g, T);

	3188 }

	3189

	3190 return UTRIE_GET32_FROM_LEAD(&coll->mapping, L);

	3191

	3192 } else { // Jamo is Special

	3193 // Since Hanguls pass the FCD check, it is

	3194 // guaranteed that we won't be in

	3195 // the normalization buffer if something like this happens

	3196 // However, if we are using a uchar iterator and normalizati on

	3197 // is ON, the Hangul that lead us here is going to be in tha t

	3198 // normalization buffer. Here we want to restore the uchar

	3199 // iterator state and pull out of the normalization buffer

	3200 if(source->iterator != NULL && source->flags & UCOL_ITER_INN ORMBUF) {

	3201 source->flags = source->origFlags; // restore the iterat or

	3202 source->pos = NULL;

	3203 }

	3204 // Move Jamos into normalization buffer

	3205 UChar *buffer = source->writableBuffer.getBuffer(4);

	3206 int32_t bufferLength;

	3207 buffer[0] = (UChar)L;

	3208 buffer[1] = (UChar)V;

	3209 if (T != TBase) {

	3210 buffer[2] = (UChar)T;

	3211 bufferLength = 3;

	3212 } else {

	3213 bufferLength = 2;

	3214 }

	3215 source->writableBuffer.releaseBuffer(bufferLength);

	3216

	3217 source->fcdPosition = source->pos; // Indicate where to continue in main input string

	3218 // after exhausting the writableBuffer

	3219 source->pos = source->writableBuffer.getTerminatedBuffer() ;

	3220 source->origFlags = source->flags;

	3221 source->flags \|= UCOL_ITER_INNORMBUF;

	3222 source->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HASLEN);

	3223

	3224 return(UCOL_IGNORABLE);

	3225 }

	3226 }

	3227 case SURROGATE_TAG:

	3228 /* we encountered a leading surrogate. We shall get the CE by using the following code unit */

	3229 /* two things can happen here: next code point can be a trailing sur rogate - we will use it */

	3230 /* to retrieve the CE, or it is not a trailing surrogate (or the str ing is done). In that case */

	3231 /* we treat it like an unassigned code point. */

	3232 {

	3233 UChar trail;

	3234 collIterateState state;

	3235 backupState(source, &state);

	3236 if (collIter_eos(source) \|\| !(U16_IS_TRAIL((trail = getNextNorma lizedChar(source))))) {

	3237 // we chould have stepped one char forward and it might have turned that it

	3238 // was not a trail surrogate. In that case, we have to backu p.

	3239 loadState(source, &state, TRUE);

	3240 return UCOL_NOT_FOUND;

	3241 } else {

	3242 /* TODO: CE contain the data from the previous CE + the mask . It should at least be unmasked */

	3243 CE = UTRIE_GET32_FROM_OFFSET_TRAIL(&coll->mapping, CE&0xFFFF FF, trail);

	3244 if(CE == UCOL_NOT_FOUND) { // there are tailored surrogates in this block, but not this one.

	3245 // We need to backup

	3246 loadState(source, &state, TRUE);

	3247 return CE;

	3248 }

	3249 // calculate the supplementary code point value, if surrogat e was not tailored

	3250 cp = ((((uint32_t)ch)<<10UL)+(trail)-(((uint32_t)0xd800<<10U L)+0xdc00-0x10000));

	3251 }

	3252 }

	3253 break;

	3254 case LEAD_SURROGATE_TAG: /* D800-DBFF*/

	3255 UChar nextChar;

	3256 if( source->flags & UCOL_USE_ITERATOR) {

	3257 if(U_IS_TRAIL(nextChar = (UChar)source->iterator->current(source ->iterator))) {

	3258 cp = U16_GET_SUPPLEMENTARY(ch, nextChar);

	3259 source->iterator->next(source->iterator);

	3260 return getImplicit(cp, source);

	3261 }

	3262 } else if((((source->flags & UCOL_ITER_HASLEN) == 0 ) \|\| (source->po s<source->endp)) &&

	3263 U_IS_TRAIL((nextChar=*source->pos))) {

	3264 cp = U16_GET_SUPPLEMENTARY(ch, nextChar);

	3265 source->pos++;

	3266 return getImplicit(cp, source);

	3267 }

	3268 return UCOL_NOT_FOUND;

	3269 case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/

	3270 return UCOL_NOT_FOUND; /* broken surrogate sequence */

	3271 case CHARSET_TAG:

	3272 /* not yet implemented */

	3273 /* probably after 1.8 */

	3274 return UCOL_NOT_FOUND;

	3275 default:

	3276 *status = U_INTERNAL_PROGRAM_ERROR;

	3277 CE=0;

	3278 break;

	3279 }

	3280 if (CE <= UCOL_NOT_FOUND) break;

	3281 }

	3282 return CE;

	3283 }

	3284

	3285

	3286 /* now uses Mark's getImplicitPrimary code */

	3287 static

	3288 inline uint32_t getPrevImplicit(UChar32 cp, collIterate *collationSource) {

	3289 uint32_t r = uprv_uca_getImplicitPrimary(cp);

	3290

	3291 *(collationSource->CEpos++) = (r & UCOL_PRIMARYMASK) \| 0x00000505;

	3292 collationSource->toReturn = collationSource->CEpos;

	3293

	3294 // ** doesn't work if using iterator **

	3295 if (collationSource->flags & UCOL_ITER_INNORMBUF) {

	3296 collationSource->offsetRepeatCount = 1;

	3297 } else {

	3298 int32_t firstOffset = (int32_t)(collationSource->pos - collationSource-> string);

	3299

	3300 UErrorCode errorCode = U_ZERO_ERROR;

	3301 collationSource->appendOffset(firstOffset, errorCode);

	3302 collationSource->appendOffset(firstOffset + 1, errorCode);

	3303

	3304 collationSource->offsetReturn = collationSource->offsetStore - 1;

	3305 *(collationSource->offsetBuffer) = firstOffset;

	3306 if (collationSource->offsetReturn == collationSource->offsetBuffer) {

	3307 collationSource->offsetStore = collationSource->offsetBuffer;

	3308 }

	3309 }

	3310

	3311 return ((r & 0x0000FFFF)<<16) \| 0x000000C0;

	3312 }

	3313

	3314 /**

	3315 * This function handles the special CEs like contractions, expansions,

	3316 * surrogates, Thai.

	3317 * It is called by getPrevCE

	3318 */

	3319 uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,

	3320 collIterate *source,

	3321 UErrorCode *status)

	3322 {

	3323 const uint32_t *CEOffset = NULL;

	3324 UChar *UCharOffset = NULL;

	3325 UChar schar;

	3326 const UChar *constart = NULL;

	3327 uint32_t size;

	3328 UChar buffer[UCOL_MAX_BUFFER];

	3329 uint32_t *endCEBuffer;

	3330 UChar *strbuffer;

	3331 int32_t noChars = 0;

	3332 int32_t CECount = 0;

	3333

	3334 for(;;)

	3335 {

	3336 /* the only ces that loops are thai and contractions */

	3337 switch (getCETag(CE))

	3338 {

	3339 case NOT_FOUND_TAG: /* this tag always returns */

	3340 return CE;

	3341

	3342 case SPEC_PROC_TAG:

	3343 {

	3344 // Special processing is getting a CE that is preceded by a cert ain prefix

	3345 // Currently this is only needed for optimizing Japanese length and iteration marks.

	3346 // When we encouter a special processing tag, we go backwards an d try to see if

	3347 // we have a match.

	3348 // Contraction tables are used - so the whole process is not unl ike contraction.

	3349 // prefix data is stored backwards in the table.

	3350 const UChar *UCharOffset;

	3351 UChar schar, tchar;

	3352 collIterateState prefixState;

	3353 backupState(source, &prefixState);

	3354 for(;;) {

	3355 // This loop will run once per source string character, for as long as we

	3356 // are matching a potential contraction sequence

	3357

	3358 // First we position ourselves at the begining of contractio n sequence

	3359 const UChar ContractionStart = UCharOffset = (UChar )coll- >image+getContractOffset(CE);

	3360

	3361 if (collIter_bos(source)) {

	3362 CE = *(coll->contractionCEs + (UCharOffset - coll->contr actionIndex));

	3363 break;

	3364 }

	3365 schar = getPrevNormalizedChar(source, status);

	3366 goBackOne(source);

	3367

	3368 while(schar > (tchar = UCharOffset)) { / since the contrac tion codepoints should be ordered, we skip all that are smaller */

	3369 UCharOffset++;

	3370 }

	3371

	3372 if (schar == tchar) {

	3373 // Found the source string char in the table.

	3374 // Pick up the corresponding CE from the table.

	3375 CE = *(coll->contractionCEs +

	3376 (UCharOffset - coll->contractionIndex));

	3377 }

	3378 else

	3379 {

	3380 // if there is a completely ignorable code point in the middle of

	3381 // a prefix, we need to act as if it's not there

	3382 // assumption: 'real' noncharacters (fffe, ffff, fdd0- fdef are set to zero)

	3383 // lone surrogates cannot be set to zero as it would bre ak other processing

	3384 uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping , schar);

	3385 // it's easy for BMP code points

	3386 if(isZeroCE == 0) {

	3387 continue;

	3388 } else if(U16_IS_SURROGATE(schar)) {

	3389 // for supplementary code points, we have to check t he next one

	3390 // situations where we are going to ignore

	3391 // 1. beginning of the string: schar is a lone surro gate

	3392 // 2. schar is a lone surrogate

	3393 // 3. schar is a trail surrogate in a valid surrogat e sequence

	3394 // that is explicitly set to zero.

	3395 if (!collIter_bos(source)) {

	3396 UChar lead;

	3397 if(!U16_IS_SURROGATE_LEAD(schar) && U16_IS_LEAD( lead = getPrevNormalizedChar(source, status))) {

	3398 isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapp ing, lead);

	3399 if(isSpecial(isZeroCE) && getCETag(isZeroCE) == SURROGATE_TAG) {

	3400 uint32_t finalCE = UTRIE_GET32_FROM_OFFS ET_TRAIL(&coll->mapping, isZeroCE&0xFFFFFF, schar);

	3401 if(finalCE == 0) {

	3402 // this is a real, assigned complete ly ignorable code point

	3403 goBackOne(source);

	3404 continue;

	3405 }

	3406 }

	3407 } else {

	3408 // lone surrogate, treat like unassigned

	3409 return UCOL_NOT_FOUND;

	3410 }

	3411 } else {

	3412 // lone surrogate at the beggining, treat like u nassigned

	3413 return UCOL_NOT_FOUND;

	3414 }

	3415 }

	3416 // Source string char was not in the table.

	3417 // We have not found the prefix.

	3418 CE = *(coll->contractionCEs +

	3419 (ContractionStart - coll->contractionIndex));

	3420 }

	3421

	3422 if(!isPrefix(CE)) {

	3423 // The source string char was in the contraction table, and the corresponding

	3424 // CE is not a prefix CE. We found the prefix, break

	3425 // out of loop, this CE will end up being returned. T his is the normal

	3426 // way out of prefix handling when the source actually contained

	3427 // the prefix.

	3428 break;

	3429 }

	3430 }

	3431 loadState(source, &prefixState, TRUE);

	3432 break;

	3433 }

	3434

	3435 case CONTRACTION_TAG: {

	3436 /* to ensure that the backwards and forwards iteration matches, we

	3437 take the current region of most possible match and pass it through

	3438 the forward iteration. this will ensure that the obstinate problem o f

	3439 overlapping contractions will not occur.

	3440 */

	3441 schar = peekCodeUnit(source, 0);

	3442 constart = (UChar *)coll->image + getContractOffset(CE);

	3443 if (isAtStartPrevIterate(source)

	3444 /* commented away contraction end checks after adding the checks

	3445 in getPrevCE */) {

	3446 /* start of string or this is not the end of any contraction */

	3447 CE = *(coll->contractionCEs +

	3448 (constart - coll->contractionIndex));

	3449 break;

	3450 }

	3451 strbuffer = buffer;

	3452 UCharOffset = strbuffer + (UCOL_MAX_BUFFER - 1);

	3453 *(UCharOffset --) = 0;

	3454 noChars = 0;

	3455 // have to swap thai characters

	3456 while (ucol_unsafeCP(schar, coll)) {

	3457 *(UCharOffset) = schar;

	3458 noChars++;

	3459 UCharOffset --;

	3460 schar = getPrevNormalizedChar(source, status);

	3461 goBackOne(source);

	3462 // TODO: when we exhaust the contraction buffer,

	3463 // it needs to get reallocated. The problem is

	3464 // that the size depends on the string which is

	3465 // not iterated over. However, since we're travelling

	3466 // backwards, we already had to set the iterator at

	3467 // the end - so we might as well know where we are?

	3468 if (UCharOffset + 1 == buffer) {

	3469 /* we have exhausted the buffer */

	3470 int32_t newsize = 0;

	3471 if(source->pos) { // actually dealing with a position

	3472 newsize = (int32_t)(source->pos - source->string + 1);

	3473 } else { // iterator

	3474 newsize = 4 * UCOL_MAX_BUFFER;

	3475 }

	3476 strbuffer = (UChar )uprv_malloc(sizeof(UChar)

	3477 (newsize + UCOL_MAX_BUFFER));

	3478 /* test for NULL */

	3479 if (strbuffer == NULL) {

	3480 *status = U_MEMORY_ALLOCATION_ERROR;

	3481 return UCOL_NO_MORE_CES;

	3482 }

	3483 UCharOffset = strbuffer + newsize;

	3484 uprv_memcpy(UCharOffset, buffer,

	3485 UCOL_MAX_BUFFER * sizeof(UChar));

	3486 UCharOffset --;

	3487 }

	3488 if ((source->pos && (source->pos == source->string \|\|

	3489 ((source->flags & UCOL_ITER_INNORMBUF) &&

	3490 *(source->pos - 1) == 0 && source->fcdPosition == NULL)))

	3491 \|\| (source->iterator && !source->iterator->hasPrevious(sourc e->iterator))) {

	3492 break;

	3493 }

	3494 }

	3495 /* adds the initial base character to the string */

	3496 *(UCharOffset) = schar;

	3497 noChars++;

	3498

	3499 int32_t offsetBias;

	3500

	3501 // ** doesn't work if using iterator **

	3502 if (source->flags & UCOL_ITER_INNORMBUF) {

	3503 offsetBias = -1;

	3504 } else {

	3505 offsetBias = (int32_t)(source->pos - source->string);

	3506 }

	3507

	3508 /* a new collIterate is used to simplify things, since using the cur rent

	3509 collIterate will mean that the forward and backwards iteration will

	3510 share and change the same buffers. we don't want to get into that. * /

	3511 collIterate temp;

	3512 int32_t rawOffset;

	3513

	3514 IInit_collIterate(coll, UCharOffset, noChars, &temp, status);

	3515 if(U_FAILURE(*status)) {

	3516 return UCOL_NULLORDER;

	3517 }

	3518 temp.flags &= ~UCOL_ITER_NORM;

	3519 temp.flags \|= source->flags & UCOL_FORCE_HAN_IMPLICIT;

	3520

	3521 rawOffset = (int32_t)(temp.pos - temp.string); // should always be z ero?

	3522 CE = ucol_IGetNextCE(coll, &temp, status);

	3523

	3524 if (source->extendCEs) {

	3525 endCEBuffer = source->extendCEs + source->extendCEsSize;

	3526 CECount = (int32_t)((source->CEpos - source->extendCEs)/sizeof(u int32_t));

	3527 } else {

	3528 endCEBuffer = source->CEs + UCOL_EXPAND_CE_BUFFER_SIZE;

	3529 CECount = (int32_t)((source->CEpos - source->CEs)/sizeof(uint32_ t));

	3530 }

	3531

	3532 while (CE != UCOL_NO_MORE_CES) {

	3533 *(source->CEpos ++) = CE;

	3534

	3535 if (offsetBias >= 0) {

	3536 source->appendOffset(rawOffset + offsetBias, *status);

	3537 }

	3538

	3539 CECount++;

	3540 if (source->CEpos == endCEBuffer) {

	3541 /* ran out of CE space, reallocate to new buffer.

	3542 If reallocation fails, reset pointers and bail out,

	3543 there's no guarantee of the right character position after

	3544 this bail*/

	3545 if (!increaseCEsCapacity(source)) {

	3546 *status = U_MEMORY_ALLOCATION_ERROR;

	3547 break;

	3548 }

	3549

	3550 endCEBuffer = source->extendCEs + source->extendCEsSize;

	3551 }

	3552

	3553 if ((temp.flags & UCOL_ITER_INNORMBUF) != 0) {

	3554 rawOffset = (int32_t)(temp.fcdPosition - temp.string);

	3555 } else {

	3556 rawOffset = (int32_t)(temp.pos - temp.string);

	3557 }

	3558

	3559 CE = ucol_IGetNextCE(coll, &temp, status);

	3560 }

	3561

	3562 if (strbuffer != buffer) {

	3563 uprv_free(strbuffer);

	3564 }

	3565 if (U_FAILURE(*status)) {

	3566 return (uint32_t)UCOL_NULLORDER;

	3567 }

	3568

	3569 if (source->offsetRepeatValue != 0) {

	3570 if (CECount > noChars) {

	3571 source->offsetRepeatCount += temp.offsetRepeatCount;

	3572 } else {

	3573 // ** does this really skip the right offsets? **

	3574 source->offsetReturn -= (noChars - CECount);

	3575 }

	3576 }

	3577

	3578 if (offsetBias >= 0) {

	3579 source->offsetReturn = source->offsetStore - 1;

	3580 if (source->offsetReturn == source->offsetBuffer) {

	3581 source->offsetStore = source->offsetBuffer;

	3582 }

	3583 }

	3584

	3585 source->toReturn = source->CEpos - 1;

	3586 if (source->toReturn == source->CEs) {

	3587 source->CEpos = source->CEs;

	3588 }

	3589

	3590 return *(source->toReturn);

	3591 }

	3592 case LONG_PRIMARY_TAG:

	3593 {

	3594 *(source->CEpos++) = ((CE & 0xFFFF00) << 8) \| (UCOL_BYTE_COMMON << 8) \| UCOL_BYTE_COMMON;

	3595 *(source->CEpos++) = ((CE & 0xFF)<<24)\|UCOL_CONTINUATION_MARKER;

	3596 source->toReturn = source->CEpos - 1;

	3597

	3598 if (source->flags & UCOL_ITER_INNORMBUF) {

	3599 source->offsetRepeatCount = 1;

	3600 } else {

	3601 int32_t firstOffset = (int32_t)(source->pos - source->string );

	3602

	3603 source->appendOffset(firstOffset, *status);

	3604 source->appendOffset(firstOffset + 1, *status);

	3605

	3606 source->offsetReturn = source->offsetStore - 1;

	3607 *(source->offsetBuffer) = firstOffset;

	3608 if (source->offsetReturn == source->offsetBuffer) {

	3609 source->offsetStore = source->offsetBuffer;

	3610 }

	3611 }

	3612

	3613

	3614 return *(source->toReturn);

	3615 }

	3616

	3617 case EXPANSION_TAG: /* this tag always returns */

	3618 {

	3619 /*

	3620 This should handle expansion.

	3621 NOTE: we can encounter both continuations and expansions in an expan sion!

	3622 I have to decide where continuations are going to be dealt with

	3623 */

	3624 int32_t firstOffset = (int32_t)(source->pos - source->string);

	3625

	3626 // ** doesn't work if using iterator **

	3627 if (source->offsetReturn != NULL) {

	3628 if (! (source->flags & UCOL_ITER_INNORMBUF) && source->offsetRet urn == source->offsetBuffer) {

	3629 source->offsetStore = source->offsetBuffer;

	3630 }else {

	3631 firstOffset = -1;

	3632 }

	3633 }

	3634

	3635 /* find the offset to expansion table */

	3636 CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);

	3637 size = getExpansionCount(CE);

	3638 if (size != 0) {

	3639 /*

	3640 if there are less than 16 elements in expansion, we don't termin ate

	3641 */

	3642 uint32_t count;

	3643

	3644 for (count = 0; count < size; count++) {

	3645 (source->CEpos ++) = CEOffset++;

	3646

	3647 if (firstOffset >= 0) {

	3648 source->appendOffset(firstOffset + 1, *status);

	3649 }

	3650 }

	3651 } else {

	3652 /* else, we do */

	3653 while (*CEOffset != 0) {

	3654 (source->CEpos ++) = CEOffset ++;

	3655

	3656 if (firstOffset >= 0) {

	3657 source->appendOffset(firstOffset + 1, *status);

	3658 }

	3659 }

	3660 }

	3661

	3662 if (firstOffset >= 0) {

	3663 source->offsetReturn = source->offsetStore - 1;

	3664 *(source->offsetBuffer) = firstOffset;

	3665 if (source->offsetReturn == source->offsetBuffer) {

	3666 source->offsetStore = source->offsetBuffer;

	3667 }

	3668 } else {

	3669 source->offsetRepeatCount += size - 1;

	3670 }

	3671

	3672 source->toReturn = source->CEpos - 1;

	3673 // in case of one element expansion, we

	3674 // want to immediately return CEpos

	3675 if(source->toReturn == source->CEs) {

	3676 source->CEpos = source->CEs;

	3677 }

	3678

	3679 return *(source->toReturn);

	3680 }

	3681

	3682 case DIGIT_TAG:

	3683 {

	3684 /*

	3685 We do a check to see if we want to collate digits as numbers; if so we generate

	3686 a custom collation key. Otherwise we pull out the value stored i n the expansion table.

	3687 */

	3688 uint32_t i; /* general counter */

	3689

	3690 if (source->coll->numericCollation == UCOL_ON){

	3691 uint32_t digIndx = 0;

	3692 uint32_t endIndex = 0;

	3693 uint32_t leadingZeroIndex = 0;

	3694 uint32_t trailingZeroCount = 0;

	3695

	3696 uint8_t collateVal = 0;

	3697

	3698 UBool nonZeroValReached = FALSE;

	3699

	3700 uint8_t numTempBuf[UCOL_MAX_DIGITS_FOR_NUMBER/2 + 2]; // I j ust need a temporary place to store my generated CEs.

	3701 /*

	3702 We parse the source string until we hit a char that's NOT a digit.

	3703 Use this u_charDigitValue. This might be slow because we hav e to

	3704 handle surrogates...

	3705 */

	3706 /*

	3707 We need to break up the digit string into collection element s of UCOL_MAX_DIGITS_FOR_NUMBER or less,

	3708 with any chunks smaller than that being on the right end of the digit string - i.e. the first collation

	3709 element we process when going backward. To determine how lon g that chunk might be, we may need to make

	3710 two passes through the loop that collects digits - one to se e how long the string is (and how much is

	3711 leading zeros) to determine the length of that right-hand ch unk, and a second (if the whole string has

	3712 more than UCOL_MAX_DIGITS_FOR_NUMBER non-leading-zero digits ) to actually process that collation

	3713 element chunk after resetting the state to the initialState at the right side of the digit string.

	3714 */

	3715 uint32_t ceLimit = 0;

	3716 UChar initial_ch = ch;

	3717 collIterateState initialState = {0,0,0,0,0,0,0,0,0};

	3718 backupState(source, &initialState);

	3719

	3720 for(;;) {

	3721 collIterateState state = {0,0,0,0,0,0,0,0,0};

	3722 UChar32 char32 = 0;

	3723 int32_t digVal = 0;

	3724

	3725 if (U16_IS_TRAIL (ch)) {

	3726 if (!collIter_bos(source)){

	3727 UChar lead = getPrevNormalizedChar(source, statu s);

	3728 if(U16_IS_LEAD(lead)) {

	3729 char32 = U16_GET_SUPPLEMENTARY(lead,ch);

	3730 goBackOne(source);

	3731 } else {

	3732 char32 = ch;

	3733 }

	3734 } else {

	3735 char32 = ch;

	3736 }

	3737 } else {

	3738 char32 = ch;

	3739 }

	3740 digVal = u_charDigitValue(char32);

	3741

	3742 for(;;) {

	3743 // Make sure we have enough space. No longer needed;

	3744 // at this point the largest value of digIndx when w e need to save data in numTempBuf

	3745 // is UCOL_MAX_DIGITS_FOR_NUMBER-1 (digIndx is post- incremented) so we just ensure

	3746 // that numTempBuf is big enough (UCOL_MAX_DIGITS_FO R_NUMBER/2 + 2).

	3747

	3748 // Skip over trailing zeroes, and keep a count of th em.

	3749 if (digVal != 0)

	3750 nonZeroValReached = TRUE;

	3751

	3752 if (nonZeroValReached) {

	3753 /*

	3754 We parse the digit string into base 100 numbers (this fits into a byte).

	3755 We only add to the buffer in twos, thus if we are parsing an odd character,

	3756 that serves as the 'tens' digit, while if we are parsing an even one, that

	3757 is the 'ones' digit. We dump the parsed base 100 value (collateVal) into

	3758 a buffer. We multiply each collateVal by 2 (to give us room) and add 5 (to avoid

	3759 overlapping magic CE byte values). The last byte we subtract 1 to ensure it is less

	3760 than all the other bytes.

	3761

	3762 Since we're doing in this reverse we want to put the first digit encountered into the

	3763 ones place and the second digit encountered into the tens place.

	3764 */

	3765

	3766 if ((digIndx + trailingZeroCount) % 2 == 1) {

	3767 // High-order digit case (tens place)

	3768 collateVal += (uint8_t)(digVal * 10);

	3769

	3770 // We cannot set leadingZeroIndex unless it has been set for the

	3771 // low-order digit. Therefore, all we can do for the high-order

	3772 // digit is turn it off, never on.

	3773 // The only time we will have a high digit w ithout a low is for

	3774 // the very first non-zero digit, so no zero check is necessary.

	3775 if (collateVal != 0)

	3776 leadingZeroIndex = 0;

	3777

	3778 // The first pass through, digIndx may excee d the limit, but in that case

	3779 // we no longer care about numTempBuf conten ts since they will be discarded

	3780 if ( digIndx < UCOL_MAX_DIGITS_FOR_NUMBER ) {

	3781 numTempBuf[(digIndx/2) + 2] = collateVal *2 + 6;

	3782 }

	3783 collateVal = 0;

	3784 } else {

	3785 // Low-order digit case (ones place)

	3786 collateVal = (uint8_t)digVal;

	3787

	3788 // Check for leading zeroes.

	3789 if (collateVal == 0) {

	3790 if (!leadingZeroIndex)

	3791 leadingZeroIndex = (digIndx/2) + 2;

	3792 } else

	3793 leadingZeroIndex = 0;

	3794

	3795 // No need to write to buffer; the case of a last odd digit

	3796 // is handled below.

	3797 }

	3798 ++digIndx;

	3799 } else

	3800 ++trailingZeroCount;

	3801

	3802 if (!collIter_bos(source)) {

	3803 ch = getPrevNormalizedChar(source, status);

	3804 //goBackOne(source);

	3805 if (U16_IS_TRAIL(ch)) {

	3806 backupState(source, &state);

	3807 if (!collIter_bos(source)) {

	3808 goBackOne(source);

	3809 UChar lead = getPrevNormalizedChar(sourc e, status);

	3810

	3811 if(U16_IS_LEAD(lead)) {

	3812 char32 = U16_GET_SUPPLEMENTARY(lead, ch);

	3813 } else {

	3814 loadState(source, &state, FALSE);

	3815 char32 = ch;

	3816 }

	3817 }

	3818 } else

	3819 char32 = ch;

	3820

	3821 if ((digVal = u_charDigitValue(char32)) == -1 \|\| (ceLimit > 0 && (digIndx + trailingZeroCount) >= ceLimit)) {

	3822 if (char32 > 0xFFFF) {// For surrogates.

	3823 loadState(source, &state, FALSE);

	3824 }

	3825 // Don't need to "reverse" the goBackOne cal l,

	3826 // as this points to the next position to pr ocess..

	3827 //if (char32 > 0xFFFF) // For surrogates.

	3828 //getNextNormalizedChar(source);

	3829 break;

	3830 }

	3831

	3832 goBackOne(source);

	3833 }else

	3834 break;

	3835 }

	3836

	3837 if (digIndx + trailingZeroCount <= UCOL_MAX_DIGITS_FOR_N UMBER) {

	3838 // our collation element is not too big, go ahead an d finish with it

	3839 break;

	3840 }

	3841 // our digit string is too long for a collation element;

	3842 // set the limit for it, reset the state and begin again

	3843 ceLimit = (digIndx + trailingZeroCount) % UCOL_MAX_DIGIT S_FOR_NUMBER;

	3844 if ( ceLimit == 0 ) {

	3845 ceLimit = UCOL_MAX_DIGITS_FOR_NUMBER;

	3846 }

	3847 ch = initial_ch;

	3848 loadState(source, &initialState, FALSE);

	3849 digIndx = endIndex = leadingZeroIndex = trailingZeroCoun t = 0;

	3850 collateVal = 0;

	3851 nonZeroValReached = FALSE;

	3852 }

	3853

	3854 if (! nonZeroValReached) {

	3855 digIndx = 2;

	3856 trailingZeroCount = 0;

	3857 numTempBuf[2] = 6;

	3858 }

	3859

	3860 if ((digIndx + trailingZeroCount) % 2 != 0) {

	3861 numTempBuf[((digIndx)/2) + 2] = collateVal*2 + 6;

	3862 digIndx += 1; // The implicit leading zero

	3863 }

	3864 if (trailingZeroCount % 2 != 0) {

	3865 // We had to consume one trailing zero for the low digit

	3866 // of the least significant byte

	3867 digIndx += 1; // The trailing zero not in the expo nent

	3868 trailingZeroCount -= 1;

	3869 }

	3870

	3871 endIndex = leadingZeroIndex ? leadingZeroIndex : ((digIndx/2 ) + 2) ;

	3872

	3873 // Subtract one off of the last byte. Really the first byte here, but it's reversed...

	3874 numTempBuf[2] -= 1;

	3875

	3876 /*

	3877 We want to skip over the first two slots in the buffer. The first slot

	3878 is reserved for the header byte UCOL_CODAN_PLACEHOLDER. The second slot is for the

	3879 sign/exponent byte: 0x80 + (decimalPos/2) & 7f.

	3880 The exponent must be adjusted by the number of leading zeroe s, and the number of

	3881 trailing zeroes.

	3882 */

	3883 numTempBuf[0] = UCOL_CODAN_PLACEHOLDER;

	3884 uint32_t exponent = (digIndx+trailingZeroCount)/2;

	3885 if (leadingZeroIndex)

	3886 exponent -= ((digIndx/2) + 2 - leadingZeroIndex);

	3887 numTempBuf[1] = (uint8_t)(0x80 + (exponent & 0x7F));

	3888

	3889 // Now transfer the collation key to our collIterate struct.

	3890 // The total size for our collation key is half of endIndex, rounded up.

	3891 int32_t size = (endIndex+1)/2;

	3892 if(!ensureCEsCapacity(source, size)) {

	3893 return UCOL_NULLORDER;

	3894 }

	3895 *(source->CEpos++) = (((numTempBuf[0] << 8) \| numTempBuf[1]) << UCOL_PRIMARYORDERSHIFT) \| //Primary weight

	3896 (UCOL_BYTE_COMMON << UCOL_SECONDARYORDERSHIFT) \| // Seco ndary weight

	3897 UCOL_BYTE_COMMON; // Tertiary weight.

	3898 i = endIndex - 1; // Reset the index into the buffer.

	3899 while(i >= 2) {

	3900 uint32_t primWeight = numTempBuf[i--] << 8;

	3901 if ( i >= 2)

	3902 primWeight \|= numTempBuf[i--];

	3903 *(source->CEpos++) = (primWeight << UCOL_PRIMARYORDERSHI FT) \| UCOL_CONTINUATION_MARKER;

	3904 }

	3905

	3906 source->toReturn = source->CEpos -1;

	3907 return *(source->toReturn);

	3908 } else {

	3909 CEOffset = (uint32_t *)coll->image + getExpansionOffset(CE);

	3910 CE = *(CEOffset++);

	3911 break;

	3912 }

	3913 }

	3914

	3915 case HANGUL_SYLLABLE_TAG: /* AC00-D7AF*/

	3916 {

	3917 static const uint32_t

	3918 SBase = 0xAC00, LBase = 0x1100, VBase = 0x1161, TBase = 0x11 A7;

	3919 //const uint32_t LCount = 19;

	3920 static const uint32_t VCount = 21;

	3921 static const uint32_t TCount = 28;

	3922 //const uint32_t NCount = VCount * TCount; /* 588 */

	3923 //const uint32_t SCount = LCount * NCount; /* 11172 */

	3924

	3925 uint32_t L = ch - SBase;

	3926 /*

	3927 divide into pieces.

	3928 we do it in this order since some compilers can do % and / in on e

	3929 operation

	3930 */

	3931 uint32_t T = L % TCount;

	3932 L /= TCount;

	3933 uint32_t V = L % VCount;

	3934 L /= VCount;

	3935

	3936 /* offset them */

	3937 L += LBase;

	3938 V += VBase;

	3939 T += TBase;

	3940

	3941 int32_t firstOffset = (int32_t)(source->pos - source->string);

	3942 source->appendOffset(firstOffset, *status);

	3943

	3944 /*

	3945 * return the first CE, but first put the rest into the expansio n buffer

	3946 */

	3947 if (!source->coll->image->jamoSpecial) {

	3948 *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, L );

	3949 *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mapping, V );

	3950 source->appendOffset(firstOffset + 1, *status);

	3951

	3952 if (T != TBase) {

	3953 *(source->CEpos++) = UTRIE_GET32_FROM_LEAD(&coll->mappin g, T);

	3954 source->appendOffset(firstOffset + 1, *status);

	3955 }

	3956

	3957 source->toReturn = source->CEpos - 1;

	3958

	3959 source->offsetReturn = source->offsetStore - 1;

	3960 if (source->offsetReturn == source->offsetBuffer) {

	3961 source->offsetStore = source->offsetBuffer;

	3962 }

	3963

	3964 return *(source->toReturn);

	3965 } else {

	3966 // Since Hanguls pass the FCD check, it is

	3967 // guaranteed that we won't be in

	3968 // the normalization buffer if something like this happens

	3969 // Move Jamos into normalization buffer

	3970 /*

	3971 Move the Jamos into the

	3972 normalization buffer

	3973 */

	3974 UChar *tempbuffer = source->writableBuffer.getBuffer(5);

	3975 int32_t tempbufferLength;

	3976 tempbuffer[0] = 0;

	3977 tempbuffer[1] = (UChar)L;

	3978 tempbuffer[2] = (UChar)V;

	3979 if (T != TBase) {

	3980 tempbuffer[3] = (UChar)T;

	3981 tempbufferLength = 4;

	3982 } else {

	3983 tempbufferLength = 3;

	3984 }

	3985 source->writableBuffer.releaseBuffer(tempbufferLength);

	3986

	3987 /*

	3988 Indicate where to continue in main input string after exhaus ting

	3989 the writableBuffer

	3990 */

	3991 if (source->pos == source->string) {

	3992 source->fcdPosition = NULL;

	3993 } else {

	3994 source->fcdPosition = source->pos-1;

	3995 }

	3996

	3997 source->pos = source->writableBuffer.getTermin atedBuffer() + tempbufferLength;

	3998 source->origFlags = source->flags;

	3999 source->flags \|= UCOL_ITER_INNORMBUF;

	4000 source->flags &= ~(UCOL_ITER_NORM \| UCOL_ITER_HAS LEN);

	4001

	4002 return(UCOL_IGNORABLE);

	4003 }

	4004 }

	4005

	4006 case IMPLICIT_TAG: /* everything that is not defined otherwise */

	4007 return getPrevImplicit(ch, source);

	4008

	4009 // TODO: Remove CJK implicits as they are handled by the getImplicit Primary function

	4010 case CJK_IMPLICIT_TAG: /* 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D */

	4011 return getPrevImplicit(ch, source);

	4012

	4013 case SURROGATE_TAG: /* This is a surrogate pair */

	4014 /* essentially an engaged lead surrogate. */

	4015 /* if you have encountered it here, it means that a */

	4016 /* broken sequence was encountered and this is an error */

	4017 return UCOL_NOT_FOUND;

	4018

	4019 case LEAD_SURROGATE_TAG: /* D800-DBFF*/

	4020 return UCOL_NOT_FOUND; /* broken surrogate sequence */

	4021

	4022 case TRAIL_SURROGATE_TAG: /* DC00-DFFF*/

	4023 {

	4024 UChar32 cp = 0;

	4025 UChar prevChar;

	4026 const UChar *prev;

	4027 if (isAtStartPrevIterate(source)) {

	4028 /* we are at the start of the string, wrong place to be at * /

	4029 return UCOL_NOT_FOUND;

	4030 }

	4031 if (source->pos != source->writableBuffer.getBuffer()) {

	4032 prev = source->pos - 1;

	4033 } else {

	4034 prev = source->fcdPosition;

	4035 }

	4036 prevChar = *prev;

	4037

	4038 /* Handles Han and Supplementary characters here.*/

	4039 if (U16_IS_LEAD(prevChar)) {

	4040 cp = ((((uint32_t)prevChar)<<10UL)+(ch)-(((uint32_t)0xd800<< 10UL)+0xdc00-0x10000));

	4041 source->pos = prev;

	4042 } else {

	4043 return UCOL_NOT_FOUND; /* like unassigned */

	4044 }

	4045

	4046 return getPrevImplicit(cp, source);

	4047 }

	4048

	4049 /* UCA is filled with these. Tailorings are NOT_FOUND */

	4050 /* not yet implemented */

	4051 case CHARSET_TAG: /* this tag always returns */

	4052 /* probably after 1.8 */

	4053 return UCOL_NOT_FOUND;

	4054

	4055 default: /* this tag always returns */

	4056 *status = U_INTERNAL_PROGRAM_ERROR;

	4057 CE=0;

	4058 break;

	4059 }

	4060

	4061 if (CE <= UCOL_NOT_FOUND) {

	4062 break;

	4063 }

	4064 }

	4065

	4066 return CE;

	4067 }

	4068

	4069 /* This should really be a macro */

	4070 /* However, it is used only when stack buffers are not sufficiently big, and the n we're messed up performance wise */

	4071 /* anyway */

	4072 static

	4073 uint8_t reallocateBuffer(uint8_t secondaries, uint8_t secStart, uint8_t sec ond, uint32_t secSize, uint32_t newSize, UErrorCode *status) {

	4074 #ifdef UCOL_DEBUG

	4075 fprintf(stderr, ".");

	4076 #endif

	4077 uint8_t *newStart = NULL;

	4078 uint32_t offset = (uint32_t)(*secondaries-secStart);

	4079

	4080 if(secStart==second) {

	4081 newStart=(uint8_t*)uprv_malloc(newSize);

	4082 if(newStart==NULL) {

	4083 *status = U_MEMORY_ALLOCATION_ERROR;

	4084 return NULL;

	4085 }

	4086 uprv_memcpy(newStart, secStart, *secondaries-secStart);

	4087 } else {

	4088 newStart=(uint8_t*)uprv_realloc(secStart, newSize);

	4089 if(newStart==NULL) {

	4090 *status = U_MEMORY_ALLOCATION_ERROR;

	4091 /* Since we're reallocating, return original reference so we don't l oose it. */

	4092 return secStart;

	4093 }

	4094 }

	4095 *secondaries=newStart+offset;

	4096 *secSize=newSize;

	4097 return newStart;

	4098 }

	4099

	4100

	/*
	 * Reverses, in place, the elements between two pointers (both ends inclusive).
	 * We need this operation when doing continuation secondaries in French.
	 *
	 * TYPE is the element type; start and end must be modifiable pointer lvalues.
	 * Both are evaluated several times and are advanced towards each other, so
	 * after the macro runs they no longer point at the original range.
	 *
	 * The macro replaces this earlier function form:
	 *
	 *   void uprv_ucol_reverse_buffer(uint8_t *start, uint8_t *end) {
	 *     uint8_t temp;
	 *     while(start<end) {
	 *       temp = *start;
	 *       *start++ = *end;
	 *       *end-- = temp;
	 *     }
	 *   }
	 */
	#define uprv_ucol_reverse_buffer(TYPE, start, end) { \
	    TYPE tempA; \
	    while((start)<(end)) { \
	        tempA = *(start); \
	        *(start)++ = *(end); \
	        *(end)-- = tempA; \
	    } \
	}

	4123

	4124 /****************************************************************************/

	4125 /* Following are the sortkey generation functions */

	4126 /* */

	4127 /****************************************************************************/

	4128

	4129 /**

	4130 * Merge two sort keys.

	4131 * This is useful, for example, to combine sort keys from first and last names

	4132 * to sort such pairs.

	4133 * Merged sort keys consider on each collation level the first part first entire ly,

	4134 * then the second one.

	4135 * It is possible to merge multiple sort keys by consecutively merging

	4136 * another one with the intermediate result.

	4137 *

	4138 * The length of the merge result is the sum of the lengths of the input sort ke ys

	4139 * minus 1.

	4140 *

	4141 * @param src1 the first sort key

	4142 * @param src1Length the length of the first sort key, including the zero byte a t the end;

	4143 * can be -1 if the function is to find the length

	4144 * @param src2 the second sort key

	4145 * @param src2Length the length of the second sort key, including the zero byte at the end;

	4146 * can be -1 if the function is to find the length

	4147 * @param dest the buffer where the merged sort key is written,

	4148 * can be NULL if destCapacity==0

	4149 * @param destCapacity the number of bytes in the dest buffer

	4150 * @return the length of the merged sort key, src1Length+src2Length-1;

	4151 * can be larger than destCapacity, or 0 if an error occurs (only for il legal arguments),

	4152 * in which cases the contents of dest is undefined

	4153 *

	4154 * @draft

	4155 */

	4156 U_CAPI int32_t U_EXPORT2

	4157 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,

	4158 const uint8_t *src2, int32_t src2Length,

	4159 uint8_t *dest, int32_t destCapacity) {

	4160 int32_t destLength;

	4161 uint8_t b;

	4162

	4163 /* check arguments */

	4164 if( src1==NULL \|\| src1Length<-2 \|\| src1Length==0 \|\| (src1Length>0 && src1[sr c1Length-1]!=0) \|\|

	4165 src2==NULL \|\| src2Length<-2 \|\| src2Length==0 \|\| (src2Length>0 && src2[sr c2Length-1]!=0) \|\|

	4166 destCapacity<0 \|\| (destCapacity>0 && dest==NULL)

	4167 ) {

	4168 /* error, attempt to write a zero byte and return 0 */

	4169 if(dest!=NULL && destCapacity>0) {

	4170 *dest=0;

	4171 }

	4172 return 0;

	4173 }

	4174

	4175 /* check lengths and capacity */

	4176 if(src1Length<0) {

	4177 src1Length=(int32_t)uprv_strlen((const char *)src1)+1;

	4178 }

	4179 if(src2Length<0) {

	4180 src2Length=(int32_t)uprv_strlen((const char *)src2)+1;

	4181 }

	4182

	4183 destLength=src1Length+src2Length-1;

	4184 if(destLength>destCapacity) {

	4185 /* the merged sort key does not fit into the destination */

	4186 return destLength;

	4187 }

	4188

	4189 /* merge the sort keys with the same number of levels */

	4190 while(src1!=0 && src2!=0) { /* while both have another level */

	4191 /* copy level from src1 not including 00 or 01 */

	4192 while((b=*src1)>=2) {

	4193 ++src1;

	4194 *dest++=b;

	4195 }

	4196

	4197 /* add a 02 merge separator */

	4198 *dest++=2;

	4199

	4200 /* copy level from src2 not including 00 or 01 */

	4201 while((b=*src2)>=2) {

	4202 ++src2;

	4203 *dest++=b;

	4204 }

	4205

	4206 /* if both sort keys have another level, then add a 01 level separator a nd continue */

	4207 if(src1==1 && src2==1) {

	4208 ++src1;

	4209 ++src2;

	4210 *dest++=1;

	4211 }

	4212 }

	4213

	4214 /*

	4215 * here, at least one sort key is finished now, but the other one

	4216 * might have some contents left from containing more levels;

	4217 * that contents is just appended to the result

	4218 */

	4219 if(*src1!=0) {

	4220 /* src1 is not finished, therefore src2==0, and src1 is appended /

	4221 src2=src1;

	4222 }

	4223 /* append src2, "the other, unfinished sort key" */

	4224 uprv_strcpy((char )dest, (const char )src2);

	4225

	4226 /* trust that neither sort key contained illegally embedded zero bytes */

	4227 return destLength;

	4228 }

	4229

	4230 /* sortkey API */

	4231 U_CAPI int32_t U_EXPORT2

	4232 ucol_getSortKey(const UCollator *coll,

	4233 const UChar *source,

	4234 int32_t sourceLength,

	4235 uint8_t *result,

	4236 int32_t resultLength)

	4237 {

	4238 UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);

	4239 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {

	4240 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, sour ce,

	4241 ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLengt h));

	4242 }

	4243

	4244 UErrorCode status = U_ZERO_ERROR;

	4245 int32_t keySize = 0;

	4246

	4247 if(source != NULL) {

	4248 // source == NULL is actually an error situation, but we would need to

	4249 // have an error code to return it. Until we introduce a new

	4250 // API, it stays like this

	4251

	4252 /* this uses the function pointer that is set in updateinternalstate */

	4253 /* currently, there are two funcs: */

	4254 /ucol_calcSortKey(...);/

	4255 /ucol_calcSortKeySimpleTertiary(...);/

	4256

	4257 keySize = coll->sortKeyGen(coll, source, sourceLength, &result, resultLe ngth, FALSE, &status);

	4258 //if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR && result & & resultLength > 0) {

	4259 // That's not good. Something unusual happened.

	4260 // We don't know how much we initialized before we failed.

	4261 // NULL terminate for safety.

	4262 // We have no way say that we have generated a partial sort key.

	4263 //result[0] = 0;

	4264 //keySize = 0;

	4265 //}

	4266 }

	4267 UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);

	4268 UTRACE_EXIT_STATUS(status);

	4269 return keySize;

	4270 }

	4271

	4272 /* this function is called by the C++ API for sortkey generation */

	4273 U_CFUNC int32_t

	4274 ucol_getSortKeyWithAllocation(const UCollator *coll,

	4275 const UChar *source, int32_t sourceLength,

	4276 uint8_t **pResult,

	4277 UErrorCode *pErrorCode) {

	4278 *pResult = 0;

	4279 return coll->sortKeyGen(coll, source, sourceLength, pResult, 0, TRUE, pError Code);

	4280 }

	4281

	4282 #define UCOL_FSEC_BUF_SIZE 256

	4283

	4284 // Is this primary weight compressible?

	4285 // Returns false for multi-lead-byte scripts (digits, Latin, Han, implicit).

	4286 // TODO: This should use per-lead-byte flags from FractionalUCA.txt.

	4287 static inline UBool

	4288 isCompressible(const UCollator * /coll/, uint8_t primary1) {

	4289 return UCOL_BYTE_FIRST_NON_LATIN_PRIMARY <= primary1 && primary1 <= maxRegul arPrimary;

	4290 }

	4291

	4292 /* This function tries to get the size of a sortkey. It will be invoked if the s ize of resulting buffer is 0 */

	4293 /* or if we run out of space while making a sortkey and want to return ASAP */

	4294 int32_t ucol_getSortKeySize(const UCollator coll, collIterate s, int32_t curre ntSize, UColAttributeValue strength, int32_t len) {

	4295 UErrorCode status = U_ZERO_ERROR;

	4296 //const UCAConstants UCAconsts = (UCAConstants )((uint8_t *)coll->UCA->ima ge + coll->image->UCAConsts);

	4297 uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);

	4298 uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);

	4299 uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);

	4300 UBool compareIdent = (strength == UCOL_IDENTICAL);

	4301 UBool doCase = (coll->caseLevel == UCOL_ON);

	4302 UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);

	4303 //UBool qShifted = shifted && (compareQuad == 0);

	4304 UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);

	4305 UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0) ;

	4306 uint8_t fSecsBuff[UCOL_FSEC_BUF_SIZE];

	4307 uint8_t *fSecs = fSecsBuff;

	4308 uint32_t fSecsLen = 0, fSecsMaxLen = UCOL_FSEC_BUF_SIZE;

	4309 uint8_t frenchStartPtr = NULL, frenchEndPtr = NULL;

	4310

	4311 uint32_t variableTopValue = coll->variableTopValue;

	4312 uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);

	4313 if(doHiragana) {

	4314 UCOL_COMMON_BOT4++;

	4315 /* allocate one more space for hiragana */

	4316 }

	4317 uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);

	4318

	4319 uint32_t order = UCOL_NO_MORE_CES;

	4320 uint8_t primary1 = 0;

	4321 uint8_t primary2 = 0;

	4322 uint8_t secondary = 0;

	4323 uint8_t tertiary = 0;

	4324 int32_t caseShift = 0;

	4325 uint32_t c2 = 0, c3 = 0, c4 = 0; /* variables for compression */

	4326

	4327 uint8_t caseSwitch = coll->caseSwitch;

	4328 uint8_t tertiaryMask = coll->tertiaryMask;

	4329 uint8_t tertiaryCommon = coll->tertiaryCommon;

	4330

	4331 UBool wasShifted = FALSE;

	4332 UBool notIsContinuation = FALSE;

	4333 uint8_t leadPrimary = 0;

	4334

	4335

	4336 for(;;) {

	4337 order = ucol_IGetNextCE(coll, s, &status);

	4338 if(order == UCOL_NO_MORE_CES) {

	4339 break;

	4340 }

	4341

	4342 if(order == 0) {

	4343 continue;

	4344 }

	4345

	4346 notIsContinuation = !isContinuation(order);

	4347

	4348

	4349 if(notIsContinuation) {

	4350 tertiary = (uint8_t)((order & UCOL_BYTE_SIZE_MASK));

	4351 } else {

	4352 tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));

	4353 }

	4354 secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);

	4355 primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);

	4356 primary1 = (uint8_t)(order >> 8);

	4357

	4358 /* no need to permute since the actual code values don't matter

	4359 if (coll->leadBytePermutationTable != NULL && notIsContinuation) {

	4360 primary1 = coll->leadBytePermutationTable[primary1];

	4361 }

	4362 */

	4363

	4364 if((shifted && ((notIsContinuation && order <= variableTopValue && prima ry1 > 0)

	4365 \|\| (!notIsContinuation && wasShifted)))

	4366 \|\| (wasShifted && primary1 == 0)) { /* amendment to the UCA says tha t primary ignorables */

	4367 /* and other ignorables should be removed if following a shifted code point */

	4368 if(primary1 == 0) { /* if we were shifted and we got an ignorabl e code point */

	4369 /* we should just completely ignore it */

	4370 continue;

	4371 }

	4372 if(compareQuad == 0) {

	4373 if(c4 > 0) {

	4374 currentSize += (c2/UCOL_BOT_COUNT4)+1;

	4375 c4 = 0;

	4376 }

	4377 currentSize++;

	4378 if(primary2 != 0) {

	4379 currentSize++;

	4380 }

	4381 }

	4382 wasShifted = TRUE;

	4383 } else {

	4384 wasShifted = FALSE;

	4385 /* Note: This code assumes that the table is well built i.e. not hav ing 0 bytes where they are not supposed to be. */

	4386 /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */

	4387 /* calculate sortkey size */

	4388 if(primary1 != UCOL_IGNORABLE) {

	4389 if(notIsContinuation) {

	4390 if(leadPrimary == primary1) {

	4391 currentSize++;

	4392 } else {

	4393 if(leadPrimary != 0) {

	4394 currentSize++;

	4395 }

	4396 if(primary2 == UCOL_IGNORABLE) {

	4397 /* one byter, not compressed */

	4398 currentSize++;

	4399 leadPrimary = 0;

	4400 } else if(isCompressible(coll, primary1)) {

	4401 /* compress */

	4402 leadPrimary = primary1;

	4403 currentSize+=2;

	4404 } else {

	4405 leadPrimary = 0;

	4406 currentSize+=2;

	4407 }

	4408 }

	4409 } else { /* we are in continuation, so we're gonna add primary t o the key don't care about compression */

	4410 currentSize++;

	4411 if(primary2 != UCOL_IGNORABLE) {

	4412 currentSize++;

	4413 }

	4414 }

	4415 }

	4416

	4417 if(secondary > compareSec) { /* I think that != 0 test should be != IGNORABLE */

	4418 if(!isFrenchSec){

	4419 if (secondary == UCOL_COMMON2 && notIsContinuation) {

	4420 c2++;

	4421 } else {

	4422 if(c2 > 0) {

	4423 if (secondary > UCOL_COMMON2) { // not necessary for 4th level.

	4424 currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+1;

	4425 } else {

	4426 currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+1;

	4427 }

	4428 c2 = 0;

	4429 }

	4430 currentSize++;

	4431 }

	4432 } else {

	4433 fSecs[fSecsLen++] = secondary;

	4434 if(fSecsLen == fSecsMaxLen) {

	4435 uint8_t *fSecsTemp;

	4436 if(fSecs == fSecsBuff) {

	4437 fSecsTemp = (uint8_t )uprv_malloc(2fSecsLen);

	4438 } else {

	4439 fSecsTemp = (uint8_t )uprv_realloc(fSecs, 2fSecsLe n);

	4440 }

	4441 if(fSecsTemp == NULL) {

	4442 status = U_MEMORY_ALLOCATION_ERROR;

	4443 return 0;

	4444 }

	4445 fSecs = fSecsTemp;

	4446 fSecsMaxLen *= 2;

	4447 }

	4448 if(notIsContinuation) {

	4449 if (frenchStartPtr != NULL) {

	4450 /* reverse secondaries from frenchStartPtr up to fre nchEndPtr */

	4451 uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, fr enchEndPtr);

	4452 frenchStartPtr = NULL;

	4453 }

	4454 } else {

	4455 if (frenchStartPtr == NULL) {

	4456 frenchStartPtr = fSecs+fSecsLen-2;

	4457 }

	4458 frenchEndPtr = fSecs+fSecsLen-1;

	4459 }

	4460 }

	4461 }

	4462

	4463 if(doCase && (primary1 > 0 \|\| strength >= UCOL_SECONDARY)) {

	4464 // do the case level if we need to do it. We don't want to calcu late

	4465 // case level for primary ignorables if we have only primary str ength and case level

	4466 // otherwise we would break well formedness of CEs

	4467 if (caseShift == 0) {

	4468 currentSize++;

	4469 caseShift = UCOL_CASE_SHIFT_START;

	4470 }

	4471 if((tertiary&0x3F) > 0 && notIsContinuation) {

	4472 caseShift--;

	4473 if((tertiary &0xC0) != 0) {

	4474 if (caseShift == 0) {

	4475 currentSize++;

	4476 caseShift = UCOL_CASE_SHIFT_START;

	4477 }

	4478 caseShift--;

	4479 }

	4480 }

	4481 } else {

	4482 if(notIsContinuation) {

	4483 tertiary ^= caseSwitch;

	4484 }

	4485 }

	4486

	4487 tertiary &= tertiaryMask;

	4488 if(tertiary > compareTer) { /* I think that != 0 test should be != I GNORABLE */

	4489 if (tertiary == tertiaryCommon && notIsContinuation) {

	4490 c3++;

	4491 } else {

	4492 if(c3 > 0) {

	4493 if((tertiary > tertiaryCommon && tertiaryCommon == UCOL_ COMMON3_NORMAL)

	4494 \|\| (tertiary <= tertiaryCommon && tertiaryCommon == UCOL_COMMON3_UPPERFIRST)) {

	4495 currentSize += (c3/(uint32_t)coll->tertiaryTopCo unt)+1;

	4496 } else {

	4497 currentSize += (c3/(uint32_t)coll->tertiaryBottomCou nt)+1;

	4498 }

	4499 c3 = 0;

	4500 }

	4501 currentSize++;

	4502 }

	4503 }

	4504

	4505 if(/qShifted/(compareQuad==0) && notIsContinuation) {

	4506 if(s->flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and we n eed to note it

	4507 if(c4>0) { // Close this part

	4508 currentSize += (c4/UCOL_BOT_COUNT4)+1;

	4509 c4 = 0;

	4510 }

	4511 currentSize++; // Add the Hiragana

	4512 } else { // This wasn't Hiragana, so we can continue adding stuf f

	4513 c4++;

	4514 }

	4515 }

	4516 }

	4517 }

	4518

	4519 if(!isFrenchSec){

	4520 if(c2 > 0) {

	4521 currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BO T_COUNT2 != 0)?1:0);

	4522 }

	4523 } else {

	4524 uint32_t i = 0;

	4525 if(frenchStartPtr != NULL) {

	4526 uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);

	4527 }

	4528 for(i = 0; i<fSecsLen; i++) {

	4529 secondary = *(fSecs+fSecsLen-i-1);

	4530 /* This is compression code. */

	4531 if (secondary == UCOL_COMMON2) {

	4532 ++c2;

	4533 } else {

	4534 if(c2 > 0) {

	4535 if (secondary > UCOL_COMMON2) { // not necessary for 4th lev el.

	4536 currentSize += (c2/(uint32_t)UCOL_TOP_COUNT2)+((c2%(uint 32_t)UCOL_TOP_COUNT2 != 0)?1:0);

	4537 } else {

	4538 currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint 32_t)UCOL_BOT_COUNT2 != 0)?1:0);

	4539 }

	4540 c2 = 0;

	4541 }

	4542 currentSize++;

	4543 }

	4544 }

	4545 if(c2 > 0) {

	4546 currentSize += (c2/(uint32_t)UCOL_BOT_COUNT2)+((c2%(uint32_t)UCOL_BO T_COUNT2 != 0)?1:0);

	4547 }

	4548 if(fSecs != fSecsBuff) {

	4549 uprv_free(fSecs);

	4550 }

	4551 }

	4552

	4553 if(c3 > 0) {

	4554 currentSize += (c3/(uint32_t)coll->tertiaryBottomCount) + ((c3%(uint32_t )coll->tertiaryBottomCount != 0)?1:0);

	4555 }

	4556

	4557 if(c4 > 0 && compareQuad == 0) {

	4558 currentSize += (c4/(uint32_t)UCOL_BOT_COUNT4)+((c4%(uint32_t)UCOL_BOT_CO UNT4 != 0)?1:0);

	4559 }

	4560

	4561 if(compareIdent) {

	4562 currentSize += u_lengthOfIdenticalLevelRun(s->string, len);

	4563 }

	4564 return currentSize;

	4565 }

	4566

	4567 static

	4568 inline void doCaseShift(uint8_t **cases, uint32_t &caseShift) {

	4569 if (caseShift == 0) {

	4570 (cases)++ = UCOL_CASE_BYTE_START;

	4571 caseShift = UCOL_CASE_SHIFT_START;

	4572 }

	4573 }

	4574

	/*
	 * Appends one byte to a bounded output buffer and counts the attempt.
	 *
	 * primaries - write cursor (passed by reference); advanced only when the byte
	 *             is actually stored.
	 * limit     - one-past-the-end of the writable region; nothing is written at
	 *             or beyond this address.
	 * size      - incremented on every call, whether or not the byte was stored,
	 *             so the caller learns how many bytes were wanted in total.
	 * value     - the byte to append.
	 */
	static
	inline void addWithIncrement(uint8_t *&primaries, uint8_t *limit, uint32_t &size, const uint8_t value) {
	    size++;
	    if(primaries < limit) {
	        *(primaries)++ = value;
	    }
	}

	4584

	4585 // Packs the secondary buffer when processing French locale. Adds the terminator .

	4586 static

	4587 inline uint8_t packFrench(uint8_t primaries, uint8_t primEnd, uint8_t second aries, uint32_t secsize, uint8_t frenchStartPtr, uint8_t *frenchEndPtr) {

	4588 uint8_t secondary;

	4589 int32_t count2 = 0;

	4590 uint32_t i = 0, size = 0;

	4591 // we use i here since the key size already accounts for terminators, so we' ll discard the increment

	4592 addWithIncrement(primaries, primEnd, i, UCOL_LEVELTERMINATOR);

	4593 /* If there are any unresolved continuation secondaries, reverse them here s o that we can reverse the whole secondary thing */

	4594 if(frenchStartPtr != NULL) {

	4595 uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr, frenchEndPtr);

	4596 }

	4597 for(i = 0; i<*secsize; i++) {

	4598 secondary = *(secondaries-i-1);

	4599 /* This is compression code. */

	4600 if (secondary == UCOL_COMMON2) {

	4601 ++count2;

	4602 } else {

	4603 if (count2 > 0) {

	4604 if (secondary > UCOL_COMMON2) { // not necessary for 4th level.

	4605 while (count2 > UCOL_TOP_COUNT2) {

	4606 addWithIncrement(primaries, primEnd, size, (uint8_t)(UCO L_COMMON_TOP2 - UCOL_TOP_COUNT2));

	4607 count2 -= (uint32_t)UCOL_TOP_COUNT2;

	4608 }

	4609 addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_CO MMON_TOP2 - (count2-1)));

	4610 } else {

	4611 while (count2 > UCOL_BOT_COUNT2) {

	4612 addWithIncrement(primaries, primEnd, size, (uint8_t)(UCO L_COMMON_BOT2 + UCOL_BOT_COUNT2));

	4613 count2 -= (uint32_t)UCOL_BOT_COUNT2;

	4614 }

	4615 addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_CO MMON_BOT2 + (count2-1)));

	4616 }

	4617 count2 = 0;

	4618 }

	4619 addWithIncrement(primaries, primEnd, size, secondary);

	4620 }

	4621 }

	4622 if (count2 > 0) {

	4623 while (count2 > UCOL_BOT_COUNT2) {

	4624 addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT 2 + UCOL_BOT_COUNT2));

	4625 count2 -= (uint32_t)UCOL_BOT_COUNT2;

	4626 }

	4627 addWithIncrement(primaries, primEnd, size, (uint8_t)(UCOL_COMMON_BOT2 + (count2-1)));

	4628 }

	4629 *secsize = size;

	4630 return primaries;

	4631 }

	4632

	4633 #define DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY 0

	4634

	4635 /* This is the sortkey work horse function */

	4636 U_CFUNC int32_t U_CALLCONV

	4637 ucol_calcSortKey(const UCollator *coll,

	4638 const UChar *source,

	4639 int32_t sourceLength,

	4640 uint8_t **result,

	4641 uint32_t resultLength,

	4642 UBool allocateSKBuffer,

	4643 UErrorCode *status)

	4644 {

	4645 //const UCAConstants UCAconsts = (UCAConstants )((uint8_t *)coll->UCA->ima ge + coll->image->UCAConsts);

	4646

	4647 uint32_t i = 0; /* general purpose counter */

	4648

	4649 /* Stack allocated buffers for buffers we use */

	4650 uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], te rt[UCOL_TERTIARY_MAX_BUFFER], caseB[UCOL_CASE_MAX_BUFFER], quad[UCOL_QUAD_MAX_BU FFER];

	4651

	4652 uint8_t primaries = result, secondaries = second, tertiaries = tert, ca ses = caseB, quads = quad;

	4653

	4654 if(U_FAILURE(*status)) {

	4655 return 0;

	4656 }

	4657

	4658 if(primaries == NULL && allocateSKBuffer == TRUE) {

	4659 primaries = *result = prim;

	4660 resultLength = UCOL_PRIMARY_MAX_BUFFER;

	4661 }

	4662

	4663 uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BU FFER,

	4664 caseSize = UCOL_CASE_MAX_BUFFER, quadSize = UCOL_QUAD_MAX_BUFFER;

	4665

	4666 uint32_t sortKeySize = 1; /* it is always \0 terminated */

	4667

	4668 UnicodeString normSource;

	4669

	4670 int32_t len = (sourceLength == -1 ? u_strlen(source) : sourceLength);

	4671

	4672 UColAttributeValue strength = coll->strength;

	4673

	4674 uint8_t compareSec = (uint8_t)((strength >= UCOL_SECONDARY)?0:0xFF);

	4675 uint8_t compareTer = (uint8_t)((strength >= UCOL_TERTIARY)?0:0xFF);

	4676 uint8_t compareQuad = (uint8_t)((strength >= UCOL_QUATERNARY)?0:0xFF);

	4677 UBool compareIdent = (strength == UCOL_IDENTICAL);

	4678 UBool doCase = (coll->caseLevel == UCOL_ON);

	4679 UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && (compareSec == 0) ;

	4680 UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);

	4681 //UBool qShifted = shifted && (compareQuad == 0);

	4682 UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && (compareQuad == 0);

	4683

	4684 uint32_t variableTopValue = coll->variableTopValue;

	4685 // TODO: UCOL_COMMON_BOT4 should be a function of qShifted. If we have no

	4686 // qShifted, we don't need to set UCOL_COMMON_BOT4 so high.

	4687 uint8_t UCOL_COMMON_BOT4 = (uint8_t)((coll->variableTopValue>>8)+1);

	4688 uint8_t UCOL_HIRAGANA_QUAD = 0;

	4689 if(doHiragana) {

	4690 UCOL_HIRAGANA_QUAD=UCOL_COMMON_BOT4++;

	4691 /* allocate one more space for hiragana, value for hiragana */

	4692 }

	4693 uint8_t UCOL_BOT_COUNT4 = (uint8_t)(0xFF - UCOL_COMMON_BOT4);

	4694

	4695 /* support for special features like caselevel and funky secondaries */

	4696 uint8_t *frenchStartPtr = NULL;

	4697 uint8_t *frenchEndPtr = NULL;

	4698 uint32_t caseShift = 0;

	4699

	4700 sortKeySize += ((compareSec?0:1) + (compareTer?0:1) + (doCase?1:0) + /(qShi fted?1:0)/(compareQuad?0:1) + (compareIdent?1:0));

	4701

	4702 /* If we need to normalize, we'll do it all at once at the beginning! */

	4703 const Normalizer2 *norm2;

	4704 if(compareIdent) {

	4705 norm2 = Normalizer2Factory::getNFDInstance(*status);

	4706 } else if(coll->normalizationMode != UCOL_OFF) {

	4707 norm2 = Normalizer2Factory::getFCDInstance(*status);

	4708 } else {

	4709 norm2 = NULL;

	4710 }

	4711 if(norm2 != NULL) {

	4712 normSource.setTo(FALSE, source, len);

	4713 int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);

	4714 if(qcYesLength != len) {

	4715 UnicodeString unnormalized = normSource.tempSubString(qcYesLength);

	4716 normSource.truncate(qcYesLength);

	4717 norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);

	4718 source = normSource.getBuffer();

	4719 len = normSource.length();

	4720 }

	4721 }

	4722 collIterate s;

	4723 IInit_collIterate(coll, source, len, &s, status);

	4724 if(U_FAILURE(*status)) {

	4725 return 0;

	4726 }

	4727 s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was norma lized.

	4728

	4729 if(resultLength == 0 \|\| primaries == NULL) {

	4730 return ucol_getSortKeySize(coll, &s, sortKeySize, strength, len);

	4731 }

	4732 uint8_t *primarySafeEnd = primaries + resultLength - 1;

	4733 if(strength > UCOL_PRIMARY) {

	4734 primarySafeEnd--;

	4735 }

	4736

	4737 uint32_t minBufferSize = UCOL_MAX_BUFFER;

	4738

	4739 uint8_t *primStart = primaries;

	4740 uint8_t *secStart = secondaries;

	4741 uint8_t *terStart = tertiaries;

	4742 uint8_t *caseStart = cases;

	4743 uint8_t *quadStart = quads;

	4744

	4745 uint32_t order = 0;

	4746

	4747 uint8_t primary1 = 0;

	4748 uint8_t primary2 = 0;

	4749 uint8_t secondary = 0;

	4750 uint8_t tertiary = 0;

	4751 uint8_t caseSwitch = coll->caseSwitch;

	4752 uint8_t tertiaryMask = coll->tertiaryMask;

	4753 int8_t tertiaryAddition = coll->tertiaryAddition;

	4754 uint8_t tertiaryTop = coll->tertiaryTop;

	4755 uint8_t tertiaryBottom = coll->tertiaryBottom;

	4756 uint8_t tertiaryCommon = coll->tertiaryCommon;

	4757 uint8_t caseBits = 0;

	4758

	4759 UBool finished = FALSE;

	4760 UBool wasShifted = FALSE;

	4761 UBool notIsContinuation = FALSE;

	4762

	4763 uint32_t prevBuffSize = 0;

	4764

	4765 uint32_t count2 = 0, count3 = 0, count4 = 0;

	4766 uint8_t leadPrimary = 0;

	4767

	4768 for(;;) {

	4769 for(i=prevBuffSize; i<minBufferSize; ++i) {

	4770

	4771 order = ucol_IGetNextCE(coll, &s, status);

	4772 if(order == UCOL_NO_MORE_CES) {

	4773 finished = TRUE;

	4774 break;

	4775 }

	4776

	4777 if(order == 0) {

	4778 continue;

	4779 }

	4780

	4781 notIsContinuation = !isContinuation(order);

	4782

	4783 if(notIsContinuation) {

	4784 tertiary = (uint8_t)(order & UCOL_BYTE_SIZE_MASK);

	4785 } else {

	4786 tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));

	4787 }

	4788

	4789 secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);

	4790 primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);

	4791 primary1 = (uint8_t)(order >> 8);

	4792

	4793 uint8_t originalPrimary1 = primary1;

	4794 if(notIsContinuation && coll->leadBytePermutationTable != NULL) {

	4795 primary1 = coll->leadBytePermutationTable[primary1];

	4796 }

	4797

	4798 if((shifted && ((notIsContinuation && order <= variableTopValue && p rimary1 > 0)

	4799 \|\| (!notIsContinuation && wasShifted)))

	4800 \|\| (wasShifted && primary1 == 0)) /* amendment to the UCA says t hat primary ignorables */

	4801 {

	4802 /* and other ignorables should be removed if following a shifted code point */

	4803 if(primary1 == 0) { /* if we were shifted and we got an ignorabl e code point */

	4804 /* we should just completely ignore it */

	4805 continue;

	4806 }

	4807 if(compareQuad == 0) {

	4808 if(count4 > 0) {

	4809 while (count4 > UCOL_BOT_COUNT4) {

	4810 *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COU NT4);

	4811 count4 -= UCOL_BOT_COUNT4;

	4812 }

	4813 *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));

	4814 count4 = 0;

	4815 }

	4816 /* We are dealing with a variable and we're treating them as shifted */

	4817 /* This is a shifted ignorable */

	4818 if(primary1 != 0) { /* we need to check this since we could be in continuation */

	4819 *quads++ = primary1;

	4820 }

	4821 if(primary2 != 0) {

	4822 *quads++ = primary2;

	4823 }

	4824 }

	4825 wasShifted = TRUE;

	4826 } else {

	4827 wasShifted = FALSE;

	4828 /* Note: This code assumes that the table is well built i.e. not having 0 bytes where they are not supposed to be. */

	4829 /* Usually, we'll have non-zero primary1 & primary2, except in c ases of a-z and friends, when primary2 will */

	4830 /* regular and simple sortkey calc */

	4831 if(primary1 != UCOL_IGNORABLE) {

	4832 if(notIsContinuation) {

	4833 if(leadPrimary == primary1) {

	4834 *primaries++ = primary2;

	4835 } else {

	4836 if(leadPrimary != 0) {

	4837 *primaries++ = (uint8_t)((primary1 > leadPrimary ) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);

	4838 }

	4839 if(primary2 == UCOL_IGNORABLE) {

	4840 /* one byter, not compressed */

	4841 *primaries++ = primary1;

	4842 leadPrimary = 0;

	4843 } else if(isCompressible(coll, originalPrimary1)) {

	4844 /* compress */

	4845 *primaries++ = leadPrimary = primary1;

	4846 if(primaries <= primarySafeEnd) {

	4847 *primaries++ = primary2;

	4848 }

	4849 } else {

	4850 leadPrimary = 0;

	4851 *primaries++ = primary1;

	4852 if(primaries <= primarySafeEnd) {

	4853 *primaries++ = primary2;

	4854 }

	4855 }

	4856 }

	4857 } else { /* we are in continuation, so we're gonna add prima ry to the key don't care about compression */

	4858 *primaries++ = primary1;

	4859 if((primary2 != UCOL_IGNORABLE) && (primaries <= primary SafeEnd)) {

	4860 primaries++ = primary2; / second part */

	4861 }

	4862 }

	4863 }

	4864

	4865 if(secondary > compareSec) {

	4866 if(!isFrenchSec) {

	4867 /* This is compression code. */

	4868 if (secondary == UCOL_COMMON2 && notIsContinuation) {

	4869 ++count2;

	4870 } else {

	4871 if (count2 > 0) {

	4872 if (secondary > UCOL_COMMON2) { // not necessary for 4th level.

	4873 while (count2 > UCOL_TOP_COUNT2) {

	4874 *secondaries++ = (uint8_t)(UCOL_COMMON_T OP2 - UCOL_TOP_COUNT2);

	4875 count2 -= (uint32_t)UCOL_TOP_COUNT2;

	4876 }

	4877 *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count2-1));

	4878 } else {

	4879 while (count2 > UCOL_BOT_COUNT2) {

	4880 *secondaries++ = (uint8_t)(UCOL_COMMON_B OT2 + UCOL_BOT_COUNT2);

	4881 count2 -= (uint32_t)UCOL_BOT_COUNT2;

	4882 }

	4883 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));

	4884 }

	4885 count2 = 0;

	4886 }

	4887 *secondaries++ = secondary;

	4888 }

	4889 } else {

	4890 *secondaries++ = secondary;

	4891 /* Do the special handling for French secondaries */

	4892 /* We need to get continuation elements and do intermedi ate restore */

	4893 /* abc1c2c3de with french secondaries need to be edc1c2c 3ba NOT edc3c2c1ba */

	4894 if(notIsContinuation) {

	4895 if (frenchStartPtr != NULL) {

	4896 /* reverse secondaries from frenchStartPtr up to frenchEndPtr */

	4897 uprv_ucol_reverse_buffer(uint8_t, frenchStartPtr , frenchEndPtr);

	4898 frenchStartPtr = NULL;

	4899 }

	4900 } else {

	4901 if (frenchStartPtr == NULL) {

	4902 frenchStartPtr = secondaries - 2;

	4903 }

	4904 frenchEndPtr = secondaries-1;

	4905 }

	4906 }

	4907 }

	4908

	4909 if(doCase && (primary1 > 0 \|\| strength >= UCOL_SECONDARY)) {

	4910 // do the case level if we need to do it. We don't want to c alculate

	4911 // case level for primary ignorables if we have only primary strength and case level

	4912 // otherwise we would break well formedness of CEs

	4913 doCaseShift(&cases, caseShift);

	4914 if(notIsContinuation) {

	4915 caseBits = (uint8_t)(tertiary & 0xC0);

	4916

	4917 if(tertiary != 0) {

	4918 if(coll->caseFirst == UCOL_UPPER_FIRST) {

	4919 if((caseBits & 0xC0) == 0) {

	4920 *(cases-1) \|= 1 << (--caseShift);

	4921 } else {

	4922 *(cases-1) \|= 0 << (--caseShift);

	4923 /* second bit */

	4924 doCaseShift(&cases, caseShift);

	4925 *(cases-1) \|= ((caseBits>>6)&1) << (--caseSh ift);

	4926 }

	4927 } else {

	4928 if((caseBits & 0xC0) == 0) {

	4929 *(cases-1) \|= 0 << (--caseShift);

	4930 } else {

	4931 *(cases-1) \|= 1 << (--caseShift);

	4932 /* second bit */

	4933 doCaseShift(&cases, caseShift);

	4934 *(cases-1) \|= ((caseBits>>7)&1) << (--caseSh ift);

	4935 }

	4936 }

	4937 }

	4938

	4939 }

	4940 } else {

	4941 if(notIsContinuation) {

	4942 tertiary ^= caseSwitch;

	4943 }

	4944 }

	4945

	4946 tertiary &= tertiaryMask;

	4947 if(tertiary > compareTer) {

	4948 /* This is compression code. */

	4949 /* sequence size check is included in the if clause */

	4950 if (tertiary == tertiaryCommon && notIsContinuation) {

	4951 ++count3;

	4952 } else {

	4953 if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_C OMMON3_NORMAL) {

	4954 tertiary += tertiaryAddition;

	4955 } else if(tertiary <= tertiaryCommon && tertiaryCommon = = UCOL_COMMON3_UPPERFIRST) {

	4956 tertiary -= tertiaryAddition;

	4957 }

	4958 if (count3 > 0) {

	4959 if ((tertiary > tertiaryCommon)) {

	4960 while (count3 > coll->tertiaryTopCount) {

	4961 *tertiaries++ = (uint8_t)(tertiaryTop - coll ->tertiaryTopCount);

	4962 count3 -= (uint32_t)coll->tertiaryTopCount;

	4963 }

	4964 *tertiaries++ = (uint8_t)(tertiaryTop - (count3- 1));

	4965 } else {

	4966 while (count3 > coll->tertiaryBottomCount) {

	4967 *tertiaries++ = (uint8_t)(tertiaryBottom + c oll->tertiaryBottomCount);

	4968 count3 -= (uint32_t)coll->tertiaryBottomCoun t;

	4969 }

	4970 *tertiaries++ = (uint8_t)(tertiaryBottom + (coun t3-1));

	4971 }

	4972 count3 = 0;

	4973 }

	4974 *tertiaries++ = tertiary;

	4975 }

	4976 }

	4977

	4978 if(/qShifted/(compareQuad==0) && notIsContinuation) {

	4979 if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana and w e need to note it

	4980 if(count4>0) { // Close this part

	4981 while (count4 > UCOL_BOT_COUNT4) {

	4982 *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT _COUNT4);

	4983 count4 -= UCOL_BOT_COUNT4;

	4984 }

	4985 *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));

	4986 count4 = 0;

	4987 }

	4988 *quads++ = UCOL_HIRAGANA_QUAD; // Add the Hiragana

	4989 } else { // This wasn't Hiragana, so we can continue adding stuff

	4990 count4++;

	4991 }

	4992 }

	4993 }

	4994

	4995 if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */

	4996 if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */

	4997 IInit_collIterate(coll, (UChar *)source, len, &s, status);

	4998 if(U_FAILURE(*status)) {

	4999 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5000 finished = TRUE;

	5001 break;

	5002 }

	5003 s.flags &= ~UCOL_ITER_NORM;

	5004 sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, str ength, len);

	5005 *status = U_BUFFER_OVERFLOW_ERROR;

	5006 finished = TRUE;

	5007 break;

	5008 } else { /* It's much nicer if we can actually reallocate */

	5009 int32_t sks = sortKeySize+(int32_t)((primaries - primStart)+ (secondaries - secStart)+(tertiaries - terStart)+(cases-caseStart)+(quads-quadSt art));

	5010 primStart = reallocateBuffer(&primaries, result, prim, &res ultLength, 2sks, status);

	5011 if(U_SUCCESS(*status)) {

	5012 *result = primStart;

	5013 primarySafeEnd = primStart + resultLength - 1;

	5014 if(strength > UCOL_PRIMARY) {

	5015 primarySafeEnd--;

	5016 }

	5017 } else {

	5018 /* We ran out of memory!? We can't recover. */

	5019 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5020 finished = TRUE;

	5021 break;

	5022 }

	5023 }

	5024 }

	5025 }

	5026 if(finished) {

	5027 break;

	5028 } else {

	5029 prevBuffSize = minBufferSize;

	5030

	5031 uint32_t frenchStartOffset = 0, frenchEndOffset = 0;

	5032 if (frenchStartPtr != NULL) {

	5033 frenchStartOffset = (uint32_t)(frenchStartPtr - secStart);

	5034 frenchEndOffset = (uint32_t)(frenchEndPtr - secStart);

	5035 }

	5036 secStart = reallocateBuffer(&secondaries, secStart, second, &secSize , 2*secSize, status);

	5037 terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2 *terSize, status);

	5038 caseStart = reallocateBuffer(&cases, caseStart, caseB, &caseSize, 2* caseSize, status);

	5039 quadStart = reallocateBuffer(&quads, quadStart, quad, &quadSize, 2*q uadSize, status);

	5040 if(U_FAILURE(*status)) {

	5041 /* We ran out of memory!? We can't recover. */

	5042 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5043 break;

	5044 }

	5045 if (frenchStartPtr != NULL) {

	5046 frenchStartPtr = secStart + frenchStartOffset;

	5047 frenchEndPtr = secStart + frenchEndOffset;

	5048 }

	5049 minBufferSize *= 2;

	5050 }

	5051 }

	5052

	5053 /* Here, we are generally done with processing */

	5054 /* bailing out would not be too productive */

	5055

	5056 if(U_SUCCESS(*status)) {

	5057 sortKeySize += (uint32_t)(primaries - primStart);

	5058 /* we have done all the CE's, now let's put them together to form a key */

	5059 if(compareSec == 0) {

	5060 if (count2 > 0) {

	5061 while (count2 > UCOL_BOT_COUNT2) {

	5062 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT 2);

	5063 count2 -= (uint32_t)UCOL_BOT_COUNT2;

	5064 }

	5065 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));

	5066 }

	5067 uint32_t secsize = (uint32_t)(secondaries-secStart);

	5068 if(!isFrenchSec) { // Regular situation, we know the length of secon daries

	5069 sortKeySize += secsize;

	5070 if(sortKeySize <= resultLength) {

	5071 *(primaries++) = UCOL_LEVELTERMINATOR;

	5072 uprv_memcpy(primaries, secStart, secsize);

	5073 primaries += secsize;

	5074 } else {

	5075 if(allocateSKBuffer == TRUE) { /* need to save our butts if we cannot reallocate */

	5076 primStart = reallocateBuffer(&primaries, result, prim, &resultLength, 2sortKeySize, status);

	5077 if(U_SUCCESS(*status)) {

	5078 *result = primStart;

	5079 *(primaries++) = UCOL_LEVELTERMINATOR;

	5080 uprv_memcpy(primaries, secStart, secsize);

	5081 primaries += secsize;

	5082 }

	5083 else {

	5084 /* We ran out of memory!? We can't recover. */

	5085 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5086 goto cleanup;

	5087 }

	5088 } else {

	5089 *status = U_BUFFER_OVERFLOW_ERROR;

	5090 }

	5091 }

	5092 } else { // French secondary is on. We will need to pack French. pac kFrench will add the level terminator

	5093 uint8_t *newPrim = packFrench(primaries, primStart+resultLength, secondaries, &secsize, frenchStartPtr, frenchEndPtr);

	5094 sortKeySize += secsize;

	5095 if(sortKeySize <= resultLength) { // if we managed to pack fine

	5096 primaries = newPrim; // update the primary pointer

	5097 } else { // overflow, need to reallocate and redo

	5098 if(allocateSKBuffer == TRUE) { /* need to save our butts if we cannot reallocate */

	5099 primStart = reallocateBuffer(&primaries, result, prim, &resultLength, 2sortKeySize, status);

	5100 if(U_SUCCESS(*status)) {

	5101 primaries = packFrench(primaries, primStart+resultLe ngth, secondaries, &secsize, frenchStartPtr, frenchEndPtr);

	5102 }

	5103 else {

	5104 /* We ran out of memory!? We can't recover. */

	5105 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5106 goto cleanup;

	5107 }

	5108 } else {

	5109 *status = U_BUFFER_OVERFLOW_ERROR;

	5110 }

	5111 }

	5112 }

	5113 }

	5114

	5115 if(doCase) {

	5116 uint32_t casesize = (uint32_t)(cases - caseStart);

	5117 sortKeySize += casesize;

	5118 if(sortKeySize <= resultLength) {

	5119 *(primaries++) = UCOL_LEVELTERMINATOR;

	5120 uprv_memcpy(primaries, caseStart, casesize);

	5121 primaries += casesize;

	5122 } else {

	5123 if(allocateSKBuffer == TRUE) {

	5124 primStart = reallocateBuffer(&primaries, result, prim, &res ultLength, 2sortKeySize, status);

	5125 if(U_SUCCESS(*status)) {

	5126 *result = primStart;

	5127 *(primaries++) = UCOL_LEVELTERMINATOR;

	5128 uprv_memcpy(primaries, caseStart, casesize);

	5129 }

	5130 else {

	5131 /* We ran out of memory!? We can't recover. */

	5132 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5133 goto cleanup;

	5134 }

	5135 } else {

	5136 *status = U_BUFFER_OVERFLOW_ERROR;

	5137 }

	5138 }

	5139 }

	5140

	5141 if(compareTer == 0) {

	5142 if (count3 > 0) {

	5143 if (coll->tertiaryCommon != UCOL_COMMON_BOT3) {

	5144 while (count3 >= coll->tertiaryTopCount) {

	5145 *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTo pCount);

	5146 count3 -= (uint32_t)coll->tertiaryTopCount;

	5147 }

	5148 *tertiaries++ = (uint8_t)(tertiaryTop - count3);

	5149 } else {

	5150 while (count3 > coll->tertiaryBottomCount) {

	5151 *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiar yBottomCount);

	5152 count3 -= (uint32_t)coll->tertiaryBottomCount;

	5153 }

	5154 *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));

	5155 }

	5156 }

	5157 uint32_t tersize = (uint32_t)(tertiaries - terStart);

	5158 sortKeySize += tersize;

	5159 if(sortKeySize <= resultLength) {

	5160 *(primaries++) = UCOL_LEVELTERMINATOR;

	5161 uprv_memcpy(primaries, terStart, tersize);

	5162 primaries += tersize;

	5163 } else {

	5164 if(allocateSKBuffer == TRUE) {

	5165 primStart = reallocateBuffer(&primaries, result, prim, &res ultLength, 2sortKeySize, status);

	5166 if(U_SUCCESS(*status)) {

	5167 *result = primStart;

	5168 *(primaries++) = UCOL_LEVELTERMINATOR;

	5169 uprv_memcpy(primaries, terStart, tersize);

	5170 }

	5171 else {

	5172 /* We ran out of memory!? We can't recover. */

	5173 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5174 goto cleanup;

	5175 }

	5176 } else {

	5177 *status = U_BUFFER_OVERFLOW_ERROR;

	5178 }

	5179 }

	5180

	5181 if(compareQuad == 0/qShifted == TRUE/) {

	5182 if(count4 > 0) {

	5183 while (count4 > UCOL_BOT_COUNT4) {

	5184 *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + UCOL_BOT_COUNT4) ;

	5185 count4 -= UCOL_BOT_COUNT4;

	5186 }

	5187 *quads++ = (uint8_t)(UCOL_COMMON_BOT4 + (count4-1));

	5188 }

	5189 uint32_t quadsize = (uint32_t)(quads - quadStart);

	5190 sortKeySize += quadsize;

	5191 if(sortKeySize <= resultLength) {

	5192 *(primaries++) = UCOL_LEVELTERMINATOR;

	5193 uprv_memcpy(primaries, quadStart, quadsize);

	5194 primaries += quadsize;

	5195 } else {

	5196 if(allocateSKBuffer == TRUE) {

	5197 primStart = reallocateBuffer(&primaries, result, prim, &resultLength, 2sortKeySize, status);

	5198 if(U_SUCCESS(*status)) {

	5199 *result = primStart;

	5200 *(primaries++) = UCOL_LEVELTERMINATOR;

	5201 uprv_memcpy(primaries, quadStart, quadsize);

	5202 }

	5203 else {

	5204 /* We ran out of memory!? We can't recover. */

	5205 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5206 goto cleanup;

	5207 }

	5208 } else {

	5209 *status = U_BUFFER_OVERFLOW_ERROR;

	5210 }

	5211 }

	5212 }

	5213

	5214 if(compareIdent) {

	5215 sortKeySize += u_lengthOfIdenticalLevelRun(s.string, len);

	5216 if(sortKeySize <= resultLength) {

	5217 *(primaries++) = UCOL_LEVELTERMINATOR;

	5218 primaries += u_writeIdenticalLevelRun(s.string, len, primari es);

	5219 } else {

	5220 if(allocateSKBuffer == TRUE) {

	5221 primStart = reallocateBuffer(&primaries, *result, prim, &resultLength, sortKeySize, status);

	5222 if(U_SUCCESS(*status)) {

	5223 *result = primStart;

	5224 *(primaries++) = UCOL_LEVELTERMINATOR;

	5225 u_writeIdenticalLevelRun(s.string, len, primaries);

	5226 }

	5227 else {

	5228 /* We ran out of memory!? We can't recover. */

	5229 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5230 goto cleanup;

	5231 }

	5232 } else {

	5233 *status = U_BUFFER_OVERFLOW_ERROR;

	5234 }

	5235 }

	5236 }

	5237 }

	5238 *(primaries++) = '\0';

	5239 }

	5240

	5241 if(allocateSKBuffer == TRUE) {

	5242 result = (uint8_t)uprv_malloc(sortKeySize);

	5243 /* test for NULL */

	5244 if (*result == NULL) {

	5245 *status = U_MEMORY_ALLOCATION_ERROR;

	5246 goto cleanup;

	5247 }

	5248 uprv_memcpy(*result, primStart, sortKeySize);

	5249 if(primStart != prim) {

	5250 uprv_free(primStart);

	5251 }

	5252 }

	5253

	5254 cleanup:

	5255 if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {

	5256 /* NULL terminate for safety */

	5257 **result = 0;

	5258 }

	5259 if(terStart != tert) {

	5260 uprv_free(terStart);

	5261 uprv_free(secStart);

	5262 uprv_free(caseStart);

	5263 uprv_free(quadStart);

	5264 }

	5265

	5266 /* To avoid memory leak, free the offset buffer if necessary. */

	5267 ucol_freeOffsetBuffer(&s);

	5268

	5269 return sortKeySize;

	5270 }

	5271

	5272

	5273 U_CFUNC int32_t U_CALLCONV

	5274 ucol_calcSortKeySimpleTertiary(const UCollator *coll,

	5275 const UChar *source,

	5276 int32_t sourceLength,

	5277 uint8_t **result,

	5278 uint32_t resultLength,

	5279 UBool allocateSKBuffer,

	5280 UErrorCode *status)

	5281 {

	5282 U_ALIGN_CODE(16);

	5283

	5284 //const UCAConstants UCAconsts = (UCAConstants )((uint8_t *)coll->UCA->ima ge + coll->image->UCAConsts);

	5285 uint32_t i = 0; /* general purpose counter */

	5286

	5287 /* Stack allocated buffers for buffers we use */

	5288 uint8_t prim[UCOL_PRIMARY_MAX_BUFFER], second[UCOL_SECONDARY_MAX_BUFFER], te rt[UCOL_TERTIARY_MAX_BUFFER];

	5289

	5290 uint8_t primaries = result, secondaries = second, tertiaries = tert;

	5291

	5292 if(U_FAILURE(*status)) {

	5293 return 0;

	5294 }

	5295

	5296 if(primaries == NULL && allocateSKBuffer == TRUE) {

	5297 primaries = *result = prim;

	5298 resultLength = UCOL_PRIMARY_MAX_BUFFER;

	5299 }

	5300

	5301 uint32_t secSize = UCOL_SECONDARY_MAX_BUFFER, terSize = UCOL_TERTIARY_MAX_BU FFER;

	5302

	5303 uint32_t sortKeySize = 3; /* it is always \0 terminated plus separators for secondary and tertiary */

	5304

	5305 UnicodeString normSource;

	5306

	5307 int32_t len = sourceLength;

	5308

	5309 /* If we need to normalize, we'll do it all at once at the beginning! */

	5310 if(coll->normalizationMode != UCOL_OFF) {

	5311 normSource.setTo(len < 0, source, len);

	5312 const Normalizer2 norm2 = Normalizer2Factory::getFCDInstance(status);

	5313 int32_t qcYesLength = norm2->spanQuickCheckYes(normSource, *status);

	5314 if(qcYesLength != normSource.length()) {

	5315 UnicodeString unnormalized = normSource.tempSubString(qcYesLength);

	5316 normSource.truncate(qcYesLength);

	5317 norm2->normalizeSecondAndAppend(normSource, unnormalized, *status);

	5318 source = normSource.getBuffer();

	5319 len = normSource.length();

	5320 }

	5321 }

	5322 collIterate s;

	5323 IInit_collIterate(coll, (UChar *)source, len, &s, status);

	5324 if(U_FAILURE(*status)) {

	5325 return 0;

	5326 }

	5327 s.flags &= ~UCOL_ITER_NORM; // source passed the FCD test or else was norma lized.

	5328

	5329 if(resultLength == 0 \|\| primaries == NULL) {

	5330 return ucol_getSortKeySize(coll, &s, sortKeySize, coll->strength, len);

	5331 }

	5332

	5333 uint8_t *primarySafeEnd = primaries + resultLength - 2;

	5334

	5335 uint32_t minBufferSize = UCOL_MAX_BUFFER;

	5336

	5337 uint8_t *primStart = primaries;

	5338 uint8_t *secStart = secondaries;

	5339 uint8_t *terStart = tertiaries;

	5340

	5341 uint32_t order = 0;

	5342

	5343 uint8_t primary1 = 0;

	5344 uint8_t primary2 = 0;

	5345 uint8_t secondary = 0;

	5346 uint8_t tertiary = 0;

	5347 uint8_t caseSwitch = coll->caseSwitch;

	5348 uint8_t tertiaryMask = coll->tertiaryMask;

	5349 int8_t tertiaryAddition = coll->tertiaryAddition;

	5350 uint8_t tertiaryTop = coll->tertiaryTop;

	5351 uint8_t tertiaryBottom = coll->tertiaryBottom;

	5352 uint8_t tertiaryCommon = coll->tertiaryCommon;

	5353

	5354 uint32_t prevBuffSize = 0;

	5355

	5356 UBool finished = FALSE;

	5357 UBool notIsContinuation = FALSE;

	5358

	5359 uint32_t count2 = 0, count3 = 0;

	5360 uint8_t leadPrimary = 0;

	5361

	5362 for(;;) {

	5363 for(i=prevBuffSize; i<minBufferSize; ++i) {

	5364

	5365 order = ucol_IGetNextCE(coll, &s, status);

	5366

	5367 if(order == 0) {

	5368 continue;

	5369 }

	5370

	5371 if(order == UCOL_NO_MORE_CES) {

	5372 finished = TRUE;

	5373 break;

	5374 }

	5375

	5376 notIsContinuation = !isContinuation(order);

	5377

	5378 if(notIsContinuation) {

	5379 tertiary = (uint8_t)((order & tertiaryMask));

	5380 } else {

	5381 tertiary = (uint8_t)((order & UCOL_REMOVE_CONTINUATION));

	5382 }

	5383

	5384 secondary = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);

	5385 primary2 = (uint8_t)((order >>= 8) & UCOL_BYTE_SIZE_MASK);

	5386 primary1 = (uint8_t)(order >> 8);

	5387

	5388 uint8_t originalPrimary1 = primary1;

	5389 if (coll->leadBytePermutationTable != NULL && notIsContinuation) {

	5390 primary1 = coll->leadBytePermutationTable[primary1];

	5391 }

	5392

	5393 /* Note: This code assumes that the table is well built i.e. not hav ing 0 bytes where they are not supposed to be. */

	5394 /* Usually, we'll have non-zero primary1 & primary2, except in cases of a-z and friends, when primary2 will */

	5395 /* be zero with non zero primary1. primary3 is different than 0 only for long primaries - see above. */

	5396 /* regular and simple sortkey calc */

	5397 if(primary1 != UCOL_IGNORABLE) {

	5398 if(notIsContinuation) {

	5399 if(leadPrimary == primary1) {

	5400 *primaries++ = primary2;

	5401 } else {

	5402 if(leadPrimary != 0) {

	5403 *primaries++ = (uint8_t)((primary1 > leadPrimary) ? UCOL_BYTE_UNSHIFTED_MAX : UCOL_BYTE_UNSHIFTED_MIN);

	5404 }

	5405 if(primary2 == UCOL_IGNORABLE) {

	5406 /* one byter, not compressed */

	5407 *primaries++ = primary1;

	5408 leadPrimary = 0;

	5409 } else if(isCompressible(coll, originalPrimary1)) {

	5410 /* compress */

	5411 *primaries++ = leadPrimary = primary1;

	5412 *primaries++ = primary2;

	5413 } else {

	5414 leadPrimary = 0;

	5415 *primaries++ = primary1;

	5416 *primaries++ = primary2;

	5417 }

	5418 }

	5419 } else { /* we are in continuation, so we're gonna add primary t o the key don't care about compression */

	5420 *primaries++ = primary1;

	5421 if(primary2 != UCOL_IGNORABLE) {

	5422 primaries++ = primary2; / second part */

	5423 }

	5424 }

	5425 }

	5426

	5427 if(secondary > 0) { /* I think that != 0 test should be != IGNORABLE */

	5428 /* This is compression code. */

	5429 if (secondary == UCOL_COMMON2 && notIsContinuation) {

	5430 ++count2;

	5431 } else {

	5432 if (count2 > 0) {

	5433 if (secondary > UCOL_COMMON2) { // not necessary for 4th level.

	5434 while (count2 > UCOL_TOP_COUNT2) {

	5435 *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - UC OL_TOP_COUNT2);

	5436 count2 -= (uint32_t)UCOL_TOP_COUNT2;

	5437 }

	5438 *secondaries++ = (uint8_t)(UCOL_COMMON_TOP2 - (count 2-1));

	5439 } else {

	5440 while (count2 > UCOL_BOT_COUNT2) {

	5441 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UC OL_BOT_COUNT2);

	5442 count2 -= (uint32_t)UCOL_BOT_COUNT2;

	5443 }

	5444 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count 2-1));

	5445 }

	5446 count2 = 0;

	5447 }

	5448 *secondaries++ = secondary;

	5449 }

	5450 }

	5451

	5452 if(notIsContinuation) {

	5453 tertiary ^= caseSwitch;

	5454 }

	5455

	5456 if(tertiary > 0) {

	5457 /* This is compression code. */

	5458 /* sequence size check is included in the if clause */

	5459 if (tertiary == tertiaryCommon && notIsContinuation) {

	5460 ++count3;

	5461 } else {

	5462 if(tertiary > tertiaryCommon && tertiaryCommon == UCOL_COMMO N3_NORMAL) {

	5463 tertiary += tertiaryAddition;

	5464 } else if (tertiary <= tertiaryCommon && tertiaryCommon == U COL_COMMON3_UPPERFIRST) {

	5465 tertiary -= tertiaryAddition;

	5466 }

	5467 if (count3 > 0) {

	5468 if ((tertiary > tertiaryCommon)) {

	5469 while (count3 > coll->tertiaryTopCount) {

	5470 *tertiaries++ = (uint8_t)(tertiaryTop - coll->te rtiaryTopCount);

	5471 count3 -= (uint32_t)coll->tertiaryTopCount;

	5472 }

	5473 *tertiaries++ = (uint8_t)(tertiaryTop - (count3-1));

	5474 } else {

	5475 while (count3 > coll->tertiaryBottomCount) {

	5476 *tertiaries++ = (uint8_t)(tertiaryBottom + coll- >tertiaryBottomCount);

	5477 count3 -= (uint32_t)coll->tertiaryBottomCount;

	5478 }

	5479 *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1 ));

	5480 }

	5481 count3 = 0;

	5482 }

	5483 *tertiaries++ = tertiary;

	5484 }

	5485 }

	5486

	5487 if(primaries > primarySafeEnd) { /* We have stepped over the primary buffer */

	5488 if(allocateSKBuffer == FALSE) { /* need to save our butts if we cannot reallocate */

	5489 IInit_collIterate(coll, (UChar *)source, len, &s, status);

	5490 if(U_FAILURE(*status)) {

	5491 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5492 finished = TRUE;

	5493 break;

	5494 }

	5495 s.flags &= ~UCOL_ITER_NORM;

	5496 sortKeySize = ucol_getSortKeySize(coll, &s, sortKeySize, col l->strength, len);

	5497 *status = U_BUFFER_OVERFLOW_ERROR;

	5498 finished = TRUE;

	5499 break;

	5500 } else { /* It's much nicer if we can actually reallocate */

	5501 int32_t sks = sortKeySize+(int32_t)((primaries - primStart)+ (secondaries - secStart)+(tertiaries - terStart));

	5502 primStart = reallocateBuffer(&primaries, result, prim, &res ultLength, 2sks, status);

	5503 if(U_SUCCESS(*status)) {

	5504 *result = primStart;

	5505 primarySafeEnd = primStart + resultLength - 2;

	5506 } else {

	5507 /* We ran out of memory!? We can't recover. */

	5508 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5509 finished = TRUE;

	5510 break;

	5511 }

	5512 }

	5513 }

	5514 }

	5515 if(finished) {

	5516 break;

	5517 } else {

	5518 prevBuffSize = minBufferSize;

	5519 secStart = reallocateBuffer(&secondaries, secStart, second, &secSize , 2*secSize, status);

	5520 terStart = reallocateBuffer(&tertiaries, terStart, tert, &terSize, 2 *terSize, status);

	5521 minBufferSize *= 2;

	5522 if(U_FAILURE(*status)) { // if we cannot reallocate buffers, we can at least give the sortkey size

	5523 /* We ran out of memory!? We can't recover. */

	5524 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5525 break;

	5526 }

	5527 }

	5528 }

	5529

	5530 if(U_SUCCESS(*status)) {

	5531 sortKeySize += (uint32_t)(primaries - primStart);

	5532 /* we have done all the CE's, now let's put them together to form a key */

	5533 if (count2 > 0) {

	5534 while (count2 > UCOL_BOT_COUNT2) {

	5535 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2);

	5536 count2 -= (uint32_t)UCOL_BOT_COUNT2;

	5537 }

	5538 *secondaries++ = (uint8_t)(UCOL_COMMON_BOT2 + (count2-1));

	5539 }

	5540 uint32_t secsize = (uint32_t)(secondaries-secStart);

	5541 sortKeySize += secsize;

	5542 if(sortKeySize <= resultLength) {

	5543 *(primaries++) = UCOL_LEVELTERMINATOR;

	5544 uprv_memcpy(primaries, secStart, secsize);

	5545 primaries += secsize;

	5546 } else {

	5547 if(allocateSKBuffer == TRUE) {

	5548 primStart = reallocateBuffer(&primaries, result, prim, &resultL ength, 2sortKeySize, status);

	5549 if(U_SUCCESS(*status)) {

	5550 *(primaries++) = UCOL_LEVELTERMINATOR;

	5551 *result = primStart;

	5552 uprv_memcpy(primaries, secStart, secsize);

	5553 }

	5554 else {

	5555 /* We ran out of memory!? We can't recover. */

	5556 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5557 goto cleanup;

	5558 }

	5559 } else {

	5560 *status = U_BUFFER_OVERFLOW_ERROR;

	5561 }

	5562 }

	5563

	5564 if (count3 > 0) {

	5565 if (coll->tertiaryCommon != UCOL_COMMON3_NORMAL) {

	5566 while (count3 >= coll->tertiaryTopCount) {

	5567 *tertiaries++ = (uint8_t)(tertiaryTop - coll->tertiaryTopCou nt);

	5568 count3 -= (uint32_t)coll->tertiaryTopCount;

	5569 }

	5570 *tertiaries++ = (uint8_t)(tertiaryTop - count3);

	5571 } else {

	5572 while (count3 > coll->tertiaryBottomCount) {

	5573 *tertiaries++ = (uint8_t)(tertiaryBottom + coll->tertiaryBot tomCount);

	5574 count3 -= (uint32_t)coll->tertiaryBottomCount;

	5575 }

	5576 *tertiaries++ = (uint8_t)(tertiaryBottom + (count3-1));

	5577 }

	5578 }

	5579 uint32_t tersize = (uint32_t)(tertiaries - terStart);

	5580 sortKeySize += tersize;

	5581 if(sortKeySize <= resultLength) {

	5582 *(primaries++) = UCOL_LEVELTERMINATOR;

	5583 uprv_memcpy(primaries, terStart, tersize);

	5584 primaries += tersize;

	5585 } else {

	5586 if(allocateSKBuffer == TRUE) {

	5587 primStart = reallocateBuffer(&primaries, result, prim, &resultL ength, 2sortKeySize, status);

	5588 if(U_SUCCESS(*status)) {

	5589 *result = primStart;

	5590 *(primaries++) = UCOL_LEVELTERMINATOR;

	5591 uprv_memcpy(primaries, terStart, tersize);

	5592 }

	5593 else {

	5594 /* We ran out of memory!? We can't recover. */

	5595 sortKeySize = DEFAULT_ERROR_SIZE_FOR_CALCSORTKEY;

	5596 goto cleanup;

	5597 }

	5598 } else {

	5599 *status = U_BUFFER_OVERFLOW_ERROR;

	5600 }

	5601 }

	5602

	5603 *(primaries++) = '\0';

	5604 }

	5605

	5606 if(allocateSKBuffer == TRUE) {

	5607 result = (uint8_t)uprv_malloc(sortKeySize);

	5608 /* test for NULL */

	5609 if (*result == NULL) {

	5610 *status = U_MEMORY_ALLOCATION_ERROR;

	5611 goto cleanup;

	5612 }

	5613 uprv_memcpy(*result, primStart, sortKeySize);

	5614 if(primStart != prim) {

	5615 uprv_free(primStart);

	5616 }

	5617 }

	5618

	5619 cleanup:

	5620 if (allocateSKBuffer == FALSE && resultLength > 0 && U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {

	5621 /* NULL terminate for safety */

	5622 **result = 0;

	5623 }

	5624 if(terStart != tert) {

	5625 uprv_free(terStart);

	5626 uprv_free(secStart);

	5627 }

	5628

	5629 /* To avoid memory leak, free the offset buffer if necessary. */

	5630 ucol_freeOffsetBuffer(&s);

	5631

	5632 return sortKeySize;

	5633 }

	5634

	5635 static inline

	5636 UBool isShiftedCE(uint32_t CE, uint32_t LVT, UBool *wasShifted) {

	5637 UBool notIsContinuation = !isContinuation(CE);

	5638 uint8_t primary1 = (uint8_t)((CE >> 24) & 0xFF);

	5639 if((LVT && ((notIsContinuation && (CE & 0xFFFF0000)<= LVT && primary1 > 0)

	5640 \|\| (!notIsContinuation && *wasShifted)))

	5641 \|\| (wasShifted && primary1 == 0)) / amendment to the UCA says that pri mary ignorables */

	5642 {

	5643 // The stuff below should probably be in the sortkey code... maybe not.. .

	5644 if(primary1 != 0) { /* if we were shifted and we got an ignorable code p oint */

	5645 /* we should just completely ignore it */

	5646 *wasShifted = TRUE;

	5647 //continue;

	5648 }

	5649 //*wasShifted = TRUE;

	5650 return TRUE;

	5651 } else {

	5652 *wasShifted = FALSE;

	5653 return FALSE;

	5654 }

	5655 }

	5656 static inline

	5657 void terminatePSKLevel(int32_t level, int32_t maxLevel, int32_t &i, uint8_t *des t) {

	5658 if(level < maxLevel) {

	5659 dest[i++] = UCOL_LEVELTERMINATOR;

	5660 } else {

	5661 dest[i++] = 0;

	5662 }

	5663 }

	5664

	5665 /** enumeration of level identifiers for partial sort key generation */

	5666 enum {

	5667 UCOL_PSK_PRIMARY = 0,

	5668 UCOL_PSK_SECONDARY = 1,

	5669 UCOL_PSK_CASE = 2,

	5670 UCOL_PSK_TERTIARY = 3,

	5671 UCOL_PSK_QUATERNARY = 4,

	5672 UCOL_PSK_QUIN = 5, /** This is an extra level, not used - but we have t hree bits to blow */

	5673 UCOL_PSK_IDENTICAL = 6,

	5674 UCOL_PSK_NULL = 7, /** level for the end of sort key. Will just produce zeros */

	5675 UCOL_PSK_LIMIT

	5676 };

	5677

	5678 /** collation state enum. *_SHIFT value is how much to shift right

	5679 * to get the state piece to the right. *_MASK value should be

	5680 * ANDed with the shifted state. This data is stored in state[1]

	5681 * field.

	5682 */

	5683 enum {

	5684 UCOL_PSK_LEVEL_SHIFT = 0, /** level identificator. stores an enum value from above */

	5685 UCOL_PSK_LEVEL_MASK = 7, /** three bits */

	5686 UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT = 3, /** number of bytes of primary or quaternary already written */

	5687 UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK = 1,

	5688 /** can be only 0 or 1, since we get up to two bytes from primary or quatern ary

	5689 * This field is also used to denote that the French secondary level is fin ished

	5690 */

	5691 UCOL_PSK_WAS_SHIFTED_SHIFT = 4,/** was the last value shifted */

	5692 UCOL_PSK_WAS_SHIFTED_MASK = 1, /** can be 0 or 1 (Boolean) */

	5693 UCOL_PSK_USED_FRENCH_SHIFT = 5,/** how many French bytes have we already wri tten */

	5694 UCOL_PSK_USED_FRENCH_MASK = 3, /** up to 4 bytes. See comment just below */

	5695 /** When we do French we need to reverse secondary values. However, continua tions

	5696 * need to stay the same. So if you had abc1c2c3de, you need to have edc1c2 c3ba

	5697 */

	5698 UCOL_PSK_BOCSU_BYTES_SHIFT = 7,

	5699 UCOL_PSK_BOCSU_BYTES_MASK = 3,

	5700 UCOL_PSK_CONSUMED_CES_SHIFT = 9,

	5701 UCOL_PSK_CONSUMED_CES_MASK = 0x7FFFF

	5702 };

	5703

	5704 // macro calculating the number of expansion CEs available

	5705 #define uprv_numAvailableExpCEs(s) (s).CEpos - (s).toReturn

	5706

	5707

/** Main sortkey part procedure. On the first call,
 *  you should pass in a collator, an iterator, an empty state
 *  (state[0] == state[1] == 0), a buffer to hold results,
 *  the number of bytes you need and an error code pointer.
 *  Make sure your buffer is big enough to hold the wanted
 *  number of sortkey bytes. I don't check.
 *  The only meaningful status you can get back is
 *  U_BUFFER_OVERFLOW_ERROR, which basically means that you
 *  have been dealt a raw deal and that you probably won't
 *  be able to use partial sortkey generation for this
 *  particular combination of string and collator. This
 *  is highly unlikely, but you should still check the error code.
 *  Any other status means that you're not in a sane situation
 *  anymore. After the first call, preserve the state values and
 *  use them on subsequent calls to obtain more bytes of a sortkey.
 *  Use until the number of bytes written is smaller than the requested
 *  number of bytes. The generated sortkey is not compatible with the
 *  one generated by ucol_getSortKey, as we don't do any compression.
 *  However, levels are still terminated by a 1 (one) and the sortkey
 *  is terminated by a 0 (zero). The identical level is the same as in the
 *  regular sortkey - the internal bocu-1 implementation is used.
 *  For the curious, although you cannot do much about this, here is
 *  the structure of the state words.
 *  state[0] - iterator state. Depends on the iterator implementation,
 *             but allows the iterator to continue where it stopped in
 *             the last iteration.
 *  state[1] - collation processing state. Here is the distribution
 *             of the bits:
 *   0, 1, 2 - level of the sortkey - primary, secondary, case, tertiary,
 *             quaternary, quin (we don't use this one), identical and
 *             null (producing only zeroes - first one to terminate the
 *             sortkey and subsequent to fill the buffer).
 *   3       - byte count. Number of bytes written on the primary level.
 *   4       - was shifted. Whether the previous iteration finished in the
 *             shifted state.
 *   5, 6    - French continuation bytes written. See the comment in the enum.
 *   7, 8    - Bocsu bytes used. Number of bytes from a bocu sequence on
 *             the identical level.
 *   9..31   - CEs consumed. Number of getCE or next32 operations performed
 *             since the last successful update of the iterator state.
 *             (Note: UCOL_PSK_CONSUMED_CES_MASK is 0x7FFFF, so only 19 bits,
 *             i.e. bits 9..27, are actually read back.)
 */

	5749 U_CAPI int32_t U_EXPORT2

	5750 ucol_nextSortKeyPart(const UCollator *coll,

	5751 UCharIterator *iter,

	5752 uint32_t state[2],

	5753 uint8_t *dest, int32_t count,

	5754 UErrorCode *status)

	5755 {

	5756 /* error checking */

	5757 if(status==NULL \|\| U_FAILURE(*status)) {

	5758 return 0;

	5759 }

	5760 UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);

	5761 if( coll==NULL \|\| iter==NULL \|\|

	5762 state==NULL \|\|

	5763 count<0 \|\| (count>0 && dest==NULL)

	5764 ) {

	5765 *status=U_ILLEGAL_ARGUMENT_ERROR;

	5766 UTRACE_EXIT_STATUS(status);

	5767 return 0;

	5768 }

	5769

	5770 UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count= %d",

	5771 coll, iter, state[0], state[1], dest, count);

	5772

	5773 if(count==0) {

	5774 /* nothing to do */

	5775 UTRACE_EXIT_VALUE(0);

	5776 return 0;

	5777 }

	5778 /** Setting up situation according to the state we got from the previous ite ration */

	5779 // The state of the iterator from the previous invocation

	5780 uint32_t iterState = state[0];

	5781 // Has the last iteration ended in the shifted state

	5782 UBool wasShifted = ((state[1] >> UCOL_PSK_WAS_SHIFTED_SHIFT) & UCOL_PSK_WAS_ SHIFTED_MASK)?TRUE:FALSE;

	5783 // What is the current level of the sortkey?

	5784 int32_t level= (state[1] >> UCOL_PSK_LEVEL_SHIFT) & UCOL_PSK_LEVEL_MASK;

	5785 // Have we written only one byte from a two byte primary in the previous ite ration?

	5786 // Also on secondary level - have we finished with the French secondary?

	5787 int32_t byteCountOrFrenchDone = (state[1] >> UCOL_PSK_BYTE_COUNT_OR_FRENCH_D ONE_SHIFT) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK;

	5788 // number of bytes in the continuation buffer for French

	5789 int32_t usedFrench = (state[1] >> UCOL_PSK_USED_FRENCH_SHIFT) & UCOL_PSK_USE D_FRENCH_MASK;

	5790 // Number of bytes already written from a bocsu sequence. Since

	5791 // the longes bocsu sequence is 4 long, this can be up to 3.

	5792 int32_t bocsuBytesUsed = (state[1] >> UCOL_PSK_BOCSU_BYTES_SHIFT) & UCOL_PSK _BOCSU_BYTES_MASK;

	5793 // Number of elements that need to be consumed in this iteration because

	5794 // the iterator returned UITER_NO_STATE at the end of the last iteration,

	5795 // so we had to save the last valid state.

	5796 int32_t cces = (state[1] >> UCOL_PSK_CONSUMED_CES_SHIFT) & UCOL_PSK_CONSUMED _CES_MASK;

	5797

	5798 /** values that depend on the collator attributes */

	5799 // strength of the collator.

	5800 int32_t strength = ucol_getAttribute(coll, UCOL_STRENGTH, status);

	5801 // maximal level of the partial sortkey. Need to take whether case level is done

	5802 int32_t maxLevel = 0;

	5803 if(strength < UCOL_TERTIARY) {

	5804 if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {

	5805 maxLevel = UCOL_PSK_CASE;

	5806 } else {

	5807 maxLevel = strength;

	5808 }

	5809 } else {

	5810 if(strength == UCOL_TERTIARY) {

	5811 maxLevel = UCOL_PSK_TERTIARY;

	5812 } else if(strength == UCOL_QUATERNARY) {

	5813 maxLevel = UCOL_PSK_QUATERNARY;

	5814 } else { // identical

	5815 maxLevel = UCOL_IDENTICAL;

	5816 }

	5817 }

	5818 // value for the quaternary level if Hiragana is encountered. Used for JIS X 4061 collation

	5819 uint8_t UCOL_HIRAGANA_QUAD =

	5820 (ucol_getAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, status) == UCOL_ON )?0xFE:0xFF;

	5821 // Boundary value that decides whether a CE is shifted or not

	5822 uint32_t LVT = (coll->alternateHandling == UCOL_SHIFTED)?(coll->variableTopV alue<<16):0;

	5823 // Are we doing French collation?

	5824 UBool doingFrench = (ucol_getAttribute(coll, UCOL_FRENCH_COLLATION, status) == UCOL_ON);

	5825

	5826 /** initializing the collation state */

	5827 UBool notIsContinuation = FALSE;

	5828 uint32_t CE = UCOL_NO_MORE_CES;

	5829

	5830 collIterate s;

	5831 IInit_collIterate(coll, NULL, -1, &s, status);

	5832 if(U_FAILURE(*status)) {

	5833 UTRACE_EXIT_STATUS(*status);

	5834 return 0;

	5835 }

	5836 s.iterator = iter;

	5837 s.flags \|= UCOL_USE_ITERATOR;

	5838 // This variable tells us whether we have produced some other levels in this iteration

	5839 // before we moved to the identical level. In that case, we need to switch t he

	5840 // type of the iterator.

	5841 UBool doingIdenticalFromStart = FALSE;

	5842 // Normalizing iterator

	5843 // The division for the array length may truncate the array size to

	5844 // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high

	5845 // for all platforms anyway.

	5846 UAlignedMemory stackNormIter[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];

	5847 UNormIterator *normIter = NULL;

	5848 // If the normalization is turned on for the collator and we are below ident ical level

	5849 // we will use a FCD normalizing iterator

	5850 if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON && le vel < UCOL_PSK_IDENTICAL) {

	5851 normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);

	5852 s.iterator = unorm_setIter(normIter, iter, UNORM_FCD, status);

	5853 s.flags &= ~UCOL_ITER_NORM;

	5854 if(U_FAILURE(*status)) {

	5855 UTRACE_EXIT_STATUS(*status);

	5856 return 0;

	5857 }

	5858 } else if(level == UCOL_PSK_IDENTICAL) {

	5859 // for identical level, we need a NFD iterator. We need to instantiate i t here, since we

	5860 // will be updating the state - and this cannot be done on an ordinary i terator.

	5861 normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);

	5862 s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);

	5863 s.flags &= ~UCOL_ITER_NORM;

	5864 if(U_FAILURE(*status)) {

	5865 UTRACE_EXIT_STATUS(*status);

	5866 return 0;

	5867 }

	5868 doingIdenticalFromStart = TRUE;

	5869 }

	5870

	5871 // This is the tentative new state of the iterator. The problem

	5872 // is that the iterator might return an undefined state, in

	5873 // which case we should save the last valid state and increase

	5874 // the iterator skip value.

	5875 uint32_t newState = 0;

	5876

	5877 // First, we set the iterator to the last valid position

	5878 // from the last iteration. This was saved in state[0].

	5879 if(iterState == 0) {

	5880 /* initial state */

	5881 if(level == UCOL_PSK_SECONDARY && doingFrench && !byteCountOrFrenchDone) {

	5882 s.iterator->move(s.iterator, 0, UITER_LIMIT);

	5883 } else {

	5884 s.iterator->move(s.iterator, 0, UITER_START);

	5885 }

	5886 } else {

	5887 /* reset to previous state */

	5888 s.iterator->setState(s.iterator, iterState, status);

	5889 if(U_FAILURE(*status)) {

	5890 UTRACE_EXIT_STATUS(*status);

	5891 return 0;

	5892 }

	5893 }

	5894

	5895

	5896

	5897 // This variable tells us whether we can attempt to update the state

	5898 // of iterator. Situations where we don't want to update iterator state

	5899 // are the existence of expansion CEs that are not yet processed, and

	5900 // finishing the case level without enough space in the buffer to insert

	5901 // a level terminator.

	5902 UBool canUpdateState = TRUE;

	5903

	5904 // Consume all the CEs that were consumed at the end of the previous

	5905 // iteration without updating the iterator state. On identical level,

	5906 // consume the code points.

	5907 int32_t counter = cces;

	5908 if(level < UCOL_PSK_IDENTICAL) {

	5909 while(counter-->0) {

	5910 // If we're doing French and we are on the secondary level,

	5911 // we go backwards.

	5912 if(level == UCOL_PSK_SECONDARY && doingFrench) {

	5913 CE = ucol_IGetPrevCE(coll, &s, status);

	5914 } else {

	5915 CE = ucol_IGetNextCE(coll, &s, status);

	5916 }

	5917 if(CE==UCOL_NO_MORE_CES) {

	5918 /* should not happen */

	5919 *status=U_INTERNAL_PROGRAM_ERROR;

	5920 UTRACE_EXIT_STATUS(*status);

	5921 return 0;

	5922 }

	5923 if(uprv_numAvailableExpCEs(s)) {

	5924 canUpdateState = FALSE;

	5925 }

	5926 }

	5927 } else {

	5928 while(counter-->0) {

	5929 uiter_next32(s.iterator);

	5930 }

	5931 }

	5932

	5933 // French secondary needs to know whether the iterator state of zero came fr om previous level OR

	5934 // from a new invocation...

	5935 UBool wasDoingPrimary = FALSE;

	5936 // destination buffer byte counter. When this guy

	5937 // gets to count, we're done with the iteration

	5938 int32_t i = 0;

	5939 // used to count the zero bytes written after we

	5940 // have finished with the sort key

	5941 int32_t j = 0;

	5942

	5943

	5944 // Hm.... I think we're ready to plunge in. Basic story is as following:

	5945 // we have a fall through case based on level. This is used for initial

	5946 // positioning on iteration start. Every level processor contains a

	5947 // for(;;) which will be broken when we exhaust all the CEs. Other

	5948 // way to exit is a goto saveState, which happens when we have filled

	5949 // out our buffer.

	5950 switch(level) {

	5951 case UCOL_PSK_PRIMARY:

	5952 wasDoingPrimary = TRUE;

	5953 for(;;) {

	5954 if(i==count) {

	5955 goto saveState;

	5956 }

	5957 // We should save the state only if we

	5958 // are sure that we are done with the

	5959 // previous iterator state

	5960 if(canUpdateState && byteCountOrFrenchDone == 0) {

	5961 newState = s.iterator->getState(s.iterator);

	5962 if(newState != UITER_NO_STATE) {

	5963 iterState = newState;

	5964 cces = 0;

	5965 }

	5966 }

	5967 CE = ucol_IGetNextCE(coll, &s, status);

	5968 cces++;

	5969 if(CE==UCOL_NO_MORE_CES) {

	5970 // Add the level separator

	5971 terminatePSKLevel(level, maxLevel, i, dest);

	5972 byteCountOrFrenchDone=0;

	5973 // Restart the iteration an move to the

	5974 // second level

	5975 s.iterator->move(s.iterator, 0, UITER_START);

	5976 cces = 0;

	5977 level = UCOL_PSK_SECONDARY;

	5978 break;

	5979 }

	5980 if(!isContinuation(CE)){

	5981 if(coll->leadBytePermutationTable != NULL){

	5982 CE = (coll->leadBytePermutationTable[CE>>24] << 24) \| (CE & 0x00FFFFFF);

	5983 }

	5984 }

	5985 if(!isShiftedCE(CE, LVT, &wasShifted)) {

	5986 CE >>= UCOL_PRIMARYORDERSHIFT; /* get primary */

	5987 if(CE != 0) {

	5988 if(byteCountOrFrenchDone == 0) {

	5989 // get the second byte of primary

	5990 dest[i++]=(uint8_t)(CE >> 8);

	5991 } else {

	5992 byteCountOrFrenchDone = 0;

	5993 }

	5994 if((CE &=0xff)!=0) {

	5995 if(i==count) {

	5996 /* overflow */

	5997 byteCountOrFrenchDone = 1;

	5998 cces--;

	5999 goto saveState;

	6000 }

	6001 dest[i++]=(uint8_t)CE;

	6002 }

	6003 }

	6004 }

	6005 if(uprv_numAvailableExpCEs(s)) {

	6006 canUpdateState = FALSE;

	6007 } else {

	6008 canUpdateState = TRUE;

	6009 }

	6010 }

	6011 /* fall through to next level */

	6012 case UCOL_PSK_SECONDARY:

	6013 if(strength >= UCOL_SECONDARY) {

	6014 if(!doingFrench) {

	6015 for(;;) {

	6016 if(i == count) {

	6017 goto saveState;

	6018 }

	6019 // We should save the state only if we

	6020 // are sure that we are done with the

	6021 // previous iterator state

	6022 if(canUpdateState) {

	6023 newState = s.iterator->getState(s.iterator);

	6024 if(newState != UITER_NO_STATE) {

	6025 iterState = newState;

	6026 cces = 0;

	6027 }

	6028 }

	6029 CE = ucol_IGetNextCE(coll, &s, status);

	6030 cces++;

	6031 if(CE==UCOL_NO_MORE_CES) {

	6032 // Add the level separator

	6033 terminatePSKLevel(level, maxLevel, i, dest);

	6034 byteCountOrFrenchDone = 0;

	6035 // Restart the iteration an move to the

	6036 // second level

	6037 s.iterator->move(s.iterator, 0, UITER_START);

	6038 cces = 0;

	6039 level = UCOL_PSK_CASE;

	6040 break;

	6041 }

	6042 if(!isShiftedCE(CE, LVT, &wasShifted)) {

	6043 CE >>= 8; /* get secondary */

	6044 if(CE != 0) {

	6045 dest[i++]=(uint8_t)CE;

	6046 }

	6047 }

	6048 if(uprv_numAvailableExpCEs(s)) {

	6049 canUpdateState = FALSE;

	6050 } else {

	6051 canUpdateState = TRUE;

	6052 }

	6053 }

	6054 } else { // French secondary processing

	6055 uint8_t frenchBuff[UCOL_MAX_BUFFER];

	6056 int32_t frenchIndex = 0;

	6057 // Here we are going backwards.

	6058 // If the iterator is at the beggining, it should be

	6059 // moved to end.

	6060 if(wasDoingPrimary) {

	6061 s.iterator->move(s.iterator, 0, UITER_LIMIT);

	6062 cces = 0;

	6063 }

	6064 for(;;) {

	6065 if(i == count) {

	6066 goto saveState;

	6067 }

	6068 if(canUpdateState) {

	6069 newState = s.iterator->getState(s.iterator);

	6070 if(newState != UITER_NO_STATE) {

	6071 iterState = newState;

	6072 cces = 0;

	6073 }

	6074 }

	6075 CE = ucol_IGetPrevCE(coll, &s, status);

	6076 cces++;

	6077 if(CE==UCOL_NO_MORE_CES) {

	6078 // Add the level separator

	6079 terminatePSKLevel(level, maxLevel, i, dest);

	6080 byteCountOrFrenchDone = 0;

	6081 // Restart the iteration an move to the next level

	6082 s.iterator->move(s.iterator, 0, UITER_START);

	6083 level = UCOL_PSK_CASE;

	6084 break;

	6085 }

	6086 if(isContinuation(CE)) { // if it's a continuation, we want to save it and

	6087 // reverse when we get a first non-continuation CE.

	6088 CE >>= 8;

	6089 frenchBuff[frenchIndex++] = (uint8_t)CE;

	6090 } else if(!isShiftedCE(CE, LVT, &wasShifted)) {

	6091 CE >>= 8; /* get secondary */

	6092 if(!frenchIndex) {

	6093 if(CE != 0) {

	6094 dest[i++]=(uint8_t)CE;

	6095 }

	6096 } else {

	6097 frenchBuff[frenchIndex++] = (uint8_t)CE;

	6098 frenchIndex -= usedFrench;

	6099 usedFrench = 0;

	6100 while(i < count && frenchIndex) {

	6101 dest[i++] = frenchBuff[--frenchIndex];

	6102 usedFrench++;

	6103 }

	6104 }

	6105 }

	6106 if(uprv_numAvailableExpCEs(s)) {

	6107 canUpdateState = FALSE;

	6108 } else {

	6109 canUpdateState = TRUE;

	6110 }

	6111 }

	6112 }

	6113 } else {

	6114 level = UCOL_PSK_CASE;

	6115 }

	6116 /* fall through to next level */

	6117 case UCOL_PSK_CASE:

	6118 if(ucol_getAttribute(coll, UCOL_CASE_LEVEL, status) == UCOL_ON) {

	6119 uint32_t caseShift = UCOL_CASE_SHIFT_START;

	6120 uint8_t caseByte = UCOL_CASE_BYTE_START;

	6121 uint8_t caseBits = 0;

	6122

	6123 for(;;) {

	6124 U_ASSERT(caseShift <= UCOL_CASE_SHIFT_START);

	6125 if(i == count) {

	6126 goto saveState;

	6127 }

	6128 // We should save the state only if we

	6129 // are sure that we are done with the

	6130 // previous iterator state

	6131 if(canUpdateState) {

	6132 newState = s.iterator->getState(s.iterator);

	6133 if(newState != UITER_NO_STATE) {

	6134 iterState = newState;

	6135 cces = 0;

	6136 }

	6137 }

	6138 CE = ucol_IGetNextCE(coll, &s, status);

	6139 cces++;

	6140 if(CE==UCOL_NO_MORE_CES) {

	6141 // On the case level we might have an unfinished

	6142 // case byte. Add one if it's started.

	6143 if(caseShift != UCOL_CASE_SHIFT_START) {

	6144 dest[i++] = caseByte;

	6145 }

	6146 cces = 0;

	6147 // We have finished processing CEs on this level.

	6148 // However, we don't know if we have enough space

	6149 // to add a case level terminator.

	6150 if(i < count) {

	6151 // Add the level separator

	6152 terminatePSKLevel(level, maxLevel, i, dest);

	6153 // Restart the iteration and move to the

	6154 // next level

	6155 s.iterator->move(s.iterator, 0, UITER_START);

	6156 level = UCOL_PSK_TERTIARY;

	6157 } else {

	6158 canUpdateState = FALSE;

	6159 }

	6160 break;

	6161 }

	6162

	6163 if(!isShiftedCE(CE, LVT, &wasShifted)) {

	6164 if(!isContinuation(CE) && ((CE & UCOL_PRIMARYMASK) != 0 \|\| s trength > UCOL_PRIMARY)) {

	6165 // do the case level if we need to do it. We don't want to calculate

	6166 // case level for primary ignorables if we have only pri mary strength and case level

	6167 // otherwise we would break well formedness of CEs

	6168 CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);

	6169 caseBits = (uint8_t)(CE & 0xC0);

	6170 // this copies the case level logic from the

	6171 // sort key generation code

	6172 if(CE != 0) {

	6173 if (caseShift == 0) {

	6174 dest[i++] = caseByte;

	6175 caseShift = UCOL_CASE_SHIFT_START;

	6176 caseByte = UCOL_CASE_BYTE_START;

	6177 }

	6178 if(coll->caseFirst == UCOL_UPPER_FIRST) {

	6179 if((caseBits & 0xC0) == 0) {

	6180 caseByte \|= 1 << (--caseShift);

	6181 } else {

	6182 caseByte \|= 0 << (--caseShift);

	6183 /* second bit */

	6184 if(caseShift == 0) {

	6185 dest[i++] = caseByte;

	6186 caseShift = UCOL_CASE_SHIFT_START;

	6187 caseByte = UCOL_CASE_BYTE_START;

	6188 }

	6189 caseByte \|= ((caseBits>>6)&1) << (--caseShif t);

	6190 }

	6191 } else {

	6192 if((caseBits & 0xC0) == 0) {

	6193 caseByte \|= 0 << (--caseShift);

	6194 } else {

	6195 caseByte \|= 1 << (--caseShift);

	6196 /* second bit */

	6197 if(caseShift == 0) {

	6198 dest[i++] = caseByte;

	6199 caseShift = UCOL_CASE_SHIFT_START;

	6200 caseByte = UCOL_CASE_BYTE_START;

	6201 }

	6202 caseByte \|= ((caseBits>>7)&1) << (--caseShif t);

	6203 }

	6204 }

	6205 }

	6206

	6207 }

	6208 }

	6209 // Not sure this is correct for the case level - revisit

	6210 if(uprv_numAvailableExpCEs(s)) {

	6211 canUpdateState = FALSE;

	6212 } else {

	6213 canUpdateState = TRUE;

	6214 }

	6215 }

	6216 } else {

	6217 level = UCOL_PSK_TERTIARY;

	6218 }

	6219 /* fall through to next level */

	6220 case UCOL_PSK_TERTIARY:

	6221 if(strength >= UCOL_TERTIARY) {

	6222 for(;;) {

	6223 if(i == count) {

	6224 goto saveState;

	6225 }

	6226 // We should save the state only if we

	6227 // are sure that we are done with the

	6228 // previous iterator state

	6229 if(canUpdateState) {

	6230 newState = s.iterator->getState(s.iterator);

	6231 if(newState != UITER_NO_STATE) {

	6232 iterState = newState;

	6233 cces = 0;

	6234 }

	6235 }

	6236 CE = ucol_IGetNextCE(coll, &s, status);

	6237 cces++;

	6238 if(CE==UCOL_NO_MORE_CES) {

	6239 // Add the level separator

	6240 terminatePSKLevel(level, maxLevel, i, dest);

	6241 byteCountOrFrenchDone = 0;

	6242 // Restart the iteration an move to the

	6243 // second level

	6244 s.iterator->move(s.iterator, 0, UITER_START);

	6245 cces = 0;

	6246 level = UCOL_PSK_QUATERNARY;

	6247 break;

	6248 }

	6249 if(!isShiftedCE(CE, LVT, &wasShifted)) {

	6250 notIsContinuation = !isContinuation(CE);

	6251

	6252 if(notIsContinuation) {

	6253 CE = (uint8_t)(CE & UCOL_BYTE_SIZE_MASK);

	6254 CE ^= coll->caseSwitch;

	6255 CE &= coll->tertiaryMask;

	6256 } else {

	6257 CE = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));

	6258 }

	6259

	6260 if(CE != 0) {

	6261 dest[i++]=(uint8_t)CE;

	6262 }

	6263 }

	6264 if(uprv_numAvailableExpCEs(s)) {

	6265 canUpdateState = FALSE;

	6266 } else {

	6267 canUpdateState = TRUE;

	6268 }

	6269 }

	6270 } else {

	6271 // if we're not doing tertiary

	6272 // skip to the end

	6273 level = UCOL_PSK_NULL;

	6274 }

	6275 /* fall through to next level */

	6276 case UCOL_PSK_QUATERNARY:

	6277 if(strength >= UCOL_QUATERNARY) {

	6278 for(;;) {

	6279 if(i == count) {

	6280 goto saveState;

	6281 }

	6282 // We should save the state only if we

	6283 // are sure that we are done with the

	6284 // previous iterator state

	6285 if(canUpdateState) {

	6286 newState = s.iterator->getState(s.iterator);

	6287 if(newState != UITER_NO_STATE) {

	6288 iterState = newState;

	6289 cces = 0;

	6290 }

	6291 }

	6292 CE = ucol_IGetNextCE(coll, &s, status);

	6293 cces++;

	6294 if(CE==UCOL_NO_MORE_CES) {

	6295 // Add the level separator

	6296 terminatePSKLevel(level, maxLevel, i, dest);

	6297 //dest[i++] = UCOL_LEVELTERMINATOR;

	6298 byteCountOrFrenchDone = 0;

	6299 // Restart the iteration an move to the

	6300 // second level

	6301 s.iterator->move(s.iterator, 0, UITER_START);

	6302 cces = 0;

	6303 level = UCOL_PSK_QUIN;

	6304 break;

	6305 }

	6306 if(CE==0)

	6307 continue;

	6308 if(isShiftedCE(CE, LVT, &wasShifted)) {

	6309 CE >>= 16; /* get primary */

	6310 if(CE != 0) {

	6311 if(byteCountOrFrenchDone == 0) {

	6312 dest[i++]=(uint8_t)(CE >> 8);

	6313 } else {

	6314 byteCountOrFrenchDone = 0;

	6315 }

	6316 if((CE &=0xff)!=0) {

	6317 if(i==count) {

	6318 /* overflow */

	6319 byteCountOrFrenchDone = 1;

	6320 goto saveState;

	6321 }

	6322 dest[i++]=(uint8_t)CE;

	6323 }

	6324 }

	6325 } else {

	6326 notIsContinuation = !isContinuation(CE);

	6327 if(notIsContinuation) {

	6328 if(s.flags & UCOL_WAS_HIRAGANA) { // This was Hiragana a nd we need to note it

	6329 dest[i++] = UCOL_HIRAGANA_QUAD;

	6330 } else {

	6331 dest[i++] = 0xFF;

	6332 }

	6333 }

	6334 }

	6335 if(uprv_numAvailableExpCEs(s)) {

	6336 canUpdateState = FALSE;

	6337 } else {

	6338 canUpdateState = TRUE;

	6339 }

	6340 }

	6341 } else {

	6342 // if we're not doing quaternary

	6343 // skip to the end

	6344 level = UCOL_PSK_NULL;

	6345 }

	6346 /* fall through to next level */

	6347 case UCOL_PSK_QUIN:

	6348 level = UCOL_PSK_IDENTICAL;

	6349 /* fall through to next level */

	6350 case UCOL_PSK_IDENTICAL:

	6351 if(strength >= UCOL_IDENTICAL) {

	6352 UChar32 first, second;

	6353 int32_t bocsuBytesWritten = 0;

	6354 // We always need to do identical on

	6355 // the NFD form of the string.

	6356 if(normIter == NULL) {

	6357 // we arrived from the level below and

	6358 // normalization was not turned on.

	6359 // therefore, we need to make a fresh NFD iterator

	6360 normIter = unorm_openIter(stackNormIter, sizeof(stackNormIter), status);

	6361 s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);

	6362 } else if(!doingIdenticalFromStart) {

	6363 // there is an iterator, but we did some other levels.

	6364 // therefore, we have a FCD iterator - need to make

	6365 // a NFD one.

	6366 // normIter being at the beginning does not guarantee

	6367 // that the underlying iterator is at the beginning

	6368 iter->move(iter, 0, UITER_START);

	6369 s.iterator = unorm_setIter(normIter, iter, UNORM_NFD, status);

	6370 }

	6371 // At this point we have a NFD iterator that is positioned

	6372 // in the right place

	6373 if(U_FAILURE(*status)) {

	6374 UTRACE_EXIT_STATUS(*status);

	6375 return 0;

	6376 }

	6377 first = uiter_previous32(s.iterator);

	6378 // maybe we're at the start of the string

	6379 if(first == U_SENTINEL) {

	6380 first = 0;

	6381 } else {

	6382 uiter_next32(s.iterator);

	6383 }

	6384

	6385 j = 0;

	6386 for(;;) {

	6387 if(i == count) {

	6388 if(j+1 < bocsuBytesWritten) {

	6389 bocsuBytesUsed = j+1;

	6390 }

	6391 goto saveState;

	6392 }

	6393

	6394 // On identical level, we will always save

	6395 // the state if we reach this point, since

	6396 // we don't depend on getNextCE for content

	6397 // all the content is in our buffer and we

	6398 // already either stored the full buffer OR

	6399 // otherwise we won't arrive here.

	6400 newState = s.iterator->getState(s.iterator);

	6401 if(newState != UITER_NO_STATE) {

	6402 iterState = newState;

	6403 cces = 0;

	6404 }

	6405

	6406 uint8_t buff[4];

	6407 second = uiter_next32(s.iterator);

	6408 cces++;

	6409

	6410 // end condition for identical level

	6411 if(second == U_SENTINEL) {

	6412 terminatePSKLevel(level, maxLevel, i, dest);

	6413 level = UCOL_PSK_NULL;

	6414 break;

	6415 }

	6416 bocsuBytesWritten = u_writeIdenticalLevelRunTwoChars(first, seco nd, buff);

	6417 first = second;

	6418

	6419 j = 0;

	6420 if(bocsuBytesUsed != 0) {

	6421 while(bocsuBytesUsed-->0) {

	6422 j++;

	6423 }

	6424 }

	6425

	6426 while(i < count && j < bocsuBytesWritten) {

	6427 dest[i++] = buff[j++];

	6428 }

	6429 }

	6430

	6431 } else {

	6432 level = UCOL_PSK_NULL;

	6433 }

	6434 /* fall through to next level */

	6435 case UCOL_PSK_NULL:

	6436 j = i;

	6437 while(j<count) {

	6438 dest[j++]=0;

	6439 }

	6440 break;

	6441 default:

	6442 *status = U_INTERNAL_PROGRAM_ERROR;

	6443 UTRACE_EXIT_STATUS(*status);

	6444 return 0;

	6445 }

	6446

	6447 saveState:

	6448 // Now we need to return stuff. First we want to see whether we have

	6449 // done everything for the current state of iterator.

	6450 if(byteCountOrFrenchDone

	6451 \|\| canUpdateState == FALSE

	6452 \|\| (newState = s.iterator->getState(s.iterator)) == UITER_NO_STATE)

	6453 {

	6454 // Any of above mean that the previous transaction

	6455 // wasn't finished and that we should store the

	6456 // previous iterator state.

	6457 state[0] = iterState;

	6458 } else {

	6459 // The transaction is complete. We will continue in the next iteration.

	6460 state[0] = s.iterator->getState(s.iterator);

	6461 cces = 0;

	6462 }

	6463 // Store the number of bocsu bytes written.

	6464 if((bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) != bocsuBytesUsed) {

	6465 *status = U_INDEX_OUTOFBOUNDS_ERROR;

	6466 }

	6467 state[1] = (bocsuBytesUsed & UCOL_PSK_BOCSU_BYTES_MASK) << UCOL_PSK_BOCSU_BY TES_SHIFT;

	6468

	6469 // Next we put in the level of comparison

	6470 state[1] \|= ((level & UCOL_PSK_LEVEL_MASK) << UCOL_PSK_LEVEL_SHIFT);

	6471

	6472 // If we are doing French, we need to store whether we have just finished th e French level

	6473 if(level == UCOL_PSK_SECONDARY && doingFrench) {

	6474 state[1] \|= (((state[0] == 0) & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);

	6475 } else {

	6476 state[1] \|= ((byteCountOrFrenchDone & UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE _MASK) << UCOL_PSK_BYTE_COUNT_OR_FRENCH_DONE_SHIFT);

	6477 }

	6478

	6479 // Was the latest CE shifted

	6480 if(wasShifted) {

	6481 state[1] \|= 1 << UCOL_PSK_WAS_SHIFTED_SHIFT;

	6482 }

	6483 // Check for cces overflow

	6484 if((cces & UCOL_PSK_CONSUMED_CES_MASK) != cces) {

	6485 *status = U_INDEX_OUTOFBOUNDS_ERROR;

	6486 }

	6487 // Store cces

	6488 state[1] \|= ((cces & UCOL_PSK_CONSUMED_CES_MASK) << UCOL_PSK_CONSUMED_CES_SH IFT);

	6489

	6490 // Check for French overflow

	6491 if((usedFrench & UCOL_PSK_USED_FRENCH_MASK) != usedFrench) {

	6492 *status = U_INDEX_OUTOFBOUNDS_ERROR;

	6493 }

	6494 // Store number of bytes written in the French secondary continuation sequen ce

	6495 state[1] \|= ((usedFrench & UCOL_PSK_USED_FRENCH_MASK) << UCOL_PSK_USED_FRENC H_SHIFT);

	6496

	6497

	6498 // If we have used normalizing iterator, get rid of it

	6499 if(normIter != NULL) {

	6500 unorm_closeIter(normIter);

	6501 }

	6502

	6503 /* To avoid memory leak, free the offset buffer if necessary. */

	6504 ucol_freeOffsetBuffer(&s);

	6505

	6506 // Return number of meaningful sortkey bytes.

	6507 UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",

	6508 dest,i, state[0], state[1]);

	6509 UTRACE_EXIT_VALUE(i);

	6510 return i;

	6511 }

	6512

	6513 /**

	6514 * Produce a bound for a given sortkey and a number of levels.

	6515 */

	6516 U_CAPI int32_t U_EXPORT2

	6517 ucol_getBound(const uint8_t *source,

	6518 int32_t sourceLength,

	6519 UColBoundMode boundType,

	6520 uint32_t noOfLevels,

	6521 uint8_t *result,

	6522 int32_t resultLength,

	6523 UErrorCode *status)

	6524 {

	6525 // consistency checks

	6526 if(status == NULL \|\| U_FAILURE(*status)) {

	6527 return 0;

	6528 }

	6529 if(source == NULL) {

	6530 *status = U_ILLEGAL_ARGUMENT_ERROR;

	6531 return 0;

	6532 }

	6533

	6534 int32_t sourceIndex = 0;

	6535 // Scan the string until we skip enough of the key OR reach the end of the k ey

	6536 do {

	6537 sourceIndex++;

	6538 if(source[sourceIndex] == UCOL_LEVELTERMINATOR) {

	6539 noOfLevels--;

	6540 }

	6541 } while (noOfLevels > 0

	6542 && (source[sourceIndex] != 0 \|\| sourceIndex < sourceLength));

	6543

	6544 if((source[sourceIndex] == 0 \|\| sourceIndex == sourceLength)

	6545 && noOfLevels > 0) {

	6546 *status = U_SORT_KEY_TOO_SHORT_WARNING;

	6547 }

	6548

	6549

	6550 // READ ME: this code assumes that the values for boundType

	6551 // enum will not changes. They are set so that the enum value

	6552 // corresponds to the number of extra bytes each bound type

	6553 // needs.

	6554 if(result != NULL && resultLength >= sourceIndex+boundType) {

	6555 uprv_memcpy(result, source, sourceIndex);

	6556 switch(boundType) {

	6557 // Lower bound just gets terminated. No extra bytes

	6558 case UCOL_BOUND_LOWER: // = 0

	6559 break;

	6560 // Upper bound needs one extra byte

	6561 case UCOL_BOUND_UPPER: // = 1

	6562 result[sourceIndex++] = 2;

	6563 break;

	6564 // Upper long bound needs two extra bytes

	6565 case UCOL_BOUND_UPPER_LONG: // = 2

	6566 result[sourceIndex++] = 0xFF;

	6567 result[sourceIndex++] = 0xFF;

	6568 break;

	6569 default:

	6570 *status = U_ILLEGAL_ARGUMENT_ERROR;

	6571 return 0;

	6572 }

	6573 result[sourceIndex++] = 0;

	6574

	6575 return sourceIndex;

	6576 } else {

	6577 return sourceIndex+boundType+1;

	6578 }

	6579 }

	6580

	6581 /****************************************************************************/

	6582 /* Following are the functions that deal with the properties of a collator */

	6583 /* there are new APIs and some compatibility APIs */

	6584 /****************************************************************************/

	6585

	6586 static inline void

	6587 ucol_addLatinOneEntry(UCollator *coll, UChar ch, uint32_t CE,

	6588 int32_t primShift, int32_t secShift, int32_t *terShift)

	6589 {

	6590 uint8_t primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0;

	6591 UBool reverseSecondary = FALSE;

	6592 UBool continuation = isContinuation(CE);

	6593 if(!continuation) {

	6594 tertiary = (uint8_t)((CE & coll->tertiaryMask));

	6595 tertiary ^= coll->caseSwitch;

	6596 reverseSecondary = TRUE;

	6597 } else {

	6598 tertiary = (uint8_t)((CE & UCOL_REMOVE_CONTINUATION));

	6599 tertiary &= UCOL_REMOVE_CASE;

	6600 reverseSecondary = FALSE;

	6601 }

	6602

	6603 secondary = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);

	6604 primary2 = (uint8_t)((CE >>= 8) & UCOL_BYTE_SIZE_MASK);

	6605 primary1 = (uint8_t)(CE >> 8);

	6606

	6607 if(primary1 != 0) {

	6608 if (coll->leadBytePermutationTable != NULL && !continuation) {

	6609 primary1 = coll->leadBytePermutationTable[primary1];

	6610 }

	6611

	6612 coll->latinOneCEs[ch] \|= (primary1 << *primShift);

	6613 *primShift -= 8;

	6614 }

	6615 if(primary2 != 0) {

	6616 if(*primShift < 0) {

	6617 coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;

	6618 coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;

	6619 coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BAIL_OUT_CE;

	6620 return;

	6621 }

	6622 coll->latinOneCEs[ch] \|= (primary2 << *primShift);

	6623 *primShift -= 8;

	6624 }

	6625 if(secondary != 0) {

	6626 if(reverseSecondary && coll->frenchCollation == UCOL_ON) { // reverse se condary

	6627 coll->latinOneCEs[coll->latinOneTableLen+ch] >>= 8; // make space fo r secondary

	6628 coll->latinOneCEs[coll->latinOneTableLen+ch] \|= (secondary << 24);

	6629 } else { // normal case

	6630 coll->latinOneCEs[coll->latinOneTableLen+ch] \|= (secondary << *secSh ift);

	6631 }

	6632 *secShift -= 8;

	6633 }

	6634 if(tertiary != 0) {

	6635 coll->latinOneCEs[2coll->latinOneTableLen+ch] \|= (tertiary << terShift );

	6636 *terShift -= 8;

	6637 }

	6638 }

	6639

	6640 static inline UBool

	6641 ucol_resizeLatinOneTable(UCollator coll, int32_t size, UErrorCode status) {

	6642 uint32_t newTable = (uint32_t )uprv_malloc(sizesizeof(uint32_t)3);

	6643 if(newTable == NULL) {

	6644 *status = U_MEMORY_ALLOCATION_ERROR;

	6645 coll->latinOneFailed = TRUE;

	6646 return FALSE;

	6647 }

	6648 int32_t sizeToCopy = ((size<coll->latinOneTableLen)?size:coll->latinOneTable Len)*sizeof(uint32_t);

	6649 uprv_memset(newTable, 0, sizesizeof(uint32_t)3);

	6650 uprv_memcpy(newTable, coll->latinOneCEs, sizeToCopy);

	6651 uprv_memcpy(newTable+size, coll->latinOneCEs+coll->latinOneTableLen, sizeToC opy);

	6652 uprv_memcpy(newTable+2size, coll->latinOneCEs+2coll->latinOneTableLen, siz eToCopy);

	6653 coll->latinOneTableLen = size;

	6654 uprv_free(coll->latinOneCEs);

	6655 coll->latinOneCEs = newTable;

	6656 return TRUE;

	6657 }

	6658

	6659 static UBool

	6660 ucol_setUpLatinOne(UCollator coll, UErrorCode status) {

	6661 UBool result = TRUE;

	6662 if(coll->latinOneCEs == NULL) {

	6663 coll->latinOneCEs = (uint32_t )uprv_malloc(sizeof(uint32_t)UCOL_LATINO NETABLELEN*3);

	6664 if(coll->latinOneCEs == NULL) {

	6665 *status = U_MEMORY_ALLOCATION_ERROR;

	6666 return FALSE;

	6667 }

	6668 coll->latinOneTableLen = UCOL_LATINONETABLELEN;

	6669 }

	6670 UChar ch = 0;

	6671 UCollationElements *it = ucol_openElements(coll, &ch, 1, status);

	6672 // Check for null pointer

	6673 if (U_FAILURE(*status)) {

	6674 return FALSE;

	6675 }

	6676 uprv_memset(coll->latinOneCEs, 0, sizeof(uint32_t)coll->latinOneTableLen3) ;

	6677

	6678 int32_t primShift = 24, secShift = 24, terShift = 24;

	6679 uint32_t CE = 0;

	6680 int32_t contractionOffset = UCOL_ENDOFLATINONERANGE+1;

	6681

	6682 // TODO: make safe if you get more than you wanted...

	6683 for(ch = 0; ch <= UCOL_ENDOFLATINONERANGE; ch++) {

	6684 primShift = 24; secShift = 24; terShift = 24;

	6685 if(ch < 0x100) {

	6686 CE = coll->latinOneMapping[ch];

	6687 } else {

	6688 CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, ch);

	6689 if(CE == UCOL_NOT_FOUND && coll->UCA) {

	6690 CE = UTRIE_GET32_FROM_LEAD(&coll->UCA->mapping, ch);

	6691 }

	6692 }

	6693 if(CE < UCOL_NOT_FOUND) {

	6694 ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, &terShift );

	6695 } else {

	6696 switch (getCETag(CE)) {

	6697 case EXPANSION_TAG:

	6698 case DIGIT_TAG:

	6699 ucol_setText(it, &ch, 1, status);

	6700 while((int32_t)(CE = ucol_next(it, status)) != UCOL_NULLORDER) {

	6701 if(primShift < 0 \|\| secShift < 0 \|\| terShift < 0) {

	6702 coll->latinOneCEs[ch] = UCOL_BAIL_OUT_CE;

	6703 coll->latinOneCEs[coll->latinOneTableLen+ch] = UCOL_BAIL _OUT_CE;

	6704 coll->latinOneCEs[2*coll->latinOneTableLen+ch] = UCOL_BA IL_OUT_CE;

	6705 break;

	6706 }

	6707 ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShift, & terShift);

	6708 }

	6709 break;

	6710 case CONTRACTION_TAG:

	6711 // here is the trick

	6712 // F2 is contraction. We do something very similar to contractio ns

	6713 // but have two indices, one in the real contraction table and t he

	6714 // other to where we stuffed things. This hopes that we don't ha ve

	6715 // many contractions (this should work for latin-1 tables).

	6716 {

	6717 if((CE & 0x00FFF000) != 0) {

	6718 *status = U_UNSUPPORTED_ERROR;

	6719 goto cleanup_after_failure;

	6720 }

	6721

	6722 const UChar UCharOffset = (UChar )coll->image+getContractO ffset(CE);

	6723

	6724 CE \|= (contractionOffset & 0xFFF) << 12; // insert the offse t in latin-1 table

	6725

	6726 coll->latinOneCEs[ch] = CE;

	6727 coll->latinOneCEs[coll->latinOneTableLen+ch] = CE;

	6728 coll->latinOneCEs[2*coll->latinOneTableLen+ch] = CE;

	6729

	6730 // We're going to jump into contraction table, pick the elem ents

	6731 // and use them

	6732 do {

	6733 CE = *(coll->contractionCEs +

	6734 (UCharOffset - coll->contractionIndex));

	6735 if(CE > UCOL_NOT_FOUND && getCETag(CE) == EXPANSION_TAG) {

	6736 uint32_t size;

	6737 uint32_t i; /* general counter */

	6738 uint32_t CEOffset = (uint32_t )coll->image+getExpa nsionOffset(CE); /* find the offset to expansion table */

	6739 size = getExpansionCount(CE);

	6740 //CE = *CEOffset++;

	6741 if(size != 0) { /* if there are less than 16 element s in expansion, we don't terminate */

	6742 for(i = 0; i<size; i++) {

	6743 if(primShift < 0 \|\| secShift < 0 \|\| terShift < 0) {

	6744 coll->latinOneCEs[(UChar)contractionOffs et] = UCOL_BAIL_OUT_CE;

	6745 coll->latinOneCEs[coll->latinOneTableLen +(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;

	6746 coll->latinOneCEs[2*coll->latinOneTableL en+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;

	6747 break;

	6748 }

	6749 ucol_addLatinOneEntry(coll, (UChar)contracti onOffset, *CEOffset++, &primShift, &secShift, &terShift);

	6750 }

	6751 } else { /* else, we do */

	6752 while(*CEOffset != 0) {

	6753 if(primShift < 0 \|\| secShift < 0 \|\| terShift < 0) {

	6754 coll->latinOneCEs[(UChar)contractionOffs et] = UCOL_BAIL_OUT_CE;

	6755 coll->latinOneCEs[coll->latinOneTableLen +(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;

	6756 coll->latinOneCEs[2*coll->latinOneTableL en+(UChar)contractionOffset] = UCOL_BAIL_OUT_CE;

	6757 break;

	6758 }

	6759 ucol_addLatinOneEntry(coll, (UChar)contracti onOffset, *CEOffset++, &primShift, &secShift, &terShift);

	6760 }

	6761 }

	6762 contractionOffset++;

	6763 } else if(CE < UCOL_NOT_FOUND) {

	6764 ucol_addLatinOneEntry(coll, (UChar)contractionOffset ++, CE, &primShift, &secShift, &terShift);

	6765 } else {

	6766 coll->latinOneCEs[(UChar)contractionOffset] = UCOL_B AIL_OUT_CE;

	6767 coll->latinOneCEs[coll->latinOneTableLen+(UChar)cont ractionOffset] = UCOL_BAIL_OUT_CE;

	6768 coll->latinOneCEs[2*coll->latinOneTableLen+(UChar)co ntractionOffset] = UCOL_BAIL_OUT_CE;

	6769 contractionOffset++;

	6770 }

	6771 UCharOffset++;

	6772 primShift = 24; secShift = 24; terShift = 24;

	6773 if(contractionOffset == coll->latinOneTableLen) { // we need to reallocate

	6774 if(!ucol_resizeLatinOneTable(coll, 2*coll->latinOneT ableLen, status)) {

	6775 goto cleanup_after_failure;

	6776 }

	6777 }

	6778 } while(*UCharOffset != 0xFFFF);

	6779 }

	6780 break;;

	6781 case SPEC_PROC_TAG:

	6782 {

	6783 // 0xB7 is a precontext character defined in UCA5.1, a speci al

	6784 // handle is implemeted in order to save LatinOne table for

	6785 // most locales.

	6786 if (ch==0xb7) {

	6787 ucol_addLatinOneEntry(coll, ch, CE, &primShift, &secShif t, &terShift);

	6788 }

	6789 else {

	6790 goto cleanup_after_failure;

	6791 }

	6792 }

	6793 break;

	6794 default:

	6795 goto cleanup_after_failure;

	6796 }

	6797 }

	6798 }

	6799 // compact table

	6800 if(contractionOffset < coll->latinOneTableLen) {

	6801 if(!ucol_resizeLatinOneTable(coll, contractionOffset, status)) {

	6802 goto cleanup_after_failure;

	6803 }

	6804 }

	6805 ucol_closeElements(it);

	6806 return result;

	6807

	6808 cleanup_after_failure:

	6809 // status should already be set before arriving here.

	6810 coll->latinOneFailed = TRUE;

	6811 ucol_closeElements(it);

	6812 return FALSE;

	6813 }

	6814

	6815 void ucol_updateInternalState(UCollator coll, UErrorCode status) {

	6816 if(U_SUCCESS(*status)) {

	6817 if(coll->caseFirst == UCOL_UPPER_FIRST) {

	6818 coll->caseSwitch = UCOL_CASE_SWITCH;

	6819 } else {

	6820 coll->caseSwitch = UCOL_NO_CASE_SWITCH;

	6821 }

	6822

	6823 if(coll->caseLevel == UCOL_ON \|\| coll->caseFirst == UCOL_OFF) {

	6824 coll->tertiaryMask = UCOL_REMOVE_CASE;

	6825 coll->tertiaryCommon = UCOL_COMMON3_NORMAL;

	6826 coll->tertiaryAddition = (int8_t)UCOL_FLAG_BIT_MASK_CASE_SW_OFF; /* Should be 0x80 */

	6827 coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_OFF;

	6828 coll->tertiaryBottom = UCOL_COMMON_BOT3;

	6829 } else {

	6830 coll->tertiaryMask = UCOL_KEEP_CASE;

	6831 coll->tertiaryAddition = UCOL_FLAG_BIT_MASK_CASE_SW_ON;

	6832 if(coll->caseFirst == UCOL_UPPER_FIRST) {

	6833 coll->tertiaryCommon = UCOL_COMMON3_UPPERFIRST;

	6834 coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_UPPER;

	6835 coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_UPPER;

	6836 } else {

	6837 coll->tertiaryCommon = UCOL_COMMON3_NORMAL;

	6838 coll->tertiaryTop = UCOL_COMMON_TOP3_CASE_SW_LOWER;

	6839 coll->tertiaryBottom = UCOL_COMMON_BOTTOM3_CASE_SW_LOWER;

	6840 }

	6841 }

	6842

	6843 /* Set the compression values */

	6844 uint8_t tertiaryTotal = (uint8_t)(coll->tertiaryTop - UCOL_COMMON_BOT3-1 );

	6845 coll->tertiaryTopCount = (uint8_t)(UCOL_PROPORTION3tertiaryTotal); / w e multilply double with int, but need only int */

	6846 coll->tertiaryBottomCount = (uint8_t)(tertiaryTotal - coll->tertiaryTopC ount);

	6847

	6848 if(coll->caseLevel == UCOL_OFF && coll->strength == UCOL_TERTIARY

	6849 && coll->frenchCollation == UCOL_OFF && coll->alternateHandling == U COL_NON_IGNORABLE)

	6850 {

	6851 coll->sortKeyGen = ucol_calcSortKeySimpleTertiary;

	6852 } else {

	6853 coll->sortKeyGen = ucol_calcSortKey;

	6854 }

	6855 if(coll->caseLevel == UCOL_OFF && coll->strength <= UCOL_TERTIARY && col l->numericCollation == UCOL_OFF

	6856 && coll->alternateHandling == UCOL_NON_IGNORABLE && !coll->latinOneF ailed)

	6857 {

	6858 if(coll->latinOneCEs == NULL \|\| coll->latinOneRegenTable) {

	6859 if(ucol_setUpLatinOne(coll, status)) { // if we succeed in build ing latin1 table, we'll use it

	6860 //fprintf(stderr, "F");

	6861 coll->latinOneUse = TRUE;

	6862 } else {

	6863 coll->latinOneUse = FALSE;

	6864 }

	6865 if(*status == U_UNSUPPORTED_ERROR) {

	6866 *status = U_ZERO_ERROR;

	6867 }

	6868 } else { // latin1Table exists and it doesn't need to be regenerated , just use it

	6869 coll->latinOneUse = TRUE;

	6870 }

	6871 } else {

	6872 coll->latinOneUse = FALSE;

	6873 }

	6874 }

	6875 }

	6876

	6877 U_CAPI uint32_t U_EXPORT2

	6878 ucol_setVariableTop(UCollator coll, const UChar varTop, int32_t len, UErrorCod e *status) {

	6879 if(U_FAILURE(*status) \|\| coll == NULL) {

	6880 return 0;

	6881 }

	6882 if(len == -1) {

	6883 len = u_strlen(varTop);

	6884 }

	6885 if(len == 0) {

	6886 *status = U_ILLEGAL_ARGUMENT_ERROR;

	6887 return 0;

	6888 }

	6889

	6890 collIterate s;

	6891 IInit_collIterate(coll, varTop, len, &s, status);

	6892 if(U_FAILURE(*status)) {

	6893 return 0;

	6894 }

	6895

	6896 uint32_t CE = ucol_IGetNextCE(coll, &s, status);

	6897

	6898 /* here we check if we have consumed all characters */

	6899 /* you can put in either one character or a contraction */

	6900 /* you shouldn't put more... */

	6901 if(s.pos != s.endp \|\| CE == UCOL_NO_MORE_CES) {

	6902 *status = U_CE_NOT_FOUND_ERROR;

	6903 return 0;

	6904 }

	6905

	6906 uint32_t nextCE = ucol_IGetNextCE(coll, &s, status);

	6907

	6908 if(isContinuation(nextCE) && (nextCE & UCOL_PRIMARYMASK) != 0) {

	6909 *status = U_PRIMARY_TOO_LONG_ERROR;

	6910 return 0;

	6911 }

	6912 if(coll->variableTopValue != (CE & UCOL_PRIMARYMASK)>>16) {

	6913 coll->variableTopValueisDefault = FALSE;

	6914 coll->variableTopValue = (CE & UCOL_PRIMARYMASK)>>16;

	6915 }

	6916

	6917 /* To avoid memory leak, free the offset buffer if necessary. */

	6918 ucol_freeOffsetBuffer(&s);

	6919

	6920 return CE & UCOL_PRIMARYMASK;

	6921 }

	6922

	6923 U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator coll, UErrorCode status) {

	6924 if(U_FAILURE(*status) \|\| coll == NULL) {

	6925 return 0;

	6926 }

	6927 return coll->variableTopValue<<16;

	6928 }

	6929

	6930 U_CAPI void U_EXPORT2

	6931 ucol_restoreVariableTop(UCollator coll, const uint32_t varTop, UErrorCode stat us) {

	6932 if(U_FAILURE(*status) \|\| coll == NULL) {

	6933 return;

	6934 }

	6935

	6936 if(coll->variableTopValue != (varTop & UCOL_PRIMARYMASK)>>16) {

	6937 coll->variableTopValueisDefault = FALSE;

	6938 coll->variableTopValue = (varTop & UCOL_PRIMARYMASK)>>16;

	6939 }

	6940 }

	6941 /* Attribute setter API */

	6942 U_CAPI void U_EXPORT2

	6943 ucol_setAttribute(UCollator coll, UColAttribute attr, UColAttributeValue value, UErrorCode status) {

	6944 if(U_FAILURE(*status) \|\| coll == NULL) {

	6945 return;

	6946 }

	6947 UColAttributeValue oldFrench = coll->frenchCollation;

	6948 UColAttributeValue oldCaseFirst = coll->caseFirst;

	6949 switch(attr) {

	6950 case UCOL_NUMERIC_COLLATION: /* sort substrings of digits as numbers */

	6951 if(value == UCOL_ON) {

	6952 coll->numericCollation = UCOL_ON;

	6953 coll->numericCollationisDefault = FALSE;

	6954 } else if (value == UCOL_OFF) {

	6955 coll->numericCollation = UCOL_OFF;

	6956 coll->numericCollationisDefault = FALSE;

	6957 } else if (value == UCOL_DEFAULT) {

	6958 coll->numericCollationisDefault = TRUE;

	6959 coll->numericCollation = (UColAttributeValue)coll->options->numericC ollation;

	6960 } else {

	6961 *status = U_ILLEGAL_ARGUMENT_ERROR;

	6962 }

	6963 break;

	6964 case UCOL_HIRAGANA_QUATERNARY_MODE: /* special quaternary values for Hiragan a */

	6965 if(value == UCOL_ON) {

	6966 coll->hiraganaQ = UCOL_ON;

	6967 coll->hiraganaQisDefault = FALSE;

	6968 } else if (value == UCOL_OFF) {

	6969 coll->hiraganaQ = UCOL_OFF;

	6970 coll->hiraganaQisDefault = FALSE;

	6971 } else if (value == UCOL_DEFAULT) {

	6972 coll->hiraganaQisDefault = TRUE;

	6973 coll->hiraganaQ = (UColAttributeValue)coll->options->hiraganaQ;

	6974 } else {

	6975 *status = U_ILLEGAL_ARGUMENT_ERROR;

	6976 }

	6977 break;

	6978 case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights* /

	6979 if(value == UCOL_ON) {

	6980 coll->frenchCollation = UCOL_ON;

	6981 coll->frenchCollationisDefault = FALSE;

	6982 } else if (value == UCOL_OFF) {

	6983 coll->frenchCollation = UCOL_OFF;

	6984 coll->frenchCollationisDefault = FALSE;

	6985 } else if (value == UCOL_DEFAULT) {

	6986 coll->frenchCollationisDefault = TRUE;

	6987 coll->frenchCollation = (UColAttributeValue)coll->options->frenchCol lation;

	6988 } else {

	6989 *status = U_ILLEGAL_ARGUMENT_ERROR ;

	6990 }

	6991 break;

	6992 case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/

	6993 if(value == UCOL_SHIFTED) {

	6994 coll->alternateHandling = UCOL_SHIFTED;

	6995 coll->alternateHandlingisDefault = FALSE;

	6996 } else if (value == UCOL_NON_IGNORABLE) {

	6997 coll->alternateHandling = UCOL_NON_IGNORABLE;

	6998 coll->alternateHandlingisDefault = FALSE;

	6999 } else if (value == UCOL_DEFAULT) {

	7000 coll->alternateHandlingisDefault = TRUE;

	7001 coll->alternateHandling = (UColAttributeValue)coll->options->alterna teHandling ;

	7002 } else {

	7003 *status = U_ILLEGAL_ARGUMENT_ERROR ;

	7004 }

	7005 break;

	7006 case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */

	7007 if(value == UCOL_LOWER_FIRST) {

	7008 coll->caseFirst = UCOL_LOWER_FIRST;

	7009 coll->caseFirstisDefault = FALSE;

	7010 } else if (value == UCOL_UPPER_FIRST) {

	7011 coll->caseFirst = UCOL_UPPER_FIRST;

	7012 coll->caseFirstisDefault = FALSE;

	7013 } else if (value == UCOL_OFF) {

	7014 coll->caseFirst = UCOL_OFF;

	7015 coll->caseFirstisDefault = FALSE;

	7016 } else if (value == UCOL_DEFAULT) {

	7017 coll->caseFirst = (UColAttributeValue)coll->options->caseFirst;

	7018 coll->caseFirstisDefault = TRUE;

	7019 } else {

	7020 *status = U_ILLEGAL_ARGUMENT_ERROR ;

	7021 }

	7022 break;

	7023 case UCOL_CASE_LEVEL: /* do we have an extra case level */

	7024 if(value == UCOL_ON) {

	7025 coll->caseLevel = UCOL_ON;

	7026 coll->caseLevelisDefault = FALSE;

	7027 } else if (value == UCOL_OFF) {

	7028 coll->caseLevel = UCOL_OFF;

	7029 coll->caseLevelisDefault = FALSE;

	7030 } else if (value == UCOL_DEFAULT) {

	7031 coll->caseLevel = (UColAttributeValue)coll->options->caseLevel;

	7032 coll->caseLevelisDefault = TRUE;

	7033 } else {

	7034 *status = U_ILLEGAL_ARGUMENT_ERROR ;

	7035 }

	7036 break;

	7037 case UCOL_NORMALIZATION_MODE: /* attribute for normalization */

	7038 if(value == UCOL_ON) {

	7039 coll->normalizationMode = UCOL_ON;

	7040 coll->normalizationModeisDefault = FALSE;

	7041 initializeFCD(status);

	7042 } else if (value == UCOL_OFF) {

	7043 coll->normalizationMode = UCOL_OFF;

	7044 coll->normalizationModeisDefault = FALSE;

	7045 } else if (value == UCOL_DEFAULT) {

	7046 coll->normalizationModeisDefault = TRUE;

	7047 coll->normalizationMode = (UColAttributeValue)coll->options->normali zationMode;

	7048 if(coll->normalizationMode == UCOL_ON) {

	7049 initializeFCD(status);

	7050 }

	7051 } else {

	7052 *status = U_ILLEGAL_ARGUMENT_ERROR ;

	7053 }

	7054 break;

	7055 case UCOL_STRENGTH: /* attribute for strength */

	7056 if (value == UCOL_DEFAULT) {

	7057 coll->strengthisDefault = TRUE;

	7058 coll->strength = (UColAttributeValue)coll->options->strength;

	7059 } else if (value <= UCOL_IDENTICAL) {

	7060 coll->strengthisDefault = FALSE;

	7061 coll->strength = value;

	7062 } else {

	7063 *status = U_ILLEGAL_ARGUMENT_ERROR ;

	7064 }

	7065 break;

	7066 case UCOL_ATTRIBUTE_COUNT:

	7067 default:

	7068 *status = U_ILLEGAL_ARGUMENT_ERROR;

	7069 break;

	7070 }

	7071 if(oldFrench != coll->frenchCollation \|\| oldCaseFirst != coll->caseFirst) {

	7072 coll->latinOneRegenTable = TRUE;

	7073 } else {

	7074 coll->latinOneRegenTable = FALSE;

	7075 }

	7076 ucol_updateInternalState(coll, status);

	7077 }

	7078

	7079 U_CAPI UColAttributeValue U_EXPORT2

	7080 ucol_getAttribute(const UCollator coll, UColAttribute attr, UErrorCode status) {

	7081 if(U_FAILURE(*status) \|\| coll == NULL) {

	7082 return UCOL_DEFAULT;

	7083 }

	7084 switch(attr) {

	7085 case UCOL_NUMERIC_COLLATION:

	7086 return coll->numericCollation;

	7087 case UCOL_HIRAGANA_QUATERNARY_MODE:

	7088 return coll->hiraganaQ;

	7089 case UCOL_FRENCH_COLLATION: /* attribute for direction of secondary weights* /

	7090 return coll->frenchCollation;

	7091 case UCOL_ALTERNATE_HANDLING: /* attribute for handling variable elements*/

	7092 return coll->alternateHandling;

	7093 case UCOL_CASE_FIRST: /* who goes first, lower case or uppercase */

	7094 return coll->caseFirst;

	7095 case UCOL_CASE_LEVEL: /* do we have an extra case level */

	7096 return coll->caseLevel;

	7097 case UCOL_NORMALIZATION_MODE: /* attribute for normalization */

	7098 return coll->normalizationMode;

	7099 case UCOL_STRENGTH: /* attribute for strength */

	7100 return coll->strength;

	7101 case UCOL_ATTRIBUTE_COUNT:

	7102 default:

	7103 *status = U_ILLEGAL_ARGUMENT_ERROR;

	7104 break;

	7105 }

	7106 return UCOL_DEFAULT;

	7107 }

	7108

	7109 U_CAPI void U_EXPORT2

	7110 ucol_setStrength( UCollator *coll,

	7111 UCollationStrength strength)

	7112 {

	7113 UErrorCode status = U_ZERO_ERROR;

	7114 ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);

	7115 }

	7116

	7117 U_CAPI UCollationStrength U_EXPORT2

	7118 ucol_getStrength(const UCollator *coll)

	7119 {

	7120 UErrorCode status = U_ZERO_ERROR;

	7121 return ucol_getAttribute(coll, UCOL_STRENGTH, &status);

	7122 }

	7123

	7124 U_INTERNAL int32_t U_EXPORT2

	7125 ucol_getReorderCodes(const UCollator *coll,

	7126 int32_t *dest,

	7127 int32_t destCapacity,

	7128 UErrorCode *pErrorCode) {

	7129 if (U_FAILURE(*pErrorCode)) {

	7130 return 0;

	7131 }

	7132

	7133 if (destCapacity < 0 \|\| (destCapacity > 0 && dest == NULL)) {

	7134 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;

	7135 return 0;

	7136 }

	7137

	7138 if (coll->reorderCodesLength > destCapacity) {

	7139 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;

	7140 return coll->reorderCodesLength;

	7141 }

	7142 for (int32_t i = 0; i < coll->reorderCodesLength; i++) {

	7143 dest[i] = coll->reorderCodes[i];

	7144 }

	7145 return coll->reorderCodesLength;

	7146 }

	7147

	7148 U_INTERNAL void U_EXPORT2

	7149 ucol_setReorderCodes(UCollator *coll,

	7150 const int32_t *reorderCodes,

	7151 int32_t reorderCodesLength,

	7152 UErrorCode *pErrorCode) {

	7153 if (U_FAILURE(*pErrorCode)) {

	7154 return;

	7155 }

	7156

	7157 if (reorderCodesLength < 0 \|\| (reorderCodesLength > 0 && reorderCodes == NUL L)) {

	7158 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;

	7159 return;

	7160 }

	7161

	7162 uprv_free(coll->reorderCodes);

	7163 coll->reorderCodes = NULL;

	7164 coll->reorderCodesLength = 0;

	7165 if (reorderCodesLength == 0) {

	7166 uprv_free(coll->leadBytePermutationTable);

	7167 coll->leadBytePermutationTable = NULL;

	7168 return;

	7169 }

	7170 coll->reorderCodes = (int32_t) uprv_malloc(reorderCodesLength sizeof(int3 2_t));

	7171 if (coll->reorderCodes == NULL) {

	7172 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;

	7173 return;

	7174 }

	7175 for (int32_t i = 0; i < reorderCodesLength; i++) {

	7176 coll->reorderCodes[i] = reorderCodes[i];

	7177 }

	7178 coll->reorderCodesLength = reorderCodesLength;

	7179 ucol_buildPermutationTable(coll, pErrorCode);

	7180 if (U_FAILURE(*pErrorCode)) {

	7181 uprv_free(coll->reorderCodes);

	7182 coll->reorderCodes = NULL;

	7183 coll->reorderCodesLength = 0;

	7184 }

	7185 }

	7186

	7187

	7188 /****************************************************************************/

	7189 /* Following are misc functions */

	7190 /* there are new APIs and some compatibility APIs */

	7191 /****************************************************************************/

	7192

	7193 U_CAPI void U_EXPORT2

	7194 ucol_getVersion(const UCollator* coll,

	7195 UVersionInfo versionInfo)

	7196 {

	7197 /* RunTime version */

	7198 uint8_t rtVersion = UCOL_RUNTIME_VERSION;

	7199 /* Builder version*/

	7200 uint8_t bdVersion = coll->image->version[0];

	7201

	7202 /* Charset Version. Need to get the version from cnv files

	7203 * makeconv should populate cnv files with version and

	7204 * an api has to be provided in ucnv.h to obtain this version

	7205 */

	7206 uint8_t csVersion = 0;

	7207

	7208 /* combine the version info */

	7209 uint16_t cmbVersion = (uint16_t)((rtVersion<<11) \| (bdVersion<<6) \| (csVersi on));

	7210

	7211 /* Tailoring rules */

	7212 versionInfo[0] = (uint8_t)(cmbVersion>>8);

	7213 versionInfo[1] = (uint8_t)cmbVersion;

	7214 versionInfo[2] = coll->image->version[1];

	7215 if(coll->UCA) {

	7216 /* Include the minor number when getting the UCA version. (major & 1f) < < 3 \| (minor & 7) */

	7217 versionInfo[3] = (coll->UCA->image->UCAVersion[0] & 0x1f) << 3 \| (coll-> UCA->image->UCAVersion[1] & 0x07);

	7218 } else {

	7219 versionInfo[3] = 0;

	7220 }

	7221 }

	7222

	7223

	7224 /* This internal API checks whether a character is tailored or not */

	7225 U_CAPI UBool U_EXPORT2

	7226 ucol_isTailored(const UCollator coll, const UChar u, UErrorCode status) {

	7227 if(U_FAILURE(*status) \|\| coll == NULL \|\| coll == coll->UCA) {

	7228 return FALSE;

	7229 }

	7230

	7231 uint32_t CE = UCOL_NOT_FOUND;

	7232 const UChar *ContractionStart = NULL;

	7233 if(u < 0x100) { /* latin-1 */

	7234 CE = coll->latinOneMapping[u];

	7235 if(coll->UCA && CE == coll->UCA->latinOneMapping[u]) {

	7236 return FALSE;

	7237 }

	7238 } else { /* regular */

	7239 CE = UTRIE_GET32_FROM_LEAD(&coll->mapping, u);

	7240 }

	7241

	7242 if(isContraction(CE)) {

	7243 ContractionStart = (UChar *)coll->image+getContractOffset(CE);

	7244 CE = *(coll->contractionCEs + (ContractionStart- coll->contractionIndex) );

	7245 }

	7246

	7247 return (UBool)(CE != UCOL_NOT_FOUND);

	7248 }

	7249

	7250

	7251 /****************************************************************************/

	7252 /* Following are the string compare functions */

	7253 /* */

	7254 /****************************************************************************/

	7255

	7256

	7257 /* ucol_checkIdent internal function. Does byte level string compare. */

	7258 /* Used by strcoll if strength == identical and strings */

	7259 /* are otherwise equal. */

	7260 /* */

	7261 /* Comparison must be done on NFD normalized strings. */

	7262 /* FCD is not good enough. */

	7263

	7264 static

	7265 UCollationResult ucol_checkIdent(collIterate sColl, collIterate tColl, UBoo l normalize, UErrorCode *status)

	7266 {

	7267 // When we arrive here, we can have normal strings or UCharIterators. Curren tly they are both

	7268 // of same type, but that doesn't really mean that it will stay that way.

	7269 int32_t comparison;

	7270

	7271 if (sColl->flags & UCOL_USE_ITERATOR) {

	7272 // The division for the array length may truncate the array size to

	7273 // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high

	7274 // for all platforms anyway.

	7275 UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];

	7276 UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];

	7277 UNormIterator sNIt = NULL, tNIt = NULL;

	7278 sNIt = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), status);

	7279 tNIt = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), status);

	7280 sColl->iterator->move(sColl->iterator, 0, UITER_START);

	7281 tColl->iterator->move(tColl->iterator, 0, UITER_START);

	7282 UCharIterator *sIt = unorm_setIter(sNIt, sColl->iterator, UNORM_NFD, sta tus);

	7283 UCharIterator *tIt = unorm_setIter(tNIt, tColl->iterator, UNORM_NFD, sta tus);

	7284 comparison = u_strCompareIter(sIt, tIt, TRUE);

	7285 unorm_closeIter(sNIt);

	7286 unorm_closeIter(tNIt);

	7287 } else {

	7288 int32_t sLen = (sColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(sColl- >endp - sColl->string) : -1;

	7289 const UChar *sBuf = sColl->string;

	7290 int32_t tLen = (tColl->flags & UCOL_ITER_HASLEN) ? (int32_t)(tColl- >endp - tColl->string) : -1;

	7291 const UChar *tBuf = tColl->string;

	7292

	7293 if (normalize) {

	7294 *status = U_ZERO_ERROR;

	7295 // Note: We could use Normalizer::compare() or similar, but for shor t strings

	7296 // which may not be in FCD it might be faster to just NFD them.

	7297 // Note: spanQuickCheckYes() + normalizeSecondAndAppend() rather tha n

	7298 // NFD'ing immediately might be faster for long strings,

	7299 // but string comparison is usually done on relatively short strings .

	7300 sColl->nfd->normalize(UnicodeString((sColl->flags & UCOL_ITER_HASLEN ) == 0, sBuf, sLen),

	7301 sColl->writableBuffer,

	7302 *status);

	7303 tColl->nfd->normalize(UnicodeString((tColl->flags & UCOL_ITER_HASLEN ) == 0, tBuf, tLen),

	7304 tColl->writableBuffer,

	7305 *status);

	7306 if(U_FAILURE(*status)) {

	7307 return UCOL_LESS;

	7308 }

	7309 comparison = sColl->writableBuffer.compareCodePointOrder(tColl->writ ableBuffer);

	7310 } else {

	7311 comparison = u_strCompare(sBuf, sLen, tBuf, tLen, TRUE);

	7312 }

	7313 }

	7314

	7315 if (comparison < 0) {

	7316 return UCOL_LESS;

	7317 } else if (comparison == 0) {

	7318 return UCOL_EQUAL;

	7319 } else /* comparison > 0 */ {

	7320 return UCOL_GREATER;

	7321 }

	7322 }

	7323

	/*  CEBuf - a small growable buffer used to save CEs within ucol_strcoll.   */
	/*          It starts out using its own embedded array.                     */

	/* Number of CE slots in the embedded array of a ucol_CEBuf. */
	#define UCOL_CEBUF_SIZE 512

	struct ucol_CEBuf {
	    uint32_t    *buf;                           /* start of the storage in use */
	    uint32_t    *endp;                          /* one past the last usable slot */
	    uint32_t    *pos;                           /* next slot to be written */
	    uint32_t     localArray[UCOL_CEBUF_SIZE];   /* embedded initial storage */
	};
	typedef struct ucol_CEBuf ucol_CEBuf;

	7334

	7335

	7336 static

	7337 inline void UCOL_INIT_CEBUF(ucol_CEBuf *b) {

	7338 (b)->buf = (b)->pos = (b)->localArray;

	7339 (b)->endp = (b)->buf + UCOL_CEBUF_SIZE;

	7340 }

	7341

	7342 static

	7343 void ucol_CEBuf_Expand(ucol_CEBuf b, collIterate ci, UErrorCode *status) {

	7344 uint32_t oldSize;

	7345 uint32_t newSize;

	7346 uint32_t *newBuf;

	7347

	7348 ci->flags \|= UCOL_ITER_ALLOCATED;

	7349 oldSize = (uint32_t)(b->pos - b->buf);

	7350 newSize = oldSize * 2;

	7351 newBuf = (uint32_t )uprv_malloc(newSize sizeof(uint32_t));

	7352 if(newBuf == NULL) {

	7353 *status = U_MEMORY_ALLOCATION_ERROR;

	7354 }

	7355 else {

	7356 uprv_memcpy(newBuf, b->buf, oldSize * sizeof(uint32_t));

	7357 if (b->buf != b->localArray) {

	7358 uprv_free(b->buf);

	7359 }

	7360 b->buf = newBuf;

	7361 b->endp = b->buf + newSize;

	7362 b->pos = b->buf + oldSize;

	7363 }

	7364 }

	7365

	7366 static

	7367 inline void UCOL_CEBUF_PUT(ucol_CEBuf b, uint32_t ce, collIterate ci, UErrorCo de *status) {

	7368 if (b->pos == b->endp) {

	7369 ucol_CEBuf_Expand(b, ci, status);

	7370 }

	7371 if (U_SUCCESS(*status)) {

	7372 *(b)->pos++ = ce;

	7373 }

	7374 }

	7375

	7376 /* This is a trick string compare function that goes in and uses sortkeys to com pare */

	7377 /* It is used when compare gets in trouble and needs to bail out */

	7378 static UCollationResult ucol_compareUsingSortKeys(collIterate *sColl,

	7379 collIterate *tColl,

	7380 UErrorCode *status)

	7381 {

	7382 uint8_t sourceKey[UCOL_MAX_BUFFER], targetKey[UCOL_MAX_BUFFER];

	7383 uint8_t *sourceKeyP = sourceKey;

	7384 uint8_t *targetKeyP = targetKey;

	7385 int32_t sourceKeyLen = UCOL_MAX_BUFFER, targetKeyLen = UCOL_MAX_BUFFER;

	7386 const UCollator *coll = sColl->coll;

	7387 const UChar *source = NULL;

	7388 const UChar *target = NULL;

	7389 int32_t result = UCOL_EQUAL;

	7390 UnicodeString sourceString, targetString;

	7391 int32_t sourceLength;

	7392 int32_t targetLength;

	7393

	7394 if(sColl->flags & UCOL_USE_ITERATOR) {

	7395 sColl->iterator->move(sColl->iterator, 0, UITER_START);

	7396 tColl->iterator->move(tColl->iterator, 0, UITER_START);

	7397 UChar32 c;

	7398 while((c=sColl->iterator->next(sColl->iterator))>=0) {

	7399 sourceString.append((UChar)c);

	7400 }

	7401 while((c=tColl->iterator->next(tColl->iterator))>=0) {

	7402 targetString.append((UChar)c);

	7403 }

	7404 source = sourceString.getBuffer();

	7405 sourceLength = sourceString.length();

	7406 target = targetString.getBuffer();

	7407 targetLength = targetString.length();

	7408 } else { // no iterators

	7409 sourceLength = (sColl->flags&UCOL_ITER_HASLEN)?(int32_t)(sColl->endp-sCo ll->string):-1;

	7410 targetLength = (tColl->flags&UCOL_ITER_HASLEN)?(int32_t)(tColl->endp-tCo ll->string):-1;

	7411 source = sColl->string;

	7412 target = tColl->string;

	7413 }

	7414

	7415

	7416

	7417 sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, sourc eKeyLen);

	7418 if(sourceKeyLen > UCOL_MAX_BUFFER) {

	7419 sourceKeyP = (uint8_t)uprv_malloc(sourceKeyLensizeof(uint8_t));

	7420 if(sourceKeyP == NULL) {

	7421 *status = U_MEMORY_ALLOCATION_ERROR;

	7422 goto cleanup_and_do_compare;

	7423 }

	7424 sourceKeyLen = ucol_getSortKey(coll, source, sourceLength, sourceKeyP, s ourceKeyLen);

	7425 }

	7426

	7427 targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, targe tKeyLen);

	7428 if(targetKeyLen > UCOL_MAX_BUFFER) {

	7429 targetKeyP = (uint8_t)uprv_malloc(targetKeyLensizeof(uint8_t));

	7430 if(targetKeyP == NULL) {

	7431 *status = U_MEMORY_ALLOCATION_ERROR;

	7432 goto cleanup_and_do_compare;

	7433 }

	7434 targetKeyLen = ucol_getSortKey(coll, target, targetLength, targetKeyP, t argetKeyLen);

	7435 }

	7436

	7437 result = uprv_strcmp((const char)sourceKeyP, (const char)targetKeyP);

	7438

	7439 cleanup_and_do_compare:

	7440 if(sourceKeyP != NULL && sourceKeyP != sourceKey) {

	7441 uprv_free(sourceKeyP);

	7442 }

	7443

	7444 if(targetKeyP != NULL && targetKeyP != targetKey) {

	7445 uprv_free(targetKeyP);

	7446 }

	7447

	7448 if(result<0) {

	7449 return UCOL_LESS;

	7450 } else if(result>0) {

	7451 return UCOL_GREATER;

	7452 } else {

	7453 return UCOL_EQUAL;

	7454 }

	7455 }

	7456

	7457

	7458 static UCollationResult

	7459 ucol_strcollRegular(collIterate sColl, collIterate tColl, UErrorCode *status)

	7460 {

	7461 U_ALIGN_CODE(16);

	7462

	7463 const UCollator *coll = sColl->coll;

	7464

	7465

	7466 // setting up the collator parameters

	7467 UColAttributeValue strength = coll->strength;

	7468 UBool initialCheckSecTer = (strength >= UCOL_SECONDARY);

	7469

	7470 UBool checkSecTer = initialCheckSecTer;

	7471 UBool checkTertiary = (strength >= UCOL_TERTIARY);

	7472 UBool checkQuad = (strength >= UCOL_QUATERNARY);

	7473 UBool checkIdent = (strength == UCOL_IDENTICAL);

	7474 UBool checkCase = (coll->caseLevel == UCOL_ON);

	7475 UBool isFrenchSec = (coll->frenchCollation == UCOL_ON) && checkSecTer;

	7476 UBool shifted = (coll->alternateHandling == UCOL_SHIFTED);

	7477 UBool qShifted = shifted && checkQuad;

	7478 UBool doHiragana = (coll->hiraganaQ == UCOL_ON) && checkQuad;

	7479

	7480 if(doHiragana && shifted) {

	7481 return (ucol_compareUsingSortKeys(sColl, tColl, status));

	7482 }

	7483 uint8_t caseSwitch = coll->caseSwitch;

	7484 uint8_t tertiaryMask = coll->tertiaryMask;

	7485

	7486 // This is the lowest primary value that will not be ignored if shifted

	7487 uint32_t LVT = (shifted)?(coll->variableTopValue<<16):0;

	7488

	7489 UCollationResult result = UCOL_EQUAL;

	7490 UCollationResult hirResult = UCOL_EQUAL;

	7491

	7492 // Preparing the CE buffers. They will be filled during the primary phase

	7493 ucol_CEBuf sCEs;

	7494 ucol_CEBuf tCEs;

	7495 UCOL_INIT_CEBUF(&sCEs);

	7496 UCOL_INIT_CEBUF(&tCEs);

	7497

	7498 uint32_t secS = 0, secT = 0;

	7499 uint32_t sOrder=0, tOrder=0;

	7500

	7501 // Non shifted primary processing is quite simple

	7502 if(!shifted) {

	7503 for(;;) {

	7504

	7505 // We fetch CEs until we hit a non ignorable primary or end.

	7506 do {

	7507 // We get the next CE

	7508 sOrder = ucol_IGetNextCE(coll, sColl, status);

	7509 // Stuff it in the buffer

	7510 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7511 // And keep just the primary part.

	7512 sOrder &= UCOL_PRIMARYMASK;

	7513 } while(sOrder == 0);

	7514

	7515 // see the comments on the above block

	7516 do {

	7517 tOrder = ucol_IGetNextCE(coll, tColl, status);

	7518 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7519 tOrder &= UCOL_PRIMARYMASK;

	7520 } while(tOrder == 0);

	7521

	7522 // if both primaries are the same

	7523 if(sOrder == tOrder) {

	7524 // and there are no more CEs, we advance to the next level

	7525 if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {

	7526 break;

	7527 }

	7528 if(doHiragana && hirResult == UCOL_EQUAL) {

	7529 if((sColl->flags & UCOL_WAS_HIRAGANA) != (tColl->flags & UCO L_WAS_HIRAGANA)) {

	7530 hirResult = ((sColl->flags & UCOL_WAS_HIRAGANA) > (tColl ->flags & UCOL_WAS_HIRAGANA))

	7531 ? UCOL_LESS:UCOL_GREATER;

	7532 }

	7533 }

	7534 } else {

	7535 // only need to check one for continuation

	7536 // if one is then the other must be or the preceding CE would be a prefix of the other

	7537 if (coll->leadBytePermutationTable != NULL && !isContinuation(sO rder)) {

	7538 sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) \| (sOrder & 0x00FFFFFF);

	7539 tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) \| (tOrder & 0x00FFFFFF);

	7540 }

	7541 // if two primaries are different, we are done

	7542 result = (sOrder < tOrder) ? UCOL_LESS: UCOL_GREATER;

	7543 goto commonReturn;

	7544 }

	7545 } // no primary difference... do the rest from the buffers

	7546 } else { // shifted - do a slightly more complicated processing :)

	7547 for(;;) {

	7548 UBool sInShifted = FALSE;

	7549 UBool tInShifted = FALSE;

	7550 // This version of code can be refactored. However, it seems easier to understand this way.

	7551 // Source loop. Sam as the target loop.

	7552 for(;;) {

	7553 sOrder = ucol_IGetNextCE(coll, sColl, status);

	7554 if(sOrder == UCOL_NO_MORE_CES) {

	7555 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7556 break;

	7557 } else if(sOrder == 0 \|\| (sInShifted && (sOrder & UCOL_PRIMARYMA SK) == 0)) {

	7558 /* UCA amendment - ignore ignorables that follow shifted cod e points */

	7559 continue;

	7560 } else if(isContinuation(sOrder)) {

	7561 if((sOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary va lue */

	7562 if(sInShifted) {

	7563 sOrder = (sOrder & UCOL_PRIMARYMASK) \| 0xC0; /* pres erve interesting continuation */

	7564 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7565 continue;

	7566 } else {

	7567 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7568 break;

	7569 }

	7570 } else { /* Just lower level values */

	7571 if(sInShifted) {

	7572 continue;

	7573 } else {

	7574 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7575 continue;

	7576 }

	7577 }

	7578 } else { /* regular */

	7579 if(coll->leadBytePermutationTable != NULL){

	7580 sOrder = (coll->leadBytePermutationTable[sOrder>>24] << 24) \| (sOrder & 0x00FFFFFF);

	7581 }

	7582 if((sOrder & UCOL_PRIMARYMASK) > LVT) {

	7583 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7584 break;

	7585 } else {

	7586 if((sOrder & UCOL_PRIMARYMASK) > 0) {

	7587 sInShifted = TRUE;

	7588 sOrder &= UCOL_PRIMARYMASK;

	7589 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7590 continue;

	7591 } else {

	7592 UCOL_CEBUF_PUT(&sCEs, sOrder, sColl, status);

	7593 sInShifted = FALSE;

	7594 continue;

	7595 }

	7596 }

	7597 }

	7598 }

	7599 sOrder &= UCOL_PRIMARYMASK;

	7600 sInShifted = FALSE;

	7601

	7602 for(;;) {

	7603 tOrder = ucol_IGetNextCE(coll, tColl, status);

	7604 if(tOrder == UCOL_NO_MORE_CES) {

	7605 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7606 break;

	7607 } else if(tOrder == 0 \|\| (tInShifted && (tOrder & UCOL_PRIMARYMA SK) == 0)) {

	7608 /* UCA amendment - ignore ignorables that follow shifted cod e points */

	7609 continue;

	7610 } else if(isContinuation(tOrder)) {

	7611 if((tOrder & UCOL_PRIMARYMASK) > 0) { /* There is primary va lue */

	7612 if(tInShifted) {

	7613 tOrder = (tOrder & UCOL_PRIMARYMASK) \| 0xC0; /* pres erve interesting continuation */

	7614 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7615 continue;

	7616 } else {

	7617 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7618 break;

	7619 }

	7620 } else { /* Just lower level values */

	7621 if(tInShifted) {

	7622 continue;

	7623 } else {

	7624 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7625 continue;

	7626 }

	7627 }

	7628 } else { /* regular */

	7629 if(coll->leadBytePermutationTable != NULL){

	7630 tOrder = (coll->leadBytePermutationTable[tOrder>>24] << 24) \| (tOrder & 0x00FFFFFF);

	7631 }

	7632 if((tOrder & UCOL_PRIMARYMASK) > LVT) {

	7633 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7634 break;

	7635 } else {

	7636 if((tOrder & UCOL_PRIMARYMASK) > 0) {

	7637 tInShifted = TRUE;

	7638 tOrder &= UCOL_PRIMARYMASK;

	7639 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7640 continue;

	7641 } else {

	7642 UCOL_CEBUF_PUT(&tCEs, tOrder, tColl, status);

	7643 tInShifted = FALSE;

	7644 continue;

	7645 }

	7646 }

	7647 }

	7648 }

	7649 tOrder &= UCOL_PRIMARYMASK;

	7650 tInShifted = FALSE;

	7651

	7652 if(sOrder == tOrder) {

	7653 /*

	7654 if(doHiragana && hirResult == UCOL_EQUAL) {

	7655 if((sColl.flags & UCOL_WAS_HIRAGANA) != (tColl.flags & UCOL_WAS_ HIRAGANA)) {

	7656 hirResult = ((sColl.flags & UCOL_WAS_HIRAGANA) > (tColl.flags & UCOL_WAS_HIRAGANA))

	7657 ? UCOL_LESS:UCOL_GREATER;

	7658 }

	7659 }

	7660 */

	7661 if(sOrder == UCOL_NO_MORE_CES_PRIMARY) {

	7662 break;

	7663 } else {

	7664 sOrder = 0;

	7665 tOrder = 0;

	7666 continue;

	7667 }

	7668 } else {

	7669 result = (sOrder < tOrder) ? UCOL_LESS : UCOL_GREATER;

	7670 goto commonReturn;

	7671 }

	7672 } /* no primary difference... do the rest from the buffers */

	7673 }

	7674

	7675 /* now, we're gonna reexamine collected CEs */

	7676 uint32_t *sCE;

	7677 uint32_t *tCE;

	7678

	7679 /* This is the secondary level of comparison */

	7680 if(checkSecTer) {

	7681 if(!isFrenchSec) { /* normal */

	7682 sCE = sCEs.buf;

	7683 tCE = tCEs.buf;

	7684 for(;;) {

	7685 while (secS == 0) {

	7686 secS = *(sCE++) & UCOL_SECONDARYMASK;

	7687 }

	7688

	7689 while(secT == 0) {

	7690 secT = *(tCE++) & UCOL_SECONDARYMASK;

	7691 }

	7692

	7693 if(secS == secT) {

	7694 if(secS == UCOL_NO_MORE_CES_SECONDARY) {

	7695 break;

	7696 } else {

	7697 secS = 0; secT = 0;

	7698 continue;

	7699 }

	7700 } else {

	7701 result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;

	7702 goto commonReturn;

	7703 }

	7704 }

	7705 } else { /* do the French */

	7706 uint32_t *sCESave = NULL;

	7707 uint32_t *tCESave = NULL;

	7708 sCE = sCEs.pos-2; /* this could also be sCEs-- if needs to be optimi zed */

	7709 tCE = tCEs.pos-2;

	7710 for(;;) {

	7711 while (secS == 0 && sCE >= sCEs.buf) {

	7712 if(sCESave == NULL) {

	7713 secS = *(sCE--);

	7714 if(isContinuation(secS)) {

	7715 while(isContinuation(secS = *(sCE--)))

	7716 ;

	7717 /* after this, secS has the start of continuation, a nd sCEs points before that */

	7718 sCESave = sCE; /* we save it, so that we know where to come back AND that we need to go forward */

	7719 sCE+=2; /* need to point to the first continuation CP */

	7720 /* However, now you can just continue doing stuff */

	7721 }

	7722 } else {

	7723 secS = *(sCE++);

	7724 if(!isContinuation(secS)) { /* This means we have finish ed with this cont */

	7725 sCE = sCESave; /* reset the pointer to be fore continuation */

	7726 sCESave = NULL;

	7727 secS = 0; /* Fetch a fresh CE before the continuati on sequence. */

	7728 continue;

	7729 }

	7730 }

	7731 secS &= UCOL_SECONDARYMASK; /* remove the continuation bit * /

	7732 }

	7733

	7734 while(secT == 0 && tCE >= tCEs.buf) {

	7735 if(tCESave == NULL) {

	7736 secT = *(tCE--);

	7737 if(isContinuation(secT)) {

	7738 while(isContinuation(secT = *(tCE--)))

	7739 ;

	7740 /* after this, secS has the start of continuation, a nd sCEs points before that */

	7741 tCESave = tCE; /* we save it, so that we know where to come back AND that we need to go forward */

	7742 tCE+=2; /* need to point to the first continuation CP */

	7743 /* However, now you can just continue doing stuff */

	7744 }

	7745 } else {

	7746 secT = *(tCE++);

	7747 if(!isContinuation(secT)) { /* This means we have finish ed with this cont */

	7748 tCE = tCESave; /* reset the pointer to befo re continuation */

	7749 tCESave = NULL;

	7750 secT = 0; /* Fetch a fresh CE before the continuati on sequence. */

	7751 continue;

	7752 }

	7753 }

	7754 secT &= UCOL_SECONDARYMASK; /* remove the continuation bit * /

	7755 }

	7756

	7757 if(secS == secT) {

	7758 if(secS == UCOL_NO_MORE_CES_SECONDARY \|\| (sCE < sCEs.buf && tCE < tCEs.buf)) {

	7759 break;

	7760 } else {

	7761 secS = 0; secT = 0;

	7762 continue;

	7763 }

	7764 } else {

	7765 result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;

	7766 goto commonReturn;

	7767 }

	7768 }

	7769 }

	7770 }

	7771

	7772 /* doing the case bit */

	7773 if(checkCase) {

	7774 sCE = sCEs.buf;

	7775 tCE = tCEs.buf;

	7776 for(;;) {

	7777 while((secS & UCOL_REMOVE_CASE) == 0) {

	7778 if(!isContinuation(*sCE++)) {

	7779 secS =*(sCE-1);

	7780 if(((secS & UCOL_PRIMARYMASK) != 0) \|\| strength > UCOL_PRIMA RY) {

	7781 // primary ignorables should not be considered on the ca se level when the strength is primary

	7782 // otherwise, the CEs stop being well-formed

	7783 secS &= UCOL_TERT_CASE_MASK;

	7784 secS ^= caseSwitch;

	7785 } else {

	7786 secS = 0;

	7787 }

	7788 } else {

	7789 secS = 0;

	7790 }

	7791 }

	7792

	7793 while((secT & UCOL_REMOVE_CASE) == 0) {

	7794 if(!isContinuation(*tCE++)) {

	7795 secT = *(tCE-1);

	7796 if(((secT & UCOL_PRIMARYMASK) != 0) \|\| strength > UCOL_PRIMA RY) {

	7797 // primary ignorables should not be considered on the ca se level when the strength is primary

	7798 // otherwise, the CEs stop being well-formed

	7799 secT &= UCOL_TERT_CASE_MASK;

	7800 secT ^= caseSwitch;

	7801 } else {

	7802 secT = 0;

	7803 }

	7804 } else {

	7805 secT = 0;

	7806 }

	7807 }

	7808

	7809 if((secS & UCOL_CASE_BIT_MASK) < (secT & UCOL_CASE_BIT_MASK)) {

	7810 result = UCOL_LESS;

	7811 goto commonReturn;

	7812 } else if((secS & UCOL_CASE_BIT_MASK) > (secT & UCOL_CASE_BIT_MASK)) {

	7813 result = UCOL_GREATER;

	7814 goto commonReturn;

	7815 }

	7816

	7817 if((secS & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY \|\| (secT & UCOL_REMOVE_CASE) == UCOL_NO_MORE_CES_TERTIARY ) {

	7818 break;

	7819 } else {

	7820 secS = 0;

	7821 secT = 0;

	7822 }

	7823 }

	7824 }

	7825

	7826 /* Tertiary level */

	7827 if(checkTertiary) {

	7828 secS = 0;

	7829 secT = 0;

	7830 sCE = sCEs.buf;

	7831 tCE = tCEs.buf;

	7832 for(;;) {

	7833 while((secS & UCOL_REMOVE_CASE) == 0) {

	7834 secS = *(sCE++) & tertiaryMask;

	7835 if(!isContinuation(secS)) {

	7836 secS ^= caseSwitch;

	7837 } else {

	7838 secS &= UCOL_REMOVE_CASE;

	7839 }

	7840 }

	7841

	7842 while((secT & UCOL_REMOVE_CASE) == 0) {

	7843 secT = *(tCE++) & tertiaryMask;

	7844 if(!isContinuation(secT)) {

	7845 secT ^= caseSwitch;

	7846 } else {

	7847 secT &= UCOL_REMOVE_CASE;

	7848 }

	7849 }

	7850

	7851 if(secS == secT) {

	7852 if((secS & UCOL_REMOVE_CASE) == 1) {

	7853 break;

	7854 } else {

	7855 secS = 0; secT = 0;

	7856 continue;

	7857 }

	7858 } else {

	7859 result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;

	7860 goto commonReturn;

	7861 }

	7862 }

	7863 }

	7864

	7865

	7866 if(qShifted /checkQuad/) {

	7867 UBool sInShifted = TRUE;

	7868 UBool tInShifted = TRUE;

	7869 secS = 0;

	7870 secT = 0;

	7871 sCE = sCEs.buf;

	7872 tCE = tCEs.buf;

	7873 for(;;) {

	7874 while((secS == 0 && secS != UCOL_NO_MORE_CES) \|\| (isContinuation(sec S) && !sInShifted)) {

	7875 secS = *(sCE++);

	7876 if(isContinuation(secS)) {

	7877 if(!sInShifted) {

	7878 continue;

	7879 }

	7880 } else if(secS > LVT \|\| (secS & UCOL_PRIMARYMASK) == 0) { /* non continuation */

	7881 secS = UCOL_PRIMARYMASK;

	7882 sInShifted = FALSE;

	7883 } else {

	7884 sInShifted = TRUE;

	7885 }

	7886 }

	7887 secS &= UCOL_PRIMARYMASK;

	7888

	7889

	7890 while((secT == 0 && secT != UCOL_NO_MORE_CES) \|\| (isContinuation(sec T) && !tInShifted)) {

	7891 secT = *(tCE++);

	7892 if(isContinuation(secT)) {

	7893 if(!tInShifted) {

	7894 continue;

	7895 }

	7896 } else if(secT > LVT \|\| (secT & UCOL_PRIMARYMASK) == 0) {

	7897 secT = UCOL_PRIMARYMASK;

	7898 tInShifted = FALSE;

	7899 } else {

	7900 tInShifted = TRUE;

	7901 }

	7902 }

	7903 secT &= UCOL_PRIMARYMASK;

	7904

	7905 if(secS == secT) {

	7906 if(secS == UCOL_NO_MORE_CES_PRIMARY) {

	7907 break;

	7908 } else {

	7909 secS = 0; secT = 0;

	7910 continue;

	7911 }

	7912 } else {

	7913 result = (secS < secT) ? UCOL_LESS : UCOL_GREATER;

	7914 goto commonReturn;

	7915 }

	7916 }

	7917 } else if(doHiragana && hirResult != UCOL_EQUAL) {

	7918 // If we're fine on quaternaries, we might be different

	7919 // on Hiragana. This, however, might fail us in shifted.

	7920 result = hirResult;

	7921 goto commonReturn;

	7922 }

	7923

	7924 /* For IDENTICAL comparisons, we use a bitwise character comparison */

	7925 /* as a tiebreaker if all else is equal. */

	7926 /* Getting here should be quite rare - strings are not identical - */

	7927 /* that is checked first, but compared == through all other checks. */

	7928 if(checkIdent)

	7929 {

	7930 //result = ucol_checkIdent(&sColl, &tColl, coll->normalizationMode == UC OL_ON);

	7931 result = ucol_checkIdent(sColl, tColl, TRUE, status);

	7932 }

	7933

	7934 commonReturn:

	7935 if ((sColl->flags \| tColl->flags) & UCOL_ITER_ALLOCATED) {

	7936 if (sCEs.buf != sCEs.localArray ) {

	7937 uprv_free(sCEs.buf);

	7938 }

	7939 if (tCEs.buf != tCEs.localArray ) {

	7940 uprv_free(tCEs.buf);

	7941 }

	7942 }

	7943

	7944 return result;

	7945 }

	7946

	7947 static UCollationResult

	7948 ucol_strcollRegular(const UCollator *coll,

	7949 const UChar *source, int32_t sourceLength,

	7950 const UChar *target, int32_t targetLength,

	7951 UErrorCode *status) {

	7952 collIterate sColl, tColl;

	7953 // Preparing the context objects for iterating over strings

	7954 IInit_collIterate(coll, source, sourceLength, &sColl, status);

	7955 IInit_collIterate(coll, target, targetLength, &tColl, status);

	7956 if(U_FAILURE(*status)) {

	7957 return UCOL_LESS;

	7958 }

	7959 return ucol_strcollRegular(&sColl, &tColl, status);

	7960 }

	7961

	7962 static inline uint32_t

	7963 ucol_getLatinOneContraction(const UCollator *coll, int32_t strength,

	7964 uint32_t CE, const UChar s, int32_t index, int32_t l en)

	7965 {

	7966 const UChar UCharOffset = (UChar )coll->image+getContractOffset(CE&0xFFF);

	7967 int32_t latinOneOffset = (CE & 0x00FFF000) >> 12;

	7968 int32_t offset = 1;

	7969 UChar schar = 0, tchar = 0;

	7970

	7971 for(;;) {

	7972 if(len == -1) {

	7973 if(s[*index] == 0) { // end of string

	7974 return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOn eOffset]);

	7975 } else {

	7976 schar = s[*index];

	7977 }

	7978 } else {

	7979 if(*index == len) {

	7980 return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOn eOffset]);

	7981 } else {

	7982 schar = s[*index];

	7983 }

	7984 }

	7985

	7986 while(schar > (tchar = (UCharOffset+offset))) { / since the contractio n codepoints should be ordered, we skip all that are smaller */

	7987 offset++;

	7988 }

	7989

	7990 if (schar == tchar) {

	7991 (*index)++;

	7992 return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff set+offset]);

	7993 }

	7994 else

	7995 {

	7996 if(schar & 0xFF00 /> UCOL_ENDOFLATIN1RANGE/) {

	7997 return UCOL_BAIL_OUT_CE;

	7998 }

	7999 // skip completely ignorables

	8000 uint32_t isZeroCE = UTRIE_GET32_FROM_LEAD(&coll->mapping, schar);

	8001 if(isZeroCE == 0) { // we have to ignore completely ignorables

	8002 (*index)++;

	8003 continue;

	8004 }

	8005

	8006 return(coll->latinOneCEs[strength*coll->latinOneTableLen+latinOneOff set]);

	8007 }

	8008 }

	8009 }

	8010

	8011

	8012 /**

	8013 * This is a fast strcoll, geared towards text in Latin-1.

	8014 * It supports contractions of size two, French secondaries

	8015 * and case switching. You can use it with strengths primary

	8016 * to tertiary. It does not support shifted and case level.

	8017 * It relies on the table build by setupLatin1Table. If it

	8018 * doesn't understand something, it will go to the regular

	8019 * strcoll.

	8020 */

	8021 static UCollationResult

	8022 ucol_strcollUseLatin1( const UCollator *coll,

	8023 const UChar *source,

	8024 int32_t sLen,

	8025 const UChar *target,

	8026 int32_t tLen,

	8027 UErrorCode *status)

	8028 {

	8029 U_ALIGN_CODE(16);

	8030 int32_t strength = coll->strength;

	8031

	8032 int32_t sIndex = 0, tIndex = 0;

	8033 UChar sChar = 0, tChar = 0;

	8034 uint32_t sOrder=0, tOrder=0;

	8035

	8036 UBool endOfSource = FALSE;

	8037

	8038 uint32_t *elements = coll->latinOneCEs;

	8039

	8040 UBool haveContractions = FALSE; // if we have contractions in our string

	8041 // we cannot do French secondary

	8042

	8043 // Do the primary level

	8044 for(;;) {

	8045 while(sOrder==0) { // this loop skips primary ignorables

	8046 // sOrder=getNextlatinOneCE(source);

	8047 if(sLen==-1) { // handling zero terminated strings

	8048 sChar=source[sIndex++];

	8049 if(sChar==0) {

	8050 endOfSource = TRUE;

	8051 break;

	8052 }

	8053 } else { // handling strings with known length

	8054 if(sIndex==sLen) {

	8055 endOfSource = TRUE;

	8056 break;

	8057 }

	8058 sChar=source[sIndex++];

	8059 }

	8060 if(sChar&0xFF00) { // if we encounter non-latin-1, we bail out (sCha r > 0xFF, but this is faster on win32)

	8061 //fprintf(stderr, "R");

	8062 return ucol_strcollRegular(coll, source, sLen, target, tLen, sta tus);

	8063 }

	8064 sOrder = elements[sChar];

	8065 if(sOrder >= UCOL_NOT_FOUND) { // if we got a special

	8066 // specials can basically be either contractions or bail-out sig ns. If we get anything

	8067 // else, we'll bail out anywasy

	8068 if(getCETag(sOrder) == CONTRACTION_TAG) {

	8069 sOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, sOr der, source, &sIndex, sLen);

	8070 haveContractions = TRUE; // if there are contractions, we ca nnot do French secondary

	8071 // However, if there are contractions in the table, but we a lways use just one char,

	8072 // we might be able to do French. This should be checked out .

	8073 }

	8074 if(sOrder >= UCOL_NOT_FOUND /== UCOL_BAIL_OUT_CE/) {

	8075 //fprintf(stderr, "S");

	8076 return ucol_strcollRegular(coll, source, sLen, target, tLen, status);

	8077 }

	8078 }

	8079 }

	8080

	8081 while(tOrder==0) { // this loop skips primary ignorables

	8082 // tOrder=getNextlatinOneCE(target);

	8083 if(tLen==-1) { // handling zero terminated strings

	8084 tChar=target[tIndex++];

	8085 if(tChar==0) {

	8086 if(endOfSource) { // this is different than source loop,

	8087 // as we already know that source loop is done here,

	8088 // so we can either finish the primary loop if both

	8089 // strings are done or anounce the result if only

	8090 // target is done. Same below.

	8091 goto endOfPrimLoop;

	8092 } else {

	8093 return UCOL_GREATER;

	8094 }

	8095 }

	8096 } else { // handling strings with known length

	8097 if(tIndex==tLen) {

	8098 if(endOfSource) {

	8099 goto endOfPrimLoop;

	8100 } else {

	8101 return UCOL_GREATER;

	8102 }

	8103 }

	8104 tChar=target[tIndex++];

	8105 }

	8106 if(tChar&0xFF00) { // if we encounter non-latin-1, we bail out (sCha r > 0xFF, but this is faster on win32)

	8107 //fprintf(stderr, "R");

	8108 return ucol_strcollRegular(coll, source, sLen, target, tLen, sta tus);

	8109 }

	8110 tOrder = elements[tChar];

	8111 if(tOrder >= UCOL_NOT_FOUND) {

	8112 // Handling specials, see the comments for source

	8113 if(getCETag(tOrder) == CONTRACTION_TAG) {

	8114 tOrder = ucol_getLatinOneContraction(coll, UCOL_PRIMARY, tOr der, target, &tIndex, tLen);

	8115 haveContractions = TRUE;

	8116 }

	8117 if(tOrder >= UCOL_NOT_FOUND /== UCOL_BAIL_OUT_CE/) {

	8118 //fprintf(stderr, "S");

	8119 return ucol_strcollRegular(coll, source, sLen, target, tLen, status);

	8120 }

	8121 }

	8122 }

	8123 if(endOfSource) { // source is finished, but target is not, say the resu lt.

	8124 return UCOL_LESS;

	8125 }

	8126

	8127 if(sOrder == tOrder) { // if we have same CEs, we continue the loop

	8128 sOrder = 0; tOrder = 0;

	8129 continue;

	8130 } else {

	8131 // compare current top bytes

	8132 if(((sOrder^tOrder)&0xFF000000)!=0) {

	8133 // top bytes differ, return difference

	8134 if(sOrder < tOrder) {

	8135 return UCOL_LESS;

	8136 } else if(sOrder > tOrder) {

	8137 return UCOL_GREATER;

	8138 }

	8139 // instead of return (int32_t)(sOrder>>24)-(int32_t)(tOrder>>24) ;

	8140 // since we must return enum value

	8141 }

	8142

	8143 // top bytes match, continue with following bytes

	8144 sOrder<<=8;

	8145 tOrder<<=8;

	8146 }

	8147 }

	8148

	8149 endOfPrimLoop:

	8150 // after primary loop, we definitely know the sizes of strings,

	8151 // so we set it and use simpler loop for secondaries and tertiaries

	8152 sLen = sIndex; tLen = tIndex;

	8153 if(strength >= UCOL_SECONDARY) {

	8154 // adjust the table beggining

	8155 elements += coll->latinOneTableLen;

	8156 endOfSource = FALSE;

	8157

	8158 if(coll->frenchCollation == UCOL_OFF) { // non French

	8159 // This loop is a simplified copy of primary loop

	8160 // at this point we know that whole strings are latin-1, so we don't

	8161 // check for that. We also know that we only have contractions as

	8162 // specials.

	8163 sIndex = 0; tIndex = 0;

	8164 for(;;) {

	8165 while(sOrder==0) {

	8166 if(sIndex==sLen) {

	8167 endOfSource = TRUE;

	8168 break;

	8169 }

	8170 sChar=source[sIndex++];

	8171 sOrder = elements[sChar];

	8172 if(sOrder > UCOL_NOT_FOUND) {

	8173 sOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDAR Y, sOrder, source, &sIndex, sLen);

	8174 }

	8175 }

	8176

	8177 while(tOrder==0) {

	8178 if(tIndex==tLen) {

	8179 if(endOfSource) {

	8180 goto endOfSecLoop;

	8181 } else {

	8182 return UCOL_GREATER;

	8183 }

	8184 }

	8185 tChar=target[tIndex++];

	8186 tOrder = elements[tChar];

	8187 if(tOrder > UCOL_NOT_FOUND) {

	8188 tOrder = ucol_getLatinOneContraction(coll, UCOL_SECONDAR Y, tOrder, target, &tIndex, tLen);

	8189 }

	8190 }

	8191 if(endOfSource) {

	8192 return UCOL_LESS;

	8193 }

	8194

	8195 if(sOrder == tOrder) {

	8196 sOrder = 0; tOrder = 0;

	8197 continue;

	8198 } else {

	8199 // see primary loop for comments on this

	8200 if(((sOrder^tOrder)&0xFF000000)!=0) {

	8201 if(sOrder < tOrder) {

	8202 return UCOL_LESS;

	8203 } else if(sOrder > tOrder) {

	8204 return UCOL_GREATER;

	8205 }

	8206 }

	8207 sOrder<<=8;

	8208 tOrder<<=8;

	8209 }

	8210 }

	8211 } else { // French

	8212 if(haveContractions) { // if we have contractions, we have to bail o ut

	8213 // since we don't really know how to handle them here

	8214 return ucol_strcollRegular(coll, source, sLen, target, tLen, sta tus);

	8215 }

	8216 // For French, we go backwards

	8217 sIndex = sLen; tIndex = tLen;

	8218 for(;;) {

	8219 while(sOrder==0) {

	8220 if(sIndex==0) {

	8221 endOfSource = TRUE;

	8222 break;

	8223 }

	8224 sChar=source[--sIndex];

	8225 sOrder = elements[sChar];

	8226 // don't even look for contractions

	8227 }

	8228

	8229 while(tOrder==0) {

	8230 if(tIndex==0) {

	8231 if(endOfSource) {

	8232 goto endOfSecLoop;

	8233 } else {

	8234 return UCOL_GREATER;

	8235 }

	8236 }

	8237 tChar=target[--tIndex];

	8238 tOrder = elements[tChar];

	8239 // don't even look for contractions

	8240 }

	8241 if(endOfSource) {

	8242 return UCOL_LESS;

	8243 }

	8244

	8245 if(sOrder == tOrder) {

	8246 sOrder = 0; tOrder = 0;

	8247 continue;

	8248 } else {

	8249 // see the primary loop for comments

	8250 if(((sOrder^tOrder)&0xFF000000)!=0) {

	8251 if(sOrder < tOrder) {

	8252 return UCOL_LESS;

	8253 } else if(sOrder > tOrder) {

	8254 return UCOL_GREATER;

	8255 }

	8256 }

	8257 sOrder<<=8;

	8258 tOrder<<=8;

	8259 }

	8260 }

	8261 }

	8262 }

	8263

	8264 endOfSecLoop:

	8265 if(strength >= UCOL_TERTIARY) {

	8266 // tertiary loop is the same as secondary (except no French)

	8267 elements += coll->latinOneTableLen;

	8268 sIndex = 0; tIndex = 0;

	8269 endOfSource = FALSE;

	8270 for(;;) {

	8271 while(sOrder==0) {

	8272 if(sIndex==sLen) {

	8273 endOfSource = TRUE;

	8274 break;

	8275 }

	8276 sChar=source[sIndex++];

	8277 sOrder = elements[sChar];

	8278 if(sOrder > UCOL_NOT_FOUND) {

	8279 sOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, sO rder, source, &sIndex, sLen);

	8280 }

	8281 }

	8282 while(tOrder==0) {

	8283 if(tIndex==tLen) {

	8284 if(endOfSource) {

	8285 return UCOL_EQUAL; // if both strings are at the end, th ey are equal

	8286 } else {

	8287 return UCOL_GREATER;

	8288 }

	8289 }

	8290 tChar=target[tIndex++];

	8291 tOrder = elements[tChar];

	8292 if(tOrder > UCOL_NOT_FOUND) {

	8293 tOrder = ucol_getLatinOneContraction(coll, UCOL_TERTIARY, tO rder, target, &tIndex, tLen);

	8294 }

	8295 }

	8296 if(endOfSource) {

	8297 return UCOL_LESS;

	8298 }

	8299 if(sOrder == tOrder) {

	8300 sOrder = 0; tOrder = 0;

	8301 continue;

	8302 } else {

	8303 if(((sOrder^tOrder)&0xff000000)!=0) {

	8304 if(sOrder < tOrder) {

	8305 return UCOL_LESS;

	8306 } else if(sOrder > tOrder) {

	8307 return UCOL_GREATER;

	8308 }

	8309 }

	8310 sOrder<<=8;

	8311 tOrder<<=8;

	8312 }

	8313 }

	8314 }

	8315 return UCOL_EQUAL;

	8316 }

	8317

	8318

	8319 U_CAPI UCollationResult U_EXPORT2

	8320 ucol_strcollIter( const UCollator *coll,

	8321 UCharIterator *sIter,

	8322 UCharIterator *tIter,

	8323 UErrorCode *status)

	8324 {

	8325 if(!status \|\| U_FAILURE(*status)) {

	8326 return UCOL_EQUAL;

	8327 }

	8328

	8329 UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);

	8330 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIt er);

	8331

	8332 if (sIter == tIter) {

	8333 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)

	8334 return UCOL_EQUAL;

	8335 }

	8336 if(sIter == NULL \|\| tIter == NULL \|\| coll == NULL) {

	8337 *status = U_ILLEGAL_ARGUMENT_ERROR;

	8338 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)

	8339 return UCOL_EQUAL;

	8340 }

	8341

	8342 UCollationResult result = UCOL_EQUAL;

	8343

	8344 // Preparing the context objects for iterating over strings

	8345 collIterate sColl, tColl;

	8346 IInit_collIterate(coll, NULL, -1, &sColl, status);

	8347 IInit_collIterate(coll, NULL, -1, &tColl, status);

	8348 if(U_FAILURE(*status)) {

	8349 UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status)

	8350 return UCOL_EQUAL;

	8351 }

	8352 // The division for the array length may truncate the array size to

	8353 // a little less than UNORM_ITER_SIZE, but that size is dimensioned too high

	8354 // for all platforms anyway.

	8355 UAlignedMemory stackNormIter1[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];

	8356 UAlignedMemory stackNormIter2[UNORM_ITER_SIZE/sizeof(UAlignedMemory)];

	8357 UNormIterator sNormIter = NULL, tNormIter = NULL;

	8358

	8359 sColl.iterator = sIter;

	8360 sColl.flags \|= UCOL_USE_ITERATOR;

	8361 tColl.flags \|= UCOL_USE_ITERATOR;

	8362 tColl.iterator = tIter;

	8363

	8364 if(ucol_getAttribute(coll, UCOL_NORMALIZATION_MODE, status) == UCOL_ON) {

	8365 sNormIter = unorm_openIter(stackNormIter1, sizeof(stackNormIter1), statu s);

	8366 sColl.iterator = unorm_setIter(sNormIter, sIter, UNORM_FCD, status);

	8367 sColl.flags &= ~UCOL_ITER_NORM;

	8368

	8369 tNormIter = unorm_openIter(stackNormIter2, sizeof(stackNormIter2), statu s);

	8370 tColl.iterator = unorm_setIter(tNormIter, tIter, UNORM_FCD, status);

	8371 tColl.flags &= ~UCOL_ITER_NORM;

	8372 }

	8373

	8374 UChar32 sChar = U_SENTINEL, tChar = U_SENTINEL;

	8375

	8376 while((sChar = sColl.iterator->next(sColl.iterator)) ==

	8377 (tChar = tColl.iterator->next(tColl.iterator))) {

	8378 if(sChar == U_SENTINEL) {

	8379 result = UCOL_EQUAL;

	8380 goto end_compare;

	8381 }

	8382 }

	8383

	8384 if(sChar == U_SENTINEL) {

	8385 tChar = tColl.iterator->previous(tColl.iterator);

	8386 }

	8387

	8388 if(tChar == U_SENTINEL) {

	8389 sChar = sColl.iterator->previous(sColl.iterator);

	8390 }

	8391

	8392 sChar = sColl.iterator->previous(sColl.iterator);

	8393 tChar = tColl.iterator->previous(tColl.iterator);

	8394

	8395 if (ucol_unsafeCP((UChar)sChar, coll) \|\| ucol_unsafeCP((UChar)tChar, coll))

	8396 {

	8397 // We are stopped in the middle of a contraction.

	8398 // Scan backwards through the == part of the string looking for the star t of the contraction.

	8399 // It doesn't matter which string we scan, since they are the same in this region.

	8400 do

	8401 {

	8402 sChar = sColl.iterator->previous(sColl.iterator);

	8403 tChar = tColl.iterator->previous(tColl.iterator);

	8404 }

	8405 while (sChar != U_SENTINEL && ucol_unsafeCP((UChar)sChar, coll));

	8406 }

	8407

	8408

	8409 if(U_SUCCESS(*status)) {

	8410 result = ucol_strcollRegular(&sColl, &tColl, status);

	8411 }

	8412

	8413 end_compare:

	8414 if(sNormIter \|\| tNormIter) {

	8415 unorm_closeIter(sNormIter);

	8416 unorm_closeIter(tNormIter);

	8417 }

	8418

	8419 UTRACE_EXIT_VALUE_STATUS(result, *status)

	8420 return result;

	8421 }

	8422

	8423

	8424 /* */

	8425 /* ucol_strcoll Main public API string comparison function */

	8426 /* */

	8427 U_CAPI UCollationResult U_EXPORT2

	8428 ucol_strcoll( const UCollator *coll,

	8429 const UChar *source,

	8430 int32_t sourceLength,

	8431 const UChar *target,

	8432 int32_t targetLength)

	8433 {

	8434 U_ALIGN_CODE(16);

	8435

	8436 UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);

	8437 if (UTRACE_LEVEL(UTRACE_VERBOSE)) {

	8438 UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, sour ce, target);

	8439 UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLengt h);

	8440 UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLengt h);

	8441 }

	8442

	8443 if(source == NULL \|\| target == NULL) {

	8444 // do not crash, but return. Should have

	8445 // status argument to return error.

	8446 UTRACE_EXIT_VALUE(UCOL_EQUAL);

	8447 return UCOL_EQUAL;

	8448 }

	8449

	8450 /* Quick check if source and target are same strings. */

	8451 /* They should either both be NULL terminated or the explicit length should be set on both. */

	8452 if (source==target && sourceLength==targetLength) {

	8453 UTRACE_EXIT_VALUE(UCOL_EQUAL);

	8454 return UCOL_EQUAL;

	8455 }

	8456

	8457 /* Scan the strings. Find: */

	8458 /* The length of any leading portion that is equal */

	8459 /* Whether they are exactly equal. (in which case we just return) */

	8460 const UChar *pSrc = source;

	8461 const UChar *pTarg = target;

	8462 int32_t equalLength;

	8463

	8464 if (sourceLength == -1 && targetLength == -1) {

	8465 // Both strings are null terminated.

	8466 // Scan through any leading equal portion.

	8467 while (pSrc == pTarg && *pSrc != 0) {

	8468 pSrc++;

	8469 pTarg++;

	8470 }

	8471 if (pSrc == 0 && pTarg == 0) {

	8472 UTRACE_EXIT_VALUE(UCOL_EQUAL);

	8473 return UCOL_EQUAL;

	8474 }

	8475 equalLength = (int32_t)(pSrc - source);

	8476 }

	8477 else

	8478 {

	8479 // One or both strings has an explicit length.

	8480 const UChar *pSrcEnd = source + sourceLength;

	8481 const UChar *pTargEnd = target + targetLength;

	8482

	8483 // Scan while the strings are bitwise ==, or until one is exhausted.

	8484 for (;;) {

	8485 if (pSrc == pSrcEnd \|\| pTarg == pTargEnd) {

	8486 break;

	8487 }

	8488 if ((pSrc == 0 && sourceLength == -1) \|\| (pTarg == 0 && targetLeng th == -1)) {

	8489 break;

	8490 }

	8491 if (pSrc != pTarg) {

	8492 break;

	8493 }

	8494 pSrc++;

	8495 pTarg++;

	8496 }

	8497 equalLength = (int32_t)(pSrc - source);

	8498

	8499 // If we made it all the way through both strings, we are done. They ar e ==

	8500 if ((pSrc ==pSrcEnd \|\| (pSrcEnd <pSrc && pSrc==0)) && / At end of src string, however it was specified. */

	8501 (pTarg==pTargEnd \|\| (pTargEnd<pTarg && pTarg==0))) / and also at end of dest string */

	8502 {

	8503 UTRACE_EXIT_VALUE(UCOL_EQUAL);

	8504 return UCOL_EQUAL;

	8505 }

	8506 }

	8507 if (equalLength > 0) {

	8508 /* There is an identical portion at the beginning of the two strings. */

	8509 /* If the identical portion ends within a contraction or a comibining */

	8510 /* character sequence, back up to the start of that sequence. */

	8511

	8512 // These values should already be set by the code above.

	8513 //pSrc = source + equalLength; /* point to the first differing c hars */

	8514 //pTarg = target + equalLength;

	8515 if ((pSrc != source+sourceLength && ucol_unsafeCP(*pSrc, coll)) \|\|

	8516 (pTarg != target+targetLength && ucol_unsafeCP(*pTarg, coll)))

	8517 {

	8518 // We are stopped in the middle of a contraction.

	8519 // Scan backwards through the == part of the string looking for the start of the contraction.

	8520 // It doesn't matter which string we scan, since they are the same in this region.

	8521 do

	8522 {

	8523 equalLength--;

	8524 pSrc--;

	8525 }

	8526 while (equalLength>0 && ucol_unsafeCP(*pSrc, coll));

	8527 }

	8528

	8529 source += equalLength;

	8530 target += equalLength;

	8531 if (sourceLength > 0) {

	8532 sourceLength -= equalLength;

	8533 }

	8534 if (targetLength > 0) {

	8535 targetLength -= equalLength;

	8536 }

	8537 }

	8538

	8539 UErrorCode status = U_ZERO_ERROR;

	8540 UCollationResult returnVal;

	8541 if(!coll->latinOneUse \|\| (sourceLength > 0 && source&0xff00) \|\| (targetLeng th > 0 && target&0xff00)) {

	8542 returnVal = ucol_strcollRegular(coll, source, sourceLength, target, targ etLength, &status);

	8543 } else {

	8544 returnVal = ucol_strcollUseLatin1(coll, source, sourceLength, target, ta rgetLength, &status);

	8545 }

	8546 UTRACE_EXIT_VALUE(returnVal);

	8547 return returnVal;

	8548 }

	8549

	8550 /* convenience function for comparing strings */

	8551 U_CAPI UBool U_EXPORT2

	8552 ucol_greater( const UCollator *coll,

	8553 const UChar *source,

	8554 int32_t sourceLength,

	8555 const UChar *target,

	8556 int32_t targetLength)

	8557 {

	8558 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)

	8559 == UCOL_GREATER);

	8560 }

	8561

	8562 /* convenience function for comparing strings */

	8563 U_CAPI UBool U_EXPORT2

	8564 ucol_greaterOrEqual( const UCollator *coll,

	8565 const UChar *source,

	8566 int32_t sourceLength,

	8567 const UChar *target,

	8568 int32_t targetLength)

	8569 {

	8570 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)

	8571 != UCOL_LESS);

	8572 }

	8573

	8574 /* convenience function for comparing strings */

	8575 U_CAPI UBool U_EXPORT2

	8576 ucol_equal( const UCollator *coll,

	8577 const UChar *source,

	8578 int32_t sourceLength,

	8579 const UChar *target,

	8580 int32_t targetLength)

	8581 {

	8582 return (ucol_strcoll(coll, source, sourceLength, target, targetLength)

	8583 == UCOL_EQUAL);

	8584 }

	8585

	8586 U_CAPI void U_EXPORT2

	8587 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {

	8588 if(coll && coll->UCA) {

	8589 uprv_memcpy(info, coll->UCA->image->UCAVersion, sizeof(UVersionInfo));

	8590 }

	8591 }

	8592

	8593 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « icu46/source/i18n/ucln_in.c ('k') | icu46/source/i18n/ucol_bld.h » ('j') | no next file with comments »