icu46/source/i18n/ucol_bld.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/ucol_bld.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 *******************************************************************************

	3 *

	4 * Copyright (C) 2001-2010, International Business Machines

	5 * Corporation and others. All Rights Reserved.

	6 *

	7 *******************************************************************************

	8 * file name: ucol_bld.cpp

	9 * encoding: US-ASCII

	10 * tab size: 8 (not used)

	11 * indentation:4

	12 *

	13 * created 02/22/2001

	14 * created by: Vladimir Weinstein

	15 *

	16 * This module builds a collator based on the rule set.

	17 *

	18 */

	19

	20 #include "unicode/utypes.h"

	21

	22 #if !UCONFIG_NO_COLLATION

	23

	24 #include "unicode/ucoleitr.h"

	25 #include "unicode/udata.h"

	26 #include "unicode/uchar.h"

	27 #include "unicode/uniset.h"

	28 #include "unicode/uscript.h"

	29 #include "unicode/ustring.h"

	30 #include "normalizer2impl.h"

	31 #include "ucol_bld.h"

	32 #include "ucol_elm.h"

	33 #include "ucol_cnt.h"

	34 #include "ucln_in.h"

	35 #include "umutex.h"

	36 #include "cmemory.h"

	37 #include "cstring.h"

	38

	39 U_NAMESPACE_BEGIN

	40

	41 static const InverseUCATableHeader* _staticInvUCA = NULL;

	42 static UDataMemory* invUCA_DATA_MEM = NULL;

	43

	44 U_CDECL_BEGIN

	45 static UBool U_CALLCONV

	46 isAcceptableInvUCA(void * /context/,

	47 const char * /type/, const char * /name/,

	48 const UDataInfo *pInfo)

	49 {

	50 /* context, type & name are intentionally not used */

	51 if( pInfo->size>=20 &&

	52 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&

	53 pInfo->charsetFamily==U_CHARSET_FAMILY &&

	54 pInfo->dataFormat[0]==INVUCA_DATA_FORMAT_0 && /* dataFormat="InvC" */

	55 pInfo->dataFormat[1]==INVUCA_DATA_FORMAT_1 &&

	56 pInfo->dataFormat[2]==INVUCA_DATA_FORMAT_2 &&

	57 pInfo->dataFormat[3]==INVUCA_DATA_FORMAT_3 &&

	58 pInfo->formatVersion[0]==INVUCA_FORMAT_VERSION_0 &&

	59 pInfo->formatVersion[1]>=INVUCA_FORMAT_VERSION_1 //&&

	60 //pInfo->formatVersion[1]==INVUCA_FORMAT_VERSION_1 &&

	61 //pInfo->formatVersion[2]==INVUCA_FORMAT_VERSION_2 &&

	62 //pInfo->formatVersion[3]==INVUCA_FORMAT_VERSION_3 &&

	63 )

	64 {

	65 UVersionInfo UCDVersion;

	66 u_getUnicodeVersion(UCDVersion);

	67 return (pInfo->dataVersion[0]==UCDVersion[0] &&

	68 pInfo->dataVersion[1]==UCDVersion[1]);

	69 //pInfo->dataVersion[1]==invUcaDataInfo.dataVersion[1] &&

	70 //pInfo->dataVersion[2]==invUcaDataInfo.dataVersion[2] &&

	71 //pInfo->dataVersion[3]==invUcaDataInfo.dataVersion[3]) {

	72 } else {

	73 return FALSE;

	74 }

	75 }

	76 U_CDECL_END

	77

	78 /*

	79 * Takes two CEs (lead and continuation) and

	80 * compares them as CEs should be compared:

	81 * primary vs. primary, secondary vs. secondary

	82 * tertiary vs. tertiary

	83 */

	84 static int32_t compareCEs(uint32_t source0, uint32_t source1, uint32_t target0, uint32_t target1) {

	85 uint32_t s1 = source0, s2, t1 = target0, t2;

	86 if(isContinuation(source1)) {

	87 s2 = source1;

	88 } else {

	89 s2 = 0;

	90 }

	91 if(isContinuation(target1)) {

	92 t2 = target1;

	93 } else {

	94 t2 = 0;

	95 }

	96

	97 uint32_t s = 0, t = 0;

	98 if(s1 == t1 && s2 == t2) {

	99 return 0;

	100 }

	101 s = (s1 & 0xFFFF0000)\|((s2 & 0xFFFF0000)>>16);

	102 t = (t1 & 0xFFFF0000)\|((t2 & 0xFFFF0000)>>16);

	103 if(s < t) {

	104 return -1;

	105 } else if(s > t) {

	106 return 1;

	107 } else {

	108 s = (s1 & 0x0000FF00) \| (s2 & 0x0000FF00)>>8;

	109 t = (t1 & 0x0000FF00) \| (t2 & 0x0000FF00)>>8;

	110 if(s < t) {

	111 return -1;

	112 } else if(s > t) {

	113 return 1;

	114 } else {

	115 s = (s1 & 0x000000FF)<<8 \| (s2 & 0x000000FF);

	116 t = (t1 & 0x000000FF)<<8 \| (t2 & 0x000000FF);

	117 if(s < t) {

	118 return -1;

	119 } else {

	120 return 1;

	121 }

	122 }

	123 }

	124 }

	125

	126 static

	127 int32_t ucol_inv_findCE(const UColTokenParser *src, uint32_t CE, uint32_t Second CE) {

	128 uint32_t bottom = 0, top = src->invUCA->tableSize;

	129 uint32_t i = 0;

	130 uint32_t first = 0, second = 0;

	131 uint32_t CETable = (uint32_t )((uint8_t *)src->invUCA+src->invUCA->table);

	132 int32_t res = 0;

	133

	134 while(bottom < top-1) {

	135 i = (top+bottom)/2;

	136 first = (CETable+3i);

	137 second = (CETable+3i+1);

	138 res = compareCEs(first, second, CE, SecondCE);

	139 if(res > 0) {

	140 top = i;

	141 } else if(res < 0) {

	142 bottom = i;

	143 } else {

	144 break;

	145 }

	146 }

	147

	148 /* weiv: */

	149 /* in searching for elements, I have removed the failure */

	150 /* The reason for this is that the builder does not rely */

	151 /* on search mechanism telling it that it didn't find an */

	152 /* element. However, indirect positioning relies on being */

	153 /* able to find the elements around any CE, even if it is */

	154 /* not defined in the UCA. */

	155 return i;

	156 /*

	157 if((first == CE && second == SecondCE)) {

	158 return i;

	159 } else {

	160 return -1;

	161 }

	162 */

	163 }

	164

	165 static const uint32_t strengthMask[UCOL_CE_STRENGTH_LIMIT] = {

	166 0xFFFF0000,

	167 0xFFFFFF00,

	168 0xFFFFFFFF

	169 };

	170

	171 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,

	172 uint32_t CE, uint32_t contCE,

	173 uint32_t nextCE, uint32_t nextCont CE,

	174 uint32_t strength)

	175 {

	176 uint32_t CETable = (uint32_t )((uint8_t *)src->invUCA+src->invUCA->table);

	177 int32_t iCE;

	178

	179 iCE = ucol_inv_findCE(src, CE, contCE);

	180

	181 if(iCE<0) {

	182 *nextCE = UCOL_NOT_FOUND;

	183 return -1;

	184 }

	185

	186 CE &= strengthMask[strength];

	187 contCE &= strengthMask[strength];

	188

	189 *nextCE = CE;

	190 *nextContCE = contCE;

	191

	192 while((*nextCE & strengthMask[strength]) == CE

	193 && (*nextContCE & strengthMask[strength]) == contCE)

	194 {

	195 nextCE = ((CETable+3*(++iCE)));

	196 nextContCE = ((CETable+3*(iCE)+1));

	197 }

	198

	199 return iCE;

	200 }

	201

	202 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,

	203 uint32_t CE, uint32_t contCE,

	204 uint32_t prevCE, uint32_t prevCont CE,

	205 uint32_t strength)

	206 {

	207 uint32_t CETable = (uint32_t )((uint8_t *)src->invUCA+src->invUCA->table);

	208 int32_t iCE;

	209

	210 iCE = ucol_inv_findCE(src, CE, contCE);

	211

	212 if(iCE<0) {

	213 *prevCE = UCOL_NOT_FOUND;

	214 return -1;

	215 }

	216

	217 CE &= strengthMask[strength];

	218 contCE &= strengthMask[strength];

	219

	220 *prevCE = CE;

	221 *prevContCE = contCE;

	222

	223 while((*prevCE & strengthMask[strength]) == CE

	224 && (*prevContCE & strengthMask[strength])== contCE

	225 && iCE > 0) /* this condition should prevent falling off the edge of the world */

	226 {

	227 /* here, we end up in a singularity - zero */

	228 prevCE = ((CETable+3*(--iCE)));

	229 prevContCE = ((CETable+3*(iCE)+1));

	230 }

	231

	232 return iCE;

	233 }

	234

	235 U_CFUNC uint32_t U_EXPORT2 ucol_getCEStrengthDifference(uint32_t CE, uint32_t co ntCE,

	236 uint32_t prevCE, uint32_t prevContCE)

	237 {

	238 if(prevCE == CE && prevContCE == contCE) {

	239 return UCOL_IDENTICAL;

	240 }

	241 if((prevCE & strengthMask[UCOL_PRIMARY]) != (CE & strengthMask[UCOL_PRIMARY] )

	242 \|\| (prevContCE & strengthMask[UCOL_PRIMARY]) != (contCE & strengthMask[U COL_PRIMARY]))

	243 {

	244 return UCOL_PRIMARY;

	245 }

	246 if((prevCE & strengthMask[UCOL_SECONDARY]) != (CE & strengthMask[UCOL_SECOND ARY])

	247 \|\| (prevContCE & strengthMask[UCOL_SECONDARY]) != (contCE & strengthMask [UCOL_SECONDARY]))

	248 {

	249 return UCOL_SECONDARY;

	250 }

	251 return UCOL_TERTIARY;

	252 }

	253

	254

	255 /*static

	256 inline int32_t ucol_inv_getPrevious(UColTokenParser src, UColTokListHeader lh, uint32_t strength) {

	257

	258 uint32_t CE = lh->baseCE;

	259 uint32_t SecondCE = lh->baseContCE;

	260

	261 uint32_t CETable = (uint32_t )((uint8_t *)src->invUCA+src->invUCA->table);

	262 uint32_t previousCE, previousContCE;

	263 int32_t iCE;

	264

	265 iCE = ucol_inv_findCE(src, CE, SecondCE);

	266

	267 if(iCE<0) {

	268 return -1;

	269 }

	270

	271 CE &= strengthMask[strength];

	272 SecondCE &= strengthMask[strength];

	273

	274 previousCE = CE;

	275 previousContCE = SecondCE;

	276

	277 while((previousCE & strengthMask[strength]) == CE && (previousContCE & str engthMask[strength])== SecondCE) {

	278 previousCE = ((CETable+3(--iCE)));

	279 previousContCE = ((CETable+3(iCE)+1));

	280 }

	281 lh->previousCE = previousCE;

	282 lh->previousContCE = previousContCE;

	283

	284 return iCE;

	285 }*/

	286

	287 static

	288 inline int32_t ucol_inv_getNext(UColTokenParser src, UColTokListHeader lh, uin t32_t strength) {

	289 uint32_t CE = lh->baseCE;

	290 uint32_t SecondCE = lh->baseContCE;

	291

	292 uint32_t CETable = (uint32_t )((uint8_t *)src->invUCA+src->invUCA->table);

	293 uint32_t nextCE, nextContCE;

	294 int32_t iCE;

	295

	296 iCE = ucol_inv_findCE(src, CE, SecondCE);

	297

	298 if(iCE<0) {

	299 return -1;

	300 }

	301

	302 CE &= strengthMask[strength];

	303 SecondCE &= strengthMask[strength];

	304

	305 nextCE = CE;

	306 nextContCE = SecondCE;

	307

	308 while((nextCE & strengthMask[strength]) == CE

	309 && (nextContCE & strengthMask[strength]) == SecondCE)

	310 {

	311 nextCE = ((CETable+3(++iCE)));

	312 nextContCE = ((CETable+3(iCE)+1));

	313 }

	314

	315 lh->nextCE = nextCE;

	316 lh->nextContCE = nextContCE;

	317

	318 return iCE;

	319 }

	320

	321 static void ucol_inv_getGapPositions(UColTokenParser src, UColTokListHeader lh , UErrorCode *status) {

	322 /* reset all the gaps */

	323 int32_t i = 0;

	324 uint32_t CETable = (uint32_t )((uint8_t *)src->invUCA+src->invUCA->table);

	325 uint32_t st = 0;

	326 uint32_t t1, t2;

	327 int32_t pos;

	328

	329 UColToken *tok = lh->first;

	330 uint32_t tokStrength = tok->strength;

	331

	332 for(i = 0; i<3; i++) {

	333 lh->gapsHi[3*i] = 0;

	334 lh->gapsHi[3*i+1] = 0;

	335 lh->gapsHi[3*i+2] = 0;

	336 lh->gapsLo[3*i] = 0;

	337 lh->gapsLo[3*i+1] = 0;

	338 lh->gapsLo[3*i+2] = 0;

	339 lh->numStr[i] = 0;

	340 lh->fStrToken[i] = NULL;

	341 lh->lStrToken[i] = NULL;

	342 lh->pos[i] = -1;

	343 }

	344

	345 UCAConstants consts = (UCAConstants )((uint8_t *)src->UCA->image + src->UC A->image->UCAConsts);

	346

	347 if((lh->baseCE & 0xFF000000)>= (consts->UCA_PRIMARY_IMPLICIT_MIN<<24) && (lh ->baseCE & 0xFF000000) <= (consts->UCA_PRIMARY_IMPLICIT_MAX<<24) ) { /* implicit s - */

	348 //if(lh->baseCE >= PRIMARY_IMPLICIT_MIN && lh->baseCE < PRIMARY_IMPLICIT _MAX ) { /* implicits - */

	349 lh->pos[0] = 0;

	350 t1 = lh->baseCE;

	351 t2 = lh->baseContCE & UCOL_REMOVE_CONTINUATION;

	352 lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) \| (t2 & UCOL_PRIMARYMASK) >> 16;

	353 lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 \| (t2 & UCOL_SECONDARYMA SK) << 8;

	354 lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 \| (UCOL_TERTIARYORDER(t2) ) << 16;

	355 uint32_t primaryCE = (t1 & UCOL_PRIMARYMASK) \| ((t2 & UCOL_PRIMARYMASK) >> 16);

	356 primaryCE = uprv_uca_getImplicitFromRaw(uprv_uca_getRawFromImplicit(prim aryCE)+1);

	357

	358 t1 = (primaryCE & UCOL_PRIMARYMASK) \| 0x0505;

	359 t2 = (primaryCE << 16) & UCOL_PRIMARYMASK; // \| UCOL_CONTINUATION_MARKER ;

	360

	361 lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) \| (t2 & UCOL_PRIMARYMASK) >> 16;

	362 lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 \| (t2 & UCOL_SECONDARYMA SK) << 8;

	363 lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 \| (UCOL_TERTIARYORDER(t2) ) << 16;

	364 } else if(lh->indirect == TRUE && lh->nextCE != 0) {

	365 //} else if(lh->baseCE == UCOL_RESET_TOP_VALUE && lh->baseContCE == 0) {

	366 lh->pos[0] = 0;

	367 t1 = lh->baseCE;

	368 t2 = lh->baseContCE&UCOL_REMOVE_CONTINUATION;

	369 lh->gapsLo[0] = (t1 & UCOL_PRIMARYMASK) \| (t2 & UCOL_PRIMARYMASK) >> 16;

	370 lh->gapsLo[1] = (t1 & UCOL_SECONDARYMASK) << 16 \| (t2 & UCOL_SECONDARYMA SK) << 8;

	371 lh->gapsLo[2] = (UCOL_TERTIARYORDER(t1)) << 24 \| (UCOL_TERTIARYORDER(t2) ) << 16;

	372 t1 = lh->nextCE;

	373 t2 = lh->nextContCE&UCOL_REMOVE_CONTINUATION;

	374 lh->gapsHi[0] = (t1 & UCOL_PRIMARYMASK) \| (t2 & UCOL_PRIMARYMASK) >> 16;

	375 lh->gapsHi[1] = (t1 & UCOL_SECONDARYMASK) << 16 \| (t2 & UCOL_SECONDARYMA SK) << 8;

	376 lh->gapsHi[2] = (UCOL_TERTIARYORDER(t1)) << 24 \| (UCOL_TERTIARYORDER(t2) ) << 16;

	377 } else {

	378 for(;;) {

	379 if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {

	380 if((lh->pos[tokStrength] = ucol_inv_getNext(src, lh, tokStrength )) >= 0) {

	381 lh->fStrToken[tokStrength] = tok;

	382 } else { /* The CE must be implicit, since it's not in the table */

	383 /* Error */

	384 *status = U_INTERNAL_PROGRAM_ERROR;

	385 }

	386 }

	387

	388 while(tok != NULL && tok->strength >= tokStrength) {

	389 if(tokStrength < UCOL_CE_STRENGTH_LIMIT) {

	390 lh->lStrToken[tokStrength] = tok;

	391 }

	392 tok = tok->next;

	393 }

	394 if(tokStrength < UCOL_CE_STRENGTH_LIMIT-1) {

	395 /* check if previous interval is the same and merge the interval s if it is so */

	396 if(lh->pos[tokStrength] == lh->pos[tokStrength+1]) {

	397 lh->fStrToken[tokStrength] = lh->fStrToken[tokStrength+1];

	398 lh->fStrToken[tokStrength+1] = NULL;

	399 lh->lStrToken[tokStrength+1] = NULL;

	400 lh->pos[tokStrength+1] = -1;

	401 }

	402 }

	403 if(tok != NULL) {

	404 tokStrength = tok->strength;

	405 } else {

	406 break;

	407 }

	408 }

	409 for(st = 0; st < 3; st++) {

	410 if((pos = lh->pos[st]) >= 0) {

	411 t1 = (CETable+3(pos));

	412 t2 = (CETable+3(pos)+1);

	413 lh->gapsHi[3*st] = (t1 & UCOL_PRIMARYMASK) \| (t2 & UCOL_PRIMARYM ASK) >> 16;

	414 lh->gapsHi[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 \| (t2 & UCO L_SECONDARYMASK) << 8;

	415 //lh->gapsHi[3*st+2] = (UCOL_TERTIARYORDER(t1)) << 24 \| (UCOL_TE RTIARYORDER(t2)) << 16;

	416 lh->gapsHi[3*st+2] = (t1&0x3f) << 24 \| (t2&0x3f) << 16;

	417 //pos--;

	418 //t1 = (CETable+3(pos));

	419 //t2 = (CETable+3(pos)+1);

	420 t1 = lh->baseCE;

	421 t2 = lh->baseContCE;

	422 lh->gapsLo[3*st] = (t1 & UCOL_PRIMARYMASK) \| (t2 & UCOL_PRIMARYM ASK) >> 16;

	423 lh->gapsLo[3*st+1] = (t1 & UCOL_SECONDARYMASK) << 16 \| (t2 & UCO L_SECONDARYMASK) << 8;

	424 lh->gapsLo[3*st+2] = (t1&0x3f) << 24 \| (t2&0x3f) << 16;

	425 }

	426 }

	427 }

	428 }

	429

	430

	/*
	 * ucol_countBytes(value, noOfBytes)
	 *
	 * Stores in noOfBytes the number of masks among 0xFFFFFFFF, 0x00FFFFFF,
	 * 0x0000FFFF and 0x000000FF that overlap a set bit of value. For a weight
	 * whose bytes are packed from the most significant end this is its length
	 * in bytes (0x05000000 -> 1, 0x05050000 -> 2, 0 -> 0).
	 *
	 * value is evaluated exactly once. The expansion is a single statement, so
	 * the macro can be used as the body of an unbraced if/else, and its
	 * temporaries carry a prefix so that they cannot capture a caller variable
	 * (such as one named "mask").
	 */
	#define ucol_countBytes(value, noOfBytes) \
	do { \
	    const uint32_t ucol_cb_value_ = (uint32_t)(value); \
	    uint32_t ucol_cb_mask_ = 0xFFFFFFFF; \
	    (noOfBytes) = 0; \
	    while(ucol_cb_mask_ != 0) { \
	        if((ucol_cb_value_ & ucol_cb_mask_) != 0) { \
	            (noOfBytes)++; \
	        } \
	        ucol_cb_mask_ >>= 8; \
	    } \
	} while(0)

	442

	443 static uint32_t ucol_getNextGenerated(ucolCEGenerator g, UErrorCode status) {

	444 if(U_SUCCESS(*status)) {

	445 g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);

	446 }

	447 return g->current;

	448 }

	449

	450 static uint32_t ucol_getSimpleCEGenerator(ucolCEGenerator g, UColToken tok, ui nt32_t strength, UErrorCode *status) {

	451 /* TODO: rename to enum names */

	452 uint32_t high, low, count=1;

	453 uint32_t maxByte = (strength == UCOL_TERTIARY)?0x3F:0xFF;

	454

	455 if(strength == UCOL_SECONDARY) {

	456 low = UCOL_COMMON_TOP2<<24;

	457 high = 0xFFFFFFFF;

	458 count = 0xFF - UCOL_COMMON_TOP2;

	459 } else {

	460 low = UCOL_BYTE_COMMON << 24; //0x05000000;

	461 high = 0x40000000;

	462 count = 0x40 - UCOL_BYTE_COMMON;

	463 }

	464

	465 if(tok->next != NULL && tok->next->strength == strength) {

	466 count = tok->next->toInsert;

	467 }

	468

	469 g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);

	470 g->current = UCOL_BYTE_COMMON<<24;

	471

	472 if(g->noOfRanges == 0) {

	473 *status = U_INTERNAL_PROGRAM_ERROR;

	474 }

	475 return g->current;

	476 }

	477

	478 static uint32_t ucol_getCEGenerator(ucolCEGenerator g, uint32_t lows, uint32_t * highs, UColToken tok, uint32_t fStrength, UErrorCode status) {

	479 uint32_t strength = tok->strength;

	480 uint32_t low = lows[fStrength*3+strength];

	481 uint32_t high = highs[fStrength*3+strength];

	482 uint32_t maxByte = 0;

	483 if(strength == UCOL_TERTIARY) {

	484 maxByte = 0x3F;

	485 } else if(strength == UCOL_PRIMARY) {

	486 maxByte = 0xFE;

	487 } else {

	488 maxByte = 0xFF;

	489 }

	490

	491 uint32_t count = tok->toInsert;

	492

	493 if(low >= high && strength > UCOL_PRIMARY) {

	494 int32_t s = strength;

	495 for(;;) {

	496 s--;

	497 if(lows[fStrength3+s] != highs[fStrength3+s]) {

	498 if(strength == UCOL_SECONDARY) {

	499 if (low < UCOL_COMMON_TOP2<<24 ) {

	500 // Override if low range is less than UCOL_COMMON_TOP2.

	501 low = UCOL_COMMON_TOP2<<24;

	502 }

	503 high = 0xFFFFFFFF;

	504 } else {

	505 // Override if low range is less than UCOL_COMMON_BOT3.

	506 if ( low < UCOL_COMMON_BOT3<<24 ) {

	507 low = UCOL_COMMON_BOT3<<24;

	508 }

	509 high = 0x40000000;

	510 }

	511 break;

	512 }

	513 if(s<0) {

	514 *status = U_INTERNAL_PROGRAM_ERROR;

	515 return 0;

	516 }

	517 }

	518 }

	519

	520 if(low < 0x02000000) {

	521 // We must not use CE weight byte 02, so we set it as the minimum lower bound.

	522 // See http://site.icu-project.org/design/collation/bytes

	523 low = 0x02000000;

	524 }

	525

	526 if(strength == UCOL_SECONDARY) { /* similar as simple */

	527 if(low >= (UCOL_COMMON_BOT2<<24) && low < (uint32_t)(UCOL_COMMON_TOP2<<2 4)) {

	528 low = UCOL_COMMON_TOP2<<24;

	529 }

	530 if(high > (UCOL_COMMON_BOT2<<24) && high < (uint32_t)(UCOL_COMMON_TOP2<< 24)) {

	531 high = UCOL_COMMON_TOP2<<24;

	532 }

	533 if(low < (UCOL_COMMON_BOT2<<24)) {

	534 g->noOfRanges = ucol_allocWeights(UCOL_BYTE_UNSHIFTED_MIN<<24, high, count, maxByte, g->ranges);

	535 g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);

	536 //g->current = UCOL_COMMON_BOT2<<24;

	537 return g->current;

	538 }

	539 }

	540

	541 g->noOfRanges = ucol_allocWeights(low, high, count, maxByte, g->ranges);

	542 if(g->noOfRanges == 0) {

	543 *status = U_INTERNAL_PROGRAM_ERROR;

	544 }

	545 g->current = ucol_nextWeight(g->ranges, &g->noOfRanges);

	546 return g->current;

	547 }

	548

	549 static

	550 uint32_t u_toLargeKana(const UChar source, const uint32_t sourceLen, UChar res Buf, const uint32_t resLen, UErrorCode *status) {

	551 uint32_t i = 0;

	552 UChar c;

	553

	554 if(U_FAILURE(*status)) {

	555 return 0;

	556 }

	557

	558 if(sourceLen > resLen) {

	559 *status = U_MEMORY_ALLOCATION_ERROR;

	560 return 0;

	561 }

	562

	563 for(i = 0; i < sourceLen; i++) {

	564 c = source[i];

	565 if(0x3041 <= c && c <= 0x30FA) { /* Kana range */

	566 switch(c - 0x3000) {

	567 case 0x41: case 0x43: case 0x45: case 0x47: case 0x49: case 0x63: ca se 0x83: case 0x85: case 0x8E:

	568 case 0xA1: case 0xA3: case 0xA5: case 0xA7: case 0xA9: case 0xC3: ca se 0xE3: case 0xE5: case 0xEE:

	569 c++;

	570 break;

	571 case 0xF5:

	572 c = 0x30AB;

	573 break;

	574 case 0xF6:

	575 c = 0x30B1;

	576 break;

	577 }

	578 }

	579 resBuf[i] = c;

	580 }

	581 return sourceLen;

	582 }

	583

	584 static

	585 uint32_t u_toSmallKana(const UChar source, const uint32_t sourceLen, UChar res Buf, const uint32_t resLen, UErrorCode *status) {

	586 uint32_t i = 0;

	587 UChar c;

	588

	589 if(U_FAILURE(*status)) {

	590 return 0;

	591 }

	592

	593 if(sourceLen > resLen) {

	594 *status = U_MEMORY_ALLOCATION_ERROR;

	595 return 0;

	596 }

	597

	598 for(i = 0; i < sourceLen; i++) {

	599 c = source[i];

	600 if(0x3041 <= c && c <= 0x30FA) { /* Kana range */

	601 switch(c - 0x3000) {

	602 case 0x42: case 0x44: case 0x46: case 0x48: case 0x4A: case 0x64: ca se 0x84: case 0x86: case 0x8F:

	603 case 0xA2: case 0xA4: case 0xA6: case 0xA8: case 0xAA: case 0xC4: ca se 0xE4: case 0xE6: case 0xEF:

	604 c--;

	605 break;

	606 case 0xAB:

	607 c = 0x30F5;

	608 break;

	609 case 0xB1:

	610 c = 0x30F6;

	611 break;

	612 }

	613 }

	614 resBuf[i] = c;

	615 }

	616 return sourceLen;

	617 }

	618

	619 static

	620 uint8_t ucol_uprv_getCaseBits(const UCollator UCA, const UChar src, uint32_t l en, UErrorCode *status) {

	621 uint32_t i = 0;

	622 UChar n[128];

	623 uint32_t nLen = 0;

	624 uint32_t uCount = 0, lCount = 0;

	625

	626 collIterate s;

	627 uint32_t order = 0;

	628

	629 if(U_FAILURE(*status)) {

	630 return UCOL_LOWER_CASE;

	631 }

	632

	633 nLen = unorm_normalize(src, len, UNORM_NFKD, 0, n, 128, status);

	634 if(U_SUCCESS(*status)) {

	635 for(i = 0; i < nLen; i++) {

	636 uprv_init_collIterate(UCA, &n[i], 1, &s, status);

	637 order = ucol_getNextCE(UCA, &s, status);

	638 if(isContinuation(order)) {

	639 *status = U_INTERNAL_PROGRAM_ERROR;

	640 return UCOL_LOWER_CASE;

	641 }

	642 if((order&UCOL_CASE_BIT_MASK)== UCOL_UPPER_CASE) {

	643 uCount++;

	644 } else {

	645 if(u_islower(n[i])) {

	646 lCount++;

	647 } else if(U_SUCCESS(*status)) {

	648 UChar sk[1], lk[1];

	649 u_toSmallKana(&n[i], 1, sk, 1, status);

	650 u_toLargeKana(&n[i], 1, lk, 1, status);

	651 if(sk[0] == n[i] && lk[0] != n[i]) {

	652 lCount++;

	653 }

	654 }

	655 }

	656 }

	657 }

	658

	659 if(uCount != 0 && lCount != 0) {

	660 return UCOL_MIXED_CASE;

	661 } else if(uCount != 0) {

	662 return UCOL_UPPER_CASE;

	663 } else {

	664 return UCOL_LOWER_CASE;

	665 }

	666 }

	667

	668

	669 U_CFUNC void ucol_doCE(UColTokenParser src, uint32_t CEparts, UColToken tok, UErrorCode status) {

	670 /* this one makes the table and stuff */

	671 uint32_t noOfBytes[3];

	672 uint32_t i;

	673

	674 for(i = 0; i<3; i++) {

	675 ucol_countBytes(CEparts[i], noOfBytes[i]);

	676 }

	677

	678 /* Here we have to pack CEs from parts */

	679

	680 uint32_t CEi = 0;

	681 uint32_t value = 0;

	682

	683 while(2*CEi<noOfBytes[0] \|\| CEi<noOfBytes[1] \|\| CEi<noOfBytes[2]) {

	684 if(CEi > 0) {

	685 value = UCOL_CONTINUATION_MARKER; /* Continuation marker */

	686 } else {

	687 value = 0;

	688 }

	689

	690 if(2*CEi<noOfBytes[0]) {

	691 value \|= ((CEparts[0]>>(32-16*(CEi+1))) & 0xFFFF) << 16;

	692 }

	693 if(CEi<noOfBytes[1]) {

	694 value \|= ((CEparts[1]>>(32-8*(CEi+1))) & 0xFF) << 8;

	695 }

	696 if(CEi<noOfBytes[2]) {

	697 value \|= ((CEparts[2]>>(32-8*(CEi+1))) & 0x3F);

	698 }

	699 tok->CEs[CEi] = value;

	700 CEi++;

	701 }

	702 if(CEi == 0) { /* totally ignorable */

	703 tok->noOfCEs = 1;

	704 tok->CEs[0] = 0;

	705 } else { /* there is at least something */

	706 tok->noOfCEs = CEi;

	707 }

	708

	709

	710 // we want to set case bits here and now, not later.

	711 // Case bits handling

	712 if(tok->CEs[0] != 0) { // case bits should be set only for non-ignorables

	713 tok->CEs[0] &= 0xFFFFFF3F; // Clean the case bits field

	714 int32_t cSize = (tok->source & 0xFF000000) >> 24;

	715 UChar *cPoints = (tok->source & 0x00FFFFFF) + src->source;

	716

	717 if(cSize > 1) {

	718 // Do it manually

	719 tok->CEs[0] \|= ucol_uprv_getCaseBits(src->UCA, cPoints, cSize, statu s);

	720 } else {

	721 // Copy it from the UCA

	722 uint32_t caseCE = ucol_getFirstCE(src->UCA, cPoints[0], status);

	723 tok->CEs[0] \|= (caseCE & 0xC0);

	724 }

	725 }

	726

	727 #if UCOL_DEBUG==2

	728 fprintf(stderr, "%04X str: %i, [%08X, %08X, %08X]: tok: ", tok->debugSource, tok->strength, CEparts[0] >> (32-8noOfBytes[0]), CEparts[1] >> (32-8noOfBytes [1]), CEparts[2]>> (32-8*noOfBytes[2]));

	729 for(i = 0; i<tok->noOfCEs; i++) {

	730 fprintf(stderr, "%08X ", tok->CEs[i]);

	731 }

	732 fprintf(stderr, "\n");

	733 #endif

	734 }

	735

	736 U_CFUNC void ucol_initBuffers(UColTokenParser src, UColTokListHeader lh, UErro rCode *status) {

	737 ucolCEGenerator Gens[UCOL_CE_STRENGTH_LIMIT];

	738 uint32_t CEparts[UCOL_CE_STRENGTH_LIMIT];

	739

	740 UColToken *tok = lh->last;

	741 uint32_t t[UCOL_STRENGTH_LIMIT];

	742

	743 uprv_memset(t, 0, UCOL_STRENGTH_LIMIT*sizeof(uint32_t));

	744

	745 tok->toInsert = 1;

	746 t[tok->strength] = 1;

	747

	748 while(tok->previous != NULL) {

	749 if(tok->previous->strength < tok->strength) { /* going up */

	750 t[tok->strength] = 0;

	751 t[tok->previous->strength]++;

	752 } else if(tok->previous->strength > tok->strength) { /* going down */

	753 t[tok->previous->strength] = 1;

	754 } else {

	755 t[tok->strength]++;

	756 }

	757 tok=tok->previous;

	758 tok->toInsert = t[tok->strength];

	759 }

	760

	761 tok->toInsert = t[tok->strength];

	762 ucol_inv_getGapPositions(src, lh, status);

	763

	764 #if UCOL_DEBUG

	765 fprintf(stderr, "BaseCE: %08X %08X\n", lh->baseCE, lh->baseContCE);

	766 int32_t j = 2;

	767 for(j = 2; j >= 0; j--) {

	768 fprintf(stderr, "gapsLo[%i] [%08X %08X %08X]\n", j, lh->gapsLo[j3], lh- >gapsLo[j3+1], lh->gapsLo[j*3+2]);

	769 fprintf(stderr, "gapsHi[%i] [%08X %08X %08X]\n", j, lh->gapsHi[j3], lh- >gapsHi[j3+1], lh->gapsHi[j*3+2]);

	770 }

	771 tok=&lh->first[UCOL_TOK_POLARITY_POSITIVE];

	772

	773 do {

	774 fprintf(stderr,"%i", tok->strength);

	775 tok = tok->next;

	776 } while(tok != NULL);

	777 fprintf(stderr, "\n");

	778

	779 tok=&lh->first[UCOL_TOK_POLARITY_POSITIVE];

	780

	781 do {

	782 fprintf(stderr,"%i", tok->toInsert);

	783 tok = tok->next;

	784 } while(tok != NULL);

	785 #endif

	786

	787 tok = lh->first;

	788 uint32_t fStrength = UCOL_IDENTICAL;

	789 uint32_t initStrength = UCOL_IDENTICAL;

	790

	791

	792 CEparts[UCOL_PRIMARY] = (lh->baseCE & UCOL_PRIMARYMASK) \| (lh->baseContCE & UCOL_PRIMARYMASK) >> 16;

	793 CEparts[UCOL_SECONDARY] = (lh->baseCE & UCOL_SECONDARYMASK) << 16 \| (lh->bas eContCE & UCOL_SECONDARYMASK) << 8;

	794 CEparts[UCOL_TERTIARY] = (UCOL_TERTIARYORDER(lh->baseCE)) << 24 \| (UCOL_TERT IARYORDER(lh->baseContCE)) << 16;

	795

	796 while (tok != NULL && U_SUCCESS(*status)) {

	797 fStrength = tok->strength;

	798 if(fStrength < initStrength) {

	799 initStrength = fStrength;

	800 if(lh->pos[fStrength] == -1) {

	801 while(lh->pos[fStrength] == -1 && fStrength > 0) {

	802 fStrength--;

	803 }

	804 if(lh->pos[fStrength] == -1) {

	805 *status = U_INTERNAL_PROGRAM_ERROR;

	806 return;

	807 }

	808 }

	809 if(initStrength == UCOL_TERTIARY) { /* starting with tertiary */

	810 CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];

	811 CEparts[UCOL_SECONDARY] = lh->gapsLo[fStrength*3+1];

	812 /CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[2], lh->gap sLo[fStrength3+2], lh->gapsHi[fStrength3+2], tok, UCOL_TERTIARY); /

	813 CEparts[UCOL_TERTIARY] = ucol_getCEGenerator(&Gens[UCOL_TERTIARY ], lh->gapsLo, lh->gapsHi, tok, fStrength, status);

	814 } else if(initStrength == UCOL_SECONDARY) { /* secondaries */

	815 CEparts[UCOL_PRIMARY] = lh->gapsLo[fStrength*3];

	816 /CEparts[1] = ucol_getCEGenerator(&Gens[1], lh->gapsLo[fStrengt h3+1], lh->gapsHi[fStrength3+1], tok, 1);/

	817 CEparts[UCOL_SECONDARY] = ucol_getCEGenerator(&Gens[UCOL_SECONDA RY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);

	818 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TE RTIARY], tok, UCOL_TERTIARY, status);

	819 } else { /* primaries */

	820 /CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[0], lh->gaps Lo[0], lh->gapsHi[0], tok, UCOL_PRIMARY);/

	821 CEparts[UCOL_PRIMARY] = ucol_getCEGenerator(&Gens[UCOL_PRIMARY], lh->gapsLo, lh->gapsHi, tok, fStrength, status);

	822 CEparts[UCOL_SECONDARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_S ECONDARY], tok, UCOL_SECONDARY, status);

	823 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TE RTIARY], tok, UCOL_TERTIARY, status);

	824 }

	825 } else {

	826 if(tok->strength == UCOL_TERTIARY) {

	827 CEparts[UCOL_TERTIARY] = ucol_getNextGenerated(&Gens[UCOL_TERTIA RY], status);

	828 } else if(tok->strength == UCOL_SECONDARY) {

	829 CEparts[UCOL_SECONDARY] = ucol_getNextGenerated(&Gens[UCOL_SECON DARY], status);

	830 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TE RTIARY], tok, UCOL_TERTIARY, status);

	831 } else if(tok->strength == UCOL_PRIMARY) {

	832 CEparts[UCOL_PRIMARY] = ucol_getNextGenerated(&Gens[UCOL_PRIMARY ], status);

	833 CEparts[UCOL_SECONDARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_S ECONDARY], tok, UCOL_SECONDARY, status);

	834 CEparts[UCOL_TERTIARY] = ucol_getSimpleCEGenerator(&Gens[UCOL_TE RTIARY], tok, UCOL_TERTIARY, status);

	835 }

	836 }

	837 ucol_doCE(src, CEparts, tok, status);

	838 tok = tok->next;

	839 }

	840 }

	841

	842 U_CFUNC void ucol_createElements(UColTokenParser src, tempUCATable t, UColTokL istHeader lh, UErrorCode status) {

	843 UCAElements el;

	844 UColToken *tok = lh->first;

	845 UColToken *expt = NULL;

	846 uint32_t i = 0, j = 0;

	847 UChar32 fcdHighStart;

	848 const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status);

	849

	850 while(tok != NULL && U_SUCCESS(*status)) {

	851 /* first, check if there are any expansions */

	852 /* if there are expansions, we need to do a little bit more processing * /

	853 /* since parts of expansion can be tailored, while others are not */

	854 if(tok->expansion != 0) {

	855 uint32_t len = tok->expansion >> 24;

	856 uint32_t currentSequenceLen = len;

	857 uint32_t expOffset = tok->expansion & 0x00FFFFFF;

	858 //uint32_t exp = currentSequenceLen \| expOffset;

	859 UColToken exp;

	860 exp.source = currentSequenceLen \| expOffset;

	861 exp.rulesToParseHdl = &(src->source);

	862

	863 while(len > 0) {

	864 currentSequenceLen = len;

	865 while(currentSequenceLen > 0) {

	866 exp.source = (currentSequenceLen << 24) \| expOffset;

	867 if((expt = (UColToken )uhash_get(src->tailored, &exp)) != N ULL && expt->strength != UCOL_TOK_RESET) { / expansion is tailored */

	868 uint32_t noOfCEsToCopy = expt->noOfCEs;

	869 for(j = 0; j<noOfCEsToCopy; j++) {

	870 tok->expCEs[tok->noOfExpCEs + j] = expt->CEs[j];

	871 }

	872 tok->noOfExpCEs += noOfCEsToCopy;

	873 // Smart people never try to add codepoints and CEs.

	874 // For some odd reason, it won't work.

	875 expOffset += currentSequenceLen; //noOfCEsToCopy;

	876 len -= currentSequenceLen; //noOfCEsToCopy;

	877 break;

	878 } else {

	879 currentSequenceLen--;

	880 }

	881 }

	882 if(currentSequenceLen == 0) { /* couldn't find any tailored subs equence */

	883 /* will have to get one from UCA */

	884 /* first, get the UChars from the rules */

	885 /* then pick CEs out until there is no more and stuff them i nto expansion */

	886 collIterate s;

	887 uint32_t order = 0;

	888 uprv_init_collIterate(src->UCA, expOffset + src->source, 1, &s, status);

	889

	890 for(;;) {

	891 order = ucol_getNextCE(src->UCA, &s, status);

	892 if(order == UCOL_NO_MORE_CES) {

	893 break;

	894 }

	895 tok->expCEs[tok->noOfExpCEs++] = order;

	896 }

	897 expOffset++;

	898 len--;

	899 }

	900 }

	901 } else {

	902 tok->noOfExpCEs = 0;

	903 }

	904

	905 /* set the ucaelement with obtained values */

	906 el.noOfCEs = tok->noOfCEs + tok->noOfExpCEs;

	907 /* copy CEs */

	908 for(i = 0; i<tok->noOfCEs; i++) {

	909 el.CEs[i] = tok->CEs[i];

	910 }

	911 for(i = 0; i<tok->noOfExpCEs; i++) {

	912 el.CEs[i+tok->noOfCEs] = tok->expCEs[i];

	913 }

	914

	915 /* copy UChars */

	916 // We kept prefix and source kind of together, as it is a kind of a cont raction.

	917 // However, now we have to slice the prefix off the main thing -

	918 el.prefix = el.prefixChars;

	919 el.cPoints = el.uchars;

	920 if(tok->prefix != 0) { // we will just copy the prefix here, and adjust accordingly in the

	921 // addPrefix function in ucol_elm. The reason is that we need to add both composed AND

	922 // decomposed elements to the unsaf table.

	923 el.prefixSize = tok->prefix>>24;

	924 uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el. prefixSize*sizeof(UChar));

	925

	926 el.cSize = (tok->source >> 24)-(tok->prefix>>24);

	927 uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar));

	928 } else {

	929 el.prefixSize = 0;

	930 *el.prefix = 0;

	931

	932 el.cSize = (tok->source >> 24);

	933 uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el. cSize*sizeof(UChar));

	934 }

	935 if(src->UCA != NULL) {

	936 for(i = 0; i<el.cSize; i++) {

	937 if(UCOL_ISJAMO(el.cPoints[i])) {

	938 t->image->jamoSpecial = TRUE;

	939 }

	940 }

	941 if (!src->buildCCTabFlag && el.cSize > 0) {

	942 // Check the trailing canonical combining class (tccc) of the la st character.

	943 const UChar *s = el.cPoints + el.cSize;

	944 uint16_t fcd = unorm_prevFCD16(fcdTrieIndex, fcdHighStart, el.cP oints, s);

	945 if ((fcd & 0xff) != 0) {

	946 src->buildCCTabFlag = TRUE;

	947 }

	948 }

	949 }

	950

	951 /* and then, add it */

	952 #if UCOL_DEBUG==2

	953 fprintf(stderr, "Adding: %04X with %08X\n", el.cPoints[0], el.CEs[0]);

	954 #endif

	955 uprv_uca_addAnElement(t, &el, status);

	956

	957 #if UCOL_DEBUG_DUPLICATES

	958 if(*status != U_ZERO_ERROR) {

	959 fprintf(stderr, "replaced CE for %04X with CE for %04X\n", el.cPoint s[0], tok->debugSource);

	960 *status = U_ZERO_ERROR;

	961 }

	962 #endif

	963

	964 tok = tok->next;

	965 }

	966 }

	967

	968 U_CDECL_BEGIN

	969 static UBool U_CALLCONV

	970 _processUCACompleteIgnorables(const void *context, UChar32 start, UChar32 limit, uint32_t value) {

	971 UErrorCode status = U_ZERO_ERROR;

	972 tempUCATable t = (tempUCATable )context;

	973 if(value == 0) {

	974 while(start < limit) {

	975 uint32_t CE = utrie_get32(t->mapping, start, NULL);

	976 if(CE == UCOL_NOT_FOUND) {

	977 UCAElements el;

	978 el.isThai = FALSE;

	979 el.prefixSize = 0;

	980 el.prefixChars[0] = 0;

	981 el.prefix = el.prefixChars;

	982 el.cPoints = el.uchars;

	983

	984 el.cSize = 0;

	985 UTF_APPEND_CHAR(el.uchars, el.cSize, 1024, start);

	986

	987 el.noOfCEs = 1;

	988 el.CEs[0] = 0;

	989 uprv_uca_addAnElement(t, &el, &status);

	990

	991 }

	992 start++;

	993 }

	994 }

	995 if(U_FAILURE(status)) {

	996 return FALSE;

	997 } else {

	998 return TRUE;

	999 }

	1000 }

	1001 U_CDECL_END

	1002

	1003 static void

	1004 ucol_uprv_bld_copyRangeFromUCA(UColTokenParser src, tempUCATable t,

	1005 UChar32 start, UChar32 end,

	1006 UErrorCode *status)

	1007 {

	1008 //UChar decomp[256];

	1009 uint32_t CE = UCOL_NOT_FOUND;

	1010 UChar32 u = 0;

	1011 UCAElements el;

	1012 el.isThai = FALSE;

	1013 el.prefixSize = 0;

	1014 el.prefixChars[0] = 0;

	1015 collIterate colIt;

	1016

	1017 if(U_SUCCESS(*status)) {

	1018 for(u = start; u<=end; u++) {

	1019 if((CE = utrie_get32(t->mapping, u, NULL)) == UCOL_NOT_FOUND

	1020 /* this test is for contractions that are missing the starting e lement. */

	1021 \|\| ((isCntTableElement(CE)) &&

	1022 (uprv_cnttab_getCE(t->contractions, CE, 0, status) == UCOL_NOT_F OUND))

	1023 )

	1024 {

	1025 el.cSize = 0;

	1026 U16_APPEND_UNSAFE(el.uchars, el.cSize, u);

	1027 //decomp[0] = (UChar)u;

	1028 //el.uchars[0] = (UChar)u;

	1029 el.cPoints = el.uchars;

	1030 //el.cSize = 1;

	1031 el.noOfCEs = 0;

	1032 el.prefix = el.prefixChars;

	1033 el.prefixSize = 0;

	1034 //uprv_init_collIterate(src->UCA, decomp, 1, &colIt);

	1035 // We actually want to check whether this element is a special

	1036 // If it is an implicit element (hangul, CJK - we want to copy t he

	1037 // special, not the resolved CEs) - for hangul, copying resolved

	1038 // would just make things the same (there is an expansion and it

	1039 // takes approximately the same amount of time to resolve as

	1040 // falling back to the UCA).

	1041 /*

	1042 UTRIE_GET32(src->UCA->mapping, u, CE);

	1043 tag = getCETag(CE);

	1044 if(tag == HANGUL_SYLLABLE_TAG \|\| tag == CJK_IMPLICIT_TAG

	1045 \|\| tag == IMPLICIT_TAG \|\| tag == TRAIL_SURROGATE_TAG

	1046 \|\| tag == LEAD_SURROGATE_TAG) {

	1047 el.CEs[el.noOfCEs++] = CE;

	1048 } else {

	1049 */

	1050 // It turns out that it does not make sense to keep implicits

	1051 // unresolved. The cost of resolving them is big enough so that

	1052 // it doesn't make any difference whether we have to go to the U CA

	1053 // or not.

	1054 {

	1055 uprv_init_collIterate(src->UCA, el.uchars, el.cSize, &colIt, status);

	1056 while(CE != UCOL_NO_MORE_CES) {

	1057 CE = ucol_getNextCE(src->UCA, &colIt, status);

	1058 if(CE != UCOL_NO_MORE_CES) {

	1059 el.CEs[el.noOfCEs++] = CE;

	1060 }

	1061 }

	1062 }

	1063 uprv_uca_addAnElement(t, &el, status);

	1064 }

	1065 }

	1066 }

	1067 }

	1068

	1069 U_CFUNC UCATableHeader *

	1070 ucol_assembleTailoringTable(UColTokenParser src, UErrorCode status) {

	1071 U_NAMESPACE_USE

	1072

	1073 uint32_t i = 0;

	1074 if(U_FAILURE(*status)) {

	1075 return NULL;

	1076 }

	1077 /*

	1078 2. Eliminate the negative lists by doing the following for each non-null ne gative list:

	1079 o if previousCE(baseCE, strongestN) != some ListHeader X's baseCE,

	1080 create new ListHeader X

	1081 o reverse the list, add to the end of X's positive list. Reset the strengt h of the

	1082 first item you add, based on the stronger strength levels of the two lists.

	1083 */

	1084 /*

	1085 3. For each ListHeader with a non-null positive list:

	1086 */

	1087 /*

	1088 o Find all character strings with CEs between the baseCE and the

	1089 next/previous CE, at the strength of the first token. Add these to the

	1090 tailoring.

	1091 ? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the

	1092 tailoring has & x < z...

	1093 ? Then we change the tailoring to & x <<< X << x' <<< X' < z ...

	1094 */

	1095 /* It is possible that this part should be done even while constructing list */

	1096 /* The problem is that it is unknown what is going to be the strongest weigh t */

	1097 /* So we might as well do it here */

	1098

	1099 /*

	1100 o Allocate CEs for each token in the list, based on the total number N of the

	1101 largest level difference, and the gap G between baseCE and nextCE at that

	1102 level. The relation * between the last item and nextCE is the same as the

	1103 strongest strength.

	1104 o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1)

	1105 ? There are 3 primary items: a, d, e. Fit them into the primary gap.

	1106 Then fit b and c into the secondary gap between a and d, then fit q

	1107 into the tertiary gap between b and c.

	1108

	1109 o Example: baseCE << b <<< q << c * nextCE(X,2)

	1110 ? There are 2 secondary items: b, c. Fit them into the secondary gap.

	1111 Then fit q into the tertiary gap between b and c.

	1112 o When incrementing primary values, we will not cross high byte

	1113 boundaries except where there is only a single-byte primary. That is to

	1114 ensure that the script reordering will continue to work.

	1115 */

	1116 UCATableHeader image = (UCATableHeader )uprv_malloc(sizeof(UCATableHeader) );

	1117 /* test for NULL */

	1118 if (image == NULL) {

	1119 *status = U_MEMORY_ALLOCATION_ERROR;

	1120 return NULL;

	1121 }

	1122 uprv_memcpy(image, src->UCA->image, sizeof(UCATableHeader));

	1123

	1124 for(i = 0; i<src->resultLen; i++) {

	1125 /* now we need to generate the CEs */

	1126 /* We stuff the initial value in the buffers, and increase the appropria te buffer */

	1127 /* According to strength */

	1128 if(U_SUCCESS(*status)) {

	1129 if(src->lh[i].first) { // if there are any elements

	1130 // due to the way parser works, subsequent tailorings

	1131 // may remove all the elements from a sequence, therefore

	1132 // leaving an empty tailoring sequence.

	1133 ucol_initBuffers(src, &src->lh[i], status);

	1134 }

	1135 }

	1136 if(U_FAILURE(*status)) {

	1137 uprv_free(image);

	1138 return NULL;

	1139 }

	1140 }

	1141

	1142 if(src->varTop != NULL) { /* stuff the variable top value */

	1143 src->opts->variableTopValue = (*(src->varTop->CEs))>>16;

	1144 /* remove it from the list */

	1145 if(src->varTop->listHeader->first == src->varTop) { /* first in list */

	1146 src->varTop->listHeader->first = src->varTop->next;

	1147 }

	1148 if(src->varTop->listHeader->last == src->varTop) { /* first in list */

	1149 src->varTop->listHeader->last = src->varTop->previous;

	1150 }

	1151 if(src->varTop->next != NULL) {

	1152 src->varTop->next->previous = src->varTop->previous;

	1153 }

	1154 if(src->varTop->previous != NULL) {

	1155 src->varTop->previous->next = src->varTop->next;

	1156 }

	1157 }

	1158

	1159

	1160 tempUCATable *t = uprv_uca_initTempTable(image, src->opts, src->UCA, NOT_FOU ND_TAG, NOT_FOUND_TAG, status);

	1161 if(U_FAILURE(*status)) {

	1162 uprv_free(image);

	1163 return NULL;

	1164 }

	1165

	1166

	1167 /* After this, we have assigned CE values to all regular CEs */

	1168 /* now we will go through list once more and resolve expansions, */

	1169 /* make UCAElements structs and add them to table */

	1170 for(i = 0; i<src->resultLen; i++) {

	1171 /* now we need to generate the CEs */

	1172 /* We stuff the initial value in the buffers, and increase the appropria te buffer */

	1173 /* According to strength */

	1174 if(U_SUCCESS(*status)) {

	1175 ucol_createElements(src, t, &src->lh[i], status);

	1176 }

	1177 }

	1178

	1179 UCAElements el;

	1180 el.isThai = FALSE;

	1181 el.prefixSize = 0;

	1182 el.prefixChars[0] = 0;

	1183

	1184 /* add latin-1 stuff */

	1185 ucol_uprv_bld_copyRangeFromUCA(src, t, 0, 0xFF, status);

	1186

	1187 /* add stuff for copying */

	1188 if(src->copySet != NULL) {

	1189 int32_t i = 0;

	1190 UnicodeSet set = (UnicodeSet )src->copySet;

	1191 for(i = 0; i < set->getRangeCount(); i++) {

	1192 ucol_uprv_bld_copyRangeFromUCA(src, t, set->getRangeStart(i), set->g etRangeEnd(i), status);

	1193 }

	1194 }

	1195

	1196 if(U_SUCCESS(*status)) {

	1197 /* copy contractions from the UCA - this is felt mostly for cyrillic*/

	1198

	1199 uint32_t tailoredCE = UCOL_NOT_FOUND;

	1200 //UChar conts = (UChar )((uint8_t *)src->UCA->image + src->UCA->image- >UCAConsts+sizeof(UCAConstants));

	1201 UChar conts = (UChar )((uint8_t *)src->UCA->image + src->UCA->image->c ontractionUCACombos);

	1202 UCollationElements *ucaEl = ucol_openElements(src->UCA, NULL, 0, status) ;

	1203 // Check for null pointer

	1204 if (ucaEl == NULL) {

	1205 *status = U_MEMORY_ALLOCATION_ERROR;

	1206 return NULL;

	1207 }

	1208 while(*conts != 0) {

	1209 /tailoredCE = ucmpe32_get(t->mapping, conts);*/

	1210 tailoredCE = utrie_get32(t->mapping, *conts, NULL);

	1211 if(tailoredCE != UCOL_NOT_FOUND) {

	1212 UBool needToAdd = TRUE;

	1213 if(isCntTableElement(tailoredCE)) {

	1214 if(uprv_cnttab_isTailored(t->contractions, tailoredCE, conts +1, status) == TRUE) {

	1215 needToAdd = FALSE;

	1216 }

	1217 }

	1218 if (!needToAdd && isPrefix(tailoredCE) && *(conts+1)==0) {

	1219 UCAElements elm;

	1220 elm.cPoints = el.uchars;

	1221 elm.noOfCEs = 0;

	1222 elm.uchars[0] = *conts;

	1223 elm.uchars[1] = 0;

	1224 elm.cSize = 1;

	1225 elm.prefixChars[0] = *(conts+2);

	1226 elm.isThai = FALSE;

	1227 elm.prefix = elm.prefixChars;

	1228 elm.prefixSize = 1;

	1229 UCAElements prefixEnt=(UCAElements )uhash_get(t->prefixLoo kup, &elm);

	1230 if ((prefixEnt==NULL) \|\| (prefixEnt->prefix)!=(conts+2)) {

	1231 needToAdd = TRUE;

	1232 }

	1233 }

	1234 if(src->removeSet != NULL && uset_contains(src->removeSet, *cont s)) {

	1235 needToAdd = FALSE;

	1236 }

	1237

	1238 if(needToAdd == TRUE) { // we need to add if this contraction is not tailored.

	1239 if (*(conts+1) != 0) { // contractions

	1240 el.prefix = el.prefixChars;

	1241 el.prefixSize = 0;

	1242 el.cPoints = el.uchars;

	1243 el.noOfCEs = 0;

	1244 el.uchars[0] = *conts;

	1245 el.uchars[1] = *(conts+1);

	1246 if(*(conts+2)!=0) {

	1247 el.uchars[2] = *(conts+2);

	1248 el.cSize = 3;

	1249 } else {

	1250 el.cSize = 2;

	1251 }

	1252 ucol_setText(ucaEl, el.uchars, el.cSize, status);

	1253 }

	1254 else { // pre-context character

	1255 UChar str[4] = { 0 };

	1256 int32_t len=0;

	1257 int32_t preKeyLen=0;

	1258

	1259 el.cPoints = el.uchars;

	1260 el.noOfCEs = 0;

	1261 el.uchars[0] = *conts;

	1262 el.uchars[1] = 0;

	1263 el.cSize = 1;

	1264 el.prefixChars[0] = *(conts+2);

	1265 el.prefix = el.prefixChars;

	1266 el.prefixSize = 1;

	1267 if (el.prefixChars[0]!=0) {

	1268 // get CE of prefix character first

	1269 str[0]=el.prefixChars[0];

	1270 str[1]=0;

	1271 ucol_setText(ucaEl, str, 1, status);

	1272 while ((int32_t)(el.CEs[el.noOfCEs] = ucol_next(ucaE l, status))

	1273 != UCOL_NULLORDER) {

	1274 preKeyLen++; // count number of keys for prefix character

	1275 }

	1276 str[len++] = el.prefixChars[0];

	1277 }

	1278

	1279 str[len++] = el.uchars[0];

	1280 str[len]=0;

	1281 ucol_setText(ucaEl, str, len, status);

	1282 // Skip the keys for prefix character, then copy the res t to el.

	1283 while ((preKeyLen-->0) &&

	1284 (int32_t)(el.CEs[el.noOfCEs] = ucol_next(ucaEl, s tatus)) != UCOL_NULLORDER) {

	1285 continue;

	1286 }

	1287

	1288 }

	1289 while ((int32_t)(el.CEs[el.noOfCEs] = ucol_next(ucaEl, statu s)) != UCOL_NULLORDER) {

	1290 el.noOfCEs++;

	1291 }

	1292 uprv_uca_addAnElement(t, &el, status);

	1293 }

	1294

	1295 } else if(src->removeSet != NULL && uset_contains(src->removeSet, *c onts)) {

	1296 ucol_uprv_bld_copyRangeFromUCA(src, t, conts, conts, status);

	1297 }

	1298 conts+=3;

	1299 }

	1300 ucol_closeElements(ucaEl);

	1301 }

	1302

	1303 // Add completely ignorable elements

	1304 utrie_enum(&t->UCA->mapping, NULL, _processUCACompleteIgnorables, t);

	1305

	1306 // add tailoring characters related canonical closures

	1307 uprv_uca_canonicalClosure(t, src, NULL, status);

	1308

	1309 /* still need to produce compatibility closure */

	1310

	1311 UCATableHeader *myData = uprv_uca_assembleTable(t, status);

	1312

	1313 uprv_uca_closeTempTable(t);

	1314 uprv_free(image);

	1315

	1316 return myData;

	1317 }

	1318

	1319 U_CDECL_BEGIN

	1320 static UBool U_CALLCONV

	1321 ucol_bld_cleanup(void)

	1322 {

	1323 udata_close(invUCA_DATA_MEM);

	1324 invUCA_DATA_MEM = NULL;

	1325 _staticInvUCA = NULL;

	1326 return TRUE;

	1327 }

	1328 U_CDECL_END

	1329

	1330 U_CAPI const InverseUCATableHeader * U_EXPORT2

	1331 ucol_initInverseUCA(UErrorCode *status)

	1332 {

	1333 if(U_FAILURE(*status)) return NULL;

	1334

	1335 UBool needsInit;

	1336 UMTX_CHECK(NULL, (_staticInvUCA == NULL), needsInit);

	1337

	1338 if(needsInit) {

	1339 InverseUCATableHeader *newInvUCA = NULL;

	1340 UDataMemory *result = udata_openChoice(U_ICUDATA_COLL, INVC_DATA_TYPE, I NVC_DATA_NAME, isAcceptableInvUCA, NULL, status);

	1341

	1342 if(U_FAILURE(*status)) {

	1343 if (result) {

	1344 udata_close(result);

	1345 }

	1346 // This is not needed, as we are talking about

	1347 // memory we got from UData

	1348 //uprv_free(newInvUCA);

	1349 }

	1350

	1351 if(result != NULL) { /* It looks like sometimes we can fail to find the data file */

	1352 newInvUCA = (InverseUCATableHeader *)udata_getMemory(result);

	1353 UCollator *UCA = ucol_initUCA(status);

	1354 // UCA versions of UCA and inverse UCA should match

	1355 if(uprv_memcmp(newInvUCA->UCAVersion, UCA->image->UCAVersion, sizeof (UVersionInfo)) != 0) {

	1356 *status = U_INVALID_FORMAT_ERROR;

	1357 udata_close(result);

	1358 return NULL;

	1359 }

	1360

	1361 umtx_lock(NULL);

	1362 if(_staticInvUCA == NULL) {

	1363 invUCA_DATA_MEM = result;

	1364 _staticInvUCA = newInvUCA;

	1365 result = NULL;

	1366 newInvUCA = NULL;

	1367 }

	1368 umtx_unlock(NULL);

	1369

	1370 if(newInvUCA != NULL) {

	1371 udata_close(result);

	1372 // This is not needed, as we are talking about

	1373 // memory we got from UData

	1374 //uprv_free(newInvUCA);

	1375 }

	1376 else {

	1377 ucln_i18n_registerCleanup(UCLN_I18N_UCOL_BLD, ucol_bld_cleanup);

	1378 }

	1379 }

	1380 }

	1381 return _staticInvUCA;

	1382 }

	1383

	/* Names of the non-script reordering codes. These _must_ be kept in the
	 * order in which they are to be applied as defaults, and in sync with the
	 * UColReorderCode enum: ucol_findReorderingEntry() maps the index of a name
	 * to UCOL_REORDER_CODE_FIRST + index. The list is NULL-terminated.
	 */
	static const char* const ReorderingTokenNames[] = {
	    "SPACE",
	    "PUNCT",
	    "SYMBOL",
	    "CURRENCY",
	    "DIGIT",
	    NULL
	};

	1395

	/**
	 * Copies src to dst converted to upper case, writing at most length bytes
	 * including the terminating NUL; longer input is truncated.
	 *
	 * @param src    NUL-terminated input string
	 * @param dst    output buffer with room for at least length bytes
	 * @param length capacity of dst in bytes; if 0, dst is not touched
	 *
	 * NOTE(review): toupper() follows the current C locale; callers are
	 * expected to pass ASCII names.
	 */
	static void toUpper(const char* src, char* dst, uint32_t length) {
	    if (length == 0) {
	        return; /* no room even for the terminator */
	    }
	    uint32_t i = 0;
	    /* "i + 1 < length" keeps one byte for the NUL without the unsigned
	     * wrap-around that "length - 1" has for small lengths. */
	    for (; src[i] != '\0' && i + 1 < length; ++i) {
	        /* <ctype.h> functions require a value representable as unsigned char. */
	        dst[i] = (char)toupper((unsigned char)src[i]);
	    }
	    dst[i] = '\0';
	}

	1402

	1403 U_INTERNAL int32_t U_EXPORT2

	1404 ucol_findReorderingEntry(const char* name) {

	1405 char buffer[32];

	1406 toUpper(name, buffer, 32);

	1407 for (uint32_t entry = 0; ReorderingTokenNames[entry] != NULL; entry++) {

	1408 if (uprv_strcmp(buffer, ReorderingTokenNames[entry]) == 0) {

	1409 return entry + UCOL_REORDER_CODE_FIRST;

	1410 }

	1411 }

	1412 return USCRIPT_INVALID_CODE;

	1413 }

	1414

	1415 U_NAMESPACE_END

	1416

	1417 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « icu46/source/i18n/ucol_bld.h ('k') | icu46/source/i18n/ucol_cnt.h » ('j') | no next file with comments »