icu46/source/test/cintltst/cmsccoll.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/cintltst/cmsccoll.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1

	2 /********************************************************************

	3 * COPYRIGHT:

	4 * Copyright (c) 2001-2010, International Business Machines Corporation and

	5 * others. All Rights Reserved.

	6 ********************************************************************/

	7 /*******************************************************************************

	8 *

	9 * File cmsccoll.C

	10 *

	11 *******************************************************************************/

	12 /**

	13 * These are the tests specific to ICU 1.8 and above, that I didn't know where

	14 * to fit.

	15 */

	16

	17 #include <stdio.h>

	18

	19 #include "unicode/utypes.h"

	20

	21 #if !UCONFIG_NO_COLLATION

	22

	23 #include "unicode/ucol.h"

	24 #include "unicode/ucoleitr.h"

	25 #include "unicode/uloc.h"

	26 #include "cintltst.h"

	27 #include "ccolltst.h"

	28 #include "callcoll.h"

	29 #include "unicode/ustring.h"

	30 #include "string.h"

	31 #include "ucol_imp.h"

	32 #include "ucol_tok.h"

	33 #include "cmemory.h"

	34 #include "cstring.h"

	35 #include "uassert.h"

	36 #include "unicode/parseerr.h"

	37 #include "unicode/ucnv.h"

	38 #include "unicode/ures.h"

	39 #include "unicode/uscript.h"

	40 #include "uparse.h"

	41 #include "putilimp.h"

	42

	43

	/* Number of elements in a true array (must not be used on a pointer). */
	#define LEN(a) (sizeof(a)/sizeof((a)[0]))

	/* Row capacity, in code units, of the fixed-size test string tables below. */
	#define MAX_TOKEN_LEN 16

	47

	48 typedef UCollationResult tst_strcoll(void *collator, const int object,

	49 const UChar *source, const int sLen,

	50 const UChar *target, const int tLen);

	51

	52

	53

	/*
	 * Test strings for IncompleteCntTest, listed in the order the tailoring
	 * "& Z < ABC < Q < B" is expected to produce (each entry sorts before all
	 * entries after it).
	 */
	static const char cnt1[][10] = {
	    "AA",
	    "AC",
	    "AZ",
	    "AQ",
	    "AB",
	    "ABZ",
	    "ABQ",
	    "Z",
	    "ABC",
	    "Q",
	    "B"
	};

	/*
	 * Test strings for IncompleteCntTest, listed in the order the tailoring
	 * "& Z < DAVIS < MARK <DAV" is expected to produce.
	 */
	static const char cnt2[][10] = {
	    "DA",
	    "DAD",
	    "DAZ",
	    "MAR",
	    "Z",
	    "DAVIS",
	    "MARK",
	    "DAV",
	    "DAVI"
	};

	80

	81 static void IncompleteCntTest(void)

	82 {

	83 UErrorCode status = U_ZERO_ERROR;

	84 UChar temp[90];

	85 UChar t1[90];

	86 UChar t2[90];

	87

	88 UCollator *coll = NULL;

	89 uint32_t i = 0, j = 0;

	90 uint32_t size = 0;

	91

	92 u_uastrcpy(temp, " & Z < ABC < Q < B");

	93

	94 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N ULL,&status);

	95

	96 if(U_SUCCESS(status)) {

	97 size = sizeof(cnt1)/sizeof(cnt1[0]);

	98 for(i = 0; i < size-1; i++) {

	99 for(j = i+1; j < size; j++) {

	100 UCollationElements *iter;

	101 u_uastrcpy(t1, cnt1[i]);

	102 u_uastrcpy(t2, cnt1[j]);

	103 doTest(coll, t1, t2, UCOL_LESS);

	104 /* synwee : added collation element iterator test */

	105 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);

	106 if (U_FAILURE(status)) {

	107 log_err("Creation of iterator failed\n");

	108 break;

	109 }

	110 backAndForth(iter);

	111 ucol_closeElements(iter);

	112 }

	113 }

	114 }

	115

	116 ucol_close(coll);

	117

	118

	119 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");

	120 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NU LL, &status);

	121

	122 if(U_SUCCESS(status)) {

	123 size = sizeof(cnt2)/sizeof(cnt2[0]);

	124 for(i = 0; i < size-1; i++) {

	125 for(j = i+1; j < size; j++) {

	126 UCollationElements *iter;

	127 u_uastrcpy(t1, cnt2[i]);

	128 u_uastrcpy(t2, cnt2[j]);

	129 doTest(coll, t1, t2, UCOL_LESS);

	130

	131 /* synwee : added collation element iterator test */

	132 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);

	133 if (U_FAILURE(status)) {

	134 log_err("Creation of iterator failed\n");

	135 break;

	136 }

	137 backAndForth(iter);

	138 ucol_closeElements(iter);

	139 }

	140 }

	141 }

	142

	143 ucol_close(coll);

	144

	145

	146 }

	147

	148 const static char shifted[][20] = {

	149 "black bird",

	150 "black-bird",

	151 "blackbird",

	152 "black Bird",

	153 "black-Bird",

	154 "blackBird",

	155 "black birds",

	156 "black-birds",

	157 "blackbirds"

	158 };

	159

	160 const static UCollationResult shiftedTert[] = {

	161 UCOL_EQUAL,

	162 UCOL_EQUAL,

	163 UCOL_EQUAL,

	164 UCOL_LESS,

	165 UCOL_EQUAL,

	166 UCOL_EQUAL,

	167 UCOL_LESS,

	168 UCOL_EQUAL,

	169 UCOL_EQUAL

	170 };

	171

	172 const static char nonignorable[][20] = {

	173 "black bird",

	174 "black Bird",

	175 "black birds",

	176 "black-bird",

	177 "black-Bird",

	178 "black-birds",

	179 "blackbird",

	180 "blackBird",

	181 "blackbirds"

	182 };

	183

	184 static void BlackBirdTest(void) {

	185 UErrorCode status = U_ZERO_ERROR;

	186 UChar t1[90];

	187 UChar t2[90];

	188

	189 uint32_t i = 0, j = 0;

	190 uint32_t size = 0;

	191 UCollator *coll = ucol_open("en_US", &status);

	192

	193 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);

	194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);

	195

	196 if(U_SUCCESS(status)) {

	197 size = sizeof(nonignorable)/sizeof(nonignorable[0]);

	198 for(i = 0; i < size-1; i++) {

	199 for(j = i+1; j < size; j++) {

	200 u_uastrcpy(t1, nonignorable[i]);

	201 u_uastrcpy(t2, nonignorable[j]);

	202 doTest(coll, t1, t2, UCOL_LESS);

	203 }

	204 }

	205 }

	206

	207 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

	208 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);

	209

	210 if(U_SUCCESS(status)) {

	211 size = sizeof(shifted)/sizeof(shifted[0]);

	212 for(i = 0; i < size-1; i++) {

	213 for(j = i+1; j < size; j++) {

	214 u_uastrcpy(t1, shifted[i]);

	215 u_uastrcpy(t2, shifted[j]);

	216 doTest(coll, t1, t2, UCOL_LESS);

	217 }

	218 }

	219 }

	220

	221 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);

	222 if(U_SUCCESS(status)) {

	223 size = sizeof(shifted)/sizeof(shifted[0]);

	224 for(i = 1; i < size; i++) {

	225 u_uastrcpy(t1, shifted[i-1]);

	226 u_uastrcpy(t2, shifted[i]);

	227 doTest(coll, t1, t2, shiftedTert[i]);

	228 }

	229 }

	230

	231 ucol_close(coll);

	232 }

	233

	234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {

	235 {0x0041/'A'/, 0x0300, 0x0301, 0x0000},

	236 {0x0041/'A'/, 0x0300, 0x0316, 0x0000},

	237 {0x0041/'A'/, 0x0300, 0x0000},

	238 {0x00C0, 0x0301, 0x0000},

	239 /* this would work with forced normalization */

	240 {0x00C0, 0x0316, 0x0000}

	241 };

	242

	243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {

	244 {0x0041/'A'/, 0x0301, 0x0300, 0x0000},

	245 {0x0041/'A'/, 0x0316, 0x0300, 0x0000},

	246 {0x00C0, 0},

	247 {0x0041/'A'/, 0x0301, 0x0300, 0x0000},

	248 /* this would work with forced normalization */

	249 {0x0041/'A'/, 0x0316, 0x0300, 0x0000}

	250 };

	251

	252 const static UCollationResult results[] = {

	253 UCOL_GREATER,

	254 UCOL_EQUAL,

	255 UCOL_EQUAL,

	256 UCOL_GREATER,

	257 UCOL_EQUAL

	258 };

	259

	260 static void FunkyATest(void)

	261 {

	262

	263 int32_t i;

	264 UErrorCode status = U_ZERO_ERROR;

	265 UCollator *myCollation;

	266 myCollation = ucol_open("en_US", &status);

	267 if(U_FAILURE(status)){

	268 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	269 return;

	270 }

	271 log_verbose("Testing some A letters, for some reason\n");

	272 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	273 ucol_setStrength(myCollation, UCOL_TERTIARY);

	274 for (i = 0; i < 4 ; i++)

	275 {

	276 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);

	277 }

	278 ucol_close(myCollation);

	279 }

	280

	281 UColAttributeValue caseFirst[] = {

	282 UCOL_OFF,

	283 UCOL_LOWER_FIRST,

	284 UCOL_UPPER_FIRST

	285 };

	286

	287

	288 UColAttributeValue alternateHandling[] = {

	289 UCOL_NON_IGNORABLE,

	290 UCOL_SHIFTED

	291 };

	292

	293 UColAttributeValue caseLevel[] = {

	294 UCOL_OFF,

	295 UCOL_ON

	296 };

	297

	298 UColAttributeValue strengths[] = {

	299 UCOL_PRIMARY,

	300 UCOL_SECONDARY,

	301 UCOL_TERTIARY,

	302 UCOL_QUATERNARY,

	303 UCOL_IDENTICAL

	304 };

	305

	#if 0
	/* Printable names parallel to the strengths[] table. */
	static const char * strengthsC[] = {
	    "UCOL_PRIMARY",
	    "UCOL_SECONDARY",
	    "UCOL_TERTIARY",
	    "UCOL_QUATERNARY",
	    "UCOL_IDENTICAL"
	};

	/* Printable names parallel to the caseFirst[] table. */
	static const char * caseFirstC[] = {
	    "UCOL_OFF",
	    "UCOL_LOWER_FIRST",
	    "UCOL_UPPER_FIRST"
	};

	/* Printable names parallel to the alternateHandling[] table. */
	static const char * alternateHandlingC[] = {
	    "UCOL_NON_IGNORABLE",
	    "UCOL_SHIFTED"
	};

	/* Printable names parallel to the caseLevel[] table. */
	static const char * caseLevelC[] = {
	    "UCOL_OFF",
	    "UCOL_ON"
	};

	/* not used currently - does not test only prints */
	/*
	 * Prints the sort key of "M\u00E4rk Davis" for every combination of the
	 * caseFirst, alternateHandling, caseLevel and strengths tables.
	 */
	static void PrintMarkDavis(void)
	{
	    UErrorCode status = U_ZERO_ERROR;
	    UChar m[256];
	    uint8_t sortkey[256];
	    UCollator *coll = ucol_open("en_US", &status);
	    uint32_t h,i,j,k, sortkeysize;
	    uint32_t sizem = 0;
	    char buffer[512];
	    uint32_t len = 512;

	    log_verbose("PrintMarkDavis");

	    u_uastrcpy(m, "Mark Davis");
	    sizem = u_strlen(m);

	    m[1] = 0xe4;

	    for(i = 0; i<sizem; i++) {
	        fprintf(stderr, "\\u%04X ", m[i]);
	    }
	    fprintf(stderr, "\n");

	    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
	        /* Index with h: i still holds a stale value from the previous loop. */
	        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
	        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

	        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
	            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
	            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

	            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
	                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
	                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

	                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
	                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
	                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
	                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
	                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
	                }

	            }

	        }

	    }
	}
	#endif

	383

	384 static void BillFairmanTest(void) {

	385 /*

	386 ** check for actual locale via ICU resource bundles

	387 **

	388 ** lp points to the original locale ("fr_FR_....")

	389 */

	390

	391 UResourceBundle lr,cr;

	392 UErrorCode lec = U_ZERO_ERROR;

	393 const char *lp = "fr_FR_you_ll_never_find_this_locale";

	394

	395 log_verbose("BillFairmanTest\n");

	396

	397 lr = ures_open(NULL,lp,&lec);

	398 if (lr) {

	399 cr = ures_getByKey(lr,"collations",0,&lec);

	400 if (cr) {

	401 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);

	402 if (lp) {

	403 if (U_SUCCESS(lec)) {

	404 if(strcmp(lp, "fr") != 0) {

	405 log_err("Wrong locale for French Collation Data, expecte d \"fr\" got %s", lp);

	406 }

	407 }

	408 }

	409 ures_close(cr);

	410 }

	411 ures_close(lr);

	412 }

	413 }

	414

	415 static void testPrimary(UCollator* col, const UChar* p,const UChar* q){

	416 UChar source[256] = { '\0'};

	417 UChar target[256] = { '\0'};

	418 UChar preP = 0x31a3;

	419 UChar preQ = 0x310d;

	420 /*

	421 UChar preP = (p>0x0400 && p<0x0500)?0x00e1:0x491;

	422 UChar preQ = (p>0x0400 && p<0x0500)?0x0041:0x413;

	423 */

	424 /log_verbose("Testing primary\n");/

	425

	426 doTest(col, p, q, UCOL_LESS);

	427 /*

	428 UCollationResult result = ucol_strcoll(col,p,u_strlen(p),q,u_strlen(q));

	429

	430 if(result!=UCOL_LESS){

	431 aescstrdup(p,utfSource,256);

	432 aescstrdup(q,utfTarget,256);

	433 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTar get);

	434 }

	435 */

	436 source[0] = preP;

	437 u_strcpy(source+1,p);

	438 target[0] = preQ;

	439 u_strcpy(target+1,q);

	440 doTest(col, source, target, UCOL_LESS);

	441 /*

	442 fprintf(file,"Primary swamps 2nd failed source: %s target: %s \n", utfSourc e,utfTarget);

	443 */

	444 }

	445

	446 static void testSecondary(UCollator* col, const UChar* p,const UChar* q){

	447 UChar source[256] = { '\0'};

	448 UChar target[256] = { '\0'};

	449

	450 /log_verbose("Testing secondary\n");/

	451

	452 doTest(col, p, q, UCOL_LESS);

	453 /*

	454 fprintf(file,"secondary failed source: %s target: %s \n", utfSource,utfTarg et);

	455 */

	456 source[0] = 0x0053;

	457 u_strcpy(source+1,p);

	458 target[0]= 0x0073;

	459 u_strcpy(target+1,q);

	460

	461 doTest(col, source, target, UCOL_LESS);

	462 /*

	463 fprintf(file,"secondary swamps 3rd failed source: %s target: %s \n",utfSour ce,utfTarget);

	464 */

	465

	466

	467 u_strcpy(source,p);

	468 source[u_strlen(p)] = 0x62;

	469 source[u_strlen(p)+1] = 0;

	470

	471

	472 u_strcpy(target,q);

	473 target[u_strlen(q)] = 0x61;

	474 target[u_strlen(q)+1] = 0;

	475

	476 doTest(col, source, target, UCOL_GREATER);

	477

	478 /*

	479 fprintf(file,"secondary is swamped by 1 failed source: %s target: %s \n",u tfSource,utfTarget);

	480 */

	481 }

	482

	483 static void testTertiary(UCollator* col, const UChar* p,const UChar* q){

	484 UChar source[256] = { '\0'};

	485 UChar target[256] = { '\0'};

	486

	487 /log_verbose("Testing tertiary\n");/

	488

	489 doTest(col, p, q, UCOL_LESS);

	490 /*

	491 fprintf(file,"Tertiary failed source: %s target: %s \n",utfSource,utfTarget );

	492 */

	493 source[0] = 0x0020;

	494 u_strcpy(source+1,p);

	495 target[0]= 0x002D;

	496 u_strcpy(target+1,q);

	497

	498 doTest(col, source, target, UCOL_LESS);

	499 /*

	500 fprintf(file,"Tertiary swamps 4th failed source: %s target: %s \n", utfSour ce,utfTarget);

	501 */

	502

	503 u_strcpy(source,p);

	504 source[u_strlen(p)] = 0xE0;

	505 source[u_strlen(p)+1] = 0;

	506

	507 u_strcpy(target,q);

	508 target[u_strlen(q)] = 0x61;

	509 target[u_strlen(q)+1] = 0;

	510

	511 doTest(col, source, target, UCOL_GREATER);

	512

	513 /*

	514 fprintf(file,"Tertiary is swamped by 3rd failed source: %s target: %s \n",u tfSource,utfTarget);

	515 */

	516 }

	517

	518 static void testEquality(UCollator* col, const UChar* p,const UChar* q){

	519 /*

	520 UChar source[256] = { '\0'};

	521 UChar target[256] = { '\0'};

	522 */

	523

	524 doTest(col, p, q, UCOL_EQUAL);

	525 /*

	526 fprintf(file,"Primary failed source: %s target: %s \n", utfSource,utfTarget );

	527 */

	528 }

	529

	530 static void testCollator(UCollator coll, UErrorCode status) {

	531 const UChar rules = NULL, current = NULL;

	532 int32_t ruleLen = 0;

	533 uint32_t strength = 0;

	534 uint32_t chOffset = 0; uint32_t chLen = 0;

	535 uint32_t exOffset = 0; uint32_t exLen = 0;

	536 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;

	537 uint32_t firstEx = 0;

	538 /* uint32_t rExpsLen = 0; */

	539 uint32_t firstLen = 0;

	540 UBool varT = FALSE; UBool top_ = TRUE;

	541 uint16_t specs = 0;

	542 UBool startOfRules = TRUE;

	543 UBool lastReset = FALSE;

	544 UBool before = FALSE;

	545 uint32_t beforeStrength = 0;

	546 UColTokenParser src;

	547 UColOptionSet opts;

	548

	549 UChar first[256];

	550 UChar second[256];

	551 UChar tempB[256];

	552 uint32_t tempLen;

	553 UChar *rulesCopy = NULL;

	554 UParseError parseError;

	555

	556 uprv_memset(&src, 0, sizeof(UColTokenParser));

	557

	558 src.opts = &opts;

	559

	560 rules = ucol_getRules(coll, &ruleLen);

	561 if(U_SUCCESS(*status) && ruleLen > 0) {

	562 rulesCopy = (UChar )uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)si zeof(UChar));

	563 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));

	564 src.current = src.source = rulesCopy;

	565 src.end = rulesCopy+ruleLen;

	566 src.extraCurrent = src.end;

	567 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

	568 first = second = 0;

	569

	570 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToke n can cause the pointer to

	571 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */

	572 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, st atus)) != NULL) {

	573 strength = src.parsedToken.strength;

	574 chOffset = src.parsedToken.charsOffset;

	575 chLen = src.parsedToken.charsLen;

	576 exOffset = src.parsedToken.extensionOffset;

	577 exLen = src.parsedToken.extensionLen;

	578 prefixOffset = src.parsedToken.prefixOffset;

	579 prefixLen = src.parsedToken.prefixLen;

	580 specs = src.parsedToken.flags;

	581

	582 startOfRules = FALSE;

	583 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);

	584 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

	585 if(top_) { /* if reset is on top, the sequence is broken. We should have a n empty string */

	586 second[0] = 0;

	587 } else {

	588 u_strncpy(second,src.source+chOffset, chLen);

	589 second[chLen] = 0;

	590

	591 if(exLen > 0 && firstEx == 0) {

	592 u_strncat(first, src.source+exOffset, exLen);

	593 first[firstLen+exLen] = 0;

	594 }

	595

	596 if(lastReset == TRUE && prefixLen != 0) {

	597 u_strncpy(first+prefixLen, first, firstLen);

	598 u_strncpy(first, src.source+prefixOffset, prefixLen);

	599 first[firstLen+prefixLen] = 0;

	600 firstLen = firstLen+prefixLen;

	601 }

	602

	603 if(before == TRUE) { /* swap first and second */

	604 u_strcpy(tempB, first);

	605 u_strcpy(first, second);

	606 u_strcpy(second, tempB);

	607

	608 tempLen = firstLen;

	609 firstLen = chLen;

	610 chLen = tempLen;

	611

	612 tempLen = firstEx;

	613 firstEx = exLen;

	614 exLen = tempLen;

	615 if(beforeStrength < strength) {

	616 strength = beforeStrength;

	617 }

	618 }

	619 }

	620 lastReset = FALSE;

	621

	622 switch(strength){

	623 case UCOL_IDENTICAL:

	624 testEquality(coll,first,second);

	625 break;

	626 case UCOL_PRIMARY:

	627 testPrimary(coll,first,second);

	628 break;

	629 case UCOL_SECONDARY:

	630 testSecondary(coll,first,second);

	631 break;

	632 case UCOL_TERTIARY:

	633 testTertiary(coll,first,second);

	634 break;

	635 case UCOL_TOK_RESET:

	636 lastReset = TRUE;

	637 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);

	638 if(before) {

	639 beforeStrength = (specs & UCOL_TOK_BEFORE)-1;

	640 }

	641 break;

	642 default:

	643 break;

	644 }

	645

	646 if(before == TRUE && strength != UCOL_TOK_RESET) { /* first and second wer e swapped */

	647 before = FALSE;

	648 } else {

	649 firstLen = chLen;

	650 firstEx = exLen;

	651 u_strcpy(first, second);

	652 }

	653 }

	654 uprv_free(src.source);

	655 }

	656 }

	657

	658 static UCollationResult ucaTest(void collator, const int object, const UChar s ource, const int sLen, const UChar *target, const int tLen) {

	659 UCollator UCA = (UCollator )collator;

	660 return ucol_strcoll(UCA, source, sLen, target, tLen);

	661 }

	662

	663 /*

	664 static UCollationResult winTest(void collator, const int object, const UChar s ource, const int sLen, const UChar *target, const int tLen) {

	665 #ifdef U_WINDOWS

	666 LCID lcid = (LCID)collator;

	667 return (UCollationResult)CompareString(lcid, 0, source, sLen, target, tLen);

	668 #else

	669 return 0;

	670 #endif

	671 }

	672 */

	673

	674 static UCollationResult swampEarlier(tst_strcoll* func, void *collator, int opts ,

	675 UChar s1, UChar s2,

	676 const UChar *s, const uint32_t sLen,

	677 const UChar *t, const uint32_t tLen) {

	678 UChar source[256] = {0};

	679 UChar target[256] = {0};

	680

	681 source[0] = s1;

	682 u_strcpy(source+1, s);

	683 target[0] = s2;

	684 u_strcpy(target+1, t);

	685

	686 return func(collator, opts, source, sLen+1, target, tLen+1);

	687 }

	688

	689 static UCollationResult swampLater(tst_strcoll* func, void *collator, int opts,

	690 UChar s1, UChar s2,

	691 const UChar *s, const uint32_t sLen,

	692 const UChar *t, const uint32_t tLen) {

	693 UChar source[256] = {0};

	694 UChar target[256] = {0};

	695

	696 u_strcpy(source, s);

	697 source[sLen] = s1;

	698 u_strcpy(target, t);

	699 target[tLen] = s2;

	700

	701 return func(collator, opts, source, sLen+1, target, tLen+1);

	702 }

	703

	704 static uint32_t probeStrength(tst_strcoll* func, void *collator, int opts,

	705 const UChar *s, const uint32_t sLen,

	706 const UChar *t, const uint32_t tLen,

	707 UCollationResult result) {

	708 /UChar fPrimary = 0x6d;/

	709 /UChar sPrimary = 0x6e;/

	710 UChar fSecondary = 0x310d;

	711 UChar sSecondary = 0x31a3;

	712 UChar fTertiary = 0x310f;

	713 UChar sTertiary = 0x31b7;

	714

	715 UCollationResult oposite;

	716 if(result == UCOL_EQUAL) {

	717 return UCOL_IDENTICAL;

	718 } else if(result == UCOL_GREATER) {

	719 oposite = UCOL_LESS;

	720 } else {

	721 oposite = UCOL_GREATER;

	722 }

	723

	724 if(swampEarlier(func, collator, opts, sSecondary, fSecondary, s, sLen, t, tLen ) == result) {

	725 return UCOL_PRIMARY;

	726 } else if((swampEarlier(func, collator, opts, sTertiary, 0x310f, s, sLen, t, t Len) == result) &&

	727 (swampEarlier(func, collator, opts, 0x310f, sTertiary, s, sLen, t, tLen) == result)) {

	728 return UCOL_SECONDARY;

	729 } else if((swampLater(func, collator, opts, sTertiary, fTertiary, s, sLen, t, tLen) == result) &&

	730 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == result)) {

	731 return UCOL_TERTIARY;

	732 } else if((swampLater(func, collator, opts, sTertiary, 0x310f, s, sLen, t, tLe n) == oposite) &&

	733 (swampLater(func, collator, opts, fTertiary, sTertiary, s, sLen, t, tLen) == oposite)) {

	734 return UCOL_QUATERNARY;

	735 } else {

	736 return UCOL_IDENTICAL;

	737 }

	738 }

	739

	740 static char getRelationSymbol(UCollationResult res, uint32_t strength, char bu ffer) {

	741 uint32_t i = 0;

	742

	743 if(res == UCOL_EQUAL \|\| strength == 0xdeadbeef) {

	744 buffer[0] = '=';

	745 buffer[1] = '=';

	746 buffer[2] = '\0';

	747 } else if(res == UCOL_GREATER) {

	748 for(i = 0; i<strength+1; i++) {

	749 buffer[i] = '>';

	750 }

	751 buffer[strength+1] = '\0';

	752 } else {

	753 for(i = 0; i<strength+1; i++) {

	754 buffer[i] = '<';

	755 }

	756 buffer[strength+1] = '\0';

	757 }

	758

	759 return buffer;

	760 }

	761

	762

	763

	764 static void logFailure (const char platform, const char test,

	765 const UChar *source, const uint32_t sLen,

	766 const UChar *target, const uint32_t tLen,

	767 UCollationResult realRes, uint32_t realStrength,

	768 UCollationResult expRes, uint32_t expStrength, UBool err or) {

	769

	770 uint32_t i = 0;

	771

	772 char sEsc[256], s[256], tEsc[256], t[256], b[256], output[512], relation[256];

	773 static int32_t maxOutputLength = 0;

	774 int32_t outputLength;

	775

	776 sEsc = tEsc = s = t = 0;

	777 if(error == TRUE) {

	778 log_err("Difference between expected and generated order. Run test with -v f or more info\n");

	779 } else if(getTestOption(VERBOSITY_OPTION) == 0) {

	780 return;

	781 }

	782 for(i = 0; i<sLen; i++) {

	783 sprintf(b, "%04X", source[i]);

	784 strcat(sEsc, "\\u");

	785 strcat(sEsc, b);

	786 strcat(s, b);

	787 strcat(s, " ");

	788 if(source[i] < 0x80) {

	789 sprintf(b, "(%c)", source[i]);

	790 strcat(sEsc, b);

	791 }

	792 }

	793 for(i = 0; i<tLen; i++) {

	794 sprintf(b, "%04X", target[i]);

	795 strcat(tEsc, "\\u");

	796 strcat(tEsc, b);

	797 strcat(t, b);

	798 strcat(t, " ");

	799 if(target[i] < 0x80) {

	800 sprintf(b, "(%c)", target[i]);

	801 strcat(tEsc, b);

	802 }

	803 }

	804 /*

	805 strcpy(output, "[[ ");

	806 strcat(output, sEsc);

	807 strcat(output, getRelationSymbol(expRes, expStrength, relation));

	808 strcat(output, tEsc);

	809

	810 strcat(output, " : ");

	811

	812 strcat(output, sEsc);

	813 strcat(output, getRelationSymbol(realRes, realStrength, relation));

	814 strcat(output, tEsc);

	815 strcat(output, " ]] ");

	816

	817 log_verbose("%s", output);

	818 */

	819

	820

	821 strcpy(output, "DIFF: ");

	822

	823 strcat(output, s);

	824 strcat(output, " : ");

	825 strcat(output, t);

	826

	827 strcat(output, test);

	828 strcat(output, ": ");

	829

	830 strcat(output, sEsc);

	831 strcat(output, getRelationSymbol(expRes, expStrength, relation));

	832 strcat(output, tEsc);

	833

	834 strcat(output, " ");

	835

	836 strcat(output, platform);

	837 strcat(output, ": ");

	838

	839 strcat(output, sEsc);

	840 strcat(output, getRelationSymbol(realRes, realStrength, relation));

	841 strcat(output, tEsc);

	842

	843 outputLength = (int32_t)strlen(output);

	844 if(outputLength > maxOutputLength) {

	845 maxOutputLength = outputLength;

	846 U_ASSERT(outputLength < sizeof(output));

	847 }

	848

	849 log_verbose("%s\n", output);

	850

	851 }

	852

	853 /*

	854 static void printOutRules(const UChar *rules) {

	855 uint32_t len = u_strlen(rules);

	856 uint32_t i = 0;

	857 char toPrint;

	858 uint32_t line = 0;

	859

	860 fprintf(stdout, "Rules:");

	861

	862 for(i = 0; i<len; i++) {

	863 if(rules[i]<0x7f && rules[i]>=0x20) {

	864 toPrint = (char)rules[i];

	865 if(toPrint == '&') {

	866 line = 1;

	867 fprintf(stdout, "\n&");

	868 } else if(toPrint == ';') {

	869 fprintf(stdout, "<<");

	870 line+=2;

	871 } else if(toPrint == ',') {

	872 fprintf(stdout, "<<<");

	873 line+=3;

	874 } else {

	875 fprintf(stdout, "%c", toPrint);

	876 line++;

	877 }

	878 } else if(rules[i]<0x3400 \|\| rules[i]>=0xa000) {

	879 fprintf(stdout, "\\u%04X", rules[i]);

	880 line+=6;

	881 }

	882 if(line>72) {

	883 fprintf(stdout, "\n");

	884 line = 0;

	885 }

	886 }

	887

	888 log_verbose("\n");

	889

	890 }

	891 */

	892

	893 static uint32_t testSwitch(tst_strcoll* func, void collator, int opts, uint32_t strength, const UChar first, const UChar second, const char msg, UBool error ) {

	894 uint32_t diffs = 0;

	895 UCollationResult realResult;

	896 uint32_t realStrength;

	897

	898 uint32_t sLen = u_strlen(first);

	899 uint32_t tLen = u_strlen(second);

	900

	901 realResult = func(collator, opts, first, sLen, second, tLen);

	902 realStrength = probeStrength(func, collator, opts, first, sLen, second, tLen, realResult);

	903

	904 if(strength == UCOL_IDENTICAL && realResult != UCOL_IDENTICAL) {

	905 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStre ngth, UCOL_EQUAL, strength, error);

	906 diffs++;

	907 } else if(realResult != UCOL_LESS \|\| realStrength != strength) {

	908 logFailure(msg, "tailoring", first, sLen, second, tLen, realResult, realStre ngth, UCOL_LESS, strength, error);

	909 diffs++;

	910 }

	911 return diffs;

	912 }

	913

	914

	915 static void testAgainstUCA(UCollator coll, UCollator UCA, const char refName, UBool error, UErrorCode status) {

	916 const UChar rules = NULL, current = NULL;

	917 int32_t ruleLen = 0;

	918 uint32_t strength = 0;

	919 uint32_t chOffset = 0; uint32_t chLen = 0;

	920 uint32_t exOffset = 0; uint32_t exLen = 0;

	921 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;

	922 /* uint32_t rExpsLen = 0; */

	923 uint32_t firstLen = 0, secondLen = 0;

	924 UBool varT = FALSE; UBool top_ = TRUE;

	925 uint16_t specs = 0;

	926 UBool startOfRules = TRUE;

	927 UColTokenParser src;

	928 UColOptionSet opts;

	929

	930 UChar first[256];

	931 UChar second[256];

	932 UChar *rulesCopy = NULL;

	933

	934 uint32_t UCAdiff = 0;

	935 uint32_t Windiff = 1;

	936 UParseError parseError;

	937

	938 uprv_memset(&src, 0, sizeof(UColTokenParser));

	939 src.opts = &opts;

	940

	941 rules = ucol_getRules(coll, &ruleLen);

	942

	943 /printOutRules(rules);/

	944

	945 if(U_SUCCESS(*status) && ruleLen > 0) {

	946 rulesCopy = (UChar )uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)si zeof(UChar));

	947 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));

	948 src.current = src.source = rulesCopy;

	949 src.end = rulesCopy+ruleLen;

	950 src.extraCurrent = src.end;

	951 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

	952 first = second = 0;

	953

	954 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextToken ca n cause the pointer to

	955 the rules copy in src.source to get reallocated, freeing the original poi nter in rulesCopy */

	956 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,st atus)) != NULL) {

	957 strength = src.parsedToken.strength;

	958 chOffset = src.parsedToken.charsOffset;

	959 chLen = src.parsedToken.charsLen;

	960 exOffset = src.parsedToken.extensionOffset;

	961 exLen = src.parsedToken.extensionLen;

	962 prefixOffset = src.parsedToken.prefixOffset;

	963 prefixLen = src.parsedToken.prefixLen;

	964 specs = src.parsedToken.flags;

	965

	966 startOfRules = FALSE;

	967 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);

	968 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

	969

	970 u_strncpy(second,src.source+chOffset, chLen);

	971 second[chLen] = 0;

	972 secondLen = chLen;

	973

	974 if(exLen > 0) {

	975 u_strncat(first, src.source+exOffset, exLen);

	976 first[firstLen+exLen] = 0;

	977 firstLen += exLen;

	978 }

	979

	980 if(strength != UCOL_TOK_RESET) {

	981 if((first<0x3400 \|\| first>=0xa000) && (second<0x3400 \|\| second>=0xa0 00)) {

	982 UCAdiff += testSwitch(&ucaTest, (void *)UCA, 0, strength, first, secon d, refName, error);

	983 /Windiff += testSwitch(&winTest, (void )lcid, 0, strength, first, se cond, "Win32");*/

	984 }

	985 }

	986

	987

	988 firstLen = chLen;

	989 u_strcpy(first, second);

	990

	991 }

	992 if(UCAdiff != 0 && Windiff != 0) {

	993 log_verbose("\n");

	994 }

	995 if(UCAdiff == 0) {

	996 log_verbose("No immediate difference with %s!\n", refName);

	997 }

	998 if(Windiff == 0) {

	999 log_verbose("No immediate difference with Win32!\n");

	1000 }

	1001 uprv_free(src.source);

	1002 }

	1003 }

	1004

	/*
	 * Takes two CEs (lead and continuation) and
	 * compares them as CEs should be compared:
	 * primary vs. primary, secondary vs. secondary
	 * tertiary vs. tertiary
	 *
	 * s1/s2 are the lead and continuation CE of the first element, t1/t2 those
	 * of the second. The upper 16 bits of each CE are treated as the primary
	 * weight, bits 8..15 as the secondary and bits 0..7 as the tertiary.
	 *
	 * Returns 0 when both pairs are bit-for-bit equal, -1 when the first
	 * element sorts lower and 1 otherwise.
	 */
	static int32_t compareCEs(uint32_t s1, uint32_t s2,
	                          uint32_t t1, uint32_t t2) {
	    uint32_t s = 0, t = 0;

	    if(s1 == t1 && s2 == t2) {
	        return 0;
	    }

	    /* Primary: lead primary in the high half, continuation primary in the low half. */
	    s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
	    t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
	    if(s < t) {
	        return -1;
	    }
	    if(s > t) {
	        return 1;
	    }

	    /* Secondary: lead byte above continuation byte. */
	    s = (s1 & 0x0000FF00) | ((s2 & 0x0000FF00)>>8);
	    t = (t1 & 0x0000FF00) | ((t2 & 0x0000FF00)>>8);
	    if(s < t) {
	        return -1;
	    }
	    if(s > t) {
	        return 1;
	    }

	    /* Tertiary: the inputs differ somewhere, and every bit is covered by
	     * one of the three levels, so the values cannot be equal here. */
	    s = ((s1 & 0x000000FF)<<8) | (s2 & 0x000000FF);
	    t = ((t1 & 0x000000FF)<<8) | (t2 & 0x000000FF);
	    if(s < t) {
	        return -1;
	    }
	    return 1;
	}

	1041

	/*
	 * One entry of the indirect-positioning table: the lead and continuation
	 * CE at the start of a range, and the lead and continuation CE at its
	 * limit (both limit fields are 0 when the entry has no upper bound).
	 */
	typedef struct {
	    uint32_t startCE;
	    uint32_t startContCE;
	    uint32_t limitCE;
	    uint32_t limitContCE;
	} indirectBoundaries;

	1048

	1049 /* these values are used for finding CE values for indirect positioning. */

	1050 /* Indirect positioning is a mechanism for allowing resets on symbolic */

	1051 /* values. It only works for resets and you cannot tailor indirect names */

	1052 /* An indirect name can define either an anchor point or a range. An */

	1053 /* anchor point behaves in exactly the same way as a code point in reset */

	1054 /* would, except that it cannot be tailored. A range (we currently only */

	1055 /* know for the [top] range will explicitly set the upper bound for */

	1056 /* generated CEs, thus allowing for better control over how many CEs can */

	1057 /* be squeezed between in the range without performance penalty. */

	1058 /* In that respect, we use [top] for tailoring of locales that use CJK */

	1059 /* characters. Other indirect values are currently a pure convenience, */

	1060 /* they can be used to assure that the CEs will be always positioned in */

	1061 /* the same place relative to a point with known properties (e.g. first */

	1062 /* primary ignorable). */

	1063 static indirectBoundaries ucolIndirectBoundaries[15];

	1064 static UBool indirectBoundariesSet = FALSE;

	1065 static void setIndirectBoundaries(uint32_t indexR, uint32_t start, uint32_t en d) {

	1066 /* Set values for the top - TODO: once we have values for all the indirects, we are going */

	1067 /* to initalize here. */

	1068 ucolIndirectBoundaries[indexR].startCE = start[0];

	1069 ucolIndirectBoundaries[indexR].startContCE = start[1];

	1070 if(end) {

	1071 ucolIndirectBoundaries[indexR].limitCE = end[0];

	1072 ucolIndirectBoundaries[indexR].limitContCE = end[1];

	1073 } else {

	1074 ucolIndirectBoundaries[indexR].limitCE = 0;

	1075 ucolIndirectBoundaries[indexR].limitContCE = 0;

	1076 }

	1077 }

	1078

	1079 static void testCEs(UCollator coll, UErrorCode status) {

	1080 const UChar rules = NULL, current = NULL;

	1081 int32_t ruleLen = 0;

	1082

	1083 uint32_t strength = 0;

	1084 uint32_t maxStrength = UCOL_IDENTICAL;

	1085 uint32_t baseCE, baseContCE, nextCE, nextContCE, currCE, currContCE;

	1086 uint32_t lastCE;

	1087 uint32_t lastContCE;

	1088

	1089 int32_t result = 0;

	1090 uint32_t chOffset = 0; uint32_t chLen = 0;

	1091 uint32_t exOffset = 0; uint32_t exLen = 0;

	1092 uint32_t prefixOffset = 0; uint32_t prefixLen = 0;

	1093 uint32_t oldOffset = 0;

	1094

	1095 /* uint32_t rExpsLen = 0; */

	1096 /* uint32_t firstLen = 0; */

	1097 uint16_t specs = 0;

	1098 UBool varT = FALSE; UBool top_ = TRUE;

	1099 UBool startOfRules = TRUE;

	1100 UBool before = FALSE;

	1101 UColTokenParser src;

	1102 UColOptionSet opts;

	1103 UParseError parseError;

	1104 UChar *rulesCopy = NULL;

	1105 collIterate *c = uprv_new_collIterate(status);

	1106 UCAConstants *consts = NULL;

	1107 uint32_t UCOL_RESET_TOP_VALUE, /UCOL_RESET_TOP_CONT, /

	1108 UCOL_NEXT_TOP_VALUE, UCOL_NEXT_TOP_CONT;

	1109 const char *colLoc;

	1110 UCollator *UCA = ucol_open("root", status);

	1111

	1112 if (U_FAILURE(*status)) {

	1113 log_err("Could not open root collator %s\n", u_errorName(*status));

	1114 uprv_delete_collIterate(c);

	1115 return;

	1116 }

	1117

	1118 colLoc = ucol_getLocaleByType(coll, ULOC_ACTUAL_LOCALE, status);

	1119 if (U_FAILURE(*status)) {

	1120 log_err("Could not get collator name: %s\n", u_errorName(*status));

	1121 ucol_close(UCA);

	1122 uprv_delete_collIterate(c);

	1123 return;

	1124 }

	1125

	1126 uprv_memset(&src, 0, sizeof(UColTokenParser));

	1127

	1128 consts = (UCAConstants )((uint8_t )UCA->image + UCA->image->UCAConsts);

	1129 UCOL_RESET_TOP_VALUE = consts->UCA_LAST_NON_VARIABLE[0];

	1130 /UCOL_RESET_TOP_CONT = consts->UCA_LAST_NON_VARIABLE[1]; /

	1131 UCOL_NEXT_TOP_VALUE = consts->UCA_FIRST_IMPLICIT[0];

	1132 UCOL_NEXT_TOP_CONT = consts->UCA_FIRST_IMPLICIT[1];

	1133

	1134 baseCE=baseContCE=nextCE=nextContCE=currCE=currContCE=lastCE=lastContCE = UC OL_NOT_FOUND;

	1135

	1136 src.opts = &opts;

	1137

	1138 rules = ucol_getRules(coll, &ruleLen);

	1139

	1140 src.invUCA = ucol_initInverseUCA(status);

	1141

	1142 if(indirectBoundariesSet == FALSE) {

	1143 /* UCOL_RESET_TOP_VALUE */

	1144 setIndirectBoundaries(0, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIRS T_IMPLICIT);

	1145 /* UCOL_FIRST_PRIMARY_IGNORABLE */

	1146 setIndirectBoundaries(1, consts->UCA_FIRST_PRIMARY_IGNORABLE, 0);

	1147 /* UCOL_LAST_PRIMARY_IGNORABLE */

	1148 setIndirectBoundaries(2, consts->UCA_LAST_PRIMARY_IGNORABLE, 0);

	1149 /* UCOL_FIRST_SECONDARY_IGNORABLE */

	1150 setIndirectBoundaries(3, consts->UCA_FIRST_SECONDARY_IGNORABLE, 0);

	1151 /* UCOL_LAST_SECONDARY_IGNORABLE */

	1152 setIndirectBoundaries(4, consts->UCA_LAST_SECONDARY_IGNORABLE, 0);

	1153 /* UCOL_FIRST_TERTIARY_IGNORABLE */

	1154 setIndirectBoundaries(5, consts->UCA_FIRST_TERTIARY_IGNORABLE, 0);

	1155 /* UCOL_LAST_TERTIARY_IGNORABLE */

	1156 setIndirectBoundaries(6, consts->UCA_LAST_TERTIARY_IGNORABLE, 0);

	1157 /* UCOL_FIRST_VARIABLE */

	1158 setIndirectBoundaries(7, consts->UCA_FIRST_VARIABLE, 0);

	1159 /* UCOL_LAST_VARIABLE */

	1160 setIndirectBoundaries(8, consts->UCA_LAST_VARIABLE, 0);

	1161 /* UCOL_FIRST_NON_VARIABLE */

	1162 setIndirectBoundaries(9, consts->UCA_FIRST_NON_VARIABLE, 0);

	1163 /* UCOL_LAST_NON_VARIABLE */

	1164 setIndirectBoundaries(10, consts->UCA_LAST_NON_VARIABLE, consts->UCA_FIR ST_IMPLICIT);

	1165 /* UCOL_FIRST_IMPLICIT */

	1166 setIndirectBoundaries(11, consts->UCA_FIRST_IMPLICIT, 0);

	1167 /* UCOL_LAST_IMPLICIT */

	1168 setIndirectBoundaries(12, consts->UCA_LAST_IMPLICIT, consts->UCA_FIRST_T RAILING);

	1169 /* UCOL_FIRST_TRAILING */

	1170 setIndirectBoundaries(13, consts->UCA_FIRST_TRAILING, 0);

	1171 /* UCOL_LAST_TRAILING */

	1172 setIndirectBoundaries(14, consts->UCA_LAST_TRAILING, 0);

	1173 ucolIndirectBoundaries[14].limitCE = (consts->UCA_PRIMARY_SPECIAL_MIN<<2 4);

	1174 indirectBoundariesSet = TRUE;

	1175 }

	1176

	1177

	1178 if(U_SUCCESS(*status) && ruleLen > 0) {

	1179 rulesCopy = (UChar )uprv_malloc((ruleLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE )sizeof(UChar));

	1180 uprv_memcpy(rulesCopy, rules, ruleLen*sizeof(UChar));

	1181 src.current = src.source = rulesCopy;

	1182 src.end = rulesCopy+ruleLen;

	1183 src.extraCurrent = src.end;

	1184 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

	1185

	1186 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNext Token can cause the pointer to

	1187 the rules copy in src.source to get reallocated, freeing the orig inal pointer in rulesCopy */

	1188 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseErro r,status)) != NULL) {

	1189 strength = src.parsedToken.strength;

	1190 chOffset = src.parsedToken.charsOffset;

	1191 chLen = src.parsedToken.charsLen;

	1192 exOffset = src.parsedToken.extensionOffset;

	1193 exLen = src.parsedToken.extensionLen;

	1194 prefixOffset = src.parsedToken.prefixOffset;

	1195 prefixLen = src.parsedToken.prefixLen;

	1196 specs = src.parsedToken.flags;

	1197

	1198 startOfRules = FALSE;

	1199 varT = (UBool)((specs & UCOL_TOK_VARIABLE_TOP) != 0);

	1200 top_ = (UBool)((specs & UCOL_TOK_TOP) != 0);

	1201

	1202 uprv_init_collIterate(coll, src.source+chOffset, chLen, c, status);

	1203

	1204 currCE = ucol_getNextCE(coll, c, status);

	1205 if(currCE == 0 && UCOL_ISTHAIPREVOWEL(*(src.source+chOffset))) {

	1206 log_verbose("Thai prevowel detected. Will pick next CE\n");

	1207 currCE = ucol_getNextCE(coll, c, status);

	1208 }

	1209

	1210 currContCE = ucol_getNextCE(coll, c, status);

	1211 if(!isContinuation(currContCE)) {

	1212 currContCE = 0;

	1213 }

	1214

	1215 /* we need to repack CEs here */

	1216

	1217 if(strength == UCOL_TOK_RESET) {

	1218 before = (UBool)((specs & UCOL_TOK_BEFORE) != 0);

	1219 if(top_ == TRUE) {

	1220 int32_t tokenIndex = src.parsedToken.indirectIndex;

	1221

	1222 nextCE = baseCE = currCE = ucolIndirectBoundaries[tokenIndex ].startCE;

	1223 nextContCE = baseContCE = currContCE = ucolIndirectBoundarie s[tokenIndex].startContCE;

	1224 } else {

	1225 nextCE = baseCE = currCE;

	1226 nextContCE = baseContCE = currContCE;

	1227 }

	1228 maxStrength = UCOL_IDENTICAL;

	1229 } else {

	1230 if(strength < maxStrength) {

	1231 maxStrength = strength;

	1232 if(baseCE == UCOL_RESET_TOP_VALUE) {

	1233 log_verbose("Resetting to [top]\n");

	1234 nextCE = UCOL_NEXT_TOP_VALUE;

	1235 nextContCE = UCOL_NEXT_TOP_CONT;

	1236 } else {

	1237 result = ucol_inv_getNextCE(&src, baseCE & 0xFFFFFF3F, b aseContCE, &nextCE, &nextContCE, maxStrength);

	1238 }

	1239 if(result < 0) {

	1240 if(ucol_isTailored(coll, *(src.source+oldOffset), status )) {

	1241 log_verbose("Reset is tailored codepoint %04X, don't know how to continue, taking next test\n", *(src.source+oldOffset));

	1242 return;

	1243 } else {

	1244 log_err("%s: couldn't find the CE\n", colLoc);

	1245 return;

	1246 }

	1247 }

	1248 }

	1249

	1250 currCE &= 0xFFFFFF3F;

	1251 currContCE &= 0xFFFFFFBF;

	1252

	1253 if(maxStrength == UCOL_IDENTICAL) {

	1254 if(baseCE != currCE \|\| baseContCE != currContCE) {

	1255 log_err("%s: current CE (initial strength UCOL_EQUAL)\n ", colLoc);

	1256 }

	1257 } else {

	1258 if(strength == UCOL_IDENTICAL) {

	1259 if(lastCE != currCE \|\| lastContCE != currContCE) {

	1260 log_err("%s: current CE (initial strength UCOL_EQUA L)\n", colLoc);

	1261 }

	1262 } else {

	1263 if(compareCEs(currCE, currContCE, nextCE, nextContCE) > 0) {

	1264 /if(currCE > nextCE \|\| (currCE == nextCE && currCon tCE >= nextContCE)) {/

	1265 log_err("%s: current CE is not less than base CE\n", colLoc);

	1266 }

	1267 if(!before) {

	1268 if(compareCEs(currCE, currContCE, lastCE, lastContCE ) < 0) {

	1269 /if(currCE < lastCE \|\| (currCE == lastCE && cur rContCE <= lastContCE)) {/

	1270 log_err("%s: sequence of generated CEs is broken \n", colLoc);

	1271 }

	1272 } else {

	1273 before = FALSE;

	1274 if(compareCEs(currCE, currContCE, lastCE, lastContCE ) > 0) {

	1275 /if(currCE < lastCE \|\| (currCE == lastCE && cur rContCE <= lastContCE)) {/

	1276 log_err("%s: sequence of generated CEs is broken \n", colLoc);

	1277 }

	1278 }

	1279 }

	1280 }

	1281

	1282 }

	1283

	1284 oldOffset = chOffset;

	1285 lastCE = currCE & 0xFFFFFF3F;

	1286 lastContCE = currContCE & 0xFFFFFFBF;

	1287 }

	1288 uprv_free(src.source);

	1289 }

	1290 ucol_close(UCA);

	1291 uprv_delete_collIterate(c);

	1292 }

	1293

	#if 0
	/* these locales are now picked from index RB */
	static const char* localesToTest[] = {
	    "ar", "bg", "ca", "cs", "da",
	    "el", "en_BE", "en_US_POSIX",
	    "es", "et", "fi", "fr", "hi",
	    "hr", "hu", "is", "iw", "ja",
	    "ko", "lt", "lv", "mk", "mt",
	    "nb", "nn", "nn_NO", "pl", "ro",
	    "ru", "sh", "sk", "sl", "sq",
	    "sr", "sv", "th", "tr", "uk",
	    "vi", "zh", "zh_TW"
	};
	#endif

	1308

	/*
	 * Hand-written tailoring rules exercised by RamsRulesTest in addition to
	 * the locale collators. The rule quoted in the trailing comment of each
	 * entry is a disabled alternative spelling.
	 */
	static const char* rulesToTest[] = {
	    /* Funky fa rule */
	    "&\\u0622 < \\u0627 << \\u0671 < \\u0621",
	    /*"& Z < p, P",*/
	    /* Cui Mins rules */
	    "&[top]<o,O<p,P<q,Q<'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu<'?'",*/
	    "&[top]<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
	    "&[top]<o,O<p,P<q,Q,'?'/u<r,R<u,U", /*"<o,O<p,P<q,Q<r,R<u,U&'Qu','?'",*/
	    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/u<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qu;'?'",*/
	    "&[top]<'?';Qu<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qu",*/
	    "&[top]<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q;'?'/um<r,R<u,U", /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & Qum;'?'",*/
	    "&[top]<'?';Qum<3<4<5<c,C<f,F<m,M<o,O<p,P<q,Q<r,R<u,U"  /*"<'?'<3<4<5<a,A<f,F<m,M<o,O<p,P<q,Q<r,R<u,U & '?';Qum"*/
	};

	1322

	1323

	1324 static void TestCollations(void) {

	1325 int32_t noOfLoc = uloc_countAvailable();

	1326 int32_t i = 0, j = 0;

	1327

	1328 UErrorCode status = U_ZERO_ERROR;

	1329 char cName[256];

	1330 UChar name[256];

	1331 int32_t nameSize;

	1332

	1333

	1334 const char *locName = NULL;

	1335 UCollator *coll = NULL;

	1336 UCollator *UCA = ucol_open("", &status);

	1337 UColAttributeValue oldStrength = ucol_getAttribute(UCA, UCOL_STRENGTH, &stat us);

	1338 if (U_FAILURE(status)) {

	1339 log_err_status(status, "Could not open UCA collator %s\n", u_errorName(s tatus));

	1340 return;

	1341 }

	1342 ucol_setAttribute(UCA, UCOL_STRENGTH, UCOL_QUATERNARY, &status);

	1343

	1344 for(i = 0; i<noOfLoc; i++) {

	1345 status = U_ZERO_ERROR;

	1346 locName = uloc_getAvailable(i);

	1347 if(uprv_strcmp("ja", locName) == 0) {

	1348 log_verbose("Don't know how to test prefixes\n");

	1349 continue;

	1350 }

	1351 if(hasCollationElements(locName)) {

	1352 nameSize = uloc_getDisplayName(locName, NULL, name, 256, &status);

	1353 for(j = 0; j<nameSize; j++) {

	1354 cName[j] = (char)name[j];

	1355 }

	1356 cName[nameSize] = 0;

	1357 log_verbose("\nTesting locale %s (%s)\n", locName, cName);

	1358 coll = ucol_open(locName, &status);

	1359 if(U_SUCCESS(status)) {

	1360 testAgainstUCA(coll, UCA, "UCA", FALSE, &status);

	1361 ucol_close(coll);

	1362 } else {

	1363 log_err("Couldn't instantiate collator for locale %s, error: %s\ n", locName, u_errorName(status));

	1364 status = U_ZERO_ERROR;

	1365 }

	1366 }

	1367 }

	1368 ucol_setAttribute(UCA, UCOL_STRENGTH, oldStrength, &status);

	1369 ucol_close(UCA);

	1370 }

	1371

	1372 static void RamsRulesTest(void) {

	1373 UErrorCode status = U_ZERO_ERROR;

	1374 int32_t i = 0;

	1375 UCollator *coll = NULL;

	1376 UChar rule[2048];

	1377 uint32_t ruleLen;

	1378 int32_t noOfLoc = uloc_countAvailable();

	1379 const char *locName = NULL;

	1380

	1381 log_verbose("RamsRulesTest\n");

	1382

	1383 if (uprv_strcmp("km", uloc_getDefault())==0 \|\| uprv_strcmp("km_KH", uloc_get Default())==0) {

	1384 /* This test will fail if the default locale is "km" or "km_KH". Enable after trac#6040. */

	1385 return;

	1386 }

	1387

	1388 for(i = 0; i<noOfLoc; i++) {

	1389 locName = uloc_getAvailable(i);

	1390 if(hasCollationElements(locName)) {

	1391 if (uprv_strcmp("ja", locName)==0) {

	1392 log_verbose("Don't know how to test Japanese because of prefixes \n");

	1393 continue;

	1394 }

	1395 if (uprv_strcmp("de__PHONEBOOK", locName)==0) {

	1396 log_verbose("Don't know how to test Phonebook because the reset is on an expanding character\n");

	1397 continue;

	1398 }

	1399 if (uprv_strcmp("bn", locName)==0 \|\|

	1400 uprv_strcmp("en_US_POSIX", locName)==0 \|\|

	1401 uprv_strcmp("km", locName)==0 \|\|

	1402 uprv_strcmp("km_KH", locName)==0 \|\|

	1403 uprv_strcmp("my", locName)==0 \|\|

	1404 uprv_strcmp("si", locName)==0 \|\|

	1405 uprv_strcmp("si_LK", locName)==0 \|\|

	1406 uprv_strcmp("zh", locName)==0 \|\|

	1407 uprv_strcmp("zh_Hant", locName)==0

	1408 ) {

	1409 log_verbose("Don't know how to test %s. "

	1410 "TODO: Fix ticket #6040 and reenable RamsRulesTest f or this locale.\n", locName);

	1411 continue;

	1412 }

	1413 log_verbose("Testing locale %s\n", locName);

	1414 status = U_ZERO_ERROR;

	1415 coll = ucol_open(locName, &status);

	1416 if(U_SUCCESS(status)) {

	1417 if((status != U_USING_DEFAULT_WARNING) && (status != U_USING_FALLB ACK_WARNING)) {

	1418 if(coll->image->jamoSpecial == TRUE) {

	1419 log_err("%s has special JAMOs\n", locName);

	1420 }

	1421 ucol_setAttribute(coll, UCOL_CASE_FIRST, UCOL_OFF, &status);

	1422 testCollator(coll, &status);

	1423 testCEs(coll, &status);

	1424 } else {

	1425 log_verbose("Skipping %s: %s\n", locName, u_errorName(status));

	1426 }

	1427 ucol_close(coll);

	1428 } else {

	1429 log_err("Could not open %s: %s\n", locName, u_errorName(status));

	1430 }

	1431 }

	1432 }

	1433

	1434 for(i = 0; i<sizeof(rulesToTest)/sizeof(rulesToTest[0]); i++) {

	1435 log_verbose("Testing rule: %s\n", rulesToTest[i]);

	1436 ruleLen = u_unescape(rulesToTest[i], rule, 2048);

	1437 status = U_ZERO_ERROR;

	1438 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&stat us);

	1439 if(U_SUCCESS(status)) {

	1440 testCollator(coll, &status);

	1441 testCEs(coll, &status);

	1442 ucol_close(coll);

	1443 } else {

	1444 log_err_status(status, "Could not test rule: %s: '%s'\n", u_errorName( status), rulesToTest[i]);

	1445 }

	1446 }

	1447

	1448 }

	1449

	1450 static void IsTailoredTest(void) {

	1451 UErrorCode status = U_ZERO_ERROR;

	1452 uint32_t i = 0;

	1453 UCollator *coll = NULL;

	1454 UChar rule[2048];

	1455 UChar tailored[2048];

	1456 UChar notTailored[2048];

	1457 uint32_t ruleLen, tailoredLen, notTailoredLen;

	1458

	1459 log_verbose("IsTailoredTest\n");

	1460

	1461 u_uastrcpy(rule, "&Z < A, B, C;c < d");

	1462 ruleLen = u_strlen(rule);

	1463

	1464 u_uastrcpy(tailored, "ABCcd");

	1465 tailoredLen = u_strlen(tailored);

	1466

	1467 u_uastrcpy(notTailored, "ZabD");

	1468 notTailoredLen = u_strlen(notTailored);

	1469

	1470 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

	1471 if(U_SUCCESS(status)) {

	1472 for(i = 0; i<tailoredLen; i++) {

	1473 if(!ucol_isTailored(coll, tailored[i], &status)) {

	1474 log_err("%i: %04X should be tailored - it is reported as not\n", i, tailored[i]);

	1475 }

	1476 }

	1477 for(i = 0; i<notTailoredLen; i++) {

	1478 if(ucol_isTailored(coll, notTailored[i], &status)) {

	1479 log_err("%i: %04X should not be tailored - it is reported as it is\n", i, notTailored[i]);

	1480 }

	1481 }

	1482 ucol_close(coll);

	1483 }

	1484 else {

	1485 log_err_status(status, "Can't tailor rules\n");

	1486 }

	1487 /* Code coverage */

	1488 status = U_ZERO_ERROR;

	1489 coll = ucol_open("ja", &status);

	1490 if(!ucol_isTailored(coll, 0x4E9C, &status)) {

	1491 log_err_status(status, "0x4E9C should be tailored - it is reported as no t\n");

	1492 }

	1493 ucol_close(coll);

	1494 }

	1495

	1496

	/*
	 * Escaped test strings for TestChMove, listed in the order the "cs"
	 * collator is expected to sort them (each entry less than all later ones).
	 */
	static const char chTest[][20] = {
	    "c",
	    "C",
	    "ca", "cb", "cx", "cy", "CZ",
	    "c\\u030C", "C\\u030C",
	    "h",
	    "H",
	    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
	    "ch", "cH", "Ch", "CH",
	    "cha", "charly", "che", "chh", "chch", "chr",
	    "i", "I", "iarly",
	    "r", "R",
	    "r\\u030C", "R\\u030C",
	    "s",
	    "S",
	    "s\\u030C", "S\\u030C",
	    "z", "Z",
	    "z\\u030C", "Z\\u030C"
	};

	1516

	1517 static void TestChMove(void) {

	1518 UChar t1[256] = {0};

	1519 UChar t2[256] = {0};

	1520

	1521 uint32_t i = 0, j = 0;

	1522 uint32_t size = 0;

	1523 UErrorCode status = U_ZERO_ERROR;

	1524

	1525 UCollator *coll = ucol_open("cs", &status);

	1526

	1527 if(U_SUCCESS(status)) {

	1528 size = sizeof(chTest)/sizeof(chTest[0]);

	1529 for(i = 0; i < size-1; i++) {

	1530 for(j = i+1; j < size; j++) {

	1531 u_unescape(chTest[i], t1, 256);

	1532 u_unescape(chTest[j], t2, 256);

	1533 doTest(coll, t1, t2, UCOL_LESS);

	1534 }

	1535 }

	1536 }

	1537 else {

	1538 log_data_err("Can't open collator");

	1539 }

	1540 ucol_close(coll);

	1541 }

	1542

	1543

	1544

	1545

	/*
	 * Escaped strings in expected order for the rule "&\u4e00 < a <<< A < b <<< B".
	 * Only referenced from the disabled code inside TestImplicitTailoring.
	 */
	static const char impTest[][20] = {
	    "\\u4e00",
	    "a",
	    "A",
	    "b",
	    "B",
	    "\\u4e01"
	};

	1554

	1555

	/*
	 * Feeds a table of tailoring rules that reset on or around CJK ideographs
	 * to genericRulesStarter. For each rule, `data` lists the escaped test
	 * strings (in the order expected under that rule) and `len` says how many
	 * entries of `data` are used.
	 */
	static void TestImplicitTailoring(void) {
	    static const struct {
	        const char *rules;
	        const char *data[10];
	        const uint32_t len;
	    } tests[] = {
	        { "&[before 1]\\u4e00 < b < c &[before 1]\\u4e00 < d < e", { "d", "e", "b", "c", "\\u4e00"}, 5 },
	        { "&\\u4e00 < a <<< A < b <<< B",   { "\\u4e00", "a", "A", "b", "B", "\\u4e01"}, 6 },
	        { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e00"}, 3},
	        { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e01"}, 3}
	    };

	    int32_t i = 0;
	    int32_t noOfTests = (int32_t)(sizeof(tests)/sizeof(tests[0]));

	    for(i = 0; i < noOfTests; i++) {
	        genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);
	    }

	    /*
	    UChar t1[256] = {0};
	    UChar t2[256] = {0};

	    const char *rule = "&\\u4e00 < a <<< A < b <<< B";

	    uint32_t i = 0, j = 0;
	    uint32_t size = 0;
	    uint32_t ruleLen = 0;
	    UErrorCode status = U_ZERO_ERROR;
	    UCollator *coll = NULL;
	    ruleLen = u_unescape(rule, t1, 256);

	    coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

	    if(U_SUCCESS(status)) {
	        size = sizeof(impTest)/sizeof(impTest[0]);
	        for(i = 0; i < size-1; i++) {
	            for(j = i+1; j < size; j++) {
	                u_unescape(impTest[i], t1, 256);
	                u_unescape(impTest[j], t2, 256);
	                doTest(coll, t1, t2, UCOL_LESS);
	            }
	        }
	    }
	    else {
	        log_err("Can't open collator");
	    }
	    ucol_close(coll);
	    */
	}

	1605

	1606 static void TestFCDProblem(void) {

	1607 UChar t1[256] = {0};

	1608 UChar t2[256] = {0};

	1609

	1610 const char *s1 = "\\u0430\\u0306\\u0325";

	1611 const char *s2 = "\\u04D1\\u0325";

	1612

	1613 UErrorCode status = U_ZERO_ERROR;

	1614 UCollator *coll = ucol_open("", &status);

	1615 u_unescape(s1, t1, 256);

	1616 u_unescape(s2, t2, 256);

	1617

	1618 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);

	1619 doTest(coll, t1, t2, UCOL_EQUAL);

	1620

	1621 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	1622 doTest(coll, t1, t2, UCOL_EQUAL);

	1623

	1624 ucol_close(coll);

	1625 }

	1626

	1627 /*

	1628 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC

	1629 We're only using NFC/NFD in this test.

	1630 */

	1631 #define NORM_BUFFER_TEST_LEN 18

	1632 typedef struct {

	1633 UChar32 u;

	1634 UChar NFC[NORM_BUFFER_TEST_LEN];

	1635 UChar NFD[NORM_BUFFER_TEST_LEN];

	1636 } tester;

	1637

	1638 static void TestComposeDecompose(void) {

	1639 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */

	1640 static const UChar UNICODESET_STR[] = {

	1641 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x 61,

	1642 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x 72,

	1643 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0

	1644 };

	1645 int32_t noOfLoc;

	1646 int32_t i = 0, j = 0;

	1647

	1648 UErrorCode status = U_ZERO_ERROR;

	1649 const char *locName = NULL;

	1650 uint32_t nfcSize;

	1651 uint32_t nfdSize;

	1652 tester **t;

	1653 uint32_t noCases = 0;

	1654 UCollator *coll = NULL;

	1655 UChar32 u = 0;

	1656 UChar comp[NORM_BUFFER_TEST_LEN];

	1657 uint32_t len = 0;

	1658 UCollationElements *iter;

	1659 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);

	1660 int32_t charsToTestSize;

	1661

	1662 noOfLoc = uloc_countAvailable();

	1663

	1664 coll = ucol_open("", &status);

	1665 if (U_FAILURE(status)) {

	1666 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u _errorName(status));

	1667 return;

	1668 }

	1669 charsToTestSize = uset_size(charsToTest);

	1670 if (charsToTestSize <= 0) {

	1671 log_err("Set was zero. Missing data?\n");

	1672 return;

	1673 }

	1674 t = malloc(charsToTestSize * sizeof(tester *));

	1675 t[0] = (tester *)malloc(sizeof(tester));

	1676 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);

	1677

	1678 for(u = 0; u < charsToTestSize; u++) {

	1679 UChar32 ch = uset_charAt(charsToTest, u);

	1680 len = 0;

	1681 UTF_APPEND_CHAR_UNSAFE(comp, len, ch);

	1682 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM _BUFFER_TEST_LEN, &status);

	1683 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM _BUFFER_TEST_LEN, &status);

	1684

	1685 if(nfcSize != nfdSize \|\| (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)

	1686 \|\| (len != nfdSize \|\| (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * si zeof(UChar)) != 0))) {

	1687 t[noCases]->u = ch;

	1688 if(len != nfdSize \|\| (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * s izeof(UChar)) != 0)) {

	1689 u_strncpy(t[noCases]->NFC, comp, len);

	1690 t[noCases]->NFC[len] = 0;

	1691 }

	1692 noCases++;

	1693 t[noCases] = (tester *)malloc(sizeof(tester));

	1694 uprv_memset(t[noCases], 0, sizeof(tester));

	1695 }

	1696 }

	1697 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSi ze);

	1698 uset_close(charsToTest);

	1699 charsToTest = NULL;

	1700

	1701 for(u=0; u<(UChar32)noCases; u++) {

	1702 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {

	1703 log_err("Failure: codePoint %05X fails TestComposeDecompose in the U CA\n", t[u]->u);

	1704 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);

	1705 }

	1706 }

	1707 /*

	1708 for(u = 0; u < charsToTestSize; u++) {

	1709 if(!(u&0xFFFF)) {

	1710 log_verbose("%08X ", u);

	1711 }

	1712 uprv_memset(t[noCases], 0, sizeof(tester));

	1713 t[noCases]->u = u;

	1714 len = 0;

	1715 UTF_APPEND_CHAR_UNSAFE(comp, len, u);

	1716 comp[len] = 0;

	1717 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_B UFFER_TEST_LEN, &status);

	1718 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_B UFFER_TEST_LEN, &status);

	1719 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);

	1720 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);

	1721 }

	1722 */

	1723

	1724 ucol_close(coll);

	1725

	1726 log_verbose("Testing locales, number of cases = %i\n", noCases);

	1727 for(i = 0; i<noOfLoc; i++) {

	1728 status = U_ZERO_ERROR;

	1729 locName = uloc_getAvailable(i);

	1730 if(hasCollationElements(locName)) {

	1731 char cName[256];

	1732 UChar name[256];

	1733 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(c Name), &status);

	1734

	1735 for(j = 0; j<nameSize; j++) {

	1736 cName[j] = (char)name[j];

	1737 }

	1738 cName[nameSize] = 0;

	1739 log_verbose("\nTesting locale %s (%s)\n", locName, cName);

	1740

	1741 coll = ucol_open(locName, &status);

	1742 ucol_setStrength(coll, UCOL_IDENTICAL);

	1743 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &stat us);

	1744

	1745 for(u=0; u<(UChar32)noCases; u++) {

	1746 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {

	1747 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);

	1748 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);

	1749 log_verbose("Testing NFC\n");

	1750 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);

	1751 backAndForth(iter);

	1752 log_verbose("Testing NFD\n");

	1753 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);

	1754 backAndForth(iter);

	1755 }

	1756 }

	1757 ucol_closeElements(iter);

	1758 ucol_close(coll);

	1759 }

	1760 }

	1761 for(u = 0; u <= (UChar32)noCases; u++) {

	1762 free(t[u]);

	1763 }

	1764 free(t);

	1765 }

	1766

	1767 static void TestEmptyRule(void) {

	1768 UErrorCode status = U_ZERO_ERROR;

	1769 UChar rulez[] = { 0 };

	1770 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &stat us);

	1771

	1772 ucol_close(coll);

	1773 }

	1774

	1775 static void TestUCARules(void) {

	1776 UErrorCode status = U_ZERO_ERROR;

	1777 UChar b[256];

	1778 UChar *rules = b;

	1779 uint32_t ruleLen = 0;

	1780 UCollator *UCAfromRules = NULL;

	1781 UCollator *coll = ucol_open("", &status);

	1782 if(status == U_FILE_ACCESS_ERROR) {

	1783 log_data_err("Is your data around?\n");

	1784 return;

	1785 } else if(U_FAILURE(status)) {

	1786 log_err("Error opening collator\n");

	1787 return;

	1788 }

	1789 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);

	1790

	1791 log_verbose("TestUCARules\n");

	1792 if(ruleLen > 256) {

	1793 rules = (UChar )malloc((ruleLen+1)sizeof(UChar));

	1794 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);

	1795 }

	1796 log_verbose("Rules length is %d\n", ruleLen);

	1797 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&s tatus);

	1798 if(U_SUCCESS(status)) {

	1799 ucol_close(UCAfromRules);

	1800 } else {

	1801 log_verbose("Unable to create a collator from UCARules!\n");

	1802 }

	1803 /*

	1804 u_unescape(blah, b, 256);

	1805 ucol_getSortKey(coll, b, 1, res, 256);

	1806 */

	1807 ucol_close(coll);

	1808 if(rules != b) {

	1809 free(rules);

	1810 }

	1811 }

	1812

	1813

	/* Pinyin tonal order */
	/*
	    A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)
	          (w/macron)<  (w/acute)<   (w/caron)<   (w/grave)
	    E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)
	    I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)
	    O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)
	    U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)
	      < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <
	.. (\u00fc)

	However, in testing we got the following order:
	    A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)
	          (w/acute)<   (w/grave)<   (w/caron)<   (w/macron)
	    E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <
	.. (\u0113)
	    I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)
	    O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)
	    U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <
	.. (\u01d8)
	      < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)
	*/

	/*
	 * Passes a rule set that places the toned vowels before their base vowel
	 * with "[before 1]" resets, together with the strings in their expected
	 * order, to genericRulesStarter.
	 */
	static void TestBefore(void) {
	    static const char *data[] = {
	        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
	        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
	        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
	        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
	        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
	        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
	    };
	    genericRulesStarter(
	        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
	        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
	        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
	        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
	        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
	        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc",
	        data, sizeof(data)/sizeof(data[0]));
	}

	1855

	#if 0
	/* superceded by TestBeforePinyin */
	static void TestJ784(void) {
	    static const char *data[] = {
	        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
	        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
	        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
	        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
	        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
	        "\\u00fc",
	        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
	    };
	    genericLocaleStarter("zh", data, sizeof(data)/sizeof(data[0]));
	}
	#endif

	1871

	#if 0
	/* superceded by the changes to the lv locale */
	static void TestJ831(void) {
	    static const char *data[] = {
	        "I",
	        "i",
	        "Y",
	        "y"
	    };
	    genericLocaleStarter("lv", data, sizeof(data)/sizeof(data[0]));
	}
	#endif

	1884

	/*
	 * Runs the same list of strings (given in expected order) through
	 * genericLocaleStarter for "fr" and through genericRulesStarter for an
	 * explicit "[backwards 2]" rule that tailors ae/AE ligatures after A.
	 */
	static void TestJ815(void) {
	    static const char *data[] = {
	        "aa",
	        "Aa",
	        "ab",
	        "Ab",
	        "ad",
	        "Ad",
	        "ae",
	        "Ae",
	        "\\u00e6",
	        "\\u00c6",
	        "af",
	        "Af",
	        "b",
	        "B"
	    };
	    genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
	    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
	}

	1905

	1906

	1907 /*

	1908 "& a < b < c < d& r < c", "& a < b < d& r < c" ,

	1909 "& a < b < c < d& c < m", "& a < b < c < m < d ",

	1910 "& a < b < c < d& a < m", "& a < m < b < c < d ",

	1911 "& a <<< b << c < d& a < m", "& a <<< b << c < m < d",

	1912 "& a < b < c < d& [before 1] c < m", "& a < b < m < c < d ",

	1913 "& a < b <<< c << d <<< e& [before 3] e <<< x", "& a < b <<< c << d < << x <<< e",

	1914 "& a < b <<< c << d <<< e& [before 2] e <<< x", "& a < b <<< c <<< x << d <<< e",

	1915 "& a < b <<< c << d <<< e& [before 1] e <<< x", "& a <<< x < b <<< c << d <<< e",

	1916 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x", "& a < b <<< c << d < << e <<< f < x < g",

	1917 */

	1918 static void TestRedundantRules(void) {

	1919 int32_t i;

	1920

	1921 static const struct {

	1922 const char *rules;

	1923 const char *expectedRules;

	1924 const char *testdata[8];

	1925 uint32_t testdatalen;

	1926 } tests[] = {

	1927 /* this test conflicts with positioning of CODAN placeholder */

	1928 /*{

	1929 "& a <<< b <<< c << d <<< e& [before 1] e <<< x",

	1930 "&\\u2089<<<x",

	1931 {"\\u2089", "x"}, 2

	1932 }, */

	1933 /* this test conflicts with the [before x] syntax tightening */

	1934 /*{

	1935 "& b <<< c <<< d << e <<< f& [before 1] f <<< x",

	1936 "&\\u0252<<<x",

	1937 {"\\u0252", "x"}, 2

	1938 }, */

	1939 /* this test conflicts with the [before x] syntax tightening */

	1940 /*{

	1941 "& a < b <<< c << d <<< e& [before 1] e <<< x",

	1942 "& a <<< x < b <<< c << d <<< e",

	1943 {"a", "x", "b", "c", "d", "e"}, 6

	1944 }, */

	1945 {

	1946 "& a < b < c < d& [before 1] c < m",

	1947 "& a < b < m < c < d",

	1948 {"a", "b", "m", "c", "d"}, 5

	1949 },

	1950 {

	1951 "& a < b <<< c << d <<< e& [before 3] e <<< x",

	1952 "& a < b <<< c << d <<< x <<< e",

	1953 {"a", "b", "c", "d", "x", "e"}, 6

	1954 },

	1955 /* this test conflicts with the [before x] syntax tightening */

	1956 /* {

	1957 "& a < b <<< c << d <<< e& [before 2] e <<< x",

	1958 "& a < b <<< c <<< x << d <<< e",

	1959 {"a", "b", "c", "x", "d", "e"},, 6

	1960 }, */

	1961 {

	1962 "& a < b <<< c << d <<< e <<< f < g& [before 1] g < x",

	1963 "& a < b <<< c << d <<< e <<< f < x < g",

	1964 {"a", "b", "c", "d", "e", "f", "x", "g"}, 8

	1965 },

	1966 {

	1967 "& a <<< b << c < d& a < m",

	1968 "& a <<< b << c < m < d",

	1969 {"a", "b", "c", "m", "d"}, 5

	1970 },

	1971 {

	1972 "&a<b<<b\\u0301 &z<b",

	1973 "&a<b\\u0301 &z<b",

	1974 {"a", "b\\u0301", "z", "b"}, 4

	1975 },

	1976 {

	1977 "&z<m<<<q<<<m",

	1978 "&z<q<<<m",

	1979 {"z", "q", "m"},3

	1980 },

	1981 {

	1982 "&z<<<m<q<<<m",

	1983 "&z<q<<<m",

	1984 {"z", "q", "m"}, 3

	1985 },

	1986 {

	1987 "& a < b < c < d& r < c",

	1988 "& a < b < d& r < c",

	1989 {"a", "b", "d"}, 3

	1990 },

	1991 {

	1992 "& a < b < c < d& r < c",

	1993 "& a < b < d& r < c",

	1994 {"r", "c"}, 2

	1995 },

	1996 {

	1997 "& a < b < c < d& c < m",

	1998 "& a < b < c < m < d",

	1999 {"a", "b", "c", "m", "d"}, 5

	2000 },

	2001 {

	2002 "& a < b < c < d& a < m",

	2003 "& a < m < b < c < d",

	2004 {"a", "m", "b", "c", "d"}, 5

	2005 }

	2006 };

	2007

	2008

	2009 UCollator *credundant = NULL;

	2010 UCollator *cresulting = NULL;

	2011 UErrorCode status = U_ZERO_ERROR;

	2012 UChar rlz[2048] = { 0 };

	2013 uint32_t rlen = 0;

	2014

	2015 for(i = 0; i<sizeof(tests)/sizeof(tests[0]); i++) {

	2016 log_verbose("testing rule %s, expected to be %s\n", tests[i].rules, tests[i] .expectedRules);

	2017 rlen = u_unescape(tests[i].rules, rlz, 2048);

	2018

	2019 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta tus);

	2020 if(status == U_FILE_ACCESS_ERROR) {

	2021 log_data_err("Is your data around?\n");

	2022 return;

	2023 } else if(U_FAILURE(status)) {

	2024 log_err("Error opening collator\n");

	2025 return;

	2026 }

	2027

	2028 rlen = u_unescape(tests[i].expectedRules, rlz, 2048);

	2029 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta tus);

	2030

	2031 testAgainstUCA(cresulting, credundant, "expected", TRUE, &status);

	2032

	2033 ucol_close(credundant);

	2034 ucol_close(cresulting);

	2035

	2036 log_verbose("testing using data\n");

	2037

	2038 genericRulesStarter(tests[i].rules, tests[i].testdata, tests[i].testdatalen) ;

	2039 }

	2040

	2041 }

	2042

	2043 static void TestExpansionSyntax(void) {

	2044 int32_t i;

	2045

	2046 const static char *rules[] = {

	2047 "&AE <<< a << b <<< c &d <<< f",

	2048 "&AE <<< a <<< b << c << d < e < f <<< g",

	2049 "&AE <<< B <<< C / D <<< F"

	2050 };

	2051

	2052 const static char *expectedRules[] = {

	2053 "&A <<< a / E << b / E <<< c /E &d <<< f",

	2054 "&A <<< a / E <<< b / E << c / E << d / E < e < f <<< g",

	2055 "&A <<< B / E <<< C / ED <<< F / E"

	2056 };

	2057

	2058 const static char *testdata[][8] = {

	2059 {"AE", "a", "b", "c"},

	2060 {"AE", "a", "b", "c", "d", "e", "f", "g"},

	2061 {"AE", "B", "C"} /* / ED <<< F / E"},*/

	2062 };

	2063

	2064 const static uint32_t testdatalen[] = {

	2065 4,

	2066 8,

	2067 3

	2068 };

	2069

	2070

	2071

	2072 UCollator *credundant = NULL;

	2073 UCollator *cresulting = NULL;

	2074 UErrorCode status = U_ZERO_ERROR;

	2075 UChar rlz[2048] = { 0 };

	2076 uint32_t rlen = 0;

	2077

	2078 for(i = 0; i<sizeof(rules)/sizeof(rules[0]); i++) {

	2079 log_verbose("testing rule %s, expected to be %s\n", rules[i], expectedRules[ i]);

	2080 rlen = u_unescape(rules[i], rlz, 2048);

	2081

	2082 credundant = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &st atus);

	2083 if(status == U_FILE_ACCESS_ERROR) {

	2084 log_data_err("Is your data around?\n");

	2085 return;

	2086 } else if(U_FAILURE(status)) {

	2087 log_err("Error opening collator\n");

	2088 return;

	2089 }

	2090 rlen = u_unescape(expectedRules[i], rlz, 2048);

	2091 cresulting = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,&sta tus);

	2092

	2093 /* testAgainstUCA still doesn't handle expansions correctly, so this is not run */

	2094 /* as a hard error test, but only in information mode */

	2095 testAgainstUCA(cresulting, credundant, "expected", FALSE, &status);

	2096

	2097 ucol_close(credundant);

	2098 ucol_close(cresulting);

	2099

	2100 log_verbose("testing using data\n");

	2101

	2102 genericRulesStarter(rules[i], testdata[i], testdatalen[i]);

	2103 }

	2104 }

	2105

	2106 static void TestCase(void)

	2107 {

	2108 const static UChar gRules[MAX_TOKEN_LEN] =

	2109 /" & 0 < 1,\u2461<a,A"/

	2110 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x 0041, 0x0000 };

	2111

	2112 const static UChar testCase[][MAX_TOKEN_LEN] =

	2113 {

	2114 /0/ {0x0031 /'1'/, 0x0061/'a'/, 0x0000},

	2115 /1/ {0x0031 /'1'/, 0x0041/'A'/, 0x0000},

	2116 /2/ {0x2460 /circ'1'/, 0x0061/'a'/, 0x0000},

	2117 /3/ {0x2460 /circ'1'/, 0x0041/'A'/, 0x0000}

	2118 };

	2119

	2120 const static UCollationResult caseTestResults[][9] =

	2121 {

	2122 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },

	2123 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },

	2124 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },

	2125 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }

	2126 };

	2127

	2128 const static UColAttributeValue caseTestAttributes[][2] =

	2129 {

	2130 { UCOL_LOWER_FIRST, UCOL_OFF},

	2131 { UCOL_UPPER_FIRST, UCOL_OFF},

	2132 { UCOL_LOWER_FIRST, UCOL_ON},

	2133 { UCOL_UPPER_FIRST, UCOL_ON}

	2134 };

	2135 int32_t i,j,k;

	2136 UErrorCode status = U_ZERO_ERROR;

	2137 UCollationElements *iter;

	2138 UCollator *myCollation;

	2139 myCollation = ucol_open("en_US", &status);

	2140

	2141 if(U_FAILURE(status)){

	2142 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	2143 return;

	2144 }

	2145 log_verbose("Testing different case settings\n");

	2146 ucol_setStrength(myCollation, UCOL_TERTIARY);

	2147

	2148 for(k = 0; k<4; k++) {

	2149 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);

	2150 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);

	2151 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0] , caseTestAttributes[k][1]);

	2152 for (i = 0; i < 3 ; i++) {

	2153 for(j = i+1; j<4; j++) {

	2154 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j -1]);

	2155 }

	2156 }

	2157 }

	2158 ucol_close(myCollation);

	2159

	2160 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIA RY,NULL, &status);

	2161 if(U_FAILURE(status)){

	2162 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(s tatus));

	2163 return;

	2164 }

	2165 log_verbose("Testing different case settings with custom rules\n");

	2166 ucol_setStrength(myCollation, UCOL_TERTIARY);

	2167

	2168 for(k = 0; k<4; k++) {

	2169 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);

	2170 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);

	2171 for (i = 0; i < 3 ; i++) {

	2172 for(j = i+1; j<4; j++) {

	2173 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);

	2174 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j -1]);

	2175 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]) , &status);

	2176 backAndForth(iter);

	2177 ucol_closeElements(iter);

	2178 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]) , &status);

	2179 backAndForth(iter);

	2180 ucol_closeElements(iter);

	2181 }

	2182 }

	2183 }

	2184 ucol_close(myCollation);

	2185 {

	2186 const static char *lowerFirst[] = {

	2187 "h",

	2188 "H",

	2189 "ch",

	2190 "Ch",

	2191 "CH",

	2192 "cha",

	2193 "chA",

	2194 "Cha",

	2195 "ChA",

	2196 "CHa",

	2197 "CHA",

	2198 "i",

	2199 "I"

	2200 };

	2201

	2202 const static char *upperFirst[] = {

	2203 "H",

	2204 "h",

	2205 "CH",

	2206 "Ch",

	2207 "ch",

	2208 "CHA",

	2209 "CHa",

	2210 "ChA",

	2211 "Cha",

	2212 "chA",

	2213 "cha",

	2214 "I",

	2215 "i"

	2216 };

	2217 log_verbose("mixed case test\n");

	2218 log_verbose("lower first, case level off\n");

	2219 genericRulesStarter("[casefirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof (lowerFirst)/sizeof(lowerFirst[0]));

	2220 log_verbose("upper first, case level off\n");

	2221 genericRulesStarter("[casefirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof (upperFirst)/sizeof(upperFirst[0]));

	2222 log_verbose("lower first, case level on\n");

	2223 genericRulesStarter("[casefirst lower][caselevel on]&H<ch<<<Ch<<<CH", lowe rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));

	2224 log_verbose("upper first, case level on\n");

	2225 genericRulesStarter("[casefirst upper][caselevel on]&H<ch<<<Ch<<<CH", uppe rFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));

	2226 }

	2227

	2228 }

	2229

	2230 static void TestIncrementalNormalize(void) {

	2231

	2232 /UChar baseA =0x61;/

	2233 UChar baseA =0x41;

	2234 /* UChar baseB = 0x42;*/

	2235 static const UChar ccMix[] = {0x316, 0x321, 0x300};

	2236 /UChar ccMix[] = {0x61, 0x61, 0x61};/

	2237 /*

	2238 0x316 is combining grave accent below, cc=220

	2239 0x321 is combining palatalized hook below, cc=202

	2240 0x300 is combining grave accent, cc=230

	2241 */

	2242

	2243 #define MAXSLEN 2000

	2244 /int maxSLen = 64000;/

	2245 int sLen;

	2246 int i;

	2247

	2248 UCollator *coll;

	2249 UErrorCode status = U_ZERO_ERROR;

	2250 UCollationResult result;

	2251

	2252 int32_t myQ = getTestOption(QUICK_OPTION);

	2253

	2254 if(getTestOption(QUICK_OPTION) < 0) {

	2255 setTestOption(QUICK_OPTION, 1);

	2256 }

	2257

	2258 {

	2259 /* Test 1. Run very long unnormalized strings, to force overflow of*/

	2260 /* most buffers along the way.*/

	2261 UChar strA[MAXSLEN+1];

	2262 UChar strB[MAXSLEN+1];

	2263

	2264 coll = ucol_open("en_US", &status);

	2265 if(status == U_FILE_ACCESS_ERROR) {

	2266 log_data_err("Is your data around?\n");

	2267 return;

	2268 } else if(U_FAILURE(status)) {

	2269 log_err("Error opening collator\n");

	2270 return;

	2271 }

	2272 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	2273

	2274 /for (sLen = 257; sLen<MAXSLEN; sLen++) {/

	2275 /for (sLen = 4; sLen<MAXSLEN; sLen++) {/

	2276 /for (sLen = 1000; sLen<1001; sLen++) {/

	2277 for (sLen = 500; sLen<501; sLen++) {

	2278 /for (sLen = 40000; sLen<65000; sLen+=1000) {/

	2279 strA[0] = baseA;

	2280 strB[0] = baseA;

	2281 for (i=1; i<=sLen-1; i++) {

	2282 strA[i] = ccMix[i % 3];

	2283 strB[sLen-i] = ccMix[i % 3];

	2284 }

	2285 strA[sLen] = 0;

	2286 strB[sLen] = 0;

	2287

	2288 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default str ength, which runs*/

	2289 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/

	2290 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/

	2291 doTest(coll, strA, strB, UCOL_EQUAL);

	2292 }

	2293 }

	2294

	2295 setTestOption(QUICK_OPTION, myQ);

	2296

	2297

	2298 /* Test 2: Non-normal sequence in a string that extends to the last charac ter*/

	2299 /* of the string. Checks a couple of edge cases.*/

	2300

	2301 {

	2302 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};

	2303 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};

	2304 ucol_setStrength(coll, UCOL_TERTIARY);

	2305 doTest(coll, strA, strB, UCOL_EQUAL);

	2306 }

	2307

	2308 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/

	2309

	2310 {

	2311 /* New UCA 3.1.1.

	2312 * test below used a code point from Desseret, which sorts differently

	2313 * than d800 dc00

	2314 */

	2315 /UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};/

	2316 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0 };

	2317 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};

	2318 ucol_setStrength(coll, UCOL_TERTIARY);

	2319 doTest(coll, strA, strB, UCOL_GREATER);

	2320 }

	2321

	2322 /* Test 4: Imbedded nulls do not terminate a string when length is specifi ed.*/

	2323

	2324 {

	2325 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};

	2326 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};

	2327 char sortKeyA[50];

	2328 char sortKeyAz[50];

	2329 char sortKeyB[50];

	2330 char sortKeyBz[50];

	2331 int r;

	2332

	2333 /* there used to be -3 here. Hmmmm.... */

	2334 /result = ucol_strcoll(coll, strA, -3, strB, -3);/

	2335 result = ucol_strcoll(coll, strA, 3, strB, 3);

	2336 if (result != UCOL_GREATER) {

	2337 log_err("ERROR 1 in test 4\n");

	2338 }

	2339 result = ucol_strcoll(coll, strA, -1, strB, -1);

	2340 if (result != UCOL_EQUAL) {

	2341 log_err("ERROR 2 in test 4\n");

	2342 }

	2343

	2344 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));

	2345 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

	2346 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));

	2347 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

	2348

	2349 r = strcmp(sortKeyA, sortKeyAz);

	2350 if (r <= 0) {

	2351 log_err("Error 3 in test 4\n");

	2352 }

	2353 r = strcmp(sortKeyA, sortKeyB);

	2354 if (r <= 0) {

	2355 log_err("Error 4 in test 4\n");

	2356 }

	2357 r = strcmp(sortKeyAz, sortKeyBz);

	2358 if (r != 0) {

	2359 log_err("Error 5 in test 4\n");

	2360 }

	2361

	2362 ucol_setStrength(coll, UCOL_IDENTICAL);

	2363 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));

	2364 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

	2365 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));

	2366 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

	2367

	2368 r = strcmp(sortKeyA, sortKeyAz);

	2369 if (r <= 0) {

	2370 log_err("Error 6 in test 4\n");

	2371 }

	2372 r = strcmp(sortKeyA, sortKeyB);

	2373 if (r <= 0) {

	2374 log_err("Error 7 in test 4\n");

	2375 }

	2376 r = strcmp(sortKeyAz, sortKeyBz);

	2377 if (r != 0) {

	2378 log_err("Error 8 in test 4\n");

	2379 }

	2380 ucol_setStrength(coll, UCOL_TERTIARY);

	2381 }

	2382

	2383

	2384 /* Test 5: Null characters in non-normal source strings.*/

	2385

	2386 {

	2387 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00} ;

	2388 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00} ;

	2389 char sortKeyA[50];

	2390 char sortKeyAz[50];

	2391 char sortKeyB[50];

	2392 char sortKeyBz[50];

	2393 int r;

	2394

	2395 result = ucol_strcoll(coll, strA, 6, strB, 6);

	2396 if (result != UCOL_GREATER) {

	2397 log_err("ERROR 1 in test 5\n");

	2398 }

	2399 result = ucol_strcoll(coll, strA, -1, strB, -1);

	2400 if (result != UCOL_EQUAL) {

	2401 log_err("ERROR 2 in test 5\n");

	2402 }

	2403

	2404 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));

	2405 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

	2406 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));

	2407 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

	2408

	2409 r = strcmp(sortKeyA, sortKeyAz);

	2410 if (r <= 0) {

	2411 log_err("Error 3 in test 5\n");

	2412 }

	2413 r = strcmp(sortKeyA, sortKeyB);

	2414 if (r <= 0) {

	2415 log_err("Error 4 in test 5\n");

	2416 }

	2417 r = strcmp(sortKeyAz, sortKeyBz);

	2418 if (r != 0) {

	2419 log_err("Error 5 in test 5\n");

	2420 }

	2421

	2422 ucol_setStrength(coll, UCOL_IDENTICAL);

	2423 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));

	2424 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

	2425 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));

	2426 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

	2427

	2428 r = strcmp(sortKeyA, sortKeyAz);

	2429 if (r <= 0) {

	2430 log_err("Error 6 in test 5\n");

	2431 }

	2432 r = strcmp(sortKeyA, sortKeyB);

	2433 if (r <= 0) {

	2434 log_err("Error 7 in test 5\n");

	2435 }

	2436 r = strcmp(sortKeyAz, sortKeyBz);

	2437 if (r != 0) {

	2438 log_err("Error 8 in test 5\n");

	2439 }

	2440 ucol_setStrength(coll, UCOL_TERTIARY);

	2441 }

	2442

	2443

	2444 /* Test 6: Null character as base of a non-normal combining sequence.*/

	2445

	2446 {

	2447 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00} ;

	2448 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00} ;

	2449

	2450 result = ucol_strcoll(coll, strA, 5, strB, 5);

	2451 if (result != UCOL_LESS) {

	2452 log_err("Error 1 in test 6\n");

	2453 }

	2454 result = ucol_strcoll(coll, strA, -1, strB, -1);

	2455 if (result != UCOL_EQUAL) {

	2456 log_err("Error 2 in test 6\n");

	2457 }

	2458 }

	2459

	2460 ucol_close(coll);

	2461 }

	2462

	2463

	2464

	2465 #if 0

	2466 static void TestGetCaseBit(void) {

	2467 static const char *caseBitData[] = {

	2468 "a", "A", "ch", "Ch", "CH",

	2469 "\\uFF9E", "\\u0009"

	2470 };

	2471

	2472 static const uint8_t results[] = {

	2473 UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPP ER_CASE,

	2474 UCOL_UPPER_CASE, UCOL_LOWER_CASE

	2475 };

	2476

	2477 uint32_t i, blen = 0;

	2478 UChar b[256] = {0};

	2479 UErrorCode status = U_ZERO_ERROR;

	2480 UCollator *UCA = ucol_open("", &status);

	2481 uint8_t res = 0;

	2482

	2483 for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {

	2484 blen = u_unescape(caseBitData[i], b, 256);

	2485 res = ucol_uprv_getCaseBits(UCA, b, blen, &status);

	2486 if(results[i] != res) {

	2487 log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0] );

	2488 }

	2489 }

	2490 }

	2491 #endif

	2492

	2493 static void TestHangulTailoring(void) {

	2494 static const char *koreanData[] = {

	2495 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53 ef", "\\u5475",

	2496 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\ \u67b7", "\\u67ef",

	2497 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\ \u8857", "\\u8888",

	2498 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",

	2499 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\ \u659D", "\\u698E",

	2500 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\ \u8B0C"

	2501 };

	2502

	2503 const char *rules =

	2504 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 << < \\u53ef <<< \\u5475 "

	2505 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "

	2506 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "

	2507 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "

	2508 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "

	2509 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";

	2510

	2511

	2512 UErrorCode status = U_ZERO_ERROR;

	2513 UChar rlz[2048] = { 0 };

	2514 uint32_t rlen = u_unescape(rules, rlz, 2048);

	2515

	2516 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, & status);

	2517 if(status == U_FILE_ACCESS_ERROR) {

	2518 log_data_err("Is your data around?\n");

	2519 return;

	2520 } else if(U_FAILURE(status)) {

	2521 log_err("Error opening collator\n");

	2522 return;

	2523 }

	2524

	2525 log_verbose("Using start of korean rules\n");

	2526

	2527 if(U_SUCCESS(status)) {

	2528 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0 ]));

	2529 } else {

	2530 log_err("Unable to open collator with rules %s\n", rules);

	2531 }

	2532

	2533 log_verbose("Setting jamoSpecial to TRUE and testing once more\n");

	2534 ((UCATableHeader )coll->image)->jamoSpecial = TRUE; / don't try this at home */

	2535 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0]) );

	2536

	2537 ucol_close(coll);

	2538

	2539 log_verbose("Using ko__LOTUS locale\n");

	2540 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean Data[0]));

	2541 }

	2542

	2543 static void TestCompressOverlap(void) {

	2544 UChar secstr[150];

	2545 UChar tertstr[150];

	2546 UErrorCode status = U_ZERO_ERROR;

	2547 UCollator *coll;

	2548 char result[200];

	2549 uint32_t resultlen;

	2550 int count = 0;

	2551 char *tempptr;

	2552

	2553 coll = ucol_open("", &status);

	2554

	2555 if (U_FAILURE(status)) {

	2556 log_err_status(status, "Collator can't be created -> %s\n", u_errorName( status));

	2557 return;

	2558 }

	2559 while (count < 149) {

	2560 secstr[count] = 0x0020; /* [06, 05, 05] */

	2561 tertstr[count] = 0x0020;

	2562 count ++;

	2563 }

	2564

	2565 /* top down compression ----------------------------------- */

	2566 secstr[count] = 0x0332; /* [, 87, 05] */

	2567 tertstr[count] = 0x3000; /* [06, 05, 07] */

	2568

	2569 /* no compression secstr should have 150 secondary bytes, tertstr should

	2570 have 150 tertiary bytes.

	2571 with correct overlapping compression, secstr should have 4 secondary

	2572 bytes, tertstr should have > 2 tertiary bytes */

	2573 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);

	2574 tempptr = uprv_strchr(result, 1) + 1;

	2575 while (*(tempptr + 1) != 1) {

	2576 /* the last secondary collation element is not checked since it is not

	2577 part of the compression */

	2578 if (*tempptr < UCOL_COMMON_TOP2 - UCOL_TOP_COUNT2) {

	2579 log_err("Secondary compression overlapped\n");

	2580 }

	2581 tempptr ++;

	2582 }

	2583

	2584 /* tertiary top/bottom/common for en_US is similar to the secondary

	2585 top/bottom/common */

	2586 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);

	2587 tempptr = uprv_strrchr(result, 1) + 1;

	2588 while (*(tempptr + 1) != 0) {

	2589 /* the last secondary collation element is not checked since it is not

	2590 part of the compression */

	2591 if (*tempptr < coll->tertiaryTop - coll->tertiaryTopCount) {

	2592 log_err("Tertiary compression overlapped\n");

	2593 }

	2594 tempptr ++;

	2595 }

	2596

	2597 /* bottom up compression ------------------------------------- */

	2598 secstr[count] = 0;

	2599 tertstr[count] = 0;

	2600 resultlen = ucol_getSortKey(coll, secstr, 150, (uint8_t *)result, 250);

	2601 tempptr = uprv_strchr(result, 1) + 1;

	2602 while (*(tempptr + 1) != 1) {

	2603 /* the last secondary collation element is not checked since it is not

	2604 part of the compression */

	2605 if (*tempptr > UCOL_COMMON_BOT2 + UCOL_BOT_COUNT2) {

	2606 log_err("Secondary compression overlapped\n");

	2607 }

	2608 tempptr ++;

	2609 }

	2610

	2611 /* tertiary top/bottom/common for en_US is similar to the secondary

	2612 top/bottom/common */

	2613 resultlen = ucol_getSortKey(coll, tertstr, 150, (uint8_t *)result, 250);

	2614 tempptr = uprv_strrchr(result, 1) + 1;

	2615 while (*(tempptr + 1) != 0) {

	2616 /* the last secondary collation element is not checked since it is not

	2617 part of the compression */

	2618 if (*tempptr > coll->tertiaryBottom + coll->tertiaryBottomCount) {

	2619 log_err("Tertiary compression overlapped\n");

	2620 }

	2621 tempptr ++;

	2622 }

	2623

	2624 ucol_close(coll);

	2625 }

	2626

	2627 static void TestCyrillicTailoring(void) {

	2628 static const char *test[] = {

	2629 "\\u0410b",

	2630 "\\u0410\\u0306a",

	2631 "\\u04d0A"

	2632 };

	2633

	2634 /* Russian overrides contractions, so this test is not valid anymore */

	2635 /genericLocaleStarter("ru", test, 3);/

	2636

	2637 genericLocaleStarter("root", test, 3);

	2638 genericRulesStarter("&\\u0410 = \\u0410", test, 3);

	2639 genericRulesStarter("&Z < \\u0410", test, 3);

	2640 genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, 3);

	2641 genericRulesStarter("&Z < \\u0410 < \\u04d0", test, 3);

	2642 genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3);

	2643 genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3);

	2644 }

	2645

	2646 static void TestSuppressContractions(void) {

	2647

	2648 static const char *testNoCont2[] = {

	2649 "\\u0410\\u0302a",

	2650 "\\u0410\\u0306b",

	2651 "\\u0410c"

	2652 };

	2653 static const char *testNoCont[] = {

	2654 "a\\u0410",

	2655 "A\\u0410\\u0306",

	2656 "\\uFF21\\u0410\\u0302"

	2657 };

	2658

	2659 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont, 3) ;

	2660 genericRulesStarter("[suppressContractions [\\u0400-\\u047f]]", testNoCont2, 3 );

	2661 }

	2662

	2663 static void TestContraction(void) {

	2664 const static char *testrules[] = {

	2665 "&A = AB / B",

	2666 "&A = A\\u0306/\\u0306",

	2667 "&c = ch / h"

	2668 };

	2669 const static UChar testdata[][2] = {

	2670 {0x0041 /* 'A' /, 0x0042 / 'B' */},

	2671 {0x0041 /* 'A' /, 0x0306 / combining breve */},

	2672 {0x0063 /* 'c' /, 0x0068 / 'h' */}

	2673 };

	2674 const static UChar testdata2[][2] = {

	2675 {0x0063 /* 'c' /, 0x0067 / 'g' */},

	2676 {0x0063 /* 'c' /, 0x0068 / 'h' */},

	2677 {0x0063 /* 'c' /, 0x006C / 'l' */}

	2678 };

	2679 const static char *testrules3[] = {

	2680 "&z < xyz &xyzw << B",

	2681 "&z < xyz &xyz << B / w",

	2682 "&z < ch &achm << B",

	2683 "&z < ch &a << B / chm",

	2684 "&\\ud800\\udc00w << B",

	2685 "&\\ud800\\udc00 << B / w",

	2686 "&a\\ud800\\udc00m << B",

	2687 "&a << B / \\ud800\\udc00m",

	2688 };

	2689

	2690 UErrorCode status = U_ZERO_ERROR;

	2691 UCollator *coll;

	2692 UChar rule[256] = {0};

	2693 uint32_t rlen = 0;

	2694 int i;

	2695

	2696 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {

	2697 UCollationElements *iter1;

	2698 int j = 0;

	2699 log_verbose("Rule %s for testing\n", testrules[i]);

	2700 rlen = u_unescape(testrules[i], rule, 32);

	2701 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

	2702 if (U_FAILURE(status)) {

	2703 log_err_status(status, "Collator creation failed %s -> %s\n", testru les[i], u_errorName(status));

	2704 return;

	2705 }

	2706 iter1 = ucol_openElements(coll, testdata[i], 2, &status);

	2707 if (U_FAILURE(status)) {

	2708 log_err("Collation iterator creation failed\n");

	2709 return;

	2710 }

	2711 while (j < 2) {

	2712 UCollationElements *iter2 = ucol_openElements(coll,

	2713 &(testdata[i][j]),

	2714 1, &status);

	2715 uint32_t ce;

	2716 if (U_FAILURE(status)) {

	2717 log_err("Collation iterator creation failed\n");

	2718 return;

	2719 }

	2720 ce = ucol_next(iter2, &status);

	2721 while (ce != UCOL_NULLORDER) {

	2722 if ((uint32_t)ucol_next(iter1, &status) != ce) {

	2723 log_err("Collation elements in contraction split does not ma tch\n");

	2724 return;

	2725 }

	2726 ce = ucol_next(iter2, &status);

	2727 }

	2728 j ++;

	2729 ucol_closeElements(iter2);

	2730 }

	2731 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {

	2732 log_err("Collation elements not exhausted\n");

	2733 return;

	2734 }

	2735 ucol_closeElements(iter1);

	2736 ucol_close(coll);

	2737 }

	2738

	2739 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);

	2740 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

	2741 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {

	2742 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",

	2743 testdata2[0][0], testdata2[0][1], testdata2[1][0],

	2744 testdata2[1][1]);

	2745 return;

	2746 }

	2747 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {

	2748 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",

	2749 testdata2[1][0], testdata2[1][1], testdata2[2][0],

	2750 testdata2[2][1]);

	2751 return;

	2752 }

	2753 ucol_close(coll);

	2754

	2755 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {

	2756 UCollator *coll1,

	2757 *coll2;

	2758 UCollationElements *iter1,

	2759 *iter2;

	2760 UChar ch = 0x0042 /* 'B' */;

	2761 uint32_t ce;

	2762 rlen = u_unescape(testrules3[i], rule, 32);

	2763 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ;

	2764 rlen = u_unescape(testrules3[i + 1], rule, 32);

	2765 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ;

	2766 if (U_FAILURE(status)) {

	2767 log_err("Collator creation failed %s\n", testrules[i]);

	2768 return;

	2769 }

	2770 iter1 = ucol_openElements(coll1, &ch, 1, &status);

	2771 iter2 = ucol_openElements(coll2, &ch, 1, &status);

	2772 if (U_FAILURE(status)) {

	2773 log_err("Collation iterator creation failed\n");

	2774 return;

	2775 }

	2776 ce = ucol_next(iter1, &status);

	2777 if (U_FAILURE(status)) {

	2778 log_err("Retrieving ces failed\n");

	2779 return;

	2780 }

	2781 while (ce != UCOL_NULLORDER) {

	2782 if (ce != (uint32_t)ucol_next(iter2, &status)) {

	2783 log_err("CEs does not match\n");

	2784 return;

	2785 }

	2786 ce = ucol_next(iter1, &status);

	2787 if (U_FAILURE(status)) {

	2788 log_err("Retrieving ces failed\n");

	2789 return;

	2790 }

	2791 }

	2792 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {

	2793 log_err("CEs not exhausted\n");

	2794 return;

	2795 }

	2796 ucol_closeElements(iter1);

	2797 ucol_closeElements(iter2);

	2798 ucol_close(coll1);

	2799 ucol_close(coll2);

	2800 }

	2801 }

	2802

	2803 static void TestExpansion(void) {

	2804 const static char *testrules[] = {

	2805 "&J << K / B & K << M",

	2806 "&J << K / B << M"

	2807 };

	2808 const static UChar testdata[][3] = {

	2809 {0x004A /'J'/, 0x0041 /'A'/, 0},

	2810 {0x004D /'M'/, 0x0041 /'A'/, 0},

	2811 {0x004B /'K'/, 0x0041 /'A'/, 0},

	2812 {0x004B /'K'/, 0x0043 /'C'/, 0},

	2813 {0x004A /'J'/, 0x0043 /'C'/, 0},

	2814 {0x004D /'M'/, 0x0043 /'C'/, 0}

	2815 };

	2816

	2817 UErrorCode status = U_ZERO_ERROR;

	2818 UCollator *coll;

	2819 UChar rule[256] = {0};

	2820 uint32_t rlen = 0;

	2821 int i;

	2822

	2823 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {

	2824 int j = 0;

	2825 log_verbose("Rule %s for testing\n", testrules[i]);

	2826 rlen = u_unescape(testrules[i], rule, 32);

	2827 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

	2828 if (U_FAILURE(status)) {

	2829 log_err_status(status, "Collator creation failed %s -> %s\n", testru les[i], u_errorName(status));

	2830 return;

	2831 }

	2832

	2833 for (j = 0; j < 5; j ++) {

	2834 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);

	2835 }

	2836 ucol_close(coll);

	2837 }

	2838 }

	2839

	2840 #if 0

	2841 /* this test tests the current limitations of the engine */

	2842 /* it always fail, so it is disabled by default */

	2843 static void TestLimitations(void) {

	2844 /* recursive expansions */

	2845 {

	2846 static const char *rule = "&a=b/c&d=c/e";

	2847 static const char *tlimit01[] = {"add","b","adf"};

	2848 static const char *tlimit02[] = {"aa","b","af"};

	2849 log_verbose("recursive expansions\n");

	2850 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));

	2851 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));

	2852 }

	2853 /* contractions spanning expansions */

	2854 {

	2855 static const char *rule = "&a<<<c/e&g<<<eh";

	2856 static const char *tlimit01[] = {"ad","c","af","f","ch","h"};

	2857 static const char *tlimit02[] = {"ad","c","ch","af","f","h"};

	2858 log_verbose("contractions spanning expansions\n");

	2859 genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));

	2860 genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));

	2861 }

	2862 /* normalization: nulls in contractions */

	2863 {

	2864 static const char *rule = "&a<<<\\u0000\\u0302";

	2865 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};

	2866 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};

	2867 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };

	2868 static const UColAttributeValue valOn[] = { UCOL_ON };

	2869 static const UColAttributeValue valOff[] = { UCOL_OFF };

	2870

	2871 log_verbose("NULL in contractions\n");

	2872 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);

	2873 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);

	2874 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);

	2875 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

	2876

	2877 }

	2878 /* normalization: contractions spanning normalization */

	2879 {

	2880 static const char *rule = "&a<<<\\u0000\\u0302";

	2881 static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};

	2882 static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};

	2883 static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };

	2884 static const UColAttributeValue valOn[] = { UCOL_ON };

	2885 static const UColAttributeValue valOff[] = { UCOL_OFF };

	2886

	2887 log_verbose("contractions spanning normalization\n");

	2888 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);

	2889 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);

	2890 genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);

	2891 genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

	2892

	2893 }

	2894 /* variable top: */

	2895 {

	2896 /static const char rule2 = "&\\u2010<x=[variable top]<z";*/

	2897 static const char *rule = "&\\u2010<x<[variable top]=z";

	2898 /static const char rule3 = "&' '<x<[variable top]=z";*/

	2899 static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" } ;

	2900 static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};

	2901 static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" } ;

	2902 static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };

	2903 static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };

	2904 static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIA RY };

	2905

	2906 log_verbose("variable top\n");

	2907 genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimi t03[0]), att, valOn, sizeof(att)/sizeof(att[0]));

	2908 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimi t01[0]), att, valOn, sizeof(att)/sizeof(att[0]));

	2909 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimi t02[0]), att, valOn, sizeof(att)/sizeof(att[0]));

	2910 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimi t01[0]), att, valOff, sizeof(att)/sizeof(att[0]));

	2911 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimi t02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

	2912

	2913 }

	2914 /* case level */

	2915 {

	2916 static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";

	2917 static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};

	2918 static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};

	2919 static const UColAttribute att[] = { UCOL_CASE_FIRST};

	2920 static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};

	2921 /static const UColAttributeValue valOff[] = { UCOL_OFF};/

	2922 log_verbose("case level\n");

	2923 genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimi t01[0]), att, valOn, sizeof(att)/sizeof(att[0]));

	2924 genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimi t02[0]), att, valOn, sizeof(att)/sizeof(att[0]));

	2925 /genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tli mit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));/

	2926 /genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tli mit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));/

	2927 }

	2928

	2929 }

	2930 #endif

	2931

	2932 static void TestBocsuCoverage(void) {

	2933 UErrorCode status = U_ZERO_ERROR;

	2934 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u004 1";

	2935 UChar test[256] = {0};

	2936 uint32_t tlen = u_unescape(testString, test, 32);

	2937 uint8_t key[256] = {0};

	2938 uint32_t klen = 0;

	2939

	2940 UCollator *coll = ucol_open("", &status);

	2941 if(U_SUCCESS(status)) {

	2942 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);

	2943

	2944 klen = ucol_getSortKey(coll, test, tlen, key, 256);

	2945

	2946 ucol_close(coll);

	2947 } else {

	2948 log_data_err("Couldn't open UCA\n");

	2949 }

	2950 }

	2951

	2952 static void TestVariableTopSetting(void) {

	2953 UErrorCode status = U_ZERO_ERROR;

	2954 const UChar *current = NULL;

	2955 uint32_t varTopOriginal = 0, varTop1, varTop2;

	2956 UCollator *coll = ucol_open("", &status);

	2957 if(U_SUCCESS(status)) {

	2958

	2959 uint32_t strength = 0;

	2960 uint16_t specs = 0;

	2961 uint32_t chOffset = 0;

	2962 uint32_t chLen = 0;

	2963 uint32_t exOffset = 0;

	2964 uint32_t exLen = 0;

	2965 uint32_t oldChOffset = 0;

	2966 uint32_t oldChLen = 0;

	2967 uint32_t oldExOffset = 0;

	2968 uint32_t oldExLen = 0;

	2969 uint32_t prefixOffset = 0;

	2970 uint32_t prefixLen = 0;

	2971

	2972 UBool startOfRules = TRUE;

	2973 UColTokenParser src;

	2974 UColOptionSet opts;

	2975

	2976 UChar *rulesCopy = NULL;

	2977 uint32_t rulesLen;

	2978

	2979 UCollationResult result;

	2980

	2981 UChar first[256] = { 0 };

	2982 UChar second[256] = { 0 };

	2983 UParseError parseError;

	2984 int32_t myQ = getTestOption(QUICK_OPTION);

	2985

	2986 uprv_memset(&src, 0, sizeof(UColTokenParser));

	2987

	2988 src.opts = &opts;

	2989

	2990 if(getTestOption(QUICK_OPTION) <= 0) {

	2991 setTestOption(QUICK_OPTION, 1);

	2992 }

	2993

	2994 /* this test will fail when normalization is turned on */

	2995 /* therefore we always turn off exhaustive mode for it */

	2996 { /* QUICK > 0*/

	2997 log_verbose("Slide variable top over UCARules\n");

	2998 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, 0);

	2999 rulesCopy = (UChar )uprv_malloc((rulesLen+UCOL_TOK_EXTRA_RULE_SPACE_SIZE)s izeof(UChar));

	3000 rulesLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rulesCopy, rulesLen+UCOL_T OK_EXTRA_RULE_SPACE_SIZE);

	3001

	3002 if(U_SUCCESS(status) && rulesLen > 0) {

	3003 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

	3004 src.current = src.source = rulesCopy;

	3005 src.end = rulesCopy+rulesLen;

	3006 src.extraCurrent = src.end;

	3007 src.extraEnd = src.end+UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

	3008

	3009 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parseNextTo ken can cause the pointer to

	3010 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */

	3011 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError, &status)) != NULL) {

	3012 strength = src.parsedToken.strength;

	3013 chOffset = src.parsedToken.charsOffset;

	3014 chLen = src.parsedToken.charsLen;

	3015 exOffset = src.parsedToken.extensionOffset;

	3016 exLen = src.parsedToken.extensionLen;

	3017 prefixOffset = src.parsedToken.prefixOffset;

	3018 prefixLen = src.parsedToken.prefixLen;

	3019 specs = src.parsedToken.flags;

	3020

	3021 startOfRules = FALSE;

	3022 {

	3023 log_verbose("%04X %d ", *(src.source+chOffset), chLen);

	3024 }

	3025 if(strength == UCOL_PRIMARY) {

	3026 status = U_ZERO_ERROR;

	3027 varTopOriginal = ucol_getVariableTop(coll, &status);

	3028 varTop1 = ucol_setVariableTop(coll, src.source+oldChOffset, oldChLen, &status);

	3029 if(U_FAILURE(status)) {

	3030 char buffer[256];

	3031 char *buf = buffer;

	3032 uint32_t i = 0, j;

	3033 uint32_t CE = UCOL_NO_MORE_CES;

	3034

	3035 /* before we start screaming, let's see if there is a problem with t he rules */

	3036 UErrorCode collIterateStatus = U_ZERO_ERROR;

	3037 collIterate *s = uprv_new_collIterate(&collIterateStatus);

	3038 uprv_init_collIterate(coll, src.source+oldChOffset, oldChLen, s, &co llIterateStatus);

	3039

	3040 CE = ucol_getNextCE(coll, s, &status);

	3041

	3042 for(i = 0; i < oldChLen; i++) {

	3043 j = sprintf(buf, "%04X ", *(src.source+oldChOffset+i));

	3044 buf += j;

	3045 }

	3046 if(status == U_PRIMARY_TOO_LONG_ERROR) {

	3047 log_verbose("= Expected failure for %s =", buffer);

	3048 } else {

	3049 if(uprv_collIterateAtEnd(s)) {

	3050 log_err("Unexpected failure setting variable top at offset %d. E rror %s. Codepoints: %s\n",

	3051 oldChOffset, u_errorName(status), buffer);

	3052 } else {

	3053 log_verbose("There is a goofy contraction in UCA rules that does not appear in the fractional UCA. Codepoints: %s\n",

	3054 buffer);

	3055 }

	3056 }

	3057 uprv_delete_collIterate(s);

	3058 }

	3059 varTop2 = ucol_getVariableTop(coll, &status);

	3060 if((varTop1 & 0xFFFF0000) != (varTop2 & 0xFFFF0000)) {

	3061 log_err("cannot retrieve set varTop value!\n");

	3062 continue;

	3063 }

	3064

	3065 if((varTop1 & 0xFFFF0000) > 0 && oldExLen == 0) {

	3066

	3067 u_strncpy(first, src.source+oldChOffset, oldChLen);

	3068 u_strncpy(first+oldChLen, src.source+chOffset, chLen);

	3069 u_strncpy(first+oldChLen+chLen, src.source+oldChOffset, oldChLen);

	3070 first[2*oldChLen+chLen] = 0;

	3071

	3072 if(oldExLen == 0) {

	3073 u_strncpy(second, src.source+chOffset, chLen);

	3074 second[chLen] = 0;

	3075 } else { /* This is skipped momentarily, but should work once UCARul es are fully UCA conformant */

	3076 u_strncpy(second, src.source+oldExOffset, oldExLen);

	3077 u_strncpy(second+oldChLen, src.source+chOffset, chLen);

	3078 u_strncpy(second+oldChLen+chLen, src.source+oldExOffset, oldExLen) ;

	3079 second[2*oldExLen+chLen] = 0;

	3080 }

	3081 result = ucol_strcoll(coll, first, -1, second, -1);

	3082 if(result == UCOL_EQUAL) {

	3083 doTest(coll, first, second, UCOL_EQUAL);

	3084 } else {

	3085 log_verbose("Suspicious strcoll result for %04X and %04X\n", (src .source+oldChOffset), (src.source+chOffset));

	3086 }

	3087 }

	3088 }

	3089 if(strength != UCOL_TOK_RESET) {

	3090 oldChOffset = chOffset;

	3091 oldChLen = chLen;

	3092 oldExOffset = exOffset;

	3093 oldExLen = exLen;

	3094 }

	3095 }

	3096 status = U_ZERO_ERROR;

	3097 }

	3098 else {

	3099 log_err("Unexpected failure getting rules %s\n", u_errorName(status));

	3100 return;

	3101 }

	3102 if (U_FAILURE(status)) {

	3103 log_err("Error parsing rules %s\n", u_errorName(status));

	3104 return;

	3105 }

	3106 status = U_ZERO_ERROR;

	3107 }

	3108

	3109 setTestOption(QUICK_OPTION, myQ);

	3110

	3111 log_verbose("Testing setting variable top to contractions\n");

	3112 {

	3113 /* uint32_t tailoredCE = UCOL_NOT_FOUND; */

	3114 /UChar conts = (UChar )((uint8_t )coll->image + coll->image->UCAConsts+s izeof(UCAConstants));*/

	3115 UChar conts = (UChar )((uint8_t *)coll->image + coll->image->contractionUC ACombos);

	3116 while(*conts != 0) {

	3117 if(((conts+2) == 0) \|\| ((conts+1)==0)) { /* contracts or pre-context con tractions */

	3118 varTop1 = ucol_setVariableTop(coll, conts, -1, &status);

	3119 } else {

	3120 varTop1 = ucol_setVariableTop(coll, conts, 3, &status);

	3121 }

	3122 if(U_FAILURE(status)) {

	3123 if(status == U_PRIMARY_TOO_LONG_ERROR) {

	3124 /* ucol_setVariableTop() is documented to not accept 3-byte primaries,

	3125 * therefore it is not an error when it complains about them. */

	3126 log_verbose("Couldn't set variable top to a contraction %04X %04X %04X - U_PRIMARY_TOO_LONG_ERROR\n",

	3127 conts, (conts+1), *(conts+2));

	3128 } else {

	3129 log_err("Couldn't set variable top to a contraction %04X %04X %04X - % s\n",

	3130 conts, (conts+1), *(conts+2), u_errorName(status));

	3131 }

	3132 status = U_ZERO_ERROR;

	3133 }

	3134 conts+=3;

	3135 }

	3136

	3137 status = U_ZERO_ERROR;

	3138

	3139 first[0] = 0x0040;

	3140 first[1] = 0x0050;

	3141 first[2] = 0x0000;

	3142

	3143 ucol_setVariableTop(coll, first, -1, &status);

	3144

	3145 if(U_SUCCESS(status)) {

	3146 log_err("Invalid contraction succeded in setting variable top!\n");

	3147 }

	3148

	3149 }

	3150

	3151 log_verbose("Test restoring variable top\n");

	3152

	3153 status = U_ZERO_ERROR;

	3154 ucol_restoreVariableTop(coll, varTopOriginal, &status);

	3155 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {

	3156 log_err("Couldn't restore old variable top\n");

	3157 }

	3158

	3159 log_verbose("Testing calling with error set\n");

	3160

	3161 status = U_INTERNAL_PROGRAM_ERROR;

	3162 varTop1 = ucol_setVariableTop(coll, first, 1, &status);

	3163 varTop2 = ucol_getVariableTop(coll, &status);

	3164 ucol_restoreVariableTop(coll, varTop2, &status);

	3165 varTop1 = ucol_setVariableTop(NULL, first, 1, &status);

	3166 varTop2 = ucol_getVariableTop(NULL, &status);

	3167 ucol_restoreVariableTop(NULL, varTop2, &status);

	3168 if(status != U_INTERNAL_PROGRAM_ERROR) {

	3169 log_err("Bad reaction to passed error!\n");

	3170 }

	3171 uprv_free(src.source);

	3172 ucol_close(coll);

	3173 } else {

	3174 log_data_err("Couldn't open UCA collator\n");

	3175 }

	3176

	3177 }

	3178

	3179 static void TestNonChars(void) {

	3180 static const char *test[] = {

	3181 "\\u0000", /* ignorable */

	3182 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei ghts */

	3183 "\\uFDD0", "\\uFDEF",

	3184 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */

	3185 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */

	3186 "\\U0003FFFE", "\\U0003FFFF",

	3187 "\\U0004FFFE", "\\U0004FFFF",

	3188 "\\U0005FFFE", "\\U0005FFFF",

	3189 "\\U0006FFFE", "\\U0006FFFF",

	3190 "\\U0007FFFE", "\\U0007FFFF",

	3191 "\\U0008FFFE", "\\U0008FFFF",

	3192 "\\U0009FFFE", "\\U0009FFFF",

	3193 "\\U000AFFFE", "\\U000AFFFF",

	3194 "\\U000BFFFE", "\\U000BFFFF",

	3195 "\\U000CFFFE", "\\U000CFFFF",

	3196 "\\U000DFFFE", "\\U000DFFFF",

	3197 "\\U000EFFFE", "\\U000EFFFF",

	3198 "\\U000FFFFE", "\\U000FFFFF",

	3199 "\\U0010FFFE", "\\U0010FFFF",

	3200 "\\uFFFF" /* special character with maximum primary weight */

	3201 };

	3202 UErrorCode status = U_ZERO_ERROR;

	3203 UCollator *coll = ucol_open("en_US", &status);

	3204

	3205 log_verbose("Test non characters\n");

	3206

	3207 if(U_SUCCESS(status)) {

	3208 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);

	3209 } else {

	3210 log_err_status(status, "Unable to open collator\n");

	3211 }

	3212

	3213 ucol_close(coll);

	3214 }

	3215

	3216 static void TestExtremeCompression(void) {

	3217 static char *test[4];

	3218 int32_t j = 0, i = 0;

	3219

	3220 for(i = 0; i<4; i++) {

	3221 test[i] = (char )malloc(2048sizeof(char));

	3222 }

	3223

	3224 for(j = 20; j < 500; j++) {

	3225 for(i = 0; i<4; i++) {

	3226 uprv_memset(test[i], 'a', (j-1)*sizeof(char));

	3227 test[i][j-1] = (char)('a'+i);

	3228 test[i][j] = 0;

	3229 }

	3230 genericLocaleStarter("en_US", (const char **)test, 4);

	3231 }

	3232

	3233

	3234 for(i = 0; i<4; i++) {

	3235 free(test[i]);

	3236 }

	3237 }

	3238

	3239 #if 0

	3240 static void TestExtremeCompression(void) {

	3241 static char *test[4];

	3242 int32_t j = 0, i = 0;

	3243 UErrorCode status = U_ZERO_ERROR;

	3244 UCollator *coll = ucol_open("en_US", status);

	3245 for(i = 0; i<4; i++) {

	3246 test[i] = (char )malloc(2048sizeof(char));

	3247 }

	3248 for(j = 10; j < 2048; j++) {

	3249 for(i = 0; i<4; i++) {

	3250 uprv_memset(test[i], 'a', (j-2)*sizeof(char));

	3251 test[i][j-1] = (char)('a'+i);

	3252 test[i][j] = 0;

	3253 }

	3254 }

	3255 genericLocaleStarter("en_US", (const char **)test, 4);

	3256

	3257 for(j = 10; j < 2048; j++) {

	3258 for(i = 0; i<1; i++) {

	3259 uprv_memset(test[i], 'a', (j-1)*sizeof(char));

	3260 test[i][j] = 0;

	3261 }

	3262 }

	3263 for(i = 0; i<4; i++) {

	3264 free(test[i]);

	3265 }

	3266 }

	3267 #endif

	3268

	3269 static void TestSurrogates(void) {

	3270 static const char *test[] = {

	3271 "z","\\ud900\\udc25", "\\ud805\\udc50",

	3272 "\\ud800\\udc00y", "\\ud800\\udc00r",

	3273 "\\ud800\\udc00f", "\\ud800\\udc00",

	3274 "\\ud800\\udc00c", "\\ud800\\udc00b",

	3275 "\\ud800\\udc00fa", "\\ud800\\udc00fb",

	3276 "\\ud800\\udc00a",

	3277 "c", "b"

	3278 };

	3279

	3280 static const char *rule =

	3281 "&z < \\ud900\\udc25 < \\ud805\\udc50"

	3282 "< \\ud800\\udc00y < \\ud800\\udc00r"

	3283 "< \\ud800\\udc00f << \\ud800\\udc00"

	3284 "< \\ud800\\udc00fa << \\ud800\\udc00fb"

	3285 "< \\ud800\\udc00a < c < b" ;

	3286

	3287 genericRulesStarter(rule, test, 14);

	3288 }

	3289

	3290 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */

	3291 static void TestPrefix(void) {

	3292 uint32_t i;

	3293

	3294 static const struct {

	3295 const char *rules;

	3296 const char *data[50];

	3297 const uint32_t len;

	3298 } tests[] = {

	3299 { "&z <<< z\|a",

	3300 {"zz", "za"}, 2 },

	3301

	3302 { "&z <<< z\| a",

	3303 {"zz", "za"}, 2 },

	3304 { "[strength I]"

	3305 "&a=\\ud900\\udc25"

	3306 "&z<<<\\ud900\\udc25\|a",

	3307 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },

	3308 };

	3309

	3310

	3311 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

	3312 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

	3313 }

	3314 }

	3315

	3316 /* This test uses data suplied by Masashiko Maedera to test the implementation * /

	3317 /* JIS X 4061 collation order implementation * /

	3318 static void TestNewJapanese(void) {

	3319

	3320 static const char * const test1[] = {

	3321 "\\u30b7\\u30e3\\u30fc\\u30ec",

	3322 "\\u30b7\\u30e3\\u30a4",

	3323 "\\u30b7\\u30e4\\u30a3",

	3324 "\\u30b7\\u30e3\\u30ec",

	3325 "\\u3061\\u3087\\u3053",

	3326 "\\u3061\\u3088\\u3053",

	3327 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",

	3328 "\\u3066\\u30fc\\u305f",

	3329 "\\u30c6\\u30fc\\u30bf",

	3330 "\\u30c6\\u30a7\\u30bf",

	3331 "\\u3066\\u3048\\u305f",

	3332 "\\u3067\\u30fc\\u305f",

	3333 "\\u30c7\\u30fc\\u30bf",

	3334 "\\u30c7\\u30a7\\u30bf",

	3335 "\\u3067\\u3048\\u305f",

	3336 "\\u3066\\u30fc\\u305f\\u30fc",

	3337 "\\u30c6\\u30fc\\u30bf\\u30a1",

	3338 "\\u30c6\\u30a7\\u30bf\\u30fc",

	3339 "\\u3066\\u3047\\u305f\\u3041",

	3340 "\\u3066\\u3048\\u305f\\u30fc",

	3341 "\\u3067\\u30fc\\u305f\\u30fc",

	3342 "\\u30c7\\u30fc\\u30bf\\u30a1",

	3343 "\\u3067\\u30a7\\u305f\\u30a1",

	3344 "\\u30c7\\u3047\\u30bf\\u3041",

	3345 "\\u30c7\\u30a8\\u30bf\\u30a2",

	3346 "\\u3072\\u3086",

	3347 "\\u3073\\u3085\\u3042",

	3348 "\\u3074\\u3085\\u3042",

	3349 "\\u3073\\u3085\\u3042\\u30fc",

	3350 "\\u30d3\\u30e5\\u30a2\\u30fc",

	3351 "\\u3074\\u3085\\u3042\\u30fc",

	3352 "\\u30d4\\u30e5\\u30a2\\u30fc",

	3353 "\\u30d2\\u30e5\\u30a6",

	3354 "\\u30d2\\u30e6\\u30a6",

	3355 "\\u30d4\\u30e5\\u30a6\\u30a2",

	3356 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",

	3357 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",

	3358 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",

	3359 "\\u3072\\u3085\\u3093",

	3360 "\\u3074\\u3085\\u3093",

	3361 "\\u3075\\u30fc\\u308a",

	3362 "\\u30d5\\u30fc\\u30ea",

	3363 "\\u3075\\u3045\\u308a",

	3364 "\\u3075\\u30a5\\u308a",

	3365 "\\u3075\\u30a5\\u30ea",

	3366 "\\u30d5\\u30a6\\u30ea",

	3367 "\\u3076\\u30fc\\u308a",

	3368 "\\u30d6\\u30fc\\u30ea",

	3369 "\\u3076\\u3045\\u308a",

	3370 "\\u30d6\\u30a5\\u308a",

	3371 "\\u3077\\u3046\\u308a",

	3372 "\\u30d7\\u30a6\\u30ea",

	3373 "\\u3075\\u30fc\\u308a\\u30fc",

	3374 "\\u30d5\\u30a5\\u30ea\\u30fc",

	3375 "\\u3075\\u30a5\\u308a\\u30a3",

	3376 "\\u30d5\\u3045\\u308a\\u3043",

	3377 "\\u30d5\\u30a6\\u30ea\\u30fc",

	3378 "\\u3075\\u3046\\u308a\\u3043",

	3379 "\\u30d6\\u30a6\\u30ea\\u30a4",

	3380 "\\u3077\\u30fc\\u308a\\u30fc",

	3381 "\\u3077\\u30a5\\u308a\\u30a4",

	3382 "\\u3077\\u3046\\u308a\\u30fc",

	3383 "\\u30d7\\u30a6\\u30ea\\u30a4",

	3384 "\\u30d5\\u30fd",

	3385 "\\u3075\\u309e",

	3386 "\\u3076\\u309d",

	3387 "\\u3076\\u3075",

	3388 "\\u3076\\u30d5",

	3389 "\\u30d6\\u3075",

	3390 "\\u30d6\\u30d5",

	3391 "\\u3076\\u309e",

	3392 "\\u3076\\u3077",

	3393 "\\u30d6\\u3077",

	3394 "\\u3077\\u309d",

	3395 "\\u30d7\\u30fd",

	3396 "\\u3077\\u3075",

	3397 };

	3398

	3399 static const char *test2[] = {

	3400 "\\u306f\\u309d", /* H\\u309d */

	3401 "\\u30cf\\u30fd", /* K\\u30fd */

	3402 "\\u306f\\u306f", /* HH */

	3403 "\\u306f\\u30cf", /* HK */

	3404 "\\u30cf\\u30cf", /* KK */

	3405 "\\u306f\\u309e", /* H\\u309e */

	3406 "\\u30cf\\u30fe", /* K\\u30fe */

	3407 "\\u306f\\u3070", /* HH\\u309b */

	3408 "\\u30cf\\u30d0", /* KK\\u309b */

	3409 "\\u306f\\u3071", /* HH\\u309c */

	3410 "\\u30cf\\u3071", /* KH\\u309c */

	3411 "\\u30cf\\u30d1", /* KK\\u309c */

	3412 "\\u3070\\u309d", /* H\\u309b\\u309d */

	3413 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */

	3414 "\\u3070\\u306f", /* H\\u309bH */

	3415 "\\u30d0\\u30cf", /* K\\u309bK */

	3416 "\\u3070\\u309e", /* H\\u309b\\u309e */

	3417 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */

	3418 "\\u3070\\u3070", /* H\\u309bH\\u309b */

	3419 "\\u30d0\\u3070", /* K\\u309bH\\u309b */

	3420 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */

	3421 "\\u3070\\u3071", /* H\\u309bH\\u309c */

	3422 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */

	3423 "\\u3071\\u309d", /* H\\u309c\\u309d */

	3424 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */

	3425 "\\u3071\\u306f", /* H\\u309cH */

	3426 "\\u30d1\\u30cf", /* K\\u309cK */

	3427 "\\u3071\\u3070", /* H\\u309cH\\u309b */

	3428 "\\u3071\\u30d0", /* H\\u309cK\\u309b */

	3429 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */

	3430 "\\u3071\\u3071", /* H\\u309cH\\u309c */

	3431 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */

	3432 };

	3433 /*

	3434 static const char *test3[] = {

	3435 "\\u221er\\u221e",

	3436 "\\u221eR#",

	3437 "\\u221et\\u221e",

	3438 "#r\\u221e",

	3439 "#R#",

	3440 "#t%",

	3441 "#T%",

	3442 "8t\\u221e",

	3443 "8T\\u221e",

	3444 "8t#",

	3445 "8T#",

	3446 "8t%",

	3447 "8T%",

	3448 "8t8",

	3449 "8T8",

	3450 "\\u03c9r\\u221e",

	3451 "\\u03a9R%",

	3452 "rr\\u221e",

	3453 "rR\\u221e",

	3454 "Rr\\u221e",

	3455 "RR\\u221e",

	3456 "RT%",

	3457 "rt8",

	3458 "tr\\u221e",

	3459 "tr8",

	3460 "TR8",

	3461 "tt8",

	3462 "\\u30b7\\u30e3\\u30fc\\u30ec",

	3463 };

	3464 */

	3465 static const UColAttribute att[] = { UCOL_STRENGTH };

	3466 static const UColAttributeValue val[] = { UCOL_QUATERNARY };

	3467

	3468 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HAND LING};

	3469 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };

	3470

	3471 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), a tt, val, 1);

	3472 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), a tt, val, 1);

	3473 /genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));/

	3474 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), a ttShifted, valShifted, 2);

	3475 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), a ttShifted, valShifted, 2);

	3476 }

	3477

	3478 static void TestStrCollIdenticalPrefix(void) {

	3479 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";

	3480 const char* test[] = {

	3481 "ab\\ud9b0\\udc70",

	3482 "ab\\ud9b0\\udc71"

	3483 };

	3484 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_E QUAL);

	3485 }

	3486 /* Contractions should have all their canonically equivalent */

	3487 /* strings included */

	3488 static void TestContractionClosure(void) {

	3489 static const struct {

	3490 const char *rules;

	3491 const char *data[10];

	3492 const uint32_t len;

	3493 } tests[] = {

	3494 { "&b=\\u00e4\\u00e4",

	3495 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\ \u00e4" }, 5},

	3496 { "&b=\\u00C5",

	3497 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},

	3498 };

	3499 uint32_t i;

	3500

	3501

	3502 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

	3503 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, U COL_EQUAL);

	3504 }

	3505 }

	3506

	3507 /* This tests also fails*/

	3508 static void TestBeforePrefixFailure(void) {

	3509 static const struct {

	3510 const char *rules;

	3511 const char *data[10];

	3512 const uint32_t len;

	3513 } tests[] = {

	3514 { "&g <<< a"

	3515 "&[before 3]\\uff41 <<< x",

	3516 {"x", "\\uff41"}, 2 },

	3517 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

	3518 "&\\u30A8=\\u30A8=\\u3048=\\uff74"

	3519 "&[before 3]\\u30a7<<<\\u30a9",

	3520 {"\\u30a9", "\\u30a7"}, 2 },

	3521 { "&[before 3]\\u30a7<<<\\u30a9"

	3522 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

	3523 "&\\u30A8=\\u30A8=\\u3048=\\uff74",

	3524 {"\\u30a9", "\\u30a7"}, 2 },

	3525 };

	3526 uint32_t i;

	3527

	3528

	3529 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

	3530 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

	3531 }

	3532

	3533 #if 0

	3534 const char* rule1 =

	3535 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

	3536 "&\\u30A8=\\u30A8=\\u3048=\\uff74"

	3537 "&[before 3]\\u30a7<<<\\u30c6\|\\u30fc";

	3538 const char* rule2 =

	3539 "&[before 3]\\u30a7<<<\\u30c6\|\\u30fc"

	3540 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

	3541 "&\\u30A8=\\u30A8=\\u3048=\\uff74";

	3542 const char* test[] = {

	3543 "\\u30c6\\u30fc\\u30bf",

	3544 "\\u30c6\\u30a7\\u30bf",

	3545 };

	3546 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));

	3547 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));

	3548 /* this piece of code should be in some sort of verbose mode */

	3549 /* it gets the collation elements for elements and prints them */

	3550 /* This is useful when trying to see whether the problem is */

	3551 {

	3552 UErrorCode status = U_ZERO_ERROR;

	3553 uint32_t i = 0;

	3554 UCollationElements *it = NULL;

	3555 uint32_t CE;

	3556 UChar string[256];

	3557 uint32_t uStringLen;

	3558 UCollator *coll = NULL;

	3559

	3560 uStringLen = u_unescape(rule1, string, 256);

	3561

	3562 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

	3563

	3564 /coll = ucol_open("ja_JP_JIS", &status);/

	3565 it = ucol_openElements(coll, string, 0, &status);

	3566

	3567 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {

	3568 log_verbose("%s\n", test[i]);

	3569 uStringLen = u_unescape(test[i], string, 256);

	3570 ucol_setText(it, string, uStringLen, &status);

	3571

	3572 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {

	3573 log_verbose("%08X\n", CE);

	3574 }

	3575 log_verbose("\n");

	3576

	3577 }

	3578

	3579 ucol_closeElements(it);

	3580 ucol_close(coll);

	3581 }

	3582 #endif

	3583 }

	3584

	3585 static void TestPrefixCompose(void) {

	3586 const char* rule1 =

	3587 "&\\u30a7<<<\\u30ab\|\\u30fc=\\u30ac\|\\u30fc";

	3588 /*

	3589 const char* test[] = {

	3590 "\\u30c6\\u30fc\\u30bf",

	3591 "\\u30c6\\u30a7\\u30bf",

	3592 };

	3593 */

	3594 {

	3595 UErrorCode status = U_ZERO_ERROR;

	3596 /uint32_t i = 0;/

	3597 /UCollationElements it = NULL;*/

	3598 /* uint32_t CE;*/

	3599 UChar string[256];

	3600 uint32_t uStringLen;

	3601 UCollator *coll = NULL;

	3602

	3603 uStringLen = u_unescape(rule1, string, 256);

	3604

	3605 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

	3606 ucol_close(coll);

	3607 }

	3608

	3609

	3610 }

	3611

	3612 /*

	3613 [last variable] last variable value

	3614 [last primary ignorable] largest CE for primary ignorable

	3615 [last secondary ignorable] largest CE for secondary ignorable

	3616 [last tertiary ignorable] largest CE for tertiary ignorable

	3617 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8 )

	3618 */

	3619

	3620 static void TestRuleOptions(void) {

	3621 /* values here are hardcoded and are correct for the current UCA

	3622 * when the UCA changes, one might be forced to change these

	3623 * values.

	3624 */

	3625

	3626 /*

	3627 * These strings contain the last character before [variable top]

	3628 * and the first and second characters (by primary weights) after it.

	3629 * See FractionalUCA.txt. For example:

	3630 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICA TOR

	3631 [variable top = 0C FE]

	3632 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT

	3633 and

	3634 00B4; [0D 0C, 05, 05]

	3635 *

	3636 * Note: Starting with UCA 6.0, the [variable top] collation element

	3637 * is not the weight of any character or string,

	3638 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].

	3639 */

	3640 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"

	3641 #define FIRST_REGULAR_CHAR_STRING "\\u0060"

	3642 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"

	3643

	3644 /*

	3645 * This string has to match the character that has the [last regular] weight

	3646 * which changes with each UCA version.

	3647 * See the bottom of FractionalUCA.txt which says something like

	3648 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032

	3649 *

	3650 * Note: Starting with UCA 6.0, the [last regular] collation element

	3651 * is not the weight of any character or string,

	3652 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].

	3653 */

	3654 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"

	3655

	3656 static const struct {

	3657 const char *rules;

	3658 const char *data[10];

	3659 const uint32_t len;

	3660 } tests[] = {

	3661 /* - all befores here amount to zero */

	3662 { "&[before 3][first tertiary ignorable]<<<a",

	3663 { "\\u0000", "a"}, 2

	3664 }, /* you cannot go before first tertiary ignorable */

	3665

	3666 { "&[before 3][last tertiary ignorable]<<<a",

	3667 { "\\u0000", "a"}, 2

	3668 }, /* you cannot go before last tertiary ignorable */

	3669

	3670 { "&[before 3][first secondary ignorable]<<<a",

	3671 { "\\u0000", "a"}, 2

	3672 }, /* you cannot go before first secondary ignorable */

	3673

	3674 { "&[before 3][last secondary ignorable]<<<a",

	3675 { "\\u0000", "a"}, 2

	3676 }, /* you cannot go before first secondary ignorable */

	3677

	3678 /* 'normal' befores */

	3679

	3680 { "&[before 3][first primary ignorable]<<<c<<<b &[first primary ignorable]<a ",

	3681 { "c", "b", "\\u0332", "a" }, 4

	3682 },

	3683

	3684 /* we don't have a code point that corresponds to

	3685 * the last primary ignorable

	3686 */

	3687 { "&[before 3][last primary ignorable]<<<c<<<b &[last primary ignorable]<a",

	3688 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5

	3689 },

	3690

	3691 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",

	3692 { "c", "b", "\\u0009", "a", "\\u000a" }, 5

	3693 },

	3694

	3695 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",

	3696 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_ REGULAR_CHAR_STRING }, 5

	3697 },

	3698

	3699 { "&[first regular]<a"

	3700 "&[before 1][first regular]<b",

	3701 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4

	3702 },

	3703

	3704 { "&[before 1][last regular]<b"

	3705 "&[last regular]<a",

	3706 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4

	3707 },

	3708

	3709 { "&[before 1][first implicit]<b"

	3710 "&[first implicit]<a",

	3711 { "b", "\\u4e00", "a", "\\u4e01"}, 4

	3712 },

	3713

	3714 { "&[before 1][last implicit]<b"

	3715 "&[last implicit]<a",

	3716 { "b", "\\U0010FFFD", "a" }, 3

	3717 },

	3718

	3719 { "&[last variable]<z"

	3720 "&[last primary ignorable]<x"

	3721 "&[last secondary ignorable]<<y"

	3722 "&[last tertiary ignorable]<<<w"

	3723 "&[top]<u",

	3724 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u" }, 7

	3725 }

	3726

	3727 };

	3728 uint32_t i;

	3729

	3730 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

	3731 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

	3732 }

	3733 }

	3734

	3735

	3736 static void TestOptimize(void) {

	3737 /* this is not really a test - just trying out

	3738 * whether copying of UCA contents will fail

	3739 * Cannot really test, since the functionality

	3740 * remains the same.

	3741 */

	3742 static const struct {

	3743 const char *rules;

	3744 const char *data[10];

	3745 const uint32_t len;

	3746 } tests[] = {

	3747 /* - all befores here amount to zero */

	3748 { "[optimize [\\uAC00-\\uD7FF]]",

	3749 { "a", "b"}, 2}

	3750 };

	3751 uint32_t i;

	3752

	3753 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

	3754 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

	3755 }

	3756 }

	3757

	3758 /*

	3759 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.

	3760 weiv ucol_strcollIter?

	3761 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021

	3762 weiv these are the input strings?

	3763 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on , we have s1 > s2

	3764 weiv will check - could be a problem with utf-8 iterator

	3765 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2

	3766 weiv hmmm

	3767 cycheng@ca.ibm.c... note that we have a standalone high surrogate

	3768 weiv that doesn't sound right

	3769 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000

	3770 weiv so you have two strings, you convert them to utf-8 and to utf-16BE

	3771 cycheng@ca.ibm.c... yes

	3772 weiv and then do the comparison

	3773 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be

	3774 weiv utf-16 strings look like a little endian ones in the example you sent me

	3775 weiv It could be a bug - let me try to test it out

	3776 cycheng@ca.ibm.c... ok

	3777 cycheng@ca.ibm.c... we can wait till the conf. call

	3778 cycheng@ca.ibm.c... next week

	3779 weiv that would be great

	3780 weiv hmmm

	3781 weiv I might be wrong

	3782 weiv let me play with it some more

	3783 cycheng@ca.ibm.c... ok

	3784 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be

	3785 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that's built for db2

	3786 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be

	3787 weiv ok

	3788 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data

	3789 weiv thanks

	3790 cycheng@ca.ibm.c... the 4 strings we sent are just samples

	3791 */

	#if 0
	/*
	 * Disabled experiment for the report quoted above: the same two strings are
	 * compared with ucol_strcollIter(), once through UTF-16BE byte iterators and
	 * once through UTF-8 byte iterators, and the two results must agree.
	 */
	static void Alexis(void) {
	    UErrorCode ec = U_ZERO_ERROR;
	    UCollator *uca = ucol_open("", &ec);

	    /* s2 = d800 0021 and s1 = fffc 0062 as big-endian UTF-16 bytes */
	    const char beBytes[2][4] = {
	        { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
	        { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
	    };

	    /* the same two strings as UTF-8 bytes */
	    const char u8Bytes[2][4] = {
	        { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
	        { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
	    };

	    UCharIterator beLeft, beRight;
	    UCharIterator u8Left, u8Right;
	    UCollationResult orderBE, orderU8;

	    uiter_setUTF16BE(&beLeft, beBytes[0], 4);
	    uiter_setUTF16BE(&beRight, beBytes[1], 4);

	    uiter_setUTF8(&u8Left, u8Bytes[0], 4);
	    uiter_setUTF8(&u8Right, u8Bytes[1], 4);

	    ucol_setAttribute(uca, UCOL_NORMALIZATION_MODE, UCOL_ON, &ec);

	    orderBE = ucol_strcollIter(uca, &beLeft, &beRight, &ec);
	    orderU8 = ucol_strcollIter(uca, &u8Left, &u8Right, &ec);

	    if(orderBE != orderU8) {
	        log_err("different results\n");
	    }

	    ucol_close(uca);
	}
	#endif

	3832

	3833 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256

	3834 static void Alexis2(void) {

	3835 UErrorCode status = U_ZERO_ERROR;

	3836 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER _SIZE];

	3837 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUF FER_SIZE];

	3838 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SI ZE];

	3839 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8 LenT = 0;

	3840

	3841 UConverter *conv = NULL;

	3842

	3843 UCharIterator U16BEItS, U16BEItT;

	3844 UCharIterator U8ItS, U8ItT;

	3845

	3846 UCollationResult resU16, resU16BE, resU8;

	3847

	3848 static const char* const pairs[][2] = {

	3849 { "\\ud800\\u0021", "\\uFFFC\\u0062"},

	3850 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },

	3851 { "\\u0E40\\u0021", "\\u00A1\\u0021"},

	3852 { "\\u0E40\\u0021", "\\uFE57\\u0062"},

	3853 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},

	3854 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},

	3855 { "\\u0020", "\\u0020\\u0000"}

	3856 /*

	3857 5F20 (my result here)

	3858 5F204E008E3F

	3859 5F20 (your result here)

	3860 */

	3861 };

	3862

	3863 int32_t i = 0;

	3864

	3865 UCollator *coll = ucol_open("", &status);

	3866 if(status == U_FILE_ACCESS_ERROR) {

	3867 log_data_err("Is your data around?\n");

	3868 return;

	3869 } else if(U_FAILURE(status)) {

	3870 log_err("Error opening collator\n");

	3871 return;

	3872 }

	3873 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	3874 conv = ucnv_open("UTF16BE", &status);

	3875 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {

	3876 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);

	3877 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);

	3878

	3879 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);

	3880

	3881 log_verbose("Result of strcoll is %i\n", resU16);

	3882

	3883 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);

	3884 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);

	3885

	3886 /* use the original sizes, as the result from converter is in bytes */

	3887 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);

	3888 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);

	3889

	3890 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);

	3891

	3892 log_verbose("Result of U16BE is %i\n", resU16BE);

	3893

	3894 if(resU16 != resU16BE) {

	3895 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", p airs[i][0], pairs[i][1]);

	3896 }

	3897

	3898 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16Le nS, &status);

	3899 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16Le nT, &status);

	3900

	3901 uiter_setUTF8(&U8ItS, U8Source, U8LenS);

	3902 uiter_setUTF8(&U8ItT, U8Target, U8LenT);

	3903

	3904 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);

	3905

	3906 if(resU16 != resU8) {

	3907 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pair s[i][0], pairs[i][1]);

	3908 }

	3909

	3910 }

	3911

	3912 ucol_close(coll);

	3913 ucnv_close(conv);

	3914 }

	3915

	3916 static void TestHebrewUCA(void) {

	3917 UErrorCode status = U_ZERO_ERROR;

	3918 static const char *first[] = {

	3919 "d790d6b8d79cd795d6bcd7a9",

	3920 "d790d79cd79ed7a7d799d799d7a1",

	3921 "d790d6b4d79ed795d6bcd7a9",

	3922 };

	3923

	3924 char utf8String[3][256];

	3925 UChar utf16String[3][256];

	3926

	3927 int32_t i = 0, j = 0;

	3928 int32_t sizeUTF8[3];

	3929 int32_t sizeUTF16[3];

	3930

	3931 UCollator *coll = ucol_open("", &status);

	3932 if (U_FAILURE(status)) {

	3933 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(st atus));

	3934 return;

	3935 }

	3936 /ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);/

	3937

	3938 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {

	3939 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);

	3940 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i] , &status);

	3941 log_verbose("%i: ");

	3942 for(j = 0; j < sizeUTF16[i]; j++) {

	3943 /log_verbose("\\u%04X", utf16String[i][j]);/

	3944 log_verbose("%04X", utf16String[i][j]);

	3945 }

	3946 log_verbose("\n");

	3947 }

	3948 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {

	3949 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {

	3950 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);

	3951 }

	3952 }

	3953

	3954 ucol_close(coll);

	3955

	3956 }

	3957

	3958 static void TestPartialSortKeyTermination(void) {

	3959 static const char* cases[] = {

	3960 "\\u1234\\u1234\\udc00",

	3961 "\\udc00\\ud800\\ud800"

	3962 };

	3963

	3964 int32_t i = sizeof(UCollator);

	3965

	3966 UErrorCode status = U_ZERO_ERROR;

	3967

	3968 UCollator *coll = ucol_open("", &status);

	3969

	3970 UCharIterator iter;

	3971

	3972 UChar currCase[256];

	3973 int32_t length = 0;

	3974 int32_t pKeyLen = 0;

	3975

	3976 uint8_t key[256];

	3977

	3978 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {

	3979 uint32_t state[2] = {0, 0};

	3980 length = u_unescape(cases[i], currCase, 256);

	3981 uiter_setString(&iter, currCase, length);

	3982 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);

	3983

	3984 log_verbose("Done\n");

	3985

	3986 }

	3987 ucol_close(coll);

	3988 }

	3989

	3990 static void TestSettings(void) {

	3991 static const char* cases[] = {

	3992 "apple",

	3993 "Apple"

	3994 };

	3995

	3996 static const char* locales[] = {

	3997 "",

	3998 "en"

	3999 };

	4000

	4001 UErrorCode status = U_ZERO_ERROR;

	4002

	4003 int32_t i = 0, j = 0;

	4004

	4005 UChar source[256], target[256];

	4006 int32_t sLen = 0, tLen = 0;

	4007

	4008 UCollator *collateObject = NULL;

	4009 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {

	4010 collateObject = ucol_open(locales[i], &status);

	4011 ucol_setStrength(collateObject, UCOL_PRIMARY);

	4012 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);

	4013 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {

	4014 sLen = u_unescape(cases[j-1], source, 256);

	4015 source[sLen] = 0;

	4016 tLen = u_unescape(cases[j], target, 256);

	4017 source[tLen] = 0;

	4018 doTest(collateObject, source, target, UCOL_EQUAL);

	4019 }

	4020 ucol_close(collateObject);

	4021 }

	4022 }

	4023

	4024 static int32_t TestEqualsForCollator(const char* locName, UCollator source, UCo llator target) {

	4025 UErrorCode status = U_ZERO_ERROR;

	4026 int32_t errorNo = 0;

	4027 /const UChar sourceRules = NULL;*/

	4028 /int32_t sourceRulesLen = 0;/

	4029 UColAttributeValue french = UCOL_OFF;

	4030 int32_t cloneSize = 0;

	4031

	4032 if(!ucol_equals(source, target)) {

	4033 log_err("Same collators, different address not equal\n");

	4034 errorNo++;

	4035 }

	4036 ucol_close(target);

	4037 if(uprv_strcmp(ucol_getLocaleByType(source, ULOC_REQUESTED_LOCALE, &status), ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &status)) == 0) {

	4038 /* currently, safeClone is implemented through getRules/openRules

	4039 * so it is the same as the test below - I will comment that test out.

	4040 */

	4041 /* real thing */

	4042 target = ucol_safeClone(source, NULL, &cloneSize, &status);

	4043 if(U_FAILURE(status)) {

	4044 log_err("Error creating clone\n");

	4045 errorNo++;

	4046 return errorNo;

	4047 }

	4048 if(!ucol_equals(source, target)) {

	4049 log_err("Collator different from it's clone\n");

	4050 errorNo++;

	4051 }

	4052 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);

	4053 if(french == UCOL_ON) {

	4054 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);

	4055 } else {

	4056 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);

	4057 }

	4058 if(U_FAILURE(status)) {

	4059 log_err("Error setting attributes\n");

	4060 errorNo++;

	4061 return errorNo;

	4062 }

	4063 if(ucol_equals(source, target)) {

	4064 log_err("Collators same even when options changed\n");

	4065 errorNo++;

	4066 }

	4067 ucol_close(target);

	4068 /* commented out since safeClone uses exactly the same technique */

	4069 /*

	4070 sourceRules = ucol_getRules(source, &sourceRulesLen);

	4071 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_ DEFAULT, &parseError, &status);

	4072 if(U_FAILURE(status)) {

	4073 log_err("Error instantiating target from rules\n");

	4074 errorNo++;

	4075 return errorNo;

	4076 }

	4077 if(!ucol_equals(source, target)) {

	4078 log_err("Collator different from collator that was created from the same rules\n");

	4079 errorNo++;

	4080 }

	4081 ucol_close(target);

	4082 */

	4083 }

	4084 return errorNo;

	4085 }

	4086

	4087

	4088 static void TestEquals(void) {

	4089 /* ucol_equals is not currently a public API. There is a chance that it will become

	4090 * something like this, but currently it is only used by RuleBasedCollator::o perator==

	4091 */

	4092 /* test whether the two collators instantiated from the same locale are equa l */

	4093 UErrorCode status = U_ZERO_ERROR;

	4094 UParseError parseError;

	4095 int32_t noOfLoc = uloc_countAvailable();

	4096 const char *locName = NULL;

	4097 UCollator source = NULL, target = NULL;

	4098 int32_t i = 0;

	4099

	4100 const char* rules[] = {

	4101 "&l < lj <<< Lj <<< LJ",

	4102 "&n < nj <<< Nj <<< NJ",

	4103 "&ae <<< \\u00e4",

	4104 "&AE <<< \\u00c4"

	4105 };

	4106 /*

	4107 const char* badRules[] = {

	4108 "&l <<< Lj",

	4109 "&n < nj <<< nJ <<< NJ",

	4110 "&a <<< \\u00e4",

	4111 "&AE <<< \\u00c4 <<< x"

	4112 };

	4113 */

	4114

	4115 UChar sourceRules[1024], targetRules[1024];

	4116 int32_t sourceRulesSize = 0, targetRulesSize = 0;

	4117 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);

	4118

	4119 for(i = 0; i < rulesSize; i++) {

	4120 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 102 4 - sourceRulesSize);

	4121 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRu lesSize, 1024 - targetRulesSize);

	4122 }

	4123

	4124 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEF AULT, &parseError, &status);

	4125 if(status == U_FILE_ACCESS_ERROR) {

	4126 log_data_err("Is your data around?\n");

	4127 return;

	4128 } else if(U_FAILURE(status)) {

	4129 log_err("Error opening collator\n");

	4130 return;

	4131 }

	4132 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEF AULT, &parseError, &status);

	4133 if(!ucol_equals(source, target)) {

	4134 log_err("Equivalent collators not equal!\n");

	4135 }

	4136 ucol_close(source);

	4137 ucol_close(target);

	4138

	4139 source = ucol_open("root", &status);

	4140 target = ucol_open("root", &status);

	4141 log_verbose("Testing root\n");

	4142 if(!ucol_equals(source, source)) {

	4143 log_err("Same collator not equal\n");

	4144 }

	4145 if(TestEqualsForCollator(locName, source, target)) {

	4146 log_err("Errors for root\n", locName);

	4147 }

	4148 ucol_close(source);

	4149

	4150 for(i = 0; i<noOfLoc; i++) {

	4151 status = U_ZERO_ERROR;

	4152 locName = uloc_getAvailable(i);

	4153 /if(hasCollationElements(locName)) {/

	4154 log_verbose("Testing equality for locale %s\n", locName);

	4155 source = ucol_open(locName, &status);

	4156 target = ucol_open(locName, &status);

	4157 if (U_FAILURE(status)) {

	4158 log_err("Error opening collator for locale %s %s\n", locName, u_err orName(status));

	4159 continue;

	4160 }

	4161 if(TestEqualsForCollator(locName, source, target)) {

	4162 log_err("Errors for locale %s\n", locName);

	4163 }

	4164 ucol_close(source);

	4165 /}/

	4166 }

	4167 }

	4168

	4169 static void TestJ2726(void) {

	4170 UChar a[2] = { 0x61, 0x00 }; /"a"/

	4171 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /"a "/

	4172 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /" a"/

	4173 UErrorCode status = U_ZERO_ERROR;

	4174 UCollator *coll = ucol_open("en", &status);

	4175 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

	4176 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

	4177 doTest(coll, a, aSpace, UCOL_EQUAL);

	4178 doTest(coll, aSpace, a, UCOL_EQUAL);

	4179 doTest(coll, a, spaceA, UCOL_EQUAL);

	4180 doTest(coll, spaceA, a, UCOL_EQUAL);

	4181 doTest(coll, spaceA, aSpace, UCOL_EQUAL);

	4182 doTest(coll, aSpace, spaceA, UCOL_EQUAL);

	4183 ucol_close(coll);

	4184 }

	4185

	4186 static void NullRule(void) {

	4187 UChar r[3] = {0};

	4188 UErrorCode status = U_ZERO_ERROR;

	4189 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &st atus);

	4190 if(U_SUCCESS(status)) {

	4191 log_err("This should have been an error!\n");

	4192 ucol_close(coll);

	4193 } else {

	4194 status = U_ZERO_ERROR;

	4195 }

	4196 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

	4197 if(U_FAILURE(status)) {

	4198 log_err_status(status, "Empty rules should have produced a valid collato r -> %s\n", u_errorName(status));

	4199 } else {

	4200 ucol_close(coll);

	4201 }

	4202 }

	4203

	4204 /**

	4205 * Test for CollationElementIterator previous and next for the whole set of

	4206 * unicode characters with normalization on.

	4207 */

	4208 static void TestNumericCollation(void)

	4209 {

	4210 UErrorCode status = U_ZERO_ERROR;

	4211

	4212 const static char *basicTestStrings[]={

	4213 "hello1",

	4214 "hello2",

	4215 "hello2002",

	4216 "hello2003",

	4217 "hello123456",

	4218 "hello1234567",

	4219 "hello10000000",

	4220 "hello100000000",

	4221 "hello1000000000",

	4222 "hello10000000000",

	4223 };

	4224

	4225 const static char *preZeroTestStrings[]={

	4226 "avery10000",

	4227 "avery010000",

	4228 "avery0010000",

	4229 "avery00010000",

	4230 "avery000010000",

	4231 "avery0000010000",

	4232 "avery00000010000",

	4233 "avery000000010000",

	4234 };

	4235

	4236 const static char *thirtyTwoBitNumericStrings[]={

	4237 "avery42949672960",

	4238 "avery42949672961",

	4239 "avery42949672962",

	4240 "avery429496729610"

	4241 };

	4242

	4243 const static char *longNumericStrings[]={

	4244 /* Some of these sort out of the order that would expected if digits-as-num bers handled arbitrarily-long digit strings.

	4245 In fact, a single collation element can represent a maximum of 254 digit s as a number. Digit strings longer than that

	4246 are treated as multiple collation elements. */

	4247 "num923456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 345678901234567890123z", /253digits, num + 9.23E252 + z /

	4248 "num100000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000", /254digits, num + 1.00E253 /

	4249 "num100000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000", /255digits, num + 1.00E253 + 0, out of numeric order but expected /

	4250 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234", /254digits, num + 1.23E253 /

	4251 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345", /255digits, num + 1.23E253 + 5 /

	4252 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 345678901234567890123456", /256digits, num + 1.23E253 + 56 /

	4253 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234567", /257digits, num + 1.23E253 + 567 /

	4254 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234a", /254digits, num + 1.23E253 + a, out of numeric order but expected /

	4255 "num923456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234", /254digits, num + 9.23E253, out of numeric order but e xpected /

	4256 "num923456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234a", /254digits, num + 9.23E253 + a, out of numeric order but expected /

	4257 };

	4258

	4259 const static char *supplementaryDigits[] = {

	4260 "\\uD835\\uDFCE", /* 0 */

	4261 "\\uD835\\uDFCF", /* 1 */

	4262 "\\uD835\\uDFD0", /* 2 */

	4263 "\\uD835\\uDFD1", /* 3 */

	4264 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */

	4265 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */

	4266 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */

	4267 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */

	4268 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */

	4269 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */

	4270 };

	4271

	4272 const static char *foreignDigits[] = {

	4273 "\\u0661",

	4274 "\\u0662",

	4275 "\\u0663",

	4276 "\\u0661\\u0660",

	4277 "\\u0661\\u0662",

	4278 "\\u0661\\u0663",

	4279 "\\u0662\\u0660",

	4280 "\\u0662\\u0662",

	4281 "\\u0662\\u0663",

	4282 "\\u0663\\u0660",

	4283 "\\u0663\\u0662",

	4284 "\\u0663\\u0663"

	4285 };

	4286

	4287 const static char *evenZeroes[] = {

	4288 "2000",

	4289 "2001",

	4290 "2002",

	4291 "2003"

	4292 };

	4293

	4294 UColAttribute att = UCOL_NUMERIC_COLLATION;

	4295 UColAttributeValue val = UCOL_ON;

	4296

	4297 /* Open our collator. */

	4298 UCollator* coll = ucol_open("root", &status);

	4299 if (U_FAILURE(status)){

	4300 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",

	4301 myErrorName(status));

	4302 return;

	4303 }

	4304 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestSt rings)/sizeof(basicTestStrings[0]), &att, &val, 1);

	4305 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(t hirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1) ;

	4306 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumer icStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);

	4307 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits )/sizeof(foreignDigits[0]), &att, &val, 1);

	4308 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(suppleme ntaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);

	4309 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeo f(evenZeroes[0]), &att, &val, 1);

	4310

	4311 /* Setting up our collator to do digits. */

	4312 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

	4313 if (U_FAILURE(status)){

	4314 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n ",

	4315 myErrorName(status));

	4316 return;

	4317 }

	4318

	4319 /*

	4320 Testing that prepended zeroes still yield the correct collation behavior.

	4321 We expect that every element in our strings array will be equal.

	4322 */

	4323 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestSt rings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);

	4324

	4325 ucol_close(coll);

	4326 }

	4327

	4328 static void TestTibetanConformance(void)

	4329 {

	4330 const char* test[] = {

	4331 "\\u0FB2\\u0591\\u0F71\\u0061",

	4332 "\\u0FB2\\u0F71\\u0061"

	4333 };

	4334

	4335 UErrorCode status = U_ZERO_ERROR;

	4336 UCollator *coll = ucol_open("", &status);

	4337 UChar source[100];

	4338 UChar target[100];

	4339 int result;

	4340 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	4341 if (U_SUCCESS(status)) {

	4342 u_unescape(test[0], source, 100);

	4343 u_unescape(test[1], target, 100);

	4344 doTest(coll, source, target, UCOL_EQUAL);

	4345 result = ucol_strcoll(coll, source, -1, target, -1);

	4346 log_verbose("result %d\n", result);

	4347 if (UCOL_EQUAL != result) {

	4348 log_err("Tibetan comparison error\n");

	4349 }

	4350 }

	4351 ucol_close(coll);

	4352

	4353 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);

	4354 }

	4355

	/* Hands a two-string Han ordering case to genericLocaleStarter() for the "zh__PINYIN" locale. */
	static void TestPinyinProblem(void) {
	    static const char *pinyinPair[] = {
	        "\\u4E56\\u4E56\\u7761",
	        "\\u4E56\\u5B69\\u5B50"
	    };
	    genericLocaleStarter("zh__PINYIN", pinyinPair, sizeof(pinyinPair)/sizeof(pinyinPair[0]));
	}

	4360

	/* Largest raw value exercised by the implicit-weight tests below. */
	#define TST_UCOL_MAX_INPUT 0x220001
	/*
	 * Bit masks for a 32-bit implicit weight. They carry no trailing semicolon,
	 * so they can be used inside larger expressions; existing uses of the form
	 * "x & mask;" are unaffected.
	 */
	#define topByte 0xFF000000
	#define bottomByte 0xFF
	#define fourBytes 0xFFFFFFFF

	4365

	4366

	4367 static void showImplicit(UChar32 i) {

	4368 if (i >= 0 && i <= TST_UCOL_MAX_INPUT) {

	4369 log_verbose("%08X\t%08X\n", i, uprv_uca_getImplicitFromRaw(i));

	4370 }

	4371 }

	4372

	4373 static void TestImplicitGeneration(void) {

	4374 UErrorCode status = U_ZERO_ERROR;

	4375 UChar32 last = 0;

	4376 UChar32 current;

	4377 UChar32 i = 0, j = 0;

	4378 UChar32 roundtrip = 0;

	4379 UChar32 lastBottom = 0;

	4380 UChar32 currentBottom = 0;

	4381 UChar32 lastTop = 0;

	4382 UChar32 currentTop = 0;

	4383

	4384 UCollator *coll = ucol_open("root", &status);

	4385 if(U_FAILURE(status)) {

	4386 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)) ;

	4387 return;

	4388 }

	4389

	4390 uprv_uca_getRawFromImplicit(0xE20303E7);

	4391

	4392 for (i = 0; i <= TST_UCOL_MAX_INPUT; ++i) {

	4393 current = uprv_uca_getImplicitFromRaw(i) & fourBytes;

	4394

	4395 /* check that it round-trips AND that all intervening ones are illegal*/

	4396 roundtrip = uprv_uca_getRawFromImplicit(current);

	4397 if (roundtrip != i) {

	4398 log_err("No roundtrip %08X\n", i);

	4399 }

	4400 if (last != 0) {

	4401 for (j = last + 1; j < current; ++j) {

	4402 roundtrip = uprv_uca_getRawFromImplicit(j);

	4403 /* raise an error if it doesn't find an error*/

	4404 if (roundtrip != -1) {

	4405 log_err("Fails to recognize illegal %08X\n", j);

	4406 }

	4407 }

	4408 }

	4409 /* now do other consistency checks*/

	4410 lastBottom = last & bottomByte;

	4411 currentBottom = current & bottomByte;

	4412 lastTop = last & topByte;

	4413 currentTop = current & topByte;

	4414

	4415 /* print out some values for spot-checking*/

	4416 if (lastTop != currentTop \|\| i == 0x10000 \|\| i == 0x110000) {

	4417 showImplicit(i-3);

	4418 showImplicit(i-2);

	4419 showImplicit(i-1);

	4420 showImplicit(i);

	4421 showImplicit(i+1);

	4422 showImplicit(i+2);

	4423 }

	4424 last = current;

	4425

	4426 if(uprv_uca_getCodePointFromRaw(uprv_uca_getRawFromCodePoint(i)) != i) {

	4427 log_err("No raw <-> code point roundtrip for 0x%08X\n", i);

	4428 }

	4429 }

	4430 showImplicit(TST_UCOL_MAX_INPUT-2);

	4431 showImplicit(TST_UCOL_MAX_INPUT-1);

	4432 showImplicit(TST_UCOL_MAX_INPUT);

	4433 ucol_close(coll);

	4434 }

	4435

	4436 /**

	4437 * Iterate through the given iterator, checking to see that all the strings

	4438 * in the expected array are present.

	4439 * @param expected array of strings we expect to see, or NULL

	4440 * @param expectedCount number of elements of expected, or 0

	4441 */

	4442 static int32_t checkUEnumeration(const char* msg,

	4443 UEnumeration* iter,

	4444 const char** expected,

	4445 int32_t expectedCount) {

	4446 UErrorCode ec = U_ZERO_ERROR;

	4447 int32_t i = 0, n, j, bit;

	4448 int32_t seenMask = 0;

	4449

	4450 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */

	4451 n = uenum_count(iter, &ec);

	4452 if (!assertSuccess("count", &ec)) return -1;

	4453 log_verbose("%s = [", msg);

	4454 for (;; ++i) {

	4455 const char* s = uenum_next(iter, NULL, &ec);

	4456 if (!assertSuccess("snext", &ec) \|\| s == NULL) break;

	4457 if (i != 0) log_verbose(",");

	4458 log_verbose("%s", s);

	4459 /* check expected list */

	4460 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {

	4461 if ((seenMask&bit) == 0 &&

	4462 uprv_strcmp(s, expected[j]) == 0) {

	4463 seenMask \|= bit;

	4464 break;

	4465 }

	4466 }

	4467 }

	4468 log_verbose("] (%d)\n", i);

	4469 assertTrue("count verified", i==n);

	4470 /* did we see all expected strings? */

	4471 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {

	4472 if ((seenMask&bit)!=0) {

	4473 log_verbose("Ok: \"%s\" seen\n", expected[j]);

	4474 } else {

	4475 log_err("FAIL: \"%s\" not seen\n", expected[j]);

	4476 }

	4477 }

	4478 return n;

	4479 }

	4480

	4481 /**

	4482 * Test new API added for separate collation tree.

	4483 */

	4484 static void TestSeparateTrees(void) {

	4485 UErrorCode ec = U_ZERO_ERROR;

	4486 UEnumeration *e = NULL;

	4487 int32_t n = -1;

	4488 UBool isAvailable;

	4489 char loc[256];

	4490

	4491 static const char* AVAIL[] = { "en", "de" };

	4492

	4493 static const char* KW[] = { "collation" };

	4494

	4495 static const char* KWVAL[] = { "phonebook", "stroke" };

	4496

	4497 #if !UCONFIG_NO_SERVICE

	4498 e = ucol_openAvailableLocales(&ec);

	4499 if (e != NULL) {

	4500 assertSuccess("ucol_openAvailableLocales", &ec);

	4501 assertTrue("ucol_openAvailableLocales!=0", e!=0);

	4502 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)) ;

	4503 /* Don't need to check n because we check list */

	4504 uenum_close(e);

	4505 } else {

	4506 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you m issing data?)\n", u_errorName(ec));

	4507 }

	4508 #endif

	4509

	4510 e = ucol_getKeywords(&ec);

	4511 if (e != NULL) {

	4512 assertSuccess("ucol_getKeywords", &ec);

	4513 assertTrue("ucol_getKeywords!=0", e!=0);

	4514 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));

	4515 /* Don't need to check n because we check list */

	4516 uenum_close(e);

	4517 } else {

	4518 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing da ta?)\n", u_errorName(ec));

	4519 }

	4520

	4521 e = ucol_getKeywordValues(KW[0], &ec);

	4522 if (e != NULL) {

	4523 assertSuccess("ucol_getKeywordValues", &ec);

	4524 assertTrue("ucol_getKeywordValues!=0", e!=0);

	4525 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));

	4526 /* Don't need to check n because we check list */

	4527 uenum_close(e);

	4528 } else {

	4529 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missi ng data?)\n", u_errorName(ec));

	4530 }

	4531

	4532 /* Try setting a warning before calling ucol_getKeywordValues */

	4533 ec = U_USING_FALLBACK_WARNING;

	4534 e = ucol_getKeywordValues(KW[0], &ec);

	4535 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {

	4536 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);

	4537 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e , KWVAL, LEN(KWVAL));

	4538 /* Don't need to check n because we check list */

	4539 uenum_close(e);

	4540 }

	4541

	4542 /*

	4543 U_DRAFT int32_t U_EXPORT2

	4544 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,

	4545 const char* locale, UBool* isAvailable,

	4546 UErrorCode* status);

	4547 }

	4548 */

	4549 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",

	4550 &isAvailable, &ec);

	4551 if (assertSuccess("getFunctionalEquivalent", &ec)) {

	4552 assertEquals("getFunctionalEquivalent(de)", "de", loc);

	4553 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",

	4554 isAvailable == TRUE);

	4555 }

	4556

	4557 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",

	4558 &isAvailable, &ec);

	4559 if (assertSuccess("getFunctionalEquivalent", &ec)) {

	4560 assertEquals("getFunctionalEquivalent(de_DE)", "de", loc);

	4561 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==TRUE",

	4562 isAvailable == TRUE);

	4563 }

	4564 }

	4565

	/*
	 * Supersedes TestJ784.
	 * Checks Pinyin tone-mark ordering expressed with "[before 2]" rules:
	 * the same two ordered string lists are run once against a collator built
	 * from the rules below and once against the "zh" locale collator.
	 */
	static void TestBeforePinyin(void) {
	    static const char pinyinRules[] = {
	        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<< \\u00E0<<<\\u00C0"
	        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<< \\u00E8<<<\\u00C8"
	        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<< \\u00EC<<<\\u00CC"
	        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<< \\u00F2<<<\\u00D2"
	        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<< \\u00F9<<<\\u00D9"
	        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<< \\u01DB<<\\u00FC"
	    };

	    /* Syllables starting with 'l': toned vowel before the plain vowel. */
	    static const char *syllables[] = {
	        "l\\u0101",
	        "la",
	        "l\\u0101n",
	        "lan ",
	        "l\\u0113",
	        "le",
	        "l\\u0113n",
	        "len"
	    };

	    /* Case and tone combinations of 'a' after x/X, in expected order. */
	    static const char *caseAndTone[] = {
	        "x\\u0101",
	        "x\\u0100",
	        "X\\u0101",
	        "X\\u0100",
	        "x\\u00E1",
	        "x\\u00C1",
	        "X\\u00E1",
	        "X\\u00C1",
	        "x\\u01CE",
	        "x\\u01CD",
	        "X\\u01CE",
	        "X\\u01CD",
	        "x\\u00E0",
	        "x\\u00C0",
	        "X\\u00E0",
	        "X\\u00C0",
	        "xa",
	        "xA",
	        "Xa",
	        "XA",
	        "x\\u0101x",
	        "x\\u0100x",
	        "x\\u00E1x",
	        "x\\u00C1x",
	        "x\\u01CEx",
	        "x\\u01CDx",
	        "x\\u00E0x",
	        "x\\u00C0x",
	        "xax",
	        "xAx"
	    };

	    genericRulesStarter(pinyinRules, syllables, LEN(syllables));
	    genericLocaleStarter("zh", syllables, LEN(syllables));
	    genericRulesStarter(pinyinRules, caseAndTone, LEN(caseAndTone));
	    genericLocaleStarter("zh", caseAndTone, LEN(caseAndTone));
	}

	4626

	4627 static void TestBeforeTightening(void) {

	4628 static const struct {

	4629 const char *rules;

	4630 UErrorCode expectedStatus;

	4631 } tests[] = {

	4632 { "&[before 1]a<x", U_ZERO_ERROR },

	4633 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },

	4634 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },

	4635 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },

	4636 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },

	4637 { "&[before 2]a<<x",U_ZERO_ERROR },

	4638 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },

	4639 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },

	4640 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },

	4641 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },

	4642 { "&[before 3]a<<<x",U_ZERO_ERROR },

	4643 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },

	4644 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }

	4645 };

	4646

	4647 int32_t i = 0;

	4648

	4649 UErrorCode status = U_ZERO_ERROR;

	4650 UChar rlz[RULE_BUFFER_LEN] = { 0 };

	4651 uint32_t rlen = 0;

	4652

	4653 UCollator *coll = NULL;

	4654

	4655

	4656 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {

	4657 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);

	4658 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &statu s);

	4659 if(status != tests[i].expectedStatus) {

	4660 log_err_status(status, "Opening a collator with rules %s returned er ror code %s, expected %s\n",

	4661 tests[i].rules, u_errorName(status), u_errorName(tests[i].expect edStatus));

	4662 }

	4663 ucol_close(coll);

	4664 status = U_ZERO_ERROR;

	4665 }

	4666

	4667 }

	4668

	4669 #if 0

	4670 &m < a

	4671 &[before 1] a < x <<< X << q <<< Q < z

	4672 assert: m <<< M < x <<< X << q <<< Q < z < a < n

	4673

	4674 &m < a

	4675 &[before 2] a << x <<< X << q <<< Q < z

	4676 assert: m <<< M < x <<< X << q <<< Q << a < z < n

	4677

	4678 &m < a

	4679 &[before 3] a <<< x <<< X << q <<< Q < z

	4680 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n

	4681

	4682

	4683 &m << a

	4684 &[before 1] a < x <<< X << q <<< Q < z

	4685 assert: x <<< X << q <<< Q < z < m <<< M << a < n

	4686

	4687 &m << a

	4688 &[before 2] a << x <<< X << q <<< Q < z

	4689 assert: m <<< M << x <<< X << q <<< Q << a < z < n

	4690

	4691 &m << a

	4692 &[before 3] a <<< x <<< X << q <<< Q < z

	4693 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n

	4694

	4695

	4696 &m <<< a

	4697 &[before 1] a < x <<< X << q <<< Q < z

	4698 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M

	4699

	4700 &m <<< a

	4701 &[before 2] a << x <<< X << q <<< Q < z

	4702 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n

	4703

	4704 &m <<< a

	4705 &[before 3] a <<< x <<< X << q <<< Q < z

	4706 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n

	4707

	4708

	4709 &[before 1] s < x <<< X << q <<< Q < z

	4710 assert: r <<< R < x <<< X << q <<< Q < z < s < n

	4711

	4712 &[before 2] s << x <<< X << q <<< Q < z

	4713 assert: r <<< R < x <<< X << q <<< Q << s < z < n

	4714

	4715 &[before 3] s <<< x <<< X << q <<< Q < z

	4716 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n

	4717

	4718

	4719 &[before 1] \u24DC < x <<< X << q <<< Q < z

	4720 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M

	4721

	4722 &[before 2] \u24DC << x <<< X << q <<< Q < z

	4723 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n

	4724

	4725 &[before 3] \u24DC <<< x <<< X << q <<< Q < z

	4726 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n

	4727 #endif

	4728

	4729

	#if 0
	/* requires features not yet supported */
	/*
	 * Disabled: table-driven version of the "[before N]" scenarios.
	 * Each entry pairs a rule string with the expected ascending order of
	 * `size` strings; every entry is handed to genericRulesStarter().
	 */
	static void TestMoreBefore(void) {
	    static const struct {
	        const char* rules;
	        const char* order[16];
	        int32_t size;
	    } tests[] = {
	        /* 'a' tailored primary after 'm' */
	        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
	          { "m","M","x","X","q","Q","z","a","n" }, 9},
	        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
	          { "m","M","x","X","q","Q","a","z","n" }, 9},
	        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
	          { "m","M","x","X","a","q","Q","z","n" }, 9},
	        /* 'a' tailored secondary after 'm' */
	        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
	          { "x","X","q","Q","z","m","M","a","n" }, 9},
	        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
	          { "m","M","x","X","q","Q","a","z","n" }, 9},
	        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
	          { "m","M","x","X","a","q","Q","z","n" }, 9},
	        /* 'a' tailored tertiary after 'm' */
	        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
	          { "x","X","q","Q","z","n","m","a","M" }, 9},
	        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
	          { "x","X","q","Q","m","a","M","z","n" }, 9},
	        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
	          { "m","x","X","a","M","q","Q","z","n" }, 9},
	        /* untailored reset character 's' */
	        { "&[before 1] s < x <<< X << q <<< Q < z",
	          { "r","R","x","X","q","Q","z","s","n" }, 9},
	        { "&[before 2] s << x <<< X << q <<< Q < z",
	          { "r","R","x","X","q","Q","s","z","n" }, 9},
	        { "&[before 3] s <<< x <<< X << q <<< Q < z",
	          { "r","R","x","X","s","q","Q","z","n" }, 9},
	        /* reset character U+24DC */
	        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
	          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
	        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
	          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
	        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
	          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
	    };

	    int32_t caseIdx;

	    for(caseIdx = 0; caseIdx < (int32_t)LEN(tests); ++caseIdx) {
	        genericRulesStarter(tests[caseIdx].rules, tests[caseIdx].order, tests[caseIdx].size);
	    }
	}
	#endif

	4777

	4778 static void TestTailorNULL( void ) {

	4779 const static char* rule = "&a <<< '\\u0000'";

	4780 UErrorCode status = U_ZERO_ERROR;

	4781 UChar rlz[RULE_BUFFER_LEN] = { 0 };

	4782 uint32_t rlen = 0;

	4783 UChar a = 1, null = 0;

	4784 UCollationResult res = UCOL_EQUAL;

	4785

	4786 UCollator *coll = NULL;

	4787

	4788

	4789 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);

	4790 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

	4791

	4792 if(U_FAILURE(status)) {

	4793 log_err_status(status, "Could not open default collator! -> %s\n", u_err orName(status));

	4794 } else {

	4795 res = ucol_strcoll(coll, &a, 1, &null, 1);

	4796

	4797 if(res != UCOL_LESS) {

	4798 log_err("NULL was not tailored properly!\n");

	4799 }

	4800 }

	4801

	4802 ucol_close(coll);

	4803 }

	4804

	4805 static void

	4806 TestUpperFirstQuaternary(void)

	4807 {

	4808 const char* tests[] = { "B", "b", "Bb", "bB" };

	4809 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };

	4810 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };

	4811 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));

	4812 }

	4813

	4814 static void

	4815 TestJ4960(void)

	4816 {

	4817 const char* tests[] = { "\\u00e2T", "aT" };

	4818 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };

	4819 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };

	4820 const char* tests2[] = { "a", "A" };

	4821 const char* rule = "&[first tertiary ignorable]=A=a";

	4822 UColAttribute att2[] = { UCOL_CASE_LEVEL };

	4823 UColAttributeValue attVals2[] = { UCOL_ON };

	4824 /* Test whether we correctly ignore primary ignorables on case level when */

	4825 /* we have only primary & case level */

	4826 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(t ests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);

	4827 /* Test whether ICU4J will make case level for sortkeys that have primary stre ngth */

	4828 /* and case level */

	4829 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0 ]), att, attVals, sizeof(att)/sizeof(att[0]));

	4830 /* Test whether completely ignorable letters have case level info (they should n't) */

	4831 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(te sts2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);

	4832 }

	4833

	4834 static void

	4835 TestJ5223(void)

	4836 {

	4837 static const char *test = "this is a test string";

	4838 UChar ustr[256];

	4839 int32_t ustr_length = u_unescape(test, ustr, 256);

	4840 unsigned char sortkey[256];

	4841 int32_t sortkey_length;

	4842 UErrorCode status = U_ZERO_ERROR;

	4843 static UCollator *coll = NULL;

	4844 coll = ucol_open("root", &status);

	4845 if(U_FAILURE(status)) {

	4846 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));

	4847 return;

	4848 }

	4849 ucol_setStrength(coll, UCOL_PRIMARY);

	4850 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

	4851 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	4852 if (U_FAILURE(status)) {

	4853 log_err("Failed setting atributes\n");

	4854 return;

	4855 }

	4856 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);

	4857 if (sortkey_length > 256) return;

	4858

	4859 /* we mark the position where the null byte should be written in advance */

	4860 sortkey[sortkey_length-1] = 0xAA;

	4861

	4862 /* we set the buffer size one byte higher than needed */

	4863 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,

	4864 sortkey_length+1);

	4865

	4866 /* no error occurs (for me) */

	4867 if (sortkey[sortkey_length-1] == 0xAA) {

	4868 log_err("Hit bug at first try\n");

	4869 }

	4870

	4871 /* we mark the position where the null byte should be written again */

	4872 sortkey[sortkey_length-1] = 0xAA;

	4873

	4874 /* this time we set the buffer size to the exact amount needed */

	4875 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,

	4876 sortkey_length);

	4877

	4878 /* now the trailing null byte is not written */

	4879 if (sortkey[sortkey_length-1] == 0xAA) {

	4880 log_err("Hit bug at second try\n");

	4881 }

	4882

	4883 ucol_close(coll);

	4884 }

	4885

	/*
	 * Regression test for the Thai partial sort key problem (ticket J5232):
	 * two Thai strings that differ only in their last combining mark
	 * (U+0E47 vs U+0E48) are run, in this order, through the "th" collator.
	 */
	static void
	TestJ5232(void)
	{
	    static const char *thaiPair[] = {
	        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
	        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
	    };

	    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
	}

	4897

	/*
	 * Ticket J5367: with "a" tailored after [first secondary ignorable] in
	 * the same rule set as a contraction reset ("&Ny << Y"), "a" is expected
	 * to sort before "y".
	 */
	static void
	TestJ5367(void)
	{
	    static const char *ordered[] = { "a", "y" };
	    const char* tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

	    genericRulesStarter(tailoring, ordered, LEN(ordered));
	}

	4905

	4906 static void

	4907 TestVI5913(void)

	4908 {

	4909 UErrorCode status = U_ZERO_ERROR;

	4910 int32_t i, j;

	4911 UCollator *coll =NULL;

	4912 uint8_t resColl[100], expColl[100];

	4913 int32_t rLen, tLen, ruleLen, sLen, kLen;

	4914 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &a<0x1FF3-omega with Ypog egrammeni*/

	4915 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/

	4916 UChar rule3[256]={0x26, 0x7a, 0x3c, 0x0061, 0x00ea, 0}; /* &z<a+e with circ umflex.*/

	4917 static const UChar tData[][20]={

	4918 {0x1EAC, 0},

	4919 {0x0041, 0x0323, 0x0302, 0},

	4920 {0x1EA0, 0x0302, 0},

	4921 {0x00C2, 0x0323, 0},

	4922 {0x1ED8, 0}, /* O with dot and circumflex */

	4923 {0x1ECC, 0x0302, 0},

	4924 {0x1EB7, 0},

	4925 {0x1EA1, 0x0306, 0},

	4926 };

	4927 static const UChar tailorData[][20]={

	4928 {0x1FA2, 0}, /* Omega with 3 combining marks */

	4929 {0x03C9, 0x0313, 0x0300, 0x0345, 0},

	4930 {0x1FF3, 0x0313, 0x0300, 0},

	4931 {0x1F60, 0x0300, 0x0345, 0},

	4932 {0x1F62, 0x0345, 0},

	4933 {0x1FA0, 0x0300, 0},

	4934 };

	4935 static const UChar tailorData2[][20]={

	4936 {0x1E63, 0x030C, 0}, /* s with dot below + caron */

	4937 {0x0073, 0x0323, 0x030C, 0},

	4938 {0x0073, 0x030C, 0x0323, 0},

	4939 };

	4940 static const UChar tailorData3[][20]={

	4941 {0x007a, 0}, /* z */

	4942 {0x0061, 0x0065, 0}, /* a + e */

	4943 {0x0061, 0x00ea, 0}, /* a + e with circumflex */

	4944 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */

	4945 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumf lex */

	4946 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot b elow */

	4947 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */

	4948 {0x00EA, 0}, /* e with circumflex */

	4949 };

	4950

	4951 /* Test Vietnamese sort. */

	4952 coll = ucol_open("vi", &status);

	4953 if(U_FAILURE(status)) {

	4954 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(sta tus));

	4955 return;

	4956 }

	4957 log_verbose("\n\nVI collation:");

	4958 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tDat a[2])) ) {

	4959 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");

	4960 }

	4961 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tDat a[3])) ) {

	4962 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");

	4963 }

	4964 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tDat a[4])) ) {

	4965 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");

	4966 }

	4967 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tDat a[6])) ) {

	4968 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");

	4969 }

	4970

	4971 for (j=0; j<8; j++) {

	4972 tLen = u_strlen(tData[j]);

	4973 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);

	4974 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);

	4975 for(i = 0; i<rLen; i++) {

	4976 log_verbose(" %02X", resColl[i]);

	4977 }

	4978 }

	4979

	4980 ucol_close(coll);

	4981

	4982 /* Test Romanian sort. */

	4983 coll = ucol_open("ro", &status);

	4984 log_verbose("\n\nRO collation:");

	4985 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tDat a[1])) ) {

	4986 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");

	4987 }

	4988 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tDat a[5])) ) {

	4989 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");

	4990 }

	4991 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tDat a[7])) ) {

	4992 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");

	4993 }

	4994

	4995 for (j=4; j<8; j++) {

	4996 tLen = u_strlen(tData[j]);

	4997 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);

	4998 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);

	4999 for(i = 0; i<rLen; i++) {

	5000 log_verbose(" %02X", resColl[i]);

	5001 }

	5002 }

	5003 ucol_close(coll);

	5004

	5005 /* Test the precomposed Greek character with 3 combining marks. */

	5006 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");

	5007 ruleLen = u_strlen(rule);

	5008 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

	5009 if (U_FAILURE(status)) {

	5010 log_err("ucol_openRules failed with %s\n", u_errorName(status));

	5011 return;

	5012 }

	5013 sLen = u_strlen(tailorData[0]);

	5014 for (j=1; j<6; j++) {

	5015 tLen = u_strlen(tailorData[j]);

	5016 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {

	5017 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);

	5018 }

	5019 }

	5020 /* Test getSortKey. */

	5021 tLen = u_strlen(tailorData[0]);

	5022 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);

	5023 for (j=0; j<6; j++) {

	5024 tLen = u_strlen(tailorData[j]);

	5025 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);

	5026 if ( kLen!=rLen \|\| uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {

	5027 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);

	5028 for(i = 0; i<rLen; i++) {

	5029 log_err(" %02X", resColl[i]);

	5030 }

	5031 }

	5032 }

	5033 ucol_close(coll);

	5034

	5035 log_verbose("\n\nTailoring test for s with caron:");

	5036 ruleLen = u_strlen(rule2);

	5037 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

	5038 tLen = u_strlen(tailorData2[0]);

	5039 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);

	5040 for (j=1; j<3; j++) {

	5041 tLen = u_strlen(tailorData2[j]);

	5042 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);

	5043 if ( kLen!=rLen \|\| uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {

	5044 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailo rData[j], tLen);

	5045 for(i = 0; i<rLen; i++) {

	5046 log_err(" %02X", resColl[i]);

	5047 }

	5048 }

	5049 }

	5050 ucol_close(coll);

	5051

	5052 log_verbose("\n\nTailoring test for &z< ae with circumflex:");

	5053 ruleLen = u_strlen(rule3);

	5054 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

	5055 tLen = u_strlen(tailorData3[3]);

	5056 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);

	5057 for (j=4; j<6; j++) {

	5058 tLen = u_strlen(tailorData3[j]);

	5059 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);

	5060

	5061 if ( kLen!=rLen \|\| uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {

	5062 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailo rData[j], tLen);

	5063 for(i = 0; i<rLen; i++) {

	5064 log_err(" %02X", resColl[i]);

	5065 }

	5066 }

	5067

	5068 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, tailorData[j], t Len);

	5069 for(i = 0; i<rLen; i++) {

	5070 log_verbose(" %02X", resColl[i]);

	5071 }

	5072 }

	5073 ucol_close(coll);

	5074 }

	5075

	5076 static void

	5077 TestTailor6179(void)

	5078 {

	5079 UErrorCode status = U_ZERO_ERROR;

	5080 int32_t i;

	5081 UCollator *coll =NULL;

	5082 uint8_t resColl[100];

	5083 int32_t rLen, tLen, ruleLen;

	5084 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */

	5085 UChar rule1[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x6 1,0x72,0x79,

	5086 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x2 0,0x61,0x20,

	5087 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x7 2,0x79,0x20,

	5088 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x2 0, 0};

	5089 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */

	5090 UChar rule2[256]={0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6 E,0x64,0x61,

	5091 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3 C,0x3C,0x3C,

	5092 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6 F,0x6E,

	5093 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x6 5,0x5D,0x3C,

	5094 0x3C,0x3C,0x20,0x62,0};

	5095

	5096 UChar tData1[][20]={

	5097 {0x61, 0},

	5098 {0x62, 0},

	5099 { 0xFDD0,0x009E, 0}

	5100 };

	5101 UChar tData2[][20]={

	5102 {0x61, 0},

	5103 {0x62, 0},

	5104 { 0xFDD0,0x009E, 0}

	5105 };

	5106

	5107 /*

	5108 * These values from FractionalUCA.txt will change,

	5109 * and need to be updated here.

	5110 */

	5111 uint8_t firstPrimaryIgnCE[6]={1, 87, 1, 5, 1, 0};

	5112 uint8_t lastPrimaryIgnCE[6]={1, 0xE3, 0xC9, 1, 5, 0};

	5113 uint8_t firstSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};

	5114 uint8_t lastSecondaryIgnCE[6]={1, 1, 0x3f, 0x03, 0};

	5115

	5116 /* Test [Last Primary ignorable] */

	5117

	5118 log_verbose("\n\nTailoring test: &[last primary ignorable]<<a &[first prima ry ignorable]<<b ");

	5119 ruleLen = u_strlen(rule1);

	5120 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

	5121 if (U_FAILURE(status)) {

	5122 log_err_status(status, "Tailoring test: &[last primary ignorable] failed ! -> %s\n", u_errorName(status));

	5123 return;

	5124 }

	5125 tLen = u_strlen(tData1[0]);

	5126 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);

	5127 if (uprv_memcmp(resColl, lastPrimaryIgnCE, uprv_min(rLen,6)) < 0) {

	5128 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);

	5129 for(i = 0; i<rLen; i++) {

	5130 log_err(" %02X", resColl[i]);

	5131 }

	5132 }

	5133 tLen = u_strlen(tData1[1]);

	5134 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);

	5135 if (uprv_memcmp(resColl, firstPrimaryIgnCE, uprv_min(rLen, 6)) < 0) {

	5136 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);

	5137 for(i = 0; i<rLen; i++) {

	5138 log_err(" %02X", resColl[i]);

	5139 }

	5140 }

	5141 ucol_close(coll);

	5142

	5143

	5144 /* Test [Last Secondary ignorable] */

	5145 log_verbose("\n\nTailoring test: &[last secondary ignorable]<<<a &[first se condary ignorable]<<<b ");

	5146 ruleLen = u_strlen(rule1);

	5147 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

	5148 if (U_FAILURE(status)) {

	5149 log_err("Tailoring test: &[last primary ignorable] failed!");

	5150 return;

	5151 }

	5152 tLen = u_strlen(tData2[0]);

	5153 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);

	5154 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);

	5155 for(i = 0; i<rLen; i++) {

	5156 log_verbose(" %02X", resColl[i]);

	5157 }

	5158 if (uprv_memcmp(resColl, lastSecondaryIgnCE, uprv_min(rLen, 3)) < 0) {

	5159 log_err("\n Data[%d] :%s \tlen: %d key: ", 0, tData2[0], rLen);

	5160 for(i = 0; i<rLen; i++) {

	5161 log_err(" %02X", resColl[i]);

	5162 }

	5163 }

	5164 tLen = u_strlen(tData2[1]);

	5165 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);

	5166 log_verbose("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);

	5167 for(i = 0; i<rLen; i++) {

	5168 log_verbose(" %02X", resColl[i]);

	5169 }

	5170 if (uprv_memcmp(resColl, firstSecondaryIgnCE, uprv_min(rLen, 4)) < 0) {

	5171 log_err("\n Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);

	5172 for(i = 0; i<rLen; i++) {

	5173 log_err(" %02X", resColl[i]);

	5174 }

	5175 }

	5176 ucol_close(coll);

	5177 }

	5178

	5179 static void

	5180 TestUCAPrecontext(void)

	5181 {

	5182 UErrorCode status = U_ZERO_ERROR;

	5183 int32_t i, j;

	5184 UCollator *coll =NULL;

	5185 uint8_t resColl[100], prevColl[100];

	5186 int32_t rLen, tLen, ruleLen;

	5187 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */

	5188 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};

	5189 /* & l middle-dot << a a is an expansion. */

	5190

	5191 UChar tData1[][20]={

	5192 { 0xb7, 0}, /* standalone middle dot(0xb7) */

	5193 { 0x387, 0}, /* standalone middle dot(0x387) */

	5194 { 0x61, 0}, /* a */

	5195 { 0x6C, 0}, /* l */

	5196 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */

	5197 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */

	5198 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */

	5199 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */

	5200 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */

	5201 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */

	5202 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */

	5203 };

	5204

	5205 log_verbose("\n\nEN collation:");

	5206 coll = ucol_open("en", &status);

	5207 if (U_FAILURE(status)) {

	5208 log_err_status(status, "Tailoring test: &z <<a\|- failed! -> %s\n", u_err orName(status));

	5209 return;

	5210 }

	5211 for (j=0; j<11; j++) {

	5212 tLen = u_strlen(tData1[j]);

	5213 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

	5214 if ((j>0) && (strcmp((char )resColl, (char )prevColl)<0)) {

	5215 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

	5216 j, tData1[j]);

	5217 }

	5218 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

	5219 for(i = 0; i<rLen; i++) {

	5220 log_verbose(" %02X", resColl[i]);

	5221 }

	5222 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

	5223 }

	5224 ucol_close(coll);

	5225

	5226

	5227 log_verbose("\n\nJA collation:");

	5228 coll = ucol_open("ja", &status);

	5229 if (U_FAILURE(status)) {

	5230 log_err("Tailoring test: &z <<a\|- failed!");

	5231 return;

	5232 }

	5233 for (j=0; j<11; j++) {

	5234 tLen = u_strlen(tData1[j]);

	5235 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

	5236 if ((j>0) && (strcmp((char )resColl, (char )prevColl)<0)) {

	5237 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

	5238 j, tData1[j]);

	5239 }

	5240 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

	5241 for(i = 0; i<rLen; i++) {

	5242 log_verbose(" %02X", resColl[i]);

	5243 }

	5244 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

	5245 }

	5246 ucol_close(coll);

	5247

	5248

	5249 log_verbose("\n\nTailoring test: & middle dot < a ");

	5250 ruleLen = u_strlen(rule1);

	5251 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&statu s);

	5252 if (U_FAILURE(status)) {

	5253 log_err("Tailoring test: & middle dot < a failed!");

	5254 return;

	5255 }

	5256 for (j=0; j<11; j++) {

	5257 tLen = u_strlen(tData1[j]);

	5258 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

	5259 if ((j>0) && (strcmp((char )resColl, (char )prevColl)<0)) {

	5260 log_err("\n Expecting greater key than previous test case: Data[%d ] :%s.",

	5261 j, tData1[j]);

	5262 }

	5263 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

	5264 for(i = 0; i<rLen; i++) {

	5265 log_verbose(" %02X", resColl[i]);

	5266 }

	5267 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

	5268 }

	5269 ucol_close(coll);

	5270

	5271

	5272 log_verbose("\n\nTailoring test: & l middle-dot << a ");

	5273 ruleLen = u_strlen(rule2);

	5274 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&stat us);

	5275 if (U_FAILURE(status)) {

	5276 log_err("Tailoring test: & l middle-dot << a failed!");

	5277 return;

	5278 }

	5279 for (j=0; j<11; j++) {

	5280 tLen = u_strlen(tData1[j]);

	5281 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

	5282 if ((j>0) && (j!=3) && (strcmp((char )resColl, (char )prevColl)<0)) {

	5283 log_err("\n Expecting greater key than previous test case: Data[% d] :%s.",

	5284 j, tData1[j]);

	5285 }

	5286 if ((j==3)&&(strcmp((char )resColl, (char )prevColl)>0)) {

	5287 log_err("\n Expecting smaller key than previous test case: Data[% d] :%s.",

	5288 j, tData1[j]);

	5289 }

	5290 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

	5291 for(i = 0; i<rLen; i++) {

	5292 log_verbose(" %02X", resColl[i]);

	5293 }

	5294 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

	5295 }

	5296 ucol_close(coll);

	5297 }

	5298

	5299 static void

	5300 TestOutOfBuffer5468(void)

	5301 {

	5302 static const char *test = "\\u4e00";

	5303 UChar ustr[256];

	5304 int32_t ustr_length = u_unescape(test, ustr, 256);

	5305 unsigned char shortKeyBuf[1];

	5306 int32_t sortkey_length;

	5307 UErrorCode status = U_ZERO_ERROR;

	5308 static UCollator *coll = NULL;

	5309

	5310 coll = ucol_open("root", &status);

	5311 if(U_FAILURE(status)) {

	5312 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));

	5313 return;

	5314 }

	5315 ucol_setStrength(coll, UCOL_PRIMARY);

	5316 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

	5317 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	5318 if (U_FAILURE(status)) {

	5319 log_err("Failed setting atributes\n");

	5320 return;

	5321 }

	5322

	5323 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeo f(shortKeyBuf));

	5324 if (sortkey_length != 4) {

	5325 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);

	5326 }

	5327 log_verbose("length of sortKey is %d", sortkey_length);

	5328 ucol_close(coll);

	5329 }

	5330

	5331 #define TSKC_DATA_SIZE 5

	5332 #define TSKC_BUF_SIZE 50

	5333 static void

	5334 TestSortKeyConsistency(void)

	5335 {

	5336 UErrorCode icuRC = U_ZERO_ERROR;

	5337 UCollator* ucol;

	5338 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};

	5339

	5340 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];

	5341 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];

	5342 int32_t i, j, i2;

	5343

	5344 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);

	5345 if (U_FAILURE(icuRC))

	5346 {

	5347 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_error Name(icuRC));

	5348 return;

	5349 }

	5350

	5351 for (i = 0; i < TSKC_DATA_SIZE; i++)

	5352 {

	5353 UCharIterator uiter;

	5354 uint32_t state[2] = { 0, 0 };

	5355 int32_t dataLen = i+1;

	5356 for (j=0; j<TSKC_BUF_SIZE; j++)

	5357 bufFull[i][j] = bufPart[i][j] = 0;

	5358

	5359 /* Full sort key */

	5360 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);

	5361

	5362 /* Partial sort key */

	5363 uiter_setString(&uiter, data, dataLen);

	5364 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &ic uRC);

	5365 if (U_FAILURE(icuRC))

	5366 {

	5367 log_err("ucol_nextSortKeyPart failed\n");

	5368 ucol_close(ucol);

	5369 return;

	5370 }

	5371

	5372 for (i2=0; i2<i; i2++)

	5373 {

	5374 UBool fullMatch = TRUE;

	5375 UBool partMatch = TRUE;

	5376 for (j=0; j<TSKC_BUF_SIZE; j++)

	5377 {

	5378 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);

	5379 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);

	5380 }

	5381 if (fullMatch != partMatch) {

	5382 log_err(fullMatch ? "full key was consistent, but partial key ch anged\n"

	5383 : "partial key was consistent, but full key ch anged\n");

	5384 ucol_close(ucol);

	5385 return;

	5386 }

	5387 }

	5388 }

	5389

	5390 /=============================================/

	5391 ucol_close(ucol);

	5392 }

	5393

	5394 /* ticket: 6101 */

	5395 static void TestCroatianSortKey(void) {

	5396 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";

	5397 UErrorCode status = U_ZERO_ERROR;

	5398 UCollator *ucol;

	5399 UCharIterator iter;

	5400

	5401 static const UChar text[] = { 0x0044, 0xD81A };

	5402

	5403 size_t length = sizeof(text)/sizeof(*text);

	5404

	5405 uint8_t textSortKey[32];

	5406 size_t lenSortKey = 32;

	5407 size_t actualSortKeyLen;

	5408 uint32_t uStateInfo[2] = { 0, 0 };

	5409

	5410 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);

	5411 if (U_FAILURE(status)) {

	5412 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));

	5413 return;

	5414 }

	5415

	5416 uiter_setString(&iter, text, length);

	5417

	5418 actualSortKeyLen = ucol_nextSortKeyPart(

	5419 ucol, &iter, (uint32_t*)uStateInfo,

	5420 textSortKey, lenSortKey, &status

	5421 );

	5422

	5423 if (actualSortKeyLen == lenSortKey) {

	5424 log_err("ucol_nextSortKeyPart did not give correct result in Croatian te st.\n");

	5425 }

	5426

	5427 ucol_close(ucol);

	5428 }

	5429

	5430 /* ticket: 6140 */

	5431 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since

	5432 * they are both Hiragana and Katakana

	5433 */

	5434 #define SORTKEYLEN 50

	5435 static void TestHiragana(void) {

	5436 UErrorCode status = U_ZERO_ERROR;

	5437 UCollator* ucol;

	5438 UCollationResult strcollresult;

	5439 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */

	5440 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };

	5441 int32_t data1Len = sizeof(data1)/sizeof(*data1);

	5442 int32_t data2Len = sizeof(data2)/sizeof(*data2);

	5443 int32_t i, j;

	5444 uint8_t sortKey1[SORTKEYLEN];

	5445 uint8_t sortKey2[SORTKEYLEN];

	5446

	5447 UCharIterator uiter1;

	5448 UCharIterator uiter2;

	5449 uint32_t state1[2] = { 0, 0 };

	5450 uint32_t state2[2] = { 0, 0 };

	5451 int32_t keySize1;

	5452 int32_t keySize2;

	5453

	5454 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,

	5455 &status);

	5456 if (U_FAILURE(status)) {

	5457 log_err_status(status, "Error status: %s; Unable to open collator from s hort string.\n", u_errorName(status));

	5458 return;

	5459 }

	5460

	5461 /* Start of full sort keys */

	5462 /* Full sort key1 */

	5463 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);

	5464 /* Full sort key2 */

	5465 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);

	5466 if (keySize1 == keySize2) {

	5467 for (i = 0; i < keySize1; i++) {

	5468 if (sortKey1[i] != sortKey2[i]) {

	5469 log_err("Full sort keys are different. Should be equal.");

	5470 }

	5471 }

	5472 } else {

	5473 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2) ;

	5474 }

	5475 /* End of full sort keys */

	5476

	5477 /* Start of partial sort keys */

	5478 /* Partial sort key1 */

	5479 uiter_setString(&uiter1, data1, data1Len);

	5480 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);

	5481 /* Partial sort key2 */

	5482 uiter_setString(&uiter2, data2, data2Len);

	5483 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);

	5484 if (U_SUCCESS(status) && keySize1 == keySize2) {

	5485 for (j = 0; j < keySize1; j++) {

	5486 if (sortKey1[j] != sortKey2[j]) {

	5487 log_err("Partial sort keys are different. Should be equal");

	5488 }

	5489 }

	5490 } else {

	5491 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d % d", u_errorName(status), keySize1, keySize2);

	5492 }

	5493 /* End of partial sort keys */

	5494

	5495 /* Start of strcoll */

	5496 /* Use ucol_strcoll() to determine ordering */

	5497 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);

	5498 if (strcollresult != UCOL_EQUAL) {

	5499 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");

	5500 }

	5501

	5502 ucol_close(ucol);

	5503 }

	5504

	5505 /* Convenient struct for running collation tests */

	5506 typedef struct {

	5507 const UChar source[MAX_TOKEN_LEN]; /* String on left */

	5508 const UChar target[MAX_TOKEN_LEN]; /* String on right */

	5509 UCollationResult result; /* -1, 0 or +1, depending on collation */

	5510 } OneTestCase;

	5511

	5512 /*

	5513 * Utility function to test one collation test case.

	5514 * @param testcases Array of test cases.

	5515 * @param n_testcases Size of the array testcases.

	5516 * @param str_rules Array of rules. These rules should be specifying the same r ule in different formats.

	5517 * @param n_rules Size of the array str_rules.

	5518 */

	5519 static void doTestOneTestCase(const OneTestCase testcases[],

	5520 int n_testcases,

	5521 const char* str_rules[],

	5522 int n_rules)

	5523 {

	5524 int rule_no, testcase_no;

	5525 UChar rule[500];

	5526 int32_t length = 0;

	5527 UErrorCode status = U_ZERO_ERROR;

	5528 UParseError parse_error;

	5529 UCollator *myCollation;

	5530

	5531 for (rule_no = 0; rule_no < n_rules; ++rule_no) {

	5532

	5533 length = u_unescape(str_rules[rule_no], rule, 500);

	5534 if (length == 0) {

	5535 log_err("ERROR: The rule cannot be unescaped: %s\n");

	5536 return;

	5537 }

	5538 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er ror, &status);

	5539 if(U_FAILURE(status)){

	5540 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	5541 return;

	5542 }

	5543 log_verbose("Testing the <<* syntax\n");

	5544 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	5545 ucol_setStrength(myCollation, UCOL_TERTIARY);

	5546 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {

	5547 doTest(myCollation,

	5548 testcases[testcase_no].source,

	5549 testcases[testcase_no].target,

	5550 testcases[testcase_no].result

	5551 );

	5552 }

	5553 ucol_close(myCollation);

	5554 }

	5555 }

	5556

	5557 const static OneTestCase rangeTestcases[] = {

	5558 { {0x0061}, {0x0062}, UCOL _LESS }, /* "a" < "b" */

	5559 { {0x0062}, {0x0063}, UCOL _LESS }, /* "b" < "c" */

	5560 { {0x0061}, {0x0063}, UCOL _LESS }, /* "a" < "c" */

	5561

	5562 { {0x0062}, {0x006b}, UCOL _LESS }, /* "b" << "k" */

	5563 { {0x006b}, {0x006c}, UCOL _LESS }, /* "k" << "l" */

	5564 { {0x0062}, {0x006c}, UCOL _LESS }, /* "b" << "l" */

	5565 { {0x0061}, {0x006c}, UCOL _LESS }, /* "a" < "l" */

	5566 { {0x0061}, {0x006d}, UCOL _LESS }, /* "a" < "m" */

	5567

	5568 { {0x0079}, {0x006d}, UCOL _LESS }, /* "y" < "f" */

	5569 { {0x0079}, {0x0067}, UCOL _LESS }, /* "y" < "g" */

	5570 { {0x0061}, {0x0068}, UCOL _LESS }, /* "y" < "h" */

	5571 { {0x0061}, {0x0065}, UCOL _LESS }, /* "g" < "e" */

	5572

	5573 { {0x0061}, {0x0031}, UCOL _EQUAL }, /* "a" = "1" */

	5574 { {0x0061}, {0x0032}, UCOL _EQUAL }, /* "a" = "2" */

	5575 { {0x0061}, {0x0033}, UCOL _EQUAL }, /* "a" = "3" */

	5576 { {0x0061}, {0x0066}, UCOL _LESS }, /* "a" < "f" */

	5577 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL _LESS }, /* "la" < "123" */

	5578 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL _EQUAL }, /* "aaa" = "123" */

	5579 { {0x0062}, {0x007a}, UCOL _LESS }, /* "b" < "z" */

	5580 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL _LESS }, /* "azm" = "2yc" */

	5581 };

	5582

	5583 static int nRangeTestcases = LEN(rangeTestcases);

	5584

	5585 const static OneTestCase rangeTestcasesSupplemental[] = {

	5586 { {0xfffe}, {0xffff}, UCOL _LESS }, /* U+FFFE < U+FFFF */

	5587 { {0xffff}, {0xd800, 0xdc00}, UCOL _LESS }, /* U+FFFF < U+10000 */

	5588 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+10000 < U+10001 */

	5589 { {0xfffe}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+FFFE < U+10001 */

	5590 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10000 < U+10001 */

	5591 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10000 < U+10001 */

	5592 { {0xfffe}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+FFFE < U+10001 */

	5593 };

	5594

	5595 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);

	5596

	5597 const static OneTestCase rangeTestcasesQwerty[] = {

	5598 { {0x0071}, {0x0077}, UCOL _LESS }, /* "q" < "w" */

	5599 { {0x0077}, {0x0065}, UCOL _LESS }, /* "w" < "e" */

	5600

	5601 { {0x0079}, {0x0075}, UCOL _LESS }, /* "y" < "u" */

	5602 { {0x0071}, {0x0075}, UCOL _LESS }, /* "q" << "u" */

	5603

	5604 { {0x0074}, {0x0069}, UCOL _LESS }, /* "t" << "i" */

	5605 { {0x006f}, {0x0070}, UCOL _LESS }, /* "o" << "p" */

	5606

	5607 { {0x0079}, {0x0065}, UCOL _LESS }, /* "y" < "e" */

	5608 { {0x0069}, {0x0075}, UCOL _LESS }, /* "i" < "u" */

	5609

	5610 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},

	5611 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL _LESS }, /* "quest" < "were" */

	5612 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},

	5613 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL _LESS }, /* "quack" < "quest" */

	5614 };

	5615

	5616 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);

	5617

	5618 static void TestSameStrengthList(void)

	5619 {

	5620 const char* strRules[] = {

	5621 /* Normal */

	5622 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",

	5623

	5624 /* Lists */

	5625 "&a<bcd &b<<klm &k<<<xyz &y<fghe &a=*123",

	5626 };

	5627 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));

	5628 }

	5629

	5630 static void TestSameStrengthListQuoted(void)

	5631 {

	5632 const char* strRules[] = {

	5633 /* Lists with quoted characters */

	5634 "&\\u0061<bcd &b<<klm &k<<<xyz &y<f\\u0067\\u0068e &a=*123",

	5635 "&'\\u0061'<bcd &b<<klm &k<<<xyz &y<f'\\u0067\\u0068'e &a=*123",

	5636

	5637 "&\\u0061<b\\u0063d &b<<klm &k<<<xyz &\\u0079<fgh\\u0065 &a=*\\u0031\\u0 032\\u0033",

	5638 "&'\\u0061'<b'\\u0063'd &b<<klm &k<<<xyz &'\\u0079'<fgh'\\u0065' &a=*'\\ u0031\\u0032\\u0033'",

	5639

	5640 "&\\u0061<\\u0062c\\u0064 &b<<klm &k<<<xyz &y<fghe &a=*\\u0031\\u0032\\ u0033",

	5641 "&'\\u0061'<'\\u0062'c'\\u0064' &b<<klm &k<<<xyz &y<fghe &a=*'\\u0031\\ u0032\\u0033'",

	5642 };

	5643 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));

	5644 }

	5645

	5646 static void TestSameStrengthListSupplemental(void)

	5647 {

	5648 const char* strRules[] = {

	5649 "&\\ufffe<\\uffff<\\U00010000<\\U00010001<\\U00010002",

	5650 "&\\ufffe<\\uffff<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",

	5651 "&\\ufffe<*\\uffff\\U00010000\\U00010001\\U00010002",

	5652 "&\\ufffe<*\\uffff\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",

	5653 };

	5654 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules));

	5655 }

	5656

	5657 static void TestSameStrengthListQwerty(void)

	5658 {

	5659 const char* strRules[] = {

	5660 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */

	5661 "&q<wer &w<<tyu &t<<<iop &o=asd", /* Lists */

	5662 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00 74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",

	5663 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\ \u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",

	5664 "&\\u0071<\\u0077\\u0065\\u0072 &\\u0077<<\\u0074\\u0079\\u0075 &\\u0074<< <\\u0069\\u006f\\u0070 &\\u006f=\\u0061\\u0073\\u0064",

	5665

	5666 /* Quoted characters also will work if two quoted characters are not consecu tive. */

	5667 "&\\u0071<'\\u0077'\\u0065\\u0072 &\\u0077<<\\u0074'\\u0079'\\u0075 &\\u00 74<<<\\u0069\\u006f'\\u0070' &'\\u006f'=\\u0061\\u0073\\u0064",

	5668

	5669 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */

	5670 /* "&\\u0071<'\\u0077''\\u0065''\\u0072' &\\u0077<<'\\u0074''\\u0079''\\u0 075' &\\u0074<<<'\\u0069''\\u006f''\\u0070' &'\\u006f'=\\u0061\\u0073\\u0064", */

	5671

	5672 };

	5673 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(s trRules));

	5674 }

	5675

	5676 static void TestSameStrengthListQuotedQwerty(void)

	5677 {

	5678 const char* strRules[] = {

	5679 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */

	5680 "&q<wer &w<<tyu &t<<<iop &o=asd", /* Lists */

	5681 "&q<w'e'r &w<<'t'yu &t<<<io'p' &o='a's'd'", /* Lists with quotes */

	5682

	5683 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */

	5684 /* "&q<'w''e''r' &w<<'t''y''u' &t<<<'i''o''p' &o='a''s''d'", */

	5685 };

	5686 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(s trRules));

	5687 }

	5688

	5689 static void TestSameStrengthListRanges(void)

	5690 {

	5691 const char* strRules[] = {

	5692 "&a<b-d &b<<k-m &k<<<x-z &y<f-he &a=*1-3",

	5693 };

	5694 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));

	5695 }

	5696

	5697 static void TestSameStrengthListSupplementalRanges(void)

	5698 {

	5699 const char* strRules[] = {

	5700 "&\\ufffe<*\\uffff-\\U00010002",

	5701 };

	5702 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules));

	5703 }

	5704

	5705 static void TestSpecialCharacters(void)

	5706 {

	5707 const char* strRules[] = {

	5708 /* Normal */

	5709 "&';'<'+'<','<'-'<'&'<'*'",

	5710

	5711 /* List */

	5712 "&';'<'+,-&'",

	5713

	5714 /* Range */

	5715 "&';'<'+'-'-&'",

	5716 };

	5717

	5718 const static OneTestCase specialCharacterStrings[] = {

	5719 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */

	5720 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */

	5721 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */

	5722 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */

	5723 };

	5724 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRu les, LEN(strRules));

	5725 }

	5726

	5727 static void TestPrivateUseCharacters(void)

	5728 {

	5729 const char* strRules[] = {

	5730 /* Normal */

	5731 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",

	5732 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",

	5733 };

	5734

	5735 const static OneTestCase privateUseCharacterStrings[] = {

	5736 { {0x5ea7}, {0xe2d8}, UCOL_LESS },

	5737 { {0xe2d8}, {0xe2d9}, UCOL_LESS },

	5738 { {0xe2d9}, {0xe2da}, UCOL_LESS },

	5739 { {0xe2da}, {0xe2db}, UCOL_LESS },

	5740 { {0xe2db}, {0xe2dc}, UCOL_LESS },

	5741 { {0xe2dc}, {0x4e8d}, UCOL_LESS },

	5742 };

	5743 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));

	5744 }

	5745

	5746 static void TestPrivateUseCharactersInList(void)

	5747 {

	5748 const char* strRules[] = {

	5749 /* List */

	5750 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",

	5751 /* "&'\\u5ea7'<\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", /

	5752 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",

	5753 };

	5754

	5755 const static OneTestCase privateUseCharacterStrings[] = {

	5756 { {0x5ea7}, {0xe2d8}, UCOL_LESS },

	5757 { {0xe2d8}, {0xe2d9}, UCOL_LESS },

	5758 { {0xe2d9}, {0xe2da}, UCOL_LESS },

	5759 { {0xe2da}, {0xe2db}, UCOL_LESS },

	5760 { {0xe2db}, {0xe2dc}, UCOL_LESS },

	5761 { {0xe2dc}, {0x4e8d}, UCOL_LESS },

	5762 };

	5763 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));

	5764 }

	5765

	5766 static void TestPrivateUseCharactersInRange(void)

	5767 {

	5768 const char* strRules[] = {

	5769 /* Range */

	5770 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",

	5771 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",

	5772 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */

	5773 };

	5774

	5775 const static OneTestCase privateUseCharacterStrings[] = {

	5776 { {0x5ea7}, {0xe2d8}, UCOL_LESS },

	5777 { {0xe2d8}, {0xe2d9}, UCOL_LESS },

	5778 { {0xe2d9}, {0xe2da}, UCOL_LESS },

	5779 { {0xe2da}, {0xe2db}, UCOL_LESS },

	5780 { {0xe2db}, {0xe2dc}, UCOL_LESS },

	5781 { {0xe2dc}, {0x4e8d}, UCOL_LESS },

	5782 };

	5783 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));

	5784 }

	5785

	5786 static void TestInvalidListsAndRanges(void)

	5787 {

	5788 const char* invalidRules[] = {

	5789 /* Range not in starred expression */

	5790 "&\\ufffe<\\uffff-\\U00010002",

	5791

	5792 /* Range without start */

	5793 "&a<*-c",

	5794

	5795 /* Range without end */

	5796 "&a<*b-",

	5797

	5798 /* More than one hyphen */

	5799 "&a<*b-g-l",

	5800

	5801 /* Range in the wrong order */

	5802 "&a<*k-b",

	5803

	5804 };

	5805

	5806 UChar rule[500];

	5807 UErrorCode status = U_ZERO_ERROR;

	5808 UParseError parse_error;

	5809 int n_rules = LEN(invalidRules);

	5810 int rule_no;

	5811 int length;

	5812 UCollator *myCollation;

	5813

	5814 for (rule_no = 0; rule_no < n_rules; ++rule_no) {

	5815

	5816 length = u_unescape(invalidRules[rule_no], rule, 500);

	5817 if (length == 0) {

	5818 log_err("ERROR: The rule cannot be unescaped: %s\n");

	5819 return;

	5820 }

	5821 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er ror, &status);

	5822 if(!U_FAILURE(status)){

	5823 log_err("ERROR: Could not cause a failure as expected: \n");

	5824 }

	5825 status = U_ZERO_ERROR;

	5826 }

	5827 }

	5828

	5829 /*

	5830 * This test ensures that characters placed before a character in a different sc ript have the same lead byte

	5831 * in their collation key before and after script reordering.

	5832 */

	5833 static void TestBeforeRuleWithScriptReordering(void)

	5834 {

	5835 UParseError error;

	5836 UErrorCode status = U_ZERO_ERROR;

	5837 UCollator *myCollation;

	5838 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";

	5839 UChar rules[500];

	5840 uint32_t rulesLength = 0;

	5841 int32_t reorderCodes[1] = {USCRIPT_GREEK};

	5842 UCollationResult collResult;

	5843

	5844 uint8_t baseKey[256];

	5845 uint32_t baseKeyLength;

	5846 uint8_t beforeKey[256];

	5847 uint32_t beforeKeyLength;

	5848

	5849 UChar base[] = { 0x03b1 }; /* base */

	5850 int32_t baseLen = sizeof(base)/sizeof(*base);

	5851

	5852 UChar before[] = { 0x0e01 }; /* ko kai */

	5853 int32_t beforeLen = sizeof(before)/sizeof(*before);

	5854

	5855 /UChar data[] = { before, base };

	5856 genericRulesStarter(srules, data, 2);*/

	5857

	5858 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");

	5859

	5860

	5861 /* build collator */

	5862 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");

	5863

	5864 rulesLength = u_unescape(srules, rules, LEN(rules));

	5865 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &er ror, &status);

	5866 if(U_FAILURE(status)) {

	5867 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	5868 return;

	5869 }

	5870

	5871 /* check collation results - before rule applied but not script reordering * /

	5872 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);

	5873 if (collResult != UCOL_GREATER) {

	5874 log_err("Collation result not correct before script reordering = %d\n", collResult);

	5875 }

	5876

	5877 /* check the lead byte of the collation keys before script reordering */

	5878 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);

	5879 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);

	5880 if (baseKey[0] != beforeKey[0]) {

	5881 log_err("Different lead byte for sort keys using before rule and before sc ript reordering. base character lead byte = %02x, before character lead byte = % 02x\n", baseKey[0], beforeKey[0]);

	5882 }

	5883

	5884 /* reorder the scripts */

	5885 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);

	5886 if(U_FAILURE(status)) {

	5887 log_err_status(status, "ERROR: while setting script order: %s\n", myErro rName(status));

	5888 return;

	5889 }

	5890

	5891 /* check collation results - before rule applied and after script reordering */

	5892 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);

	5893 if (collResult != UCOL_GREATER) {

	5894 log_err("Collation result not correct after script reordering = %d\n", c ollResult);

	5895 }

	5896

	5897 /* check the lead byte of the collation keys after script reordering */

	5898 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);

	5899 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);

	5900 if (baseKey[0] != beforeKey[0]) {

	5901 log_err("Different lead byte for sort keys using before fule and after s cript reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);

	5902 }

	5903

	5904 ucol_close(myCollation);

	5905 }

	5906

	5907 /*

	5908 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.

	5909 */

	5910 static void TestNonLeadBytesDuringCollationReordering(void)

	5911 {

	5912 UErrorCode status = U_ZERO_ERROR;

	5913 UCollator *myCollation;

	5914 int32_t reorderCodes[1] = {USCRIPT_GREEK};

	5915 UCollationResult collResult;

	5916

	5917 uint8_t baseKey[256];

	5918 uint32_t baseKeyLength;

	5919 uint8_t reorderKey[256];

	5920 uint32_t reorderKeyLength;

	5921

	5922 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };

	5923

	5924 int i;

	5925

	5926

	5927 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");

	5928

	5929 /* build collator tertiary */

	5930 myCollation = ucol_open("", &status);

	5931 ucol_setStrength(myCollation, UCOL_TERTIARY);

	5932 if(U_FAILURE(status)) {

	5933 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

	5934 return;

	5935 }

	5936 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), ba seKey, 256);

	5937

	5938 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

	5939 if(U_FAILURE(status)) {

	5940 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

	5941 return;

	5942 }

	5943 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);

	5944

	5945 if (baseKeyLength != reorderKeyLength) {

	5946 log_err("Key lengths not the same during reordering.\n", collResult);

	5947 return;

	5948 }

	5949

	5950 for (i = 1; i < baseKeyLength; i++) {

	5951 if (baseKey[i] != reorderKey[i]) {

	5952 log_err("Collation key bytes not the same at position %d.\n", i);

	5953 return;

	5954 }

	5955 }

	5956 ucol_close(myCollation);

	5957

	5958 /* build collator quaternary */

	5959 myCollation = ucol_open("", &status);

	5960 ucol_setStrength(myCollation, UCOL_QUATERNARY);

	5961 if(U_FAILURE(status)) {

	5962 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

	5963 return;

	5964 }

	5965 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), ba seKey, 256);

	5966

	5967 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

	5968 if(U_FAILURE(status)) {

	5969 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

	5970 return;

	5971 }

	5972 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);

	5973

	5974 if (baseKeyLength != reorderKeyLength) {

	5975 log_err("Key lengths not the same during reordering.\n", collResult);

	5976 return;

	5977 }

	5978

	5979 for (i = 1; i < baseKeyLength; i++) {

	5980 if (baseKey[i] != reorderKey[i]) {

	5981 log_err("Collation key bytes not the same at position %d.\n", i);

	5982 return;

	5983 }

	5984 }

	5985 ucol_close(myCollation);

	5986 }

	5987

	5988 /*

	5989 * Test reordering API.

	5990 */

	5991 static void TestReorderingAPI(void)

	5992 {

	5993 UErrorCode status = U_ZERO_ERROR;

	5994 UCollator *myCollation;

	5995 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION};

	5996 UCollationResult collResult;

	5997 int32_t retrievedReorderCodesLength;

	5998 UChar greekString[] = { 0x03b1 };

	5999 UChar punctuationString[] = { 0x203e };

	6000

	6001 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");

	6002

	6003 /* build collator tertiary */

	6004 myCollation = ucol_open("", &status);

	6005 ucol_setStrength(myCollation, UCOL_TERTIARY);

	6006 if(U_FAILURE(status)) {

	6007 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

	6008 return;

	6009 }

	6010

	6011 /* set the reorderding */

	6012 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

	6013 if (U_FAILURE(status)) {

	6014 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

	6015 return;

	6016 }

	6017

	6018 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

	6019 if (status != U_BUFFER_OVERFLOW_ERROR) {

	6020 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));

	6021 return;

	6022 }

	6023 status = U_ZERO_ERROR;

	6024 if (retrievedReorderCodesLength != LEN(reorderCodes)) {

	6025 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));

	6026 return;

	6027 }

	6028 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));

	6029 if (collResult != UCOL_LESS) {

	6030 log_err_status(status, "ERROR: collation result should have been UCOL_LE SS\n");

	6031 return;

	6032 }

	6033

	6034 /* clear the reordering */

	6035 ucol_setReorderCodes(myCollation, NULL, 0, &status);

	6036 if (U_FAILURE(status)) {

	6037 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myE rrorName(status));

	6038 return;

	6039 }

	6040

	6041 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

	6042 if (retrievedReorderCodesLength != 0) {

	6043 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);

	6044 return;

	6045 }

	6046

	6047 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));

	6048 if (collResult != UCOL_GREATER) {

	6049 log_err_status(status, "ERROR: collation result should have been UCOL_GR EATER\n");

	6050 return;

	6051 }

	6052

	6053 ucol_close(myCollation);

	6054 }

	6055

	6056 /*

	6057 * Utility function to test one collation reordering test case.

	6058 * @param testcases Array of test cases.

	6059 * @param n_testcases Size of the array testcases.

	6060 * @param str_rules Array of rules. These rules should be specifying the same r ule in different formats.

	6061 * @param n_rules Size of the array str_rules.

	6062 */

	6063 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32 _t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)

	6064 {

	6065 int testCaseNum;

	6066 UErrorCode status = U_ZERO_ERROR;

	6067 UCollator *myCollation;

	6068

	6069 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {

	6070 myCollation = ucol_open("", &status);

	6071 if (U_FAILURE(status)) {

	6072 log_err_status(status, "ERROR: in creation of collator: %s\n", myErr orName(status));

	6073 return;

	6074 }

	6075 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &stat us);

	6076 if(U_FAILURE(status)) {

	6077 log_err_status(status, "ERROR: while setting script order: %s\n", my ErrorName(status));

	6078 return;

	6079 }

	6080

	6081 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {

	6082 doTest(myCollation,

	6083 testCases[testCaseNum].source,

	6084 testCases[testCaseNum].target,

	6085 testCases[testCaseNum].result

	6086 );

	6087 }

	6088 ucol_close(myCollation);

	6089 }

	6090 }

	6091

	6092 static void TestGreekFirstReorder(void)

	6093 {

	6094 const char* strRules[] = {

	6095 "[reorder Grek]"

	6096 };

	6097

	6098 const int32_t apiRules[] = {

	6099 USCRIPT_GREEK

	6100 };

	6101

	6102 const static OneTestCase privateUseCharacterStrings[] = {

	6103 { {0x0391}, {0x0391}, UCOL_EQUAL },

	6104 { {0x0041}, {0x0391}, UCOL_GREATER },

	6105 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },

	6106 { {0x0060}, {0x0391}, UCOL_LESS },

	6107 { {0x0391}, {0xe2dc}, UCOL_LESS },

	6108 { {0x0391}, {0x0060}, UCOL_GREATER },

	6109 };

	6110

	6111 /* Test rules creation */

	6112 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

	6113

	6114 /* Test collation reordering API */

	6115 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

	6116 }

	6117

	6118 static void TestGreekLastReorder(void)

	6119 {

	6120 const char* strRules[] = {

	6121 "[reorder Zzzz Grek]"

	6122 };

	6123

	6124 const int32_t apiRules[] = {

	6125 USCRIPT_UNKNOWN, USCRIPT_GREEK

	6126 };

	6127

	6128 const static OneTestCase privateUseCharacterStrings[] = {

	6129 { {0x0391}, {0x0391}, UCOL_EQUAL },

	6130 { {0x0041}, {0x0391}, UCOL_LESS },

	6131 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },

	6132 { {0x0060}, {0x0391}, UCOL_LESS },

	6133 { {0x0391}, {0xe2dc}, UCOL_GREATER },

	6134 };

	6135

	6136 /* Test rules creation */

	6137 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

	6138

	6139 /* Test collation reordering API */

	6140 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

	6141 }

	6142

	6143 static void TestNonScriptReorder(void)

	6144 {

	6145 const char* strRules[] = {

	6146 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"

	6147 };

	6148

	6149 const int32_t apiRules[] = {

	6150 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIP T_LATIN,

	6151 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,

	6152 UCOL_REORDER_CODE_CURRENCY

	6153 };

	6154

	6155 const static OneTestCase privateUseCharacterStrings[] = {

	6156 { {0x0391}, {0x0041}, UCOL_LESS },

	6157 { {0x0041}, {0x0391}, UCOL_GREATER },

	6158 { {0x0060}, {0x0041}, UCOL_LESS },

	6159 { {0x0060}, {0x0391}, UCOL_GREATER },

	6160 { {0x0024}, {0x0041}, UCOL_GREATER },

	6161 };

	6162

	6163 /* Test rules creation */

	6164 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

	6165

	6166 /* Test collation reordering API */

	6167 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

	6168 }

	6169

	6170 static void TestHaniReorder(void)

	6171 {

	6172 const char* strRules[] = {

	6173 "[reorder Hani]"

	6174 };

	6175 const int32_t apiRules[] = {

	6176 USCRIPT_HAN

	6177 };

	6178

	6179 const static OneTestCase privateUseCharacterStrings[] = {

	6180 { {0x4e00}, {0x0041}, UCOL_LESS },

	6181 { {0x4e00}, {0x0060}, UCOL_GREATER },

	6182 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },

	6183 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },

	6184 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },

	6185 { {0xfa27}, {0x0041}, UCOL_LESS },

	6186 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },

	6187 };

	6188

	6189 /* Test rules creation */

	6190 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

	6191

	6192 /* Test collation reordering API */

	6193 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

	6194 }

	6195

	/**
	 * Compares two zero-terminated byte sequences, byte by byte.
	 *
	 * @param a first sequence; must be terminated by a 0 byte
	 * @param b second sequence; must be terminated by a 0 byte
	 * @return 0 when both sequences are identical up to and including the
	 *         terminator, -1 when the first differing byte of a is smaller
	 *         than that of b, 1 otherwise
	 */
	static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
	{
	    /* Compare the bytes pointed to, not the pointer values: two distinct
	     * buffers never share an address, so an address comparison would report
	     * "different" for every pair of keys. */
	    for (; *a == *b; ++a, ++b) {
	        if (*a == 0) {
	            return 0;
	        }
	    }
	    return (*a < *b ? -1 : 1);
	}

	6205

	6206 static void TestImport(void)

	6207 {

	6208 UCollator* vicoll;

	6209 UCollator* escoll;

	6210 UCollator* viescoll;

	6211 UCollator* importviescoll;

	6212 UParseError error;

	6213 UErrorCode status = U_ZERO_ERROR;

	6214 UChar* virules;

	6215 int32_t viruleslength;

	6216 UChar* esrules;

	6217 int32_t esruleslength;

	6218 UChar* viesrules;

	6219 int32_t viesruleslength;

	6220 char srules[500] = "[import vi][import es]";

	6221 UChar rules[500];

	6222 uint32_t length = 0;

	6223 int32_t itemCount;

	6224 int32_t i, k;

	6225 UChar32 start;

	6226 UChar32 end;

	6227 UChar str[500];

	6228 int32_t strLength;

	6229

	6230 uint8_t sk1[500];

	6231 uint8_t sk2[500];

	6232

	6233 UBool b;

	6234 USet* tailoredSet;

	6235 USet* importTailoredSet;

	6236

	6237

	6238 vicoll = ucol_open("vi", &status);

	6239 if(U_FAILURE(status)){

	6240 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr orName(status));

	6241 return;

	6242 }

	6243

	6244 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);

	6245 escoll = ucol_open("es", &status);

	6246 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);

	6247 viesrules = (UChar)uprv_malloc((viruleslength+esruleslength+1)sizeof(UChar *));

	6248 viesrules[0] = 0;

	6249 u_strcat(viesrules, virules);

	6250 u_strcat(viesrules, esrules);

	6251 viesruleslength = viruleslength + esruleslength;

	6252 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY , &error, &status);

	6253

	6254 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */

	6255 length = u_unescape(srules, rules, 500);

	6256 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &erro r, &status);

	6257 if(U_FAILURE(status)){

	6258 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	6259 return;

	6260 }

	6261

	6262 tailoredSet = ucol_getTailoredSet(viescoll, &status);

	6263 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);

	6264

	6265 if(!uset_equals(tailoredSet, importTailoredSet)){

	6266 log_err("Tailored sets not equal");

	6267 }

	6268

	6269 uset_close(importTailoredSet);

	6270

	6271 itemCount = uset_getItemCount(tailoredSet);

	6272

	6273 for( i = 0; i < itemCount; i++){

	6274 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status );

	6275 if(strLength < 2){

	6276 for (; start <= end; start++){

	6277 k = 0;

	6278 U16_APPEND(str, k, 500, start, b);

	6279 ucol_getSortKey(viescoll, str, 1, sk1, 500);

	6280 ucol_getSortKey(importviescoll, str, 1, sk2, 500);

	6281 if(compare_uint8_t_arrays(sk1, sk2) != 0){

	6282 log_err("Sort key for %s not equal\n", str);

	6283 break;

	6284 }

	6285 }

	6286 }else{

	6287 ucol_getSortKey(viescoll, str, strLength, sk1, 500);

	6288 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);

	6289 if(compare_uint8_t_arrays(sk1, sk2) != 0){

	6290 log_err("ZZSort key for %s not equal\n", str);

	6291 break;

	6292 }

	6293

	6294 }

	6295 }

	6296

	6297 uset_close(tailoredSet);

	6298

	6299 uprv_free(viesrules);

	6300

	6301 ucol_close(vicoll);

	6302 ucol_close(escoll);

	6303 ucol_close(viescoll);

	6304 ucol_close(importviescoll);

	6305 }

	6306

	6307 static void TestImportWithType(void)

	6308 {

	6309 UCollator* vicoll;

	6310 UCollator* decoll;

	6311 UCollator* videcoll;

	6312 UCollator* importvidecoll;

	6313 UParseError error;

	6314 UErrorCode status = U_ZERO_ERROR;

	6315 const UChar* virules;

	6316 int32_t viruleslength;

	6317 const UChar* derules;

	6318 int32_t deruleslength;

	6319 UChar* viderules;

	6320 int32_t videruleslength;

	6321 const char srules[500] = "[import vi][import de-u-co-phonebk]";

	6322 UChar rules[500];

	6323 uint32_t length = 0;

	6324 int32_t itemCount;

	6325 int32_t i, k;

	6326 UChar32 start;

	6327 UChar32 end;

	6328 UChar str[500];

	6329 int32_t strLength;

	6330

	6331 uint8_t sk1[500];

	6332 uint8_t sk2[500];

	6333

	6334 USet* tailoredSet;

	6335 USet* importTailoredSet;

	6336

	6337 vicoll = ucol_open("vi", &status);

	6338 if(U_FAILURE(status)){

	6339 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	6340 return;

	6341 }

	6342 virules = ucol_getRules(vicoll, &viruleslength);

	6343 /* decoll = ucol_open("de@collation=phonebook", &status); */

	6344 decoll = ucol_open("de-u-co-phonebk", &status);

	6345 if(U_FAILURE(status)){

	6346 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	6347 return;

	6348 }

	6349

	6350

	6351 derules = ucol_getRules(decoll, &deruleslength);

	6352 viderules = (UChar)uprv_malloc((viruleslength+deruleslength+1)sizeof(UChar *));

	6353 viderules[0] = 0;

	6354 u_strcat(viderules, virules);

	6355 u_strcat(viderules, derules);

	6356 videruleslength = viruleslength + deruleslength;

	6357 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY , &error, &status);

	6358

	6359 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */

	6360 length = u_unescape(srules, rules, 500);

	6361 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &erro r, &status);

	6362 if(U_FAILURE(status)){

	6363 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

	6364 return;

	6365 }

	6366

	6367 tailoredSet = ucol_getTailoredSet(videcoll, &status);

	6368 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);

	6369

	6370 if(!uset_equals(tailoredSet, importTailoredSet)){

	6371 log_err("Tailored sets not equal");

	6372 }

	6373

	6374 uset_close(importTailoredSet);

	6375

	6376 itemCount = uset_getItemCount(tailoredSet);

	6377

	6378 for( i = 0; i < itemCount; i++){

	6379 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status );

	6380 if(strLength < 2){

	6381 for (; start <= end; start++){

	6382 k = 0;

	6383 U16_APPEND_UNSAFE(str, k, start);

	6384 ucol_getSortKey(videcoll, str, 1, sk1, 500);

	6385 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);

	6386 if(compare_uint8_t_arrays(sk1, sk2) != 0){

	6387 log_err("Sort key for %s not equal\n", str);

	6388 break;

	6389 }

	6390 }

	6391 }else{

	6392 ucol_getSortKey(videcoll, str, strLength, sk1, 500);

	6393 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);

	6394 if(compare_uint8_t_arrays(sk1, sk2) != 0){

	6395 log_err("Sort key for %s not equal\n", str);

	6396 break;

	6397 }

	6398

	6399 }

	6400 }

	6401

	6402 uset_close(tailoredSet);

	6403

	6404 uprv_free(viderules);

	6405

	6406 ucol_close(videcoll);

	6407 ucol_close(importvidecoll);

	6408 ucol_close(vicoll);

	6409 ucol_close(decoll);

	6410

	6411 }

	6412

	6413

	6414 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

	6415

	6416 void addMiscCollTest(TestNode** root)

	6417 {

	6418 TEST(TestRuleOptions);

	6419 TEST(TestBeforePrefixFailure);

	6420 TEST(TestContractionClosure);

	6421 TEST(TestPrefixCompose);

	6422 TEST(TestStrCollIdenticalPrefix);

	6423 TEST(TestPrefix);

	6424 TEST(TestNewJapanese);

	6425 /TEST(TestLimitations);/

	6426 TEST(TestNonChars);

	6427 TEST(TestExtremeCompression);

	6428 TEST(TestSurrogates);

	6429 TEST(TestVariableTopSetting);

	6430 TEST(TestBocsuCoverage);

	6431 TEST(TestCyrillicTailoring);

	6432 TEST(TestCase);

	6433 TEST(IncompleteCntTest);

	6434 TEST(BlackBirdTest);

	6435 TEST(FunkyATest);

	6436 TEST(BillFairmanTest);

	6437 TEST(RamsRulesTest);

	6438 TEST(IsTailoredTest);

	6439 TEST(TestCollations);

	6440 TEST(TestChMove);

	6441 TEST(TestImplicitTailoring);

	6442 TEST(TestFCDProblem);

	6443 TEST(TestEmptyRule);

	6444 /TEST(TestJ784);/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */

	6445 TEST(TestJ815);

	6446 /TEST(TestJ831);/ /* we changed lv locale */

	6447 TEST(TestBefore);

	6448 TEST(TestRedundantRules);

	6449 TEST(TestExpansionSyntax);

	6450 TEST(TestHangulTailoring);

	6451 TEST(TestUCARules);

	6452 TEST(TestIncrementalNormalize);

	6453 TEST(TestComposeDecompose);

	6454 TEST(TestCompressOverlap);

	6455 TEST(TestContraction);

	6456 TEST(TestExpansion);

	6457 /TEST(PrintMarkDavis);/ /* this test doesn't test - just prints sortkeys * /

	6458 /TEST(TestGetCaseBit);/ /this one requires internal things to be exported /

	6459 TEST(TestOptimize);

	6460 TEST(TestSuppressContractions);

	6461 TEST(Alexis2);

	6462 TEST(TestHebrewUCA);

	6463 TEST(TestPartialSortKeyTermination);

	6464 TEST(TestSettings);

	6465 TEST(TestEquals);

	6466 TEST(TestJ2726);

	6467 TEST(NullRule);

	6468 TEST(TestNumericCollation);

	6469 TEST(TestTibetanConformance);

	6470 TEST(TestPinyinProblem);

	6471 TEST(TestImplicitGeneration);

	6472 TEST(TestSeparateTrees);

	6473 TEST(TestBeforePinyin);

	6474 TEST(TestBeforeTightening);

	6475 /TEST(TestMoreBefore);/

	6476 TEST(TestTailorNULL);

	6477 TEST(TestUpperFirstQuaternary);

	6478 TEST(TestJ4960);

	6479 TEST(TestJ5223);

	6480 TEST(TestJ5232);

	6481 TEST(TestJ5367);

	6482 TEST(TestHiragana);

	6483 TEST(TestSortKeyConsistency);

	6484 TEST(TestVI5913); /* VI, RO tailored rules */

	6485 TEST(TestCroatianSortKey);

	6486 TEST(TestTailor6179);

	6487 TEST(TestUCAPrecontext);

	6488 TEST(TestOutOfBuffer5468);

	6489 TEST(TestSameStrengthList);

	6490

	6491 TEST(TestSameStrengthListQuoted);

	6492 TEST(TestSameStrengthListSupplemental);

	6493 TEST(TestSameStrengthListQwerty);

	6494 TEST(TestSameStrengthListQuotedQwerty);

	6495 TEST(TestSameStrengthListRanges);

	6496 TEST(TestSameStrengthListSupplementalRanges);

	6497 TEST(TestSpecialCharacters);

	6498 TEST(TestPrivateUseCharacters);

	6499 TEST(TestPrivateUseCharactersInList);

	6500 TEST(TestPrivateUseCharactersInRange);

	6501 TEST(TestInvalidListsAndRanges);

	6502 TEST(TestImport);

	6503 TEST(TestImportWithType);

	6504

	6505 TEST(TestBeforeRuleWithScriptReordering);

	6506 TEST(TestNonLeadBytesDuringCollationReordering);

	6507 TEST(TestReorderingAPI);

	6508 TEST(TestGreekFirstReorder);

	6509 TEST(TestGreekLastReorder);

	6510 TEST(TestNonScriptReorder);

	6511 TEST(TestHaniReorder);

	6512 }

	6513

	6514 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « icu46/source/test/cintltst/cloctst.c ('k') | icu46/source/test/cintltst/cmsgtst.h » ('j') | no next file with comments »