icu46/source/test/cintltst/citertst.c - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/test/cintltst/citertst.c

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /********************************************************************

	2 * COPYRIGHT:

	3 * Copyright (c) 1997-2010, International Business Machines Corporation and

	4 * others. All Rights Reserved.

	5 ********************************************************************/

	6 /******************************************************************************* *

	7 *

	8 * File CITERTST.C

	9 *

	10 * Modification History:

	11 * Date Name Description

	12 * Madhu Katragadda Ported for C API

	13 * 02/19/01 synwee Modified test case for new collation iterator

	14 ******************************************************************************** */

	15 /*

	16 * Collation Iterator tests.

	17 * (Let me reiterate my position...)

	18 */

	19

	20 #include "unicode/utypes.h"

	21

	22 #if !UCONFIG_NO_COLLATION

	23

	24 #include "unicode/ucol.h"

	25 #include "unicode/uloc.h"

	26 #include "unicode/uchar.h"

	27 #include "unicode/ustring.h"

	28 #include "unicode/putil.h"

	29 #include "callcoll.h"

	30 #include "cmemory.h"

	31 #include "cintltst.h"

	32 #include "citertst.h"

	33 #include "ccolltst.h"

	34 #include "filestrm.h"

	35 #include "cstring.h"

	36 #include "ucol_imp.h"

	37 #include "ucol_tok.h"

	38 #include "uparse.h"

	39 #include <stdio.h>

	40

	41 extern uint8_t ucol_uprv_getCaseBits(const UChar , uint32_t, UErrorCode );

	42

	43 void addCollIterTest(TestNode** root)

	44 {

	45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");

	46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset");

	47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText");

	48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");

	49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");

	50 addTest(root, &TestNormalizedUnicodeChar,

	51 "tscoll/citertst/TestNormalizedUnicodeChar");

	52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");

	53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672");

	54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");

	55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");

	56 addTest(root, &TestCEs, "tscoll/citertst/TestCEs");

	57 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");

	58 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow") ;

	59 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");

	60 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");

	61 }

	62

	63 /* The locales we support */

	64

	65 static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};

	66

	67 static void TestBug672() {

	68 UErrorCode status = U_ZERO_ERROR;

	69 UChar pattern[20];

	70 UChar text[50];

	71 int i;

	72 int result[3][3];

	73

	74 u_uastrcpy(pattern, "resume");

	75 u_uastrcpy(text, "Time to resume updating my resume.");

	76

	77 for (i = 0; i < 3; ++ i) {

	78 UCollator *coll = ucol_open(LOCALES[i], &status);

	79 UCollationElements *pitr = ucol_openElements(coll, pattern, -1,

	80 &status);

	81 UCollationElements *titer = ucol_openElements(coll, text, -1,

	82 &status);

	83 if (U_FAILURE(status)) {

	84 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",

	85 myErrorName(status));

	86 return;

	87 }

	88

	89 log_verbose("locale tested %s\n", LOCALES[i]);

	90

	91 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&

	92 U_SUCCESS(status)) {

	93 }

	94 if (U_FAILURE(status)) {

	95 log_err("ERROR: reversing collation iterator :%s\n",

	96 myErrorName(status));

	97 return;

	98 }

	99 ucol_reset(pitr);

	100

	101 ucol_setOffset(titer, u_strlen(pattern), &status);

	102 if (U_FAILURE(status)) {

	103 log_err("ERROR: setting offset in collator :%s\n",

	104 myErrorName(status));

	105 return;

	106 }

	107 result[i][0] = ucol_getOffset(titer);

	108 log_verbose("Text iterator set to offset %d\n", result[i][0]);

	109

	110 /* Use previous() */

	111 ucol_previous(titer, &status);

	112 result[i][1] = ucol_getOffset(titer);

	113 log_verbose("Current offset %d after previous\n", result[i][1]);

	114

	115 /* Add one to index */

	116 log_verbose("Adding one to current offset...\n");

	117 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);

	118 if (U_FAILURE(status)) {

	119 log_err("ERROR: setting offset in collator :%s\n",

	120 myErrorName(status));

	121 return;

	122 }

	123 result[i][2] = ucol_getOffset(titer);

	124 log_verbose("Current offset in text = %d\n", result[i][2]);

	125 ucol_closeElements(pitr);

	126 ucol_closeElements(titer);

	127 ucol_close(coll);

	128 }

	129

	130 if (uprv_memcmp(result[0], result[1], 3) != 0 \|\|

	131 uprv_memcmp(result[1], result[2], 3) != 0) {

	132 log_err("ERROR: Different locales have different offsets at the same cha racter\n");

	133 }

	134 }

	135

	136

	137

	138 /* Running this test with normalization enabled showed up a bug in the incremen tal

	139 normalization code. */

	140 static void TestBug672Normalize() {

	141 UErrorCode status = U_ZERO_ERROR;

	142 UChar pattern[20];

	143 UChar text[50];

	144 int i;

	145 int result[3][3];

	146

	147 u_uastrcpy(pattern, "resume");

	148 u_uastrcpy(text, "Time to resume updating my resume.");

	149

	150 for (i = 0; i < 3; ++ i) {

	151 UCollator *coll = ucol_open(LOCALES[i], &status);

	152 UCollationElements *pitr = NULL;

	153 UCollationElements *titer = NULL;

	154

	155 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	156

	157 pitr = ucol_openElements(coll, pattern, -1, &status);

	158 titer = ucol_openElements(coll, text, -1, &status);

	159 if (U_FAILURE(status)) {

	160 log_err_status(status, "ERROR: in creation of either the collator or the collation iterator :%s\n",

	161 myErrorName(status));

	162 return;

	163 }

	164

	165 log_verbose("locale tested %s\n", LOCALES[i]);

	166

	167 while (ucol_next(pitr, &status) != UCOL_NULLORDER &&

	168 U_SUCCESS(status)) {

	169 }

	170 if (U_FAILURE(status)) {

	171 log_err("ERROR: reversing collation iterator :%s\n",

	172 myErrorName(status));

	173 return;

	174 }

	175 ucol_reset(pitr);

	176

	177 ucol_setOffset(titer, u_strlen(pattern), &status);

	178 if (U_FAILURE(status)) {

	179 log_err("ERROR: setting offset in collator :%s\n",

	180 myErrorName(status));

	181 return;

	182 }

	183 result[i][0] = ucol_getOffset(titer);

	184 log_verbose("Text iterator set to offset %d\n", result[i][0]);

	185

	186 /* Use previous() */

	187 ucol_previous(titer, &status);

	188 result[i][1] = ucol_getOffset(titer);

	189 log_verbose("Current offset %d after previous\n", result[i][1]);

	190

	191 /* Add one to index */

	192 log_verbose("Adding one to current offset...\n");

	193 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);

	194 if (U_FAILURE(status)) {

	195 log_err("ERROR: setting offset in collator :%s\n",

	196 myErrorName(status));

	197 return;

	198 }

	199 result[i][2] = ucol_getOffset(titer);

	200 log_verbose("Current offset in text = %d\n", result[i][2]);

	201 ucol_closeElements(pitr);

	202 ucol_closeElements(titer);

	203 ucol_close(coll);

	204 }

	205

	206 if (uprv_memcmp(result[0], result[1], 3) != 0 \|\|

	207 uprv_memcmp(result[1], result[2], 3) != 0) {

	208 log_err("ERROR: Different locales have different offsets at the same cha racter\n");

	209 }

	210 }

	211

	212

	213

	214

	215 /**

	216 * Test for CollationElementIterator previous and next for the whole set of

	217 * unicode characters.

	218 */

	219 static void TestUnicodeChar()

	220 {

	221 UChar source[0x100];

	222 UCollator *en_us;

	223 UCollationElements *iter;

	224 UErrorCode status = U_ZERO_ERROR;

	225 UChar codepoint;

	226

	227 UChar *test;

	228 en_us = ucol_open("en_US", &status);

	229 if (U_FAILURE(status)){

	230 log_err_status(status, "ERROR: in creation of collation data using ucol_o pen()\n %s\n",

	231 myErrorName(status));

	232 return;

	233 }

	234

	235 for (codepoint = 1; codepoint < 0xFFFE;)

	236 {

	237 test = source;

	238

	239 while (codepoint % 0xFF != 0)

	240 {

	241 if (u_isdefined(codepoint))

	242 *(test ++) = codepoint;

	243 codepoint ++;

	244 }

	245

	246 if (u_isdefined(codepoint))

	247 *(test ++) = codepoint;

	248

	249 if (codepoint != 0xFFFF)

	250 codepoint ++;

	251

	252 *test = 0;

	253 iter=ucol_openElements(en_us, source, u_strlen(source), &status);

	254 if(U_FAILURE(status)){

	255 log_err("ERROR: in creation of collation element iterator using ucol_o penElements()\n %s\n",

	256 myErrorName(status));

	257 ucol_close(en_us);

	258 return;

	259 }

	260 /* A basic test to see if it's working at all */

	261 log_verbose("codepoint testing %x\n", codepoint);

	262 backAndForth(iter);

	263 ucol_closeElements(iter);

	264

	265 /* null termination test */

	266 iter=ucol_openElements(en_us, source, -1, &status);

	267 if(U_FAILURE(status)){

	268 log_err("ERROR: in creation of collation element iterator using ucol_o penElements()\n %s\n",

	269 myErrorName(status));

	270 ucol_close(en_us);

	271 return;

	272 }

	273 /* A basic test to see if it's working at all */

	274 backAndForth(iter);

	275 ucol_closeElements(iter);

	276 }

	277

	278 ucol_close(en_us);

	279 }

	280

	281 /**

	282 * Test for CollationElementIterator previous and next for the whole set of

	283 * unicode characters with normalization on.

	284 */

	285 static void TestNormalizedUnicodeChar()

	286 {

	287 UChar source[0x100];

	288 UCollator *th_th;

	289 UCollationElements *iter;

	290 UErrorCode status = U_ZERO_ERROR;

	291 UChar codepoint;

	292

	293 UChar *test;

	294 /* thai should have normalization on */

	295 th_th = ucol_open("th_TH", &status);

	296 if (U_FAILURE(status)){

	297 log_err_status(status, "ERROR: in creation of thai collation using ucol_ open()\n %s\n",

	298 myErrorName(status));

	299 return;

	300 }

	301

	302 for (codepoint = 1; codepoint < 0xFFFE;)

	303 {

	304 test = source;

	305

	306 while (codepoint % 0xFF != 0)

	307 {

	308 if (u_isdefined(codepoint))

	309 *(test ++) = codepoint;

	310 codepoint ++;

	311 }

	312

	313 if (u_isdefined(codepoint))

	314 *(test ++) = codepoint;

	315

	316 if (codepoint != 0xFFFF)

	317 codepoint ++;

	318

	319 *test = 0;

	320 iter=ucol_openElements(th_th, source, u_strlen(source), &status);

	321 if(U_FAILURE(status)){

	322 log_err("ERROR: in creation of collation element iterator using ucol_o penElements()\n %s\n",

	323 myErrorName(status));

	324 ucol_close(th_th);

	325 return;

	326 }

	327

	328 backAndForth(iter);

	329 ucol_closeElements(iter);

	330

	331 iter=ucol_openElements(th_th, source, -1, &status);

	332 if(U_FAILURE(status)){

	333 log_err("ERROR: in creation of collation element iterator using ucol_o penElements()\n %s\n",

	334 myErrorName(status));

	335 ucol_close(th_th);

	336 return;

	337 }

	338

	339 backAndForth(iter);

	340 ucol_closeElements(iter);

	341 }

	342

	343 ucol_close(th_th);

	344 }

	345

	346 /**

	347 * Test the incremental normalization

	348 */

	349 static void TestNormalization()

	350 {

	351 UErrorCode status = U_ZERO_ERROR;

	352 const char *str =

	353 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0 315B < \\u0316\\u0300\\u0315";

	354 UCollator *coll;

	355 UChar rule[50];

	356 int rulelen = u_unescape(str, rule, 50);

	357 int count = 0;

	358 const char *testdata[] =

	359 {"\\u1ED9", "o\\u0323\\u0302",

	360 "\\u0300\\u0315", "\\u0315\\u0300",

	361 "A\\u0300\\u0315B", "A\\u0315\\u0300B",

	362 "A\\u0316\\u0315B", "A\\u0315\\u0316B",

	363 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",

	364 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",

	365 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};

	366 int32_t srclen;

	367 UChar source[10];

	368 UCollationElements *iter;

	369

	370 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);

	371 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	372 if (U_FAILURE(status)){

	373 log_err_status(status, "ERROR: in creation of collator using ucol_openRu les()\n %s\n",

	374 myErrorName(status));

	375 return;

	376 }

	377

	378 srclen = u_unescape(testdata[0], source, 10);

	379 iter = ucol_openElements(coll, source, srclen, &status);

	380 backAndForth(iter);

	381 ucol_closeElements(iter);

	382

	383 srclen = u_unescape(testdata[1], source, 10);

	384 iter = ucol_openElements(coll, source, srclen, &status);

	385 backAndForth(iter);

	386 ucol_closeElements(iter);

	387

	388 while (count < 12) {

	389 srclen = u_unescape(testdata[count], source, 10);

	390 iter = ucol_openElements(coll, source, srclen, &status);

	391

	392 if (U_FAILURE(status)){

	393 log_err("ERROR: in creation of collator element iterator\n %s\n",

	394 myErrorName(status));

	395 return;

	396 }

	397 backAndForth(iter);

	398 ucol_closeElements(iter);

	399

	400 iter = ucol_openElements(coll, source, -1, &status);

	401

	402 if (U_FAILURE(status)){

	403 log_err("ERROR: in creation of collator element iterator\n %s\n",

	404 myErrorName(status));

	405 return;

	406 }

	407 backAndForth(iter);

	408 ucol_closeElements(iter);

	409 count ++;

	410 }

	411 ucol_close(coll);

	412 }

	413

	414 /**

	415 * Test for CollationElementIterator.previous()

	416 *

	417 * @bug 4108758 - Make sure it works with contracting characters

	418 *

	419 */

	420 static void TestPrevious()

	421 {

	422 UCollator *coll=NULL;

	423 UChar rule[50];

	424 UChar *source;

	425 UCollator c1, c2, *c3;

	426 UCollationElements *iter;

	427 UErrorCode status = U_ZERO_ERROR;

	428 UChar test1[50];

	429 UChar test2[50];

	430

	431 u_uastrcpy(test1, "What subset of all possible test cases?");

	432 u_uastrcpy(test2, "has the highest probability of detecting");

	433 coll = ucol_open("en_US", &status);

	434

	435 iter=ucol_openElements(coll, test1, u_strlen(test1), &status);

	436 log_verbose("English locale testing back and forth\n");

	437 if(U_FAILURE(status)){

	438 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",

	439 myErrorName(status));

	440 ucol_close(coll);

	441 return;

	442 }

	443 /* A basic test to see if it's working at all */

	444 backAndForth(iter);

	445 ucol_closeElements(iter);

	446 ucol_close(coll);

	447

	448 /* Test with a contracting character sequence */

	449 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");

	450 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N ULL, &status);

	451

	452 log_verbose("Contraction rule testing back and forth with no normalization\n ");

	453

	454 if (c1 == NULL \|\| U_FAILURE(status))

	455 {

	456 log_err("Couldn't create a RuleBasedCollator with a contracting sequence \n %s\n",

	457 myErrorName(status));

	458 return;

	459 }

	460 source=(UChar)malloc(sizeof(UChar) 20);

	461 u_uastrcpy(source, "abchdcba");

	462 iter=ucol_openElements(c1, source, u_strlen(source), &status);

	463 if(U_FAILURE(status)){

	464 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	465 myErrorName(status));

	466 return;

	467 }

	468 backAndForth(iter);

	469 ucol_closeElements(iter);

	470 ucol_close(c1);

	471

	472 /* Test with an expanding character sequence */

	473 u_uastrcpy(rule, "&a < b < c/abd < d");

	474 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N ULL, &status);

	475 log_verbose("Expansion rule testing back and forth with no normalization\n") ;

	476 if (c2 == NULL \|\| U_FAILURE(status))

	477 {

	478 log_err("Couldn't create a RuleBasedCollator with a contracting sequence .\n %s\n",

	479 myErrorName(status));

	480 return;

	481 }

	482 u_uastrcpy(source, "abcd");

	483 iter=ucol_openElements(c2, source, u_strlen(source), &status);

	484 if(U_FAILURE(status)){

	485 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	486 myErrorName(status));

	487 return;

	488 }

	489 backAndForth(iter);

	490 ucol_closeElements(iter);

	491 ucol_close(c2);

	492 /* Now try both */

	493 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");

	494 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENG TH,NULL, &status);

	495 log_verbose("Expansion/contraction rule testing back and forth with no norma lization\n");

	496

	497 if (c3 == NULL \|\| U_FAILURE(status))

	498 {

	499 log_err("Couldn't create a RuleBasedCollator with a contracting sequence .\n %s\n",

	500 myErrorName(status));

	501 return;

	502 }

	503 u_uastrcpy(source, "abcdbchdc");

	504 iter=ucol_openElements(c3, source, u_strlen(source), &status);

	505 if(U_FAILURE(status)){

	506 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	507 myErrorName(status));

	508 return;

	509 }

	510 backAndForth(iter);

	511 ucol_closeElements(iter);

	512 ucol_close(c3);

	513 source[0] = 0x0e41;

	514 source[1] = 0x0e02;

	515 source[2] = 0x0e41;

	516 source[3] = 0x0e02;

	517 source[4] = 0x0e27;

	518 source[5] = 0x61;

	519 source[6] = 0x62;

	520 source[7] = 0x63;

	521 source[8] = 0;

	522

	523 coll = ucol_open("th_TH", &status);

	524 log_verbose("Thai locale testing back and forth with normalization\n");

	525 iter=ucol_openElements(coll, source, u_strlen(source), &status);

	526 if(U_FAILURE(status)){

	527 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	528 myErrorName(status));

	529 return;

	530 }

	531 backAndForth(iter);

	532 ucol_closeElements(iter);

	533 ucol_close(coll);

	534

	535 /* prev test */

	536 source[0] = 0x0061;

	537 source[1] = 0x30CF;

	538 source[2] = 0x3099;

	539 source[3] = 0x30FC;

	540 source[4] = 0;

	541

	542 coll = ucol_open("ja_JP", &status);

	543 log_verbose("Japanese locale testing back and forth with normalization\n");

	544 iter=ucol_openElements(coll, source, u_strlen(source), &status);

	545 if(U_FAILURE(status)){

	546 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	547 myErrorName(status));

	548 return;

	549 }

	550 backAndForth(iter);

	551 ucol_closeElements(iter);

	552 ucol_close(coll);

	553

	554 free(source);

	555 }

	556

	557 /**

	558 * Test for getOffset() and setOffset()

	559 */

	560 static void TestOffset()

	561 {

	562 UErrorCode status= U_ZERO_ERROR;

	563 UCollator *en_us=NULL;

	564 UCollationElements iter, pristine;

	565 int32_t offset;

	566 OrderAndOffset *orders;

	567 int32_t orderLength=0;

	568 int count = 0;

	569 UChar test1[50];

	570 UChar test2[50];

	571

	572 u_uastrcpy(test1, "What subset of all possible test cases?");

	573 u_uastrcpy(test2, "has the highest probability of detecting");

	574 en_us = ucol_open("en_US", &status);

	575 log_verbose("Testing getOffset and setOffset for collations\n");

	576 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);

	577 if(U_FAILURE(status)){

	578 log_err_status(status, "ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",

	579 myErrorName(status));

	580 ucol_close(en_us);

	581 return;

	582 }

	583

	584 /* testing boundaries */

	585 ucol_setOffset(iter, 0, &status);

	586 if (U_FAILURE(status) \|\| ucol_previous(iter, &status) != UCOL_NULLORDER) {

	587 log_err("Error: After setting offset to 0, we should be at the end "

	588 "of the backwards iteration");

	589 }

	590 ucol_setOffset(iter, u_strlen(test1), &status);

	591 if (U_FAILURE(status) \|\| ucol_next(iter, &status) != UCOL_NULLORDER) {

	592 log_err("Error: After setting offset to end of the string, we should "

	593 "be at the end of the backwards iteration");

	594 }

	595

	596 /* Run all the way through the iterator, then get the offset */

	597

	598 orders = getOrders(iter, &orderLength);

	599

	600 offset = ucol_getOffset(iter);

	601

	602 if (offset != u_strlen(test1))

	603 {

	604 log_err("offset at end != length %d vs %d\n", offset,

	605 u_strlen(test1) );

	606 }

	607

	608 /* Now set the offset back to the beginning and see if it works */

	609 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);

	610 if(U_FAILURE(status)){

	611 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	612 myErrorName(status));

	613 ucol_close(en_us);

	614 return;

	615 }

	616 status = U_ZERO_ERROR;

	617

	618 ucol_setOffset(iter, 0, &status);

	619 if (U_FAILURE(status))

	620 {

	621 log_err("setOffset failed. %s\n", myErrorName(status));

	622 }

	623 else

	624 {

	625 assertEqual(iter, pristine);

	626 }

	627

	628 ucol_closeElements(pristine);

	629 ucol_closeElements(iter);

	630 free(orders);

	631

	632 /* testing offsets in normalization buffer */

	633 test1[0] = 0x61;

	634 test1[1] = 0x300;

	635 test1[2] = 0x316;

	636 test1[3] = 0x62;

	637 test1[4] = 0;

	638 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

	639 iter = ucol_openElements(en_us, test1, 4, &status);

	640 if(U_FAILURE(status)){

	641 log_err("ERROR: in creation of collation element iterator using ucol_ope nElements()\n %s\n",

	642 myErrorName(status));

	643 ucol_close(en_us);

	644 return;

	645 }

	646

	647 count = 0;

	648 while (ucol_next(iter, &status) != UCOL_NULLORDER &&

	649 U_SUCCESS(status)) {

	650 switch (count) {

	651 case 0:

	652 if (ucol_getOffset(iter) != 1) {

	653 log_err("ERROR: Offset of iteration should be 1\n");

	654 }

	655 break;

	656 case 3:

	657 if (ucol_getOffset(iter) != 4) {

	658 log_err("ERROR: Offset of iteration should be 4\n");

	659 }

	660 break;

	661 default:

	662 if (ucol_getOffset(iter) != 3) {

	663 log_err("ERROR: Offset of iteration should be 3\n");

	664 }

	665 }

	666 count ++;

	667 }

	668

	669 ucol_reset(iter);

	670 count = 0;

	671 while (ucol_previous(iter, &status) != UCOL_NULLORDER &&

	672 U_SUCCESS(status)) {

	673 switch (count) {

	674 case 0:

	675 case 1:

	676 if (ucol_getOffset(iter) != 3) {

	677 log_err("ERROR: Offset of iteration should be 3\n");

	678 }

	679 break;

	680 case 2:

	681 if (ucol_getOffset(iter) != 1) {

	682 log_err("ERROR: Offset of iteration should be 1\n");

	683 }

	684 break;

	685 default:

	686 if (ucol_getOffset(iter) != 0) {

	687 log_err("ERROR: Offset of iteration should be 0\n");

	688 }

	689 }

	690 count ++;

	691 }

	692

	693 if(U_FAILURE(status)){

	694 log_err("ERROR: in iterating collation elements %s\n",

	695 myErrorName(status));

	696 }

	697

	698 ucol_closeElements(iter);

	699 ucol_close(en_us);

	700 }

	701

	702 /**

	703 * Test for setText()

	704 */

	705 static void TestSetText()

	706 {

	707 int32_t c,i;

	708 UErrorCode status = U_ZERO_ERROR;

	709 UCollator *en_us=NULL;

	710 UCollationElements iter1, iter2;

	711 UChar test1[50];

	712 UChar test2[50];

	713

	714 u_uastrcpy(test1, "What subset of all possible test cases?");

	715 u_uastrcpy(test2, "has the highest probability of detecting");

	716 en_us = ucol_open("en_US", &status);

	717 log_verbose("testing setText for Collation elements\n");

	718 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);

	719 if(U_FAILURE(status)){

	720 log_err_status(status, "ERROR: in creation of collation element iterator 1 using ucol_openElements()\n %s\n",

	721 myErrorName(status));

	722 ucol_close(en_us);

	723 return;

	724 }

	725 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);

	726 if(U_FAILURE(status)){

	727 log_err("ERROR: in creation of collation element iterator2 using ucol_op enElements()\n %s\n",

	728 myErrorName(status));

	729 ucol_close(en_us);

	730 return;

	731 }

	732

	733 /* Run through the second iterator just to exercise it */

	734 c = ucol_next(iter2, &status);

	735 i = 0;

	736

	737 while ( ++i < 10 && (c != UCOL_NULLORDER))

	738 {

	739 if (U_FAILURE(status))

	740 {

	741 log_err("iter2->next() returned an error. %s\n", myErrorName(status) );

	742 ucol_closeElements(iter2);

	743 ucol_closeElements(iter1);

	744 ucol_close(en_us);

	745 return;

	746 }

	747

	748 c = ucol_next(iter2, &status);

	749 }

	750

	751 /* Now set it to point to the same string as the first iterator */

	752 ucol_setText(iter2, test1, u_strlen(test1), &status);

	753 if (U_FAILURE(status))

	754 {

	755 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status ));

	756 }

	757 else

	758 {

	759 assertEqual(iter1, iter2);

	760 }

	761

	762 /* Now set it to point to a null string with fake length*/

	763 ucol_setText(iter2, NULL, 2, &status);

	764 if (U_FAILURE(status))

	765 {

	766 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status) );

	767 }

	768 else

	769 {

	770 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {

	771 log_err("iter2 with null text expected to return UCOL_NULLORDER\n");

	772 }

	773 }

	774

	775 ucol_closeElements(iter2);

	776 ucol_closeElements(iter1);

	777 ucol_close(en_us);

	778 }

	779

	780 /** @bug 4108762

	781 * Test for getMaxExpansion()

	782 */

	783 static void TestMaxExpansion()

	784 {

	785 UErrorCode status = U_ZERO_ERROR;

	786 UCollator coll ;/= ucol_open("en_US", &status);*/

	787 UChar ch = 0;

	788 UChar32 unassigned = 0xEFFFD;

	789 UChar supplementary[2];

	790 uint32_t stringOffset = 0;

	791 UBool isError = FALSE;

	792 uint32_t sorder = 0;

	793 UCollationElements iter ;/= ucol_openElements(coll, &ch, 1, &status);*/

	794 uint32_t temporder = 0;

	795

	796 UChar rule[256];

	797 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");

	798 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,

	799 UCOL_DEFAULT_STRENGTH,NULL, &status);

	800 if(U_SUCCESS(status) && coll) {

	801 iter = ucol_openElements(coll, &ch, 1, &status);

	802

	803 while (ch < 0xFFFF && U_SUCCESS(status)) {

	804 int count = 1;

	805 uint32_t order;

	806 int32_t size = 0;

	807

	808 ch ++;

	809

	810 ucol_setText(iter, &ch, 1, &status);

	811 order = ucol_previous(iter, &status);

	812

	813 /* thai management */

	814 if (order == 0)

	815 order = ucol_previous(iter, &status);

	816

	817 while (U_SUCCESS(status) &&

	818 ucol_previous(iter, &status) != UCOL_NULLORDER) {

	819 count ++;

	820 }

	821

	822 size = ucol_getMaxExpansion(iter, order);

	823 if (U_FAILURE(status) \|\| size < count) {

	824 log_err("Failure at codepoint %d, maximum expansion count < %d\n",

	825 ch, count);

	826 }

	827 }

	828

	829 /* testing for exact max expansion */

	830 ch = 0;

	831 while (ch < 0x61) {

	832 uint32_t order;

	833 int32_t size;

	834 ucol_setText(iter, &ch, 1, &status);

	835 order = ucol_previous(iter, &status);

	836 size = ucol_getMaxExpansion(iter, order);

	837 if (U_FAILURE(status) \|\| size != 1) {

	838 log_err("Failure at codepoint %d, maximum expansion count < %d\n",

	839 ch, 1);

	840 }

	841 ch ++;

	842 }

	843

	844 ch = 0x63;

	845 ucol_setText(iter, &ch, 1, &status);

	846 temporder = ucol_previous(iter, &status);

	847

	848 if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) != 3) {

	849 log_err("Failure at codepoint %d, maximum expansion count != %d\n",

	850 ch, 3);

	851 }

	852

	853 ch = 0x64;

	854 ucol_setText(iter, &ch, 1, &status);

	855 temporder = ucol_previous(iter, &status);

	856

	857 if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) != 1) {

	858 log_err("Failure at codepoint %d, maximum expansion count != %d\n",

	859 ch, 3);

	860 }

	861

	862 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError);

	863 ucol_setText(iter, supplementary, 2, &status);

	864 sorder = ucol_previous(iter, &status);

	865

	866 if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, sorder) != 2) {

	867 log_err("Failure at codepoint %d, maximum expansion count < %d\n",

	868 ch, 2);

	869 }

	870

	871 /* testing jamo */

	872 ch = 0x1165;

	873

	874 ucol_setText(iter, &ch, 1, &status);

	875 temporder = ucol_previous(iter, &status);

	876 if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) > 3) {

	877 log_err("Failure at codepoint %d, maximum expansion count > %d\n",

	878 ch, 3);

	879 }

	880

	881 ucol_closeElements(iter);

	882 ucol_close(coll);

	883

	884 /* testing special jamo &a<\u1160 */

	885 rule[0] = 0x26;

	886 rule[1] = 0x71;

	887 rule[2] = 0x3c;

	888 rule[3] = 0x1165;

	889 rule[4] = 0x2f;

	890 rule[5] = 0x71;

	891 rule[6] = 0x71;

	892 rule[7] = 0x71;

	893 rule[8] = 0x71;

	894 rule[9] = 0;

	895

	896 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,

	897 UCOL_DEFAULT_STRENGTH,NULL, &status);

	898 iter = ucol_openElements(coll, &ch, 1, &status);

	899

	900 temporder = ucol_previous(iter, &status);

	901 if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) != 6) {

	902 log_err("Failure at codepoint %d, maximum expansion count > %d\n",

	903 ch, 5);

	904 }

	905

	906 ucol_closeElements(iter);

	907 ucol_close(coll);

	908 } else {

	909 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(statu s));

	910 }

	911

	912 }

	913

	914

	915 static void assertEqual(UCollationElements i1, UCollationElements i2)

	916 {

	917 int32_t c1, c2;

	918 int32_t count = 0;

	919 UErrorCode status = U_ZERO_ERROR;

	920

	921 do

	922 {

	923 c1 = ucol_next(i1, &status);

	924 c2 = ucol_next(i2, &status);

	925

	926 if (c1 != c2)

	927 {

	928 log_err("Error in iteration %d assetEqual between\n %d and %d, t hey are not equal\n", count, c1, c2);

	929 break;

	930 }

	931

	932 count += 1;

	933 }

	934 while (c1 != UCOL_NULLORDER);

	935 }

	936

	937 /**

	938 * Testing iterators with extremely small buffers

	939 */

	940 static void TestSmallBuffer()

	941 {

	942 UErrorCode status = U_ZERO_ERROR;

	943 UCollator *coll;

	944 UCollationElements *testiter,

	945 *iter;

	946 int32_t count = 0;

	947 OrderAndOffset *testorders,

	948 *orders;

	949

	950 UChar teststr[500];

	951 UChar str[] = {0x300, 0x31A, 0};

	952 /*

	953 creating a long string of decomposable characters,

	954 since by default the writable buffer is of size 256

	955 */

	956 while (count < 500) {

	957 if ((count & 1) == 0) {

	958 teststr[count ++] = 0x300;

	959 }

	960 else {

	961 teststr[count ++] = 0x31A;

	962 }

	963 }

	964

	965 coll = ucol_open("th_TH", &status);

	966 if(U_SUCCESS(status) && coll) {

	967 testiter = ucol_openElements(coll, teststr, 500, &status);

	968 iter = ucol_openElements(coll, str, 2, &status);

	969

	970 orders = getOrders(iter, &count);

	971 if (count != 2) {

	972 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n") ;

	973 }

	974

	975 /*

	976 this will rearrange the string data to 250 characters of 0x300 first then

	977 250 characters of 0x031A

	978 */

	979 testorders = getOrders(testiter, &count);

	980

	981 if (count != 500) {

	982 log_err("Error decomposition does not give the right sized collation e lements\n");

	983 }

	984

	985 while (count != 0) {

	986 /* UCA collation element for 0x0F76 */

	987 if ((count > 250 && testorders[-- count].order != orders[1].order) \|\|

	988 (count <= 250 && testorders[-- count].order != orders[0].order)) {

	989 log_err("Error decomposition does not give the right collation ele ment at %d count\n", count);

	990 break;

	991 }

	992 }

	993

	994 free(testorders);

	995 free(orders);

	996

	997 ucol_reset(testiter);

	998

	999 /* ensures closing of elements done properly to clear writable buffer */

	1000 ucol_next(testiter, &status);

	1001 ucol_next(testiter, &status);

	1002 ucol_closeElements(testiter);

	1003 ucol_closeElements(iter);

	1004 ucol_close(coll);

	1005 } else {

	1006 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(statu s));

	1007 }

	1008 }

	1009

	1010 /**

	1011 * Sniplets of code from genuca

	1012 */

	1013 static int32_t hex2num(char hex) {

	1014 if(hex>='0' && hex <='9') {

	1015 return hex-'0';

	1016 } else if(hex>='a' && hex<='f') {

	1017 return hex-'a'+10;

	1018 } else if(hex>='A' && hex<='F') {

	1019 return hex-'A'+10;

	1020 } else {

	1021 return 0;

	1022 }

	1023 }

	1024

	1025 /**

	1026 * Getting codepoints from a string

	1027 * @param str character string contain codepoints seperated by space and ended

	1028 * by a semicolon

	1029 * @param codepoints array for storage, assuming size > 5

	1030 * @return position at the end of the codepoint section

	1031 */

	1032 static char getCodePoints(char str, UChar codepoints, UChar contextCPs) {

	1033 UErrorCode errorCode = U_ZERO_ERROR;

	1034 char *semi = uprv_strchr(str, ';');

	1035 char *pipe = uprv_strchr(str, '\|');

	1036 char *s;

	1037 *codepoints = 0;

	1038 *contextCPs = 0;

	1039 if(semi == NULL) {

	1040 log_err("expected semicolon after code point string in FractionalUCA.txt %s\n", str);

	1041 return str;

	1042 }

	1043 if(pipe != NULL) {

	1044 int32_t contextLength;

	1045 *pipe = 0;

	1046 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode);

	1047 *pipe = '\|';

	1048 if(U_FAILURE(errorCode)) {

	1049 log_err("error parsing precontext string from FractionalUCA.txt %s\n ", str);

	1050 return str;

	1051 }

	1052 /* prepend the precontext string to the codepoints */

	1053 u_memcpy(codepoints, contextCPs, contextLength);

	1054 codepoints += contextLength;

	1055 /* start of the code point string */

	1056 s = pipe + 1;

	1057 } else {

	1058 s = str;

	1059 }

	1060 u_parseString(s, codepoints, 99, NULL, &errorCode);

	1061 if(U_FAILURE(errorCode)) {

	1062 log_err("error parsing code point string from FractionalUCA.txt %s\n", s tr);

	1063 return str;

	1064 }

	1065 return semi + 1;

	1066 }

	1067

	1068 /**

	1069 * Sniplets of code from genuca

	1070 */

	1071 static int32_t

	1072 readElement(char *from, char to, char separator, UErrorCode *status)

	1073 {

	1074 if (U_SUCCESS(*status)) {

	1075 char buffer[1024];

	1076 int32_t i = 0;

	1077 while (**from != separator) {

	1078 if (**from != ' ') {

	1079 (buffer+i++) = *from;

	1080 }

	1081 (*from)++;

	1082 }

	1083 (*from)++;

	1084 *(buffer + i) = 0;

	1085 strcpy(to, buffer);

	1086 return i/2;

	1087 }

	1088

	1089 return 0;

	1090 }

	1091

	1092 /**

	1093 * Sniplets of code from genuca

	1094 */

	1095 static uint32_t

	1096 getSingleCEValue(char primary, char secondary, char *tertiary,

	1097 UErrorCode *status)

	1098 {

	1099 if (U_SUCCESS(*status)) {

	1100 uint32_t value = 0;

	1101 char primsave = '\0';

	1102 char secsave = '\0';

	1103 char tersave = '\0';

	1104 char *primend = primary+4;

	1105 char *secend = secondary+2;

	1106 char *terend = tertiary+2;

	1107 uint32_t primvalue;

	1108 uint32_t secvalue;

	1109 uint32_t tervalue;

	1110

	1111 if (uprv_strlen(primary) > 4) {

	1112 primsave = *primend;

	1113 *primend = '\0';

	1114 }

	1115

	1116 if (uprv_strlen(secondary) > 2) {

	1117 secsave = *secend;

	1118 *secend = '\0';

	1119 }

	1120

	1121 if (uprv_strlen(tertiary) > 2) {

	1122 tersave = *terend;

	1123 *terend = '\0';

	1124 }

	1125

	1126 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;

	1127 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;

	1128 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;

	1129 if(primvalue <= 0xFF) {

	1130 primvalue <<= 8;

	1131 }

	1132

	1133 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)

	1134 \| ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)

	1135 \| (tervalue & UCOL_TERTIARYORDERMASK);

	1136

	1137 if(primsave!='\0') {

	1138 *primend = primsave;

	1139 }

	1140 if(secsave!='\0') {

	1141 *secend = secsave;

	1142 }

	1143 if(tersave!='\0') {

	1144 *terend = tersave;

	1145 }

	1146 return value;

	1147 }

	1148 return 0;

	1149 }

	1150

	1151 /**

	1152 * Getting collation elements generated from a string

	1153 * @param str character string contain collation elements contained in [] and

	1154 * seperated by space

	1155 * @param ce array for storage, assuming size > 20

	1156 * @param status error status

	1157 * @return position at the end of the codepoint section

	1158 */

	1159 static char * getCEs(char str, uint32_t ces, UErrorCode *status) {

	1160 char *pStartCP = uprv_strchr(str, '[');

	1161 int count = 0;

	1162 char *pEndCP;

	1163 char primary[100];

	1164 char secondary[100];

	1165 char tertiary[100];

	1166

	1167 while (*pStartCP == '[') {

	1168 uint32_t primarycount = 0;

	1169 uint32_t secondarycount = 0;

	1170 uint32_t tertiarycount = 0;

	1171 uint32_t CEi = 1;

	1172 pEndCP = strchr(pStartCP, ']');

	1173 if(pEndCP == NULL) {

	1174 break;

	1175 }

	1176 pStartCP ++;

	1177

	1178 primarycount = readElement(&pStartCP, primary, ',', status);

	1179 secondarycount = readElement(&pStartCP, secondary, ',', status);

	1180 tertiarycount = readElement(&pStartCP, tertiary, ']', status);

	1181

	1182 /* I want to get the CEs entered right here, including continuation */

	1183 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);

	1184 if (U_FAILURE(*status)) {

	1185 break;

	1186 }

	1187

	1188 while (2 * CEi < primarycount \|\| CEi < secondarycount \|\|

	1189 CEi < tertiarycount) {

	1190 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */

	1191 if (2 * CEi < primarycount) {

	1192 value \|= ((hex2num((primary + 4 CEi)) & 0xF) << 28);

	1193 value \|= ((hex2num((primary + 4 CEi + 1)) & 0xF) << 24);

	1194 }

	1195

	1196 if (2 * CEi + 1 < primarycount) {

	1197 value \|= ((hex2num((primary + 4 CEi + 2)) & 0xF) << 20);

	1198 value \|= ((hex2num((primary + 4 CEi + 3)) &0xF) << 16);

	1199 }

	1200

	1201 if (CEi < secondarycount) {

	1202 value \|= ((hex2num((secondary + 2 CEi)) & 0xF) << 12);

	1203 value \|= ((hex2num((secondary + 2 CEi + 1)) & 0xF) << 8);

	1204 }

	1205

	1206 if (CEi < tertiarycount) {

	1207 value \|= ((hex2num((tertiary + 2 CEi)) & 0x3) << 4);

	1208 value \|= (hex2num((tertiary + 2 CEi + 1)) & 0xF);

	1209 }

	1210

	1211 CEi ++;

	1212 ces[count ++] = value;

	1213 }

	1214

	1215 pStartCP = pEndCP + 1;

	1216 }

	1217 ces[count] = 0;

	1218 return pStartCP;

	1219 }

	1220

	1221 /**

	1222 * Getting the FractionalUCA.txt file stream

	1223 */

	1224 static FileStream * getFractionalUCA(void)

	1225 {

	1226 char newPath[256];

	1227 char backupPath[256];

	1228 FileStream *result = NULL;

	1229

	1230 /* Look inside ICU_DATA first */

	1231 uprv_strcpy(newPath, ctest_dataSrcDir());

	1232 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );

	1233 uprv_strcat(newPath, "FractionalUCA.txt");

	1234

	1235 /* As a fallback, try to guess where the source data was located

	1236 * at the time ICU was built, and look there.

	1237 */

	1238 #if defined (U_TOPSRCDIR)

	1239 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");

	1240 #else

	1241 {

	1242 UErrorCode errorCode = U_ZERO_ERROR;

	1243 strcpy(backupPath, loadTestData(&errorCode));

	1244 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_ SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");

	1245 }

	1246 #endif

	1247 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "Fractional UCA.txt");

	1248

	1249 result = T_FileStream_open(newPath, "rb");

	1250

	1251 if (result == NULL) {

	1252 result = T_FileStream_open(backupPath, "rb");

	1253 if (result == NULL) {

	1254 log_err("Failed to open either %s or %s\n", newPath, backupPath);

	1255 }

	1256 }

	1257 return result;

	1258 }

	1259

	1260 /**

	1261 * Testing the CEs returned by the iterator

	1262 */

	1263 static void TestCEs() {

	1264 FileStream *file = NULL;

	1265 char line[2048];

	1266 char *str;

	1267 UChar codepoints[10];

	1268 uint32_t ces[20];

	1269 UErrorCode status = U_ZERO_ERROR;

	1270 UCollator *coll = ucol_open("", &status);

	1271 uint32_t lineNo = 0;

	1272 UChar contextCPs[5];

	1273

	1274 if (U_FAILURE(status)) {

	1275 log_err_status(status, "Error in opening root collator -> %s\n", u_error Name(status));

	1276 return;

	1277 }

	1278

	1279 file = getFractionalUCA();

	1280

	1281 if (file == NULL) {

	1282 log_err("* unable to open input FractionalUCA.txt file *\n");

	1283 return;

	1284 }

	1285

	1286

	1287 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {

	1288 int count = 0;

	1289 UCollationElements *iter;

	1290 int32_t preContextCeLen=0;

	1291 lineNo++;

	1292 /* skip this line if it is empty or a comment or is a return value

	1293 or start of some variable section */

	1294 if(line[0] == 0 \|\| line[0] == '#' \|\| line[0] == '\n' \|\|

	1295 line[0] == 0x000D \|\| line[0] == '[') {

	1296 continue;

	1297 }

	1298

	1299 str = getCodePoints(line, codepoints, contextCPs);

	1300

	1301 /* these are 'fake' codepoints in the fractional UCA, and are used just

	1302 * for positioning of indirect values. They should not go through this

	1303 * test.

	1304 */

	1305 if(*codepoints == 0xFDD0) {

	1306 continue;

	1307 }

	1308 if (*contextCPs != 0) {

	1309 iter = ucol_openElements(coll, contextCPs, -1, &status);

	1310 if (U_FAILURE(status)) {

	1311 log_err("Error in opening collation elements\n");

	1312 break;

	1313 }

	1314 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t )UCOL_NULLORDER) {

	1315 preContextCeLen++;

	1316 }

	1317 ucol_closeElements(iter);

	1318 }

	1319

	1320 getCEs(str, ces+preContextCeLen, &status);

	1321 if (U_FAILURE(status)) {

	1322 log_err("Error in parsing collation elements in FractionalUCA.txt\n" );

	1323 break;

	1324 }

	1325 iter = ucol_openElements(coll, codepoints, -1, &status);

	1326 if (U_FAILURE(status)) {

	1327 log_err("Error in opening collation elements\n");

	1328 break;

	1329 }

	1330 for (;;) {

	1331 uint32_t ce = (uint32_t)ucol_next(iter, &status);

	1332 if (ce == 0xFFFFFFFF) {

	1333 ce = 0;

	1334 }

	1335 /* we now unconditionally reorder Thai/Lao prevowels, so this

	1336 * test would fail if we don't skip here.

	1337 */

	1338 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {

	1339 continue;

	1340 }

	1341 if (ce != ces[count] \|\| U_FAILURE(status)) {

	1342 log_err("Collation elements in FractionalUCA.txt and iterators d o not match!\n");

	1343 break;

	1344 }

	1345 if (ces[count] == 0) {

	1346 break;

	1347 }

	1348 count ++;

	1349 }

	1350 ucol_closeElements(iter);

	1351 }

	1352

	1353 T_FileStream_close(file);

	1354 ucol_close(coll);

	1355 }

	1356

	1357 /**

	1358 * Testing the discontigous contractions

	1359 */

	1360 static void TestDiscontiguos() {

	1361 const char *rulestr =

	1362 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";

	1363 UChar rule[50];

	1364 int rulelen = u_unescape(rulestr, rule, 50);

	1365 const char *src[] = {

	1366 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC",

	1367 /* base character blocked */

	1368 "XD\\u0300", "XD\\u0300\\u0315",

	1369 /* non blocking combining character */

	1370 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",

	1371 /* blocking combining character */

	1372 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",

	1373 /* contraction prefix */

	1374 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",

	1375 "X\\u0300\\u031A\\u0315",

	1376 /* ends not with a contraction character */

	1377 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",

	1378 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"

	1379 };

	1380 const char *tgt[] = {

	1381 /* non blocking combining character */

	1382 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",

	1383 /* base character blocked */

	1384 "X D \\u0300", "X D \\u0300\\u0315",

	1385 /* non blocking combining character */

	1386 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",

	1387 /* blocking combining character */

	1388 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",

	1389 /* contraction prefix */

	1390 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",

	1391 "X\\u0300 \\u031A \\u0315",

	1392 /* ends not with a contraction character */

	1393 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",

	1394 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"

	1395 };

	1396 int size = 20;

	1397 UCollator *coll;

	1398 UErrorCode status = U_ZERO_ERROR;

	1399 int count = 0;

	1400 UCollationElements *iter;

	1401 UCollationElements *resultiter;

	1402

	1403 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,N ULL, &status);

	1404 iter = ucol_openElements(coll, rule, 1, &status);

	1405 resultiter = ucol_openElements(coll, rule, 1, &status);

	1406

	1407 if (U_FAILURE(status)) {

	1408 log_err_status(status, "Error opening collation rules -> %s\n", u_errorN ame(status));

	1409 return;

	1410 }

	1411

	1412 while (count < size) {

	1413 UChar str[20];

	1414 UChar tstr[20];

	1415 int strLen = u_unescape(src[count], str, 20);

	1416 UChar *s;

	1417

	1418 ucol_setText(iter, str, strLen, &status);

	1419 if (U_FAILURE(status)) {

	1420 log_err("Error opening collation iterator\n");

	1421 return;

	1422 }

	1423

	1424 u_unescape(tgt[count], tstr, 20);

	1425 s = tstr;

	1426

	1427 log_verbose("count %d\n", count);

	1428

	1429 for (;;) {

	1430 uint32_t ce;

	1431 UChar *e = u_strchr(s, 0x20);

	1432 if (e == 0) {

	1433 e = u_strchr(s, 0);

	1434 }

	1435 ucol_setText(resultiter, s, (int32_t)(e - s), &status);

	1436 ce = ucol_next(resultiter, &status);

	1437 if (U_FAILURE(status)) {

	1438 log_err("Error manipulating collation iterator\n");

	1439 return;

	1440 }

	1441 while (ce != UCOL_NULLORDER) {

	1442 if (ce != (uint32_t)ucol_next(iter, &status) \|\|

	1443 U_FAILURE(status)) {

	1444 log_err("Discontiguos contraction test mismatch\n");

	1445 return;

	1446 }

	1447 ce = ucol_next(resultiter, &status);

	1448 if (U_FAILURE(status)) {

	1449 log_err("Error getting next collation element\n");

	1450 return;

	1451 }

	1452 }

	1453 s = e + 1;

	1454 if (*e == 0) {

	1455 break;

	1456 }

	1457 }

	1458 ucol_reset(iter);

	1459 backAndForth(iter);

	1460 count ++;

	1461 }

	1462 ucol_closeElements(resultiter);

	1463 ucol_closeElements(iter);

	1464 ucol_close(coll);

	1465 }

	1466

	1467 static void TestCEBufferOverflow()

	1468 {

	1469 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];

	1470 UErrorCode status = U_ZERO_ERROR;

	1471 UChar rule[10];

	1472 UCollator *coll;

	1473 UCollationElements *iter;

	1474

	1475 u_uastrcpy(rule, "&z < AB");

	1476 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);

	1477 if (U_FAILURE(status)) {

	1478 log_err_status(status, "Rule based collator not created for testing ce b uffer overflow -> %s\n", u_errorName(status));

	1479 return;

	1480 }

	1481

	1482 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic

	1483 test. this will cause an overflow in getPrev */

	1484 str[0] = 0x0041; /* 'A' */

	1485 /uprv_memset(str + 1, 0xE0, sizeof(UChar) UCOL_EXPAND_CE_BUFFER_SIZE);*/

	1486 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);

	1487 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */

	1488 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,

	1489 &status);

	1490 if (ucol_previous(iter, &status) == UCOL_NULLORDER \|\|

	1491 status == U_BUFFER_OVERFLOW_ERROR) {

	1492 log_err("CE buffer should not overflow with long string of trail surroga tes\n");

	1493 }

	1494 ucol_closeElements(iter);

	1495 ucol_close(coll);

	1496 }

	1497

	1498 /**

	1499 * Checking collation element validity.

	1500 */

	1501 #define MAX_CODEPOINTS_TO_SHOW 10

	1502 static void showCodepoints(const UChar codepoints, int length, char codepoint Text) {

	1503 int i, lengthToUse = length;

	1504 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) {

	1505 lengthToUse = MAX_CODEPOINTS_TO_SHOW;

	1506 }

	1507 for (i = 0; i < lengthToUse; ++i) {

	1508 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++);

	1509 if (bytesWritten <= 0) {

	1510 break;

	1511 }

	1512 codepointText += bytesWritten;

	1513 }

	1514 if (i < length) {

	1515 sprintf(codepointText, " ...");

	1516 }

	1517 }

	1518

	1519 static UBool checkCEValidity(const UCollator coll, const UChar codepoints,

	1520 int length)

	1521 {

	1522 UErrorCode status = U_ZERO_ERROR;

	1523 UCollationElements *iter = ucol_openElements(coll, codepoints, length,

	1524 &status);

	1525 UBool result = FALSE;

	1526 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE;

	1527 const char * collLocale;

	1528

	1529 if (U_FAILURE(status)) {

	1530 log_err("Error creating iterator for testing validity\n");

	1531 return FALSE;

	1532 }

	1533 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status);

	1534 if (U_FAILURE(status) \|\| collLocale==NULL) {

	1535 status = U_ZERO_ERROR;

	1536 collLocale = "?";

	1537 }

	1538

	1539 for (;;) {

	1540 uint32_t ce = ucol_next(iter, &status);

	1541 uint32_t primary, p1, p2, secondary, tertiary;

	1542 if (ce == UCOL_NULLORDER) {

	1543 result = TRUE;

	1544 break;

	1545 }

	1546 if (ce == 0) {

	1547 continue;

	1548 }

	1549 if (ce == 0x02000202) {

	1550 /* special CE for merge-sort character */

	1551 if (codepoints == 0xFFFE / && length == 1 */) {

	1552 /*

	1553 * Note: We should check for length==1 but the token parser appe ars

	1554 * to give us trailing NUL characters.

	1555 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTail oredSet()

	1556 * rather than the internal collation rule p arser

	1557 */

	1558 continue;

	1559 } else {

	1560 log_err("Special 02/02/02 weight for code point U+%04X [len %d] != U+FFFE\n",

	1561 (int)*codepoints, (int)length);

	1562 break;

	1563 }

	1564 }

	1565 primary = UCOL_PRIMARYORDER(ce);

	1566 p1 = primary >> 8;

	1567 p2 = primary & 0xFF;

	1568 secondary = UCOL_SECONDARYORDER(ce);

	1569 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION;

	1570

	1571 if (!isContinuation(ce)) {

	1572 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {

	1573 log_err("Empty CE %08lX except for case bits\n", (long)ce);

	1574 break;

	1575 }

	1576 if (p1 == 0) {

	1577 if (p2 != 0) {

	1578 log_err("Primary 00 xx in %08lX\n", (long)ce);

	1579 break;

	1580 }

	1581 primaryDone = TRUE;

	1582 } else {

	1583 if (p1 <= 2 \|\| p1 >= 0xF0) {

	1584 /* Primary first bytes F0..FF are specials. */

	1585 log_err("Primary first byte of %08lX out of range\n", (long) ce);

	1586 break;

	1587 }

	1588 if (p2 == 0) {

	1589 primaryDone = TRUE;

	1590 } else {

	1591 if (p2 <= 3 \|\| p2 >= 0xFF) {

	1592 /* Primary second bytes 03 and FF are sort key compressi on terminators. */

	1593 log_err("Primary second byte of %08lX out of range\n", ( long)ce);

	1594 break;

	1595 }

	1596 primaryDone = FALSE;

	1597 }

	1598 }

	1599 if (secondary == 0) {

	1600 if (primary != 0) {

	1601 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce);

	1602 break;

	1603 }

	1604 secondaryDone = TRUE;

	1605 } else {

	1606 if (secondary <= 2 \|\|

	1607 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COM MON + 0x80))

	1608 ) {

	1609 /* Secondary first bytes common+1..+0x80 are used for sort k ey compression. */

	1610 log_err("Secondary byte of %08lX out of range\n", (long)ce);

	1611 break;

	1612 }

	1613 secondaryDone = FALSE;

	1614 }

	1615 if (tertiary == 0) {

	1616 /* We know that ce != 0. */

	1617 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n", (long)ce);

	1618 break;

	1619 }

	1620 if (tertiary <= 2) {

	1621 log_err("Tertiary byte of %08lX out of range\n", (long)ce);

	1622 break;

	1623 }

	1624 tertiaryDone = FALSE;

	1625 } else {

	1626 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) {

	1627 log_err("Empty continuation %08lX\n", (long)ce);

	1628 break;

	1629 }

	1630 if (primaryDone && primary != 0) {

	1631 log_err("Primary was done but continues in %08lX\n", (long)ce);

	1632 break;

	1633 }

	1634 if (p1 == 0) {

	1635 if (p2 != 0) {

	1636 log_err("Primary 00 xx in %08lX\n", (long)ce);

	1637 break;

	1638 }

	1639 primaryDone = TRUE;

	1640 } else {

	1641 if (p1 <= 2) {

	1642 log_err("Primary first byte of %08lX out of range\n", (long) ce);

	1643 break;

	1644 }

	1645 if (p2 == 0) {

	1646 primaryDone = TRUE;

	1647 } else {

	1648 if (p2 <= 3) {

	1649 log_err("Primary second byte of %08lX out of range\n", ( long)ce);

	1650 break;

	1651 }

	1652 }

	1653 }

	1654 if (secondaryDone && secondary != 0) {

	1655 log_err("Secondary was done but continues in %08lX\n", (long)ce) ;

	1656 break;

	1657 }

	1658 if (secondary == 0) {

	1659 secondaryDone = TRUE;

	1660 } else {

	1661 if (secondary <= 2) {

	1662 log_err("Secondary byte of %08lX out of range\n", (long)ce);

	1663 break;

	1664 }

	1665 }

	1666 if (tertiaryDone && tertiary != 0) {

	1667 log_err("Tertiary was done but continues in %08lX\n", (long)ce);

	1668 break;

	1669 }

	1670 if (tertiary == 0) {

	1671 tertiaryDone = TRUE;

	1672 } else if (tertiary <= 2) {

	1673 log_err("Tertiary byte of %08lX out of range\n", (long)ce);

	1674 break;

	1675 }

	1676 }

	1677 }

	1678 if (!result) {

	1679 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5];

	1680 showCodepoints(codepoints, length, codepointText);

	1681 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText );

	1682 }

	1683 ucol_closeElements(iter);

	1684 return result;

	1685 }

	1686

	1687 static void TestCEValidity()

	1688 {

	1689 /* testing UCA collation elements */

	1690 UErrorCode status = U_ZERO_ERROR;

	1691 /* en_US has no tailorings */

	1692 UCollator *coll = ucol_open("root", &status);

	1693 /* tailored locales */

	1694 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh __PINYIN"};

	1695 const char *loc;

	1696 FileStream *file = NULL;

	1697 char line[2048];

	1698 UChar codepoints[11];

	1699 int count = 0;

	1700 int maxCount = 0;

	1701 UChar contextCPs[3];

	1702 UChar32 c;

	1703 UParseError parseError;

	1704 if (U_FAILURE(status)) {

	1705 log_err_status(status, "en_US collator creation failed -> %s\n", u_error Name(status));

	1706 return;

	1707 }

	1708 log_verbose("Testing UCA elements\n");

	1709 file = getFractionalUCA();

	1710 if (file == NULL) {

	1711 log_err("Fractional UCA data can not be opened\n");

	1712 return;

	1713 }

	1714

	1715 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {

	1716 if(line[0] == 0 \|\| line[0] == '#' \|\| line[0] == '\n' \|\|

	1717 line[0] == 0x000D \|\| line[0] == '[') {

	1718 continue;

	1719 }

	1720

	1721 getCodePoints(line, codepoints, contextCPs);

	1722 checkCEValidity(coll, codepoints, u_strlen(codepoints));

	1723 }

	1724

	1725 log_verbose("Testing UCA elements for the whole range of unicode characters\ n");

	1726 for (c = 0; c <= 0xffff; ++c) {

	1727 if (u_isdefined(c)) {

	1728 codepoints[0] = (UChar)c;

	1729 checkCEValidity(coll, codepoints, 1);

	1730 }

	1731 }

	1732 for (; c <= 0x10ffff; ++c) {

	1733 if (u_isdefined(c)) {

	1734 int32_t i = 0;

	1735 U16_APPEND_UNSAFE(codepoints, i, c);

	1736 checkCEValidity(coll, codepoints, i);

	1737 }

	1738 }

	1739

	1740 ucol_close(coll);

	1741

	1742 /* testing tailored collation elements */

	1743 log_verbose("Testing tailored elements\n");

	1744 if(getTestOption(QUICK_OPTION)) {

	1745 maxCount = sizeof(locale)/sizeof(locale[0]);

	1746 } else {

	1747 maxCount = uloc_countAvailable();

	1748 }

	1749 while (count < maxCount) {

	1750 const UChar *rules = NULL,

	1751 *current = NULL;

	1752 UChar *rulesCopy = NULL;

	1753 int32_t ruleLen = 0;

	1754

	1755 uint32_t chOffset = 0;

	1756 uint32_t chLen = 0;

	1757 uint32_t exOffset = 0;

	1758 uint32_t exLen = 0;

	1759 uint32_t prefixOffset = 0;

	1760 uint32_t prefixLen = 0;

	1761 UBool startOfRules = TRUE;

	1762 UColOptionSet opts;

	1763

	1764 UColTokenParser src;

	1765 uint32_t strength = 0;

	1766 uint16_t specs = 0;

	1767 if(getTestOption(QUICK_OPTION)) {

	1768 loc = locale[count];

	1769 } else {

	1770 loc = uloc_getAvailable(count);

	1771 if(!hasCollationElements(loc)) {

	1772 count++;

	1773 continue;

	1774 }

	1775 }

	1776

	1777 uprv_memset(&src, 0, sizeof(UColTokenParser));

	1778

	1779 log_verbose("Testing CEs for %s\n", loc);

	1780

	1781 coll = ucol_open(loc, &status);

	1782 if (U_FAILURE(status)) {

	1783 log_err("%s collator creation failed\n", loc);

	1784 return;

	1785 }

	1786

	1787 src.opts = &opts;

	1788 rules = ucol_getRules(coll, &ruleLen);

	1789

	1790 if (ruleLen > 0) {

	1791 rulesCopy = (UChar *)uprv_malloc((ruleLen +

	1792 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));

	1793 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));

	1794 src.current = src.source = rulesCopy;

	1795 src.end = rulesCopy + ruleLen;

	1796 src.extraCurrent = src.end;

	1797 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

	1798

	1799 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse NextToken can cause the pointer to

	1800 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */

	1801 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parse Error,&status)) != NULL) {

	1802 strength = src.parsedToken.strength;

	1803 chOffset = src.parsedToken.charsOffset;

	1804 chLen = src.parsedToken.charsLen;

	1805 exOffset = src.parsedToken.extensionOffset;

	1806 exLen = src.parsedToken.extensionLen;

	1807 prefixOffset = src.parsedToken.prefixOffset;

	1808 prefixLen = src.parsedToken.prefixLen;

	1809 specs = src.parsedToken.flags;

	1810

	1811 startOfRules = FALSE;

	1812 uprv_memcpy(codepoints, src.source + chOffset,

	1813 chLen * sizeof(UChar));

	1814 codepoints[chLen] = 0;

	1815 checkCEValidity(coll, codepoints, chLen);

	1816 }

	1817 uprv_free(src.source);

	1818 }

	1819

	1820 ucol_close(coll);

	1821 count ++;

	1822 }

	1823 T_FileStream_close(file);

	1824 }

	1825

	1826 static void printSortKeyError(const UChar *codepoints, int length,

	1827 uint8_t *sortkey, int sklen)

	1828 {

	1829 int count = 0;

	1830 log_err("Sortkey not valid for ");

	1831 while (length > 0) {

	1832 log_err("0x%04x ", *codepoints);

	1833 length --;

	1834 codepoints ++;

	1835 }

	1836 log_err("\nSortkey : ");

	1837 while (count < sklen) {

	1838 log_err("0x%02x ", sortkey[count]);

	1839 count ++;

	1840 }

	1841 log_err("\n");

	1842 }

	1843

	1844 /**

	1845 * Checking sort key validity for all levels

	1846 */

	1847 static UBool checkSortKeyValidity(UCollator *coll,

	1848 const UChar *codepoints,

	1849 int length)

	1850 {

	1851 UErrorCode status = U_ZERO_ERROR;

	1852 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,

	1853 UCOL_TERTIARY, UCOL_QUATERNARY,

	1854 UCOL_IDENTICAL};

	1855 int strengthlen = 5;

	1856 int strengthIndex = 0;

	1857 int caselevel = 0;

	1858

	1859 while (caselevel < 1) {

	1860 if (caselevel == 0) {

	1861 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);

	1862 }

	1863 else {

	1864 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);

	1865 }

	1866

	1867 while (strengthIndex < strengthlen) {

	1868 int count01 = 0;

	1869 uint32_t count = 0;

	1870 uint8_t sortkey[128];

	1871 uint32_t sklen;

	1872

	1873 ucol_setStrength(coll, strength[strengthIndex]);

	1874 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);

	1875 while (sortkey[count] != 0) {

	1876 if (sortkey[count] == 2 \|\| (sortkey[count] == 3 && count01 > 0 & & strengthIndex != 4)) {

	1877 printSortKeyError(codepoints, length, sortkey, sklen);

	1878 return FALSE;

	1879 }

	1880 if (sortkey[count] == 1) {

	1881 count01 ++;

	1882 }

	1883 count ++;

	1884 }

	1885

	1886 if (count + 1 != sklen \|\| (count01 != strengthIndex + caselevel)) {

	1887 printSortKeyError(codepoints, length, sortkey, sklen);

	1888 return FALSE;

	1889 }

	1890 strengthIndex ++;

	1891 }

	1892 caselevel ++;

	1893 }

	1894 return TRUE;

	1895 }

	1896

	1897 static void TestSortKeyValidity(void)

	1898 {

	1899 /* testing UCA collation elements */

	1900 UErrorCode status = U_ZERO_ERROR;

	1901 /* en_US has no tailorings */

	1902 UCollator *coll = ucol_open("en_US", &status);

	1903 /* tailored locales */

	1904 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};

	1905 FileStream *file = NULL;

	1906 char line[2048];

	1907 UChar codepoints[10];

	1908 int count = 0;

	1909 UChar contextCPs[5];

	1910 UParseError parseError;

	1911 if (U_FAILURE(status)) {

	1912 log_err_status(status, "en_US collator creation failed -> %s\n", u_error Name(status));

	1913 return;

	1914 }

	1915 log_verbose("Testing UCA elements\n");

	1916 file = getFractionalUCA();

	1917 if (file == NULL) {

	1918 log_err("Fractional UCA data can not be opened\n");

	1919 return;

	1920 }

	1921

	1922 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {

	1923 if(line[0] == 0 \|\| line[0] == '#' \|\| line[0] == '\n' \|\|

	1924 line[0] == 0x000D \|\| line[0] == '[') {

	1925 continue;

	1926 }

	1927

	1928 getCodePoints(line, codepoints, contextCPs);

	1929 if(codepoints[0] == 0xFFFE) {

	1930 /* Skip special merge-sort character U+FFFE which has otherwise ille gal 02 weight bytes. */

	1931 continue;

	1932 }

	1933 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));

	1934 }

	1935

	1936 log_verbose("Testing UCA elements for the whole range of unicode characters\ n");

	1937 codepoints[0] = 0;

	1938

	1939 while (codepoints[0] < 0xFFFF) {

	1940 if (u_isdefined((UChar32)codepoints[0])) {

	1941 checkSortKeyValidity(coll, codepoints, 1);

	1942 }

	1943 codepoints[0] ++;

	1944 }

	1945

	1946 ucol_close(coll);

	1947

	1948 /* testing tailored collation elements */

	1949 log_verbose("Testing tailored elements\n");

	1950 while (count < 5) {

	1951 const UChar *rules = NULL,

	1952 *current = NULL;

	1953 UChar *rulesCopy = NULL;

	1954 int32_t ruleLen = 0;

	1955

	1956 uint32_t chOffset = 0;

	1957 uint32_t chLen = 0;

	1958 uint32_t exOffset = 0;

	1959 uint32_t exLen = 0;

	1960 uint32_t prefixOffset = 0;

	1961 uint32_t prefixLen = 0;

	1962 UBool startOfRules = TRUE;

	1963 UColOptionSet opts;

	1964

	1965 UColTokenParser src;

	1966 uint32_t strength = 0;

	1967 uint16_t specs = 0;

	1968

	1969 uprv_memset(&src, 0, sizeof(UColTokenParser));

	1970

	1971 coll = ucol_open(locale[count], &status);

	1972 if (U_FAILURE(status)) {

	1973 log_err("%s collator creation failed\n", locale[count]);

	1974 return;

	1975 }

	1976

	1977 src.opts = &opts;

	1978 rules = ucol_getRules(coll, &ruleLen);

	1979

	1980 if (ruleLen > 0) {

	1981 rulesCopy = (UChar *)uprv_malloc((ruleLen +

	1982 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));

	1983 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));

	1984 src.current = src.source = rulesCopy;

	1985 src.end = rulesCopy + ruleLen;

	1986 src.extraCurrent = src.end;

	1987 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;

	1988

	1989 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse NextToken can cause the pointer to

	1990 the rules copy in src.source to get reallocated, freeing the original pointer in rulesCopy */

	1991 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseE rror, &status)) != NULL) {

	1992 strength = src.parsedToken.strength;

	1993 chOffset = src.parsedToken.charsOffset;

	1994 chLen = src.parsedToken.charsLen;

	1995 exOffset = src.parsedToken.extensionOffset;

	1996 exLen = src.parsedToken.extensionLen;

	1997 prefixOffset = src.parsedToken.prefixOffset;

	1998 prefixLen = src.parsedToken.prefixLen;

	1999 specs = src.parsedToken.flags;

	2000

	2001 startOfRules = FALSE;

	2002 uprv_memcpy(codepoints, src.source + chOffset,

	2003 chLen * sizeof(UChar));

	2004 codepoints[chLen] = 0;

	2005 if(codepoints[0] == 0xFFFE) {

	2006 /* Skip special merge-sort character U+FFFE which has otherw ise illegal 02 weight bytes. */

	2007 continue;

	2008 }

	2009 checkSortKeyValidity(coll, codepoints, chLen);

	2010 }

	2011 uprv_free(src.source);

	2012 }

	2013

	2014 ucol_close(coll);

	2015 count ++;

	2016 }

	2017 T_FileStream_close(file);

	2018 }

	2019

	2020 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « icu46/source/test/cintltst/citertst.h ('k') | icu46/source/test/cintltst/cjaptst.h » ('j') | no next file with comments »