source/test/cintltst/cmsccoll.c - Issue 2435373002: Delete source/test

Side by Side Diff: source/test/cintltst/cmsccoll.c

Issue 2435373002: Delete source/test (Closed)

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
	(Empty)
1

2 /********************************************************************

3 * COPYRIGHT:

4 * Copyright (c) 2001-2015, International Business Machines Corporation and

5 * others. All Rights Reserved.

6 ********************************************************************/

7 /*******************************************************************************

8 *

9 * File cmsccoll.C

10 *

11 *******************************************************************************/

12 /**

13 * These are the tests specific to ICU 1.8 and above, that I didn't know where

14 * to fit.

15 */

16

17 #include <stdio.h>

18

19 #include "unicode/utypes.h"

20

21 #if !UCONFIG_NO_COLLATION

22

23 #include "unicode/ucol.h"

24 #include "unicode/ucoleitr.h"

25 #include "unicode/uloc.h"

26 #include "cintltst.h"

27 #include "ccolltst.h"

28 #include "callcoll.h"

29 #include "unicode/ustring.h"

30 #include "string.h"

31 #include "ucol_imp.h"

32 #include "cmemory.h"

33 #include "cstring.h"

34 #include "uassert.h"

35 #include "unicode/parseerr.h"

36 #include "unicode/ucnv.h"

37 #include "unicode/ures.h"

38 #include "unicode/uscript.h"

39 #include "unicode/utf16.h"

40 #include "uparse.h"

41 #include "putilimp.h"

42

43

/**
 * Number of elements in a true array.
 * Only meaningful for array objects; a pointer (including an array that has
 * decayed to a pointer as a function parameter) yields a wrong count.
 * The argument is parenthesized so that any array-valued expression is safe.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))

/** Row capacity, in UChars, of the fixed-width UChar tables in this file. */
#define MAX_TOKEN_LEN 16

47

48 typedef UCollationResult tst_strcoll(void *collator, const int object,

49 const UChar *source, const int sLen,

50 const UChar *target, const int tLen);

51

52

53

/**
 * Strings for IncompleteCntTest, listed in the ascending order expected
 * under the tailoring " & Z < ABC < Q < B": every row must compare LESS
 * than every later row.
 */
static const char cnt1[][10] = {
    "AA",
    "AC",
    "AZ",
    "AQ",
    "AB",
    "ABZ",
    "ABQ",
    "Z",
    "ABC",
    "Q",
    "B"
};

/**
 * Strings for IncompleteCntTest, listed in the ascending order expected
 * under the tailoring " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
    "DA",
    "DAD",
    "DAZ",
    "MAR",
    "Z",
    "DAVIS",
    "MARK",
    "DAV",
    "DAVI"
};

80

81 static void IncompleteCntTest(void)

82 {

83 UErrorCode status = U_ZERO_ERROR;

84 UChar temp[90];

85 UChar t1[90];

86 UChar t2[90];

87

88 UCollator *coll = NULL;

89 uint32_t i = 0, j = 0;

90 uint32_t size = 0;

91

92 u_uastrcpy(temp, " & Z < ABC < Q < B");

93

94 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N ULL,&status);

95

96 if(U_SUCCESS(status)) {

97 size = sizeof(cnt1)/sizeof(cnt1[0]);

98 for(i = 0; i < size-1; i++) {

99 for(j = i+1; j < size; j++) {

100 UCollationElements *iter;

101 u_uastrcpy(t1, cnt1[i]);

102 u_uastrcpy(t2, cnt1[j]);

103 doTest(coll, t1, t2, UCOL_LESS);

104 /* synwee : added collation element iterator test */

105 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);

106 if (U_FAILURE(status)) {

107 log_err("Creation of iterator failed\n");

108 break;

109 }

110 backAndForth(iter);

111 ucol_closeElements(iter);

112 }

113 }

114 }

115

116 ucol_close(coll);

117

118

119 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV");

120 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NU LL, &status);

121

122 if(U_SUCCESS(status)) {

123 size = sizeof(cnt2)/sizeof(cnt2[0]);

124 for(i = 0; i < size-1; i++) {

125 for(j = i+1; j < size; j++) {

126 UCollationElements *iter;

127 u_uastrcpy(t1, cnt2[i]);

128 u_uastrcpy(t2, cnt2[j]);

129 doTest(coll, t1, t2, UCOL_LESS);

130

131 /* synwee : added collation element iterator test */

132 iter = ucol_openElements(coll, t2, u_strlen(t2), &status);

133 if (U_FAILURE(status)) {

134 log_err("Creation of iterator failed\n");

135 break;

136 }

137 backAndForth(iter);

138 ucol_closeElements(iter);

139 }

140 }

141 }

142

143 ucol_close(coll);

144

145

146 }

147

/**
 * Strings for BlackBirdTest, in the ascending order expected with
 * alternate handling SHIFTED at QUATERNARY strength.
 */
static const char shifted[][20] = {
    "black bird",
    "black-bird",
    "blackbird",
    "black Bird",
    "black-Bird",
    "blackBird",
    "black birds",
    "black-birds",
    "blackbirds"
};

159

160 const static UCollationResult shiftedTert[] = {

161 UCOL_EQUAL,

162 UCOL_EQUAL,

163 UCOL_EQUAL,

164 UCOL_LESS,

165 UCOL_EQUAL,

166 UCOL_EQUAL,

167 UCOL_LESS,

168 UCOL_EQUAL,

169 UCOL_EQUAL

170 };

171

/**
 * Strings for BlackBirdTest, in the ascending order expected with
 * alternate handling NON_IGNORABLE.
 */
static const char nonignorable[][20] = {
    "black bird",
    "black Bird",
    "black birds",
    "black-bird",
    "black-Bird",
    "black-birds",
    "blackbird",
    "blackBird",
    "blackbirds"
};

183

184 static void BlackBirdTest(void) {

185 UErrorCode status = U_ZERO_ERROR;

186 UChar t1[90];

187 UChar t2[90];

188

189 uint32_t i = 0, j = 0;

190 uint32_t size = 0;

191 UCollator *coll = ucol_open("en_US", &status);

192

193 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);

194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);

195

196 if(U_SUCCESS(status)) {

197 size = sizeof(nonignorable)/sizeof(nonignorable[0]);

198 for(i = 0; i < size-1; i++) {

199 for(j = i+1; j < size; j++) {

200 u_uastrcpy(t1, nonignorable[i]);

201 u_uastrcpy(t2, nonignorable[j]);

202 doTest(coll, t1, t2, UCOL_LESS);

203 }

204 }

205 }

206

207 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

208 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status);

209

210 if(U_SUCCESS(status)) {

211 size = sizeof(shifted)/sizeof(shifted[0]);

212 for(i = 0; i < size-1; i++) {

213 for(j = i+1; j < size; j++) {

214 u_uastrcpy(t1, shifted[i]);

215 u_uastrcpy(t2, shifted[j]);

216 doTest(coll, t1, t2, UCOL_LESS);

217 }

218 }

219 }

220

221 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status);

222 if(U_SUCCESS(status)) {

223 size = sizeof(shifted)/sizeof(shifted[0]);

224 for(i = 1; i < size; i++) {

225 u_uastrcpy(t1, shifted[i-1]);

226 u_uastrcpy(t2, shifted[i]);

227 doTest(coll, t1, t2, shiftedTert[i]);

228 }

229 }

230

231 ucol_close(coll);

232 }

233

234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = {

235 {0x0041/'A'/, 0x0300, 0x0301, 0x0000},

236 {0x0041/'A'/, 0x0300, 0x0316, 0x0000},

237 {0x0041/'A'/, 0x0300, 0x0000},

238 {0x00C0, 0x0301, 0x0000},

239 /* this would work with forced normalization */

240 {0x00C0, 0x0316, 0x0000}

241 };

242

243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = {

244 {0x0041/'A'/, 0x0301, 0x0300, 0x0000},

245 {0x0041/'A'/, 0x0316, 0x0300, 0x0000},

246 {0x00C0, 0},

247 {0x0041/'A'/, 0x0301, 0x0300, 0x0000},

248 /* this would work with forced normalization */

249 {0x0041/'A'/, 0x0316, 0x0300, 0x0000}

250 };

251

252 const static UCollationResult results[] = {

253 UCOL_GREATER,

254 UCOL_EQUAL,

255 UCOL_EQUAL,

256 UCOL_GREATER,

257 UCOL_EQUAL

258 };

259

260 static void FunkyATest(void)

261 {

262

263 int32_t i;

264 UErrorCode status = U_ZERO_ERROR;

265 UCollator *myCollation;

266 myCollation = ucol_open("en_US", &status);

267 if(U_FAILURE(status)){

268 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

269 return;

270 }

271 log_verbose("Testing some A letters, for some reason\n");

272 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

273 ucol_setStrength(myCollation, UCOL_TERTIARY);

274 for (i = 0; i < 4 ; i++)

275 {

276 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);

277 }

278 ucol_close(myCollation);

279 }

280

281 UColAttributeValue caseFirst[] = {

282 UCOL_OFF,

283 UCOL_LOWER_FIRST,

284 UCOL_UPPER_FIRST

285 };

286

287

288 UColAttributeValue alternateHandling[] = {

289 UCOL_NON_IGNORABLE,

290 UCOL_SHIFTED

291 };

292

293 UColAttributeValue caseLevel[] = {

294 UCOL_OFF,

295 UCOL_ON

296 };

297

298 UColAttributeValue strengths[] = {

299 UCOL_PRIMARY,

300 UCOL_SECONDARY,

301 UCOL_TERTIARY,

302 UCOL_QUATERNARY,

303 UCOL_IDENTICAL

304 };

305

#if 0
/* Printable names, index-parallel to strengths[]. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, index-parallel to caseFirst[]. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, index-parallel to alternateHandling[]. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, index-parallel to caseLevel[]. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/**
 * not used currently - does not test only prints
 *
 * Dumps to stderr the sort key of "M\u00E4rk Davis" (the second letter is
 * replaced by U+00E4) for every combination of case-first, alternate
 * handling, case level and strength.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);

    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /* Index with h: i still holds sizem here and would read past the array. */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, " AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, " caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    fprintf(stderr, " strength: %s\n Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }
            }
        }
    }

    /* The collator was previously never released. */
    ucol_close(coll);
}
#endif

383

384 static void BillFairmanTest(void) {

385 /*

386 ** check for actual locale via ICU resource bundles

387 **

388 ** lp points to the original locale ("fr_FR_....")

389 */

390

391 UResourceBundle lr,cr;

392 UErrorCode lec = U_ZERO_ERROR;

393 const char *lp = "fr_FR_you_ll_never_find_this_locale";

394

395 log_verbose("BillFairmanTest\n");

396

397 lr = ures_open(NULL,lp,&lec);

398 if (lr) {

399 cr = ures_getByKey(lr,"collations",0,&lec);

400 if (cr) {

401 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec);

402 if (lp) {

403 if (U_SUCCESS(lec)) {

404 if(strcmp(lp, "fr") != 0) {

405 log_err("Wrong locale for French Collation Data, expecte d \"fr\" got %s", lp);

406 }

407 }

408 }

409 ures_close(cr);

410 }

411 ures_close(lr);

412 }

413 }

414

/**
 * Escaped strings for TestChMove, in the ascending order expected from the
 * "cs" collator. Each entry is passed through u_unescape() before use, so
 * "\\u030C" denotes U+030C.
 */
static const char chTest[][20] = {
    "c",
    "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h",
    "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s",
    "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};

434

435 static void TestChMove(void) {

436 UChar t1[256] = {0};

437 UChar t2[256] = {0};

438

439 uint32_t i = 0, j = 0;

440 uint32_t size = 0;

441 UErrorCode status = U_ZERO_ERROR;

442

443 UCollator *coll = ucol_open("cs", &status);

444

445 if(U_SUCCESS(status)) {

446 size = sizeof(chTest)/sizeof(chTest[0]);

447 for(i = 0; i < size-1; i++) {

448 for(j = i+1; j < size; j++) {

449 u_unescape(chTest[i], t1, 256);

450 u_unescape(chTest[j], t2, 256);

451 doTest(coll, t1, t2, UCOL_LESS);

452 }

453 }

454 }

455 else {

456 log_data_err("Can't open collator");

457 }

458 ucol_close(coll);

459 }

460

461

462

463

464 /*

465 const static char impTest[][20] = {

466 "\\u4e00",

467 "a",

468 "A",

469 "b",

470 "B",

471 "\\u4e01"

472 };

473 */

474

475

476 static void TestImplicitTailoring(void) {

477 static const struct {

478 const char *rules;

479 const char *data[10];

480 const uint32_t len;

481 } tests[] = {

482 {

483 /* Tailor b and c before U+4E00. */

484 "&[before 1]\\u4e00 < b < c "

485 /* Now, before U+4E00 is c; put d and e after that. */

486 "&[before 1]\\u4e00 < d < e",

487 { "b", "c", "d", "e", "\\u4e00"}, 5 },

488 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4 e01"}, 6 },

489 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e 00"}, 3},

490 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e 01"}, 3}

491 };

492

493 int32_t i = 0;

494

495 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {

496 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

497 }

498

499 /*

500 UChar t1[256] = {0};

501 UChar t2[256] = {0};

502

503 const char *rule = "&\\u4e00 < a <<< A < b <<< B";

504

505 uint32_t i = 0, j = 0;

506 uint32_t size = 0;

507 uint32_t ruleLen = 0;

508 UErrorCode status = U_ZERO_ERROR;

509 UCollator *coll = NULL;

510 ruleLen = u_unescape(rule, t1, 256);

511

512 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status);

513

514 if(U_SUCCESS(status)) {

515 size = sizeof(impTest)/sizeof(impTest[0]);

516 for(i = 0; i < size-1; i++) {

517 for(j = i+1; j < size; j++) {

518 u_unescape(impTest[i], t1, 256);

519 u_unescape(impTest[j], t2, 256);

520 doTest(coll, t1, t2, UCOL_LESS);

521 }

522 }

523 }

524 else {

525 log_err("Can't open collator");

526 }

527 ucol_close(coll);

528 */

529 }

530

531 static void TestFCDProblem(void) {

532 UChar t1[256] = {0};

533 UChar t2[256] = {0};

534

535 const char *s1 = "\\u0430\\u0306\\u0325";

536 const char *s2 = "\\u04D1\\u0325";

537

538 UErrorCode status = U_ZERO_ERROR;

539 UCollator *coll = ucol_open("", &status);

540 u_unescape(s1, t1, 256);

541 u_unescape(s2, t2, 256);

542

543 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);

544 doTest(coll, t1, t2, UCOL_EQUAL);

545

546 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

547 doTest(coll, t1, t2, UCOL_EQUAL);

548

549 ucol_close(coll);

550 }

551

552 /*

553 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC

554 We're only using NFC/NFD in this test.

555 */

556 #define NORM_BUFFER_TEST_LEN 18

557 typedef struct {

558 UChar32 u;

559 UChar NFC[NORM_BUFFER_TEST_LEN];

560 UChar NFD[NORM_BUFFER_TEST_LEN];

561 } tester;

562

563 static void TestComposeDecompose(void) {

564 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */

565 static const UChar UNICODESET_STR[] = {

566 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x 61,

567 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x 72,

568 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0

569 };

570 int32_t noOfLoc;

571 int32_t i = 0, j = 0;

572

573 UErrorCode status = U_ZERO_ERROR;

574 const char *locName = NULL;

575 uint32_t nfcSize;

576 uint32_t nfdSize;

577 tester **t;

578 uint32_t noCases = 0;

579 UCollator *coll = NULL;

580 UChar32 u = 0;

581 UChar comp[NORM_BUFFER_TEST_LEN];

582 uint32_t len = 0;

583 UCollationElements *iter;

584 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status);

585 int32_t charsToTestSize;

586

587 noOfLoc = uloc_countAvailable();

588

589 coll = ucol_open("", &status);

590 if (U_FAILURE(status)) {

591 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u _errorName(status));

592 return;

593 }

594 charsToTestSize = uset_size(charsToTest);

595 if (charsToTestSize <= 0) {

596 log_err("Set was zero. Missing data?\n");

597 return;

598 }

599 t = (tester *)malloc(charsToTestSize sizeof(tester *));

600 t[0] = (tester *)malloc(sizeof(tester));

601 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize);

602

603 for(u = 0; u < charsToTestSize; u++) {

604 UChar32 ch = uset_charAt(charsToTest, u);

605 len = 0;

606 U16_APPEND_UNSAFE(comp, len, ch);

607 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM _BUFFER_TEST_LEN, &status);

608 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM _BUFFER_TEST_LEN, &status);

609

610 if(nfcSize != nfdSize \|\| (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD, nfcSize * sizeof(UChar)) != 0)

611 \|\| (len != nfdSize \|\| (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * si zeof(UChar)) != 0))) {

612 t[noCases]->u = ch;

613 if(len != nfdSize \|\| (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * s izeof(UChar)) != 0)) {

614 u_strncpy(t[noCases]->NFC, comp, len);

615 t[noCases]->NFC[len] = 0;

616 }

617 noCases++;

618 t[noCases] = (tester *)malloc(sizeof(tester));

619 uprv_memset(t[noCases], 0, sizeof(tester));

620 }

621 }

622 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSi ze);

623 uset_close(charsToTest);

624 charsToTest = NULL;

625

626 for(u=0; u<(UChar32)noCases; u++) {

627 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {

628 log_err("Failure: codePoint %05X fails TestComposeDecompose in the U CA\n", t[u]->u);

629 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);

630 }

631 }

632 /*

633 for(u = 0; u < charsToTestSize; u++) {

634 if(!(u&0xFFFF)) {

635 log_verbose("%08X ", u);

636 }

637 uprv_memset(t[noCases], 0, sizeof(tester));

638 t[noCases]->u = u;

639 len = 0;

640 U16_APPEND_UNSAFE(comp, len, u);

641 comp[len] = 0;

642 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_B UFFER_TEST_LEN, &status);

643 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_B UFFER_TEST_LEN, &status);

644 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL);

645 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL);

646 }

647 */

648

649 ucol_close(coll);

650

651 log_verbose("Testing locales, number of cases = %i\n", noCases);

652 for(i = 0; i<noOfLoc; i++) {

653 status = U_ZERO_ERROR;

654 locName = uloc_getAvailable(i);

655 if(hasCollationElements(locName)) {

656 char cName[256];

657 UChar name[256];

658 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(c Name), &status);

659

660 for(j = 0; j<nameSize; j++) {

661 cName[j] = (char)name[j];

662 }

663 cName[nameSize] = 0;

664 log_verbose("\nTesting locale %s (%s)\n", locName, cName);

665

666 coll = ucol_open(locName, &status);

667 ucol_setStrength(coll, UCOL_IDENTICAL);

668 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &stat us);

669

670 for(u=0; u<(UChar32)noCases; u++) {

671 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) {

672 log_err("Failure: codePoint %05X fails TestComposeDecompose for locale %s\n", t[u]->u, cName);

673 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL);

674 log_verbose("Testing NFC\n");

675 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status);

676 backAndForth(iter);

677 log_verbose("Testing NFD\n");

678 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status);

679 backAndForth(iter);

680 }

681 }

682 ucol_closeElements(iter);

683 ucol_close(coll);

684 }

685 }

686 for(u = 0; u <= (UChar32)noCases; u++) {

687 free(t[u]);

688 }

689 free(t);

690 }

691

692 static void TestEmptyRule(void) {

693 UErrorCode status = U_ZERO_ERROR;

694 UChar rulez[] = { 0 };

695 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &stat us);

696

697 ucol_close(coll);

698 }

699

700 static void TestUCARules(void) {

701 UErrorCode status = U_ZERO_ERROR;

702 UChar b[256];

703 UChar *rules = b;

704 uint32_t ruleLen = 0;

705 UCollator *UCAfromRules = NULL;

706 UCollator *coll = ucol_open("", &status);

707 if(status == U_FILE_ACCESS_ERROR) {

708 log_data_err("Is your data around?\n");

709 return;

710 } else if(U_FAILURE(status)) {

711 log_err("Error opening collator\n");

712 return;

713 }

714 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256);

715

716 log_verbose("TestUCARules\n");

717 if(ruleLen > 256) {

718 rules = (UChar )malloc((ruleLen+1)sizeof(UChar));

719 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen);

720 }

721 log_verbose("Rules length is %d\n", ruleLen);

722 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&s tatus);

723 if(U_SUCCESS(status)) {

724 ucol_close(UCAfromRules);

725 } else {

726 log_verbose("Unable to create a collator from UCARules!\n");

727 }

728 /*

729 u_unescape(blah, b, 256);

730 ucol_getSortKey(coll, b, 1, res, 256);

731 */

732 ucol_close(coll);

733 if(rules != b) {

734 free(rules);

735 }

736 }

737

738

739 /* Pinyin tonal order */

740 /*

741 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0)

742 (w/macron)< (w/acute)< (w/caron)< (w/grave)

743 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8)

744 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec)

745 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2)

746 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9)

747 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) <

748 .. (\u00fc)

749

750 However, in testing we got the following order:

751 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101)

752 (w/acute)< (w/grave)< (w/caron)< (w/macron)

753 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) <

754 .. (\u0113)

755 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b)

756 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d)

757 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) <

758 .. (\u01d8)

759 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b)

760 */

761

/**
 * Tailors the Pinyin tone-marked vowels in front of their base letters with
 * "[before 1]" rules and hands the expected ascending order to
 * genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *expectedOrder[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };
    /* One rule per vowel, then the u-diaeresis variants tailored after u. */
    static const char tailoring[] =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";

    genericRulesStarter(tailoring, expectedOrder,
                        sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}

780

#if 0
/**
 * Disabled; superseded by TestBeforePinyin.
 * Passed the tone-marked vowels, in their expected order, to the "zh" locale.
 */
static void TestJ784(void) {
    static const char *expectedOrder[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };

    genericLocaleStarter("zh", expectedOrder,
                         sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
#endif

796

#if 0
/**
 * Disabled; superseded by the changes to the lv locale.
 * Passed I, i, Y, y, in that expected order, to the "lv" locale.
 */
static void TestJ831(void) {
    static const char *expectedOrder[] = { "I", "i", "Y", "y" };

    genericLocaleStarter("lv", expectedOrder,
                         sizeof(expectedOrder)/sizeof(expectedOrder[0]));
}
#endif

809

/**
 * Checks the ordering of "ae", U+00E6 and U+00C6 relative to neighbouring
 * strings, once with the "fr" locale and once with an explicit
 * "[backwards 2]" tailoring. `data` is in the expected ascending order.
 */
static void TestJ815(void) {
    static const char *data[] = {
        "aa",
        "Aa",
        "ab",
        "Ab",
        "ad",
        "Ad",
        "ae",
        "Ae",
        "\\u00e6",
        "\\u00c6",
        "af",
        "Af",
        "b",
        "B"
    };
    genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0]));
    genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(data)/sizeof(data[0]));
}

830

831

832 static void TestCase(void)

833 {

834 const static UChar gRules[MAX_TOKEN_LEN] =

835 /" & 0 < 1,\u2461<a,A"/

836 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x 0041, 0x0000 };

837

838 const static UChar testCase[][MAX_TOKEN_LEN] =

839 {

840 /0/ {0x0031 /'1'/, 0x0061/'a'/, 0x0000},

841 /1/ {0x0031 /'1'/, 0x0041/'A'/, 0x0000},

842 /2/ {0x2460 /circ'1'/, 0x0061/'a'/, 0x0000},

843 /3/ {0x2460 /circ'1'/, 0x0041/'A'/, 0x0000}

844 };

845

846 const static UCollationResult caseTestResults[][9] =

847 {

848 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },

849 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER },

850 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS },

851 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_ LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }

852 };

853

854 const static UColAttributeValue caseTestAttributes[][2] =

855 {

856 { UCOL_LOWER_FIRST, UCOL_OFF},

857 { UCOL_UPPER_FIRST, UCOL_OFF},

858 { UCOL_LOWER_FIRST, UCOL_ON},

859 { UCOL_UPPER_FIRST, UCOL_ON}

860 };

861 int32_t i,j,k;

862 UErrorCode status = U_ZERO_ERROR;

863 UCollationElements *iter;

864 UCollator *myCollation;

865 myCollation = ucol_open("en_US", &status);

866

867 if(U_FAILURE(status)){

868 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

869 return;

870 }

871 log_verbose("Testing different case settings\n");

872 ucol_setStrength(myCollation, UCOL_TERTIARY);

873

874 for(k = 0; k<4; k++) {

875 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);

876 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);

877 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0] , caseTestAttributes[k][1]);

878 for (i = 0; i < 3 ; i++) {

879 for(j = i+1; j<4; j++) {

880 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j -1]);

881 }

882 }

883 }

884 ucol_close(myCollation);

885

886 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIA RY,NULL, &status);

887 if(U_FAILURE(status)){

888 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(s tatus));

889 return;

890 }

891 log_verbose("Testing different case settings with custom rules\n");

892 ucol_setStrength(myCollation, UCOL_TERTIARY);

893

894 for(k = 0; k<4; k++) {

895 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0], &status);

896 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1], &status);

897 for (i = 0; i < 3 ; i++) {

898 for(j = i+1; j<4; j++) {

899 log_verbose("k:%d, i:%d, j:%d\n", k, i, j);

900 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j -1]);

901 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i]) , &status);

902 backAndForth(iter);

903 ucol_closeElements(iter);

904 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j]) , &status);

905 backAndForth(iter);

906 ucol_closeElements(iter);

907 }

908 }

909 }

910 ucol_close(myCollation);

911 {

912 const static char *lowerFirst[] = {

913 "h",

914 "H",

915 "ch",

916 "Ch",

917 "CH",

918 "cha",

919 "chA",

920 "Cha",

921 "ChA",

922 "CHa",

923 "CHA",

924 "i",

925 "I"

926 };

927

928 const static char *upperFirst[] = {

929 "H",

930 "h",

931 "CH",

932 "Ch",

933 "ch",

934 "CHA",

935 "CHa",

936 "ChA",

937 "Cha",

938 "chA",

939 "cha",

940 "I",

941 "i"

942 };

943 log_verbose("mixed case test\n");

944 log_verbose("lower first, case level off\n");

945 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof (lowerFirst)/sizeof(lowerFirst[0]));

946 log_verbose("upper first, case level off\n");

947 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof (upperFirst)/sizeof(upperFirst[0]));

948 log_verbose("lower first, case level on\n");

949 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowe rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0]));

950 log_verbose("upper first, case level on\n");

951 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", uppe rFirst, sizeof(upperFirst)/sizeof(upperFirst[0]));

952 }

953

954 }

955

956 static void TestIncrementalNormalize(void) {

957

958 /UChar baseA =0x61;/

959 UChar baseA =0x41;

960 /* UChar baseB = 0x42;*/

961 static const UChar ccMix[] = {0x316, 0x321, 0x300};

962 /UChar ccMix[] = {0x61, 0x61, 0x61};/

963 /*

964 0x316 is combining grave accent below, cc=220

965 0x321 is combining palatalized hook below, cc=202

966 0x300 is combining grave accent, cc=230

967 */

968

969 #define MAXSLEN 2000

970 /int maxSLen = 64000;/

971 int sLen;

972 int i;

973

974 UCollator *coll;

975 UErrorCode status = U_ZERO_ERROR;

976 UCollationResult result;

977

978 int32_t myQ = getTestOption(QUICK_OPTION);

979

980 if(getTestOption(QUICK_OPTION) < 0) {

981 setTestOption(QUICK_OPTION, 1);

982 }

983

984 {

985 /* Test 1. Run very long unnormalized strings, to force overflow of*/

986 /* most buffers along the way.*/

987 UChar strA[MAXSLEN+1];

988 UChar strB[MAXSLEN+1];

989

990 coll = ucol_open("en_US", &status);

991 if(status == U_FILE_ACCESS_ERROR) {

992 log_data_err("Is your data around?\n");

993 return;

994 } else if(U_FAILURE(status)) {

995 log_err("Error opening collator\n");

996 return;

997 }

998 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

999

1000 /for (sLen = 257; sLen<MAXSLEN; sLen++) {/

1001 /for (sLen = 4; sLen<MAXSLEN; sLen++) {/

1002 /for (sLen = 1000; sLen<1001; sLen++) {/

1003 for (sLen = 500; sLen<501; sLen++) {

1004 /for (sLen = 40000; sLen<65000; sLen+=1000) {/

1005 strA[0] = baseA;

1006 strB[0] = baseA;

1007 for (i=1; i<=sLen-1; i++) {

1008 strA[i] = ccMix[i % 3];

1009 strB[sLen-i] = ccMix[i % 3];

1010 }

1011 strA[sLen] = 0;

1012 strB[sLen] = 0;

1013

1014 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default str ength, which runs*/

1015 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in the impl*/

1016 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow, general impl.*/

1017 doTest(coll, strA, strB, UCOL_EQUAL);

1018 }

1019 }

1020

1021 setTestOption(QUICK_OPTION, myQ);

1022

1023

1024 /* Test 2: Non-normal sequence in a string that extends to the last charac ter*/

1025 /* of the string. Checks a couple of edge cases.*/

1026

1027 {

1028 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0};

1029 static const UChar strB[] = {0x41, 0xc0, 0x316, 0};

1030 ucol_setStrength(coll, UCOL_TERTIARY);

1031 doTest(coll, strA, strB, UCOL_EQUAL);

1032 }

1033

1034 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/

1035

1036 {

1037 /* New UCA 3.1.1.

1038 * test below used a code point from Desseret, which sorts differently

1039 * than d800 dc00

1040 */

1041 /UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};/

1042 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0 };

1043 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0};

1044 ucol_setStrength(coll, UCOL_TERTIARY);

1045 doTest(coll, strA, strB, UCOL_GREATER);

1046 }

1047

1048 /* Test 4: Imbedded nulls do not terminate a string when length is specifi ed.*/

1049

1050 {

1051 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00};

1052 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00};

1053 char sortKeyA[50];

1054 char sortKeyAz[50];

1055 char sortKeyB[50];

1056 char sortKeyBz[50];

1057 int r;

1058

1059 /* there used to be -3 here. Hmmmm.... */

1060 /result = ucol_strcoll(coll, strA, -3, strB, -3);/

1061 result = ucol_strcoll(coll, strA, 3, strB, 3);

1062 if (result != UCOL_GREATER) {

1063 log_err("ERROR 1 in test 4\n");

1064 }

1065 result = ucol_strcoll(coll, strA, -1, strB, -1);

1066 if (result != UCOL_EQUAL) {

1067 log_err("ERROR 2 in test 4\n");

1068 }

1069

1070 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));

1071 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

1072 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));

1073 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

1074

1075 r = strcmp(sortKeyA, sortKeyAz);

1076 if (r <= 0) {

1077 log_err("Error 3 in test 4\n");

1078 }

1079 r = strcmp(sortKeyA, sortKeyB);

1080 if (r <= 0) {

1081 log_err("Error 4 in test 4\n");

1082 }

1083 r = strcmp(sortKeyAz, sortKeyBz);

1084 if (r != 0) {

1085 log_err("Error 5 in test 4\n");

1086 }

1087

1088 ucol_setStrength(coll, UCOL_IDENTICAL);

1089 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA));

1090 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

1091 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB));

1092 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

1093

1094 r = strcmp(sortKeyA, sortKeyAz);

1095 if (r <= 0) {

1096 log_err("Error 6 in test 4\n");

1097 }

1098 r = strcmp(sortKeyA, sortKeyB);

1099 if (r <= 0) {

1100 log_err("Error 7 in test 4\n");

1101 }

1102 r = strcmp(sortKeyAz, sortKeyBz);

1103 if (r != 0) {

1104 log_err("Error 8 in test 4\n");

1105 }

1106 ucol_setStrength(coll, UCOL_TERTIARY);

1107 }

1108

1109

1110 /* Test 5: Null characters in non-normal source strings.*/

1111

1112 {

1113 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00} ;

1114 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00} ;

1115 char sortKeyA[50];

1116 char sortKeyAz[50];

1117 char sortKeyB[50];

1118 char sortKeyBz[50];

1119 int r;

1120

1121 result = ucol_strcoll(coll, strA, 6, strB, 6);

1122 if (result != UCOL_GREATER) {

1123 log_err("ERROR 1 in test 5\n");

1124 }

1125 result = ucol_strcoll(coll, strA, -1, strB, -1);

1126 if (result != UCOL_EQUAL) {

1127 log_err("ERROR 2 in test 5\n");

1128 }

1129

1130 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));

1131 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

1132 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));

1133 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

1134

1135 r = strcmp(sortKeyA, sortKeyAz);

1136 if (r <= 0) {

1137 log_err("Error 3 in test 5\n");

1138 }

1139 r = strcmp(sortKeyA, sortKeyB);

1140 if (r <= 0) {

1141 log_err("Error 4 in test 5\n");

1142 }

1143 r = strcmp(sortKeyAz, sortKeyBz);

1144 if (r != 0) {

1145 log_err("Error 5 in test 5\n");

1146 }

1147

1148 ucol_setStrength(coll, UCOL_IDENTICAL);

1149 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA));

1150 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz)) ;

1151 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB));

1152 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz)) ;

1153

1154 r = strcmp(sortKeyA, sortKeyAz);

1155 if (r <= 0) {

1156 log_err("Error 6 in test 5\n");

1157 }

1158 r = strcmp(sortKeyA, sortKeyB);

1159 if (r <= 0) {

1160 log_err("Error 7 in test 5\n");

1161 }

1162 r = strcmp(sortKeyAz, sortKeyBz);

1163 if (r != 0) {

1164 log_err("Error 8 in test 5\n");

1165 }

1166 ucol_setStrength(coll, UCOL_TERTIARY);

1167 }

1168

1169

1170 /* Test 6: Null character as base of a non-normal combining sequence.*/

1171

1172 {

1173 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00} ;

1174 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00} ;

1175

1176 result = ucol_strcoll(coll, strA, 5, strB, 5);

1177 if (result != UCOL_LESS) {

1178 log_err("Error 1 in test 6\n");

1179 }

1180 result = ucol_strcoll(coll, strA, -1, strB, -1);

1181 if (result != UCOL_EQUAL) {

1182 log_err("Error 2 in test 6\n");

1183 }

1184 }

1185

1186 ucol_close(coll);

1187 }

1188

1189

1190

#if 0
/*
 * Disabled test. Unescapes every sample in caseBitData, asks
 * ucol_uprv_getCaseBits() for its case bits using the root collator, and
 * compares the answer with the entry at the same index in results.
 */
static void TestGetCaseBit(void) {
    static const char *caseBitData[] = {
        "a", "A", "ch", "Ch", "CH",
        "\\uFF9E", "\\u0009"
    };

    static const uint8_t results[] = {
        UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
        UCOL_UPPER_CASE, UCOL_LOWER_CASE
    };

    uint32_t i, blen = 0;
    UChar b[256] = {0};
    UErrorCode status = U_ZERO_ERROR;
    UCollator *UCA = ucol_open("", &status);
    uint8_t res = 0;

    /* Without a collator there is nothing to query. */
    if(U_FAILURE(status)) {
        log_data_err("Couldn't open UCA\n");
        return;
    }

    for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
        blen = u_unescape(caseBitData[i], b, 256);
        res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
        if(results[i] != res) {
            log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
        }
    }

    /* The collator was previously leaked. */
    ucol_close(UCA);
}
#endif

1218

1219 static void TestHangulTailoring(void) {

1220 static const char *koreanData[] = {

1221 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53 ef", "\\u5475",

1222 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\ \u67b7", "\\u67ef",

1223 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\ \u8857", "\\u8888",

1224 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5",

1225 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\ \u659D", "\\u698E",

1226 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\ \u8B0C"

1227 };

1228

1229 const char *rules =

1230 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 << < \\u53ef <<< \\u5475 "

1231 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6 <<< \\u67b7 <<< \\u67ef "

1232 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304 <<< \\u8857 <<< \\u8888 "

1233 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5 "

1234 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A <<< \\u659D <<< \\u698E "

1235 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D <<< \\u8B0C";

1236

1237

1238 UErrorCode status = U_ZERO_ERROR;

1239 UChar rlz[2048] = { 0 };

1240 uint32_t rlen = u_unescape(rules, rlz, 2048);

1241

1242 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, & status);

1243 if(status == U_FILE_ACCESS_ERROR) {

1244 log_data_err("Is your data around?\n");

1245 return;

1246 } else if(U_FAILURE(status)) {

1247 log_err("Error opening collator\n");

1248 return;

1249 }

1250

1251 log_verbose("Using start of korean rules\n");

1252

1253 if(U_SUCCESS(status)) {

1254 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0 ]));

1255 } else {

1256 log_err("Unable to open collator with rules %s\n", rules);

1257 }

1258

1259 ucol_close(coll);

1260

1261 log_verbose("Using ko__LOTUS locale\n");

1262 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean Data[0]));

1263 }

1264

/*
 * Middle bytes of the secondary and tertiary "common weight" compression
 * ranges, as produced by the current implementation.
 * These values may change whenever sort key compression changes.
 * See class CollationKeys.
 */
enum {
    /* secondary level, range 05..45 */
    SEC_COMMON_MIDDLE = 0x25,

    /* tertiary-only level, range 05..C5 */
    TER_ONLY_COMMON_MIDDLE = 0x65
};

1275

1276 static void TestCompressOverlap(void) {

1277 UChar secstr[150];

1278 UChar tertstr[150];

1279 UErrorCode status = U_ZERO_ERROR;

1280 UCollator *coll;

1281 uint8_t result[500];

1282 uint32_t resultlen;

1283 int count = 0;

1284 uint8_t *tempptr;

1285

1286 coll = ucol_open("", &status);

1287

1288 if (U_FAILURE(status)) {

1289 log_err_status(status, "Collator can't be created -> %s\n", u_errorName( status));

1290 return;

1291 }

1292 while (count < 149) {

1293 secstr[count] = 0x0020; /* [06, 05, 05] */

1294 tertstr[count] = 0x0020;

1295 count ++;

1296 }

1297

1298 /* top down compression ----------------------------------- */

1299 secstr[count] = 0x0332; /* [, 87, 05] */

1300 tertstr[count] = 0x3000; /* [06, 05, 07] */

1301

1302 /* no compression secstr should have 150 secondary bytes, tertstr should

1303 have 150 tertiary bytes.

1304 with correct compression, secstr should have 6 secondary

1305 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes * /

1306 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));

1307 (void)resultlen; /* Suppress set but not used warning. */

1308 tempptr = (uint8_t )uprv_strchr((char )result, 1) + 1;

1309 while (*(tempptr + 1) != 1) {

1310 /* the last secondary collation element is not checked since it is not

1311 part of the compression */

1312 if (*tempptr < SEC_COMMON_MIDDLE) {

1313 log_err("Secondary top down compression overlapped\n");

1314 }

1315 tempptr ++;

1316 }

1317

1318 /* tertiary top/bottom/common for en_US is similar to the secondary

1319 top/bottom/common */

1320 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));

1321 tempptr = (uint8_t )uprv_strrchr((char )result, 1) + 1;

1322 while (*(tempptr + 1) != 0) {

1323 /* the last secondary collation element is not checked since it is not

1324 part of the compression */

1325 if (*tempptr < TER_ONLY_COMMON_MIDDLE) {

1326 log_err("Tertiary top down compression overlapped\n");

1327 }

1328 tempptr ++;

1329 }

1330

1331 /* bottom up compression ------------------------------------- */

1332 secstr[count] = 0;

1333 tertstr[count] = 0;

1334 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result));

1335 tempptr = (uint8_t )uprv_strchr((char )result, 1) + 1;

1336 while (*(tempptr + 1) != 1) {

1337 /* the last secondary collation element is not checked since it is not

1338 part of the compression */

1339 if (*tempptr > SEC_COMMON_MIDDLE) {

1340 log_err("Secondary bottom up compression overlapped\n");

1341 }

1342 tempptr ++;

1343 }

1344

1345 /* tertiary top/bottom/common for en_US is similar to the secondary

1346 top/bottom/common */

1347 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result));

1348 tempptr = (uint8_t )uprv_strrchr((char )result, 1) + 1;

1349 while (*(tempptr + 1) != 0) {

1350 /* the last secondary collation element is not checked since it is not

1351 part of the compression */

1352 if (*tempptr > TER_ONLY_COMMON_MIDDLE) {

1353 log_err("Tertiary bottom up compression overlapped\n");

1354 }

1355 tempptr ++;

1356 }

1357

1358 ucol_close(coll);

1359 }

1360

/*
 * Runs the three-string Cyrillic ordering sample through two tailorings
 * that place U+04D0 after U+0410. The other historical variants remain
 * disabled (see the comments below).
 */
static void TestCyrillicTailoring(void) {
    static const char *test[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };

    /* Russian overrides contractions, so this test is not valid anymore */
    /*genericLocaleStarter("ru", test, 3);*/

    /* Most of the following are commented out because UCA 8.0 */
    /* drops most of the Cyrillic contractions from the default order. */
    /* See CLDR ticket #7246 "root collation: remove Cyrillic contractions". */

    /* genericLocaleStarter("root", test, 3); */
    /* genericRulesStarter("&\\u0410 = \\u0410", test, 3); */
    /* genericRulesStarter("&Z < \\u0410", test, 3); */
    genericRulesStarter("&\\u0410 = \\u0410 < \\u04d0", test, sizeof(test)/sizeof(test[0]));
    genericRulesStarter("&Z < \\u0410 < \\u04d0", test, sizeof(test)/sizeof(test[0]));
    /* genericRulesStarter("&\\u0410 = \\u0410 < \\u0410\\u0301", test, 3); */
    /* genericRulesStarter("&Z < \\u0410 < \\u0410\\u0301", test, 3); */
}

1383

/*
 * Feeds two three-string samples to genericRulesStarter() under a tailoring
 * that suppresses contractions for U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    /* Latin prefix followed by Cyrillic A with or without a combining mark. */
    static const char *testNoCont[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* Cyrillic A with or without a combining mark, followed by a Latin letter. */
    static const char *testNoCont2[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };
    const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    genericRulesStarter(suppressRule, testNoCont, 3);
    genericRulesStarter(suppressRule, testNoCont2, 3);
}

1400

1401 static void TestContraction(void) {

1402 const static char *testrules[] = {

1403 "&A = AB / B",

1404 "&A = A\\u0306/\\u0306",

1405 "&c = ch / h"

1406 };

1407 const static UChar testdata[][2] = {

1408 {0x0041 /* 'A' /, 0x0042 / 'B' */},

1409 {0x0041 /* 'A' /, 0x0306 / combining breve */},

1410 {0x0063 /* 'c' /, 0x0068 / 'h' */}

1411 };

1412 const static UChar testdata2[][2] = {

1413 {0x0063 /* 'c' /, 0x0067 / 'g' */},

1414 {0x0063 /* 'c' /, 0x0068 / 'h' */},

1415 {0x0063 /* 'c' /, 0x006C / 'l' */}

1416 };

1417 #if 0

1418 /*

1419 * These pairs of rule strings are not guaranteed to yield the very same map pings.

1420 * In fact, LDML 24 recommends an improved way of creating mappings

1421 * which always yields different mappings for such pairs. See

1422 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings

1423 */

1424 const static char *testrules3[] = {

1425 "&z < xyz &xyzw << B",

1426 "&z < xyz &xyz << B / w",

1427 "&z < ch &achm << B",

1428 "&z < ch &a << B / chm",

1429 "&\\ud800\\udc00w << B",

1430 "&\\ud800\\udc00 << B / w",

1431 "&a\\ud800\\udc00m << B",

1432 "&a << B / \\ud800\\udc00m",

1433 };

1434 #endif

1435

1436 UErrorCode status = U_ZERO_ERROR;

1437 UCollator *coll;

1438 UChar rule[256] = {0};

1439 uint32_t rlen = 0;

1440 int i;

1441

1442 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {

1443 UCollationElements *iter1;

1444 int j = 0;

1445 log_verbose("Rule %s for testing\n", testrules[i]);

1446 rlen = u_unescape(testrules[i], rule, 32);

1447 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

1448 if (U_FAILURE(status)) {

1449 log_err_status(status, "Collator creation failed %s -> %s\n", testru les[i], u_errorName(status));

1450 return;

1451 }

1452 iter1 = ucol_openElements(coll, testdata[i], 2, &status);

1453 if (U_FAILURE(status)) {

1454 log_err("Collation iterator creation failed\n");

1455 return;

1456 }

1457 while (j < 2) {

1458 UCollationElements *iter2 = ucol_openElements(coll,

1459 &(testdata[i][j]),

1460 1, &status);

1461 uint32_t ce;

1462 if (U_FAILURE(status)) {

1463 log_err("Collation iterator creation failed\n");

1464 return;

1465 }

1466 ce = ucol_next(iter2, &status);

1467 while (ce != UCOL_NULLORDER) {

1468 if ((uint32_t)ucol_next(iter1, &status) != ce) {

1469 log_err("Collation elements in contraction split does not ma tch\n");

1470 return;

1471 }

1472 ce = ucol_next(iter2, &status);

1473 }

1474 j ++;

1475 ucol_closeElements(iter2);

1476 }

1477 if (ucol_next(iter1, &status) != UCOL_NULLORDER) {

1478 log_err("Collation elements not exhausted\n");

1479 return;

1480 }

1481 ucol_closeElements(iter1);

1482 ucol_close(coll);

1483 }

1484

1485 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256);

1486 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

1487 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) {

1488 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",

1489 testdata2[0][0], testdata2[0][1], testdata2[1][0],

1490 testdata2[1][1]);

1491 return;

1492 }

1493 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) {

1494 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n",

1495 testdata2[1][0], testdata2[1][1], testdata2[2][0],

1496 testdata2[2][1]);

1497 return;

1498 }

1499 ucol_close(coll);

1500 #if 0 /* see above */

1501 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) {

1502 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], t estrules3[i + 1]);

1503 UCollator *coll1,

1504 *coll2;

1505 UCollationElements *iter1,

1506 *iter2;

1507 UChar ch = 0x0042 /* 'B' */;

1508 uint32_t ce;

1509 rlen = u_unescape(testrules3[i], rule, 32);

1510 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ;

1511 rlen = u_unescape(testrules3[i + 1], rule, 32);

1512 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status) ;

1513 if (U_FAILURE(status)) {

1514 log_err("Collator creation failed %s\n", testrules[i]);

1515 return;

1516 }

1517 iter1 = ucol_openElements(coll1, &ch, 1, &status);

1518 iter2 = ucol_openElements(coll2, &ch, 1, &status);

1519 if (U_FAILURE(status)) {

1520 log_err("Collation iterator creation failed\n");

1521 return;

1522 }

1523 ce = ucol_next(iter1, &status);

1524 if (U_FAILURE(status)) {

1525 log_err("Retrieving ces failed\n");

1526 return;

1527 }

1528 while (ce != UCOL_NULLORDER) {

1529 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status);

1530 if (ce == ce2) {

1531 log_verbose("CEs match: %08x\n", ce);

1532 } else {

1533 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2);

1534 return;

1535 }

1536 ce = ucol_next(iter1, &status);

1537 if (U_FAILURE(status)) {

1538 log_err("Retrieving ces failed\n");

1539 return;

1540 }

1541 }

1542 if (ucol_next(iter2, &status) != UCOL_NULLORDER) {

1543 log_err("CEs not exhausted\n");

1544 return;

1545 }

1546 ucol_closeElements(iter1);

1547 ucol_closeElements(iter2);

1548 ucol_close(coll1);

1549 ucol_close(coll2);

1550 }

1551 #endif

1552 }

1553

1554 static void TestExpansion(void) {

1555 const static char *testrules[] = {

1556 #if 0

1557 /*

1558 * This seems to have tested that M was not mapped to an expansion.

1559 * I believe the old builder just did that because it computed the exten sion CEs

1560 * at the very end, which was a bug.

1561 * Among other problems, it violated the core tailoring principle

1562 * by making an earlier rule depend on a later one.

1563 * And, of course, if M did not get an expansion, then it was primary di fferent from K,

1564 * unlike what the rule &K<<M says.

1565 */

1566 "&J << K / B & K << M",

1567 #endif

1568 "&J << K / B << M"

1569 };

1570 const static UChar testdata[][3] = {

1571 {0x004A /'J'/, 0x0041 /'A'/, 0},

1572 {0x004D /'M'/, 0x0041 /'A'/, 0},

1573 {0x004B /'K'/, 0x0041 /'A'/, 0},

1574 {0x004B /'K'/, 0x0043 /'C'/, 0},

1575 {0x004A /'J'/, 0x0043 /'C'/, 0},

1576 {0x004D /'M'/, 0x0043 /'C'/, 0}

1577 };

1578

1579 UErrorCode status = U_ZERO_ERROR;

1580 UCollator *coll;

1581 UChar rule[256] = {0};

1582 uint32_t rlen = 0;

1583 int i;

1584

1585 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) {

1586 int j = 0;

1587 log_verbose("Rule %s for testing\n", testrules[i]);

1588 rlen = u_unescape(testrules[i], rule, 32);

1589 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status);

1590 if (U_FAILURE(status)) {

1591 log_err_status(status, "Collator creation failed %s -> %s\n", testru les[i], u_errorName(status));

1592 return;

1593 }

1594

1595 for (j = 0; j < 5; j ++) {

1596 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS);

1597 }

1598 ucol_close(coll);

1599 }

1600 }

1601

#if 0
/* this test tests the current limitations of the engine */
/* it always fails, so it is disabled by default */
/*
 * Each braced section below feeds one tailoring and one or more expected
 * orderings to genericRulesStarter() / genericRulesStarterWithOptions().
 */
static void TestLimitations(void) {
    /* recursive expansions */
    {
        static const char *rule = "&a=b/c&d=c/e";
        static const char *tlimit01[] = {"add","b","adf"};
        static const char *tlimit02[] = {"aa","b","af"};
        log_verbose("recursive expansions\n");
        genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
        genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
    }
    /* contractions spanning expansions */
    {
        static const char *rule = "&a<<<c/e&g<<<eh";
        static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
        static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
        log_verbose("contractions spanning expansions\n");
        genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
        genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
    }
    /* normalization: nulls in contractions */
    {
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("NULL in contractions\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* normalization: contractions spanning normalization */
    {
        /* NOTE(review): rule and data are identical to the previous section - confirm this is intended. */
        static const char *rule = "&a<<<\\u0000\\u0302";
        static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
        static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
        static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
        static const UColAttributeValue valOn[] = { UCOL_ON };
        static const UColAttributeValue valOff[] = { UCOL_OFF };

        log_verbose("contractions spanning normalization\n");
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
        genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
        genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

    }
    /* variable top:  */
    {
        /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
        static const char *rule = "&\\u2010<x<[variable top]=z";
        /*static const char *rule3 = "&' '<x<[variable top]=z";*/
        static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
        static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
        static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
        static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
        static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
        static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

        log_verbose("variable top\n");
        genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

    }
    /* case level */
    {
        static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
        static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
        static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
        static const UColAttribute att[] = { UCOL_CASE_FIRST};
        static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
        /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
        log_verbose("case level\n");
        genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
        /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
        /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    }

}
#endif

1693

1694 static void TestBocsuCoverage(void) {

1695 UErrorCode status = U_ZERO_ERROR;

1696 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u004 1";

1697 UChar test[256] = {0};

1698 uint32_t tlen = u_unescape(testString, test, 32);

1699 uint8_t key[256] = {0};

1700 uint32_t klen = 0;

1701

1702 UCollator *coll = ucol_open("", &status);

1703 if(U_SUCCESS(status)) {

1704 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);

1705

1706 klen = ucol_getSortKey(coll, test, tlen, key, 256);

1707 (void)klen; /* Suppress set but not used warning. */

1708

1709 ucol_close(coll);

1710 } else {

1711 log_data_err("Couldn't open UCA\n");

1712 }

1713 }

1714

1715 static void TestVariableTopSetting(void) {

1716 UErrorCode status = U_ZERO_ERROR;

1717 uint32_t varTopOriginal = 0, varTop1, varTop2;

1718 UCollator *coll = ucol_open("", &status);

1719 if(U_SUCCESS(status)) {

1720

1721 static const UChar nul = 0;

1722 static const UChar space = 0x20;

1723 static const UChar dot = 0x2e; /* punctuation */

1724 static const UChar degree = 0xb0; /* symbol */

1725 static const UChar dollar = 0x24; /* currency symbol */

1726 static const UChar zero = 0x30; /* digit */

1727

1728 varTopOriginal = ucol_getVariableTop(coll, &status);

1729 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal);

1730 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

1731

1732 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);

1733 varTop2 = ucol_getVariableTop(coll, &status);

1734 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1);

1735 if(U_FAILURE(status) \|\| varTop1 != varTop2 \|\|

1736 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1737 ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1738 ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1739 ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1740 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1741 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {

1742 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status ));

1743 }

1744

1745 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status);

1746 varTop2 = ucol_getVariableTop(coll, &status);

1747 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1);

1748 if(U_FAILURE(status) \|\| varTop1 != varTop2 \|\|

1749 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1750 !ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1751 ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1752 ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1753 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1754 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {

1755 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status)) ;

1756 }

1757

1758 varTop1 = ucol_setVariableTop(coll, &degree, 1, &status);

1759 varTop2 = ucol_getVariableTop(coll, &status);

1760 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1);

1761 if(U_FAILURE(status) \|\| varTop1 != varTop2 \|\|

1762 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1763 !ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1764 !ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1765 ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1766 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1767 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {

1768 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(statu s));

1769 }

1770

1771 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status);

1772 varTop2 = ucol_getVariableTop(coll, &status);

1773 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1);

1774 if(U_FAILURE(status) \|\| varTop1 != varTop2 \|\|

1775 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1776 !ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1777 !ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1778 !ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1779 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1780 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {

1781 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(statu s));

1782 }

1783

1784 log_verbose("Testing setting variable top to contractions\n");

1785 {

1786 UChar first[4] = { 0 };

1787 first[0] = 0x0040;

1788 first[1] = 0x0050;

1789 first[2] = 0x0000;

1790

1791 status = U_ZERO_ERROR;

1792 ucol_setVariableTop(coll, first, -1, &status);

1793

1794 if(U_SUCCESS(status)) {

1795 log_err("Invalid contraction succeded in setting variable top!\n");

1796 }

1797

1798 }

1799

1800 log_verbose("Test restoring variable top\n");

1801

1802 status = U_ZERO_ERROR;

1803 ucol_restoreVariableTop(coll, varTopOriginal, &status);

1804 if(varTopOriginal != ucol_getVariableTop(coll, &status)) {

1805 log_err("Couldn't restore old variable top\n");

1806 }

1807

1808 log_verbose("Testing calling with error set\n");

1809

1810 status = U_INTERNAL_PROGRAM_ERROR;

1811 varTop1 = ucol_setVariableTop(coll, &space, 1, &status);

1812 varTop2 = ucol_getVariableTop(coll, &status);

1813 ucol_restoreVariableTop(coll, varTop2, &status);

1814 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status);

1815 varTop2 = ucol_getVariableTop(NULL, &status);

1816 ucol_restoreVariableTop(NULL, varTop2, &status);

1817 if(status != U_INTERNAL_PROGRAM_ERROR) {

1818 log_err("Bad reaction to passed error!\n");

1819 }

1820 ucol_close(coll);

1821 } else {

1822 log_data_err("Couldn't open UCA collator\n");

1823 }

1824 }

1825

1826 static void TestMaxVariable() {

1827 UErrorCode status = U_ZERO_ERROR;

1828 UColReorderCode oldMax, max;

1829 UCollator *coll;

1830

1831 static const UChar nul = 0;

1832 static const UChar space = 0x20;

1833 static const UChar dot = 0x2e; /* punctuation */

1834 static const UChar degree = 0xb0; /* symbol */

1835 static const UChar dollar = 0x24; /* currency symbol */

1836 static const UChar zero = 0x30; /* digit */

1837

1838 coll = ucol_open("", &status);

1839 if(U_FAILURE(status)) {

1840 log_data_err("Couldn't open root collator\n");

1841 return;

1842 }

1843

1844 oldMax = ucol_getMaxVariable(coll);

1845 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax);

1846 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

1847

1848 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);

1849 max = ucol_getMaxVariable(coll);

1850 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max);

1851 if(U_FAILURE(status) \|\| max != UCOL_REORDER_CODE_SPACE \|\|

1852 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1853 ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1854 ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1855 ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1856 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1857 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) {

1858 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status ));

1859 }

1860

1861 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status);

1862 max = ucol_getMaxVariable(coll);

1863 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max);

1864 if(U_FAILURE(status) \|\| max != UCOL_REORDER_CODE_PUNCTUATION \|\|

1865 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1866 !ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1867 ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1868 ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1869 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1870 ucol_greaterOrEqual(coll, &dot, 1, &degree, 1)) {

1871 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName( status));

1872 }

1873

1874 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status);

1875 max = ucol_getMaxVariable(coll);

1876 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max);

1877 if(U_FAILURE(status) \|\| max != UCOL_REORDER_CODE_SYMBOL \|\|

1878 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1879 !ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1880 !ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1881 ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1882 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1883 ucol_greaterOrEqual(coll, &degree, 1, &dollar, 1)) {

1884 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(statu s));

1885 }

1886

1887 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status);

1888 max = ucol_getMaxVariable(coll);

1889 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max);

1890 if(U_FAILURE(status) \|\| max != UCOL_REORDER_CODE_CURRENCY \|\|

1891 !ucol_equal(coll, &nul, 0, &space, 1) \|\|

1892 !ucol_equal(coll, &nul, 0, &dot, 1) \|\|

1893 !ucol_equal(coll, &nul, 0, &degree, 1) \|\|

1894 !ucol_equal(coll, &nul, 0, &dollar, 1) \|\|

1895 ucol_equal(coll, &nul, 0, &zero, 1) \|\|

1896 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) {

1897 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(sta tus));

1898 }

1899

1900 log_verbose("Test restoring maxVariable\n");

1901 status = U_ZERO_ERROR;

1902 ucol_setMaxVariable(coll, oldMax, &status);

1903 if(oldMax != ucol_getMaxVariable(coll)) {

1904 log_err("Couldn't restore old maxVariable\n");

1905 }

1906

1907 log_verbose("Testing calling with error set\n");

1908 status = U_INTERNAL_PROGRAM_ERROR;

1909 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status);

1910 max = ucol_getMaxVariable(coll);

1911 if(max != oldMax \|\| status != U_INTERNAL_PROGRAM_ERROR) {

1912 log_err("Bad reaction to passed error!\n");

1913 }

1914 ucol_close(coll);

1915 }

1916

1917 static void TestNonChars(void) {

1918 static const char *test[] = {

1919 "\\u0000", /* ignorable */

1920 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei ghts */

1921 "\\uFDD0", "\\uFDEF",

1922 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like unassigned, */

1923 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */

1924 "\\U0003FFFE", "\\U0003FFFF",

1925 "\\U0004FFFE", "\\U0004FFFF",

1926 "\\U0005FFFE", "\\U0005FFFF",

1927 "\\U0006FFFE", "\\U0006FFFF",

1928 "\\U0007FFFE", "\\U0007FFFF",

1929 "\\U0008FFFE", "\\U0008FFFF",

1930 "\\U0009FFFE", "\\U0009FFFF",

1931 "\\U000AFFFE", "\\U000AFFFF",

1932 "\\U000BFFFE", "\\U000BFFFF",

1933 "\\U000CFFFE", "\\U000CFFFF",

1934 "\\U000DFFFE", "\\U000DFFFF",

1935 "\\U000EFFFE", "\\U000EFFFF",

1936 "\\U000FFFFE", "\\U000FFFFF",

1937 "\\U0010FFFE", "\\U0010FFFF",

1938 "\\uFFFF" /* special character with maximum primary weight */

1939 };

1940 UErrorCode status = U_ZERO_ERROR;

1941 UCollator *coll = ucol_open("en_US", &status);

1942

1943 log_verbose("Test non characters\n");

1944

1945 if(U_SUCCESS(status)) {

1946 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS);

1947 } else {

1948 log_err_status(status, "Unable to open collator\n");

1949 }

1950

1951 ucol_close(coll);

1952 }

1953

1954 static void TestExtremeCompression(void) {

1955 static char *test[4];

1956 int32_t j = 0, i = 0;

1957

1958 for(i = 0; i<4; i++) {

1959 test[i] = (char )malloc(2048sizeof(char));

1960 }

1961

1962 for(j = 20; j < 500; j++) {

1963 for(i = 0; i<4; i++) {

1964 uprv_memset(test[i], 'a', (j-1)*sizeof(char));

1965 test[i][j-1] = (char)('a'+i);

1966 test[i][j] = 0;

1967 }

1968 genericLocaleStarter("en_US", (const char **)test, 4);

1969 }

1970

1971

1972 for(i = 0; i<4; i++) {

1973 free(test[i]);

1974 }

1975 }

1976

1977 #if 0

1978 static void TestExtremeCompression(void) {

1979 static char *test[4];

1980 int32_t j = 0, i = 0;

1981 UErrorCode status = U_ZERO_ERROR;

1982 UCollator *coll = ucol_open("en_US", status);

1983 for(i = 0; i<4; i++) {

1984 test[i] = (char )malloc(2048sizeof(char));

1985 }

1986 for(j = 10; j < 2048; j++) {

1987 for(i = 0; i<4; i++) {

1988 uprv_memset(test[i], 'a', (j-2)*sizeof(char));

1989 test[i][j-1] = (char)('a'+i);

1990 test[i][j] = 0;

1991 }

1992 }

1993 genericLocaleStarter("en_US", (const char **)test, 4);

1994

1995 for(j = 10; j < 2048; j++) {

1996 for(i = 0; i<1; i++) {

1997 uprv_memset(test[i], 'a', (j-1)*sizeof(char));

1998 test[i][j] = 0;

1999 }

2000 }

2001 for(i = 0; i<4; i++) {

2002 free(test[i]);

2003 }

2004 }

2005 #endif

2006

/**
 * Builds a tailoring whose reset and relations use supplementary code points
 * written as escaped surrogate pairs, and passes it together with the
 * expected string order to genericRulesStarter().
 */
static void TestSurrogates(void) {
    static const char *rule =
        "&z < \\ud900\\udc25 < \\ud805\\udc50"
        "< \\ud800\\udc00y < \\ud800\\udc00r"
        "< \\ud800\\udc00f << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a < c < b" ;

    static const char *test[] = {
        "z","\\ud900\\udc25", "\\ud805\\udc50",
        "\\ud800\\udc00y", "\\ud800\\udc00r",
        "\\ud800\\udc00f", "\\ud800\\udc00",
        "\\ud800\\udc00c", "\\ud800\\udc00b",
        "\\ud800\\udc00fa", "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c", "b"
    };

    /* All 14 entries of the table are checked. */
    genericRulesStarter(rule, test, LEN(test));
}

2027

2028 /* This is a test for prefix implementation, used by JIS X 4061 collation rules */

2029 static void TestPrefix(void) {

2030 uint32_t i;

2031

2032 static const struct {

2033 const char *rules;

2034 const char *data[50];

2035 const uint32_t len;

2036 } tests[] = {

2037 { "&z <<< z\|a",

2038 {"zz", "za"}, 2 },

2039

2040 { "&z <<< z\| a",

2041 {"zz", "za"}, 2 },

2042 { "[strength I]"

2043 "&a=\\ud900\\udc25"

2044 "&z<<<\\ud900\\udc25\|a",

2045 {"aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz"}, 4 },

2046 };

2047

2048

2049 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

2050 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2051 }

2052 }

2053

2054 /* This test uses data supplied by Masashiko Maedera to test the implementation */

2055 /* JIS X 4061 collation order implementation */

2056 static void TestNewJapanese(void) {

2057

2058 static const char * const test1[] = {

2059 "\\u30b7\\u30e3\\u30fc\\u30ec",

2060 "\\u30b7\\u30e3\\u30a4",

2061 "\\u30b7\\u30e4\\u30a3",

2062 "\\u30b7\\u30e3\\u30ec",

2063 "\\u3061\\u3087\\u3053",

2064 "\\u3061\\u3088\\u3053",

2065 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8",

2066 "\\u3066\\u30fc\\u305f",

2067 "\\u30c6\\u30fc\\u30bf",

2068 "\\u30c6\\u30a7\\u30bf",

2069 "\\u3066\\u3048\\u305f",

2070 "\\u3067\\u30fc\\u305f",

2071 "\\u30c7\\u30fc\\u30bf",

2072 "\\u30c7\\u30a7\\u30bf",

2073 "\\u3067\\u3048\\u305f",

2074 "\\u3066\\u30fc\\u305f\\u30fc",

2075 "\\u30c6\\u30fc\\u30bf\\u30a1",

2076 "\\u30c6\\u30a7\\u30bf\\u30fc",

2077 "\\u3066\\u3047\\u305f\\u3041",

2078 "\\u3066\\u3048\\u305f\\u30fc",

2079 "\\u3067\\u30fc\\u305f\\u30fc",

2080 "\\u30c7\\u30fc\\u30bf\\u30a1",

2081 "\\u3067\\u30a7\\u305f\\u30a1",

2082 "\\u30c7\\u3047\\u30bf\\u3041",

2083 "\\u30c7\\u30a8\\u30bf\\u30a2",

2084 "\\u3072\\u3086",

2085 "\\u3073\\u3085\\u3042",

2086 "\\u3074\\u3085\\u3042",

2087 "\\u3073\\u3085\\u3042\\u30fc",

2088 "\\u30d3\\u30e5\\u30a2\\u30fc",

2089 "\\u3074\\u3085\\u3042\\u30fc",

2090 "\\u30d4\\u30e5\\u30a2\\u30fc",

2091 "\\u30d2\\u30e5\\u30a6",

2092 "\\u30d2\\u30e6\\u30a6",

2093 "\\u30d4\\u30e5\\u30a6\\u30a2",

2094 "\\u3073\\u3085\\u30fc\\u3042\\u30fc",

2095 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc",

2096 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc",

2097 "\\u3072\\u3085\\u3093",

2098 "\\u3074\\u3085\\u3093",

2099 "\\u3075\\u30fc\\u308a",

2100 "\\u30d5\\u30fc\\u30ea",

2101 "\\u3075\\u3045\\u308a",

2102 "\\u3075\\u30a5\\u308a",

2103 "\\u3075\\u30a5\\u30ea",

2104 "\\u30d5\\u30a6\\u30ea",

2105 "\\u3076\\u30fc\\u308a",

2106 "\\u30d6\\u30fc\\u30ea",

2107 "\\u3076\\u3045\\u308a",

2108 "\\u30d6\\u30a5\\u308a",

2109 "\\u3077\\u3046\\u308a",

2110 "\\u30d7\\u30a6\\u30ea",

2111 "\\u3075\\u30fc\\u308a\\u30fc",

2112 "\\u30d5\\u30a5\\u30ea\\u30fc",

2113 "\\u3075\\u30a5\\u308a\\u30a3",

2114 "\\u30d5\\u3045\\u308a\\u3043",

2115 "\\u30d5\\u30a6\\u30ea\\u30fc",

2116 "\\u3075\\u3046\\u308a\\u3043",

2117 "\\u30d6\\u30a6\\u30ea\\u30a4",

2118 "\\u3077\\u30fc\\u308a\\u30fc",

2119 "\\u3077\\u30a5\\u308a\\u30a4",

2120 "\\u3077\\u3046\\u308a\\u30fc",

2121 "\\u30d7\\u30a6\\u30ea\\u30a4",

2122 "\\u30d5\\u30fd",

2123 "\\u3075\\u309e",

2124 "\\u3076\\u309d",

2125 "\\u3076\\u3075",

2126 "\\u3076\\u30d5",

2127 "\\u30d6\\u3075",

2128 "\\u30d6\\u30d5",

2129 "\\u3076\\u309e",

2130 "\\u3076\\u3077",

2131 "\\u30d6\\u3077",

2132 "\\u3077\\u309d",

2133 "\\u30d7\\u30fd",

2134 "\\u3077\\u3075",

2135 };

2136

2137 static const char *test2[] = {

2138 "\\u306f\\u309d", /* H\\u309d */

2139 "\\u30cf\\u30fd", /* K\\u30fd */

2140 "\\u306f\\u306f", /* HH */

2141 "\\u306f\\u30cf", /* HK */

2142 "\\u30cf\\u30cf", /* KK */

2143 "\\u306f\\u309e", /* H\\u309e */

2144 "\\u30cf\\u30fe", /* K\\u30fe */

2145 "\\u306f\\u3070", /* HH\\u309b */

2146 "\\u30cf\\u30d0", /* KK\\u309b */

2147 "\\u306f\\u3071", /* HH\\u309c */

2148 "\\u30cf\\u3071", /* KH\\u309c */

2149 "\\u30cf\\u30d1", /* KK\\u309c */

2150 "\\u3070\\u309d", /* H\\u309b\\u309d */

2151 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */

2152 "\\u3070\\u306f", /* H\\u309bH */

2153 "\\u30d0\\u30cf", /* K\\u309bK */

2154 "\\u3070\\u309e", /* H\\u309b\\u309e */

2155 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */

2156 "\\u3070\\u3070", /* H\\u309bH\\u309b */

2157 "\\u30d0\\u3070", /* K\\u309bH\\u309b */

2158 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */

2159 "\\u3070\\u3071", /* H\\u309bH\\u309c */

2160 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */

2161 "\\u3071\\u309d", /* H\\u309c\\u309d */

2162 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */

2163 "\\u3071\\u306f", /* H\\u309cH */

2164 "\\u30d1\\u30cf", /* K\\u309cK */

2165 "\\u3071\\u3070", /* H\\u309cH\\u309b */

2166 "\\u3071\\u30d0", /* H\\u309cK\\u309b */

2167 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */

2168 "\\u3071\\u3071", /* H\\u309cH\\u309c */

2169 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */

2170 };

2171 /*

2172 static const char *test3[] = {

2173 "\\u221er\\u221e",

2174 "\\u221eR#",

2175 "\\u221et\\u221e",

2176 "#r\\u221e",

2177 "#R#",

2178 "#t%",

2179 "#T%",

2180 "8t\\u221e",

2181 "8T\\u221e",

2182 "8t#",

2183 "8T#",

2184 "8t%",

2185 "8T%",

2186 "8t8",

2187 "8T8",

2188 "\\u03c9r\\u221e",

2189 "\\u03a9R%",

2190 "rr\\u221e",

2191 "rR\\u221e",

2192 "Rr\\u221e",

2193 "RR\\u221e",

2194 "RT%",

2195 "rt8",

2196 "tr\\u221e",

2197 "tr8",

2198 "TR8",

2199 "tt8",

2200 "\\u30b7\\u30e3\\u30fc\\u30ec",

2201 };

2202 */

2203 static const UColAttribute att[] = { UCOL_STRENGTH };

2204 static const UColAttributeValue val[] = { UCOL_QUATERNARY };

2205

2206 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HAND LING};

2207 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED };

2208

2209 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), a tt, val, 1);

2210 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), a tt, val, 1);

2211 /genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));/

2212 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), a ttShifted, valShifted, 2);

2213 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), a ttShifted, valShifted, 2);

2214 }

2215

2216 static void TestStrCollIdenticalPrefix(void) {

2217 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71";

2218 const char* test[] = {

2219 "ab\\ud9b0\\udc70",

2220 "ab\\ud9b0\\udc71"

2221 };

2222 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_E QUAL);

2223 }

2224 /* Contractions should have all their canonically equivalent */

2225 /* strings included */

2226 static void TestContractionClosure(void) {

2227 static const struct {

2228 const char *rules;

2229 const char *data[10];

2230 const uint32_t len;

2231 } tests[] = {

2232 { "&b=\\u00e4\\u00e4",

2233 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\ \u00e4" }, 5},

2234 { "&b=\\u00C5",

2235 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4},

2236 };

2237 uint32_t i;

2238

2239

2240 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

2241 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, U COL_EQUAL);

2242 }

2243 }

2244

2245 /* This test also fails */

2246 static void TestBeforePrefixFailure(void) {

2247 static const struct {

2248 const char *rules;

2249 const char *data[10];

2250 const uint32_t len;

2251 } tests[] = {

2252 { "&g <<< a"

2253 "&[before 3]\\uff41 <<< x",

2254 {"x", "\\uff41"}, 2 },

2255 { "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

2256 "&\\u30A8=\\u30A8=\\u3048=\\uff74"

2257 "&[before 3]\\u30a7<<<\\u30a9",

2258 {"\\u30a9", "\\u30a7"}, 2 },

2259 { "&[before 3]\\u30a7<<<\\u30a9"

2260 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

2261 "&\\u30A8=\\u30A8=\\u3048=\\uff74",

2262 {"\\u30a9", "\\u30a7"}, 2 },

2263 };

2264 uint32_t i;

2265

2266

2267 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

2268 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2269 }

2270

2271 #if 0

2272 const char* rule1 =

2273 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

2274 "&\\u30A8=\\u30A8=\\u3048=\\uff74"

2275 "&[before 3]\\u30a7<<<\\u30c6\|\\u30fc";

2276 const char* rule2 =

2277 "&[before 3]\\u30a7<<<\\u30c6\|\\u30fc"

2278 "&\\u30A7=\\u30A7=\\u3047=\\uff6a"

2279 "&\\u30A8=\\u30A8=\\u3048=\\uff74";

2280 const char* test[] = {

2281 "\\u30c6\\u30fc\\u30bf",

2282 "\\u30c6\\u30a7\\u30bf",

2283 };

2284 genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));

2285 genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));

2286 /* this piece of code should be in some sort of verbose mode */

2287 /* it gets the collation elements for elements and prints them */

2288 /* This is useful when trying to see whether the problem is */

2289 {

2290 UErrorCode status = U_ZERO_ERROR;

2291 uint32_t i = 0;

2292 UCollationElements *it = NULL;

2293 uint32_t CE;

2294 UChar string[256];

2295 uint32_t uStringLen;

2296 UCollator *coll = NULL;

2297

2298 uStringLen = u_unescape(rule1, string, 256);

2299

2300 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

2301

2302 /coll = ucol_open("ja_JP_JIS", &status);/

2303 it = ucol_openElements(coll, string, 0, &status);

2304

2305 for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {

2306 log_verbose("%s\n", test[i]);

2307 uStringLen = u_unescape(test[i], string, 256);

2308 ucol_setText(it, string, uStringLen, &status);

2309

2310 while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {

2311 log_verbose("%08X\n", CE);

2312 }

2313 log_verbose("\n");

2314

2315 }

2316

2317 ucol_closeElements(it);

2318 ucol_close(coll);

2319 }

2320 #endif

2321 }

2322

2323 static void TestPrefixCompose(void) {

2324 const char* rule1 =

2325 "&\\u30a7<<<\\u30ab\|\\u30fc=\\u30ac\|\\u30fc";

2326 /*

2327 const char* test[] = {

2328 "\\u30c6\\u30fc\\u30bf",

2329 "\\u30c6\\u30a7\\u30bf",

2330 };

2331 */

2332 {

2333 UErrorCode status = U_ZERO_ERROR;

2334 /uint32_t i = 0;/

2335 /UCollationElements it = NULL;*/

2336 /* uint32_t CE;*/

2337 UChar string[256];

2338 uint32_t uStringLen;

2339 UCollator *coll = NULL;

2340

2341 uStringLen = u_unescape(rule1, string, 256);

2342

2343 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

2344 ucol_close(coll);

2345 }

2346

2347

2348 }

2349

2350 /*

2351 [last variable] last variable value

2352 [last primary ignorable] largest CE for primary ignorable

2353 [last secondary ignorable] largest CE for secondary ignorable

2354 [last tertiary ignorable] largest CE for tertiary ignorable

2355 [top] guaranteed to be above all implicit CEs, for now and in the future (in 1.8 )

2356 */

2357

2358 static void TestRuleOptions(void) {

2359 /* values here are hardcoded and are correct for the current UCA

2360 * when the UCA changes, one might be forced to change these

2361 * values.

2362 */

2363

2364 /*

2365 * These strings contain the last character before [variable top]

2366 * and the first and second characters (by primary weights) after it.

2367 * See FractionalUCA.txt. For example:

2368 [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICA TOR

2369 [variable top = 0C FE]

2370 [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT

2371 and

2372 00B4; [0D 0C, 05, 05]

2373 *

2374 * Note: Starting with UCA 6.0, the [variable top] collation element

2375 * is not the weight of any character or string,

2376 * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].

2377 */

2378 #define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"

2379 #define FIRST_REGULAR_CHAR_STRING "\\u0060"

2380 #define SECOND_REGULAR_CHAR_STRING "\\u00B4"

2381

2382 /*

2383 * This string has to match the character that has the [last regular] weight

2384 * which changes with each UCA version.

2385 * See the bottom of FractionalUCA.txt which says something like

2386 [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032

2387 *

2388 * Note: Starting with UCA 6.0, the [last regular] collation element

2389 * is not the weight of any character or string,

2390 * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].

2391 */

2392 #define LAST_REGULAR_CHAR_STRING "\\U0001342E"

2393

2394 static const struct {

2395 const char *rules;

2396 const char *data[10];

2397 const uint32_t len;

2398 } tests[] = {

2399 #if 0

2400 /* "you cannot go before ...": The parser now sets an error for such nonsens ical rules. */

2401 /* - all befores here amount to zero */

2402 { "&[before 3][first tertiary ignorable]<<<a",

2403 { "\\u0000", "a"}, 2

2404 }, /* you cannot go before first tertiary ignorable */

2405

2406 { "&[before 3][last tertiary ignorable]<<<a",

2407 { "\\u0000", "a"}, 2

2408 }, /* you cannot go before last tertiary ignorable */

2409 #endif

2410 /*

2411 * However, there is a real secondary ignorable (artificial addition in Frac tionalUCA.txt),

2412 * and it is possible to "go before" that.

2413 */

2414 { "&[before 3][first secondary ignorable]<<<a",

2415 { "\\u0000", "a"}, 2

2416 },

2417

2418 { "&[before 3][last secondary ignorable]<<<a",

2419 { "\\u0000", "a"}, 2

2420 },

2421

2422 /* 'normal' befores */

2423

2424 /*

2425 * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,

2426 * it is not possible to tailor &[first primary ignorable]<a or &[last prima ry ignorable]<a

2427 * because there is no tailoring space before that boundary.

2428 * Made the tests work by tailoring to a space instead.

2429 */

2430 { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first pri mary ignorable]<a */

2431 { "c", "b", "\\u0332", "a" }, 4

2432 },

2433

2434 /* we don't have a code point that corresponds to

2435 * the last primary ignorable

2436 */

2437 { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last prima ry ignorable]<a */

2438 { "\\u0332", "\\u20e3", "c", "b", "a" }, 5

2439 },

2440

2441 { "&[before 3][first variable]<<<c<<<b &[first variable]<a",

2442 { "c", "b", "\\u0009", "a", "\\u000a" }, 5

2443 },

2444

2445 { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",

2446 { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_ REGULAR_CHAR_STRING }, 5

2447 },

2448

2449 { "&[first regular]<a"

2450 "&[before 1][first regular]<b",

2451 { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4

2452 },

2453

2454 { "&[before 1][last regular]<b"

2455 "&[last regular]<a",

2456 { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4

2457 },

2458

2459 { "&[before 1][first implicit]<b"

2460 "&[first implicit]<a",

2461 { "b", "\\u4e00", "a", "\\u4e01"}, 4

2462 },

2463 #if 0 /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */

2464 { "&[before 1][last implicit]<b"

2465 "&[last implicit]<a",

2466 { "b", "\\U0010FFFD", "a" }, 3

2467 },

2468 #endif

2469 { "&[last variable]<z"

2470 "&' '<x" /* was &[last primary ignorable]<x, see above */

2471 "&[last secondary ignorable]<<y"

2472 "&[last tertiary ignorable]<<<w"

2473 "&[top]<u",

2474 {"\\ufffb", "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u" }, 7

2475 }

2476

2477 };

2478 uint32_t i;

2479

2480 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

2481 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2482 }

2483 }

2484

2485

2486 static void TestOptimize(void) {

2487 /* this is not really a test - just trying out

2488 * whether copying of UCA contents will fail

2489 * Cannot really test, since the functionality

2490 * remains the same.

2491 */

2492 static const struct {

2493 const char *rules;

2494 const char *data[10];

2495 const uint32_t len;

2496 } tests[] = {

2497 /* - all befores here amount to zero */

2498 { "[optimize [\\uAC00-\\uD7FF]]",

2499 { "a", "b"}, 2}

2500 };

2501 uint32_t i;

2502

2503 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) {

2504 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len);

2505 }

2506 }

2507

2508 /*

2509 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator and the UTF-8 iterator.

2510 weiv ucol_strcollIter?

2511 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021

2512 weiv these are the input strings?

2513 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on , we have s1 > s2

2514 weiv will check - could be a problem with utf-8 iterator

2515 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2 = eda08021, we have s1 < s2

2516 weiv hmmm

2517 cycheng@ca.ibm.c... note that we have a standalone high surrogate

2518 weiv that doesn't sound right

2519 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000

2520 weiv so you have two strings, you convert them to utf-8 and to utf-16BE

2521 cycheng@ca.ibm.c... yes

2522 weiv and then do the comparison

2523 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other case the input strings are in utf-16be

2524 weiv utf-16 strings look like a little endian ones in the example you sent me

2525 weiv It could be a bug - let me try to test it out

2526 cycheng@ca.ibm.c... ok

2527 cycheng@ca.ibm.c... we can wait till the conf. call

2528 cycheng@ca.ibm.c... next week

2529 weiv that would be great

2530 weiv hmmm

2531 weiv I might be wrong

2532 weiv let me play with it some more

2533 cycheng@ca.ibm.c... ok

2534 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both are in utf-16be

2535 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that' s built for db2

2536 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be

2537 weiv ok

2538 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data

2539 weiv thanks

2540 cycheng@ca.ibm.c... the 4 strings we sent are just samples

2541 */

2542 #if 0

2543 static void Alexis(void) {

2544 UErrorCode status = U_ZERO_ERROR;

2545 UCollator *coll = ucol_open("", &status);

2546

2547

2548 const char utf16be[2][4] = {

2549 { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },

2550 { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }

2551 };

2552

2553 const char utf8[2][4] = {

2554 { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },

2555 { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },

2556 };

2557

2558 UCharIterator iterU161, iterU162;

2559 UCharIterator iterU81, iterU82;

2560

2561 UCollationResult resU16, resU8;

2562

2563 uiter_setUTF16BE(&iterU161, utf16be[0], 4);

2564 uiter_setUTF16BE(&iterU162, utf16be[1], 4);

2565

2566 uiter_setUTF8(&iterU81, utf8[0], 4);

2567 uiter_setUTF8(&iterU82, utf8[1], 4);

2568

2569 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

2570

2571 resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);

2572 resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);

2573

2574

2575 if(resU16 != resU8) {

2576 log_err("different results\n");

2577 }

2578

2579 ucol_close(coll);

2580 }

2581 #endif

2582

2583 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256

2584 static void Alexis2(void) {

2585 UErrorCode status = U_ZERO_ERROR;

2586 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER _SIZE];

2587 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUF FER_SIZE];

2588 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SI ZE];

2589 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8 LenT = 0;

2590

2591 UConverter *conv = NULL;

2592

2593 UCharIterator U16BEItS, U16BEItT;

2594 UCharIterator U8ItS, U8ItT;

2595

2596 UCollationResult resU16, resU16BE, resU8;

2597

2598 static const char* const pairs[][2] = {

2599 { "\\ud800\\u0021", "\\uFFFC\\u0062"},

2600 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" },

2601 { "\\u0E40\\u0021", "\\u00A1\\u0021"},

2602 { "\\u0E40\\u0021", "\\uFE57\\u0062"},

2603 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"},

2604 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"},

2605 { "\\u0020", "\\u0020\\u0000"}

2606 /*

2607 5F20 (my result here)

2608 5F204E008E3F

2609 5F20 (your result here)

2610 */

2611 };

2612

2613 int32_t i = 0;

2614

2615 UCollator *coll = ucol_open("", &status);

2616 if(status == U_FILE_ACCESS_ERROR) {

2617 log_data_err("Is your data around?\n");

2618 return;

2619 } else if(U_FAILURE(status)) {

2620 log_err("Error opening collator\n");

2621 return;

2622 }

2623 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

2624 conv = ucnv_open("UTF16BE", &status);

2625 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {

2626 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE);

2627 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE);

2628

2629 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT);

2630

2631 log_verbose("Result of strcoll is %i\n", resU16);

2632

2633 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Source, U16LenS, &status);

2634 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE, U16Target, U16LenT, &status);

2635 (void)U16BELenS; /* Suppress set but not used warnings. */

2636 (void)U16BELenT;

2637

2638 /* use the original sizes, as the result from converter is in bytes */

2639 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS);

2640 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT);

2641

2642 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status);

2643

2644 log_verbose("Result of U16BE is %i\n", resU16BE);

2645

2646 if(resU16 != resU16BE) {

2647 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", p airs[i][0], pairs[i][1]);

2648 }

2649

2650 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16Le nS, &status);

2651 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16Le nT, &status);

2652

2653 uiter_setUTF8(&U8ItS, U8Source, U8LenS);

2654 uiter_setUTF8(&U8ItT, U8Target, U8LenT);

2655

2656 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status);

2657

2658 if(resU16 != resU8) {

2659 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pair s[i][0], pairs[i][1]);

2660 }

2661

2662 }

2663

2664 ucol_close(coll);

2665 ucnv_close(conv);

2666 }

2667

2668 static void TestHebrewUCA(void) {

2669 UErrorCode status = U_ZERO_ERROR;

2670 static const char *first[] = {

2671 "d790d6b8d79cd795d6bcd7a9",

2672 "d790d79cd79ed7a7d799d799d7a1",

2673 "d790d6b4d79ed795d6bcd7a9",

2674 };

2675

2676 char utf8String[3][256];

2677 UChar utf16String[3][256];

2678

2679 int32_t i = 0, j = 0;

2680 int32_t sizeUTF8[3];

2681 int32_t sizeUTF16[3];

2682

2683 UCollator *coll = ucol_open("", &status);

2684 if (U_FAILURE(status)) {

2685 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(st atus));

2686 return;

2687 }

2688 /ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);/

2689

2690 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) {

2691 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status);

2692 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i] , &status);

2693 log_verbose("%i: ");

2694 for(j = 0; j < sizeUTF16[i]; j++) {

2695 /log_verbose("\\u%04X", utf16String[i][j]);/

2696 log_verbose("%04X", utf16String[i][j]);

2697 }

2698 log_verbose("\n");

2699 }

2700 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) {

2701 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) {

2702 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS);

2703 }

2704 }

2705

2706 ucol_close(coll);

2707

2708 }

2709

2710 static void TestPartialSortKeyTermination(void) {

2711 static const char* cases[] = {

2712 "\\u1234\\u1234\\udc00",

2713 "\\udc00\\ud800\\ud800"

2714 };

2715

2716 int32_t i;

2717

2718 UErrorCode status = U_ZERO_ERROR;

2719

2720 UCollator *coll = ucol_open("", &status);

2721

2722 UCharIterator iter;

2723

2724 UChar currCase[256];

2725 int32_t length = 0;

2726 int32_t pKeyLen = 0;

2727

2728 uint8_t key[256];

2729

2730 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {

2731 uint32_t state[2] = {0, 0};

2732 length = u_unescape(cases[i], currCase, 256);

2733 uiter_setString(&iter, currCase, length);

2734 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status);

2735 (void)pKeyLen; /* Suppress set but not used warning. */

2736

2737 log_verbose("Done\n");

2738

2739 }

2740 ucol_close(coll);

2741 }

2742

2743 static void TestSettings(void) {

2744 static const char* cases[] = {

2745 "apple",

2746 "Apple"

2747 };

2748

2749 static const char* locales[] = {

2750 "",

2751 "en"

2752 };

2753

2754 UErrorCode status = U_ZERO_ERROR;

2755

2756 int32_t i = 0, j = 0;

2757

2758 UChar source[256], target[256];

2759 int32_t sLen = 0, tLen = 0;

2760

2761 UCollator *collateObject = NULL;

2762 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) {

2763 collateObject = ucol_open(locales[i], &status);

2764 ucol_setStrength(collateObject, UCOL_PRIMARY);

2765 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status);

2766 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) {

2767 sLen = u_unescape(cases[j-1], source, 256);

2768 source[sLen] = 0;

2769 tLen = u_unescape(cases[j], target, 256);

2770 source[tLen] = 0;

2771 doTest(collateObject, source, target, UCOL_EQUAL);

2772 }

2773 ucol_close(collateObject);

2774 }

2775 }

2776

2777 static int32_t TestEqualsForCollator(const char* locName, UCollator source, UCo llator target) {

2778 UErrorCode status = U_ZERO_ERROR;

2779 int32_t errorNo = 0;

2780 const UChar *sourceRules = NULL;

2781 int32_t sourceRulesLen = 0;

2782 UParseError parseError;

2783 UColAttributeValue french = UCOL_OFF;

2784

2785 if(!ucol_equals(source, target)) {

2786 log_err("Same collators, different address not equal\n");

2787 errorNo++;

2788 }

2789 ucol_close(target);

2790 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &st atus)) == 0) {

2791 target = ucol_safeClone(source, NULL, NULL, &status);

2792 if(U_FAILURE(status)) {

2793 log_err("Error creating clone\n");

2794 errorNo++;

2795 return errorNo;

2796 }

2797 if(!ucol_equals(source, target)) {

2798 log_err("Collator different from it's clone\n");

2799 errorNo++;

2800 }

2801 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status);

2802 if(french == UCOL_ON) {

2803 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);

2804 } else {

2805 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status);

2806 }

2807 if(U_FAILURE(status)) {

2808 log_err("Error setting attributes\n");

2809 errorNo++;

2810 return errorNo;

2811 }

2812 if(ucol_equals(source, target)) {

2813 log_err("Collators same even when options changed\n");

2814 errorNo++;

2815 }

2816 ucol_close(target);

2817

2818 sourceRules = ucol_getRules(source, &sourceRulesLen);

2819 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_ DEFAULT, &parseError, &status);

2820 if(U_FAILURE(status)) {

2821 log_err("Error instantiating target from rules - %s\n", u_errorName( status));

2822 errorNo++;

2823 return errorNo;

2824 }

2825 /* Note: The tailoring rule string is an optional data item. */

2826 if(!ucol_equals(source, target) && sourceRulesLen != 0) {

2827 log_err("Collator different from collator that was created from the same rules\n");

2828 errorNo++;

2829 }

2830 ucol_close(target);

2831 }

2832 return errorNo;

2833 }

2834

2835

2836 static void TestEquals(void) {

2837 /* ucol_equals is not currently a public API. There is a chance that it will become

2838 * something like this.

2839 */

2840 /* test whether the two collators instantiated from the same locale are equa l */

2841 UErrorCode status = U_ZERO_ERROR;

2842 UParseError parseError;

2843 int32_t noOfLoc = uloc_countAvailable();

2844 const char *locName = NULL;

2845 UCollator source = NULL, target = NULL;

2846 int32_t i = 0;

2847

2848 const char* rules[] = {

2849 "&l < lj <<< Lj <<< LJ",

2850 "&n < nj <<< Nj <<< NJ",

2851 "&ae <<< \\u00e4",

2852 "&AE <<< \\u00c4"

2853 };

2854 /*

2855 const char* badRules[] = {

2856 "&l <<< Lj",

2857 "&n < nj <<< nJ <<< NJ",

2858 "&a <<< \\u00e4",

2859 "&AE <<< \\u00c4 <<< x"

2860 };

2861 */

2862

2863 UChar sourceRules[1024], targetRules[1024];

2864 int32_t sourceRulesSize = 0, targetRulesSize = 0;

2865 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]);

2866

2867 for(i = 0; i < rulesSize; i++) {

2868 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 102 4 - sourceRulesSize);

2869 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRu lesSize, 1024 - targetRulesSize);

2870 }

2871

2872 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEF AULT, &parseError, &status);

2873 if(status == U_FILE_ACCESS_ERROR) {

2874 log_data_err("Is your data around?\n");

2875 return;

2876 } else if(U_FAILURE(status)) {

2877 log_err("Error opening collator\n");

2878 return;

2879 }

2880 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEF AULT, &parseError, &status);

2881 if(!ucol_equals(source, target)) {

2882 log_err("Equivalent collators not equal!\n");

2883 }

2884 ucol_close(source);

2885 ucol_close(target);

2886

2887 source = ucol_open("root", &status);

2888 target = ucol_open("root", &status);

2889 log_verbose("Testing root\n");

2890 if(!ucol_equals(source, source)) {

2891 log_err("Same collator not equal\n");

2892 }

2893 if(TestEqualsForCollator("root", source, target)) {

2894 log_err("Errors for root\n");

2895 }

2896 ucol_close(source);

2897

2898 for(i = 0; i<noOfLoc; i++) {

2899 status = U_ZERO_ERROR;

2900 locName = uloc_getAvailable(i);

2901 /if(hasCollationElements(locName)) {/

2902 log_verbose("Testing equality for locale %s\n", locName);

2903 source = ucol_open(locName, &status);

2904 target = ucol_open(locName, &status);

2905 if (U_FAILURE(status)) {

2906 log_err("Error opening collator for locale %s %s\n", locName, u_err orName(status));

2907 continue;

2908 }

2909 if(TestEqualsForCollator(locName, source, target)) {

2910 log_err("Errors for locale %s\n", locName);

2911 }

2912 ucol_close(source);

2913 /}/

2914 }

2915 }

2916

2917 static void TestJ2726(void) {

2918 UChar a[2] = { 0x61, 0x00 }; /"a"/

2919 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /"a "/

2920 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /" a"/

2921 UErrorCode status = U_ZERO_ERROR;

2922 UCollator *coll = ucol_open("en", &status);

2923 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

2924 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

2925 doTest(coll, a, aSpace, UCOL_EQUAL);

2926 doTest(coll, aSpace, a, UCOL_EQUAL);

2927 doTest(coll, a, spaceA, UCOL_EQUAL);

2928 doTest(coll, spaceA, a, UCOL_EQUAL);

2929 doTest(coll, spaceA, aSpace, UCOL_EQUAL);

2930 doTest(coll, aSpace, spaceA, UCOL_EQUAL);

2931 ucol_close(coll);

2932 }

2933

2934 static void NullRule(void) {

2935 UChar r[3] = {0};

2936 UErrorCode status = U_ZERO_ERROR;

2937 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &st atus);

2938 if(U_SUCCESS(status)) {

2939 log_err("This should have been an error!\n");

2940 ucol_close(coll);

2941 } else {

2942 status = U_ZERO_ERROR;

2943 }

2944 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

2945 if(U_FAILURE(status)) {

2946 log_err_status(status, "Empty rules should have produced a valid collato r -> %s\n", u_errorName(status));

2947 } else {

2948 ucol_close(coll);

2949 }

2950 }

2951

2952 /**

2953 * Test for CollationElementIterator previous and next for the whole set of

2954 * unicode characters with normalization on.

2955 */

2956 static void TestNumericCollation(void)

2957 {

2958 UErrorCode status = U_ZERO_ERROR;

2959

2960 const static char *basicTestStrings[]={

2961 "hello1",

2962 "hello2",

2963 "hello2002",

2964 "hello2003",

2965 "hello123456",

2966 "hello1234567",

2967 "hello10000000",

2968 "hello100000000",

2969 "hello1000000000",

2970 "hello10000000000",

2971 };

2972

2973 const static char *preZeroTestStrings[]={

2974 "avery10000",

2975 "avery010000",

2976 "avery0010000",

2977 "avery00010000",

2978 "avery000010000",

2979 "avery0000010000",

2980 "avery00000010000",

2981 "avery000000010000",

2982 };

2983

2984 const static char *thirtyTwoBitNumericStrings[]={

2985 "avery42949672960",

2986 "avery42949672961",

2987 "avery42949672962",

2988 "avery429496729610"

2989 };

2990

2991 const static char *longNumericStrings[]={

2992 /* Some of these sort out of the order that would expected if digits-as-num bers handled arbitrarily-long digit strings.

2993 In fact, a single collation element can represent a maximum of 254 digit s as a number. Digit strings longer than that

2994 are treated as multiple collation elements. */

2995 "num923456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 345678901234567890123z", /253digits, num + 9.23E252 + z /

2996 "num100000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000", /254digits, num + 1.00E253 /

2997 "num100000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000000000000000000000000000000000000000000000000000000000000 00000000000000000000000", /255digits, num + 1.00E253 + 0, out of numeric order but expected /

2998 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234", /254digits, num + 1.23E253 /

2999 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345", /255digits, num + 1.23E253 + 5 /

3000 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 345678901234567890123456", /256digits, num + 1.23E253 + 56 /

3001 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234567", /257digits, num + 1.23E253 + 567 /

3002 "num123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234a", /254digits, num + 1.23E253 + a, out of numeric order but expected /

3003 "num923456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234", /254digits, num + 9.23E253, out of numeric order but e xpected /

3004 "num923456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 34567890123456789012345678901234567890123456789012345678901234567890123456789012 3456789012345678901234a", /254digits, num + 9.23E253 + a, out of numeric order but expected /

3005 };

3006

3007 const static char *supplementaryDigits[] = {

3008 "\\uD835\\uDFCE", /* 0 */

3009 "\\uD835\\uDFCF", /* 1 */

3010 "\\uD835\\uDFD0", /* 2 */

3011 "\\uD835\\uDFD1", /* 3 */

3012 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */

3013 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */

3014 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */

3015 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */

3016 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */

3017 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */

3018 };

3019

3020 const static char *foreignDigits[] = {

3021 "\\u0661",

3022 "\\u0662",

3023 "\\u0663",

3024 "\\u0661\\u0660",

3025 "\\u0661\\u0662",

3026 "\\u0661\\u0663",

3027 "\\u0662\\u0660",

3028 "\\u0662\\u0662",

3029 "\\u0662\\u0663",

3030 "\\u0663\\u0660",

3031 "\\u0663\\u0662",

3032 "\\u0663\\u0663"

3033 };

3034

3035 const static char *evenZeroes[] = {

3036 "2000",

3037 "2001",

3038 "2002",

3039 "2003"

3040 };

3041

3042 UColAttribute att = UCOL_NUMERIC_COLLATION;

3043 UColAttributeValue val = UCOL_ON;

3044

3045 /* Open our collator. */

3046 UCollator* coll = ucol_open("root", &status);

3047 if (U_FAILURE(status)){

3048 log_err_status(status, "ERROR: in using ucol_open() -> %s\n",

3049 myErrorName(status));

3050 return;

3051 }

3052 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestSt rings)/sizeof(basicTestStrings[0]), &att, &val, 1);

3053 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(t hirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1) ;

3054 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumer icStrings)/sizeof(longNumericStrings[0]), &att, &val, 1);

3055 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits )/sizeof(foreignDigits[0]), &att, &val, 1);

3056 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(suppleme ntaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1);

3057 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeo f(evenZeroes[0]), &att, &val, 1);

3058

3059 /* Setting up our collator to do digits. */

3060 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

3061 if (U_FAILURE(status)){

3062 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n ",

3063 myErrorName(status));

3064 return;

3065 }

3066

3067 /*

3068 Testing that prepended zeroes still yield the correct collation behavior.

3069 We expect that every element in our strings array will be equal.

3070 */

3071 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestSt rings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL);

3072

3073 ucol_close(coll);

3074 }

3075

3076 static void TestTibetanConformance(void)

3077 {

3078 const char* test[] = {

3079 "\\u0FB2\\u0591\\u0F71\\u0061",

3080 "\\u0FB2\\u0F71\\u0061"

3081 };

3082

3083 UErrorCode status = U_ZERO_ERROR;

3084 UCollator *coll = ucol_open("", &status);

3085 UChar source[100];

3086 UChar target[100];

3087 int result;

3088 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

3089 if (U_SUCCESS(status)) {

3090 u_unescape(test[0], source, 100);

3091 u_unescape(test[1], target, 100);

3092 doTest(coll, source, target, UCOL_EQUAL);

3093 result = ucol_strcoll(coll, source, -1, target, -1);

3094 log_verbose("result %d\n", result);

3095 if (UCOL_EQUAL != result) {

3096 log_err("Tibetan comparison error\n");

3097 }

3098 }

3099 ucol_close(coll);

3100

3101 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL);

3102 }

3103

/** Runs the two-string ordering check against the "zh__PINYIN" locale. */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };
    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}

3108

3109 /**

3110 * Iterate through the given iterator, checking to see that all the strings

3111 * in the expected array are present.

3112 * @param expected array of strings we expect to see, or NULL

3113 * @param expectedCount number of elements of expected, or 0

3114 */

3115 static int32_t checkUEnumeration(const char* msg,

3116 UEnumeration* iter,

3117 const char** expected,

3118 int32_t expectedCount) {

3119 UErrorCode ec = U_ZERO_ERROR;

3120 int32_t i = 0, n, j, bit;

3121 int32_t seenMask = 0;

3122

3123 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */

3124 n = uenum_count(iter, &ec);

3125 if (!assertSuccess("count", &ec)) return -1;

3126 log_verbose("%s = [", msg);

3127 for (;; ++i) {

3128 const char* s = uenum_next(iter, NULL, &ec);

3129 if (!assertSuccess("snext", &ec) \|\| s == NULL) break;

3130 if (i != 0) log_verbose(",");

3131 log_verbose("%s", s);

3132 /* check expected list */

3133 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {

3134 if ((seenMask&bit) == 0 &&

3135 uprv_strcmp(s, expected[j]) == 0) {

3136 seenMask \|= bit;

3137 break;

3138 }

3139 }

3140 }

3141 log_verbose("] (%d)\n", i);

3142 assertTrue("count verified", i==n);

3143 /* did we see all expected strings? */

3144 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) {

3145 if ((seenMask&bit)!=0) {

3146 log_verbose("Ok: \"%s\" seen\n", expected[j]);

3147 } else {

3148 log_err("FAIL: \"%s\" not seen\n", expected[j]);

3149 }

3150 }

3151 return n;

3152 }

3153

3154 /**

3155 * Test new API added for separate collation tree.

3156 */

3157 static void TestSeparateTrees(void) {

3158 UErrorCode ec = U_ZERO_ERROR;

3159 UEnumeration *e = NULL;

3160 int32_t n = -1;

3161 UBool isAvailable;

3162 char loc[256];

3163

3164 static const char* AVAIL[] = { "en", "de" };

3165

3166 static const char* KW[] = { "collation" };

3167

3168 static const char* KWVAL[] = { "phonebook", "stroke" };

3169

3170 #if !UCONFIG_NO_SERVICE

3171 e = ucol_openAvailableLocales(&ec);

3172 if (e != NULL) {

3173 assertSuccess("ucol_openAvailableLocales", &ec);

3174 assertTrue("ucol_openAvailableLocales!=0", e!=0);

3175 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL)) ;

3176 (void)n; /* Suppress set but not used warnings. */

3177 /* Don't need to check n because we check list */

3178 uenum_close(e);

3179 } else {

3180 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you m issing data?)\n", u_errorName(ec));

3181 }

3182 #endif

3183

3184 e = ucol_getKeywords(&ec);

3185 if (e != NULL) {

3186 assertSuccess("ucol_getKeywords", &ec);

3187 assertTrue("ucol_getKeywords!=0", e!=0);

3188 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW));

3189 /* Don't need to check n because we check list */

3190 uenum_close(e);

3191 } else {

3192 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing da ta?)\n", u_errorName(ec));

3193 }

3194

3195 e = ucol_getKeywordValues(KW[0], &ec);

3196 if (e != NULL) {

3197 assertSuccess("ucol_getKeywordValues", &ec);

3198 assertTrue("ucol_getKeywordValues!=0", e!=0);

3199 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL));

3200 /* Don't need to check n because we check list */

3201 uenum_close(e);

3202 } else {

3203 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missi ng data?)\n", u_errorName(ec));

3204 }

3205

3206 /* Try setting a warning before calling ucol_getKeywordValues */

3207 ec = U_USING_FALLBACK_WARNING;

3208 e = ucol_getKeywordValues(KW[0], &ec);

3209 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) {

3210 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0);

3211 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e , KWVAL, LEN(KWVAL));

3212 /* Don't need to check n because we check list */

3213 uenum_close(e);

3214 }

3215

3216 /*

3217 U_DRAFT int32_t U_EXPORT2

3218 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,

3219 const char* locale, UBool* isAvailable,

3220 UErrorCode* status);

3221 }

3222 */

3223 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de",

3224 &isAvailable, &ec);

3225 if (assertSuccess("getFunctionalEquivalent", &ec)) {

3226 assertEquals("getFunctionalEquivalent(de)", "root", loc);

3227 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE",

3228 isAvailable == TRUE);

3229 }

3230

3231 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE",

3232 &isAvailable, &ec);

3233 if (assertSuccess("getFunctionalEquivalent", &ec)) {

3234 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc);

3235 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE",

3236 isAvailable == FALSE);

3237 }

3238 }

3239

/**
 * Supersedes TestJ784. Runs two ordered string lists both against a set of
 * "[before 2]" tailoring rules for accented vowels and against the "zh"
 * locale.
 */
static void TestBeforePinyin(void) {
    /* One rule string, assembled from adjacent literals. */
    const static char rules[] = {
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC"
    };

    const static char *test[] = {
        "l\\u0101",   "la",
        "l\\u0101n",  "lan ",
        "l\\u0113",   "le",
        "l\\u0113n",  "len"
    };

    const static char *test2[] = {
        "x\\u0101",  "x\\u0100",  "X\\u0101",  "X\\u0100",
        "x\\u00E1",  "x\\u00C1",  "X\\u00E1",  "X\\u00C1",
        "x\\u01CE",  "x\\u01CD",  "X\\u01CE",  "X\\u01CD",
        "x\\u00E0",  "x\\u00C0",  "X\\u00E0",  "X\\u00C0",
        "xa",        "xA",        "Xa",        "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax",       "xAx"
    };

    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}

3300

3301 static void TestBeforeTightening(void) {

3302 static const struct {

3303 const char *rules;

3304 UErrorCode expectedStatus;

3305 } tests[] = {

3306 { "&[before 1]a<x", U_ZERO_ERROR },

3307 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR },

3308 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR },

3309 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR },

3310 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR },

3311 { "&[before 2]a<<x",U_ZERO_ERROR },

3312 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR },

3313 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR },

3314 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR },

3315 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR },

3316 { "&[before 3]a<<<x",U_ZERO_ERROR },

3317 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR },

3318 { "&[before I]a = x",U_INVALID_FORMAT_ERROR }

3319 };

3320

3321 int32_t i = 0;

3322

3323 UErrorCode status = U_ZERO_ERROR;

3324 UChar rlz[RULE_BUFFER_LEN] = { 0 };

3325 uint32_t rlen = 0;

3326

3327 UCollator *coll = NULL;

3328

3329

3330 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) {

3331 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN);

3332 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &statu s);

3333 if(status != tests[i].expectedStatus) {

3334 log_err_status(status, "Opening a collator with rules %s returned er ror code %s, expected %s\n",

3335 tests[i].rules, u_errorName(status), u_errorName(tests[i].expect edStatus));

3336 }

3337 ucol_close(coll);

3338 status = U_ZERO_ERROR;

3339 }

3340

3341 }

3342

3343 /*

3344 &m < a

3345 &[before 1] a < x <<< X << q <<< Q < z

3346 assert: m <<< M < x <<< X << q <<< Q < z < a < n

3347

3348 &m < a

3349 &[before 2] a << x <<< X << q <<< Q < z

3350 assert: m <<< M < x <<< X << q <<< Q << a < z < n

3351

3352 &m < a

3353 &[before 3] a <<< x <<< X << q <<< Q < z

3354 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n

3355

3356

3357 &m << a

3358 &[before 1] a < x <<< X << q <<< Q < z

3359 assert: x <<< X << q <<< Q < z < m <<< M << a < n

3360

3361 &m << a

3362 &[before 2] a << x <<< X << q <<< Q < z

3363 assert: m <<< M << x <<< X << q <<< Q << a < z < n

3364

3365 &m << a

3366 &[before 3] a <<< x <<< X << q <<< Q < z

3367 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n

3368

3369

3370 &m <<< a

3371 &[before 1] a < x <<< X << q <<< Q < z

3372 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M

3373

3374 &m <<< a

3375 &[before 2] a << x <<< X << q <<< Q < z

3376 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n

3377

3378 &m <<< a

3379 &[before 3] a <<< x <<< X << q <<< Q < z

3380 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n

3381

3382

3383 &[before 1] s < x <<< X << q <<< Q < z

3384 assert: r <<< R < x <<< X << q <<< Q < z < s < n

3385

3386 &[before 2] s << x <<< X << q <<< Q < z

3387 assert: r <<< R < x <<< X << q <<< Q << s < z < n

3388

3389 &[before 3] s <<< x <<< X << q <<< Q < z

3390 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n

3391

3392

3393 &[before 1] \u24DC < x <<< X << q <<< Q < z

3394 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M

3395

3396 &[before 2] \u24DC << x <<< X << q <<< Q < z

3397 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n

3398

3399 &[before 3] \u24DC <<< x <<< X << q <<< Q < z

3400 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n

3401 */

3402

3403

#if 0
/* requires features not yet supported */
/**
 * Table-driven check of "[before N]" tailorings: each entry pairs a rule
 * string with the expected order of its test strings.
 */
static void TestMoreBefore(void) {
    static const struct {
        const char* rules;
        const char* order[16];
        int32_t size;
    } tests[] = {
        { "&m < a &[before 1] a < x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","z","a","n" }, 9},
        { "&m < a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m << a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","m","M","a","n" }, 9},
        { "&m << a &[before 2] a << x <<< X << q <<< Q < z",
          { "m","M","x","X","q","Q","a","z","n" }, 9},
        { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","M","x","X","a","q","Q","z","n" }, 9},
        { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","a","M" }, 9},
        { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","a","M","z","n" }, 9},
        { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z",
          { "m","x","X","a","M","q","Q","z","n" }, 9},
        { "&[before 1] s < x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","z","s","n" }, 9},
        { "&[before 2] s << x <<< X << q <<< Q < z",
          { "r","R","x","X","q","Q","s","z","n" }, 9},
        { "&[before 3] s <<< x <<< X << q <<< Q < z",
          { "r","R","x","X","s","q","Q","z","n" }, 9},
        { "&[before 1] \\u24DC < x <<< X << q <<< Q < z",
          { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9},
        { "&[before 2] \\u24DC << x <<< X << q <<< Q < z",
          { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9},
        { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z",
          { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9}
    };

    const int32_t caseCount = (int32_t)(sizeof(tests)/sizeof(tests[0]));
    int32_t idx;

    for(idx = 0; idx < caseCount; idx++) {
        genericRulesStarter(tests[idx].rules, tests[idx].order, tests[idx].size);
    }
}
#endif

3451

3452 static void TestTailorNULL( void ) {

3453 const static char* rule = "&a <<< '\\u0000'";

3454 UErrorCode status = U_ZERO_ERROR;

3455 UChar rlz[RULE_BUFFER_LEN] = { 0 };

3456 uint32_t rlen = 0;

3457 UChar a = 1, null = 0;

3458 UCollationResult res = UCOL_EQUAL;

3459

3460 UCollator *coll = NULL;

3461

3462

3463 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN);

3464 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status);

3465

3466 if(U_FAILURE(status)) {

3467 log_err_status(status, "Could not open default collator! -> %s\n", u_err orName(status));

3468 } else {

3469 res = ucol_strcoll(coll, &a, 1, &null, 1);

3470

3471 if(res != UCOL_LESS) {

3472 log_err("NULL was not tailored properly!\n");

3473 }

3474 }

3475

3476 ucol_close(coll);

3477 }

3478

3479 static void

3480 TestUpperFirstQuaternary(void)

3481 {

3482 const char* tests[] = { "B", "b", "Bb", "bB" };

3483 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST };

3484 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST };

3485 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]), att, attVals, sizeof(att)/sizeof(att[0]));

3486 }

3487

3488 static void

3489 TestJ4960(void)

3490 {

3491 const char* tests[] = { "\\u00e2T", "aT" };

3492 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL };

3493 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON };

3494 const char* tests2[] = { "a", "A" };

3495 const char* rule = "&[first tertiary ignorable]=A=a";

3496 UColAttribute att2[] = { UCOL_CASE_LEVEL };

3497 UColAttributeValue attVals2[] = { UCOL_ON };

3498 /* Test whether we correctly ignore primary ignorables on case level when */

3499 /* we have only primary & case level */

3500 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(t ests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL);

3501 /* Test whether ICU4J will make case level for sortkeys that have primary stre ngth */

3502 /* and case level */

3503 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0 ]), att, attVals, sizeof(att)/sizeof(att[0]));

3504 /* Test whether completely ignorable letters have case level info (they should n't) */

3505 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(te sts2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL);

3506 }

3507

3508 static void

3509 TestJ5223(void)

3510 {

3511 static const char *test = "this is a test string";

3512 UChar ustr[256];

3513 int32_t ustr_length = u_unescape(test, ustr, 256);

3514 unsigned char sortkey[256];

3515 int32_t sortkey_length;

3516 UErrorCode status = U_ZERO_ERROR;

3517 static UCollator *coll = NULL;

3518 coll = ucol_open("root", &status);

3519 if(U_FAILURE(status)) {

3520 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));

3521 return;

3522 }

3523 ucol_setStrength(coll, UCOL_PRIMARY);

3524 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

3525 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

3526 if (U_FAILURE(status)) {

3527 log_err("Failed setting atributes\n");

3528 return;

3529 }

3530 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0);

3531 if (sortkey_length > 256) return;

3532

3533 /* we mark the position where the null byte should be written in advance */

3534 sortkey[sortkey_length-1] = 0xAA;

3535

3536 /* we set the buffer size one byte higher than needed */

3537 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,

3538 sortkey_length+1);

3539

3540 /* no error occurs (for me) */

3541 if (sortkey[sortkey_length-1] == 0xAA) {

3542 log_err("Hit bug at first try\n");

3543 }

3544

3545 /* we mark the position where the null byte should be written again */

3546 sortkey[sortkey_length-1] = 0xAA;

3547

3548 /* this time we set the buffer size to the exact amount needed */

3549 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey,

3550 sortkey_length);

3551

3552 /* now the trailing null byte is not written */

3553 if (sortkey[sortkey_length-1] == 0xAA) {

3554 log_err("Hit bug at second try\n");

3555 }

3556

3557 ucol_close(coll);

3558 }

3559

/* Regression test for Thai partial sort key problem: runs the two Thai
 * strings below, in this order, through genericLocaleStarter for "th". */
static void
TestJ5232(void)
{
    const static char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, LEN(thaiPair));
}

3571

/**
 * Regression test J5367: runs the ordering "a", "y" against rules that
 * tailor "a" after [first secondary ignorable].
 */
static void
TestJ5367(void)
{
    const char* rules = "&Ny << Y &[first secondary ignorable] <<< a";
    const static char *ordered[] = { "a", "y" };

    genericRulesStarter(rules, ordered, LEN(ordered));
}

3579

3580 static void

3581 TestVI5913(void)

3582 {

3583 UErrorCode status = U_ZERO_ERROR;

3584 int32_t i, j;

3585 UCollator *coll =NULL;

3586 uint8_t resColl[100], expColl[100];

3587 int32_t rLen, tLen, ruleLen, sLen, kLen;

3588 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypog egrammeni*/

3589 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/

3590 /*

3591 * Note: Just tailoring &z<ae^ does not work as expected:

3592 * The UCA spec requires for discontiguous contractions that they

3593 * extend an existing match by one combining mark at a time.

3594 * Therefore, ae must be a contraction so that the builder finds

3595 * discontiguous contractions for ae^, for example with an intervening under dot.

3596 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302 , etc.

3597 */

3598 UChar rule3[256]={

3599 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */

3600 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/

3601 0};

3602 static const UChar tData[][20]={

3603 {0x1EAC, 0},

3604 {0x0041, 0x0323, 0x0302, 0},

3605 {0x1EA0, 0x0302, 0},

3606 {0x00C2, 0x0323, 0},

3607 {0x1ED8, 0}, /* O with dot and circumflex */

3608 {0x1ECC, 0x0302, 0},

3609 {0x1EB7, 0},

3610 {0x1EA1, 0x0306, 0},

3611 };

3612 static const UChar tailorData[][20]={

3613 {0x1FA2, 0}, /* Omega with 3 combining marks */

3614 {0x03C9, 0x0313, 0x0300, 0x0345, 0},

3615 {0x1FF3, 0x0313, 0x0300, 0},

3616 {0x1F60, 0x0300, 0x0345, 0},

3617 {0x1F62, 0x0345, 0},

3618 {0x1FA0, 0x0300, 0},

3619 };

3620 static const UChar tailorData2[][20]={

3621 {0x1E63, 0x030C, 0}, /* s with dot below + caron */

3622 {0x0073, 0x0323, 0x030C, 0},

3623 {0x0073, 0x030C, 0x0323, 0},

3624 };

3625 static const UChar tailorData3[][20]={

3626 {0x007a, 0}, /* z */

3627 {0x0061, 0x0065, 0}, /* a + e */

3628 {0x0061, 0x00ea, 0}, /* a + e with circumflex */

3629 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */

3630 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumf lex */

3631 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot b elow */

3632 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */

3633 {0x00EA, 0}, /* e with circumflex */

3634 };

3635

3636 /* Test Vietnamese sort. */

3637 coll = ucol_open("vi", &status);

3638 if(U_FAILURE(status)) {

3639 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(sta tus));

3640 return;

3641 }

3642 log_verbose("\n\nVI collation:");

3643 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tDat a[2])) ) {

3644 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");

3645 }

3646 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tDat a[3])) ) {

3647 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");

3648 }

3649 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tDat a[4])) ) {

3650 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n");

3651 }

3652 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tDat a[6])) ) {

3653 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");

3654 }

3655

3656 for (j=0; j<8; j++) {

3657 tLen = u_strlen(tData[j]);

3658 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);

3659 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);

3660 for(i = 0; i<rLen; i++) {

3661 log_verbose(" %02X", resColl[i]);

3662 }

3663 }

3664

3665 ucol_close(coll);

3666

3667 /* Test Romanian sort. */

3668 coll = ucol_open("ro", &status);

3669 log_verbose("\n\nRO collation:");

3670 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tDat a[1])) ) {

3671 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n");

3672 }

3673 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tDat a[5])) ) {

3674 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n");

3675 }

3676 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tDat a[7])) ) {

3677 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n");

3678 }

3679

3680 for (j=4; j<8; j++) {

3681 tLen = u_strlen(tData[j]);

3682 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen);

3683 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100);

3684 for(i = 0; i<rLen; i++) {

3685 log_verbose(" %02X", resColl[i]);

3686 }

3687 }

3688 ucol_close(coll);

3689

3690 /* Test the precomposed Greek character with 3 combining marks. */

3691 log_verbose("\n\nTailoring test: Greek character with 3 combining marks");

3692 ruleLen = u_strlen(rule);

3693 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status);

3694 if (U_FAILURE(status)) {

3695 log_err("ucol_openRules failed with %s\n", u_errorName(status));

3696 return;

3697 }

3698 sLen = u_strlen(tailorData[0]);

3699 for (j=1; j<6; j++) {

3700 tLen = u_strlen(tailorData[j]);

3701 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) {

3702 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]);

3703 }

3704 }

3705 /* Test getSortKey. */

3706 tLen = u_strlen(tailorData[0]);

3707 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100);

3708 for (j=0; j<6; j++) {

3709 tLen = u_strlen(tailorData[j]);

3710 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100);

3711 if ( kLen!=rLen \|\| uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {

3712 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen);

3713 for(i = 0; i<rLen; i++) {

3714 log_err(" %02X", resColl[i]);

3715 }

3716 }

3717 }

3718 ucol_close(coll);

3719

3720 log_verbose("\n\nTailoring test for s with caron:");

3721 ruleLen = u_strlen(rule2);

3722 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

3723 tLen = u_strlen(tailorData2[0]);

3724 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100);

3725 for (j=1; j<3; j++) {

3726 tLen = u_strlen(tailorData2[j]);

3727 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100);

3728 if ( kLen!=rLen \|\| uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {

3729 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailo rData[j], tLen);

3730 for(i = 0; i<rLen; i++) {

3731 log_err(" %02X", resColl[i]);

3732 }

3733 }

3734 }

3735 ucol_close(coll);

3736

3737 log_verbose("\n\nTailoring test for &z< ae with circumflex:");

3738 ruleLen = u_strlen(rule3);

3739 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

3740 tLen = u_strlen(tailorData3[3]);

3741 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100);

3742 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3 ], tLen), tLen);

3743 for(i = 0; i<kLen; i++) {

3744 log_verbose(" %02X", expColl[i]);

3745 }

3746 for (j=4; j<6; j++) {

3747 tLen = u_strlen(tailorData3[j]);

3748 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100);

3749

3750 if ( kLen!=rLen \|\| uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!= 0 ) {

3751 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescs trdup(tailorData3[j], tLen), tLen);

3752 for(i = 0; i<rLen; i++) {

3753 log_err(" %02X", resColl[i]);

3754 }

3755 }

3756

3757 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailo rData3[j], tLen), tLen);

3758 for(i = 0; i<rLen; i++) {

3759 log_verbose(" %02X", resColl[i]);

3760 }

3761 }

3762 ucol_close(coll);

3763 }

3764

3765 static void

3766 TestTailor6179(void)

3767 {

3768 UErrorCode status = U_ZERO_ERROR;

3769 int32_t i;

3770 UCollator *coll =NULL;

3771 uint8_t resColl[100];

3772 int32_t rLen, tLen, ruleLen;

3773 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */

3774 static const UChar rule1[]={

3775 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x7 9,

3776 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x2 0,0x61,0x20,

3777 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x7 2,0x79,0x20,

3778 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x2 0, 0};

3779 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */

3780 static const UChar rule2[]={

3781 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x6 1,

3782 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3 C,0x3C,0x3C,

3783 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6 F,0x6E,

3784 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x6 5,0x5D,0x3C,

3785 0x3C,0x3C,0x20,0x62,0};

3786

3787 static const UChar tData1[][4]={

3788 {0x61, 0},

3789 {0x62, 0},

3790 { 0xFDD0,0x009E, 0}

3791 };

3792 static const UChar tData2[][4]={

3793 {0x61, 0},

3794 {0x62, 0},

3795 { 0xFDD0,0x009E, 0}

3796 };

3797

3798 /*

3799 * These values from FractionalUCA.txt will change,

3800 * and need to be updated here.

3801 * TODO: Make this not check for particular sort keys.

3802 * Instead, test that we get CEs before & after other ignorables; see ticket #6179.

3803 */

3804 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0};

3805 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0};

3806 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0};

3807 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0};

3808

3809 UParseError parseError;

3810

3811 /* Test [Last Primary ignorable] */

3812

3813 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary i gnorable]<<b\n");

3814 ruleLen = u_strlen(rule1);

3815 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status) ;

3816 if (U_FAILURE(status)) {

3817 log_err_status(status, "Tailoring test: &[last primary ignorable] failed ! -> %s\n", u_errorName(status));

3818 return;

3819 }

3820 tLen = u_strlen(tData1[0]);

3821 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100);

3822 if (rLen != LEN(lastPrimaryIgnCE) \|\| uprv_memcmp(resColl, lastPrimaryIgnCE, rLen) != 0) {

3823 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0, tData1[0], rLen);

3824 for(i = 0; i<rLen; i++) {

3825 log_err(" %02X", resColl[i]);

3826 }

3827 log_err("\n");

3828 }

3829 tLen = u_strlen(tData1[1]);

3830 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100);

3831 if (rLen != LEN(firstPrimaryIgnCE) \|\| uprv_memcmp(resColl, firstPrimaryIgnCE , rLen) != 0) {

3832 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData1[1], rLen);

3833 for(i = 0; i<rLen; i++) {

3834 log_err(" %02X", resColl[i]);

3835 }

3836 log_err("\n");

3837 }

3838 ucol_close(coll);

3839

3840

3841 /* Test [Last Secondary ignorable] */

3842 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first second ary ignorable]<<<b\n");

3843 ruleLen = u_strlen(rule2);

3844 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError, &status);

3845 if (U_FAILURE(status)) {

3846 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u _errorName(status));

3847 log_info(" offset=%d \"%s\" \| \"%s\"\n",

3848 parseError.offset, aescstrdup(parseError.preContext, -1), aescs trdup(parseError.postContext, -1));

3849 return;

3850 }

3851 tLen = u_strlen(tData2[0]);

3852 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100);

3853 if (rLen != LEN(lastSecondaryIgnCE) \|\| uprv_memcmp(resColl, lastSecondaryIgn CE, rLen) != 0) {

3854 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0 , tData2[0], rLen);

3855 for(i = 0; i<rLen; i++) {

3856 log_err(" %02X", resColl[i]);

3857 }

3858 log_err("\n");

3859 }

3860 tLen = u_strlen(tData2[1]);

3861 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100);

3862 if (rLen != LEN(firstSecondaryIgnCE) \|\| uprv_memcmp(resColl, firstSecondaryI gnCE, rLen) != 0) {

3863 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1, tData2[1], rLen);

3864 for(i = 0; i<rLen; i++) {

3865 log_err(" %02X", resColl[i]);

3866 }

3867 log_err("\n");

3868 }

3869 ucol_close(coll);

3870 }

3871

3872 static void

3873 TestUCAPrecontext(void)

3874 {

3875 UErrorCode status = U_ZERO_ERROR;

3876 int32_t i, j;

3877 UCollator *coll =NULL;

3878 uint8_t resColl[100], prevColl[100];

3879 int32_t rLen, tLen, ruleLen;

3880 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */

3881 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0};

3882 /* & l middle-dot << a a is an expansion. */

3883

3884 UChar tData1[][20]={

3885 { 0xb7, 0}, /* standalone middle dot(0xb7) */

3886 { 0x387, 0}, /* standalone middle dot(0x387) */

3887 { 0x61, 0}, /* a */

3888 { 0x6C, 0}, /* l */

3889 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */

3890 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */

3891 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */

3892 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */

3893 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */

3894 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */

3895 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */

3896 };

3897

3898 log_verbose("\n\nEN collation:");

3899 coll = ucol_open("en", &status);

3900 if (U_FAILURE(status)) {

3901 log_err_status(status, "Tailoring test: &z <<a\|- failed! -> %s\n", u_err orName(status));

3902 return;

3903 }

3904 for (j=0; j<11; j++) {

3905 tLen = u_strlen(tData1[j]);

3906 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

3907 if ((j>0) && (strcmp((char )resColl, (char )prevColl)<0)) {

3908 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

3909 j, tData1[j]);

3910 }

3911 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

3912 for(i = 0; i<rLen; i++) {

3913 log_verbose(" %02X", resColl[i]);

3914 }

3915 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3916 }

3917 ucol_close(coll);

3918

3919

3920 log_verbose("\n\nJA collation:");

3921 coll = ucol_open("ja", &status);

3922 if (U_FAILURE(status)) {

3923 log_err("Tailoring test: &z <<a\|- failed!");

3924 return;

3925 }

3926 for (j=0; j<11; j++) {

3927 tLen = u_strlen(tData1[j]);

3928 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

3929 if ((j>0) && (strcmp((char )resColl, (char )prevColl)<0)) {

3930 log_err("\n Expecting greater key than previous test case: Data[%d] :%s.",

3931 j, tData1[j]);

3932 }

3933 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

3934 for(i = 0; i<rLen; i++) {

3935 log_verbose(" %02X", resColl[i]);

3936 }

3937 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3938 }

3939 ucol_close(coll);

3940

3941

3942 log_verbose("\n\nTailoring test: & middle dot < a ");

3943 ruleLen = u_strlen(rule1);

3944 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&statu s);

3945 if (U_FAILURE(status)) {

3946 log_err("Tailoring test: & middle dot < a failed!");

3947 return;

3948 }

3949 for (j=0; j<11; j++) {

3950 tLen = u_strlen(tData1[j]);

3951 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

3952 if ((j>0) && (strcmp((char )resColl, (char )prevColl)<0)) {

3953 log_err("\n Expecting greater key than previous test case: Data[%d ] :%s.",

3954 j, tData1[j]);

3955 }

3956 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

3957 for(i = 0; i<rLen; i++) {

3958 log_verbose(" %02X", resColl[i]);

3959 }

3960 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3961 }

3962 ucol_close(coll);

3963

3964

3965 log_verbose("\n\nTailoring test: & l middle-dot << a ");

3966 ruleLen = u_strlen(rule2);

3967 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&stat us);

3968 if (U_FAILURE(status)) {

3969 log_err("Tailoring test: & l middle-dot << a failed!");

3970 return;

3971 }

3972 for (j=0; j<11; j++) {

3973 tLen = u_strlen(tData1[j]);

3974 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100);

3975 if ((j>0) && (j!=3) && (strcmp((char )resColl, (char )prevColl)<0)) {

3976 log_err("\n Expecting greater key than previous test case: Data[% d] :%s.",

3977 j, tData1[j]);

3978 }

3979 if ((j==3)&&(strcmp((char )resColl, (char )prevColl)>0)) {

3980 log_err("\n Expecting smaller key than previous test case: Data[% d] :%s.",

3981 j, tData1[j]);

3982 }

3983 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen);

3984 for(i = 0; i<rLen; i++) {

3985 log_verbose(" %02X", resColl[i]);

3986 }

3987 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1));

3988 }

3989 ucol_close(coll);

3990 }

3991

3992 static void

3993 TestOutOfBuffer5468(void)

3994 {

3995 static const char *test = "\\u4e00";

3996 UChar ustr[256];

3997 int32_t ustr_length = u_unescape(test, ustr, 256);

3998 unsigned char shortKeyBuf[1];

3999 int32_t sortkey_length;

4000 UErrorCode status = U_ZERO_ERROR;

4001 static UCollator *coll = NULL;

4002

4003 coll = ucol_open("root", &status);

4004 if(U_FAILURE(status)) {

4005 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status));

4006 return;

4007 }

4008 ucol_setStrength(coll, UCOL_PRIMARY);

4009 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);

4010 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

4011 if (U_FAILURE(status)) {

4012 log_err("Failed setting atributes\n");

4013 return;

4014 }

4015

4016 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeo f(shortKeyBuf));

4017 if (sortkey_length != 4) {

4018 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length);

4019 }

4020 log_verbose("length of sortKey is %d", sortkey_length);

4021 ucol_close(coll);

4022 }

4023

4024 #define TSKC_DATA_SIZE 5

4025 #define TSKC_BUF_SIZE 50

4026 static void

4027 TestSortKeyConsistency(void)

4028 {

4029 UErrorCode icuRC = U_ZERO_ERROR;

4030 UCollator* ucol;

4031 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD};

4032

4033 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE];

4034 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE];

4035 int32_t i, j, i2;

4036

4037 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC);

4038 if (U_FAILURE(icuRC))

4039 {

4040 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_error Name(icuRC));

4041 return;

4042 }

4043

4044 for (i = 0; i < TSKC_DATA_SIZE; i++)

4045 {

4046 UCharIterator uiter;

4047 uint32_t state[2] = { 0, 0 };

4048 int32_t dataLen = i+1;

4049 for (j=0; j<TSKC_BUF_SIZE; j++)

4050 bufFull[i][j] = bufPart[i][j] = 0;

4051

4052 /* Full sort key */

4053 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE);

4054

4055 /* Partial sort key */

4056 uiter_setString(&uiter, data, dataLen);

4057 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &ic uRC);

4058 if (U_FAILURE(icuRC))

4059 {

4060 log_err("ucol_nextSortKeyPart failed\n");

4061 ucol_close(ucol);

4062 return;

4063 }

4064

4065 for (i2=0; i2<i; i2++)

4066 {

4067 UBool fullMatch = TRUE;

4068 UBool partMatch = TRUE;

4069 for (j=0; j<TSKC_BUF_SIZE; j++)

4070 {

4071 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]);

4072 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]);

4073 }

4074 if (fullMatch != partMatch) {

4075 log_err(fullMatch ? "full key was consistent, but partial key ch anged\n"

4076 : "partial key was consistent, but full key ch anged\n");

4077 ucol_close(ucol);

4078 return;

4079 }

4080 }

4081 }

4082

4083 /=============================================/

4084 ucol_close(ucol);

4085 }

4086

4087 /* ticket: 6101 */

4088 static void TestCroatianSortKey(void) {

4089 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3";

4090 UErrorCode status = U_ZERO_ERROR;

4091 UCollator *ucol;

4092 UCharIterator iter;

4093

4094 static const UChar text[] = { 0x0044, 0xD81A };

4095

4096 size_t length = sizeof(text)/sizeof(*text);

4097

4098 uint8_t textSortKey[32];

4099 size_t lenSortKey = 32;

4100 size_t actualSortKeyLen;

4101 uint32_t uStateInfo[2] = { 0, 0 };

4102

4103 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status);

4104 if (U_FAILURE(status)) {

4105 log_err_status(status, "ucol_openFromShortString error in Craotian test. -> %s\n", u_errorName(status));

4106 return;

4107 }

4108

4109 uiter_setString(&iter, text, length);

4110

4111 actualSortKeyLen = ucol_nextSortKeyPart(

4112 ucol, &iter, (uint32_t*)uStateInfo,

4113 textSortKey, lenSortKey, &status

4114 );

4115

4116 if (actualSortKeyLen == lenSortKey) {

4117 log_err("ucol_nextSortKeyPart did not give correct result in Croatian te st.\n");

4118 }

4119

4120 ucol_close(ucol);

4121 }

4122

4123 /* ticket: 6140 */

4124 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the collator since

4125 * they are both Hiragana and Katakana

4126 */

4127 #define SORTKEYLEN 50

4128 static void TestHiragana(void) {

4129 UErrorCode status = U_ZERO_ERROR;

4130 UCollator* ucol;

4131 UCollationResult strcollresult;

4132 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */

4133 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 };

4134 int32_t data1Len = sizeof(data1)/sizeof(*data1);

4135 int32_t data2Len = sizeof(data2)/sizeof(*data2);

4136 int32_t i, j;

4137 uint8_t sortKey1[SORTKEYLEN];

4138 uint8_t sortKey2[SORTKEYLEN];

4139

4140 UCharIterator uiter1;

4141 UCharIterator uiter2;

4142 uint32_t state1[2] = { 0, 0 };

4143 uint32_t state2[2] = { 0, 0 };

4144 int32_t keySize1;

4145 int32_t keySize2;

4146

4147 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL,

4148 &status);

4149 if (U_FAILURE(status)) {

4150 log_err_status(status, "Error status: %s; Unable to open collator from s hort string.\n", u_errorName(status));

4151 return;

4152 }

4153

4154 /* Start of full sort keys */

4155 /* Full sort key1 */

4156 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN);

4157 /* Full sort key2 */

4158 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN);

4159 if (keySize1 == keySize2) {

4160 for (i = 0; i < keySize1; i++) {

4161 if (sortKey1[i] != sortKey2[i]) {

4162 log_err("Full sort keys are different. Should be equal.");

4163 }

4164 }

4165 } else {

4166 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2) ;

4167 }

4168 /* End of full sort keys */

4169

4170 /* Start of partial sort keys */

4171 /* Partial sort key1 */

4172 uiter_setString(&uiter1, data1, data1Len);

4173 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN, &status);

4174 /* Partial sort key2 */

4175 uiter_setString(&uiter2, data2, data2Len);

4176 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN, &status);

4177 if (U_SUCCESS(status) && keySize1 == keySize2) {

4178 for (j = 0; j < keySize1; j++) {

4179 if (sortKey1[j] != sortKey2[j]) {

4180 log_err("Partial sort keys are different. Should be equal");

4181 }

4182 }

4183 } else {

4184 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d % d", u_errorName(status), keySize1, keySize2);

4185 }

4186 /* End of partial sort keys */

4187

4188 /* Start of strcoll */

4189 /* Use ucol_strcoll() to determine ordering */

4190 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len);

4191 if (strcollresult != UCOL_EQUAL) {

4192 log_err("Result from ucol_strcoll() should be UCOL_EQUAL.");

4193 }

4194

4195 ucol_close(ucol);

4196 }

4197

4198 /* Convenient struct for running collation tests */

4199 typedef struct {

4200 const UChar source[MAX_TOKEN_LEN]; /* String on left */

4201 const UChar target[MAX_TOKEN_LEN]; /* String on right */

4202 UCollationResult result; /* -1, 0 or +1, depending on collation */

4203 } OneTestCase;

4204

4205 /*

4206 * Utility function to test one collation test case.

4207 * @param testcases Array of test cases.

4208 * @param n_testcases Size of the array testcases.

4209 * @param str_rules Array of rules. These rules should be specifying the same r ule in different formats.

4210 * @param n_rules Size of the array str_rules.

4211 */

4212 static void doTestOneTestCase(const OneTestCase testcases[],

4213 int n_testcases,

4214 const char* str_rules[],

4215 int n_rules)

4216 {

4217 int rule_no, testcase_no;

4218 UChar rule[500];

4219 int32_t length = 0;

4220 UErrorCode status = U_ZERO_ERROR;

4221 UParseError parse_error;

4222 UCollator *myCollation;

4223

4224 for (rule_no = 0; rule_no < n_rules; ++rule_no) {

4225

4226 length = u_unescape(str_rules[rule_no], rule, 500);

4227 if (length == 0) {

4228 log_err("ERROR: The rule cannot be unescaped: %s\n");

4229 return;

4230 }

4231 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er ror, &status);

4232 if(U_FAILURE(status)){

4233 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

4234 log_info(" offset=%d \"%s\" \| \"%s\"\n",

4235 parse_error.offset,

4236 aescstrdup(parse_error.preContext, -1),

4237 aescstrdup(parse_error.postContext, -1));

4238 return;

4239 }

4240 log_verbose("Testing the <<* syntax\n");

4241 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

4242 ucol_setStrength(myCollation, UCOL_TERTIARY);

4243 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) {

4244 doTest(myCollation,

4245 testcases[testcase_no].source,

4246 testcases[testcase_no].target,

4247 testcases[testcase_no].result

4248 );

4249 }

4250 ucol_close(myCollation);

4251 }

4252 }

4253

4254 const static OneTestCase rangeTestcases[] = {

4255 { {0x0061}, {0x0062}, UCOL _LESS }, /* "a" < "b" */

4256 { {0x0062}, {0x0063}, UCOL _LESS }, /* "b" < "c" */

4257 { {0x0061}, {0x0063}, UCOL _LESS }, /* "a" < "c" */

4258

4259 { {0x0062}, {0x006b}, UCOL _LESS }, /* "b" << "k" */

4260 { {0x006b}, {0x006c}, UCOL _LESS }, /* "k" << "l" */

4261 { {0x0062}, {0x006c}, UCOL _LESS }, /* "b" << "l" */

4262 { {0x0061}, {0x006c}, UCOL _LESS }, /* "a" < "l" */

4263 { {0x0061}, {0x006d}, UCOL _LESS }, /* "a" < "m" */

4264

4265 { {0x0079}, {0x006d}, UCOL _LESS }, /* "y" < "f" */

4266 { {0x0079}, {0x0067}, UCOL _LESS }, /* "y" < "g" */

4267 { {0x0061}, {0x0068}, UCOL _LESS }, /* "y" < "h" */

4268 { {0x0061}, {0x0065}, UCOL _LESS }, /* "g" < "e" */

4269

4270 { {0x0061}, {0x0031}, UCOL _EQUAL }, /* "a" = "1" */

4271 { {0x0061}, {0x0032}, UCOL _EQUAL }, /* "a" = "2" */

4272 { {0x0061}, {0x0033}, UCOL _EQUAL }, /* "a" = "3" */

4273 { {0x0061}, {0x0066}, UCOL _LESS }, /* "a" < "f" */

4274 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL _LESS }, /* "la" < "123" */

4275 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL _EQUAL }, /* "aaa" = "123" */

4276 { {0x0062}, {0x007a}, UCOL _LESS }, /* "b" < "z" */

4277 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL _LESS }, /* "azm" = "2yc" */

4278 };

4279

4280 static int nRangeTestcases = LEN(rangeTestcases);

4281

4282 const static OneTestCase rangeTestcasesSupplemental[] = {

4283 { {0x4e00}, {0xfffb}, UCOL _LESS }, /* U+4E00 < U+FFFB */

4284 { {0xfffb}, {0xd800, 0xdc00}, UCOL _LESS }, /* U+FFFB < U+10000 */

4285 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+10000 < U+10001 */

4286 { {0x4e00}, {0xd800, 0xdc01}, UCOL _LESS }, /* U+4E00 < U+10001 */

4287 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10000 < U+10001 */

4288 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+10000 < U+10001 */

4289 { {0x4e00}, {0xd800, 0xdc02}, UCOL _LESS }, /* U+4E00 < U+10001 */

4290 };

4291

4292 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental);

4293

4294 const static OneTestCase rangeTestcasesQwerty[] = {

4295 { {0x0071}, {0x0077}, UCOL _LESS }, /* "q" < "w" */

4296 { {0x0077}, {0x0065}, UCOL _LESS }, /* "w" < "e" */

4297

4298 { {0x0079}, {0x0075}, UCOL _LESS }, /* "y" < "u" */

4299 { {0x0071}, {0x0075}, UCOL _LESS }, /* "q" << "u" */

4300

4301 { {0x0074}, {0x0069}, UCOL _LESS }, /* "t" << "i" */

4302 { {0x006f}, {0x0070}, UCOL _LESS }, /* "o" << "p" */

4303

4304 { {0x0079}, {0x0065}, UCOL _LESS }, /* "y" < "e" */

4305 { {0x0069}, {0x0075}, UCOL _LESS }, /* "i" < "u" */

4306

4307 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074},

4308 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL _LESS }, /* "quest" < "were" */

4309 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b},

4310 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL _LESS }, /* "quack" < "quest" */

4311 };

4312

4313 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty);

4314

4315 static void TestSameStrengthList(void)

4316 {

4317 const char* strRules[] = {

4318 /* Normal */

4319 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3",

4320

4321 /* Lists */

4322 "&a<bcd &b<<klm &k<<<xyz &y<fghe &a=*123",

4323 };

4324 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));

4325 }

4326

4327 static void TestSameStrengthListQuoted(void)

4328 {

4329 const char* strRules[] = {

4330 /* Lists with quoted characters */

4331 "&\\u0061<bcd &b<<klm &k<<<xyz &y<f\\u0067\\u0068e &a=*123",

4332 "&'\\u0061'<bcd &b<<klm &k<<<xyz &y<f'\\u0067\\u0068'e &a=*123",

4333

4334 "&\\u0061<b\\u0063d &b<<klm &k<<<xyz &\\u0079<fgh\\u0065 &a=*\\u0031\\u0 032\\u0033",

4335 "&'\\u0061'<b'\\u0063'd &b<<klm &k<<<xyz &'\\u0079'<fgh'\\u0065' &a=*'\\ u0031\\u0032\\u0033'",

4336

4337 "&\\u0061<\\u0062c\\u0064 &b<<klm &k<<<xyz &y<fghe &a=*\\u0031\\u0032\\ u0033",

4338 "&'\\u0061'<'\\u0062'c'\\u0064' &b<<klm &k<<<xyz &y<fghe &a=*'\\u0031\\ u0032\\u0033'",

4339 };

4340 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));

4341 }

4342

4343 static void TestSameStrengthListSupplemental(void)

4344 {

4345 const char* strRules[] = {

4346 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002",

4347 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02",

4348 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002",

4349 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02",

4350 };

4351 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules));

4352 }

4353

4354 static void TestSameStrengthListQwerty(void)

4355 {

4356 const char* strRules[] = {

4357 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */

4358 "&q<wer &w<<tyu &t<<<iop &o=asd", /* Lists */

4359 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00 74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064",

4360 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\ \u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064",

4361 "&\\u0071<\\u0077\\u0065\\u0072 &\\u0077<<\\u0074\\u0079\\u0075 &\\u0074<< <\\u0069\\u006f\\u0070 &\\u006f=\\u0061\\u0073\\u0064",

4362

4363 /* Quoted characters also will work if two quoted characters are not consecu tive. */

4364 "&\\u0071<'\\u0077'\\u0065\\u0072 &\\u0077<<\\u0074'\\u0079'\\u0075 &\\u00 74<<<\\u0069\\u006f'\\u0070' &'\\u006f'=\\u0061\\u0073\\u0064",

4365

4366 /* Consecutive quoted charactes do not work, because a '' will be treated as a quote character. */

4367 /* "&\\u0071<'\\u0077''\\u0065''\\u0072' &\\u0077<<'\\u0074''\\u0079''\\u0 075' &\\u0074<<<'\\u0069''\\u006f''\\u0070' &'\\u006f'=\\u0061\\u0073\\u0064", */

4368

4369 };

4370 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(s trRules));

4371 }

4372

4373 static void TestSameStrengthListQuotedQwerty(void)

4374 {

4375 const char* strRules[] = {

4376 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */

4377 "&q<wer &w<<tyu &t<<<iop &o=asd", /* Lists */

4378 "&q<w'e'r &w<<'t'yu &t<<<io'p' &o='a's'd'", /* Lists with quotes */

4379

4380 /* Lists with continuous quotes may not work, because '' will be treated as a quote character. */

4381 /* "&q<'w''e''r' &w<<'t''y''u' &t<<<'i''o''p' &o='a''s''d'", */

4382 };

4383 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(s trRules));

4384 }

4385

4386 static void TestSameStrengthListRanges(void)

4387 {

4388 const char* strRules[] = {

4389 "&a<b-d &b<<k-m &k<<<x-z &y<f-he &a=*1-3",

4390 };

4391 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules));

4392 }

4393

4394 static void TestSameStrengthListSupplementalRanges(void)

4395 {

4396 const char* strRules[] = {

4397 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them. */

4398 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002",

4399 };

4400 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str Rules, LEN(strRules));

4401 }

4402

4403 static void TestSpecialCharacters(void)

4404 {

4405 const char* strRules[] = {

4406 /* Normal */

4407 "&';'<'+'<','<'-'<'&'<'*'",

4408

4409 /* List */

4410 "&';'<'+,-&'",

4411

4412 /* Range */

4413 "&';'<'+'-'-&'",

4414 };

4415

4416 const static OneTestCase specialCharacterStrings[] = {

4417 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */

4418 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */

4419 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */

4420 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */

4421 };

4422 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRu les, LEN(strRules));

4423 }

4424

4425 static void TestPrivateUseCharacters(void)

4426 {

4427 const char* strRules[] = {

4428 /* Normal */

4429 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'",

4430 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d",

4431 };

4432

4433 const static OneTestCase privateUseCharacterStrings[] = {

4434 { {0x5ea7}, {0xe2d8}, UCOL_LESS },

4435 { {0xe2d8}, {0xe2d9}, UCOL_LESS },

4436 { {0xe2d9}, {0xe2da}, UCOL_LESS },

4437 { {0xe2da}, {0xe2db}, UCOL_LESS },

4438 { {0xe2db}, {0xe2dc}, UCOL_LESS },

4439 { {0xe2dc}, {0x4e8d}, UCOL_LESS },

4440 };

4441 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));

4442 }

4443

4444 static void TestPrivateUseCharactersInList(void)

4445 {

4446 const char* strRules[] = {

4447 /* List */

4448 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'",

4449 /* "&'\\u5ea7'<\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", /

4450 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d",

4451 };

4452

4453 const static OneTestCase privateUseCharacterStrings[] = {

4454 { {0x5ea7}, {0xe2d8}, UCOL_LESS },

4455 { {0xe2d8}, {0xe2d9}, UCOL_LESS },

4456 { {0xe2d9}, {0xe2da}, UCOL_LESS },

4457 { {0xe2da}, {0xe2db}, UCOL_LESS },

4458 { {0xe2db}, {0xe2dc}, UCOL_LESS },

4459 { {0xe2dc}, {0x4e8d}, UCOL_LESS },

4460 };

4461 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));

4462 }

4463

4464 static void TestPrivateUseCharactersInRange(void)

4465 {

4466 const char* strRules[] = {

4467 /* Range */

4468 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'",

4469 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d",

4470 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */

4471 };

4472

4473 const static OneTestCase privateUseCharacterStrings[] = {

4474 { {0x5ea7}, {0xe2d8}, UCOL_LESS },

4475 { {0xe2d8}, {0xe2d9}, UCOL_LESS },

4476 { {0xe2d9}, {0xe2da}, UCOL_LESS },

4477 { {0xe2da}, {0xe2db}, UCOL_LESS },

4478 { {0xe2db}, {0xe2dc}, UCOL_LESS },

4479 { {0xe2dc}, {0x4e8d}, UCOL_LESS },

4480 };

4481 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings), strRules, LEN(strRules));

4482 }

4483

4484 static void TestInvalidListsAndRanges(void)

4485 {

4486 const char* invalidRules[] = {

4487 /* Range not in starred expression */

4488 "&\\ufffe<\\uffff-\\U00010002",

4489

4490 /* Range without start */

4491 "&a<*-c",

4492

4493 /* Range without end */

4494 "&a<*b-",

4495

4496 /* More than one hyphen */

4497 "&a<*b-g-l",

4498

4499 /* Range in the wrong order */

4500 "&a<*k-b",

4501

4502 };

4503

4504 UChar rule[500];

4505 UErrorCode status = U_ZERO_ERROR;

4506 UParseError parse_error;

4507 int n_rules = LEN(invalidRules);

4508 int rule_no;

4509 int length;

4510 UCollator *myCollation;

4511

4512 for (rule_no = 0; rule_no < n_rules; ++rule_no) {

4513

4514 length = u_unescape(invalidRules[rule_no], rule, 500);

4515 if (length == 0) {

4516 log_err("ERROR: The rule cannot be unescaped: %s\n");

4517 return;

4518 }

4519 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er ror, &status);

4520 (void)myCollation; /* Suppress set but not used warning. */

4521 if(!U_FAILURE(status)){

4522 log_err("ERROR: Could not cause a failure as expected: \n");

4523 }

4524 status = U_ZERO_ERROR;

4525 }

4526 }

4527

4528 /*

4529 * This test ensures that characters placed before a character in a different sc ript have the same lead byte

4530 * in their collation key before and after script reordering.

4531 */

4532 static void TestBeforeRuleWithScriptReordering(void)

4533 {

4534 UParseError error;

4535 UErrorCode status = U_ZERO_ERROR;

4536 UCollator *myCollation;

4537 char srules[500] = "&[before 1]\\u03b1 < \\u0e01";

4538 UChar rules[500];

4539 uint32_t rulesLength = 0;

4540 int32_t reorderCodes[1] = {USCRIPT_GREEK};

4541 UCollationResult collResult;

4542

4543 uint8_t baseKey[256];

4544 uint32_t baseKeyLength;

4545 uint8_t beforeKey[256];

4546 uint32_t beforeKeyLength;

4547

4548 UChar base[] = { 0x03b1 }; /* base */

4549 int32_t baseLen = sizeof(base)/sizeof(*base);

4550

4551 UChar before[] = { 0x0e01 }; /* ko kai */

4552 int32_t beforeLen = sizeof(before)/sizeof(*before);

4553

4554 /UChar data[] = { before, base };

4555 genericRulesStarter(srules, data, 2);*/

4556

4557 log_verbose("Testing the &[before 1] rule with [reorder grek]\n");

4558

4559 (void)beforeKeyLength; /* Suppress set but not used warnings. */

4560 (void)baseKeyLength;

4561

4562 /* build collator */

4563 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n");

4564

4565 rulesLength = u_unescape(srules, rules, LEN(rules));

4566 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &er ror, &status);

4567 if(U_FAILURE(status)) {

4568 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

4569 return;

4570 }

4571

4572 /* check collation results - before rule applied but not script reordering * /

4573 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);

4574 if (collResult != UCOL_GREATER) {

4575 log_err("Collation result not correct before script reordering = %d\n", collResult);

4576 }

4577

4578 /* check the lead byte of the collation keys before script reordering */

4579 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);

4580 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);

4581 if (baseKey[0] != beforeKey[0]) {

4582 log_err("Different lead byte for sort keys using before rule and before sc ript reordering. base character lead byte = %02x, before character lead byte = % 02x\n", baseKey[0], beforeKey[0]);

4583 }

4584

4585 /* reorder the scripts */

4586 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status);

4587 if(U_FAILURE(status)) {

4588 log_err_status(status, "ERROR: while setting script order: %s\n", myErro rName(status));

4589 return;

4590 }

4591

4592 /* check collation results - before rule applied and after script reordering */

4593 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen);

4594 if (collResult != UCOL_GREATER) {

4595 log_err("Collation result not correct after script reordering = %d\n", c ollResult);

4596 }

4597

4598 /* check the lead byte of the collation keys after script reordering */

4599 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256);

4600 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256);

4601 if (baseKey[0] != beforeKey[0]) {

4602 log_err("Different lead byte for sort keys using before fule and after s cript reordering. base character lead byte = %02x, before character lead byte = %02x\n", baseKey[0], beforeKey[0]);

4603 }

4604

4605 ucol_close(myCollation);

4606 }

4607

4608 /*

4609 * Test that in a primary-compressed sort key all bytes except the first one are unchanged under script reordering.

4610 */

4611 static void TestNonLeadBytesDuringCollationReordering(void)

4612 {

4613 UErrorCode status = U_ZERO_ERROR;

4614 UCollator *myCollation;

4615 int32_t reorderCodes[1] = {USCRIPT_GREEK};

4616

4617 uint8_t baseKey[256];

4618 uint32_t baseKeyLength;

4619 uint8_t reorderKey[256];

4620 uint32_t reorderKeyLength;

4621

4622 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 };

4623

4624 uint32_t i;

4625

4626

4627 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");

4628

4629 /* build collator tertiary */

4630 myCollation = ucol_open("", &status);

4631 ucol_setStrength(myCollation, UCOL_TERTIARY);

4632 if(U_FAILURE(status)) {

4633 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

4634 return;

4635 }

4636 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), ba seKey, 256);

4637

4638 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

4639 if(U_FAILURE(status)) {

4640 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

4641 return;

4642 }

4643 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);

4644

4645 if (baseKeyLength != reorderKeyLength) {

4646 log_err("Key lengths not the same during reordering.\n");

4647 return;

4648 }

4649

4650 for (i = 1; i < baseKeyLength; i++) {

4651 if (baseKey[i] != reorderKey[i]) {

4652 log_err("Collation key bytes not the same at position %d.\n", i);

4653 return;

4654 }

4655 }

4656 ucol_close(myCollation);

4657

4658 /* build collator quaternary */

4659 myCollation = ucol_open("", &status);

4660 ucol_setStrength(myCollation, UCOL_QUATERNARY);

4661 if(U_FAILURE(status)) {

4662 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

4663 return;

4664 }

4665 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), ba seKey, 256);

4666

4667 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

4668 if(U_FAILURE(status)) {

4669 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

4670 return;

4671 }

4672 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), reorderKey, 256);

4673

4674 if (baseKeyLength != reorderKeyLength) {

4675 log_err("Key lengths not the same during reordering.\n");

4676 return;

4677 }

4678

4679 for (i = 1; i < baseKeyLength; i++) {

4680 if (baseKey[i] != reorderKey[i]) {

4681 log_err("Collation key bytes not the same at position %d.\n", i);

4682 return;

4683 }

4684 }

4685 ucol_close(myCollation);

4686 }

4687

4688 /*

4689 * Test reordering API.

4690 */

4691 static void TestReorderingAPI(void)

4692 {

4693 UErrorCode status = U_ZERO_ERROR;

4694 UCollator *myCollation;

4695 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION};

4696 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REO RDER_CODE_CURRENCY, USCRIPT_KATAKANA};

4697 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION};

4698 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE;

4699 UCollationResult collResult;

4700 int32_t retrievedReorderCodesLength;

4701 int32_t retrievedReorderCodes[10];

4702 UChar greekString[] = { 0x03b1 };

4703 UChar punctuationString[] = { 0x203e };

4704 int loopIndex;

4705

4706 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");

4707

4708 /* build collator tertiary */

4709 myCollation = ucol_open("", &status);

4710 ucol_setStrength(myCollation, UCOL_TERTIARY);

4711 if(U_FAILURE(status)) {

4712 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

4713 return;

4714 }

4715

4716 /* set the reorderding */

4717 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

4718 if (U_FAILURE(status)) {

4719 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

4720 return;

4721 }

4722

4723 /* get the reordering */

4724 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

4725 if (status != U_BUFFER_OVERFLOW_ERROR) {

4726 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));

4727 return;

4728 }

4729 status = U_ZERO_ERROR;

4730 if (retrievedReorderCodesLength != LEN(reorderCodes)) {

4731 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));

4732 return;

4733 }

4734 /* now let's really get it */

4735 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo rderCodes, LEN(retrievedReorderCodes), &status);

4736 if (U_FAILURE(status)) {

4737 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName (status));

4738 return;

4739 }

4740 if (retrievedReorderCodesLength != LEN(reorderCodes)) {

4741 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));

4742 return;

4743 }

4744 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

4745 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {

4746 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4747 return;

4748 }

4749 }

4750 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));

4751 if (collResult != UCOL_LESS) {

4752 log_err_status(status, "ERROR: collation result should have been UCOL_LE SS\n");

4753 return;

4754 }

4755

4756 /* clear the reordering */

4757 ucol_setReorderCodes(myCollation, NULL, 0, &status);

4758 if (U_FAILURE(status)) {

4759 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myE rrorName(status));

4760 return;

4761 }

4762

4763 /* get the reordering again */

4764 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

4765 if (retrievedReorderCodesLength != 0) {

4766 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);

4767 return;

4768 }

4769

4770 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));

4771 if (collResult != UCOL_GREATER) {

4772 log_err_status(status, "ERROR: collation result should have been UCOL_GR EATER\n");

4773 return;

4774 }

4775

4776 /* clear the reordering using [NONE] */

4777 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status);

4778 if (U_FAILURE(status)) {

4779 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", m yErrorName(status));

4780 return;

4781 }

4782

4783 /* get the reordering again */

4784 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

4785 if (retrievedReorderCodesLength != 0) {

4786 log_err_status(status,

4787 "ERROR: [NONE] retrieved reorder codes length was %d but should have been 0\n",

4788 retrievedReorderCodesLength);

4789 return;

4790 }

4791

4792 /* test for error condition on duplicate reorder codes */

4793 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorde rCodes), &status);

4794 if (!U_FAILURE(status)) {

4795 log_err_status(status, "ERROR: setting duplicate reorder codes did not g enerate a failure\n");

4796 return;

4797 }

4798

4799 status = U_ZERO_ERROR;

4800 /* test for reorder codes after a reset code */

4801 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reord erCodesStartingWithDefault), &status);

4802 if (!U_FAILURE(status)) {

4803 log_err_status(status, "ERROR: reorderd code sequence starting with defa ult and having following codes didn't cause an error\n");

4804 return;

4805 }

4806

4807 ucol_close(myCollation);

4808 }

4809

4810 /*

4811 * Test reordering API.

4812 */

4813 static void TestReorderingAPIWithRuleCreatedCollator(void)

4814 {

4815 UErrorCode status = U_ZERO_ERROR;

4816 UCollator *myCollation;

4817 UChar rules[90];

4818 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK};

4819 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REO RDER_CODE_PUNCTUATION};

4820 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT};

4821 UCollationResult collResult;

4822 int32_t retrievedReorderCodesLength;

4823 int32_t retrievedReorderCodes[10];

4824 static const UChar greekString[] = { 0x03b1 };

4825 static const UChar punctuationString[] = { 0x203e };

4826 static const UChar hanString[] = { 0x65E5, 0x672C };

4827 int loopIndex;

4828

4829 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");

4830

4831 /* build collator from rules */

4832 u_uastrcpy(rules, "[reorder Hani Grek]");

4833 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERT IARY, NULL, &status);

4834 if(U_FAILURE(status)) {

4835 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

4836 return;

4837 }

4838

4839 /* get the reordering */

4840 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo rderCodes, LEN(retrievedReorderCodes), &status);

4841 if (U_FAILURE(status)) {

4842 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName (status));

4843 return;

4844 }

4845 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {

4846 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));

4847 return;

4848 }

4849 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

4850 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {

4851 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4852 return;

4853 }

4854 }

4855 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanStr ing, LEN(hanString));

4856 if (collResult != UCOL_GREATER) {

4857 log_err_status(status, "ERROR: collation result should have been UCOL_GR EATER\n");

4858 return;

4859 }

4860

4861 /* set the reordering */

4862 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

4863 if (U_FAILURE(status)) {

4864 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

4865 return;

4866 }

4867

4868 /* get the reordering */

4869 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

4870 if (status != U_BUFFER_OVERFLOW_ERROR) {

4871 log_err_status(status, "ERROR: getting error codes should have returned U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status));

4872 return;

4873 }

4874 status = U_ZERO_ERROR;

4875 if (retrievedReorderCodesLength != LEN(reorderCodes)) {

4876 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));

4877 return;

4878 }

4879 /* now let's really get it */

4880 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo rderCodes, LEN(retrievedReorderCodes), &status);

4881 if (U_FAILURE(status)) {

4882 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName (status));

4883 return;

4884 }

4885 if (retrievedReorderCodesLength != LEN(reorderCodes)) {

4886 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));

4887 return;

4888 }

4889 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

4890 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {

4891 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4892 return;

4893 }

4894 }

4895 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));

4896 if (collResult != UCOL_LESS) {

4897 log_err_status(status, "ERROR: collation result should have been UCOL_LE SS\n");

4898 return;

4899 }

4900

4901 /* clear the reordering */

4902 ucol_setReorderCodes(myCollation, NULL, 0, &status);

4903 if (U_FAILURE(status)) {

4904 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myE rrorName(status));

4905 return;

4906 }

4907

4908 /* get the reordering again */

4909 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st atus);

4910 if (retrievedReorderCodesLength != 0) {

4911 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, 0);

4912 return;

4913 }

4914

4915 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu ationString, LEN(punctuationString));

4916 if (collResult != UCOL_GREATER) {

4917 log_err_status(status, "ERROR: collation result should have been UCOL_GR EATER\n");

4918 return;

4919 }

4920

4921 /* reset the reordering */

4922 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status);

4923 if (U_FAILURE(status)) {

4924 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n" , myErrorName(status));

4925 return;

4926 }

4927 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo rderCodes, LEN(retrievedReorderCodes), &status);

4928 if (U_FAILURE(status)) {

4929 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName (status));

4930 return;

4931 }

4932 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) {

4933 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes));

4934 return;

4935 }

4936 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

4937 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) {

4938 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

4939 return;

4940 }

4941 }

4942

4943 ucol_close(myCollation);

4944 }

4945

4946 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int 32_t expectedScript) {

4947 int32_t i;

4948 for (i = 0; i < length; ++i) {

4949 if (expectedScript == scripts[i]) { return TRUE; }

4950 }

4951 return FALSE;

4952 }

4953

4954 static void TestEquivalentReorderingScripts(void) {

4955 // Beginning with ICU 55, collation reordering moves single scripts

4956 // rather than groups of scripts,

4957 // except where scripts share a range and sort primary-equal.

4958 UErrorCode status = U_ZERO_ERROR;

4959 int32_t equivalentScripts[100];

4960 int32_t length;

4961 int i;

4962 int32_t prevScript;

4963 /* These scripts are expected to be equivalent. */

4964 static const int32_t expectedScripts[] = {

4965 USCRIPT_HIRAGANA,

4966 USCRIPT_KATAKANA,

4967 USCRIPT_KATAKANA_OR_HIRAGANA

4968 };

4969

4970 equivalentScripts[0] = 0;

4971 length = ucol_getEquivalentReorderCodes(

4972 USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status);

4973 if (U_FAILURE(status)) {

4974 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder code s: %s\n", myErrorName(status));

4975 return;

4976 }

4977 if (length != 1 \|\| equivalentScripts[0] != USCRIPT_GOTHIC) {

4978 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: "

4979 "length expected 1, was = %d; expected [%d] was [%d]\n",

4980 length, USCRIPT_GOTHIC, equivalentScripts[0]);

4981 }

4982

4983 length = ucol_getEquivalentReorderCodes(

4984 USCRIPT_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status );

4985 if (U_FAILURE(status)) {

4986 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder co des: %s\n", myErrorName(status));

4987 return;

4988 }

4989 if (length != LEN(expectedScripts)) {

4990 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: "

4991 "expected %d, was = %d\n",

4992 LEN(expectedScripts), length);

4993 }

4994 prevScript = -1;

4995 for (i = 0; i < length; ++i) {

4996 int32_t script = equivalentScripts[i];

4997 if (script <= prevScript) {

4998 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d \n", i);

4999 }

5000 prevScript = script;

5001 }

5002 for (i = 0; i < LEN(expectedScripts); i++) {

5003 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i ])) {

5004 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n",

5005 expectedScripts[i]);

5006 }

5007 }

5008

5009 length = ucol_getEquivalentReorderCodes(

5010 USCRIPT_KATAKANA, equivalentScripts, LEN(equivalentScripts), &status );

5011 if (U_FAILURE(status)) {

5012 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder co des: %s\n", myErrorName(status));

5013 return;

5014 }

5015 if (length != LEN(expectedScripts)) {

5016 log_err("ERROR/Katakana: retrieved equivalent script length wrong: "

5017 "expected %d, was = %d\n",

5018 LEN(expectedScripts), length);

5019 }

5020 for (i = 0; i < LEN(expectedScripts); i++) {

5021 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i ])) {

5022 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n",

5023 expectedScripts[i]);

5024 }

5025 }

5026

5027 length = ucol_getEquivalentReorderCodes(

5028 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, LEN(equivalentScrip ts), &status);

5029 if (U_FAILURE(status) \|\| length != LEN(expectedScripts)) {

5030 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: "

5031 "expected %d, was = %d\n",

5032 LEN(expectedScripts), length);

5033 }

5034

5035 length = ucol_getEquivalentReorderCodes(

5036 USCRIPT_HAN, equivalentScripts, LEN(equivalentScripts), &status);

5037 if (U_FAILURE(status) \|\| length != 3) {

5038 log_err("ERROR/Hani: retrieved equivalent script length wrong: "

5039 "expected 3, was = %d\n", length);

5040 }

5041 length = ucol_getEquivalentReorderCodes(

5042 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, LEN(equivalentScripts), & status);

5043 if (U_FAILURE(status) \|\| length != 3) {

5044 log_err("ERROR/Hans: retrieved equivalent script length wrong: "

5045 "expected 3, was = %d\n", length);

5046 }

5047 length = ucol_getEquivalentReorderCodes(

5048 USCRIPT_TRADITIONAL_HAN, equivalentScripts, LEN(equivalentScripts), &status);

5049 if (U_FAILURE(status) \|\| length != 3) {

5050 log_err("ERROR/Hant: retrieved equivalent script length wrong: "

5051 "expected 3, was = %d\n", length);

5052 }

5053

5054 length = ucol_getEquivalentReorderCodes(

5055 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, LEN(equivalentScripts), &status);

5056 if (U_FAILURE(status) \|\| length != 2) {

5057 log_err("ERROR/Merc: retrieved equivalent script length wrong: "

5058 "expected 2, was = %d\n", length);

5059 }

5060 length = ucol_getEquivalentReorderCodes(

5061 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, LEN(equivalentScrip ts), &status);

5062 if (U_FAILURE(status) \|\| length != 2) {

5063 log_err("ERROR/Mero: retrieved equivalent script length wrong: "

5064 "expected 2, was = %d\n", length);

5065 }

5066 }

5067

5068 static void TestReorderingAcrossCloning(void)

5069 {

5070 UErrorCode status = U_ZERO_ERROR;

5071 UCollator *myCollation;

5072 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN CTUATION};

5073 UCollator *clonedCollation;

5074 int32_t retrievedReorderCodesLength;

5075 int32_t retrievedReorderCodes[10];

5076 int loopIndex;

5077

5078 log_verbose("Testing non-lead bytes in a sort key with and without reorderin g\n");

5079

5080 /* build collator tertiary */

5081 myCollation = ucol_open("", &status);

5082 ucol_setStrength(myCollation, UCOL_TERTIARY);

5083 if(U_FAILURE(status)) {

5084 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

5085 return;

5086 }

5087

5088 /* set the reorderding */

5089 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status);

5090 if (U_FAILURE(status)) {

5091 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName (status));

5092 return;

5093 }

5094

5095 /* clone the collator */

5096 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status);

5097 if (U_FAILURE(status)) {

5098 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(stat us));

5099 return;

5100 }

5101

5102 /* get the reordering */

5103 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrieve dReorderCodes, LEN(retrievedReorderCodes), &status);

5104 if (U_FAILURE(status)) {

5105 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName (status));

5106 return;

5107 }

5108 if (retrievedReorderCodesLength != LEN(reorderCodes)) {

5109 log_err_status(status, "ERROR: retrieved reorder codes length was %d but should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes));

5110 return;

5111 }

5112 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) {

5113 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) {

5114 log_err_status(status, "ERROR: retrieved reorder code doesn't match set reorder code at index %d\n", loopIndex);

5115 return;

5116 }

5117 }

5118

5119 /uprv_free(buffer);/

5120 ucol_close(myCollation);

5121 ucol_close(clonedCollation);

5122 }

5123

5124 /*

5125 * Utility function to test one collation reordering test case set.

5126 * @param testcases Array of test cases.

5127 * @param n_testcases Size of the array testcases.

5128 * @param reorderTokens Array of reordering codes.

5129 * @param reorderTokensLen Size of the array reorderTokens.

5130 */

5131 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32 _t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen)

5132 {

5133 uint32_t testCaseNum;

5134 UErrorCode status = U_ZERO_ERROR;

5135 UCollator *myCollation;

5136

5137 myCollation = ucol_open("", &status);

5138 if (U_FAILURE(status)) {

5139 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

5140 return;

5141 }

5142 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status);

5143 if(U_FAILURE(status)) {

5144 log_err_status(status, "ERROR: while setting script order: %s\n", myErro rName(status));

5145 return;

5146 }

5147

5148 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) {

5149 doTest(myCollation,

5150 testCases[testCaseNum].source,

5151 testCases[testCaseNum].target,

5152 testCases[testCaseNum].result

5153 );

5154 }

5155 ucol_close(myCollation);

5156 }

5157

5158 static void TestGreekFirstReorder(void)

5159 {

5160 const char* strRules[] = {

5161 "[reorder Grek]"

5162 };

5163

5164 const int32_t apiRules[] = {

5165 USCRIPT_GREEK

5166 };

5167

5168 const static OneTestCase privateUseCharacterStrings[] = {

5169 { {0x0391}, {0x0391}, UCOL_EQUAL },

5170 { {0x0041}, {0x0391}, UCOL_GREATER },

5171 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER },

5172 { {0x0060}, {0x0391}, UCOL_LESS },

5173 { {0x0391}, {0xe2dc}, UCOL_LESS },

5174 { {0x0391}, {0x0060}, UCOL_GREATER },

5175 };

5176

5177 /* Test rules creation */

5178 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

5179

5180 /* Test collation reordering API */

5181 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

5182 }

5183

5184 static void TestGreekLastReorder(void)

5185 {

5186 const char* strRules[] = {

5187 "[reorder Zzzz Grek]"

5188 };

5189

5190 const int32_t apiRules[] = {

5191 USCRIPT_UNKNOWN, USCRIPT_GREEK

5192 };

5193

5194 const static OneTestCase privateUseCharacterStrings[] = {

5195 { {0x0391}, {0x0391}, UCOL_EQUAL },

5196 { {0x0041}, {0x0391}, UCOL_LESS },

5197 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS },

5198 { {0x0060}, {0x0391}, UCOL_LESS },

5199 { {0x0391}, {0xe2dc}, UCOL_GREATER },

5200 };

5201

5202 /* Test rules creation */

5203 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

5204

5205 /* Test collation reordering API */

5206 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

5207 }

5208

5209 static void TestNonScriptReorder(void)

5210 {

5211 const char* strRules[] = {

5212 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]"

5213 };

5214

5215 const int32_t apiRules[] = {

5216 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIP T_LATIN,

5217 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,

5218 UCOL_REORDER_CODE_CURRENCY

5219 };

5220

5221 const static OneTestCase privateUseCharacterStrings[] = {

5222 { {0x0391}, {0x0041}, UCOL_LESS },

5223 { {0x0041}, {0x0391}, UCOL_GREATER },

5224 { {0x0060}, {0x0041}, UCOL_LESS },

5225 { {0x0060}, {0x0391}, UCOL_GREATER },

5226 { {0x0024}, {0x0041}, UCOL_GREATER },

5227 };

5228

5229 /* Test rules creation */

5230 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

5231

5232 /* Test collation reordering API */

5233 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

5234 }

5235

5236 static void TestHaniReorder(void)

5237 {

5238 const char* strRules[] = {

5239 "[reorder Hani]"

5240 };

5241 const int32_t apiRules[] = {

5242 USCRIPT_HAN

5243 };

5244

5245 const static OneTestCase privateUseCharacterStrings[] = {

5246 { {0x4e00}, {0x0041}, UCOL_LESS },

5247 { {0x4e00}, {0x0060}, UCOL_GREATER },

5248 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },

5249 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },

5250 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },

5251 { {0xfa27}, {0x0041}, UCOL_LESS },

5252 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },

5253 };

5254

5255 /* Test rules creation */

5256 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

5257

5258 /* Test collation reordering API */

5259 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha racterStrings), apiRules, LEN(apiRules));

5260 }

5261

5262 static void TestHaniReorderWithOtherRules(void)

5263 {

5264 const char* strRules[] = {

5265 "[reorder Hani] &b<a"

5266 };

5267 /*const int32_t apiRules[] = {

5268 USCRIPT_HAN

5269 };*/

5270

5271 const static OneTestCase privateUseCharacterStrings[] = {

5272 { {0x4e00}, {0x0041}, UCOL_LESS },

5273 { {0x4e00}, {0x0060}, UCOL_GREATER },

5274 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS },

5275 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER },

5276 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS },

5277 { {0xfa27}, {0x0041}, UCOL_LESS },

5278 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS },

5279 { {0x0062}, {0x0061}, UCOL_LESS },

5280 };

5281

5282 /* Test rules creation */

5283 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings ), strRules, LEN(strRules));

5284 }

5285

5286 static void TestMultipleReorder(void)

5287 {

5288 const char* strRules[] = {

5289 "[reorder Grek Zzzz DIGIT Latn Hani]"

5290 };

5291

5292 const int32_t apiRules[] = {

5293 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN, USCRIPT_HAN

5294 };

5295

5296 const static OneTestCase collationTestCases[] = {

5297 { {0x0391}, {0x0041}, UCOL_LESS},

5298 { {0x0031}, {0x0041}, UCOL_LESS},

5299 { {0x0041}, {0x4e00}, UCOL_LESS},

5300 };

5301

5302 /* Test rules creation */

5303 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN (strRules));

5304

5305 /* Test collation reordering API */

5306 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases), apiRules, LEN(apiRules));

5307 }

5308

5309 /*

5310 * Test that covers issue reported in ticket 8814

5311 */

5312 static void TestReorderWithNumericCollation(void)

5313 {

5314 UErrorCode status = U_ZERO_ERROR;

5315 UCollator *myCollation;

5316 UCollator *myReorderCollation;

5317 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUA TION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_L ATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS};

5318 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 };

5319 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */

5320 UChar fortyS[] = { 0x0053 };

5321 UChar fortyThreeP[] = { 0x0050 };

5322 uint8_t fortyS_sortKey[128];

5323 int32_t fortyS_sortKey_Length;

5324 uint8_t fortyThreeP_sortKey[128];

5325 int32_t fortyThreeP_sortKey_Length;

5326 uint8_t fortyS_sortKey_reorder[128];

5327 int32_t fortyS_sortKey_reorder_Length;

5328 uint8_t fortyThreeP_sortKey_reorder[128];

5329 int32_t fortyThreeP_sortKey_reorder_Length;

5330 UCollationResult collResult;

5331 UCollationResult collResultReorder;

5332

5333 log_verbose("Testing reordering with and without numeric collation\n");

5334

5335 /* build collator tertiary with numeric */

5336 myCollation = ucol_open("", &status);

5337 /*

5338 ucol_setStrength(myCollation, UCOL_TERTIARY);

5339 */

5340 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);

5341 if(U_FAILURE(status)) {

5342 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

5343 return;

5344 }

5345

5346 /* build collator tertiary with numeric and reordering */

5347 myReorderCollation = ucol_open("", &status);

5348 /*

5349 ucol_setStrength(myReorderCollation, UCOL_TERTIARY);

5350 */

5351 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &stat us);

5352 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &s tatus);

5353 if(U_FAILURE(status)) {

5354 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa me(status));

5355 return;

5356 }

5357

5358 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fo rtyS_sortKey, 128);

5359 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(f ortyThreeP), fortyThreeP_sortKey, 128);

5360 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS, LEN(fortyS), fortyS_sortKey_reorder, 128);

5361 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, for tyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128);

5362

5363 if (fortyS_sortKey_Length < 0 \|\| fortyThreeP_sortKey_Length < 0 \|\| fortyS_so rtKey_reorder_Length < 0 \|\| fortyThreeP_sortKey_reorder_Length < 0) {

5364 log_err_status(status, "ERROR: couldn't generate sort keys\n");

5365 return;

5366 }

5367 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN (fortyThreeP));

5368 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fo rtyThreeP, LEN(fortyThreeP));

5369 /*

5370 fprintf(stderr, "\tcollResult = %x\n", collResult);

5371 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder);

5372 fprintf(stderr, "\nfortyS\n");

5373 for (i = 0; i < fortyS_sortKey_Length; i++) {

5374 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder [i]);

5375 }

5376 fprintf(stderr, "\nfortyThreeP\n");

5377 for (i = 0; i < fortyThreeP_sortKey_Length; i++) {

5378 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortK ey_reorder[i]);

5379 }

5380 */

5381 if (collResult != collResultReorder) {

5382 log_err_status(status, "ERROR: collation results should have been the sa me.\n");

5383 return;

5384 }

5385

5386 ucol_close(myCollation);

5387 ucol_close(myReorderCollation);

5388 }

5389

/*
 * Compares two zero-terminated byte sequences (such as sort keys).
 *
 * @param a first sequence, terminated by a 0 byte
 * @param b second sequence, terminated by a 0 byte
 * @return 0 if both sequences are identical up to and including the
 *         terminator; -1 if the first differing byte of a is smaller than
 *         that of b; 1 otherwise.
 *
 * The bytes are compared, not the pointers: two distinct buffers with the
 * same contents compare equal.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    for (; *a == *b; ++a, ++b) {
        if (*a == 0) {
            /* Reached the terminator of both sequences without a mismatch. */
            return 0;
        }
    }
    return (*a < *b ? -1 : 1);
}

5399

5400 static void TestImportRulesDeWithPhonebook(void)

5401 {

5402 const char* normalRules[] = {

5403 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc",

5404 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc",

5405 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc",

5406 };

5407 const OneTestCase normalTests[] = {

5408 { {0x00e6}, {0x00c6}, UCOL_LESS},

5409 { {0x00fc}, {0x00dc}, UCOL_GREATER},

5410 };

5411

5412 const char* importRules[] = {

5413 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]",

5414 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",

5415 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]",

5416 };

5417 const OneTestCase importTests[] = {

5418 { {0x00e6}, {0x00c6}, UCOL_LESS},

5419 { {0x00fc}, {0x00dc}, UCOL_LESS},

5420 };

5421

5422 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules) );

5423 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules) );

5424 }

5425

#if 0
/*
 * Disabled test: imports of the European Ordering Rules ("eor") and the
 * Finnish standard tailoring, separately and combined.
 *
 * Each rule set is paired with expectations for the same three comparisons:
 *   U+0110 vs U+00F0,  U+00A3 vs U+00A5,  "a" vs "a" + U+00A3.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET (a dummy rule that leaves the default order in place). */
    const char *defaultRules[] = {
        "&a<b",
    };
    const OneTestCase defaultTests[] = {
        { {0x0110}, {0x00F0},         UCOL_LESS },
        { {0x00a3}, {0x00a5},         UCOL_LESS },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS },
    };

    /* European Ordering rules: ignore currency characters. */
    const char *eorRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase eorTests[] = {
        { {0x0110}, {0x00F0},         UCOL_LESS },
        { {0x00a3}, {0x00a5},         UCOL_EQUAL },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL },
    };

    /* Finnish standard tailoring. */
    const char *fiStdRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase fiStdTests[] = {
        { {0x0110}, {0x00F0},         UCOL_GREATER },
        { {0x00a3}, {0x00a5},         UCOL_LESS },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS },
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char *eorFiStdRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* Essentially the same as the combination above once fi.txt is updated with import. */
    const char *fiEorRules[] = {
        "[import fi-u-co-eor]",
    };
    const OneTestCase fiEorTests[] = {
        { {0x0110}, {0x00F0},         UCOL_GREATER },
        { {0x00a3}, {0x00a5},         UCOL_EQUAL },
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL },
    };

    doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
    doTestOneTestCase(eorTests, LEN(eorTests), eorRules, LEN(eorRules));
    doTestOneTestCase(fiStdTests, LEN(fiStdTests), fiStdRules, LEN(fiStdRules));
    doTestOneTestCase(fiEorTests, LEN(fiEorTests), eorFiStdRules, LEN(eorFiStdRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
          eor{
              Sequence{
                  "[import root-u-co-eor][import fi-u-co-standard]"
              }
              Version{"21.0"}
          }
     */
    /* doTestOneTestCase(fiEorTests, LEN(fiEorTests), fiEorRules, LEN(fiEorRules)); */
}
#endif

5495

#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be
 * included now, unless the resource files are built with the
 * -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built
 * by default.
 *
 * The single comparison (U+3402 vs U+4E1E) is expected to be GREATER with the
 * default order and LESS once "[import ko-u-co-unihan]" is applied.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET (a dummy rule that leaves the default order in place). */
    const char *defaultRules[] = {
        "&a<b",
    };
    const OneTestCase defaultTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER },
    };

    /* Korean "unihan" collation pulled in through an import. */
    const char *unihanRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase unihanTests[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS },
    };

    doTestOneTestCase(defaultTests, LEN(defaultTests), defaultRules, LEN(defaultRules));
    doTestOneTestCase(unihanTests, LEN(unihanTests), unihanRules, LEN(unihanRules));
}
#endif

5527

5528 static void TestImport(void)

5529 {

5530 UCollator* vicoll;

5531 UCollator* escoll;

5532 UCollator* viescoll;

5533 UCollator* importviescoll;

5534 UParseError error;

5535 UErrorCode status = U_ZERO_ERROR;

5536 UChar* virules;

5537 int32_t viruleslength;

5538 UChar* esrules;

5539 int32_t esruleslength;

5540 UChar* viesrules;

5541 int32_t viesruleslength;

5542 char srules[500] = "[import vi][import es]";

5543 UChar rules[500];

5544 uint32_t length = 0;

5545 int32_t itemCount;

5546 int32_t i, k;

5547 UChar32 start;

5548 UChar32 end;

5549 UChar str[500];

5550 int32_t strLength;

5551

5552 uint8_t sk1[500];

5553 uint8_t sk2[500];

5554

5555 UBool b;

5556 USet* tailoredSet;

5557 USet* importTailoredSet;

5558

5559

5560 vicoll = ucol_open("vi", &status);

5561 if(U_FAILURE(status)){

5562 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr orName(status));

5563 return;

5564 }

5565

5566 virules = (UChar*) ucol_getRules(vicoll, &viruleslength);

5567 if(viruleslength == 0) {

5568 log_data_err("missing vi tailoring rule string\n");

5569 ucol_close(vicoll);

5570 return;

5571 }

5572 escoll = ucol_open("es", &status);

5573 esrules = (UChar*) ucol_getRules(escoll, &esruleslength);

5574 viesrules = (UChar)uprv_malloc((viruleslength+esruleslength+1)sizeof(UChar *));

5575 viesrules[0] = 0;

5576 u_strcat(viesrules, virules);

5577 u_strcat(viesrules, esrules);

5578 viesruleslength = viruleslength + esruleslength;

5579 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY , &error, &status);

5580

5581 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */

5582 length = u_unescape(srules, rules, 500);

5583 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &erro r, &status);

5584 if(U_FAILURE(status)){

5585 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

5586 return;

5587 }

5588

5589 tailoredSet = ucol_getTailoredSet(viescoll, &status);

5590 importTailoredSet = ucol_getTailoredSet(importviescoll, &status);

5591

5592 if(!uset_equals(tailoredSet, importTailoredSet)){

5593 log_err("Tailored sets not equal");

5594 }

5595

5596 uset_close(importTailoredSet);

5597

5598 itemCount = uset_getItemCount(tailoredSet);

5599

5600 for( i = 0; i < itemCount; i++){

5601 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status );

5602 if(strLength < 2){

5603 for (; start <= end; start++){

5604 k = 0;

5605 U16_APPEND(str, k, 500, start, b);

5606 (void)b; /* Suppress set but not used warning. */

5607 ucol_getSortKey(viescoll, str, 1, sk1, 500);

5608 ucol_getSortKey(importviescoll, str, 1, sk2, 500);

5609 if(compare_uint8_t_arrays(sk1, sk2) != 0){

5610 log_err("Sort key for %s not equal\n", str);

5611 break;

5612 }

5613 }

5614 }else{

5615 ucol_getSortKey(viescoll, str, strLength, sk1, 500);

5616 ucol_getSortKey(importviescoll, str, strLength, sk2, 500);

5617 if(compare_uint8_t_arrays(sk1, sk2) != 0){

5618 log_err("ZZSort key for %s not equal\n", str);

5619 break;

5620 }

5621

5622 }

5623 }

5624

5625 uset_close(tailoredSet);

5626

5627 uprv_free(viesrules);

5628

5629 ucol_close(vicoll);

5630 ucol_close(escoll);

5631 ucol_close(viescoll);

5632 ucol_close(importviescoll);

5633 }

5634

5635 static void TestImportWithType(void)

5636 {

5637 UCollator* vicoll;

5638 UCollator* decoll;

5639 UCollator* videcoll;

5640 UCollator* importvidecoll;

5641 UParseError error;

5642 UErrorCode status = U_ZERO_ERROR;

5643 const UChar* virules;

5644 int32_t viruleslength;

5645 const UChar* derules;

5646 int32_t deruleslength;

5647 UChar* viderules;

5648 int32_t videruleslength;

5649 const char srules[500] = "[import vi][import de-u-co-phonebk]";

5650 UChar rules[500];

5651 uint32_t length = 0;

5652 int32_t itemCount;

5653 int32_t i, k;

5654 UChar32 start;

5655 UChar32 end;

5656 UChar str[500];

5657 int32_t strLength;

5658

5659 uint8_t sk1[500];

5660 uint8_t sk2[500];

5661

5662 USet* tailoredSet;

5663 USet* importTailoredSet;

5664

5665 vicoll = ucol_open("vi", &status);

5666 if(U_FAILURE(status)){

5667 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

5668 return;

5669 }

5670 virules = ucol_getRules(vicoll, &viruleslength);

5671 if(viruleslength == 0) {

5672 log_data_err("missing vi tailoring rule string\n");

5673 ucol_close(vicoll);

5674 return;

5675 }

5676 /* decoll = ucol_open("de@collation=phonebook", &status); */

5677 decoll = ucol_open("de-u-co-phonebk", &status);

5678 if(U_FAILURE(status)){

5679 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

5680 return;

5681 }

5682

5683

5684 derules = ucol_getRules(decoll, &deruleslength);

5685 viderules = (UChar)uprv_malloc((viruleslength+deruleslength+1)sizeof(UChar *));

5686 viderules[0] = 0;

5687 u_strcat(viderules, virules);

5688 u_strcat(viderules, derules);

5689 videruleslength = viruleslength + deruleslength;

5690 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY , &error, &status);

5691

5692 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */

5693 length = u_unescape(srules, rules, 500);

5694 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &erro r, &status);

5695 if(U_FAILURE(status)){

5696 log_err_status(status, "ERROR: in creation of rule based collator: %s\n" , myErrorName(status));

5697 return;

5698 }

5699

5700 tailoredSet = ucol_getTailoredSet(videcoll, &status);

5701 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status);

5702

5703 if(!uset_equals(tailoredSet, importTailoredSet)){

5704 log_err("Tailored sets not equal");

5705 }

5706

5707 uset_close(importTailoredSet);

5708

5709 itemCount = uset_getItemCount(tailoredSet);

5710

5711 for( i = 0; i < itemCount; i++){

5712 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status );

5713 if(strLength < 2){

5714 for (; start <= end; start++){

5715 k = 0;

5716 U16_APPEND_UNSAFE(str, k, start);

5717 ucol_getSortKey(videcoll, str, 1, sk1, 500);

5718 ucol_getSortKey(importvidecoll, str, 1, sk2, 500);

5719 if(compare_uint8_t_arrays(sk1, sk2) != 0){

5720 log_err("Sort key for %s not equal\n", str);

5721 break;

5722 }

5723 }

5724 }else{

5725 ucol_getSortKey(videcoll, str, strLength, sk1, 500);

5726 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500);

5727 if(compare_uint8_t_arrays(sk1, sk2) != 0){

5728 log_err("Sort key for %s not equal\n", str);

5729 break;

5730 }

5731

5732 }

5733 }

5734

5735 uset_close(tailoredSet);

5736

5737 uprv_free(viderules);

5738

5739 ucol_close(videcoll);

5740 ucol_close(importvidecoll);

5741 ucol_close(vicoll);

5742 ucol_close(decoll);

5743 }

5744

5745 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMIC S AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia' */

5746 static const UChar longUpperStr1[]= { /* 155 chars */

5747 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F , 0x4E, 0x41, 0x4C,

5748 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D , 0x20, 0x50, 0x52,

5749 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45 , 0x52, 0x45, 0x4E,

5750 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49 , 0x43, 0x53, 0x2C,

5751 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53 , 0x20, 0x41, 0x4E,

5752 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E , 0x41, 0x4C, 0x20,

5753 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F , 0x42, 0x4C, 0x45,

5754 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E , 0x65, 0x20, 0x32,

5755 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65 , 0x72, 0x73, 0x62,

5756 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61

5757 };

5758

5759 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */

5760 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */

5761 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0 xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

5762 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0 xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

5763 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0 xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

5764 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0 xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20,

5765 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0 xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20

5766 };

5767

5768 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */

5769 static const UChar longUpperStr3[]= { /* 324 chars */

5770 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5771 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5772 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5773 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5774 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5775 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5776 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5779 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5780 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20,

5781 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0 x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20

5782 };

5783

5784 #define MY_ARRAY_LEN(array) (sizeof(array)/sizeof(array[0]))

5785

5786 typedef struct {

5787 const UChar * longUpperStrPtr;

5788 int32_t longUpperStrLen;

5789 } LongUpperStrItem;

5790

5791 /* String pointers must be in reverse collation order of the corresponding strin gs */

5792 static const LongUpperStrItem longUpperStrItems[] = {

5793 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) },

5794 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) },

5795 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) },

5796 { NULL, 0 }

5797 };

5798

5799 enum { kCollKeyLenMax = 850 }; /* may change with collation changes */

5800

5801 /* Text fix for #8445; without fix, could have crash due to stack or heap corrup tion */

5802 static void TestCaseLevelBufferOverflow(void)

5803 {

5804 UErrorCode status = U_ZERO_ERROR;

5805 UCollator * ucol = ucol_open("root", &status);

5806 if ( U_SUCCESS(status) ) {

5807 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status);

5808 if ( U_SUCCESS(status) ) {

5809 const LongUpperStrItem * itemPtr;

5810 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax];

5811 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL; itemPtr++ ) {

5812 int32_t sortKeyLen;

5813 if (itemPtr > longUpperStrItems) {

5814 uprv_strcpy((char )sortKeyB, (char )sortKeyA);

5815 }

5816 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, ite mPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax);

5817 if (sortKeyLen <= 0 \|\| sortKeyLen > kCollKeyLenMax) {

5818 log_err("ERROR sort key length from ucol_getSortKey is %d\n" , sortKeyLen);

5819 break;

5820 }

5821 if ( itemPtr > longUpperStrItems ) {

5822 int compareResult = uprv_strcmp((char )sortKeyA, (char )so rtKeyB);

5823 if (compareResult >= 0) {

5824 log_err("ERROR in sort key comparison result, expected - 1, got %d\n", compareResult);

5825 }

5826 }

5827 }

5828 } else {

5829 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL o n: %s\n", myErrorName(status));

5830 }

5831 ucol_close(ucol);

5832 } else {

5833 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName( status));

5834 }

5835 }

5836

5837 /* Test for #10595 */

5838 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66, 0}; /* Sa sa Ki, Takeshi */

5839 #define KEY_PART_SIZE 16

5840

5841 static void TestNextSortKeyPartJaIdentical(void)

5842 {

5843 UErrorCode status = U_ZERO_ERROR;

5844 UCollator *coll;

5845 uint8_t keyPart[KEY_PART_SIZE];

5846 UCharIterator iter;

5847 uint32_t state[2] = {0, 0};

5848 int32_t keyPartLen;

5849

5850 coll = ucol_open("ja", &status);

5851 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status);

5852 if (U_FAILURE(status)) {

5853 log_err_status(status, "ERROR: in creation of Japanese collator with ide ntical strength: %s\n", myErrorName(status));

5854 return;

5855 }

5856

5857 uiter_setString(&iter, testJapaneseName, 5);

5858 keyPartLen = KEY_PART_SIZE;

5859 while (keyPartLen == KEY_PART_SIZE) {

5860 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_ SIZE, &status);

5861 if (U_FAILURE(status)) {

5862 log_err_status(status, "ERROR: in iterating next sort key part: %s\n ", myErrorName(status));

5863 break;

5864 }

5865 }

5866

5867 ucol_close(coll);

5868 }

5869

5870 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x)

5871

5872 void addMiscCollTest(TestNode** root)

5873 {

5874 TEST(TestRuleOptions);

5875 TEST(TestBeforePrefixFailure);

5876 TEST(TestContractionClosure);

5877 TEST(TestPrefixCompose);

5878 TEST(TestStrCollIdenticalPrefix);

5879 TEST(TestPrefix);

5880 TEST(TestNewJapanese);

5881 /TEST(TestLimitations);/

5882 TEST(TestNonChars);

5883 TEST(TestExtremeCompression);

5884 TEST(TestSurrogates);

5885 TEST(TestVariableTopSetting);

5886 TEST(TestMaxVariable);

5887 TEST(TestBocsuCoverage);

5888 TEST(TestCyrillicTailoring);

5889 TEST(TestCase);

5890 TEST(IncompleteCntTest);

5891 TEST(BlackBirdTest);

5892 TEST(FunkyATest);

5893 TEST(BillFairmanTest);

5894 TEST(TestChMove);

5895 TEST(TestImplicitTailoring);

5896 TEST(TestFCDProblem);

5897 TEST(TestEmptyRule);

5898 /TEST(TestJ784);/ /* 'zh' locale has changed - now it is getting tested by TestBeforePinyin */

5899 TEST(TestJ815);

5900 /TEST(TestJ831);/ /* we changed lv locale */

5901 TEST(TestBefore);

5902 TEST(TestHangulTailoring);

5903 TEST(TestUCARules);

5904 TEST(TestIncrementalNormalize);

5905 TEST(TestComposeDecompose);

5906 TEST(TestCompressOverlap);

5907 TEST(TestContraction);

5908 TEST(TestExpansion);

5909 /TEST(PrintMarkDavis);/ /* this test doesn't test - just prints sortkeys * /

5910 /TEST(TestGetCaseBit);/ /this one requires internal things to be exported /

5911 TEST(TestOptimize);

5912 TEST(TestSuppressContractions);

5913 TEST(Alexis2);

5914 TEST(TestHebrewUCA);

5915 TEST(TestPartialSortKeyTermination);

5916 TEST(TestSettings);

5917 TEST(TestEquals);

5918 TEST(TestJ2726);

5919 TEST(NullRule);

5920 TEST(TestNumericCollation);

5921 TEST(TestTibetanConformance);

5922 TEST(TestPinyinProblem);

5923 TEST(TestSeparateTrees);

5924 TEST(TestBeforePinyin);

5925 TEST(TestBeforeTightening);

5926 /TEST(TestMoreBefore);/

5927 TEST(TestTailorNULL);

5928 TEST(TestUpperFirstQuaternary);

5929 TEST(TestJ4960);

5930 TEST(TestJ5223);

5931 TEST(TestJ5232);

5932 TEST(TestJ5367);

5933 TEST(TestHiragana);

5934 TEST(TestSortKeyConsistency);

5935 TEST(TestVI5913); /* VI, RO tailored rules */

5936 TEST(TestCroatianSortKey);

5937 TEST(TestTailor6179);

5938 TEST(TestUCAPrecontext);

5939 TEST(TestOutOfBuffer5468);

5940 TEST(TestSameStrengthList);

5941

5942 TEST(TestSameStrengthListQuoted);

5943 TEST(TestSameStrengthListSupplemental);

5944 TEST(TestSameStrengthListQwerty);

5945 TEST(TestSameStrengthListQuotedQwerty);

5946 TEST(TestSameStrengthListRanges);

5947 TEST(TestSameStrengthListSupplementalRanges);

5948 TEST(TestSpecialCharacters);

5949 TEST(TestPrivateUseCharacters);

5950 TEST(TestPrivateUseCharactersInList);

5951 TEST(TestPrivateUseCharactersInRange);

5952 TEST(TestInvalidListsAndRanges);

5953 TEST(TestImportRulesDeWithPhonebook);

5954 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */

5955 /* TEST(TestImportRulesCJKWithUnihan); */

5956 TEST(TestImport);

5957 TEST(TestImportWithType);

5958

5959 TEST(TestBeforeRuleWithScriptReordering);

5960 TEST(TestNonLeadBytesDuringCollationReordering);

5961 TEST(TestReorderingAPI);

5962 TEST(TestReorderingAPIWithRuleCreatedCollator);

5963 TEST(TestEquivalentReorderingScripts);

5964 TEST(TestGreekFirstReorder);

5965 TEST(TestGreekLastReorder);

5966 TEST(TestNonScriptReorder);

5967 TEST(TestHaniReorder);

5968 TEST(TestHaniReorderWithOtherRules);

5969 TEST(TestMultipleReorder);

5970 TEST(TestReorderingAcrossCloning);

5971 TEST(TestReorderWithNumericCollation);

5972

5973 TEST(TestCaseLevelBufferOverflow);

5974 TEST(TestNextSortKeyPartJaIdentical);

5975 }

5976

5977 #endif /* #if !UCONFIG_NO_COLLATION */

OLD	NEW

« no previous file with comments | « source/test/cintltst/cloctst.c ('k') | source/test/cintltst/cmsgtst.h » ('j') | no next file with comments »