OLD | NEW |
| (Empty) |
1 | |
2 /******************************************************************** | |
3 * COPYRIGHT: | |
4 * Copyright (c) 2001-2015, International Business Machines Corporation and | |
5 * others. All Rights Reserved. | |
6 ********************************************************************/ | |
7 /******************************************************************************* | |
8 * | |
9 * File cmsccoll.C | |
10 * | |
11 *******************************************************************************/ | |
12 /** | |
13 * These are the tests specific to ICU 1.8 and above, that I didn't know where | |
14 * to fit. | |
15 */ | |
16 | |
17 #include <stdio.h> | |
18 | |
19 #include "unicode/utypes.h" | |
20 | |
21 #if !UCONFIG_NO_COLLATION | |
22 | |
23 #include "unicode/ucol.h" | |
24 #include "unicode/ucoleitr.h" | |
25 #include "unicode/uloc.h" | |
26 #include "cintltst.h" | |
27 #include "ccolltst.h" | |
28 #include "callcoll.h" | |
29 #include "unicode/ustring.h" | |
30 #include "string.h" | |
31 #include "ucol_imp.h" | |
32 #include "cmemory.h" | |
33 #include "cstring.h" | |
34 #include "uassert.h" | |
35 #include "unicode/parseerr.h" | |
36 #include "unicode/ucnv.h" | |
37 #include "unicode/ures.h" | |
38 #include "unicode/uscript.h" | |
39 #include "unicode/utf16.h" | |
40 #include "uparse.h" | |
41 #include "putilimp.h" | |
42 | |
43 | |
/*
 * Number of elements in a true array (not a pointer). The argument is fully
 * parenthesized so that any array-valued expression can be passed safely.
 */
#define LEN(a) (sizeof(a)/sizeof((a)[0]))
45 | |
46 #define MAX_TOKEN_LEN 16 | |
47 | |
48 typedef UCollationResult tst_strcoll(void *collator, const int object, | |
49 const UChar *source, const int sLen, | |
50 const UChar *target, const int tLen); | |
51 | |
52 | |
53 | |
/*
 * Strings for IncompleteCntTest, listed in the order expected under the
 * rules " & Z < ABC < Q < B".
 */
static const char cnt1[][10] = {
    "AA", "AC", "AZ", "AQ", "AB", "ABZ", "ABQ", "Z", "ABC", "Q", "B"
};

/*
 * Strings for IncompleteCntTest, listed in the order expected under the
 * rules " & Z < DAVIS < MARK <DAV".
 */
static const char cnt2[][10] = {
    "DA", "DAD", "DAZ", "MAR", "Z", "DAVIS", "MARK", "DAV", "DAVI"
};
80 | |
81 static void IncompleteCntTest(void) | |
82 { | |
83 UErrorCode status = U_ZERO_ERROR; | |
84 UChar temp[90]; | |
85 UChar t1[90]; | |
86 UChar t2[90]; | |
87 | |
88 UCollator *coll = NULL; | |
89 uint32_t i = 0, j = 0; | |
90 uint32_t size = 0; | |
91 | |
92 u_uastrcpy(temp, " & Z < ABC < Q < B"); | |
93 | |
94 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N
ULL,&status); | |
95 | |
96 if(U_SUCCESS(status)) { | |
97 size = sizeof(cnt1)/sizeof(cnt1[0]); | |
98 for(i = 0; i < size-1; i++) { | |
99 for(j = i+1; j < size; j++) { | |
100 UCollationElements *iter; | |
101 u_uastrcpy(t1, cnt1[i]); | |
102 u_uastrcpy(t2, cnt1[j]); | |
103 doTest(coll, t1, t2, UCOL_LESS); | |
104 /* synwee : added collation element iterator test */ | |
105 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); | |
106 if (U_FAILURE(status)) { | |
107 log_err("Creation of iterator failed\n"); | |
108 break; | |
109 } | |
110 backAndForth(iter); | |
111 ucol_closeElements(iter); | |
112 } | |
113 } | |
114 } | |
115 | |
116 ucol_close(coll); | |
117 | |
118 | |
119 u_uastrcpy(temp, " & Z < DAVIS < MARK <DAV"); | |
120 coll = ucol_openRules(temp, u_strlen(temp), UCOL_OFF, UCOL_DEFAULT_STRENGTH,NU
LL, &status); | |
121 | |
122 if(U_SUCCESS(status)) { | |
123 size = sizeof(cnt2)/sizeof(cnt2[0]); | |
124 for(i = 0; i < size-1; i++) { | |
125 for(j = i+1; j < size; j++) { | |
126 UCollationElements *iter; | |
127 u_uastrcpy(t1, cnt2[i]); | |
128 u_uastrcpy(t2, cnt2[j]); | |
129 doTest(coll, t1, t2, UCOL_LESS); | |
130 | |
131 /* synwee : added collation element iterator test */ | |
132 iter = ucol_openElements(coll, t2, u_strlen(t2), &status); | |
133 if (U_FAILURE(status)) { | |
134 log_err("Creation of iterator failed\n"); | |
135 break; | |
136 } | |
137 backAndForth(iter); | |
138 ucol_closeElements(iter); | |
139 } | |
140 } | |
141 } | |
142 | |
143 ucol_close(coll); | |
144 | |
145 | |
146 } | |
147 | |
148 const static char shifted[][20] = { | |
149 "black bird", | |
150 "black-bird", | |
151 "blackbird", | |
152 "black Bird", | |
153 "black-Bird", | |
154 "blackBird", | |
155 "black birds", | |
156 "black-birds", | |
157 "blackbirds" | |
158 }; | |
159 | |
160 const static UCollationResult shiftedTert[] = { | |
161 UCOL_EQUAL, | |
162 UCOL_EQUAL, | |
163 UCOL_EQUAL, | |
164 UCOL_LESS, | |
165 UCOL_EQUAL, | |
166 UCOL_EQUAL, | |
167 UCOL_LESS, | |
168 UCOL_EQUAL, | |
169 UCOL_EQUAL | |
170 }; | |
171 | |
172 const static char nonignorable[][20] = { | |
173 "black bird", | |
174 "black Bird", | |
175 "black birds", | |
176 "black-bird", | |
177 "black-Bird", | |
178 "black-birds", | |
179 "blackbird", | |
180 "blackBird", | |
181 "blackbirds" | |
182 }; | |
183 | |
184 static void BlackBirdTest(void) { | |
185 UErrorCode status = U_ZERO_ERROR; | |
186 UChar t1[90]; | |
187 UChar t2[90]; | |
188 | |
189 uint32_t i = 0, j = 0; | |
190 uint32_t size = 0; | |
191 UCollator *coll = ucol_open("en_US", &status); | |
192 | |
193 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); | |
194 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status); | |
195 | |
196 if(U_SUCCESS(status)) { | |
197 size = sizeof(nonignorable)/sizeof(nonignorable[0]); | |
198 for(i = 0; i < size-1; i++) { | |
199 for(j = i+1; j < size; j++) { | |
200 u_uastrcpy(t1, nonignorable[i]); | |
201 u_uastrcpy(t2, nonignorable[j]); | |
202 doTest(coll, t1, t2, UCOL_LESS); | |
203 } | |
204 } | |
205 } | |
206 | |
207 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
208 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_QUATERNARY, &status); | |
209 | |
210 if(U_SUCCESS(status)) { | |
211 size = sizeof(shifted)/sizeof(shifted[0]); | |
212 for(i = 0; i < size-1; i++) { | |
213 for(j = i+1; j < size; j++) { | |
214 u_uastrcpy(t1, shifted[i]); | |
215 u_uastrcpy(t2, shifted[j]); | |
216 doTest(coll, t1, t2, UCOL_LESS); | |
217 } | |
218 } | |
219 } | |
220 | |
221 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_TERTIARY, &status); | |
222 if(U_SUCCESS(status)) { | |
223 size = sizeof(shifted)/sizeof(shifted[0]); | |
224 for(i = 1; i < size; i++) { | |
225 u_uastrcpy(t1, shifted[i-1]); | |
226 u_uastrcpy(t2, shifted[i]); | |
227 doTest(coll, t1, t2, shiftedTert[i]); | |
228 } | |
229 } | |
230 | |
231 ucol_close(coll); | |
232 } | |
233 | |
234 const static UChar testSourceCases[][MAX_TOKEN_LEN] = { | |
235 {0x0041/*'A'*/, 0x0300, 0x0301, 0x0000}, | |
236 {0x0041/*'A'*/, 0x0300, 0x0316, 0x0000}, | |
237 {0x0041/*'A'*/, 0x0300, 0x0000}, | |
238 {0x00C0, 0x0301, 0x0000}, | |
239 /* this would work with forced normalization */ | |
240 {0x00C0, 0x0316, 0x0000} | |
241 }; | |
242 | |
243 const static UChar testTargetCases[][MAX_TOKEN_LEN] = { | |
244 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, | |
245 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000}, | |
246 {0x00C0, 0}, | |
247 {0x0041/*'A'*/, 0x0301, 0x0300, 0x0000}, | |
248 /* this would work with forced normalization */ | |
249 {0x0041/*'A'*/, 0x0316, 0x0300, 0x0000} | |
250 }; | |
251 | |
252 const static UCollationResult results[] = { | |
253 UCOL_GREATER, | |
254 UCOL_EQUAL, | |
255 UCOL_EQUAL, | |
256 UCOL_GREATER, | |
257 UCOL_EQUAL | |
258 }; | |
259 | |
260 static void FunkyATest(void) | |
261 { | |
262 | |
263 int32_t i; | |
264 UErrorCode status = U_ZERO_ERROR; | |
265 UCollator *myCollation; | |
266 myCollation = ucol_open("en_US", &status); | |
267 if(U_FAILURE(status)){ | |
268 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
269 return; | |
270 } | |
271 log_verbose("Testing some A letters, for some reason\n"); | |
272 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
273 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
274 for (i = 0; i < 4 ; i++) | |
275 { | |
276 doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); | |
277 } | |
278 ucol_close(myCollation); | |
279 } | |
280 | |
281 UColAttributeValue caseFirst[] = { | |
282 UCOL_OFF, | |
283 UCOL_LOWER_FIRST, | |
284 UCOL_UPPER_FIRST | |
285 }; | |
286 | |
287 | |
288 UColAttributeValue alternateHandling[] = { | |
289 UCOL_NON_IGNORABLE, | |
290 UCOL_SHIFTED | |
291 }; | |
292 | |
293 UColAttributeValue caseLevel[] = { | |
294 UCOL_OFF, | |
295 UCOL_ON | |
296 }; | |
297 | |
298 UColAttributeValue strengths[] = { | |
299 UCOL_PRIMARY, | |
300 UCOL_SECONDARY, | |
301 UCOL_TERTIARY, | |
302 UCOL_QUATERNARY, | |
303 UCOL_IDENTICAL | |
304 }; | |
305 | |
#if 0
/* Printable names, parallel to the strengths[] table. */
static const char * strengthsC[] = {
    "UCOL_PRIMARY",
    "UCOL_SECONDARY",
    "UCOL_TERTIARY",
    "UCOL_QUATERNARY",
    "UCOL_IDENTICAL"
};

/* Printable names, parallel to the caseFirst[] table. */
static const char * caseFirstC[] = {
    "UCOL_OFF",
    "UCOL_LOWER_FIRST",
    "UCOL_UPPER_FIRST"
};

/* Printable names, parallel to the alternateHandling[] table. */
static const char * alternateHandlingC[] = {
    "UCOL_NON_IGNORABLE",
    "UCOL_SHIFTED"
};

/* Printable names, parallel to the caseLevel[] table. */
static const char * caseLevelC[] = {
    "UCOL_OFF",
    "UCOL_ON"
};

/*
 * Not used currently - does not test, only prints.
 *
 * Writes to stderr the sort key of "M\u00e4rk Davis" for every combination of
 * caseFirst x alternateHandling x caseLevel x strengths on an en_US collator.
 */
static void PrintMarkDavis(void)
{
    UErrorCode status = U_ZERO_ERROR;
    UChar m[256];
    uint8_t sortkey[256];
    UCollator *coll = ucol_open("en_US", &status);
    uint32_t h,i,j,k, sortkeysize;
    uint32_t sizem = 0;
    char buffer[512];
    uint32_t len = 512;

    log_verbose("PrintMarkDavis");

    u_uastrcpy(m, "Mark Davis");
    sizem = u_strlen(m);

    /* Replace the 'a' of "Mark" with U+00E4. */
    m[1] = 0xe4;

    for(i = 0; i<sizem; i++) {
        fprintf(stderr, "\\u%04X ", m[i]);
    }
    fprintf(stderr, "\n");

    for(h = 0; h<sizeof(caseFirst)/sizeof(caseFirst[0]); h++) {
        /*
         * Index with h, the loop variable of this level. The previous code
         * used i, which at this point holds sizem (or the alternateHandling
         * count) and therefore read outside caseFirst[].
         */
        ucol_setAttribute(coll, UCOL_CASE_FIRST, caseFirst[h], &status);
        fprintf(stderr, "caseFirst: %s\n", caseFirstC[h]);

        for(i = 0; i<sizeof(alternateHandling)/sizeof(alternateHandling[0]); i++) {
            ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, alternateHandling[i], &status);
            fprintf(stderr, "  AltHandling: %s\n", alternateHandlingC[i]);

            for(j = 0; j<sizeof(caseLevel)/sizeof(caseLevel[0]); j++) {
                ucol_setAttribute(coll, UCOL_CASE_LEVEL, caseLevel[j], &status);
                fprintf(stderr, "    caseLevel: %s\n", caseLevelC[j]);

                for(k = 0; k<sizeof(strengths)/sizeof(strengths[0]); k++) {
                    ucol_setAttribute(coll, UCOL_STRENGTH, strengths[k], &status);
                    sortkeysize = ucol_getSortKey(coll, m, sizem, sortkey, 256);
                    (void)sortkeysize;
                    fprintf(stderr, "      strength: %s\n      Sortkey: ", strengthsC[k]);
                    fprintf(stderr, "%s\n", ucol_sortKeyToString(coll, sortkey, buffer, &len));
                }

            }

        }

    }

    /* The collator was previously never released. */
    ucol_close(coll);
}
#endif
383 | |
384 static void BillFairmanTest(void) { | |
385 /* | |
386 ** check for actual locale via ICU resource bundles | |
387 ** | |
388 ** lp points to the original locale ("fr_FR_....") | |
389 */ | |
390 | |
391 UResourceBundle *lr,*cr; | |
392 UErrorCode lec = U_ZERO_ERROR; | |
393 const char *lp = "fr_FR_you_ll_never_find_this_locale"; | |
394 | |
395 log_verbose("BillFairmanTest\n"); | |
396 | |
397 lr = ures_open(NULL,lp,&lec); | |
398 if (lr) { | |
399 cr = ures_getByKey(lr,"collations",0,&lec); | |
400 if (cr) { | |
401 lp = ures_getLocaleByType(cr, ULOC_ACTUAL_LOCALE, &lec); | |
402 if (lp) { | |
403 if (U_SUCCESS(lec)) { | |
404 if(strcmp(lp, "fr") != 0) { | |
405 log_err("Wrong locale for French Collation Data, expecte
d \"fr\" got %s", lp); | |
406 } | |
407 } | |
408 } | |
409 ures_close(cr); | |
410 } | |
411 ures_close(lr); | |
412 } | |
413 } | |
414 | |
/*
 * Strings for TestChMove, in the order expected from the "cs" collator.
 * Entries containing \u escapes are expanded with u_unescape() by the test.
 */
static const char chTest[][20] = {
    "c", "C",
    "ca", "cb", "cx", "cy", "CZ",
    "c\\u030C", "C\\u030C",
    "h", "H",
    "ha", "Ha", "harly", "hb", "HB", "hx", "HX", "hy", "HY",
    "ch", "cH", "Ch", "CH",
    "cha", "charly", "che", "chh", "chch", "chr",
    "i", "I", "iarly",
    "r", "R",
    "r\\u030C", "R\\u030C",
    "s", "S",
    "s\\u030C", "S\\u030C",
    "z", "Z",
    "z\\u030C", "Z\\u030C"
};
434 | |
435 static void TestChMove(void) { | |
436 UChar t1[256] = {0}; | |
437 UChar t2[256] = {0}; | |
438 | |
439 uint32_t i = 0, j = 0; | |
440 uint32_t size = 0; | |
441 UErrorCode status = U_ZERO_ERROR; | |
442 | |
443 UCollator *coll = ucol_open("cs", &status); | |
444 | |
445 if(U_SUCCESS(status)) { | |
446 size = sizeof(chTest)/sizeof(chTest[0]); | |
447 for(i = 0; i < size-1; i++) { | |
448 for(j = i+1; j < size; j++) { | |
449 u_unescape(chTest[i], t1, 256); | |
450 u_unescape(chTest[j], t2, 256); | |
451 doTest(coll, t1, t2, UCOL_LESS); | |
452 } | |
453 } | |
454 } | |
455 else { | |
456 log_data_err("Can't open collator"); | |
457 } | |
458 ucol_close(coll); | |
459 } | |
460 | |
461 | |
462 | |
463 | |
464 /* | |
465 const static char impTest[][20] = { | |
466 "\\u4e00", | |
467 "a", | |
468 "A", | |
469 "b", | |
470 "B", | |
471 "\\u4e01" | |
472 }; | |
473 */ | |
474 | |
475 | |
476 static void TestImplicitTailoring(void) { | |
477 static const struct { | |
478 const char *rules; | |
479 const char *data[10]; | |
480 const uint32_t len; | |
481 } tests[] = { | |
482 { | |
483 /* Tailor b and c before U+4E00. */ | |
484 "&[before 1]\\u4e00 < b < c " | |
485 /* Now, before U+4E00 is c; put d and e after that. */ | |
486 "&[before 1]\\u4e00 < d < e", | |
487 { "b", "c", "d", "e", "\\u4e00"}, 5 }, | |
488 { "&\\u4e00 < a <<< A < b <<< B", { "\\u4e00", "a", "A", "b", "B", "\\u4
e01"}, 6 }, | |
489 { "&[before 1]\\u4e00 < \\u4e01 < \\u4e02", { "\\u4e01", "\\u4e02", "\\u4e
00"}, 3}, | |
490 { "&[before 1]\\u4e01 < \\u4e02 < \\u4e03", { "\\u4e02", "\\u4e03", "\\u4e
01"}, 3} | |
491 }; | |
492 | |
493 int32_t i = 0; | |
494 | |
495 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { | |
496 genericRulesStarter(tests[i].rules, tests[i].data, tests[i].len); | |
497 } | |
498 | |
499 /* | |
500 UChar t1[256] = {0}; | |
501 UChar t2[256] = {0}; | |
502 | |
503 const char *rule = "&\\u4e00 < a <<< A < b <<< B"; | |
504 | |
505 uint32_t i = 0, j = 0; | |
506 uint32_t size = 0; | |
507 uint32_t ruleLen = 0; | |
508 UErrorCode status = U_ZERO_ERROR; | |
509 UCollator *coll = NULL; | |
510 ruleLen = u_unescape(rule, t1, 256); | |
511 | |
512 coll = ucol_openRules(t1, ruleLen, UCOL_OFF, UCOL_TERTIARY,NULL, &status); | |
513 | |
514 if(U_SUCCESS(status)) { | |
515 size = sizeof(impTest)/sizeof(impTest[0]); | |
516 for(i = 0; i < size-1; i++) { | |
517 for(j = i+1; j < size; j++) { | |
518 u_unescape(impTest[i], t1, 256); | |
519 u_unescape(impTest[j], t2, 256); | |
520 doTest(coll, t1, t2, UCOL_LESS); | |
521 } | |
522 } | |
523 } | |
524 else { | |
525 log_err("Can't open collator"); | |
526 } | |
527 ucol_close(coll); | |
528 */ | |
529 } | |
530 | |
531 static void TestFCDProblem(void) { | |
532 UChar t1[256] = {0}; | |
533 UChar t2[256] = {0}; | |
534 | |
535 const char *s1 = "\\u0430\\u0306\\u0325"; | |
536 const char *s2 = "\\u04D1\\u0325"; | |
537 | |
538 UErrorCode status = U_ZERO_ERROR; | |
539 UCollator *coll = ucol_open("", &status); | |
540 u_unescape(s1, t1, 256); | |
541 u_unescape(s2, t2, 256); | |
542 | |
543 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); | |
544 doTest(coll, t1, t2, UCOL_EQUAL); | |
545 | |
546 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
547 doTest(coll, t1, t2, UCOL_EQUAL); | |
548 | |
549 ucol_close(coll); | |
550 } | |
551 | |
552 /* | |
553 The largest normalization form is 18 for NFKC/NFKD, 4 for NFD and 3 for NFC | |
554 We're only using NFC/NFD in this test. | |
555 */ | |
556 #define NORM_BUFFER_TEST_LEN 18 | |
557 typedef struct { | |
558 UChar32 u; | |
559 UChar NFC[NORM_BUFFER_TEST_LEN]; | |
560 UChar NFD[NORM_BUFFER_TEST_LEN]; | |
561 } tester; | |
562 | |
563 static void TestComposeDecompose(void) { | |
564 /* [[:NFD_Inert=false:][:NFC_Inert=false:]] */ | |
565 static const UChar UNICODESET_STR[] = { | |
566 0x5B,0x5B,0x3A,0x4E,0x46,0x44,0x5F,0x49,0x6E,0x65,0x72,0x74,0x3D,0x66,0x
61, | |
567 0x6C,0x73,0x65,0x3A,0x5D,0x5B,0x3A,0x4E,0x46,0x43,0x5F,0x49,0x6E,0x65,0x
72, | |
568 0x74,0x3D,0x66,0x61,0x6C,0x73,0x65,0x3A,0x5D,0x5D,0 | |
569 }; | |
570 int32_t noOfLoc; | |
571 int32_t i = 0, j = 0; | |
572 | |
573 UErrorCode status = U_ZERO_ERROR; | |
574 const char *locName = NULL; | |
575 uint32_t nfcSize; | |
576 uint32_t nfdSize; | |
577 tester **t; | |
578 uint32_t noCases = 0; | |
579 UCollator *coll = NULL; | |
580 UChar32 u = 0; | |
581 UChar comp[NORM_BUFFER_TEST_LEN]; | |
582 uint32_t len = 0; | |
583 UCollationElements *iter; | |
584 USet *charsToTest = uset_openPattern(UNICODESET_STR, -1, &status); | |
585 int32_t charsToTestSize; | |
586 | |
587 noOfLoc = uloc_countAvailable(); | |
588 | |
589 coll = ucol_open("", &status); | |
590 if (U_FAILURE(status)) { | |
591 log_data_err("Error opening collator -> %s (Are you missing data?)\n", u
_errorName(status)); | |
592 return; | |
593 } | |
594 charsToTestSize = uset_size(charsToTest); | |
595 if (charsToTestSize <= 0) { | |
596 log_err("Set was zero. Missing data?\n"); | |
597 return; | |
598 } | |
599 t = (tester **)malloc(charsToTestSize * sizeof(tester *)); | |
600 t[0] = (tester *)malloc(sizeof(tester)); | |
601 log_verbose("Testing UCA extensively for %d characters\n", charsToTestSize); | |
602 | |
603 for(u = 0; u < charsToTestSize; u++) { | |
604 UChar32 ch = uset_charAt(charsToTest, u); | |
605 len = 0; | |
606 U16_APPEND_UNSAFE(comp, len, ch); | |
607 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM
_BUFFER_TEST_LEN, &status); | |
608 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM
_BUFFER_TEST_LEN, &status); | |
609 | |
610 if(nfcSize != nfdSize || (uprv_memcmp(t[noCases]->NFC, t[noCases]->NFD,
nfcSize * sizeof(UChar)) != 0) | |
611 || (len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * si
zeof(UChar)) != 0))) { | |
612 t[noCases]->u = ch; | |
613 if(len != nfdSize || (uprv_memcmp(comp, t[noCases]->NFD, nfdSize * s
izeof(UChar)) != 0)) { | |
614 u_strncpy(t[noCases]->NFC, comp, len); | |
615 t[noCases]->NFC[len] = 0; | |
616 } | |
617 noCases++; | |
618 t[noCases] = (tester *)malloc(sizeof(tester)); | |
619 uprv_memset(t[noCases], 0, sizeof(tester)); | |
620 } | |
621 } | |
622 log_verbose("Testing %d/%d of possible test cases\n", noCases, charsToTestSi
ze); | |
623 uset_close(charsToTest); | |
624 charsToTest = NULL; | |
625 | |
626 for(u=0; u<(UChar32)noCases; u++) { | |
627 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { | |
628 log_err("Failure: codePoint %05X fails TestComposeDecompose in the U
CA\n", t[u]->u); | |
629 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); | |
630 } | |
631 } | |
632 /* | |
633 for(u = 0; u < charsToTestSize; u++) { | |
634 if(!(u&0xFFFF)) { | |
635 log_verbose("%08X ", u); | |
636 } | |
637 uprv_memset(t[noCases], 0, sizeof(tester)); | |
638 t[noCases]->u = u; | |
639 len = 0; | |
640 U16_APPEND_UNSAFE(comp, len, u); | |
641 comp[len] = 0; | |
642 nfcSize = unorm_normalize(comp, len, UNORM_NFC, 0, t[noCases]->NFC, NORM_B
UFFER_TEST_LEN, &status); | |
643 nfdSize = unorm_normalize(comp, len, UNORM_NFD, 0, t[noCases]->NFD, NORM_B
UFFER_TEST_LEN, &status); | |
644 doTest(coll, comp, t[noCases]->NFD, UCOL_EQUAL); | |
645 doTest(coll, comp, t[noCases]->NFC, UCOL_EQUAL); | |
646 } | |
647 */ | |
648 | |
649 ucol_close(coll); | |
650 | |
651 log_verbose("Testing locales, number of cases = %i\n", noCases); | |
652 for(i = 0; i<noOfLoc; i++) { | |
653 status = U_ZERO_ERROR; | |
654 locName = uloc_getAvailable(i); | |
655 if(hasCollationElements(locName)) { | |
656 char cName[256]; | |
657 UChar name[256]; | |
658 int32_t nameSize = uloc_getDisplayName(locName, NULL, name, sizeof(c
Name), &status); | |
659 | |
660 for(j = 0; j<nameSize; j++) { | |
661 cName[j] = (char)name[j]; | |
662 } | |
663 cName[nameSize] = 0; | |
664 log_verbose("\nTesting locale %s (%s)\n", locName, cName); | |
665 | |
666 coll = ucol_open(locName, &status); | |
667 ucol_setStrength(coll, UCOL_IDENTICAL); | |
668 iter = ucol_openElements(coll, t[u]->NFD, u_strlen(t[u]->NFD), &stat
us); | |
669 | |
670 for(u=0; u<(UChar32)noCases; u++) { | |
671 if(!ucol_equal(coll, t[u]->NFC, -1, t[u]->NFD, -1)) { | |
672 log_err("Failure: codePoint %05X fails TestComposeDecompose
for locale %s\n", t[u]->u, cName); | |
673 doTest(coll, t[u]->NFC, t[u]->NFD, UCOL_EQUAL); | |
674 log_verbose("Testing NFC\n"); | |
675 ucol_setText(iter, t[u]->NFC, u_strlen(t[u]->NFC), &status); | |
676 backAndForth(iter); | |
677 log_verbose("Testing NFD\n"); | |
678 ucol_setText(iter, t[u]->NFD, u_strlen(t[u]->NFD), &status); | |
679 backAndForth(iter); | |
680 } | |
681 } | |
682 ucol_closeElements(iter); | |
683 ucol_close(coll); | |
684 } | |
685 } | |
686 for(u = 0; u <= (UChar32)noCases; u++) { | |
687 free(t[u]); | |
688 } | |
689 free(t); | |
690 } | |
691 | |
692 static void TestEmptyRule(void) { | |
693 UErrorCode status = U_ZERO_ERROR; | |
694 UChar rulez[] = { 0 }; | |
695 UCollator *coll = ucol_openRules(rulez, 0, UCOL_OFF, UCOL_TERTIARY,NULL, &stat
us); | |
696 | |
697 ucol_close(coll); | |
698 } | |
699 | |
700 static void TestUCARules(void) { | |
701 UErrorCode status = U_ZERO_ERROR; | |
702 UChar b[256]; | |
703 UChar *rules = b; | |
704 uint32_t ruleLen = 0; | |
705 UCollator *UCAfromRules = NULL; | |
706 UCollator *coll = ucol_open("", &status); | |
707 if(status == U_FILE_ACCESS_ERROR) { | |
708 log_data_err("Is your data around?\n"); | |
709 return; | |
710 } else if(U_FAILURE(status)) { | |
711 log_err("Error opening collator\n"); | |
712 return; | |
713 } | |
714 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, 256); | |
715 | |
716 log_verbose("TestUCARules\n"); | |
717 if(ruleLen > 256) { | |
718 rules = (UChar *)malloc((ruleLen+1)*sizeof(UChar)); | |
719 ruleLen = ucol_getRulesEx(coll, UCOL_FULL_RULES, rules, ruleLen); | |
720 } | |
721 log_verbose("Rules length is %d\n", ruleLen); | |
722 UCAfromRules = ucol_openRules(rules, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&s
tatus); | |
723 if(U_SUCCESS(status)) { | |
724 ucol_close(UCAfromRules); | |
725 } else { | |
726 log_verbose("Unable to create a collator from UCARules!\n"); | |
727 } | |
728 /* | |
729 u_unescape(blah, b, 256); | |
730 ucol_getSortKey(coll, b, 1, res, 256); | |
731 */ | |
732 ucol_close(coll); | |
733 if(rules != b) { | |
734 free(rules); | |
735 } | |
736 } | |
737 | |
738 | |
739 /* Pinyin tonal order */ | |
740 /* | |
741 A < .. (\u0101) < .. (\u00e1) < .. (\u01ce) < .. (\u00e0) | |
742 (w/macron)< (w/acute)< (w/caron)< (w/grave) | |
743 E < .. (\u0113) < .. (\u00e9) < .. (\u011b) < .. (\u00e8) | |
744 I < .. (\u012b) < .. (\u00ed) < .. (\u01d0) < .. (\u00ec) | |
745 O < .. (\u014d) < .. (\u00f3) < .. (\u01d2) < .. (\u00f2) | |
746 U < .. (\u016b) < .. (\u00fa) < .. (\u01d4) < .. (\u00f9) | |
747 < .. (\u01d6) < .. (\u01d8) < .. (\u01da) < .. (\u01dc) < | |
748 .. (\u00fc) | |
749 | |
750 However, in testing we got the following order: | |
751 A < .. (\u00e1) < .. (\u00e0) < .. (\u01ce) < .. (\u0101) | |
752 (w/acute)< (w/grave)< (w/caron)< (w/macron) | |
753 E < .. (\u00e9) < .. (\u00e8) < .. (\u00ea) < .. (\u011b) < | |
754 .. (\u0113) | |
755 I < .. (\u00ed) < .. (\u00ec) < .. (\u01d0) < .. (\u012b) | |
756 O < .. (\u00f3) < .. (\u00f2) < .. (\u01d2) < .. (\u014d) | |
757 U < .. (\u00fa) < .. (\u00f9) < .. (\u01d4) < .. (\u00fc) < | |
758 .. (\u01d8) | |
759 < .. (\u01dc) < .. (\u01da) < .. (\u01d6) < .. (\u016b) | |
760 */ | |
761 | |
/**
 * Pinyin tonal order: tailors the toned vowels before their base letter with
 * "[before 1]" rules (and the toned forms of u-diaeresis after u) and hands
 * the expected ordering to genericRulesStarter().
 */
static void TestBefore(void) {
    static const char *rules =
        "&[before 1]a<\\u0101<\\u00e1<\\u01ce<\\u00e0"
        "&[before 1]e<\\u0113<\\u00e9<\\u011b<\\u00e8"
        "&[before 1]i<\\u012b<\\u00ed<\\u01d0<\\u00ec"
        "&[before 1]o<\\u014d<\\u00f3<\\u01d2<\\u00f2"
        "&[before 1]u<\\u016b<\\u00fa<\\u01d4<\\u00f9"
        "&u<\\u01d6<\\u01d8<\\u01da<\\u01dc<\\u00fc";
    static const char *data[] = {
        "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0", "A",
        "\\u0113", "\\u00e9", "\\u011b", "\\u00e8", "E",
        "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec", "I",
        "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2", "O",
        "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9", "U",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc", "\\u00fc"
    };

    genericRulesStarter(rules, data, sizeof(data)/sizeof(data[0]));
}
780 | |
#if 0
/* superseded by TestBeforePinyin */
/* Passes the toned vowels, each group led by its base letter, to the "zh" locale check. */
static void TestJ784(void) {
    static const char *data[] = {
        "A", "\\u0101", "\\u00e1", "\\u01ce", "\\u00e0",
        "E", "\\u0113", "\\u00e9", "\\u011b", "\\u00e8",
        "I", "\\u012b", "\\u00ed", "\\u01d0", "\\u00ec",
        "O", "\\u014d", "\\u00f3", "\\u01d2", "\\u00f2",
        "U", "\\u016b", "\\u00fa", "\\u01d4", "\\u00f9",
        "\\u00fc",
        "\\u01d6", "\\u01d8", "\\u01da", "\\u01dc"
    };
    const uint32_t count = sizeof(data)/sizeof(data[0]);

    genericLocaleStarter("zh", data, count);
}
#endif
796 | |
#if 0
/* superseded by the changes to the lv locale */
/* Passes "I", "i", "Y", "y" in that order to the "lv" locale check. */
static void TestJ831(void) {
    static const char *data[] = { "I", "i", "Y", "y" };
    const uint32_t count = sizeof(data)/sizeof(data[0]);

    genericLocaleStarter("lv", data, count);
}
#endif
809 | |
810 static void TestJ815(void) { | |
811 const static char *data[] = { | |
812 "aa", | |
813 "Aa", | |
814 "ab", | |
815 "Ab", | |
816 "ad", | |
817 "Ad", | |
818 "ae", | |
819 "Ae", | |
820 "\\u00e6", | |
821 "\\u00c6", | |
822 "af", | |
823 "Af", | |
824 "b", | |
825 "B" | |
826 }; | |
827 genericLocaleStarter("fr", data, sizeof(data)/sizeof(data[0])); | |
828 genericRulesStarter("[backwards 2]&A<<\\u00e6/e<<<\\u00c6/E", data, sizeof(dat
a)/sizeof(data[0])); | |
829 } | |
830 | |
831 | |
832 static void TestCase(void) | |
833 { | |
834 const static UChar gRules[MAX_TOKEN_LEN] = | |
835 /*" & 0 < 1,\u2461<a,A"*/ | |
836 { 0x0026, 0x0030, 0x003C, 0x0031, 0x002C, 0x2460, 0x003C, 0x0061, 0x002C, 0x
0041, 0x0000 }; | |
837 | |
838 const static UChar testCase[][MAX_TOKEN_LEN] = | |
839 { | |
840 /*0*/ {0x0031 /*'1'*/, 0x0061/*'a'*/, 0x0000}, | |
841 /*1*/ {0x0031 /*'1'*/, 0x0041/*'A'*/, 0x0000}, | |
842 /*2*/ {0x2460 /*circ'1'*/, 0x0061/*'a'*/, 0x0000}, | |
843 /*3*/ {0x2460 /*circ'1'*/, 0x0041/*'A'*/, 0x0000} | |
844 }; | |
845 | |
846 const static UCollationResult caseTestResults[][9] = | |
847 { | |
848 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_
LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, | |
849 { UCOL_GREATER, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_LESS, UCOL_
LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER }, | |
850 { UCOL_LESS, UCOL_LESS, UCOL_LESS, UCOL_EQUAL, UCOL_GREATER, UCOL_
LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_LESS }, | |
851 { UCOL_GREATER, UCOL_LESS, UCOL_GREATER, UCOL_EQUAL, UCOL_LESS, UCOL_
LESS, UCOL_EQUAL, UCOL_EQUAL, UCOL_GREATER } | |
852 }; | |
853 | |
854 const static UColAttributeValue caseTestAttributes[][2] = | |
855 { | |
856 { UCOL_LOWER_FIRST, UCOL_OFF}, | |
857 { UCOL_UPPER_FIRST, UCOL_OFF}, | |
858 { UCOL_LOWER_FIRST, UCOL_ON}, | |
859 { UCOL_UPPER_FIRST, UCOL_ON} | |
860 }; | |
861 int32_t i,j,k; | |
862 UErrorCode status = U_ZERO_ERROR; | |
863 UCollationElements *iter; | |
864 UCollator *myCollation; | |
865 myCollation = ucol_open("en_US", &status); | |
866 | |
867 if(U_FAILURE(status)){ | |
868 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
869 return; | |
870 } | |
871 log_verbose("Testing different case settings\n"); | |
872 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
873 | |
874 for(k = 0; k<4; k++) { | |
875 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0],
&status); | |
876 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1],
&status); | |
877 log_verbose("Case first = %d, Case level = %d\n", caseTestAttributes[k][0]
, caseTestAttributes[k][1]); | |
878 for (i = 0; i < 3 ; i++) { | |
879 for(j = i+1; j<4; j++) { | |
880 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j
-1]); | |
881 } | |
882 } | |
883 } | |
884 ucol_close(myCollation); | |
885 | |
886 myCollation = ucol_openRules(gRules, u_strlen(gRules), UCOL_OFF, UCOL_TERTIA
RY,NULL, &status); | |
887 if(U_FAILURE(status)){ | |
888 log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(s
tatus)); | |
889 return; | |
890 } | |
891 log_verbose("Testing different case settings with custom rules\n"); | |
892 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
893 | |
894 for(k = 0; k<4; k++) { | |
895 ucol_setAttribute(myCollation, UCOL_CASE_FIRST, caseTestAttributes[k][0],
&status); | |
896 ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, caseTestAttributes[k][1],
&status); | |
897 for (i = 0; i < 3 ; i++) { | |
898 for(j = i+1; j<4; j++) { | |
899 log_verbose("k:%d, i:%d, j:%d\n", k, i, j); | |
900 doTest(myCollation, testCase[i], testCase[j], caseTestResults[k][3*i+j
-1]); | |
901 iter=ucol_openElements(myCollation, testCase[i], u_strlen(testCase[i])
, &status); | |
902 backAndForth(iter); | |
903 ucol_closeElements(iter); | |
904 iter=ucol_openElements(myCollation, testCase[j], u_strlen(testCase[j])
, &status); | |
905 backAndForth(iter); | |
906 ucol_closeElements(iter); | |
907 } | |
908 } | |
909 } | |
910 ucol_close(myCollation); | |
911 { | |
912 const static char *lowerFirst[] = { | |
913 "h", | |
914 "H", | |
915 "ch", | |
916 "Ch", | |
917 "CH", | |
918 "cha", | |
919 "chA", | |
920 "Cha", | |
921 "ChA", | |
922 "CHa", | |
923 "CHA", | |
924 "i", | |
925 "I" | |
926 }; | |
927 | |
928 const static char *upperFirst[] = { | |
929 "H", | |
930 "h", | |
931 "CH", | |
932 "Ch", | |
933 "ch", | |
934 "CHA", | |
935 "CHa", | |
936 "ChA", | |
937 "Cha", | |
938 "chA", | |
939 "cha", | |
940 "I", | |
941 "i" | |
942 }; | |
943 log_verbose("mixed case test\n"); | |
944 log_verbose("lower first, case level off\n"); | |
945 genericRulesStarter("[caseFirst lower]&H<ch<<<Ch<<<CH", lowerFirst, sizeof
(lowerFirst)/sizeof(lowerFirst[0])); | |
946 log_verbose("upper first, case level off\n"); | |
947 genericRulesStarter("[caseFirst upper]&H<ch<<<Ch<<<CH", upperFirst, sizeof
(upperFirst)/sizeof(upperFirst[0])); | |
948 log_verbose("lower first, case level on\n"); | |
949 genericRulesStarter("[caseFirst lower][caseLevel on]&H<ch<<<Ch<<<CH", lowe
rFirst, sizeof(lowerFirst)/sizeof(lowerFirst[0])); | |
950 log_verbose("upper first, case level on\n"); | |
951 genericRulesStarter("[caseFirst upper][caseLevel on]&H<ch<<<Ch<<<CH", uppe
rFirst, sizeof(upperFirst)/sizeof(upperFirst[0])); | |
952 } | |
953 | |
954 } | |
955 | |
956 static void TestIncrementalNormalize(void) { | |
957 | |
958 /*UChar baseA =0x61;*/ | |
959 UChar baseA =0x41; | |
960 /* UChar baseB = 0x42;*/ | |
961 static const UChar ccMix[] = {0x316, 0x321, 0x300}; | |
962 /*UChar ccMix[] = {0x61, 0x61, 0x61};*/ | |
963 /* | |
964 0x316 is combining grave accent below, cc=220 | |
965 0x321 is combining palatalized hook below, cc=202 | |
966 0x300 is combining grave accent, cc=230 | |
967 */ | |
968 | |
969 #define MAXSLEN 2000 | |
970 /*int maxSLen = 64000;*/ | |
971 int sLen; | |
972 int i; | |
973 | |
974 UCollator *coll; | |
975 UErrorCode status = U_ZERO_ERROR; | |
976 UCollationResult result; | |
977 | |
978 int32_t myQ = getTestOption(QUICK_OPTION); | |
979 | |
980 if(getTestOption(QUICK_OPTION) < 0) { | |
981 setTestOption(QUICK_OPTION, 1); | |
982 } | |
983 | |
984 { | |
985 /* Test 1. Run very long unnormalized strings, to force overflow of*/ | |
986 /* most buffers along the way.*/ | |
987 UChar strA[MAXSLEN+1]; | |
988 UChar strB[MAXSLEN+1]; | |
989 | |
990 coll = ucol_open("en_US", &status); | |
991 if(status == U_FILE_ACCESS_ERROR) { | |
992 log_data_err("Is your data around?\n"); | |
993 return; | |
994 } else if(U_FAILURE(status)) { | |
995 log_err("Error opening collator\n"); | |
996 return; | |
997 } | |
998 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
999 | |
1000 /*for (sLen = 257; sLen<MAXSLEN; sLen++) {*/ | |
1001 /*for (sLen = 4; sLen<MAXSLEN; sLen++) {*/ | |
1002 /*for (sLen = 1000; sLen<1001; sLen++) {*/ | |
1003 for (sLen = 500; sLen<501; sLen++) { | |
1004 /*for (sLen = 40000; sLen<65000; sLen+=1000) {*/ | |
1005 strA[0] = baseA; | |
1006 strB[0] = baseA; | |
1007 for (i=1; i<=sLen-1; i++) { | |
1008 strA[i] = ccMix[i % 3]; | |
1009 strB[sLen-i] = ccMix[i % 3]; | |
1010 } | |
1011 strA[sLen] = 0; | |
1012 strB[sLen] = 0; | |
1013 | |
1014 ucol_setStrength(coll, UCOL_TERTIARY); /* Do test with default str
ength, which runs*/ | |
1015 doTest(coll, strA, strB, UCOL_EQUAL); /* optimized functions in
the impl*/ | |
1016 ucol_setStrength(coll, UCOL_IDENTICAL); /* Do again with the slow,
general impl.*/ | |
1017 doTest(coll, strA, strB, UCOL_EQUAL); | |
1018 } | |
1019 } | |
1020 | |
1021 setTestOption(QUICK_OPTION, myQ); | |
1022 | |
1023 | |
1024 /* Test 2: Non-normal sequence in a string that extends to the last charac
ter*/ | |
1025 /* of the string. Checks a couple of edge cases.*/ | |
1026 | |
1027 { | |
1028 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0}; | |
1029 static const UChar strB[] = {0x41, 0xc0, 0x316, 0}; | |
1030 ucol_setStrength(coll, UCOL_TERTIARY); | |
1031 doTest(coll, strA, strB, UCOL_EQUAL); | |
1032 } | |
1033 | |
1034 /* Test 3: Non-normal sequence is terminated by a surrogate pair.*/ | |
1035 | |
1036 { | |
1037 /* New UCA 3.1.1. | |
1038 * test below used a code point from Desseret, which sorts differently | |
1039 * than d800 dc00 | |
1040 */ | |
1041 /*UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD801, 0xDC00, 0};*/ | |
1042 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0xD800, 0xDC01, 0
}; | |
1043 static const UChar strB[] = {0x41, 0xc0, 0x316, 0xD800, 0xDC00, 0}; | |
1044 ucol_setStrength(coll, UCOL_TERTIARY); | |
1045 doTest(coll, strA, strB, UCOL_GREATER); | |
1046 } | |
1047 | |
1048 /* Test 4: Imbedded nulls do not terminate a string when length is specifi
ed.*/ | |
1049 | |
1050 { | |
1051 static const UChar strA[] = {0x41, 0x00, 0x42, 0x00}; | |
1052 static const UChar strB[] = {0x41, 0x00, 0x00, 0x00}; | |
1053 char sortKeyA[50]; | |
1054 char sortKeyAz[50]; | |
1055 char sortKeyB[50]; | |
1056 char sortKeyBz[50]; | |
1057 int r; | |
1058 | |
1059 /* there used to be -3 here. Hmmmm.... */ | |
1060 /*result = ucol_strcoll(coll, strA, -3, strB, -3);*/ | |
1061 result = ucol_strcoll(coll, strA, 3, strB, 3); | |
1062 if (result != UCOL_GREATER) { | |
1063 log_err("ERROR 1 in test 4\n"); | |
1064 } | |
1065 result = ucol_strcoll(coll, strA, -1, strB, -1); | |
1066 if (result != UCOL_EQUAL) { | |
1067 log_err("ERROR 2 in test 4\n"); | |
1068 } | |
1069 | |
1070 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); | |
1071 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz))
; | |
1072 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); | |
1073 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz))
; | |
1074 | |
1075 r = strcmp(sortKeyA, sortKeyAz); | |
1076 if (r <= 0) { | |
1077 log_err("Error 3 in test 4\n"); | |
1078 } | |
1079 r = strcmp(sortKeyA, sortKeyB); | |
1080 if (r <= 0) { | |
1081 log_err("Error 4 in test 4\n"); | |
1082 } | |
1083 r = strcmp(sortKeyAz, sortKeyBz); | |
1084 if (r != 0) { | |
1085 log_err("Error 5 in test 4\n"); | |
1086 } | |
1087 | |
1088 ucol_setStrength(coll, UCOL_IDENTICAL); | |
1089 ucol_getSortKey(coll, strA, 3, (uint8_t *)sortKeyA, sizeof(sortKeyA)); | |
1090 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz))
; | |
1091 ucol_getSortKey(coll, strB, 3, (uint8_t *)sortKeyB, sizeof(sortKeyB)); | |
1092 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz))
; | |
1093 | |
1094 r = strcmp(sortKeyA, sortKeyAz); | |
1095 if (r <= 0) { | |
1096 log_err("Error 6 in test 4\n"); | |
1097 } | |
1098 r = strcmp(sortKeyA, sortKeyB); | |
1099 if (r <= 0) { | |
1100 log_err("Error 7 in test 4\n"); | |
1101 } | |
1102 r = strcmp(sortKeyAz, sortKeyBz); | |
1103 if (r != 0) { | |
1104 log_err("Error 8 in test 4\n"); | |
1105 } | |
1106 ucol_setStrength(coll, UCOL_TERTIARY); | |
1107 } | |
1108 | |
1109 | |
1110 /* Test 5: Null characters in non-normal source strings.*/ | |
1111 | |
1112 { | |
1113 static const UChar strA[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x42, 0x00}
; | |
1114 static const UChar strB[] = {0x41, 0x41, 0x300, 0x316, 0x00, 0x00, 0x00}
; | |
1115 char sortKeyA[50]; | |
1116 char sortKeyAz[50]; | |
1117 char sortKeyB[50]; | |
1118 char sortKeyBz[50]; | |
1119 int r; | |
1120 | |
1121 result = ucol_strcoll(coll, strA, 6, strB, 6); | |
1122 if (result != UCOL_GREATER) { | |
1123 log_err("ERROR 1 in test 5\n"); | |
1124 } | |
1125 result = ucol_strcoll(coll, strA, -1, strB, -1); | |
1126 if (result != UCOL_EQUAL) { | |
1127 log_err("ERROR 2 in test 5\n"); | |
1128 } | |
1129 | |
1130 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); | |
1131 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz))
; | |
1132 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); | |
1133 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz))
; | |
1134 | |
1135 r = strcmp(sortKeyA, sortKeyAz); | |
1136 if (r <= 0) { | |
1137 log_err("Error 3 in test 5\n"); | |
1138 } | |
1139 r = strcmp(sortKeyA, sortKeyB); | |
1140 if (r <= 0) { | |
1141 log_err("Error 4 in test 5\n"); | |
1142 } | |
1143 r = strcmp(sortKeyAz, sortKeyBz); | |
1144 if (r != 0) { | |
1145 log_err("Error 5 in test 5\n"); | |
1146 } | |
1147 | |
1148 ucol_setStrength(coll, UCOL_IDENTICAL); | |
1149 ucol_getSortKey(coll, strA, 6, (uint8_t *)sortKeyA, sizeof(sortKeyA)); | |
1150 ucol_getSortKey(coll, strA, -1, (uint8_t *)sortKeyAz, sizeof(sortKeyAz))
; | |
1151 ucol_getSortKey(coll, strB, 6, (uint8_t *)sortKeyB, sizeof(sortKeyB)); | |
1152 ucol_getSortKey(coll, strB, -1, (uint8_t *)sortKeyBz, sizeof(sortKeyBz))
; | |
1153 | |
1154 r = strcmp(sortKeyA, sortKeyAz); | |
1155 if (r <= 0) { | |
1156 log_err("Error 6 in test 5\n"); | |
1157 } | |
1158 r = strcmp(sortKeyA, sortKeyB); | |
1159 if (r <= 0) { | |
1160 log_err("Error 7 in test 5\n"); | |
1161 } | |
1162 r = strcmp(sortKeyAz, sortKeyBz); | |
1163 if (r != 0) { | |
1164 log_err("Error 8 in test 5\n"); | |
1165 } | |
1166 ucol_setStrength(coll, UCOL_TERTIARY); | |
1167 } | |
1168 | |
1169 | |
1170 /* Test 6: Null character as base of a non-normal combining sequence.*/ | |
1171 | |
1172 { | |
1173 static const UChar strA[] = {0x41, 0x0, 0x300, 0x316, 0x41, 0x302, 0x00}
; | |
1174 static const UChar strB[] = {0x41, 0x0, 0x302, 0x316, 0x41, 0x300, 0x00}
; | |
1175 | |
1176 result = ucol_strcoll(coll, strA, 5, strB, 5); | |
1177 if (result != UCOL_LESS) { | |
1178 log_err("Error 1 in test 6\n"); | |
1179 } | |
1180 result = ucol_strcoll(coll, strA, -1, strB, -1); | |
1181 if (result != UCOL_EQUAL) { | |
1182 log_err("Error 2 in test 6\n"); | |
1183 } | |
1184 } | |
1185 | |
1186 ucol_close(coll); | |
1187 } | |
1188 | |
1189 | |
1190 | |
#if 0
/*
 * Disabled test (compiled out by the surrounding #if 0).
 *
 * For each escaped string in caseBitData it calls ucol_uprv_getCaseBits() on
 * a collator opened for the "" locale and compares the returned value with
 * the matching entry of results[], logging an error on mismatch.
 *
 * NOTE(review): the collator opened here is never closed and `status` is
 * never checked; both need fixing if this test is ever re-enabled.
 */
static void TestGetCaseBit(void) {
  /* Input strings, in u_unescape() escape syntax. */
  static const char *caseBitData[] = {
    "a", "A", "ch", "Ch", "CH",
    "\\uFF9E", "\\u0009"
  };

  /* Expected value for the caseBitData entry at the same index. */
  static const uint8_t results[] = {
    UCOL_LOWER_CASE, UCOL_UPPER_CASE, UCOL_LOWER_CASE, UCOL_MIXED_CASE, UCOL_UPPER_CASE,
    UCOL_UPPER_CASE, UCOL_LOWER_CASE
  };

  uint32_t i, blen = 0;
  UChar b[256] = {0};
  UErrorCode status = U_ZERO_ERROR;
  UCollator *UCA = ucol_open("", &status);
  uint8_t res = 0;

  for(i = 0; i<sizeof(results)/sizeof(results[0]); i++) {
    blen = u_unescape(caseBitData[i], b, 256);
    res = ucol_uprv_getCaseBits(UCA, b, blen, &status);
    if(results[i] != res) {
      /* Only the first code unit of the input is reported. */
      log_err("Expected case = %02X, got %02X for %04X\n", results[i], res, b[0]);
    }
  }
}
#endif
1218 | |
1219 static void TestHangulTailoring(void) { | |
1220 static const char *koreanData[] = { | |
1221 "\\uac00", "\\u4f3d", "\\u4f73", "\\u5047", "\\u50f9", "\\u52a0", "\\u53
ef", "\\u5475", | |
1222 "\\u54e5", "\\u5609", "\\u5ac1", "\\u5bb6", "\\u6687", "\\u67b6", "\
\u67b7", "\\u67ef", | |
1223 "\\u6b4c", "\\u73c2", "\\u75c2", "\\u7a3c", "\\u82db", "\\u8304", "\
\u8857", "\\u8888", | |
1224 "\\u8a36", "\\u8cc8", "\\u8dcf", "\\u8efb", "\\u8fe6", "\\u99d5", | |
1225 "\\u4EEE", "\\u50A2", "\\u5496", "\\u54FF", "\\u5777", "\\u5B8A", "\
\u659D", "\\u698E", | |
1226 "\\u6A9F", "\\u73C8", "\\u7B33", "\\u801E", "\\u8238", "\\u846D", "\
\u8B0C" | |
1227 }; | |
1228 | |
1229 const char *rules = | |
1230 "&\\uac00 <<< \\u4f3d <<< \\u4f73 <<< \\u5047 <<< \\u50f9 <<< \\u52a0 <<
< \\u53ef <<< \\u5475 " | |
1231 "<<< \\u54e5 <<< \\u5609 <<< \\u5ac1 <<< \\u5bb6 <<< \\u6687 <<< \\u67b6
<<< \\u67b7 <<< \\u67ef " | |
1232 "<<< \\u6b4c <<< \\u73c2 <<< \\u75c2 <<< \\u7a3c <<< \\u82db <<< \\u8304
<<< \\u8857 <<< \\u8888 " | |
1233 "<<< \\u8a36 <<< \\u8cc8 <<< \\u8dcf <<< \\u8efb <<< \\u8fe6 <<< \\u99d5
" | |
1234 "<<< \\u4EEE <<< \\u50A2 <<< \\u5496 <<< \\u54FF <<< \\u5777 <<< \\u5B8A
<<< \\u659D <<< \\u698E " | |
1235 "<<< \\u6A9F <<< \\u73C8 <<< \\u7B33 <<< \\u801E <<< \\u8238 <<< \\u846D
<<< \\u8B0C"; | |
1236 | |
1237 | |
1238 UErrorCode status = U_ZERO_ERROR; | |
1239 UChar rlz[2048] = { 0 }; | |
1240 uint32_t rlen = u_unescape(rules, rlz, 2048); | |
1241 | |
1242 UCollator *coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &
status); | |
1243 if(status == U_FILE_ACCESS_ERROR) { | |
1244 log_data_err("Is your data around?\n"); | |
1245 return; | |
1246 } else if(U_FAILURE(status)) { | |
1247 log_err("Error opening collator\n"); | |
1248 return; | |
1249 } | |
1250 | |
1251 log_verbose("Using start of korean rules\n"); | |
1252 | |
1253 if(U_SUCCESS(status)) { | |
1254 genericOrderingTest(coll, koreanData, sizeof(koreanData)/sizeof(koreanData[0
])); | |
1255 } else { | |
1256 log_err("Unable to open collator with rules %s\n", rules); | |
1257 } | |
1258 | |
1259 ucol_close(coll); | |
1260 | |
1261 log_verbose("Using ko__LOTUS locale\n"); | |
1262 genericLocaleStarter("ko__LOTUS", koreanData, sizeof(koreanData)/sizeof(korean
Data[0])); | |
1263 } | |
1264 | |
/*
 * The secondary/tertiary compression middle byte
 * as used by the current implementation.
 * Subject to change as the sort key compression changes.
 * See class CollationKeys.
 *
 * TestCompressOverlap compares compressed sort-key bytes against these
 * values: "top down" bytes must not fall below the middle byte and
 * "bottom up" bytes must not rise above it.
 */
enum {
    SEC_COMMON_MIDDLE = 0x25,  /* secondary level; range 05..45 */
    TER_ONLY_COMMON_MIDDLE = 0x65  /* tertiary level; range 05..C5 */
};
1275 | |
1276 static void TestCompressOverlap(void) { | |
1277 UChar secstr[150]; | |
1278 UChar tertstr[150]; | |
1279 UErrorCode status = U_ZERO_ERROR; | |
1280 UCollator *coll; | |
1281 uint8_t result[500]; | |
1282 uint32_t resultlen; | |
1283 int count = 0; | |
1284 uint8_t *tempptr; | |
1285 | |
1286 coll = ucol_open("", &status); | |
1287 | |
1288 if (U_FAILURE(status)) { | |
1289 log_err_status(status, "Collator can't be created -> %s\n", u_errorName(
status)); | |
1290 return; | |
1291 } | |
1292 while (count < 149) { | |
1293 secstr[count] = 0x0020; /* [06, 05, 05] */ | |
1294 tertstr[count] = 0x0020; | |
1295 count ++; | |
1296 } | |
1297 | |
1298 /* top down compression ----------------------------------- */ | |
1299 secstr[count] = 0x0332; /* [, 87, 05] */ | |
1300 tertstr[count] = 0x3000; /* [06, 05, 07] */ | |
1301 | |
1302 /* no compression secstr should have 150 secondary bytes, tertstr should | |
1303 have 150 tertiary bytes. | |
1304 with correct compression, secstr should have 6 secondary | |
1305 bytes (149/33 rounded up + accent), tertstr should have > 2 tertiary bytes *
/ | |
1306 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); | |
1307 (void)resultlen; /* Suppress set but not used warning. */ | |
1308 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; | |
1309 while (*(tempptr + 1) != 1) { | |
1310 /* the last secondary collation element is not checked since it is not | |
1311 part of the compression */ | |
1312 if (*tempptr < SEC_COMMON_MIDDLE) { | |
1313 log_err("Secondary top down compression overlapped\n"); | |
1314 } | |
1315 tempptr ++; | |
1316 } | |
1317 | |
1318 /* tertiary top/bottom/common for en_US is similar to the secondary | |
1319 top/bottom/common */ | |
1320 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); | |
1321 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; | |
1322 while (*(tempptr + 1) != 0) { | |
1323 /* the last secondary collation element is not checked since it is not | |
1324 part of the compression */ | |
1325 if (*tempptr < TER_ONLY_COMMON_MIDDLE) { | |
1326 log_err("Tertiary top down compression overlapped\n"); | |
1327 } | |
1328 tempptr ++; | |
1329 } | |
1330 | |
1331 /* bottom up compression ------------------------------------- */ | |
1332 secstr[count] = 0; | |
1333 tertstr[count] = 0; | |
1334 resultlen = ucol_getSortKey(coll, secstr, 150, result, LEN(result)); | |
1335 tempptr = (uint8_t *)uprv_strchr((char *)result, 1) + 1; | |
1336 while (*(tempptr + 1) != 1) { | |
1337 /* the last secondary collation element is not checked since it is not | |
1338 part of the compression */ | |
1339 if (*tempptr > SEC_COMMON_MIDDLE) { | |
1340 log_err("Secondary bottom up compression overlapped\n"); | |
1341 } | |
1342 tempptr ++; | |
1343 } | |
1344 | |
1345 /* tertiary top/bottom/common for en_US is similar to the secondary | |
1346 top/bottom/common */ | |
1347 resultlen = ucol_getSortKey(coll, tertstr, 150, result, LEN(result)); | |
1348 tempptr = (uint8_t *)uprv_strrchr((char *)result, 1) + 1; | |
1349 while (*(tempptr + 1) != 0) { | |
1350 /* the last secondary collation element is not checked since it is not | |
1351 part of the compression */ | |
1352 if (*tempptr > TER_ONLY_COMMON_MIDDLE) { | |
1353 log_err("Tertiary bottom up compression overlapped\n"); | |
1354 } | |
1355 tempptr ++; | |
1356 } | |
1357 | |
1358 ucol_close(coll); | |
1359 } | |
1360 | |
/*
 * Runs the three-string Cyrillic ordering check against each tailoring that
 * is still applicable.
 *
 * Variants that are intentionally not run:
 *   - the "ru" locale: Russian overrides contractions, so the test is not
 *     valid for it anymore;
 *   - the "root" locale and the rules "&\u0410 = \u0410", "&Z < \u0410",
 *     "&\u0410 = \u0410 < \u0410\u0301" and "&Z < \u0410 < \u0410\u0301":
 *     UCA 8.0 drops most of the Cyrillic contractions from the default
 *     order.  See CLDR ticket #7246 "root collation: remove Cyrillic
 *     contractions".
 */
static void TestCyrillicTailoring(void) {
    /* Strings in their expected order. */
    static const char *expectedOrder[] = {
        "\\u0410b",
        "\\u0410\\u0306a",
        "\\u04d0A"
    };
    /* Tailorings under which that order must hold. */
    static const char *activeRules[] = {
        "&\\u0410 = \\u0410 < \\u04d0",
        "&Z < \\u0410 < \\u04d0"
    };
    int ruleIndex;

    for (ruleIndex = 0; ruleIndex < 2; ++ruleIndex) {
        genericRulesStarter(activeRules[ruleIndex], expectedOrder, 3);
    }
}
1383 | |
/*
 * Checks two three-string orderings under a tailoring that consists only of
 * a [suppressContractions] directive for the range U+0400..U+047F.
 */
static void TestSuppressContractions(void) {
    static const char *suppressRule = "[suppressContractions [\\u0400-\\u047f]]";

    /* Latin letter first, followed by U+0410 with or without an accent. */
    static const char *latinLead[] = {
        "a\\u0410",
        "A\\u0410\\u0306",
        "\\uFF21\\u0410\\u0302"
    };
    /* U+0410 first, with or without an accent, followed by a Latin letter. */
    static const char *cyrillicLead[] = {
        "\\u0410\\u0302a",
        "\\u0410\\u0306b",
        "\\u0410c"
    };

    genericRulesStarter(suppressRule, latinLead, 3);
    genericRulesStarter(suppressRule, cyrillicLead, 3);
}
1400 | |
1401 static void TestContraction(void) { | |
1402 const static char *testrules[] = { | |
1403 "&A = AB / B", | |
1404 "&A = A\\u0306/\\u0306", | |
1405 "&c = ch / h" | |
1406 }; | |
1407 const static UChar testdata[][2] = { | |
1408 {0x0041 /* 'A' */, 0x0042 /* 'B' */}, | |
1409 {0x0041 /* 'A' */, 0x0306 /* combining breve */}, | |
1410 {0x0063 /* 'c' */, 0x0068 /* 'h' */} | |
1411 }; | |
1412 const static UChar testdata2[][2] = { | |
1413 {0x0063 /* 'c' */, 0x0067 /* 'g' */}, | |
1414 {0x0063 /* 'c' */, 0x0068 /* 'h' */}, | |
1415 {0x0063 /* 'c' */, 0x006C /* 'l' */} | |
1416 }; | |
1417 #if 0 | |
1418 /* | |
1419 * These pairs of rule strings are not guaranteed to yield the very same map
pings. | |
1420 * In fact, LDML 24 recommends an improved way of creating mappings | |
1421 * which always yields different mappings for such pairs. See | |
1422 * http://www.unicode.org/reports/tr35/tr35-33/tr35-collation.html#Orderings | |
1423 */ | |
1424 const static char *testrules3[] = { | |
1425 "&z < xyz &xyzw << B", | |
1426 "&z < xyz &xyz << B / w", | |
1427 "&z < ch &achm << B", | |
1428 "&z < ch &a << B / chm", | |
1429 "&\\ud800\\udc00w << B", | |
1430 "&\\ud800\\udc00 << B / w", | |
1431 "&a\\ud800\\udc00m << B", | |
1432 "&a << B / \\ud800\\udc00m", | |
1433 }; | |
1434 #endif | |
1435 | |
1436 UErrorCode status = U_ZERO_ERROR; | |
1437 UCollator *coll; | |
1438 UChar rule[256] = {0}; | |
1439 uint32_t rlen = 0; | |
1440 int i; | |
1441 | |
1442 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { | |
1443 UCollationElements *iter1; | |
1444 int j = 0; | |
1445 log_verbose("Rule %s for testing\n", testrules[i]); | |
1446 rlen = u_unescape(testrules[i], rule, 32); | |
1447 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); | |
1448 if (U_FAILURE(status)) { | |
1449 log_err_status(status, "Collator creation failed %s -> %s\n", testru
les[i], u_errorName(status)); | |
1450 return; | |
1451 } | |
1452 iter1 = ucol_openElements(coll, testdata[i], 2, &status); | |
1453 if (U_FAILURE(status)) { | |
1454 log_err("Collation iterator creation failed\n"); | |
1455 return; | |
1456 } | |
1457 while (j < 2) { | |
1458 UCollationElements *iter2 = ucol_openElements(coll, | |
1459 &(testdata[i][j]), | |
1460 1, &status); | |
1461 uint32_t ce; | |
1462 if (U_FAILURE(status)) { | |
1463 log_err("Collation iterator creation failed\n"); | |
1464 return; | |
1465 } | |
1466 ce = ucol_next(iter2, &status); | |
1467 while (ce != UCOL_NULLORDER) { | |
1468 if ((uint32_t)ucol_next(iter1, &status) != ce) { | |
1469 log_err("Collation elements in contraction split does not ma
tch\n"); | |
1470 return; | |
1471 } | |
1472 ce = ucol_next(iter2, &status); | |
1473 } | |
1474 j ++; | |
1475 ucol_closeElements(iter2); | |
1476 } | |
1477 if (ucol_next(iter1, &status) != UCOL_NULLORDER) { | |
1478 log_err("Collation elements not exhausted\n"); | |
1479 return; | |
1480 } | |
1481 ucol_closeElements(iter1); | |
1482 ucol_close(coll); | |
1483 } | |
1484 | |
1485 rlen = u_unescape("& a < b < c < ch < d & c = ch / h", rule, 256); | |
1486 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); | |
1487 if (ucol_strcoll(coll, testdata2[0], 2, testdata2[1], 2) != UCOL_LESS) { | |
1488 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", | |
1489 testdata2[0][0], testdata2[0][1], testdata2[1][0], | |
1490 testdata2[1][1]); | |
1491 return; | |
1492 } | |
1493 if (ucol_strcoll(coll, testdata2[1], 2, testdata2[2], 2) != UCOL_LESS) { | |
1494 log_err("Expected \\u%04x\\u%04x < \\u%04x\\u%04x\n", | |
1495 testdata2[1][0], testdata2[1][1], testdata2[2][0], | |
1496 testdata2[2][1]); | |
1497 return; | |
1498 } | |
1499 ucol_close(coll); | |
1500 #if 0 /* see above */ | |
1501 for (i = 0; i < sizeof(testrules3) / sizeof(testrules3[0]); i += 2) { | |
1502 log_verbose("testrules3 i==%d \"%s\" vs. \"%s\"\n", i, testrules3[i], t
estrules3[i + 1]); | |
1503 UCollator *coll1, | |
1504 *coll2; | |
1505 UCollationElements *iter1, | |
1506 *iter2; | |
1507 UChar ch = 0x0042 /* 'B' */; | |
1508 uint32_t ce; | |
1509 rlen = u_unescape(testrules3[i], rule, 32); | |
1510 coll1 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status)
; | |
1511 rlen = u_unescape(testrules3[i + 1], rule, 32); | |
1512 coll2 = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status)
; | |
1513 if (U_FAILURE(status)) { | |
1514 log_err("Collator creation failed %s\n", testrules[i]); | |
1515 return; | |
1516 } | |
1517 iter1 = ucol_openElements(coll1, &ch, 1, &status); | |
1518 iter2 = ucol_openElements(coll2, &ch, 1, &status); | |
1519 if (U_FAILURE(status)) { | |
1520 log_err("Collation iterator creation failed\n"); | |
1521 return; | |
1522 } | |
1523 ce = ucol_next(iter1, &status); | |
1524 if (U_FAILURE(status)) { | |
1525 log_err("Retrieving ces failed\n"); | |
1526 return; | |
1527 } | |
1528 while (ce != UCOL_NULLORDER) { | |
1529 uint32_t ce2 = (uint32_t)ucol_next(iter2, &status); | |
1530 if (ce == ce2) { | |
1531 log_verbose("CEs match: %08x\n", ce); | |
1532 } else { | |
1533 log_err("CEs do not match: %08x vs. %08x\n", ce, ce2); | |
1534 return; | |
1535 } | |
1536 ce = ucol_next(iter1, &status); | |
1537 if (U_FAILURE(status)) { | |
1538 log_err("Retrieving ces failed\n"); | |
1539 return; | |
1540 } | |
1541 } | |
1542 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { | |
1543 log_err("CEs not exhausted\n"); | |
1544 return; | |
1545 } | |
1546 ucol_closeElements(iter1); | |
1547 ucol_closeElements(iter2); | |
1548 ucol_close(coll1); | |
1549 ucol_close(coll2); | |
1550 } | |
1551 #endif | |
1552 } | |
1553 | |
1554 static void TestExpansion(void) { | |
1555 const static char *testrules[] = { | |
1556 #if 0 | |
1557 /* | |
1558 * This seems to have tested that M was not mapped to an expansion. | |
1559 * I believe the old builder just did that because it computed the exten
sion CEs | |
1560 * at the very end, which was a bug. | |
1561 * Among other problems, it violated the core tailoring principle | |
1562 * by making an earlier rule depend on a later one. | |
1563 * And, of course, if M did not get an expansion, then it was primary di
fferent from K, | |
1564 * unlike what the rule &K<<M says. | |
1565 */ | |
1566 "&J << K / B & K << M", | |
1567 #endif | |
1568 "&J << K / B << M" | |
1569 }; | |
1570 const static UChar testdata[][3] = { | |
1571 {0x004A /*'J'*/, 0x0041 /*'A'*/, 0}, | |
1572 {0x004D /*'M'*/, 0x0041 /*'A'*/, 0}, | |
1573 {0x004B /*'K'*/, 0x0041 /*'A'*/, 0}, | |
1574 {0x004B /*'K'*/, 0x0043 /*'C'*/, 0}, | |
1575 {0x004A /*'J'*/, 0x0043 /*'C'*/, 0}, | |
1576 {0x004D /*'M'*/, 0x0043 /*'C'*/, 0} | |
1577 }; | |
1578 | |
1579 UErrorCode status = U_ZERO_ERROR; | |
1580 UCollator *coll; | |
1581 UChar rule[256] = {0}; | |
1582 uint32_t rlen = 0; | |
1583 int i; | |
1584 | |
1585 for (i = 0; i < sizeof(testrules) / sizeof(testrules[0]); i ++) { | |
1586 int j = 0; | |
1587 log_verbose("Rule %s for testing\n", testrules[i]); | |
1588 rlen = u_unescape(testrules[i], rule, 32); | |
1589 coll = ucol_openRules(rule, rlen, UCOL_ON, UCOL_TERTIARY,NULL, &status); | |
1590 if (U_FAILURE(status)) { | |
1591 log_err_status(status, "Collator creation failed %s -> %s\n", testru
les[i], u_errorName(status)); | |
1592 return; | |
1593 } | |
1594 | |
1595 for (j = 0; j < 5; j ++) { | |
1596 doTest(coll, testdata[j], testdata[j + 1], UCOL_LESS); | |
1597 } | |
1598 ucol_close(coll); | |
1599 } | |
1600 } | |
1601 | |
#if 0
/* this test tests the current limitations of the engine */
/* it always fail, so it is disabled by default */
/*
 * Each braced section below feeds one tailoring rule and one or more string
 * lists to genericRulesStarter() / genericRulesStarterWithOptions().  The
 * whole function is compiled out by the surrounding #if 0.
 */
static void TestLimitations(void) {
  /* recursive expansions */
  {
    static const char *rule = "&a=b/c&d=c/e";
    static const char *tlimit01[] = {"add","b","adf"};
    static const char *tlimit02[] = {"aa","b","af"};
    log_verbose("recursive expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* contractions spanning expansions */
  {
    static const char *rule = "&a<<<c/e&g<<<eh";
    static const char *tlimit01[] = {"ad","c","af","f","ch","h"};
    static const char *tlimit02[] = {"ad","c","ch","af","f","h"};
    log_verbose("contractions spanning expansions\n");
    genericRulesStarter(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]));
    genericRulesStarter(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]));
  }
  /* normalization: nulls in contractions */
  {
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("NULL in contractions\n");
    /* Both string orders are tried, with decomposition on and then off. */
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* normalization: contractions spanning normalization */
  {
    /* NOTE(review): rule and data are identical to the previous section;
       possibly a copy-paste left unfinished - confirm before re-enabling. */
    static const char *rule = "&a<<<\\u0000\\u0302";
    static const char *tlimit01[] = {"a","\\u0000\\u0302\\u0327"};
    static const char *tlimit02[] = {"\\u0000\\u0302\\u0327","a"};
    static const UColAttribute att[] = { UCOL_DECOMPOSITION_MODE };
    static const UColAttributeValue valOn[] = { UCOL_ON };
    static const UColAttributeValue valOff[] = { UCOL_OFF };

    log_verbose("contractions spanning normalization\n");
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOn, 1);
    genericRulesStarterWithOptions(rule, tlimit01, 2, att, valOff, 1);
    genericRulesStarterWithOptions(rule, tlimit02, 2, att, valOff, 1);

  }
  /* variable top:  */
  {
    /*static const char *rule2 = "&\\u2010<x=[variable top]<z";*/
    static const char *rule = "&\\u2010<x<[variable top]=z";
    /*static const char *rule3 = "&' '<x<[variable top]=z";*/
    static const char *tlimit01[] = {" ", "z", "zb", "a", " b", "xb", "b", "c" };
    static const char *tlimit02[] = {"-", "-x", "x","xb", "-z", "z", "zb", "-a", "a", "-b", "b", "c"};
    static const char *tlimit03[] = {" ", "xb", "z", "zb", "a", " b", "b", "c" };
    /* att[k] is set to valOn[k] or valOff[k]; the arrays are parallel. */
    static const UColAttribute att[] = { UCOL_ALTERNATE_HANDLING, UCOL_STRENGTH };
    static const UColAttributeValue valOn[] = { UCOL_SHIFTED, UCOL_QUATERNARY };
    static const UColAttributeValue valOff[] = { UCOL_NON_IGNORABLE, UCOL_TERTIARY };

    log_verbose("variable top\n");
    genericRulesStarterWithOptions(rule, tlimit03, sizeof(tlimit03)/sizeof(tlimit03[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));

  }
  /* case level */
  {
    static const char *rule = "&c<ch<<<cH<<<Ch<<<CH";
    static const char *tlimit01[] = {"c","CH","Ch","cH","ch"};
    static const char *tlimit02[] = {"c","CH","cH","Ch","ch"};
    static const UColAttribute att[] = { UCOL_CASE_FIRST};
    static const UColAttributeValue valOn[] = { UCOL_UPPER_FIRST};
    /*static const UColAttributeValue valOff[] = { UCOL_OFF};*/
    log_verbose("case level\n");
    genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOn, sizeof(att)/sizeof(att[0]));
    /*genericRulesStarterWithOptions(rule, tlimit01, sizeof(tlimit01)/sizeof(tlimit01[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
    /*genericRulesStarterWithOptions(rule, tlimit02, sizeof(tlimit02)/sizeof(tlimit02[0]), att, valOff, sizeof(att)/sizeof(att[0]));*/
  }

}
#endif
1693 | |
1694 static void TestBocsuCoverage(void) { | |
1695 UErrorCode status = U_ZERO_ERROR; | |
1696 const char *testString = "\\u0041\\u0441\\u4441\\U00044441\\u4441\\u0441\\u004
1"; | |
1697 UChar test[256] = {0}; | |
1698 uint32_t tlen = u_unescape(testString, test, 32); | |
1699 uint8_t key[256] = {0}; | |
1700 uint32_t klen = 0; | |
1701 | |
1702 UCollator *coll = ucol_open("", &status); | |
1703 if(U_SUCCESS(status)) { | |
1704 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); | |
1705 | |
1706 klen = ucol_getSortKey(coll, test, tlen, key, 256); | |
1707 (void)klen; /* Suppress set but not used warning. */ | |
1708 | |
1709 ucol_close(coll); | |
1710 } else { | |
1711 log_data_err("Couldn't open UCA\n"); | |
1712 } | |
1713 } | |
1714 | |
1715 static void TestVariableTopSetting(void) { | |
1716 UErrorCode status = U_ZERO_ERROR; | |
1717 uint32_t varTopOriginal = 0, varTop1, varTop2; | |
1718 UCollator *coll = ucol_open("", &status); | |
1719 if(U_SUCCESS(status)) { | |
1720 | |
1721 static const UChar nul = 0; | |
1722 static const UChar space = 0x20; | |
1723 static const UChar dot = 0x2e; /* punctuation */ | |
1724 static const UChar degree = 0xb0; /* symbol */ | |
1725 static const UChar dollar = 0x24; /* currency symbol */ | |
1726 static const UChar zero = 0x30; /* digit */ | |
1727 | |
1728 varTopOriginal = ucol_getVariableTop(coll, &status); | |
1729 log_verbose("ucol_getVariableTop(root) -> %08x\n", varTopOriginal); | |
1730 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
1731 | |
1732 varTop1 = ucol_setVariableTop(coll, &space, 1, &status); | |
1733 varTop2 = ucol_getVariableTop(coll, &status); | |
1734 log_verbose("ucol_setVariableTop(space) -> %08x\n", varTop1); | |
1735 if(U_FAILURE(status) || varTop1 != varTop2 || | |
1736 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1737 ucol_equal(coll, &nul, 0, &dot, 1) || | |
1738 ucol_equal(coll, &nul, 0, °ree, 1) || | |
1739 ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1740 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1741 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { | |
1742 log_err("ucol_setVariableTop(space) did not work - %s\n", u_errorName(status
)); | |
1743 } | |
1744 | |
1745 varTop1 = ucol_setVariableTop(coll, &dot, 1, &status); | |
1746 varTop2 = ucol_getVariableTop(coll, &status); | |
1747 log_verbose("ucol_setVariableTop(dot) -> %08x\n", varTop1); | |
1748 if(U_FAILURE(status) || varTop1 != varTop2 || | |
1749 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1750 !ucol_equal(coll, &nul, 0, &dot, 1) || | |
1751 ucol_equal(coll, &nul, 0, °ree, 1) || | |
1752 ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1753 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1754 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) { | |
1755 log_err("ucol_setVariableTop(dot) did not work - %s\n", u_errorName(status))
; | |
1756 } | |
1757 | |
1758 varTop1 = ucol_setVariableTop(coll, °ree, 1, &status); | |
1759 varTop2 = ucol_getVariableTop(coll, &status); | |
1760 log_verbose("ucol_setVariableTop(degree) -> %08x\n", varTop1); | |
1761 if(U_FAILURE(status) || varTop1 != varTop2 || | |
1762 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1763 !ucol_equal(coll, &nul, 0, &dot, 1) || | |
1764 !ucol_equal(coll, &nul, 0, °ree, 1) || | |
1765 ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1766 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1767 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) { | |
1768 log_err("ucol_setVariableTop(degree) did not work - %s\n", u_errorName(statu
s)); | |
1769 } | |
1770 | |
1771 varTop1 = ucol_setVariableTop(coll, &dollar, 1, &status); | |
1772 varTop2 = ucol_getVariableTop(coll, &status); | |
1773 log_verbose("ucol_setVariableTop(dollar) -> %08x\n", varTop1); | |
1774 if(U_FAILURE(status) || varTop1 != varTop2 || | |
1775 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1776 !ucol_equal(coll, &nul, 0, &dot, 1) || | |
1777 !ucol_equal(coll, &nul, 0, °ree, 1) || | |
1778 !ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1779 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1780 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { | |
1781 log_err("ucol_setVariableTop(dollar) did not work - %s\n", u_errorName(statu
s)); | |
1782 } | |
1783 | |
1784 log_verbose("Testing setting variable top to contractions\n"); | |
1785 { | |
1786 UChar first[4] = { 0 }; | |
1787 first[0] = 0x0040; | |
1788 first[1] = 0x0050; | |
1789 first[2] = 0x0000; | |
1790 | |
1791 status = U_ZERO_ERROR; | |
1792 ucol_setVariableTop(coll, first, -1, &status); | |
1793 | |
1794 if(U_SUCCESS(status)) { | |
1795 log_err("Invalid contraction succeded in setting variable top!\n"); | |
1796 } | |
1797 | |
1798 } | |
1799 | |
1800 log_verbose("Test restoring variable top\n"); | |
1801 | |
1802 status = U_ZERO_ERROR; | |
1803 ucol_restoreVariableTop(coll, varTopOriginal, &status); | |
1804 if(varTopOriginal != ucol_getVariableTop(coll, &status)) { | |
1805 log_err("Couldn't restore old variable top\n"); | |
1806 } | |
1807 | |
1808 log_verbose("Testing calling with error set\n"); | |
1809 | |
1810 status = U_INTERNAL_PROGRAM_ERROR; | |
1811 varTop1 = ucol_setVariableTop(coll, &space, 1, &status); | |
1812 varTop2 = ucol_getVariableTop(coll, &status); | |
1813 ucol_restoreVariableTop(coll, varTop2, &status); | |
1814 varTop1 = ucol_setVariableTop(NULL, &dot, 1, &status); | |
1815 varTop2 = ucol_getVariableTop(NULL, &status); | |
1816 ucol_restoreVariableTop(NULL, varTop2, &status); | |
1817 if(status != U_INTERNAL_PROGRAM_ERROR) { | |
1818 log_err("Bad reaction to passed error!\n"); | |
1819 } | |
1820 ucol_close(coll); | |
1821 } else { | |
1822 log_data_err("Couldn't open UCA collator\n"); | |
1823 } | |
1824 } | |
1825 | |
1826 static void TestMaxVariable() { | |
1827 UErrorCode status = U_ZERO_ERROR; | |
1828 UColReorderCode oldMax, max; | |
1829 UCollator *coll; | |
1830 | |
1831 static const UChar nul = 0; | |
1832 static const UChar space = 0x20; | |
1833 static const UChar dot = 0x2e; /* punctuation */ | |
1834 static const UChar degree = 0xb0; /* symbol */ | |
1835 static const UChar dollar = 0x24; /* currency symbol */ | |
1836 static const UChar zero = 0x30; /* digit */ | |
1837 | |
1838 coll = ucol_open("", &status); | |
1839 if(U_FAILURE(status)) { | |
1840 log_data_err("Couldn't open root collator\n"); | |
1841 return; | |
1842 } | |
1843 | |
1844 oldMax = ucol_getMaxVariable(coll); | |
1845 log_verbose("ucol_getMaxVariable(root) -> %04x\n", oldMax); | |
1846 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
1847 | |
1848 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); | |
1849 max = ucol_getMaxVariable(coll); | |
1850 log_verbose("ucol_setMaxVariable(space) -> %04x\n", max); | |
1851 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SPACE || | |
1852 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1853 ucol_equal(coll, &nul, 0, &dot, 1) || | |
1854 ucol_equal(coll, &nul, 0, °ree, 1) || | |
1855 ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1856 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1857 ucol_greaterOrEqual(coll, &space, 1, &dot, 1)) { | |
1858 log_err("ucol_setMaxVariable(space) did not work - %s\n", u_errorName(status
)); | |
1859 } | |
1860 | |
1861 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_PUNCTUATION, &status); | |
1862 max = ucol_getMaxVariable(coll); | |
1863 log_verbose("ucol_setMaxVariable(punctuation) -> %04x\n", max); | |
1864 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_PUNCTUATION || | |
1865 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1866 !ucol_equal(coll, &nul, 0, &dot, 1) || | |
1867 ucol_equal(coll, &nul, 0, °ree, 1) || | |
1868 ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1869 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1870 ucol_greaterOrEqual(coll, &dot, 1, °ree, 1)) { | |
1871 log_err("ucol_setMaxVariable(punctuation) did not work - %s\n", u_errorName(
status)); | |
1872 } | |
1873 | |
1874 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SYMBOL, &status); | |
1875 max = ucol_getMaxVariable(coll); | |
1876 log_verbose("ucol_setMaxVariable(symbol) -> %04x\n", max); | |
1877 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_SYMBOL || | |
1878 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1879 !ucol_equal(coll, &nul, 0, &dot, 1) || | |
1880 !ucol_equal(coll, &nul, 0, °ree, 1) || | |
1881 ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1882 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1883 ucol_greaterOrEqual(coll, °ree, 1, &dollar, 1)) { | |
1884 log_err("ucol_setMaxVariable(symbol) did not work - %s\n", u_errorName(statu
s)); | |
1885 } | |
1886 | |
1887 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_CURRENCY, &status); | |
1888 max = ucol_getMaxVariable(coll); | |
1889 log_verbose("ucol_setMaxVariable(currency) -> %04x\n", max); | |
1890 if(U_FAILURE(status) || max != UCOL_REORDER_CODE_CURRENCY || | |
1891 !ucol_equal(coll, &nul, 0, &space, 1) || | |
1892 !ucol_equal(coll, &nul, 0, &dot, 1) || | |
1893 !ucol_equal(coll, &nul, 0, °ree, 1) || | |
1894 !ucol_equal(coll, &nul, 0, &dollar, 1) || | |
1895 ucol_equal(coll, &nul, 0, &zero, 1) || | |
1896 ucol_greaterOrEqual(coll, &dollar, 1, &zero, 1)) { | |
1897 log_err("ucol_setMaxVariable(currency) did not work - %s\n", u_errorName(sta
tus)); | |
1898 } | |
1899 | |
1900 log_verbose("Test restoring maxVariable\n"); | |
1901 status = U_ZERO_ERROR; | |
1902 ucol_setMaxVariable(coll, oldMax, &status); | |
1903 if(oldMax != ucol_getMaxVariable(coll)) { | |
1904 log_err("Couldn't restore old maxVariable\n"); | |
1905 } | |
1906 | |
1907 log_verbose("Testing calling with error set\n"); | |
1908 status = U_INTERNAL_PROGRAM_ERROR; | |
1909 ucol_setMaxVariable(coll, UCOL_REORDER_CODE_SPACE, &status); | |
1910 max = ucol_getMaxVariable(coll); | |
1911 if(max != oldMax || status != U_INTERNAL_PROGRAM_ERROR) { | |
1912 log_err("Bad reaction to passed error!\n"); | |
1913 } | |
1914 ucol_close(coll); | |
1915 } | |
1916 | |
1917 static void TestNonChars(void) { | |
1918 static const char *test[] = { | |
1919 "\\u0000", /* ignorable */ | |
1920 "\\uFFFE", /* special merge-sort character with minimum non-ignorable wei
ghts */ | |
1921 "\\uFDD0", "\\uFDEF", | |
1922 "\\U0001FFFE", "\\U0001FFFF", /* UCA 6.0: noncharacters are treated like
unassigned, */ | |
1923 "\\U0002FFFE", "\\U0002FFFF", /* not like ignorable. */ | |
1924 "\\U0003FFFE", "\\U0003FFFF", | |
1925 "\\U0004FFFE", "\\U0004FFFF", | |
1926 "\\U0005FFFE", "\\U0005FFFF", | |
1927 "\\U0006FFFE", "\\U0006FFFF", | |
1928 "\\U0007FFFE", "\\U0007FFFF", | |
1929 "\\U0008FFFE", "\\U0008FFFF", | |
1930 "\\U0009FFFE", "\\U0009FFFF", | |
1931 "\\U000AFFFE", "\\U000AFFFF", | |
1932 "\\U000BFFFE", "\\U000BFFFF", | |
1933 "\\U000CFFFE", "\\U000CFFFF", | |
1934 "\\U000DFFFE", "\\U000DFFFF", | |
1935 "\\U000EFFFE", "\\U000EFFFF", | |
1936 "\\U000FFFFE", "\\U000FFFFF", | |
1937 "\\U0010FFFE", "\\U0010FFFF", | |
1938 "\\uFFFF" /* special character with maximum primary weight */ | |
1939 }; | |
1940 UErrorCode status = U_ZERO_ERROR; | |
1941 UCollator *coll = ucol_open("en_US", &status); | |
1942 | |
1943 log_verbose("Test non characters\n"); | |
1944 | |
1945 if(U_SUCCESS(status)) { | |
1946 genericOrderingTestWithResult(coll, test, 35, UCOL_LESS); | |
1947 } else { | |
1948 log_err_status(status, "Unable to open collator\n"); | |
1949 } | |
1950 | |
1951 ucol_close(coll); | |
1952 } | |
1953 | |
1954 static void TestExtremeCompression(void) { | |
1955 static char *test[4]; | |
1956 int32_t j = 0, i = 0; | |
1957 | |
1958 for(i = 0; i<4; i++) { | |
1959 test[i] = (char *)malloc(2048*sizeof(char)); | |
1960 } | |
1961 | |
1962 for(j = 20; j < 500; j++) { | |
1963 for(i = 0; i<4; i++) { | |
1964 uprv_memset(test[i], 'a', (j-1)*sizeof(char)); | |
1965 test[i][j-1] = (char)('a'+i); | |
1966 test[i][j] = 0; | |
1967 } | |
1968 genericLocaleStarter("en_US", (const char **)test, 4); | |
1969 } | |
1970 | |
1971 | |
1972 for(i = 0; i<4; i++) { | |
1973 free(test[i]); | |
1974 } | |
1975 } | |
1976 | |
/*
 * Disabled variant of TestExtremeCompression; the #if 0 below keeps it out of
 * the build. It fills four 2048-byte buffers with runs of 'a' and calls
 * genericLocaleStarter() once after the first fill loop.
 *
 * NOTE(review): as written it passes `status` (not `&status`) to ucol_open()
 * and never uses or closes `coll`; both would need fixing before this could
 * be re-enabled.
 */
#if 0
static void TestExtremeCompression(void) {
  static char *test[4];
  int32_t j = 0, i = 0;
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("en_US", status);
  for(i = 0; i<4; i++) {
    test[i] = (char *)malloc(2048*sizeof(char));
  }
  for(j = 10; j < 2048; j++) {
    for(i = 0; i<4; i++) {
      uprv_memset(test[i], 'a', (j-2)*sizeof(char));
      test[i][j-1] = (char)('a'+i);
      test[i][j] = 0;
    }
  }
  genericLocaleStarter("en_US", (const char **)test, 4);

  for(j = 10; j < 2048; j++) {
    for(i = 0; i<1; i++) {
      uprv_memset(test[i], 'a', (j-1)*sizeof(char));
      test[i][j] = 0;
    }
  }
  for(i = 0; i<4; i++) {
    free(test[i]);
  }
}
#endif
2006 | |
/*
 * Builds a collator from a rule string that tailors supplementary characters
 * (written as escaped surrogate pairs) and hands the expected ordering to
 * genericRulesStarter().
 */
static void TestSurrogates(void) {
    /* Tailoring rule; adjacent literals are concatenated into one rule string. */
    static const char *rule =
        "&z < \\ud900\\udc25 < \\ud805\\udc50"
        "< \\ud800\\udc00y < \\ud800\\udc00r"
        "< \\ud800\\udc00f << \\ud800\\udc00"
        "< \\ud800\\udc00fa << \\ud800\\udc00fb"
        "< \\ud800\\udc00a < c < b" ;

    /* Strings passed to the helper, 14 in total. */
    static const char *test[] = {
        "z",
        "\\ud900\\udc25",
        "\\ud805\\udc50",
        "\\ud800\\udc00y",
        "\\ud800\\udc00r",
        "\\ud800\\udc00f",
        "\\ud800\\udc00",
        "\\ud800\\udc00c",
        "\\ud800\\udc00b",
        "\\ud800\\udc00fa",
        "\\ud800\\udc00fb",
        "\\ud800\\udc00a",
        "c",
        "b"
    };
    const uint32_t testCount = (uint32_t)(sizeof(test)/sizeof(test[0]));

    genericRulesStarter(rule, test, testCount);
}
2027 | |
/*
 * Tests the prefix ("context before") rule syntax, which is used by the
 * JIS X 4061 collation rules. Each table entry is a rule string plus the
 * strings handed to genericRulesStarter(); `len` is the count passed along.
 * The last entry lists five strings but passes a count of 4.
 */
static void TestPrefix(void) {
    static const struct {
        const char *rules;
        const char *data[50];
        const uint32_t len;
    } tests[] = {
        {
            "&z <<< z|a",
            { "zz", "za" },
            2
        },
        {
            "&z <<< z| a",
            { "zz", "za" },
            2
        },
        {
            "[strength I]"
            "&a=\\ud900\\udc25"
            "&z<<<\\ud900\\udc25|a",
            { "aa", "az", "\\ud900\\udc25z", "\\ud900\\udc25a", "zz" },
            4
        },
    };
    const uint32_t testCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx;

    for(idx = 0; idx < testCount; ++idx) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
    }
}
2053 | |
2054 /* This test uses data suplied by Masashiko Maedera to test the implementation *
/ | |
2055 /* JIS X 4061 collation order implementation *
/ | |
2056 static void TestNewJapanese(void) { | |
2057 | |
2058 static const char * const test1[] = { | |
2059 "\\u30b7\\u30e3\\u30fc\\u30ec", | |
2060 "\\u30b7\\u30e3\\u30a4", | |
2061 "\\u30b7\\u30e4\\u30a3", | |
2062 "\\u30b7\\u30e3\\u30ec", | |
2063 "\\u3061\\u3087\\u3053", | |
2064 "\\u3061\\u3088\\u3053", | |
2065 "\\u30c1\\u30e7\\u30b3\\u30ec\\u30fc\\u30c8", | |
2066 "\\u3066\\u30fc\\u305f", | |
2067 "\\u30c6\\u30fc\\u30bf", | |
2068 "\\u30c6\\u30a7\\u30bf", | |
2069 "\\u3066\\u3048\\u305f", | |
2070 "\\u3067\\u30fc\\u305f", | |
2071 "\\u30c7\\u30fc\\u30bf", | |
2072 "\\u30c7\\u30a7\\u30bf", | |
2073 "\\u3067\\u3048\\u305f", | |
2074 "\\u3066\\u30fc\\u305f\\u30fc", | |
2075 "\\u30c6\\u30fc\\u30bf\\u30a1", | |
2076 "\\u30c6\\u30a7\\u30bf\\u30fc", | |
2077 "\\u3066\\u3047\\u305f\\u3041", | |
2078 "\\u3066\\u3048\\u305f\\u30fc", | |
2079 "\\u3067\\u30fc\\u305f\\u30fc", | |
2080 "\\u30c7\\u30fc\\u30bf\\u30a1", | |
2081 "\\u3067\\u30a7\\u305f\\u30a1", | |
2082 "\\u30c7\\u3047\\u30bf\\u3041", | |
2083 "\\u30c7\\u30a8\\u30bf\\u30a2", | |
2084 "\\u3072\\u3086", | |
2085 "\\u3073\\u3085\\u3042", | |
2086 "\\u3074\\u3085\\u3042", | |
2087 "\\u3073\\u3085\\u3042\\u30fc", | |
2088 "\\u30d3\\u30e5\\u30a2\\u30fc", | |
2089 "\\u3074\\u3085\\u3042\\u30fc", | |
2090 "\\u30d4\\u30e5\\u30a2\\u30fc", | |
2091 "\\u30d2\\u30e5\\u30a6", | |
2092 "\\u30d2\\u30e6\\u30a6", | |
2093 "\\u30d4\\u30e5\\u30a6\\u30a2", | |
2094 "\\u3073\\u3085\\u30fc\\u3042\\u30fc", | |
2095 "\\u30d3\\u30e5\\u30fc\\u30a2\\u30fc", | |
2096 "\\u30d3\\u30e5\\u30a6\\u30a2\\u30fc", | |
2097 "\\u3072\\u3085\\u3093", | |
2098 "\\u3074\\u3085\\u3093", | |
2099 "\\u3075\\u30fc\\u308a", | |
2100 "\\u30d5\\u30fc\\u30ea", | |
2101 "\\u3075\\u3045\\u308a", | |
2102 "\\u3075\\u30a5\\u308a", | |
2103 "\\u3075\\u30a5\\u30ea", | |
2104 "\\u30d5\\u30a6\\u30ea", | |
2105 "\\u3076\\u30fc\\u308a", | |
2106 "\\u30d6\\u30fc\\u30ea", | |
2107 "\\u3076\\u3045\\u308a", | |
2108 "\\u30d6\\u30a5\\u308a", | |
2109 "\\u3077\\u3046\\u308a", | |
2110 "\\u30d7\\u30a6\\u30ea", | |
2111 "\\u3075\\u30fc\\u308a\\u30fc", | |
2112 "\\u30d5\\u30a5\\u30ea\\u30fc", | |
2113 "\\u3075\\u30a5\\u308a\\u30a3", | |
2114 "\\u30d5\\u3045\\u308a\\u3043", | |
2115 "\\u30d5\\u30a6\\u30ea\\u30fc", | |
2116 "\\u3075\\u3046\\u308a\\u3043", | |
2117 "\\u30d6\\u30a6\\u30ea\\u30a4", | |
2118 "\\u3077\\u30fc\\u308a\\u30fc", | |
2119 "\\u3077\\u30a5\\u308a\\u30a4", | |
2120 "\\u3077\\u3046\\u308a\\u30fc", | |
2121 "\\u30d7\\u30a6\\u30ea\\u30a4", | |
2122 "\\u30d5\\u30fd", | |
2123 "\\u3075\\u309e", | |
2124 "\\u3076\\u309d", | |
2125 "\\u3076\\u3075", | |
2126 "\\u3076\\u30d5", | |
2127 "\\u30d6\\u3075", | |
2128 "\\u30d6\\u30d5", | |
2129 "\\u3076\\u309e", | |
2130 "\\u3076\\u3077", | |
2131 "\\u30d6\\u3077", | |
2132 "\\u3077\\u309d", | |
2133 "\\u30d7\\u30fd", | |
2134 "\\u3077\\u3075", | |
2135 }; | |
2136 | |
2137 static const char *test2[] = { | |
2138 "\\u306f\\u309d", /* H\\u309d */ | |
2139 "\\u30cf\\u30fd", /* K\\u30fd */ | |
2140 "\\u306f\\u306f", /* HH */ | |
2141 "\\u306f\\u30cf", /* HK */ | |
2142 "\\u30cf\\u30cf", /* KK */ | |
2143 "\\u306f\\u309e", /* H\\u309e */ | |
2144 "\\u30cf\\u30fe", /* K\\u30fe */ | |
2145 "\\u306f\\u3070", /* HH\\u309b */ | |
2146 "\\u30cf\\u30d0", /* KK\\u309b */ | |
2147 "\\u306f\\u3071", /* HH\\u309c */ | |
2148 "\\u30cf\\u3071", /* KH\\u309c */ | |
2149 "\\u30cf\\u30d1", /* KK\\u309c */ | |
2150 "\\u3070\\u309d", /* H\\u309b\\u309d */ | |
2151 "\\u30d0\\u30fd", /* K\\u309b\\u30fd */ | |
2152 "\\u3070\\u306f", /* H\\u309bH */ | |
2153 "\\u30d0\\u30cf", /* K\\u309bK */ | |
2154 "\\u3070\\u309e", /* H\\u309b\\u309e */ | |
2155 "\\u30d0\\u30fe", /* K\\u309b\\u30fe */ | |
2156 "\\u3070\\u3070", /* H\\u309bH\\u309b */ | |
2157 "\\u30d0\\u3070", /* K\\u309bH\\u309b */ | |
2158 "\\u30d0\\u30d0", /* K\\u309bK\\u309b */ | |
2159 "\\u3070\\u3071", /* H\\u309bH\\u309c */ | |
2160 "\\u30d0\\u30d1", /* K\\u309bK\\u309c */ | |
2161 "\\u3071\\u309d", /* H\\u309c\\u309d */ | |
2162 "\\u30d1\\u30fd", /* K\\u309c\\u30fd */ | |
2163 "\\u3071\\u306f", /* H\\u309cH */ | |
2164 "\\u30d1\\u30cf", /* K\\u309cK */ | |
2165 "\\u3071\\u3070", /* H\\u309cH\\u309b */ | |
2166 "\\u3071\\u30d0", /* H\\u309cK\\u309b */ | |
2167 "\\u30d1\\u30d0", /* K\\u309cK\\u309b */ | |
2168 "\\u3071\\u3071", /* H\\u309cH\\u309c */ | |
2169 "\\u30d1\\u30d1", /* K\\u309cK\\u309c */ | |
2170 }; | |
2171 /* | |
2172 static const char *test3[] = { | |
2173 "\\u221er\\u221e", | |
2174 "\\u221eR#", | |
2175 "\\u221et\\u221e", | |
2176 "#r\\u221e", | |
2177 "#R#", | |
2178 "#t%", | |
2179 "#T%", | |
2180 "8t\\u221e", | |
2181 "8T\\u221e", | |
2182 "8t#", | |
2183 "8T#", | |
2184 "8t%", | |
2185 "8T%", | |
2186 "8t8", | |
2187 "8T8", | |
2188 "\\u03c9r\\u221e", | |
2189 "\\u03a9R%", | |
2190 "rr\\u221e", | |
2191 "rR\\u221e", | |
2192 "Rr\\u221e", | |
2193 "RR\\u221e", | |
2194 "RT%", | |
2195 "rt8", | |
2196 "tr\\u221e", | |
2197 "tr8", | |
2198 "TR8", | |
2199 "tt8", | |
2200 "\\u30b7\\u30e3\\u30fc\\u30ec", | |
2201 }; | |
2202 */ | |
2203 static const UColAttribute att[] = { UCOL_STRENGTH }; | |
2204 static const UColAttributeValue val[] = { UCOL_QUATERNARY }; | |
2205 | |
2206 static const UColAttribute attShifted[] = { UCOL_STRENGTH, UCOL_ALTERNATE_HAND
LING}; | |
2207 static const UColAttributeValue valShifted[] = { UCOL_QUATERNARY, UCOL_SHIFTED
}; | |
2208 | |
2209 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), a
tt, val, 1); | |
2210 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), a
tt, val, 1); | |
2211 /*genericLocaleStarter("ja", test3, sizeof(test3)/sizeof(test3[0]));*/ | |
2212 genericLocaleStarterWithOptions("ja", test1, sizeof(test1)/sizeof(test1[0]), a
ttShifted, valShifted, 2); | |
2213 genericLocaleStarterWithOptions("ja", test2, sizeof(test2)/sizeof(test2[0]), a
ttShifted, valShifted, 2); | |
2214 } | |
2215 | |
2216 static void TestStrCollIdenticalPrefix(void) { | |
2217 const char* rule = "&\\ud9b0\\udc70=\\ud9b0\\udc71"; | |
2218 const char* test[] = { | |
2219 "ab\\ud9b0\\udc70", | |
2220 "ab\\ud9b0\\udc71" | |
2221 }; | |
2222 genericRulesStarterWithResult(rule, test, sizeof(test)/sizeof(test[0]), UCOL_E
QUAL); | |
2223 } | |
2224 /* Contractions should have all their canonically equivalent */ | |
2225 /* strings included */ | |
2226 static void TestContractionClosure(void) { | |
2227 static const struct { | |
2228 const char *rules; | |
2229 const char *data[10]; | |
2230 const uint32_t len; | |
2231 } tests[] = { | |
2232 { "&b=\\u00e4\\u00e4", | |
2233 { "b", "\\u00e4\\u00e4", "a\\u0308a\\u0308", "\\u00e4a\\u0308", "a\\u0308\
\u00e4" }, 5}, | |
2234 { "&b=\\u00C5", | |
2235 { "b", "\\u00C5", "A\\u030A", "\\u212B" }, 4}, | |
2236 }; | |
2237 uint32_t i; | |
2238 | |
2239 | |
2240 for(i = 0; i<(sizeof(tests)/sizeof(tests[0])); i++) { | |
2241 genericRulesStarterWithResult(tests[i].rules, tests[i].data, tests[i].len, U
COL_EQUAL); | |
2242 } | |
2243 } | |
2244 | |
/*
 * Runs rule sets that use "[before 3]" through genericRulesStarter(). The
 * second and third entries contain the same three rule fragments in a
 * different order. (Original note: "This test also fails".)
 *
 * The section under #if 0 is not compiled; it holds further prefix rules and
 * a loop that prints collation elements for debugging.
 */
static void TestBeforePrefixFailure(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        {
            "&g <<< a"
            "&[before 3]\\uff41 <<< x",
            { "x", "\\uff41" },
            2
        },
        {
            "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
            "&\\u30A8=\\u30A8=\\u3048=\\uff74"
            "&[before 3]\\u30a7<<<\\u30a9",
            { "\\u30a9", "\\u30a7" },
            2
        },
        {
            "&[before 3]\\u30a7<<<\\u30a9"
            "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
            "&\\u30A8=\\u30A8=\\u3048=\\uff74",
            { "\\u30a9", "\\u30a7" },
            2
        },
    };
    const uint32_t testCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx;

    for(idx = 0; idx < testCount; ++idx) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
    }

#if 0
    const char* rule1 =
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74"
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc";
    const char* rule2 =
        "&[before 3]\\u30a7<<<\\u30c6|\\u30fc"
        "&\\u30A7=\\u30A7=\\u3047=\\uff6a"
        "&\\u30A8=\\u30A8=\\u3048=\\uff74";
    const char* test[] = {
        "\\u30c6\\u30fc\\u30bf",
        "\\u30c6\\u30a7\\u30bf",
    };
    genericRulesStarter(rule1, test, sizeof(test)/sizeof(test[0]));
    genericRulesStarter(rule2, test, sizeof(test)/sizeof(test[0]));
    /* this piece of code should be in some sort of verbose mode */
    /* it gets the collation elements for elements and prints them */
    /* This is useful when trying to see whether the problem is */
    {
        UErrorCode status = U_ZERO_ERROR;
        uint32_t i = 0;
        UCollationElements *it = NULL;
        uint32_t CE;
        UChar string[256];
        uint32_t uStringLen;
        UCollator *coll = NULL;

        uStringLen = u_unescape(rule1, string, 256);

        coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status);

        /*coll = ucol_open("ja_JP_JIS", &status);*/
        it = ucol_openElements(coll, string, 0, &status);

        for(i = 0; i < sizeof(test)/sizeof(test[0]); i++) {
            log_verbose("%s\n", test[i]);
            uStringLen = u_unescape(test[i], string, 256);
            ucol_setText(it, string, uStringLen, &status);

            while((CE=ucol_next(it, &status)) != UCOL_NULLORDER) {
                log_verbose("%08X\n", CE);
            }
            log_verbose("\n");

        }

        ucol_closeElements(it);
        ucol_close(coll);
    }
#endif
}
2322 | |
2323 static void TestPrefixCompose(void) { | |
2324 const char* rule1 = | |
2325 "&\\u30a7<<<\\u30ab|\\u30fc=\\u30ac|\\u30fc"; | |
2326 /* | |
2327 const char* test[] = { | |
2328 "\\u30c6\\u30fc\\u30bf", | |
2329 "\\u30c6\\u30a7\\u30bf", | |
2330 }; | |
2331 */ | |
2332 { | |
2333 UErrorCode status = U_ZERO_ERROR; | |
2334 /*uint32_t i = 0;*/ | |
2335 /*UCollationElements *it = NULL;*/ | |
2336 /* uint32_t CE;*/ | |
2337 UChar string[256]; | |
2338 uint32_t uStringLen; | |
2339 UCollator *coll = NULL; | |
2340 | |
2341 uStringLen = u_unescape(rule1, string, 256); | |
2342 | |
2343 coll = ucol_openRules(string, uStringLen, UCOL_DEFAULT, UCOL_DEFAULT, NULL,
&status); | |
2344 ucol_close(coll); | |
2345 } | |
2346 | |
2347 | |
2348 } | |
2349 | |
/*
 * Rule reset positions referred to by this test:
 *   [last variable]            last variable value
 *   [last primary ignorable]   largest CE for primary ignorable
 *   [last secondary ignorable] largest CE for secondary ignorable
 *   [last tertiary ignorable]  largest CE for tertiary ignorable
 *   [top]                      guaranteed to be above all implicit CEs,
 *                              for now and in the future (in 1.8)
 */

/*
 * Runs a table of rules that use the special reset positions (optionally
 * combined with [before n]) through genericRulesStarter().
 *
 * The expected values are hardcoded and are correct for the current UCA;
 * when the UCA changes, they may have to change as well.
 */
static void TestRuleOptions(void) {
    /*
     * These strings contain the last character before [variable top]
     * and the first and second characters (by primary weights) after it.
     * See FractionalUCA.txt. For example:
         [last variable [0C FE, 05, 05]] # U+10A7F OLD SOUTH ARABIAN NUMERIC INDICATOR
         [variable top = 0C FE]
         [first regular [0D 0A, 05, 05]] # U+0060 GRAVE ACCENT
       and
         00B4; [0D 0C, 05, 05]
     *
     * Note: Starting with UCA 6.0, the [variable top] collation element
     * is not the weight of any character or string,
     * which means that LAST_VARIABLE_CHAR_STRING sorts before [last variable].
     */
#define LAST_VARIABLE_CHAR_STRING "\\U00010A7F"
#define FIRST_REGULAR_CHAR_STRING "\\u0060"
#define SECOND_REGULAR_CHAR_STRING "\\u00B4"

    /*
     * This string has to match the character that has the [last regular] weight
     * which changes with each UCA version.
     * See the bottom of FractionalUCA.txt which says something like
         [last regular [7A FE, 05, 05]] # U+1342E EGYPTIAN HIEROGLYPH AA032
     *
     * Note: Starting with UCA 6.0, the [last regular] collation element
     * is not the weight of any character or string,
     * which means that LAST_REGULAR_CHAR_STRING sorts before [last regular].
     */
#define LAST_REGULAR_CHAR_STRING "\\U0001342E"

    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
#if 0
        /* "you cannot go before ...": The parser now sets an error for such nonsensical rules. */
        /* - all befores here amount to zero */
        { "&[before 3][first tertiary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        }, /* you cannot go before first tertiary ignorable */

        { "&[before 3][last tertiary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        }, /* you cannot go before last tertiary ignorable */
#endif
        /*
         * However, there is a real secondary ignorable (artificial addition in FractionalUCA.txt),
         * and it *is* possible to "go before" that.
         */
        { "&[before 3][first secondary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        { "&[before 3][last secondary ignorable]<<<a",
            { "\\u0000", "a"}, 2
        },

        /* 'normal' befores */

        /*
         * Note: With a "SPACE first primary" boundary CE in FractionalUCA.txt,
         * it is not possible to tailor &[first primary ignorable]<a or &[last primary ignorable]<a
         * because there is no tailoring space before that boundary.
         * Made the tests work by tailoring to a space instead.
         */
        { "&[before 3][first primary ignorable]<<<c<<<b &' '<a", /* was &[first primary ignorable]<a */
            { "c", "b", "\\u0332", "a" }, 4
        },

        /* we don't have a code point that corresponds to
         * the last primary ignorable
         */
        { "&[before 3][last primary ignorable]<<<c<<<b &' '<a", /* was &[last primary ignorable]<a */
            { "\\u0332", "\\u20e3", "c", "b", "a" }, 5
        },

        { "&[before 3][first variable]<<<c<<<b &[first variable]<a",
            { "c", "b", "\\u0009", "a", "\\u000a" }, 5
        },

        { "&[last variable]<a &[before 3][last variable]<<<c<<<b ",
            { LAST_VARIABLE_CHAR_STRING, "c", "b", /* [last variable] */ "a", FIRST_REGULAR_CHAR_STRING }, 5
        },

        { "&[first regular]<a"
          "&[before 1][first regular]<b",
            { "b", FIRST_REGULAR_CHAR_STRING, "a", SECOND_REGULAR_CHAR_STRING }, 4
        },

        { "&[before 1][last regular]<b"
          "&[last regular]<a",
            { LAST_REGULAR_CHAR_STRING, "b", /* [last regular] */ "a", "\\u4e00" }, 4
        },

        { "&[before 1][first implicit]<b"
          "&[first implicit]<a",
            { "b", "\\u4e00", "a", "\\u4e01"}, 4
        },
#if 0  /* The current builder does not support tailoring to unassigned-implicit CEs (seems unnecessary, adds complexity). */
        { "&[before 1][last implicit]<b"
          "&[last implicit]<a",
            { "b", "\\U0010FFFD", "a" }, 3
        },
#endif
        /* Eight strings are listed here but the count passed to the helper is 7. */
        { "&[last variable]<z"
          "&' '<x"  /* was &[last primary ignorable]<x, see above */
          "&[last secondary ignorable]<<y"
          "&[last tertiary ignorable]<<<w"
          "&[top]<u",
            {"\\ufffb",  "w", "y", "\\u20e3", "x", LAST_VARIABLE_CHAR_STRING, "z", "u"}, 7
        }

    };
    const uint32_t testCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx;

    for(idx = 0; idx < testCount; ++idx) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
    }
}
2484 | |
2485 | |
/*
 * Not really a test: it only tries out whether copying of UCA contents
 * (triggered by an [optimize ...] rule) will fail. The functionality itself
 * stays the same, so there is nothing further to verify beyond running
 * genericRulesStarter() on a trivial pair of strings.
 */
static void TestOptimize(void) {
    static const struct {
        const char *rules;
        const char *data[10];
        const uint32_t len;
    } tests[] = {
        {
            "[optimize [\\uAC00-\\uD7FF]]",
            { "a", "b" },
            2
        }
    };
    const uint32_t testCount = (uint32_t)(sizeof(tests)/sizeof(tests[0]));
    uint32_t idx;

    for(idx = 0; idx < testCount; ++idx) {
        genericRulesStarter(tests[idx].rules, tests[idx].data, tests[idx].len);
    }
}
2507 | |
2508 /* | |
2509 cycheng@ca.ibm.c... we got inconsistent results when using the UTF-16BE iterator
and the UTF-8 iterator. | |
2510 weiv ucol_strcollIter? | |
2511 cycheng@ca.ibm.c... e.g. s1 = 0xfffc0062, and s2 = d8000021 | |
2512 weiv these are the input strings? | |
2513 cycheng@ca.ibm.c... yes, using the utf-16 iterator and UCA with normalization on
, we have s1 > s2 | |
2514 weiv will check - could be a problem with utf-8 iterator | |
2515 cycheng@ca.ibm.c... but if we use the utf-8 iterator, i.e. s1 = efbfbc62 and s2
= eda08021, we have s1 < s2 | |
2516 weiv hmmm | |
2517 cycheng@ca.ibm.c... note that we have a standalone high surrogate | |
2518 weiv that doesn't sound right | |
2519 cycheng@ca.ibm.c... we got the same inconsistent results on AIX and Win2000 | |
2520 weiv so you have two strings, you convert them to utf-8 and to utf-16BE | |
2521 cycheng@ca.ibm.c... yes | |
2522 weiv and then do the comparison | |
2523 cycheng@ca.ibm.c... in one case, the input strings are in utf8, and in the other
case the input strings are in utf-16be | |
2524 weiv utf-16 strings look like a little endian ones in the example you sent me | |
2525 weiv It could be a bug - let me try to test it out | |
2526 cycheng@ca.ibm.c... ok | |
2527 cycheng@ca.ibm.c... we can wait till the conf. call | |
2528 cycheng@ca.ibm.c... next weke | |
2529 weiv that would be great | |
2530 weiv hmmm | |
2531 weiv I might be wrong | |
2532 weiv let me play with it some more | |
2533 cycheng@ca.ibm.c... ok | |
2534 cycheng@ca.ibm.c... also please check s3 = 0x0e3a0062 and s4 = 0x0e400021. both
are in utf-16be | |
2535 cycheng@ca.ibm.c... seems with icu 2.2 we have s3 > s4, but not in icu 2.4 that'
s built for db2 | |
2536 cycheng@ca.ibm.c... also s1 & s2 that I sent you earlier are also in utf-16be | |
2537 weiv ok | |
2538 cycheng@ca.ibm.c... i ask sherman to send you more inconsistent data | |
2539 weiv thanks | |
2540 cycheng@ca.ibm.c... the 4 strings we sent are just samples | |
2541 */ | |
/*
 * Disabled test (excluded by #if 0). It compares the same two strings through
 * ucol_strcollIter() twice - once supplied as UTF-16BE bytes and once as
 * UTF-8 bytes - with normalization switched on, and logs an error when the
 * two comparison results differ.
 */
#if 0
static void Alexis(void) {
  UErrorCode status = U_ZERO_ERROR;
  UCollator *coll = ucol_open("", &status);


  /* Two strings as raw UTF-16BE bytes (two code units each). */
  const char utf16be[2][4] = {
    { (char)0xd8, (char)0x00, (char)0x00, (char)0x21 },
    { (char)0xff, (char)0xfc, (char)0x00, (char)0x62 }
  };

  /* Four UTF-8 bytes per string, paired with the UTF-16BE rows above. */
  const char utf8[2][4] = {
    { (char)0xed, (char)0xa0, (char)0x80, (char)0x21 },
    { (char)0xef, (char)0xbf, (char)0xbc, (char)0x62 },
  };

  UCharIterator iterU161, iterU162;
  UCharIterator iterU81, iterU82;

  UCollationResult resU16, resU8;

  uiter_setUTF16BE(&iterU161, utf16be[0], 4);
  uiter_setUTF16BE(&iterU162, utf16be[1], 4);

  uiter_setUTF8(&iterU81, utf8[0], 4);
  uiter_setUTF8(&iterU82, utf8[1], 4);

  ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

  resU16 = ucol_strcollIter(coll, &iterU161, &iterU162, &status);
  resU8 = ucol_strcollIter(coll, &iterU81, &iterU82, &status);


  /* Both encodings carry the same text, so the results are expected to match. */
  if(resU16 != resU8) {
    log_err("different results\n");
  }

  ucol_close(coll);
}
#endif
2582 | |
2583 #define CMSCOLL_ALEXIS2_BUFFER_SIZE 256 | |
2584 static void Alexis2(void) { | |
2585 UErrorCode status = U_ZERO_ERROR; | |
2586 UChar U16Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16Target[CMSCOLL_ALEXIS2_BUFFER
_SIZE]; | |
2587 char U16BESource[CMSCOLL_ALEXIS2_BUFFER_SIZE], U16BETarget[CMSCOLL_ALEXIS2_BUF
FER_SIZE]; | |
2588 char U8Source[CMSCOLL_ALEXIS2_BUFFER_SIZE], U8Target[CMSCOLL_ALEXIS2_BUFFER_SI
ZE]; | |
2589 int32_t U16LenS = 0, U16LenT = 0, U16BELenS = 0, U16BELenT = 0, U8LenS = 0, U8
LenT = 0; | |
2590 | |
2591 UConverter *conv = NULL; | |
2592 | |
2593 UCharIterator U16BEItS, U16BEItT; | |
2594 UCharIterator U8ItS, U8ItT; | |
2595 | |
2596 UCollationResult resU16, resU16BE, resU8; | |
2597 | |
2598 static const char* const pairs[][2] = { | |
2599 { "\\ud800\\u0021", "\\uFFFC\\u0062"}, | |
2600 { "\\u0435\\u0308\\u0334", "\\u0415\\u0334\\u0340" }, | |
2601 { "\\u0E40\\u0021", "\\u00A1\\u0021"}, | |
2602 { "\\u0E40\\u0021", "\\uFE57\\u0062"}, | |
2603 { "\\u5F20", "\\u5F20\\u4E00\\u8E3F"}, | |
2604 { "\\u0000\\u0020", "\\u0000\\u0020\\u0000"}, | |
2605 { "\\u0020", "\\u0020\\u0000"} | |
2606 /* | |
2607 5F20 (my result here) | |
2608 5F204E008E3F | |
2609 5F20 (your result here) | |
2610 */ | |
2611 }; | |
2612 | |
2613 int32_t i = 0; | |
2614 | |
2615 UCollator *coll = ucol_open("", &status); | |
2616 if(status == U_FILE_ACCESS_ERROR) { | |
2617 log_data_err("Is your data around?\n"); | |
2618 return; | |
2619 } else if(U_FAILURE(status)) { | |
2620 log_err("Error opening collator\n"); | |
2621 return; | |
2622 } | |
2623 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
2624 conv = ucnv_open("UTF16BE", &status); | |
2625 for(i = 0; i < sizeof(pairs)/sizeof(pairs[0]); i++) { | |
2626 U16LenS = u_unescape(pairs[i][0], U16Source, CMSCOLL_ALEXIS2_BUFFER_SIZE); | |
2627 U16LenT = u_unescape(pairs[i][1], U16Target, CMSCOLL_ALEXIS2_BUFFER_SIZE); | |
2628 | |
2629 resU16 = ucol_strcoll(coll, U16Source, U16LenS, U16Target, U16LenT); | |
2630 | |
2631 log_verbose("Result of strcoll is %i\n", resU16); | |
2632 | |
2633 U16BELenS = ucnv_fromUChars(conv, U16BESource, CMSCOLL_ALEXIS2_BUFFER_SIZE,
U16Source, U16LenS, &status); | |
2634 U16BELenT = ucnv_fromUChars(conv, U16BETarget, CMSCOLL_ALEXIS2_BUFFER_SIZE,
U16Target, U16LenT, &status); | |
2635 (void)U16BELenS; /* Suppress set but not used warnings. */ | |
2636 (void)U16BELenT; | |
2637 | |
2638 /* use the original sizes, as the result from converter is in bytes */ | |
2639 uiter_setUTF16BE(&U16BEItS, U16BESource, U16LenS); | |
2640 uiter_setUTF16BE(&U16BEItT, U16BETarget, U16LenT); | |
2641 | |
2642 resU16BE = ucol_strcollIter(coll, &U16BEItS, &U16BEItT, &status); | |
2643 | |
2644 log_verbose("Result of U16BE is %i\n", resU16BE); | |
2645 | |
2646 if(resU16 != resU16BE) { | |
2647 log_verbose("Different results between UTF16 and UTF16BE for %s & %s\n", p
airs[i][0], pairs[i][1]); | |
2648 } | |
2649 | |
2650 u_strToUTF8(U8Source, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenS, U16Source, U16Le
nS, &status); | |
2651 u_strToUTF8(U8Target, CMSCOLL_ALEXIS2_BUFFER_SIZE, &U8LenT, U16Target, U16Le
nT, &status); | |
2652 | |
2653 uiter_setUTF8(&U8ItS, U8Source, U8LenS); | |
2654 uiter_setUTF8(&U8ItT, U8Target, U8LenT); | |
2655 | |
2656 resU8 = ucol_strcollIter(coll, &U8ItS, &U8ItT, &status); | |
2657 | |
2658 if(resU16 != resU8) { | |
2659 log_verbose("Different results between UTF16 and UTF8 for %s & %s\n", pair
s[i][0], pairs[i][1]); | |
2660 } | |
2661 | |
2662 } | |
2663 | |
2664 ucol_close(coll); | |
2665 ucnv_close(conv); | |
2666 } | |
2667 | |
2668 static void TestHebrewUCA(void) { | |
2669 UErrorCode status = U_ZERO_ERROR; | |
2670 static const char *first[] = { | |
2671 "d790d6b8d79cd795d6bcd7a9", | |
2672 "d790d79cd79ed7a7d799d799d7a1", | |
2673 "d790d6b4d79ed795d6bcd7a9", | |
2674 }; | |
2675 | |
2676 char utf8String[3][256]; | |
2677 UChar utf16String[3][256]; | |
2678 | |
2679 int32_t i = 0, j = 0; | |
2680 int32_t sizeUTF8[3]; | |
2681 int32_t sizeUTF16[3]; | |
2682 | |
2683 UCollator *coll = ucol_open("", &status); | |
2684 if (U_FAILURE(status)) { | |
2685 log_err_status(status, "Could not open UCA collation %s\n", u_errorName(st
atus)); | |
2686 return; | |
2687 } | |
2688 /*ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);*/ | |
2689 | |
2690 for(i = 0; i < sizeof(first)/sizeof(first[0]); i++) { | |
2691 sizeUTF8[i] = u_parseUTF8(first[i], -1, utf8String[i], 256, &status); | |
2692 u_strFromUTF8(utf16String[i], 256, &sizeUTF16[i], utf8String[i], sizeUTF8[i]
, &status); | |
2693 log_verbose("%i: "); | |
2694 for(j = 0; j < sizeUTF16[i]; j++) { | |
2695 /*log_verbose("\\u%04X", utf16String[i][j]);*/ | |
2696 log_verbose("%04X", utf16String[i][j]); | |
2697 } | |
2698 log_verbose("\n"); | |
2699 } | |
2700 for(i = 0; i < sizeof(first)/sizeof(first[0])-1; i++) { | |
2701 for(j = i + 1; j < sizeof(first)/sizeof(first[0]); j++) { | |
2702 doTest(coll, utf16String[i], utf16String[j], UCOL_LESS); | |
2703 } | |
2704 } | |
2705 | |
2706 ucol_close(coll); | |
2707 | |
2708 } | |
2709 | |
2710 static void TestPartialSortKeyTermination(void) { | |
2711 static const char* cases[] = { | |
2712 "\\u1234\\u1234\\udc00", | |
2713 "\\udc00\\ud800\\ud800" | |
2714 }; | |
2715 | |
2716 int32_t i; | |
2717 | |
2718 UErrorCode status = U_ZERO_ERROR; | |
2719 | |
2720 UCollator *coll = ucol_open("", &status); | |
2721 | |
2722 UCharIterator iter; | |
2723 | |
2724 UChar currCase[256]; | |
2725 int32_t length = 0; | |
2726 int32_t pKeyLen = 0; | |
2727 | |
2728 uint8_t key[256]; | |
2729 | |
2730 for(i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { | |
2731 uint32_t state[2] = {0, 0}; | |
2732 length = u_unescape(cases[i], currCase, 256); | |
2733 uiter_setString(&iter, currCase, length); | |
2734 pKeyLen = ucol_nextSortKeyPart(coll, &iter, state, key, 256, &status); | |
2735 (void)pKeyLen; /* Suppress set but not used warning. */ | |
2736 | |
2737 log_verbose("Done\n"); | |
2738 | |
2739 } | |
2740 ucol_close(coll); | |
2741 } | |
2742 | |
2743 static void TestSettings(void) { | |
2744 static const char* cases[] = { | |
2745 "apple", | |
2746 "Apple" | |
2747 }; | |
2748 | |
2749 static const char* locales[] = { | |
2750 "", | |
2751 "en" | |
2752 }; | |
2753 | |
2754 UErrorCode status = U_ZERO_ERROR; | |
2755 | |
2756 int32_t i = 0, j = 0; | |
2757 | |
2758 UChar source[256], target[256]; | |
2759 int32_t sLen = 0, tLen = 0; | |
2760 | |
2761 UCollator *collateObject = NULL; | |
2762 for(i = 0; i < sizeof(locales)/sizeof(locales[0]); i++) { | |
2763 collateObject = ucol_open(locales[i], &status); | |
2764 ucol_setStrength(collateObject, UCOL_PRIMARY); | |
2765 ucol_setAttribute(collateObject, UCOL_CASE_LEVEL , UCOL_OFF, &status); | |
2766 for(j = 1; j < sizeof(cases)/sizeof(cases[0]); j++) { | |
2767 sLen = u_unescape(cases[j-1], source, 256); | |
2768 source[sLen] = 0; | |
2769 tLen = u_unescape(cases[j], target, 256); | |
2770 source[tLen] = 0; | |
2771 doTest(collateObject, source, target, UCOL_EQUAL); | |
2772 } | |
2773 ucol_close(collateObject); | |
2774 } | |
2775 } | |
2776 | |
2777 static int32_t TestEqualsForCollator(const char* locName, UCollator *source, UCo
llator *target) { | |
2778 UErrorCode status = U_ZERO_ERROR; | |
2779 int32_t errorNo = 0; | |
2780 const UChar *sourceRules = NULL; | |
2781 int32_t sourceRulesLen = 0; | |
2782 UParseError parseError; | |
2783 UColAttributeValue french = UCOL_OFF; | |
2784 | |
2785 if(!ucol_equals(source, target)) { | |
2786 log_err("Same collators, different address not equal\n"); | |
2787 errorNo++; | |
2788 } | |
2789 ucol_close(target); | |
2790 if(uprv_strcmp(locName, ucol_getLocaleByType(source, ULOC_ACTUAL_LOCALE, &st
atus)) == 0) { | |
2791 target = ucol_safeClone(source, NULL, NULL, &status); | |
2792 if(U_FAILURE(status)) { | |
2793 log_err("Error creating clone\n"); | |
2794 errorNo++; | |
2795 return errorNo; | |
2796 } | |
2797 if(!ucol_equals(source, target)) { | |
2798 log_err("Collator different from it's clone\n"); | |
2799 errorNo++; | |
2800 } | |
2801 french = ucol_getAttribute(source, UCOL_FRENCH_COLLATION, &status); | |
2802 if(french == UCOL_ON) { | |
2803 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); | |
2804 } else { | |
2805 ucol_setAttribute(target, UCOL_FRENCH_COLLATION, UCOL_ON, &status); | |
2806 } | |
2807 if(U_FAILURE(status)) { | |
2808 log_err("Error setting attributes\n"); | |
2809 errorNo++; | |
2810 return errorNo; | |
2811 } | |
2812 if(ucol_equals(source, target)) { | |
2813 log_err("Collators same even when options changed\n"); | |
2814 errorNo++; | |
2815 } | |
2816 ucol_close(target); | |
2817 | |
2818 sourceRules = ucol_getRules(source, &sourceRulesLen); | |
2819 target = ucol_openRules(sourceRules, sourceRulesLen, UCOL_DEFAULT, UCOL_
DEFAULT, &parseError, &status); | |
2820 if(U_FAILURE(status)) { | |
2821 log_err("Error instantiating target from rules - %s\n", u_errorName(
status)); | |
2822 errorNo++; | |
2823 return errorNo; | |
2824 } | |
2825 /* Note: The tailoring rule string is an optional data item. */ | |
2826 if(!ucol_equals(source, target) && sourceRulesLen != 0) { | |
2827 log_err("Collator different from collator that was created from the
same rules\n"); | |
2828 errorNo++; | |
2829 } | |
2830 ucol_close(target); | |
2831 } | |
2832 return errorNo; | |
2833 } | |
2834 | |
2835 | |
2836 static void TestEquals(void) { | |
2837 /* ucol_equals is not currently a public API. There is a chance that it will
become | |
2838 * something like this. | |
2839 */ | |
2840 /* test whether the two collators instantiated from the same locale are equa
l */ | |
2841 UErrorCode status = U_ZERO_ERROR; | |
2842 UParseError parseError; | |
2843 int32_t noOfLoc = uloc_countAvailable(); | |
2844 const char *locName = NULL; | |
2845 UCollator *source = NULL, *target = NULL; | |
2846 int32_t i = 0; | |
2847 | |
2848 const char* rules[] = { | |
2849 "&l < lj <<< Lj <<< LJ", | |
2850 "&n < nj <<< Nj <<< NJ", | |
2851 "&ae <<< \\u00e4", | |
2852 "&AE <<< \\u00c4" | |
2853 }; | |
2854 /* | |
2855 const char* badRules[] = { | |
2856 "&l <<< Lj", | |
2857 "&n < nj <<< nJ <<< NJ", | |
2858 "&a <<< \\u00e4", | |
2859 "&AE <<< \\u00c4 <<< x" | |
2860 }; | |
2861 */ | |
2862 | |
2863 UChar sourceRules[1024], targetRules[1024]; | |
2864 int32_t sourceRulesSize = 0, targetRulesSize = 0; | |
2865 int32_t rulesSize = sizeof(rules)/sizeof(rules[0]); | |
2866 | |
2867 for(i = 0; i < rulesSize; i++) { | |
2868 sourceRulesSize += u_unescape(rules[i], sourceRules+sourceRulesSize, 102
4 - sourceRulesSize); | |
2869 targetRulesSize += u_unescape(rules[rulesSize-i-1], targetRules+targetRu
lesSize, 1024 - targetRulesSize); | |
2870 } | |
2871 | |
2872 source = ucol_openRules(sourceRules, sourceRulesSize, UCOL_DEFAULT, UCOL_DEF
AULT, &parseError, &status); | |
2873 if(status == U_FILE_ACCESS_ERROR) { | |
2874 log_data_err("Is your data around?\n"); | |
2875 return; | |
2876 } else if(U_FAILURE(status)) { | |
2877 log_err("Error opening collator\n"); | |
2878 return; | |
2879 } | |
2880 target = ucol_openRules(targetRules, targetRulesSize, UCOL_DEFAULT, UCOL_DEF
AULT, &parseError, &status); | |
2881 if(!ucol_equals(source, target)) { | |
2882 log_err("Equivalent collators not equal!\n"); | |
2883 } | |
2884 ucol_close(source); | |
2885 ucol_close(target); | |
2886 | |
2887 source = ucol_open("root", &status); | |
2888 target = ucol_open("root", &status); | |
2889 log_verbose("Testing root\n"); | |
2890 if(!ucol_equals(source, source)) { | |
2891 log_err("Same collator not equal\n"); | |
2892 } | |
2893 if(TestEqualsForCollator("root", source, target)) { | |
2894 log_err("Errors for root\n"); | |
2895 } | |
2896 ucol_close(source); | |
2897 | |
2898 for(i = 0; i<noOfLoc; i++) { | |
2899 status = U_ZERO_ERROR; | |
2900 locName = uloc_getAvailable(i); | |
2901 /*if(hasCollationElements(locName)) {*/ | |
2902 log_verbose("Testing equality for locale %s\n", locName); | |
2903 source = ucol_open(locName, &status); | |
2904 target = ucol_open(locName, &status); | |
2905 if (U_FAILURE(status)) { | |
2906 log_err("Error opening collator for locale %s %s\n", locName, u_err
orName(status)); | |
2907 continue; | |
2908 } | |
2909 if(TestEqualsForCollator(locName, source, target)) { | |
2910 log_err("Errors for locale %s\n", locName); | |
2911 } | |
2912 ucol_close(source); | |
2913 /*}*/ | |
2914 } | |
2915 } | |
2916 | |
2917 static void TestJ2726(void) { | |
2918 UChar a[2] = { 0x61, 0x00 }; /*"a"*/ | |
2919 UChar aSpace[3] = { 0x61, 0x20, 0x00 }; /*"a "*/ | |
2920 UChar spaceA[3] = { 0x20, 0x61, 0x00 }; /*" a"*/ | |
2921 UErrorCode status = U_ZERO_ERROR; | |
2922 UCollator *coll = ucol_open("en", &status); | |
2923 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); | |
2924 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); | |
2925 doTest(coll, a, aSpace, UCOL_EQUAL); | |
2926 doTest(coll, aSpace, a, UCOL_EQUAL); | |
2927 doTest(coll, a, spaceA, UCOL_EQUAL); | |
2928 doTest(coll, spaceA, a, UCOL_EQUAL); | |
2929 doTest(coll, spaceA, aSpace, UCOL_EQUAL); | |
2930 doTest(coll, aSpace, spaceA, UCOL_EQUAL); | |
2931 ucol_close(coll); | |
2932 } | |
2933 | |
2934 static void NullRule(void) { | |
2935 UChar r[3] = {0}; | |
2936 UErrorCode status = U_ZERO_ERROR; | |
2937 UCollator *coll = ucol_openRules(r, 1, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &st
atus); | |
2938 if(U_SUCCESS(status)) { | |
2939 log_err("This should have been an error!\n"); | |
2940 ucol_close(coll); | |
2941 } else { | |
2942 status = U_ZERO_ERROR; | |
2943 } | |
2944 coll = ucol_openRules(r, 0, UCOL_DEFAULT, UCOL_DEFAULT, NULL, &status); | |
2945 if(U_FAILURE(status)) { | |
2946 log_err_status(status, "Empty rules should have produced a valid collato
r -> %s\n", u_errorName(status)); | |
2947 } else { | |
2948 ucol_close(coll); | |
2949 } | |
2950 } | |
2951 | |
2952 /** | |
2953 * Test for CollationElementIterator previous and next for the whole set of | |
2954 * unicode characters with normalization on. | |
2955 */ | |
2956 static void TestNumericCollation(void) | |
2957 { | |
2958 UErrorCode status = U_ZERO_ERROR; | |
2959 | |
2960 const static char *basicTestStrings[]={ | |
2961 "hello1", | |
2962 "hello2", | |
2963 "hello2002", | |
2964 "hello2003", | |
2965 "hello123456", | |
2966 "hello1234567", | |
2967 "hello10000000", | |
2968 "hello100000000", | |
2969 "hello1000000000", | |
2970 "hello10000000000", | |
2971 }; | |
2972 | |
2973 const static char *preZeroTestStrings[]={ | |
2974 "avery10000", | |
2975 "avery010000", | |
2976 "avery0010000", | |
2977 "avery00010000", | |
2978 "avery000010000", | |
2979 "avery0000010000", | |
2980 "avery00000010000", | |
2981 "avery000000010000", | |
2982 }; | |
2983 | |
2984 const static char *thirtyTwoBitNumericStrings[]={ | |
2985 "avery42949672960", | |
2986 "avery42949672961", | |
2987 "avery42949672962", | |
2988 "avery429496729610" | |
2989 }; | |
2990 | |
2991 const static char *longNumericStrings[]={ | |
2992 /* Some of these sort out of the order that would expected if digits-as-num
bers handled arbitrarily-long digit strings. | |
2993 In fact, a single collation element can represent a maximum of 254 digit
s as a number. Digit strings longer than that | |
2994 are treated as multiple collation elements. */ | |
2995 "num923456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
345678901234567890123z", /*253digits, num + 9.23E252 + z */ | |
2996 "num100000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000", /*254digits, num + 1.00E253 */ | |
2997 "num100000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000", /*255digits, num + 1.00E253 + 0, out of numeric order
but expected */ | |
2998 "num123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
3456789012345678901234", /*254digits, num + 1.23E253 */ | |
2999 "num123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345", /*255digits, num + 1.23E253 + 5 */ | |
3000 "num123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
345678901234567890123456", /*256digits, num + 1.23E253 + 56 */ | |
3001 "num123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
3456789012345678901234567", /*257digits, num + 1.23E253 + 567 */ | |
3002 "num123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
3456789012345678901234a", /*254digits, num + 1.23E253 + a, out of numeric order
but expected */ | |
3003 "num923456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
3456789012345678901234", /*254digits, num + 9.23E253, out of numeric order but e
xpected */ | |
3004 "num923456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012
3456789012345678901234a", /*254digits, num + 9.23E253 + a, out of numeric order
but expected */ | |
3005 }; | |
3006 | |
3007 const static char *supplementaryDigits[] = { | |
3008 "\\uD835\\uDFCE", /* 0 */ | |
3009 "\\uD835\\uDFCF", /* 1 */ | |
3010 "\\uD835\\uDFD0", /* 2 */ | |
3011 "\\uD835\\uDFD1", /* 3 */ | |
3012 "\\uD835\\uDFCF\\uD835\\uDFCE", /* 10 */ | |
3013 "\\uD835\\uDFCF\\uD835\\uDFCF", /* 11 */ | |
3014 "\\uD835\\uDFCF\\uD835\\uDFD0", /* 12 */ | |
3015 "\\uD835\\uDFD0\\uD835\\uDFCE", /* 20 */ | |
3016 "\\uD835\\uDFD0\\uD835\\uDFCF", /* 21 */ | |
3017 "\\uD835\\uDFD0\\uD835\\uDFD0" /* 22 */ | |
3018 }; | |
3019 | |
3020 const static char *foreignDigits[] = { | |
3021 "\\u0661", | |
3022 "\\u0662", | |
3023 "\\u0663", | |
3024 "\\u0661\\u0660", | |
3025 "\\u0661\\u0662", | |
3026 "\\u0661\\u0663", | |
3027 "\\u0662\\u0660", | |
3028 "\\u0662\\u0662", | |
3029 "\\u0662\\u0663", | |
3030 "\\u0663\\u0660", | |
3031 "\\u0663\\u0662", | |
3032 "\\u0663\\u0663" | |
3033 }; | |
3034 | |
3035 const static char *evenZeroes[] = { | |
3036 "2000", | |
3037 "2001", | |
3038 "2002", | |
3039 "2003" | |
3040 }; | |
3041 | |
3042 UColAttribute att = UCOL_NUMERIC_COLLATION; | |
3043 UColAttributeValue val = UCOL_ON; | |
3044 | |
3045 /* Open our collator. */ | |
3046 UCollator* coll = ucol_open("root", &status); | |
3047 if (U_FAILURE(status)){ | |
3048 log_err_status(status, "ERROR: in using ucol_open() -> %s\n", | |
3049 myErrorName(status)); | |
3050 return; | |
3051 } | |
3052 genericLocaleStarterWithOptions("root", basicTestStrings, sizeof(basicTestSt
rings)/sizeof(basicTestStrings[0]), &att, &val, 1); | |
3053 genericLocaleStarterWithOptions("root", thirtyTwoBitNumericStrings, sizeof(t
hirtyTwoBitNumericStrings)/sizeof(thirtyTwoBitNumericStrings[0]), &att, &val, 1)
; | |
3054 genericLocaleStarterWithOptions("root", longNumericStrings, sizeof(longNumer
icStrings)/sizeof(longNumericStrings[0]), &att, &val, 1); | |
3055 genericLocaleStarterWithOptions("en_US", foreignDigits, sizeof(foreignDigits
)/sizeof(foreignDigits[0]), &att, &val, 1); | |
3056 genericLocaleStarterWithOptions("root", supplementaryDigits, sizeof(suppleme
ntaryDigits)/sizeof(supplementaryDigits[0]), &att, &val, 1); | |
3057 genericLocaleStarterWithOptions("root", evenZeroes, sizeof(evenZeroes)/sizeo
f(evenZeroes[0]), &att, &val, 1); | |
3058 | |
3059 /* Setting up our collator to do digits. */ | |
3060 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); | |
3061 if (U_FAILURE(status)){ | |
3062 log_err("ERROR: in setting UCOL_NUMERIC_COLLATION as an attribute\n %s\n
", | |
3063 myErrorName(status)); | |
3064 return; | |
3065 } | |
3066 | |
3067 /* | |
3068 Testing that prepended zeroes still yield the correct collation behavior. | |
3069 We expect that every element in our strings array will be equal. | |
3070 */ | |
3071 genericOrderingTestWithResult(coll, preZeroTestStrings, sizeof(preZeroTestSt
rings)/sizeof(preZeroTestStrings[0]), UCOL_EQUAL); | |
3072 | |
3073 ucol_close(coll); | |
3074 } | |
3075 | |
3076 static void TestTibetanConformance(void) | |
3077 { | |
3078 const char* test[] = { | |
3079 "\\u0FB2\\u0591\\u0F71\\u0061", | |
3080 "\\u0FB2\\u0F71\\u0061" | |
3081 }; | |
3082 | |
3083 UErrorCode status = U_ZERO_ERROR; | |
3084 UCollator *coll = ucol_open("", &status); | |
3085 UChar source[100]; | |
3086 UChar target[100]; | |
3087 int result; | |
3088 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
3089 if (U_SUCCESS(status)) { | |
3090 u_unescape(test[0], source, 100); | |
3091 u_unescape(test[1], target, 100); | |
3092 doTest(coll, source, target, UCOL_EQUAL); | |
3093 result = ucol_strcoll(coll, source, -1, target, -1); | |
3094 log_verbose("result %d\n", result); | |
3095 if (UCOL_EQUAL != result) { | |
3096 log_err("Tibetan comparison error\n"); | |
3097 } | |
3098 } | |
3099 ucol_close(coll); | |
3100 | |
3101 genericLocaleStarterWithResult("", test, 2, UCOL_EQUAL); | |
3102 } | |
3103 | |
/*
 * Runs a two-string Han ordering check through genericLocaleStarter() for
 * the locale ID "zh__PINYIN".
 */
static void TestPinyinProblem(void) {
    static const char *test[] = {
        "\\u4E56\\u4E56\\u7761",
        "\\u4E56\\u5B69\\u5B50"
    };
    genericLocaleStarter("zh__PINYIN", test, LEN(test));
}
3108 | |
3109 /** | |
3110 * Iterate through the given iterator, checking to see that all the strings | |
3111 * in the expected array are present. | |
3112 * @param expected array of strings we expect to see, or NULL | |
3113 * @param expectedCount number of elements of expected, or 0 | |
3114 */ | |
3115 static int32_t checkUEnumeration(const char* msg, | |
3116 UEnumeration* iter, | |
3117 const char** expected, | |
3118 int32_t expectedCount) { | |
3119 UErrorCode ec = U_ZERO_ERROR; | |
3120 int32_t i = 0, n, j, bit; | |
3121 int32_t seenMask = 0; | |
3122 | |
3123 U_ASSERT(expectedCount >= 0 && expectedCount < 31); /* [sic] 31 not 32 */ | |
3124 n = uenum_count(iter, &ec); | |
3125 if (!assertSuccess("count", &ec)) return -1; | |
3126 log_verbose("%s = [", msg); | |
3127 for (;; ++i) { | |
3128 const char* s = uenum_next(iter, NULL, &ec); | |
3129 if (!assertSuccess("snext", &ec) || s == NULL) break; | |
3130 if (i != 0) log_verbose(","); | |
3131 log_verbose("%s", s); | |
3132 /* check expected list */ | |
3133 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { | |
3134 if ((seenMask&bit) == 0 && | |
3135 uprv_strcmp(s, expected[j]) == 0) { | |
3136 seenMask |= bit; | |
3137 break; | |
3138 } | |
3139 } | |
3140 } | |
3141 log_verbose("] (%d)\n", i); | |
3142 assertTrue("count verified", i==n); | |
3143 /* did we see all expected strings? */ | |
3144 for (j=0, bit=1; j<expectedCount; ++j, bit<<=1) { | |
3145 if ((seenMask&bit)!=0) { | |
3146 log_verbose("Ok: \"%s\" seen\n", expected[j]); | |
3147 } else { | |
3148 log_err("FAIL: \"%s\" not seen\n", expected[j]); | |
3149 } | |
3150 } | |
3151 return n; | |
3152 } | |
3153 | |
3154 /** | |
3155 * Test new API added for separate collation tree. | |
3156 */ | |
3157 static void TestSeparateTrees(void) { | |
3158 UErrorCode ec = U_ZERO_ERROR; | |
3159 UEnumeration *e = NULL; | |
3160 int32_t n = -1; | |
3161 UBool isAvailable; | |
3162 char loc[256]; | |
3163 | |
3164 static const char* AVAIL[] = { "en", "de" }; | |
3165 | |
3166 static const char* KW[] = { "collation" }; | |
3167 | |
3168 static const char* KWVAL[] = { "phonebook", "stroke" }; | |
3169 | |
3170 #if !UCONFIG_NO_SERVICE | |
3171 e = ucol_openAvailableLocales(&ec); | |
3172 if (e != NULL) { | |
3173 assertSuccess("ucol_openAvailableLocales", &ec); | |
3174 assertTrue("ucol_openAvailableLocales!=0", e!=0); | |
3175 n = checkUEnumeration("ucol_openAvailableLocales", e, AVAIL, LEN(AVAIL))
; | |
3176 (void)n; /* Suppress set but not used warnings. */ | |
3177 /* Don't need to check n because we check list */ | |
3178 uenum_close(e); | |
3179 } else { | |
3180 log_data_err("Error calling ucol_openAvailableLocales() -> %s (Are you m
issing data?)\n", u_errorName(ec)); | |
3181 } | |
3182 #endif | |
3183 | |
3184 e = ucol_getKeywords(&ec); | |
3185 if (e != NULL) { | |
3186 assertSuccess("ucol_getKeywords", &ec); | |
3187 assertTrue("ucol_getKeywords!=0", e!=0); | |
3188 n = checkUEnumeration("ucol_getKeywords", e, KW, LEN(KW)); | |
3189 /* Don't need to check n because we check list */ | |
3190 uenum_close(e); | |
3191 } else { | |
3192 log_data_err("Error calling ucol_getKeywords() -> %s (Are you missing da
ta?)\n", u_errorName(ec)); | |
3193 } | |
3194 | |
3195 e = ucol_getKeywordValues(KW[0], &ec); | |
3196 if (e != NULL) { | |
3197 assertSuccess("ucol_getKeywordValues", &ec); | |
3198 assertTrue("ucol_getKeywordValues!=0", e!=0); | |
3199 n = checkUEnumeration("ucol_getKeywordValues", e, KWVAL, LEN(KWVAL)); | |
3200 /* Don't need to check n because we check list */ | |
3201 uenum_close(e); | |
3202 } else { | |
3203 log_data_err("Error calling ucol_getKeywordValues() -> %s (Are you missi
ng data?)\n", u_errorName(ec)); | |
3204 } | |
3205 | |
3206 /* Try setting a warning before calling ucol_getKeywordValues */ | |
3207 ec = U_USING_FALLBACK_WARNING; | |
3208 e = ucol_getKeywordValues(KW[0], &ec); | |
3209 if (assertSuccess("ucol_getKeywordValues [with warning code set]", &ec)) { | |
3210 assertTrue("ucol_getKeywordValues!=0 [with warning code set]", e!=0); | |
3211 n = checkUEnumeration("ucol_getKeywordValues [with warning code set]", e
, KWVAL, LEN(KWVAL)); | |
3212 /* Don't need to check n because we check list */ | |
3213 uenum_close(e); | |
3214 } | |
3215 | |
3216 /* | |
3217 U_DRAFT int32_t U_EXPORT2 | |
3218 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, | |
3219 const char* locale, UBool* isAvailable, | |
3220 UErrorCode* status); | |
3221 } | |
3222 */ | |
3223 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de", | |
3224 &isAvailable, &ec); | |
3225 if (assertSuccess("getFunctionalEquivalent", &ec)) { | |
3226 assertEquals("getFunctionalEquivalent(de)", "root", loc); | |
3227 assertTrue("getFunctionalEquivalent(de).isAvailable==TRUE", | |
3228 isAvailable == TRUE); | |
3229 } | |
3230 | |
3231 n = ucol_getFunctionalEquivalent(loc, sizeof(loc), "collation", "de_DE", | |
3232 &isAvailable, &ec); | |
3233 if (assertSuccess("getFunctionalEquivalent", &ec)) { | |
3234 assertEquals("getFunctionalEquivalent(de_DE)", "root", loc); | |
3235 assertTrue("getFunctionalEquivalent(de_DE).isAvailable==FALSE", | |
3236 isAvailable == FALSE); | |
3237 } | |
3238 } | |
3239 | |
/*
 * Supersedes TestJ784.
 * Runs two ordered string lists through both a rule string made of
 * "[before 2]" tailorings for accented vowels and the "zh" locale.
 */
static void TestBeforePinyin(void) {
    const static char rules[] =
        "&[before 2]A<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD<<\\u00E0<<<\\u00C0"
        "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A<<\\u00E8<<<\\u00C8"
        "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF<<\\u00EC<<<\\u00CC"
        "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1<<\\u00F2<<<\\u00D2"
        "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3<<\\u00F9<<<\\u00D9"
        "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC<<<\\u01DB<<\\u00FC";

    /* Syllables starting with 'l', listed in the expected order. */
    const static char *test[] = {
        "l\\u0101", "la",
        "l\\u0101n", "lan ",
        "l\\u0113", "le",
        "l\\u0113n", "len"
    };

    /* Case and accent variations around 'x'/'X', listed in the expected order. */
    const static char *test2[] = {
        "x\\u0101", "x\\u0100", "X\\u0101", "X\\u0100",
        "x\\u00E1", "x\\u00C1", "X\\u00E1", "X\\u00C1",
        "x\\u01CE", "x\\u01CD", "X\\u01CE", "X\\u01CD",
        "x\\u00E0", "x\\u00C0", "X\\u00E0", "X\\u00C0",
        "xa", "xA", "Xa", "XA",
        "x\\u0101x", "x\\u0100x",
        "x\\u00E1x", "x\\u00C1x",
        "x\\u01CEx", "x\\u01CDx",
        "x\\u00E0x", "x\\u00C0x",
        "xax", "xAx"
    };

    genericRulesStarter(rules, test, LEN(test));
    genericLocaleStarter("zh", test, LEN(test));
    genericRulesStarter(rules, test2, LEN(test2));
    genericLocaleStarter("zh", test2, LEN(test2));
}
3300 | |
3301 static void TestBeforeTightening(void) { | |
3302 static const struct { | |
3303 const char *rules; | |
3304 UErrorCode expectedStatus; | |
3305 } tests[] = { | |
3306 { "&[before 1]a<x", U_ZERO_ERROR }, | |
3307 { "&[before 1]a<<x", U_INVALID_FORMAT_ERROR }, | |
3308 { "&[before 1]a<<<x", U_INVALID_FORMAT_ERROR }, | |
3309 { "&[before 1]a=x", U_INVALID_FORMAT_ERROR }, | |
3310 { "&[before 2]a<x",U_INVALID_FORMAT_ERROR }, | |
3311 { "&[before 2]a<<x",U_ZERO_ERROR }, | |
3312 { "&[before 2]a<<<x",U_INVALID_FORMAT_ERROR }, | |
3313 { "&[before 2]a=x",U_INVALID_FORMAT_ERROR }, | |
3314 { "&[before 3]a<x",U_INVALID_FORMAT_ERROR }, | |
3315 { "&[before 3]a<<x",U_INVALID_FORMAT_ERROR }, | |
3316 { "&[before 3]a<<<x",U_ZERO_ERROR }, | |
3317 { "&[before 3]a=x",U_INVALID_FORMAT_ERROR }, | |
3318 { "&[before I]a = x",U_INVALID_FORMAT_ERROR } | |
3319 }; | |
3320 | |
3321 int32_t i = 0; | |
3322 | |
3323 UErrorCode status = U_ZERO_ERROR; | |
3324 UChar rlz[RULE_BUFFER_LEN] = { 0 }; | |
3325 uint32_t rlen = 0; | |
3326 | |
3327 UCollator *coll = NULL; | |
3328 | |
3329 | |
3330 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { | |
3331 rlen = u_unescape(tests[i].rules, rlz, RULE_BUFFER_LEN); | |
3332 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &statu
s); | |
3333 if(status != tests[i].expectedStatus) { | |
3334 log_err_status(status, "Opening a collator with rules %s returned er
ror code %s, expected %s\n", | |
3335 tests[i].rules, u_errorName(status), u_errorName(tests[i].expect
edStatus)); | |
3336 } | |
3337 ucol_close(coll); | |
3338 status = U_ZERO_ERROR; | |
3339 } | |
3340 | |
3341 } | |
3342 | |
3343 /* | |
3344 &m < a | |
3345 &[before 1] a < x <<< X << q <<< Q < z | |
3346 assert: m <<< M < x <<< X << q <<< Q < z < a < n | |
3347 | |
3348 &m < a | |
3349 &[before 2] a << x <<< X << q <<< Q < z | |
3350 assert: m <<< M < x <<< X << q <<< Q << a < z < n | |
3351 | |
3352 &m < a | |
3353 &[before 3] a <<< x <<< X << q <<< Q < z | |
3354 assert: m <<< M < x <<< X <<< a << q <<< Q < z < n | |
3355 | |
3356 | |
3357 &m << a | |
3358 &[before 1] a < x <<< X << q <<< Q < z | |
3359 assert: x <<< X << q <<< Q < z < m <<< M << a < n | |
3360 | |
3361 &m << a | |
3362 &[before 2] a << x <<< X << q <<< Q < z | |
3363 assert: m <<< M << x <<< X << q <<< Q << a < z < n | |
3364 | |
3365 &m << a | |
3366 &[before 3] a <<< x <<< X << q <<< Q < z | |
3367 assert: m <<< M << x <<< X <<< a << q <<< Q < z < n | |
3368 | |
3369 | |
3370 &m <<< a | |
3371 &[before 1] a < x <<< X << q <<< Q < z | |
3372 assert: x <<< X << q <<< Q < z < n < m <<< a <<< M | |
3373 | |
3374 &m <<< a | |
3375 &[before 2] a << x <<< X << q <<< Q < z | |
3376 assert: x <<< X << q <<< Q << m <<< a <<< M < z < n | |
3377 | |
3378 &m <<< a | |
3379 &[before 3] a <<< x <<< X << q <<< Q < z | |
3380 assert: m <<< x <<< X <<< a <<< M << q <<< Q < z < n | |
3381 | |
3382 | |
3383 &[before 1] s < x <<< X << q <<< Q < z | |
3384 assert: r <<< R < x <<< X << q <<< Q < z < s < n | |
3385 | |
3386 &[before 2] s << x <<< X << q <<< Q < z | |
3387 assert: r <<< R < x <<< X << q <<< Q << s < z < n | |
3388 | |
3389 &[before 3] s <<< x <<< X << q <<< Q < z | |
3390 assert: r <<< R < x <<< X <<< s << q <<< Q < z < n | |
3391 | |
3392 | |
3393 &[before 1] \u24DC < x <<< X << q <<< Q < z | |
3394 assert: x <<< X << q <<< Q < z < n < m <<< \u24DC <<< M | |
3395 | |
3396 &[before 2] \u24DC << x <<< X << q <<< Q < z | |
3397 assert: x <<< X << q <<< Q << m <<< \u24DC <<< M < z < n | |
3398 | |
3399 &[before 3] \u24DC <<< x <<< X << q <<< Q < z | |
3400 assert: m <<< x <<< X <<< \u24DC <<< M << q <<< Q < z < n | |
3401 */ | |
3402 | |
3403 | |
3404 #if 0 | |
3405 /* requires features not yet supported */ | |
3406 static void TestMoreBefore(void) { | |
3407 static const struct { | |
3408 const char* rules; | |
3409 const char* order[16]; | |
3410 int32_t size; | |
3411 } tests[] = { | |
3412 { "&m < a &[before 1] a < x <<< X << q <<< Q < z", | |
3413 { "m","M","x","X","q","Q","z","a","n" }, 9}, | |
3414 { "&m < a &[before 2] a << x <<< X << q <<< Q < z", | |
3415 { "m","M","x","X","q","Q","a","z","n" }, 9}, | |
3416 { "&m < a &[before 3] a <<< x <<< X << q <<< Q < z", | |
3417 { "m","M","x","X","a","q","Q","z","n" }, 9}, | |
3418 { "&m << a &[before 1] a < x <<< X << q <<< Q < z", | |
3419 { "x","X","q","Q","z","m","M","a","n" }, 9}, | |
3420 { "&m << a &[before 2] a << x <<< X << q <<< Q < z", | |
3421 { "m","M","x","X","q","Q","a","z","n" }, 9}, | |
3422 { "&m << a &[before 3] a <<< x <<< X << q <<< Q < z", | |
3423 { "m","M","x","X","a","q","Q","z","n" }, 9}, | |
3424 { "&m <<< a &[before 1] a < x <<< X << q <<< Q < z", | |
3425 { "x","X","q","Q","z","n","m","a","M" }, 9}, | |
3426 { "&m <<< a &[before 2] a << x <<< X << q <<< Q < z", | |
3427 { "x","X","q","Q","m","a","M","z","n" }, 9}, | |
3428 { "&m <<< a &[before 3] a <<< x <<< X << q <<< Q < z", | |
3429 { "m","x","X","a","M","q","Q","z","n" }, 9}, | |
3430 { "&[before 1] s < x <<< X << q <<< Q < z", | |
3431 { "r","R","x","X","q","Q","z","s","n" }, 9}, | |
3432 { "&[before 2] s << x <<< X << q <<< Q < z", | |
3433 { "r","R","x","X","q","Q","s","z","n" }, 9}, | |
3434 { "&[before 3] s <<< x <<< X << q <<< Q < z", | |
3435 { "r","R","x","X","s","q","Q","z","n" }, 9}, | |
3436 { "&[before 1] \\u24DC < x <<< X << q <<< Q < z", | |
3437 { "x","X","q","Q","z","n","m","\\u24DC","M" }, 9}, | |
3438 { "&[before 2] \\u24DC << x <<< X << q <<< Q < z", | |
3439 { "x","X","q","Q","m","\\u24DC","M","z","n" }, 9}, | |
3440 { "&[before 3] \\u24DC <<< x <<< X << q <<< Q < z", | |
3441 { "m","x","X","\\u24DC","M","q","Q","z","n" }, 9} | |
3442 }; | |
3443 | |
3444 int32_t i = 0; | |
3445 | |
3446 for(i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { | |
3447 genericRulesStarter(tests[i].rules, tests[i].order, tests[i].size); | |
3448 } | |
3449 } | |
3450 #endif | |
3451 | |
3452 static void TestTailorNULL( void ) { | |
3453 const static char* rule = "&a <<< '\\u0000'"; | |
3454 UErrorCode status = U_ZERO_ERROR; | |
3455 UChar rlz[RULE_BUFFER_LEN] = { 0 }; | |
3456 uint32_t rlen = 0; | |
3457 UChar a = 1, null = 0; | |
3458 UCollationResult res = UCOL_EQUAL; | |
3459 | |
3460 UCollator *coll = NULL; | |
3461 | |
3462 | |
3463 rlen = u_unescape(rule, rlz, RULE_BUFFER_LEN); | |
3464 coll = ucol_openRules(rlz, rlen, UCOL_DEFAULT, UCOL_DEFAULT,NULL, &status); | |
3465 | |
3466 if(U_FAILURE(status)) { | |
3467 log_err_status(status, "Could not open default collator! -> %s\n", u_err
orName(status)); | |
3468 } else { | |
3469 res = ucol_strcoll(coll, &a, 1, &null, 1); | |
3470 | |
3471 if(res != UCOL_LESS) { | |
3472 log_err("NULL was not tailored properly!\n"); | |
3473 } | |
3474 } | |
3475 | |
3476 ucol_close(coll); | |
3477 } | |
3478 | |
3479 static void | |
3480 TestUpperFirstQuaternary(void) | |
3481 { | |
3482 const char* tests[] = { "B", "b", "Bb", "bB" }; | |
3483 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_FIRST }; | |
3484 UColAttributeValue attVals[] = { UCOL_QUATERNARY, UCOL_UPPER_FIRST }; | |
3485 genericLocaleStarterWithOptions("root", tests, sizeof(tests)/sizeof(tests[0]),
att, attVals, sizeof(att)/sizeof(att[0])); | |
3486 } | |
3487 | |
3488 static void | |
3489 TestJ4960(void) | |
3490 { | |
3491 const char* tests[] = { "\\u00e2T", "aT" }; | |
3492 UColAttribute att[] = { UCOL_STRENGTH, UCOL_CASE_LEVEL }; | |
3493 UColAttributeValue attVals[] = { UCOL_PRIMARY, UCOL_ON }; | |
3494 const char* tests2[] = { "a", "A" }; | |
3495 const char* rule = "&[first tertiary ignorable]=A=a"; | |
3496 UColAttribute att2[] = { UCOL_CASE_LEVEL }; | |
3497 UColAttributeValue attVals2[] = { UCOL_ON }; | |
3498 /* Test whether we correctly ignore primary ignorables on case level when */ | |
3499 /* we have only primary & case level */ | |
3500 genericLocaleStarterWithOptionsAndResult("root", tests, sizeof(tests)/sizeof(t
ests[0]), att, attVals, sizeof(att)/sizeof(att[0]), UCOL_EQUAL); | |
3501 /* Test whether ICU4J will make case level for sortkeys that have primary stre
ngth */ | |
3502 /* and case level */ | |
3503 genericLocaleStarterWithOptions("root", tests2, sizeof(tests2)/sizeof(tests2[0
]), att, attVals, sizeof(att)/sizeof(att[0])); | |
3504 /* Test whether completely ignorable letters have case level info (they should
n't) */ | |
3505 genericRulesStarterWithOptionsAndResult(rule, tests2, sizeof(tests2)/sizeof(te
sts2[0]), att2, attVals2, sizeof(att2)/sizeof(att2[0]), UCOL_EQUAL); | |
3506 } | |
3507 | |
3508 static void | |
3509 TestJ5223(void) | |
3510 { | |
3511 static const char *test = "this is a test string"; | |
3512 UChar ustr[256]; | |
3513 int32_t ustr_length = u_unescape(test, ustr, 256); | |
3514 unsigned char sortkey[256]; | |
3515 int32_t sortkey_length; | |
3516 UErrorCode status = U_ZERO_ERROR; | |
3517 static UCollator *coll = NULL; | |
3518 coll = ucol_open("root", &status); | |
3519 if(U_FAILURE(status)) { | |
3520 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); | |
3521 return; | |
3522 } | |
3523 ucol_setStrength(coll, UCOL_PRIMARY); | |
3524 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); | |
3525 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
3526 if (U_FAILURE(status)) { | |
3527 log_err("Failed setting atributes\n"); | |
3528 return; | |
3529 } | |
3530 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, NULL, 0); | |
3531 if (sortkey_length > 256) return; | |
3532 | |
3533 /* we mark the position where the null byte should be written in advance */ | |
3534 sortkey[sortkey_length-1] = 0xAA; | |
3535 | |
3536 /* we set the buffer size one byte higher than needed */ | |
3537 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, | |
3538 sortkey_length+1); | |
3539 | |
3540 /* no error occurs (for me) */ | |
3541 if (sortkey[sortkey_length-1] == 0xAA) { | |
3542 log_err("Hit bug at first try\n"); | |
3543 } | |
3544 | |
3545 /* we mark the position where the null byte should be written again */ | |
3546 sortkey[sortkey_length-1] = 0xAA; | |
3547 | |
3548 /* this time we set the buffer size to the exact amount needed */ | |
3549 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, sortkey, | |
3550 sortkey_length); | |
3551 | |
3552 /* now the trailing null byte is not written */ | |
3553 if (sortkey[sortkey_length-1] == 0xAA) { | |
3554 log_err("Hit bug at second try\n"); | |
3555 } | |
3556 | |
3557 ucol_close(coll); | |
3558 } | |
3559 | |
/* Regression test for Thai partial sort key problem */
static void
TestJ5232(void)
{
    /* Two Thai strings that differ only in their last code point. */
    static const char *thaiPair[] = {
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e47\\u0e21",
        "\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e40\\u0e25\\u0e48\\u0e21"
    };

    genericLocaleStarter("th", thaiPair, sizeof(thaiPair)/sizeof(thaiPair[0]));
}
3571 | |
/*
 * Passes the strings "a" and "y" to genericRulesStarter() with a rule that
 * tailors 'a' after the first secondary ignorable.
 */
static void
TestJ5367(void)
{
    static const char *strings[] = { "a", "y" };
    const char *tailoring = "&Ny << Y &[first secondary ignorable] <<< a";

    genericRulesStarter(tailoring, strings, sizeof(strings)/sizeof(strings[0]));
}
3579 | |
3580 static void | |
3581 TestVI5913(void) | |
3582 { | |
3583 UErrorCode status = U_ZERO_ERROR; | |
3584 int32_t i, j; | |
3585 UCollator *coll =NULL; | |
3586 uint8_t resColl[100], expColl[100]; | |
3587 int32_t rLen, tLen, ruleLen, sLen, kLen; | |
3588 UChar rule[256]={0x26, 0x62, 0x3c, 0x1FF3, 0}; /* &b<0x1FF3-omega with Ypog
egrammeni*/ | |
3589 UChar rule2[256]={0x26, 0x7a, 0x3c, 0x0161, 0}; /* &z<s with caron*/ | |
3590 /* | |
3591 * Note: Just tailoring &z<ae^ does not work as expected: | |
3592 * The UCA spec requires for discontiguous contractions that they | |
3593 * extend an *existing match* by one combining mark at a time. | |
3594 * Therefore, ae must be a contraction so that the builder finds | |
3595 * discontiguous contractions for ae^, for example with an intervening under
dot. | |
3596 * Only then do we get the expected tail closure with a\u1EC7, a\u1EB9\u0302
, etc. | |
3597 */ | |
3598 UChar rule3[256]={ | |
3599 0x26, 0x78, 0x3c, 0x61, 0x65, /* &x<ae */ | |
3600 0x26, 0x7a, 0x3c, 0x0061, 0x00ea, /* &z<a+e with circumflex.*/ | |
3601 0}; | |
3602 static const UChar tData[][20]={ | |
3603 {0x1EAC, 0}, | |
3604 {0x0041, 0x0323, 0x0302, 0}, | |
3605 {0x1EA0, 0x0302, 0}, | |
3606 {0x00C2, 0x0323, 0}, | |
3607 {0x1ED8, 0}, /* O with dot and circumflex */ | |
3608 {0x1ECC, 0x0302, 0}, | |
3609 {0x1EB7, 0}, | |
3610 {0x1EA1, 0x0306, 0}, | |
3611 }; | |
3612 static const UChar tailorData[][20]={ | |
3613 {0x1FA2, 0}, /* Omega with 3 combining marks */ | |
3614 {0x03C9, 0x0313, 0x0300, 0x0345, 0}, | |
3615 {0x1FF3, 0x0313, 0x0300, 0}, | |
3616 {0x1F60, 0x0300, 0x0345, 0}, | |
3617 {0x1F62, 0x0345, 0}, | |
3618 {0x1FA0, 0x0300, 0}, | |
3619 }; | |
3620 static const UChar tailorData2[][20]={ | |
3621 {0x1E63, 0x030C, 0}, /* s with dot below + caron */ | |
3622 {0x0073, 0x0323, 0x030C, 0}, | |
3623 {0x0073, 0x030C, 0x0323, 0}, | |
3624 }; | |
3625 static const UChar tailorData3[][20]={ | |
3626 {0x007a, 0}, /* z */ | |
3627 {0x0061, 0x0065, 0}, /* a + e */ | |
3628 {0x0061, 0x00ea, 0}, /* a + e with circumflex */ | |
3629 {0x0061, 0x1EC7, 0}, /* a+ e with dot below and circumflex */ | |
3630 {0x0061, 0x1EB9, 0x0302, 0}, /* a + e with dot below + combining circumf
lex */ | |
3631 {0x0061, 0x00EA, 0x0323, 0}, /* a + e with circumflex + combining dot b
elow */ | |
3632 {0x00EA, 0x0323, 0}, /* e with circumflex + combining dot below */ | |
3633 {0x00EA, 0}, /* e with circumflex */ | |
3634 }; | |
3635 | |
3636 /* Test Vietnamese sort. */ | |
3637 coll = ucol_open("vi", &status); | |
3638 if(U_FAILURE(status)) { | |
3639 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(sta
tus)); | |
3640 return; | |
3641 } | |
3642 log_verbose("\n\nVI collation:"); | |
3643 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[2], u_strlen(tDat
a[2])) ) { | |
3644 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); | |
3645 } | |
3646 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[3], u_strlen(tDat
a[3])) ) { | |
3647 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); | |
3648 } | |
3649 if ( !ucol_equal(coll, tData[5], u_strlen(tData[5]), tData[4], u_strlen(tDat
a[4])) ) { | |
3650 log_err("\\u1ED8 not equals to \\u1ECC+\\u0302\n"); | |
3651 } | |
3652 if ( !ucol_equal(coll, tData[7], u_strlen(tData[7]), tData[6], u_strlen(tDat
a[6])) ) { | |
3653 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); | |
3654 } | |
3655 | |
3656 for (j=0; j<8; j++) { | |
3657 tLen = u_strlen(tData[j]); | |
3658 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); | |
3659 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); | |
3660 for(i = 0; i<rLen; i++) { | |
3661 log_verbose(" %02X", resColl[i]); | |
3662 } | |
3663 } | |
3664 | |
3665 ucol_close(coll); | |
3666 | |
3667 /* Test Romanian sort. */ | |
3668 coll = ucol_open("ro", &status); | |
3669 log_verbose("\n\nRO collation:"); | |
3670 if ( !ucol_equal(coll, tData[0], u_strlen(tData[0]), tData[1], u_strlen(tDat
a[1])) ) { | |
3671 log_err("\\u1EAC not equals to \\u1EA0+\\u0302\n"); | |
3672 } | |
3673 if ( !ucol_equal(coll, tData[4], u_strlen(tData[4]), tData[5], u_strlen(tDat
a[5])) ) { | |
3674 log_err("\\u1EAC not equals to \\u00c2+\\u0323\n"); | |
3675 } | |
3676 if ( !ucol_equal(coll, tData[6], u_strlen(tData[6]), tData[7], u_strlen(tDat
a[7])) ) { | |
3677 log_err("\\u1EB7 not equals to \\u1EA1+\\u0306\n"); | |
3678 } | |
3679 | |
3680 for (j=4; j<8; j++) { | |
3681 tLen = u_strlen(tData[j]); | |
3682 log_verbose("\n Data :%s \tlen: %d key: ", tData[j], tLen); | |
3683 rLen = ucol_getSortKey(coll, tData[j], tLen, resColl, 100); | |
3684 for(i = 0; i<rLen; i++) { | |
3685 log_verbose(" %02X", resColl[i]); | |
3686 } | |
3687 } | |
3688 ucol_close(coll); | |
3689 | |
3690 /* Test the precomposed Greek character with 3 combining marks. */ | |
3691 log_verbose("\n\nTailoring test: Greek character with 3 combining marks"); | |
3692 ruleLen = u_strlen(rule); | |
3693 coll = ucol_openRules(rule, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status); | |
3694 if (U_FAILURE(status)) { | |
3695 log_err("ucol_openRules failed with %s\n", u_errorName(status)); | |
3696 return; | |
3697 } | |
3698 sLen = u_strlen(tailorData[0]); | |
3699 for (j=1; j<6; j++) { | |
3700 tLen = u_strlen(tailorData[j]); | |
3701 if ( !ucol_equal(coll, tailorData[0], sLen, tailorData[j], tLen)) { | |
3702 log_err("\n \\u1FA2 not equals to data[%d]:%s\n", j, tailorData[j]); | |
3703 } | |
3704 } | |
3705 /* Test getSortKey. */ | |
3706 tLen = u_strlen(tailorData[0]); | |
3707 kLen=ucol_getSortKey(coll, tailorData[0], tLen, expColl, 100); | |
3708 for (j=0; j<6; j++) { | |
3709 tLen = u_strlen(tailorData[j]); | |
3710 rLen = ucol_getSortKey(coll, tailorData[j], tLen, resColl, 100); | |
3711 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=
0 ) { | |
3712 log_err("\n Data[%d] :%s \tlen: %d key: ", j, tailorData[j], tLen); | |
3713 for(i = 0; i<rLen; i++) { | |
3714 log_err(" %02X", resColl[i]); | |
3715 } | |
3716 } | |
3717 } | |
3718 ucol_close(coll); | |
3719 | |
3720 log_verbose("\n\nTailoring test for s with caron:"); | |
3721 ruleLen = u_strlen(rule2); | |
3722 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; | |
3723 tLen = u_strlen(tailorData2[0]); | |
3724 kLen=ucol_getSortKey(coll, tailorData2[0], tLen, expColl, 100); | |
3725 for (j=1; j<3; j++) { | |
3726 tLen = u_strlen(tailorData2[j]); | |
3727 rLen = ucol_getSortKey(coll, tailorData2[j], tLen, resColl, 100); | |
3728 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=
0 ) { | |
3729 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, tailo
rData[j], tLen); | |
3730 for(i = 0; i<rLen; i++) { | |
3731 log_err(" %02X", resColl[i]); | |
3732 } | |
3733 } | |
3734 } | |
3735 ucol_close(coll); | |
3736 | |
3737 log_verbose("\n\nTailoring test for &z< ae with circumflex:"); | |
3738 ruleLen = u_strlen(rule3); | |
3739 coll = ucol_openRules(rule3, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; | |
3740 tLen = u_strlen(tailorData3[3]); | |
3741 kLen=ucol_getSortKey(coll, tailorData3[3], tLen, expColl, 100); | |
3742 log_verbose("\n Test Data[3] :%s \tlen: %d key: ", aescstrdup(tailorData3[3
], tLen), tLen); | |
3743 for(i = 0; i<kLen; i++) { | |
3744 log_verbose(" %02X", expColl[i]); | |
3745 } | |
3746 for (j=4; j<6; j++) { | |
3747 tLen = u_strlen(tailorData3[j]); | |
3748 rLen = ucol_getSortKey(coll, tailorData3[j], tLen, resColl, 100); | |
3749 | |
3750 if ( kLen!=rLen || uprv_memcmp(expColl, resColl, rLen*sizeof(uint8_t))!=
0 ) { | |
3751 log_err("\n After tailoring Data[%d] :%s \tlen: %d key: ", j, aescs
trdup(tailorData3[j], tLen), tLen); | |
3752 for(i = 0; i<rLen; i++) { | |
3753 log_err(" %02X", resColl[i]); | |
3754 } | |
3755 } | |
3756 | |
3757 log_verbose("\n Test Data[%d] :%s \tlen: %d key: ", j, aescstrdup(tailo
rData3[j], tLen), tLen); | |
3758 for(i = 0; i<rLen; i++) { | |
3759 log_verbose(" %02X", resColl[i]); | |
3760 } | |
3761 } | |
3762 ucol_close(coll); | |
3763 } | |
3764 | |
3765 static void | |
3766 TestTailor6179(void) | |
3767 { | |
3768 UErrorCode status = U_ZERO_ERROR; | |
3769 int32_t i; | |
3770 UCollator *coll =NULL; | |
3771 uint8_t resColl[100]; | |
3772 int32_t rLen, tLen, ruleLen; | |
3773 /* &[last primary ignorable]<< a &[first primary ignorable]<<b */ | |
3774 static const UChar rule1[]={ | |
3775 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x72,0x7
9, | |
3776 0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x2
0,0x61,0x20, | |
3777 0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x70,0x72,0x69,0x6D,0x61,0x7
2,0x79,0x20, | |
3778 0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3C,0x3C,0x62,0x2
0, 0}; | |
3779 /* &[last secondary ignorable]<<< a &[first secondary ignorable]<<<b */ | |
3780 static const UChar rule2[]={ | |
3781 0x26,0x5B,0x6C,0x61,0x73,0x74,0x20,0x73,0x65,0x63,0x6F,0x6E,0x64,0x6
1, | |
3782 0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x65,0x5D,0x3
C,0x3C,0x3C, | |
3783 0x61,0x20,0x26,0x5B,0x66,0x69,0x72,0x73,0x74,0x20,0x73,0x65,0x63,0x6
F,0x6E, | |
3784 0x64,0x61,0x72,0x79,0x20,0x69,0x67,0x6E,0x6F,0x72,0x61,0x62,0x6C,0x6
5,0x5D,0x3C, | |
3785 0x3C,0x3C,0x20,0x62,0}; | |
3786 | |
3787 static const UChar tData1[][4]={ | |
3788 {0x61, 0}, | |
3789 {0x62, 0}, | |
3790 { 0xFDD0,0x009E, 0} | |
3791 }; | |
3792 static const UChar tData2[][4]={ | |
3793 {0x61, 0}, | |
3794 {0x62, 0}, | |
3795 { 0xFDD0,0x009E, 0} | |
3796 }; | |
3797 | |
3798 /* | |
3799 * These values from FractionalUCA.txt will change, | |
3800 * and need to be updated here. | |
3801 * TODO: Make this not check for particular sort keys. | |
3802 * Instead, test that we get CEs before & after other ignorables; see ticket
#6179. | |
3803 */ | |
3804 static const uint8_t firstPrimaryIgnCE[]={1, 0x83, 1, 5, 0}; | |
3805 static const uint8_t lastPrimaryIgnCE[]={1, 0xFC, 1, 5, 0}; | |
3806 static const uint8_t firstSecondaryIgnCE[]={1, 1, 0xfe, 0}; | |
3807 static const uint8_t lastSecondaryIgnCE[]={1, 1, 0xff, 0}; | |
3808 | |
3809 UParseError parseError; | |
3810 | |
3811 /* Test [Last Primary ignorable] */ | |
3812 | |
3813 log_verbose("Tailoring test: &[last primary ignorable]<<a &[first primary i
gnorable]<<b\n"); | |
3814 ruleLen = u_strlen(rule1); | |
3815 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&status)
; | |
3816 if (U_FAILURE(status)) { | |
3817 log_err_status(status, "Tailoring test: &[last primary ignorable] failed
! -> %s\n", u_errorName(status)); | |
3818 return; | |
3819 } | |
3820 tLen = u_strlen(tData1[0]); | |
3821 rLen = ucol_getSortKey(coll, tData1[0], tLen, resColl, 100); | |
3822 if (rLen != LEN(lastPrimaryIgnCE) || uprv_memcmp(resColl, lastPrimaryIgnCE,
rLen) != 0) { | |
3823 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 0,
tData1[0], rLen); | |
3824 for(i = 0; i<rLen; i++) { | |
3825 log_err(" %02X", resColl[i]); | |
3826 } | |
3827 log_err("\n"); | |
3828 } | |
3829 tLen = u_strlen(tData1[1]); | |
3830 rLen = ucol_getSortKey(coll, tData1[1], tLen, resColl, 100); | |
3831 if (rLen != LEN(firstPrimaryIgnCE) || uprv_memcmp(resColl, firstPrimaryIgnCE
, rLen) != 0) { | |
3832 log_err("Bad result for &[lpi]<<a...: Data[%d] :%s \tlen: %d key: ", 1,
tData1[1], rLen); | |
3833 for(i = 0; i<rLen; i++) { | |
3834 log_err(" %02X", resColl[i]); | |
3835 } | |
3836 log_err("\n"); | |
3837 } | |
3838 ucol_close(coll); | |
3839 | |
3840 | |
3841 /* Test [Last Secondary ignorable] */ | |
3842 log_verbose("Tailoring test: &[last secondary ignorable]<<<a &[first second
ary ignorable]<<<b\n"); | |
3843 ruleLen = u_strlen(rule2); | |
3844 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, &parseError,
&status); | |
3845 if (U_FAILURE(status)) { | |
3846 log_err("Tailoring test: &[last secondary ignorable] failed! -> %s\n", u
_errorName(status)); | |
3847 log_info(" offset=%d \"%s\" | \"%s\"\n", | |
3848 parseError.offset, aescstrdup(parseError.preContext, -1), aescs
trdup(parseError.postContext, -1)); | |
3849 return; | |
3850 } | |
3851 tLen = u_strlen(tData2[0]); | |
3852 rLen = ucol_getSortKey(coll, tData2[0], tLen, resColl, 100); | |
3853 if (rLen != LEN(lastSecondaryIgnCE) || uprv_memcmp(resColl, lastSecondaryIgn
CE, rLen) != 0) { | |
3854 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 0
, tData2[0], rLen); | |
3855 for(i = 0; i<rLen; i++) { | |
3856 log_err(" %02X", resColl[i]); | |
3857 } | |
3858 log_err("\n"); | |
3859 } | |
3860 tLen = u_strlen(tData2[1]); | |
3861 rLen = ucol_getSortKey(coll, tData2[1], tLen, resColl, 100); | |
3862 if (rLen != LEN(firstSecondaryIgnCE) || uprv_memcmp(resColl, firstSecondaryI
gnCE, rLen) != 0) { | |
3863 log_err("Bad result for &[lsi]<<<a...: Data[%d] :%s \tlen: %d key: ", 1,
tData2[1], rLen); | |
3864 for(i = 0; i<rLen; i++) { | |
3865 log_err(" %02X", resColl[i]); | |
3866 } | |
3867 log_err("\n"); | |
3868 } | |
3869 ucol_close(coll); | |
3870 } | |
3871 | |
3872 static void | |
3873 TestUCAPrecontext(void) | |
3874 { | |
3875 UErrorCode status = U_ZERO_ERROR; | |
3876 int32_t i, j; | |
3877 UCollator *coll =NULL; | |
3878 uint8_t resColl[100], prevColl[100]; | |
3879 int32_t rLen, tLen, ruleLen; | |
3880 UChar rule1[256]= {0x26, 0xb7, 0x3c, 0x61, 0}; /* & middle-dot < a */ | |
3881 UChar rule2[256]= {0x26, 0x4C, 0xb7, 0x3c, 0x3c, 0x61, 0}; | |
3882 /* & l middle-dot << a a is an expansion. */ | |
3883 | |
3884 UChar tData1[][20]={ | |
3885 { 0xb7, 0}, /* standalone middle dot(0xb7) */ | |
3886 { 0x387, 0}, /* standalone middle dot(0x387) */ | |
3887 { 0x61, 0}, /* a */ | |
3888 { 0x6C, 0}, /* l */ | |
3889 { 0x4C, 0x0332, 0}, /* l with [first primary ignorable] */ | |
3890 { 0x6C, 0xb7, 0}, /* l with middle dot(0xb7) */ | |
3891 { 0x6C, 0x387, 0}, /* l with middle dot(0x387) */ | |
3892 { 0x4C, 0xb7, 0}, /* L with middle dot(0xb7) */ | |
3893 { 0x4C, 0x387, 0}, /* L with middle dot(0x387) */ | |
3894 { 0x6C, 0x61, 0x387, 0}, /* la with middle dot(0x387) */ | |
3895 { 0x4C, 0x61, 0xb7, 0}, /* La with middle dot(0xb7) */ | |
3896 }; | |
3897 | |
3898 log_verbose("\n\nEN collation:"); | |
3899 coll = ucol_open("en", &status); | |
3900 if (U_FAILURE(status)) { | |
3901 log_err_status(status, "Tailoring test: &z <<a|- failed! -> %s\n", u_err
orName(status)); | |
3902 return; | |
3903 } | |
3904 for (j=0; j<11; j++) { | |
3905 tLen = u_strlen(tData1[j]); | |
3906 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); | |
3907 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { | |
3908 log_err("\n Expecting greater key than previous test case: Data[%d]
:%s.", | |
3909 j, tData1[j]); | |
3910 } | |
3911 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); | |
3912 for(i = 0; i<rLen; i++) { | |
3913 log_verbose(" %02X", resColl[i]); | |
3914 } | |
3915 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); | |
3916 } | |
3917 ucol_close(coll); | |
3918 | |
3919 | |
3920 log_verbose("\n\nJA collation:"); | |
3921 coll = ucol_open("ja", &status); | |
3922 if (U_FAILURE(status)) { | |
3923 log_err("Tailoring test: &z <<a|- failed!"); | |
3924 return; | |
3925 } | |
3926 for (j=0; j<11; j++) { | |
3927 tLen = u_strlen(tData1[j]); | |
3928 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); | |
3929 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { | |
3930 log_err("\n Expecting greater key than previous test case: Data[%d]
:%s.", | |
3931 j, tData1[j]); | |
3932 } | |
3933 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); | |
3934 for(i = 0; i<rLen; i++) { | |
3935 log_verbose(" %02X", resColl[i]); | |
3936 } | |
3937 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); | |
3938 } | |
3939 ucol_close(coll); | |
3940 | |
3941 | |
3942 log_verbose("\n\nTailoring test: & middle dot < a "); | |
3943 ruleLen = u_strlen(rule1); | |
3944 coll = ucol_openRules(rule1, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&statu
s); | |
3945 if (U_FAILURE(status)) { | |
3946 log_err("Tailoring test: & middle dot < a failed!"); | |
3947 return; | |
3948 } | |
3949 for (j=0; j<11; j++) { | |
3950 tLen = u_strlen(tData1[j]); | |
3951 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); | |
3952 if ((j>0) && (strcmp((char *)resColl, (char *)prevColl)<0)) { | |
3953 log_err("\n Expecting greater key than previous test case: Data[%d
] :%s.", | |
3954 j, tData1[j]); | |
3955 } | |
3956 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); | |
3957 for(i = 0; i<rLen; i++) { | |
3958 log_verbose(" %02X", resColl[i]); | |
3959 } | |
3960 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); | |
3961 } | |
3962 ucol_close(coll); | |
3963 | |
3964 | |
3965 log_verbose("\n\nTailoring test: & l middle-dot << a "); | |
3966 ruleLen = u_strlen(rule2); | |
3967 coll = ucol_openRules(rule2, ruleLen, UCOL_OFF, UCOL_TERTIARY, NULL,&stat
us); | |
3968 if (U_FAILURE(status)) { | |
3969 log_err("Tailoring test: & l middle-dot << a failed!"); | |
3970 return; | |
3971 } | |
3972 for (j=0; j<11; j++) { | |
3973 tLen = u_strlen(tData1[j]); | |
3974 rLen = ucol_getSortKey(coll, tData1[j], tLen, resColl, 100); | |
3975 if ((j>0) && (j!=3) && (strcmp((char *)resColl, (char *)prevColl)<0))
{ | |
3976 log_err("\n Expecting greater key than previous test case: Data[%
d] :%s.", | |
3977 j, tData1[j]); | |
3978 } | |
3979 if ((j==3)&&(strcmp((char *)resColl, (char *)prevColl)>0)) { | |
3980 log_err("\n Expecting smaller key than previous test case: Data[%
d] :%s.", | |
3981 j, tData1[j]); | |
3982 } | |
3983 log_verbose("\n Data[%d] :%s \tlen: %d key: ", j, tData1[j], rLen); | |
3984 for(i = 0; i<rLen; i++) { | |
3985 log_verbose(" %02X", resColl[i]); | |
3986 } | |
3987 uprv_memcpy(prevColl, resColl, sizeof(uint8_t)*(rLen+1)); | |
3988 } | |
3989 ucol_close(coll); | |
3990 } | |
3991 | |
3992 static void | |
3993 TestOutOfBuffer5468(void) | |
3994 { | |
3995 static const char *test = "\\u4e00"; | |
3996 UChar ustr[256]; | |
3997 int32_t ustr_length = u_unescape(test, ustr, 256); | |
3998 unsigned char shortKeyBuf[1]; | |
3999 int32_t sortkey_length; | |
4000 UErrorCode status = U_ZERO_ERROR; | |
4001 static UCollator *coll = NULL; | |
4002 | |
4003 coll = ucol_open("root", &status); | |
4004 if(U_FAILURE(status)) { | |
4005 log_err_status(status, "Couldn't open UCA -> %s\n", u_errorName(status)); | |
4006 return; | |
4007 } | |
4008 ucol_setStrength(coll, UCOL_PRIMARY); | |
4009 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status); | |
4010 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
4011 if (U_FAILURE(status)) { | |
4012 log_err("Failed setting atributes\n"); | |
4013 return; | |
4014 } | |
4015 | |
4016 sortkey_length = ucol_getSortKey(coll, ustr, ustr_length, shortKeyBuf, sizeo
f(shortKeyBuf)); | |
4017 if (sortkey_length != 4) { | |
4018 log_err("expecting length of sortKey is 4 got:%d ", sortkey_length); | |
4019 } | |
4020 log_verbose("length of sortKey is %d", sortkey_length); | |
4021 ucol_close(coll); | |
4022 } | |
4023 | |
4024 #define TSKC_DATA_SIZE 5 | |
4025 #define TSKC_BUF_SIZE 50 | |
4026 static void | |
4027 TestSortKeyConsistency(void) | |
4028 { | |
4029 UErrorCode icuRC = U_ZERO_ERROR; | |
4030 UCollator* ucol; | |
4031 UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006, 0xFFFD}; | |
4032 | |
4033 uint8_t bufFull[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; | |
4034 uint8_t bufPart[TSKC_DATA_SIZE][TSKC_BUF_SIZE]; | |
4035 int32_t i, j, i2; | |
4036 | |
4037 ucol = ucol_openFromShortString("LEN_S4", FALSE, NULL, &icuRC); | |
4038 if (U_FAILURE(icuRC)) | |
4039 { | |
4040 log_err_status(icuRC, "ucol_openFromShortString failed -> %s\n", u_error
Name(icuRC)); | |
4041 return; | |
4042 } | |
4043 | |
4044 for (i = 0; i < TSKC_DATA_SIZE; i++) | |
4045 { | |
4046 UCharIterator uiter; | |
4047 uint32_t state[2] = { 0, 0 }; | |
4048 int32_t dataLen = i+1; | |
4049 for (j=0; j<TSKC_BUF_SIZE; j++) | |
4050 bufFull[i][j] = bufPart[i][j] = 0; | |
4051 | |
4052 /* Full sort key */ | |
4053 ucol_getSortKey(ucol, data, dataLen, bufFull[i], TSKC_BUF_SIZE); | |
4054 | |
4055 /* Partial sort key */ | |
4056 uiter_setString(&uiter, data, dataLen); | |
4057 ucol_nextSortKeyPart(ucol, &uiter, state, bufPart[i], TSKC_BUF_SIZE, &ic
uRC); | |
4058 if (U_FAILURE(icuRC)) | |
4059 { | |
4060 log_err("ucol_nextSortKeyPart failed\n"); | |
4061 ucol_close(ucol); | |
4062 return; | |
4063 } | |
4064 | |
4065 for (i2=0; i2<i; i2++) | |
4066 { | |
4067 UBool fullMatch = TRUE; | |
4068 UBool partMatch = TRUE; | |
4069 for (j=0; j<TSKC_BUF_SIZE; j++) | |
4070 { | |
4071 fullMatch = fullMatch && (bufFull[i][j] != bufFull[i2][j]); | |
4072 partMatch = partMatch && (bufPart[i][j] != bufPart[i2][j]); | |
4073 } | |
4074 if (fullMatch != partMatch) { | |
4075 log_err(fullMatch ? "full key was consistent, but partial key ch
anged\n" | |
4076 : "partial key was consistent, but full key ch
anged\n"); | |
4077 ucol_close(ucol); | |
4078 return; | |
4079 } | |
4080 } | |
4081 } | |
4082 | |
4083 /*=============================================*/ | |
4084 ucol_close(ucol); | |
4085 } | |
4086 | |
4087 /* ticket: 6101 */ | |
4088 static void TestCroatianSortKey(void) { | |
4089 const char* collString = "LHR_AN_CX_EX_FX_HX_NX_S3"; | |
4090 UErrorCode status = U_ZERO_ERROR; | |
4091 UCollator *ucol; | |
4092 UCharIterator iter; | |
4093 | |
4094 static const UChar text[] = { 0x0044, 0xD81A }; | |
4095 | |
4096 size_t length = sizeof(text)/sizeof(*text); | |
4097 | |
4098 uint8_t textSortKey[32]; | |
4099 size_t lenSortKey = 32; | |
4100 size_t actualSortKeyLen; | |
4101 uint32_t uStateInfo[2] = { 0, 0 }; | |
4102 | |
4103 ucol = ucol_openFromShortString(collString, FALSE, NULL, &status); | |
4104 if (U_FAILURE(status)) { | |
4105 log_err_status(status, "ucol_openFromShortString error in Craotian test.
-> %s\n", u_errorName(status)); | |
4106 return; | |
4107 } | |
4108 | |
4109 uiter_setString(&iter, text, length); | |
4110 | |
4111 actualSortKeyLen = ucol_nextSortKeyPart( | |
4112 ucol, &iter, (uint32_t*)uStateInfo, | |
4113 textSortKey, lenSortKey, &status | |
4114 ); | |
4115 | |
4116 if (actualSortKeyLen == lenSortKey) { | |
4117 log_err("ucol_nextSortKeyPart did not give correct result in Croatian te
st.\n"); | |
4118 } | |
4119 | |
4120 ucol_close(ucol); | |
4121 } | |
4122 | |
4123 /* ticket: 6140 */ | |
4124 /* This test ensures that codepoints such as 0x3099 are flagged correctly by the
collator since | |
4125 * they are both Hiragana and Katakana | |
4126 */ | |
4127 #define SORTKEYLEN 50 | |
4128 static void TestHiragana(void) { | |
4129 UErrorCode status = U_ZERO_ERROR; | |
4130 UCollator* ucol; | |
4131 UCollationResult strcollresult; | |
4132 UChar data1[] = { 0x3058, 0x30B8 }; /* Hiragana and Katakana letter Zi */ | |
4133 UChar data2[] = { 0x3057, 0x3099, 0x30B7, 0x3099 }; | |
4134 int32_t data1Len = sizeof(data1)/sizeof(*data1); | |
4135 int32_t data2Len = sizeof(data2)/sizeof(*data2); | |
4136 int32_t i, j; | |
4137 uint8_t sortKey1[SORTKEYLEN]; | |
4138 uint8_t sortKey2[SORTKEYLEN]; | |
4139 | |
4140 UCharIterator uiter1; | |
4141 UCharIterator uiter2; | |
4142 uint32_t state1[2] = { 0, 0 }; | |
4143 uint32_t state2[2] = { 0, 0 }; | |
4144 int32_t keySize1; | |
4145 int32_t keySize2; | |
4146 | |
4147 ucol = ucol_openFromShortString("LJA_AN_CX_EX_FX_HO_NX_S4", FALSE, NULL, | |
4148 &status); | |
4149 if (U_FAILURE(status)) { | |
4150 log_err_status(status, "Error status: %s; Unable to open collator from s
hort string.\n", u_errorName(status)); | |
4151 return; | |
4152 } | |
4153 | |
4154 /* Start of full sort keys */ | |
4155 /* Full sort key1 */ | |
4156 keySize1 = ucol_getSortKey(ucol, data1, data1Len, sortKey1, SORTKEYLEN); | |
4157 /* Full sort key2 */ | |
4158 keySize2 = ucol_getSortKey(ucol, data2, data2Len, sortKey2, SORTKEYLEN); | |
4159 if (keySize1 == keySize2) { | |
4160 for (i = 0; i < keySize1; i++) { | |
4161 if (sortKey1[i] != sortKey2[i]) { | |
4162 log_err("Full sort keys are different. Should be equal."); | |
4163 } | |
4164 } | |
4165 } else { | |
4166 log_err("Full sort keys sizes doesn't match: %d %d", keySize1, keySize2)
; | |
4167 } | |
4168 /* End of full sort keys */ | |
4169 | |
4170 /* Start of partial sort keys */ | |
4171 /* Partial sort key1 */ | |
4172 uiter_setString(&uiter1, data1, data1Len); | |
4173 keySize1 = ucol_nextSortKeyPart(ucol, &uiter1, state1, sortKey1, SORTKEYLEN,
&status); | |
4174 /* Partial sort key2 */ | |
4175 uiter_setString(&uiter2, data2, data2Len); | |
4176 keySize2 = ucol_nextSortKeyPart(ucol, &uiter2, state2, sortKey2, SORTKEYLEN,
&status); | |
4177 if (U_SUCCESS(status) && keySize1 == keySize2) { | |
4178 for (j = 0; j < keySize1; j++) { | |
4179 if (sortKey1[j] != sortKey2[j]) { | |
4180 log_err("Partial sort keys are different. Should be equal"); | |
4181 } | |
4182 } | |
4183 } else { | |
4184 log_err("Error Status: %s or Partial sort keys sizes doesn't match: %d %
d", u_errorName(status), keySize1, keySize2); | |
4185 } | |
4186 /* End of partial sort keys */ | |
4187 | |
4188 /* Start of strcoll */ | |
4189 /* Use ucol_strcoll() to determine ordering */ | |
4190 strcollresult = ucol_strcoll(ucol, data1, data1Len, data2, data2Len); | |
4191 if (strcollresult != UCOL_EQUAL) { | |
4192 log_err("Result from ucol_strcoll() should be UCOL_EQUAL."); | |
4193 } | |
4194 | |
4195 ucol_close(ucol); | |
4196 } | |
4197 | |
4198 /* One collation test case: two strings and the expected result of comparing them */ | |
4199 typedef struct { | |
4200 const UChar source[MAX_TOKEN_LEN]; /* Left-hand string, at most MAX_TOKEN_LEN code units */ | |
4201 const UChar target[MAX_TOKEN_LEN]; /* Right-hand string, at most MAX_TOKEN_LEN code units */ | |
4202 UCollationResult result; /* Expected result of comparing source with target, e.g. UCOL_LESS or UCOL_EQUAL */ | |
4203 } OneTestCase; | |
4204 | |
4205 /* | |
4206 * Utility function to test one collation test case. | |
4207 * @param testcases Array of test cases. | |
4208 * @param n_testcases Size of the array testcases. | |
4209 * @param str_rules Array of rules. These rules should be specifying the same r
ule in different formats. | |
4210 * @param n_rules Size of the array str_rules. | |
4211 */ | |
4212 static void doTestOneTestCase(const OneTestCase testcases[], | |
4213 int n_testcases, | |
4214 const char* str_rules[], | |
4215 int n_rules) | |
4216 { | |
4217 int rule_no, testcase_no; | |
4218 UChar rule[500]; | |
4219 int32_t length = 0; | |
4220 UErrorCode status = U_ZERO_ERROR; | |
4221 UParseError parse_error; | |
4222 UCollator *myCollation; | |
4223 | |
4224 for (rule_no = 0; rule_no < n_rules; ++rule_no) { | |
4225 | |
4226 length = u_unescape(str_rules[rule_no], rule, 500); | |
4227 if (length == 0) { | |
4228 log_err("ERROR: The rule cannot be unescaped: %s\n"); | |
4229 return; | |
4230 } | |
4231 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er
ror, &status); | |
4232 if(U_FAILURE(status)){ | |
4233 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
4234 log_info(" offset=%d \"%s\" | \"%s\"\n", | |
4235 parse_error.offset, | |
4236 aescstrdup(parse_error.preContext, -1), | |
4237 aescstrdup(parse_error.postContext, -1)); | |
4238 return; | |
4239 } | |
4240 log_verbose("Testing the <<* syntax\n"); | |
4241 ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
4242 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
4243 for (testcase_no = 0; testcase_no < n_testcases; ++testcase_no) { | |
4244 doTest(myCollation, | |
4245 testcases[testcase_no].source, | |
4246 testcases[testcase_no].target, | |
4247 testcases[testcase_no].result | |
4248 ); | |
4249 } | |
4250 ucol_close(myCollation); | |
4251 } | |
4252 } | |
4253 | |
4254 const static OneTestCase rangeTestcases[] = { | |
4255 { {0x0061}, {0x0062}, UCOL
_LESS }, /* "a" < "b" */ | |
4256 { {0x0062}, {0x0063}, UCOL
_LESS }, /* "b" < "c" */ | |
4257 { {0x0061}, {0x0063}, UCOL
_LESS }, /* "a" < "c" */ | |
4258 | |
4259 { {0x0062}, {0x006b}, UCOL
_LESS }, /* "b" << "k" */ | |
4260 { {0x006b}, {0x006c}, UCOL
_LESS }, /* "k" << "l" */ | |
4261 { {0x0062}, {0x006c}, UCOL
_LESS }, /* "b" << "l" */ | |
4262 { {0x0061}, {0x006c}, UCOL
_LESS }, /* "a" < "l" */ | |
4263 { {0x0061}, {0x006d}, UCOL
_LESS }, /* "a" < "m" */ | |
4264 | |
4265 { {0x0079}, {0x006d}, UCOL
_LESS }, /* "y" << "m" */ | |
4266 { {0x0079}, {0x0067}, UCOL
_LESS }, /* "y" < "g" */ | |
4267 { {0x0061}, {0x0068}, UCOL
_LESS }, /* "a" < "h" */ | |
4268 { {0x0061}, {0x0065}, UCOL
_LESS }, /* "a" < "e" */ | |
4269 | |
4270 { {0x0061}, {0x0031}, UCOL
_EQUAL }, /* "a" = "1" */ | |
4271 { {0x0061}, {0x0032}, UCOL
_EQUAL }, /* "a" = "2" */ | |
4272 { {0x0061}, {0x0033}, UCOL
_EQUAL }, /* "a" = "3" */ | |
4273 { {0x0061}, {0x0066}, UCOL
_LESS }, /* "a" < "f" */ | |
4274 { {0x006c, 0x0061}, {0x006b, 0x0062}, UCOL
_LESS }, /* "la" < "kb" */ | |
4275 { {0x0061, 0x0061, 0x0061}, {0x0031, 0x0032, 0x0033}, UCOL
_EQUAL }, /* "aaa" = "123" */ | |
4276 { {0x0062}, {0x007a}, UCOL
_LESS }, /* "b" < "z" */ | |
4277 { {0x0061, 0x007a, 0x0062}, {0x0032, 0x0079, 0x006d}, UCOL
_LESS }, /* "azb" << "2ym" */ | |
4278 }; | |
4279 | |
4280 static int nRangeTestcases = LEN(rangeTestcases); | |
4281 | |
4282 const static OneTestCase rangeTestcasesSupplemental[] = { | |
4283 { {0x4e00}, {0xfffb}, UCOL
_LESS }, /* U+4E00 < U+FFFB */ | |
4284 { {0xfffb}, {0xd800, 0xdc00}, UCOL
_LESS }, /* U+FFFB < U+10000 */ | |
4285 { {0xd800, 0xdc00}, {0xd800, 0xdc01}, UCOL
_LESS }, /* U+10000 < U+10001 */ | |
4286 { {0x4e00}, {0xd800, 0xdc01}, UCOL
_LESS }, /* U+4E00 < U+10001 */ | |
4287 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+10001 < U+10002 */ | |
4288 { {0xd800, 0xdc01}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+10001 < U+10002 (same case as the previous row) */ | |
4289 { {0x4e00}, {0xd800, 0xdc02}, UCOL
_LESS }, /* U+4E00 < U+10002 */ | |
4290 }; | |
4291 | |
4292 static int nRangeTestcasesSupplemental = LEN(rangeTestcasesSupplemental); | |
4293 | |
4294 const static OneTestCase rangeTestcasesQwerty[] = { | |
4295 { {0x0071}, {0x0077}, UCOL
_LESS }, /* "q" < "w" */ | |
4296 { {0x0077}, {0x0065}, UCOL
_LESS }, /* "w" < "e" */ | |
4297 | |
4298 { {0x0079}, {0x0075}, UCOL
_LESS }, /* "y" << "u" */ | |
4299 { {0x0071}, {0x0075}, UCOL
_LESS }, /* "q" < "u" */ | |
4300 | |
4301 { {0x0074}, {0x0069}, UCOL
_LESS }, /* "t" <<< "i" */ | |
4302 { {0x006f}, {0x0070}, UCOL
_LESS }, /* "o" <<< "p" */ | |
4303 | |
4304 { {0x0079}, {0x0065}, UCOL
_LESS }, /* "y" < "e" */ | |
4305 { {0x0069}, {0x0075}, UCOL
_LESS }, /* "i" << "u" */ | |
4306 | |
4307 { {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, | |
4308 {0x0077, 0x0065, 0x0072, 0x0065}, UCOL
_LESS }, /* "quest" < "were" */ | |
4309 { {0x0071, 0x0075, 0x0061, 0x0063, 0x006b}, | |
4310 {0x0071, 0x0075, 0x0065, 0x0073, 0x0074}, UCOL
_LESS }, /* "quack" < "quest" */ | |
4311 }; | |
4312 | |
4313 static int nRangeTestcasesQwerty = LEN(rangeTestcasesQwerty); | |
4314 | |
4315 static void TestSameStrengthList(void) | |
4316 { | |
4317 const char* strRules[] = { | |
4318 /* Normal */ | |
4319 "&a<b<c<d &b<<k<<l<<m &k<<<x<<<y<<<z &y<f<g<h<e &a=1=2=3", | |
4320 | |
4321 /* Lists */ | |
4322 "&a<*bcd &b<<*klm &k<<<*xyz &y<*fghe &a=*123", | |
4323 }; | |
4324 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); | |
4325 } | |
4326 | |
4327 static void TestSameStrengthListQuoted(void) | |
4328 { | |
4329 const char* strRules[] = { | |
4330 /* Lists with quoted characters */ | |
4331 "&\\u0061<*bcd &b<<*klm &k<<<*xyz &y<*f\\u0067\\u0068e &a=*123", | |
4332 "&'\\u0061'<*bcd &b<<*klm &k<<<*xyz &y<*f'\\u0067\\u0068'e &a=*123", | |
4333 | |
4334 "&\\u0061<*b\\u0063d &b<<*klm &k<<<*xyz &\\u0079<*fgh\\u0065 &a=*\\u0031\\u0
032\\u0033", | |
4335 "&'\\u0061'<*b'\\u0063'd &b<<*klm &k<<<*xyz &'\\u0079'<*fgh'\\u0065' &a=*'\\
u0031\\u0032\\u0033'", | |
4336 | |
4337 "&\\u0061<*\\u0062c\\u0064 &b<<*klm &k<<<*xyz &y<*fghe &a=*\\u0031\\u0032\\
u0033", | |
4338 "&'\\u0061'<*'\\u0062'c'\\u0064' &b<<*klm &k<<<*xyz &y<*fghe &a=*'\\u0031\\
u0032\\u0033'", | |
4339 }; | |
4340 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); | |
4341 } | |
4342 | |
4343 static void TestSameStrengthListSupplemental(void) | |
4344 { | |
4345 const char* strRules[] = { | |
4346 "&\\u4e00<\\ufffb<\\U00010000<\\U00010001<\\U00010002", | |
4347 "&\\u4e00<\\ufffb<\\ud800\\udc00<\\ud800\\udc01<\\ud800\\udc02", | |
4348 "&\\u4e00<*\\ufffb\\U00010000\\U00010001\\U00010002", | |
4349 "&\\u4e00<*\\ufffb\\ud800\\udc00\\ud800\\udc01\\ud800\\udc02", | |
4350 }; | |
4351 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str
Rules, LEN(strRules)); | |
4352 } | |
4353 | |
4354 static void TestSameStrengthListQwerty(void) | |
4355 { | |
4356 const char* strRules[] = { | |
4357 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ | |
4358 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ | |
4359 "&\\u0071<\\u0077<\\u0065<\\u0072 &\\u0077<<\\u0074<<\\u0079<<\\u0075 &\\u00
74<<<\\u0069<<<\\u006f<<<\\u0070 &\\u006f=\\u0061=\\u0073=\\u0064", | |
4360 "&'\\u0071'<\\u0077<\\u0065<\\u0072 &\\u0077<<'\\u0074'<<\\u0079<<\\u0075 &\
\u0074<<<\\u0069<<<'\\u006f'<<<\\u0070 &\\u006f=\\u0061='\\u0073'=\\u0064", | |
4361 "&\\u0071<*\\u0077\\u0065\\u0072 &\\u0077<<*\\u0074\\u0079\\u0075 &\\u0074<<
<*\\u0069\\u006f\\u0070 &\\u006f=*\\u0061\\u0073\\u0064", | |
4362 | |
4363 /* Quoted characters also will work if two quoted characters are not consecu
tive. */ | |
4364 "&\\u0071<*'\\u0077'\\u0065\\u0072 &\\u0077<<*\\u0074'\\u0079'\\u0075 &\\u00
74<<<*\\u0069\\u006f'\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064", | |
4365 | |
4366 /* Consecutive quoted charactes do not work, because a '' will be treated as
a quote character. */ | |
4367 /* "&\\u0071<*'\\u0077''\\u0065''\\u0072' &\\u0077<<*'\\u0074''\\u0079''\\u0
075' &\\u0074<<<*'\\u0069''\\u006f''\\u0070' &'\\u006f'=*\\u0061\\u0073\\u0064",
*/ | |
4368 | |
4369 }; | |
4370 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(s
trRules)); | |
4371 } | |
4372 | |
4373 static void TestSameStrengthListQuotedQwerty(void) | |
4374 { | |
4375 const char* strRules[] = { | |
4376 "&q<w<e<r &w<<t<<y<<u &t<<<i<<<o<<<p &o=a=s=d", /* Normal */ | |
4377 "&q<*wer &w<<*tyu &t<<<*iop &o=*asd", /* Lists */ | |
4378 "&q<*w'e'r &w<<*'t'yu &t<<<*io'p' &o=*'a's'd'", /* Lists with quotes */ | |
4379 | |
4380 /* Lists with continuous quotes may not work, because '' will be treated as
a quote character. */ | |
4381 /* "&q<*'w''e''r' &w<<*'t''y''u' &t<<<*'i''o''p' &o=*'a''s''d'", */ | |
4382 }; | |
4383 doTestOneTestCase(rangeTestcasesQwerty, nRangeTestcasesQwerty, strRules, LEN(s
trRules)); | |
4384 } | |
4385 | |
4386 static void TestSameStrengthListRanges(void) | |
4387 { | |
4388 const char* strRules[] = { | |
4389 "&a<*b-d &b<<*k-m &k<<<*x-z &y<*f-he &a=*1-3", | |
4390 }; | |
4391 doTestOneTestCase(rangeTestcases, nRangeTestcases, strRules, LEN(strRules)); | |
4392 } | |
4393 | |
4394 static void TestSameStrengthListSupplementalRanges(void) | |
4395 { | |
4396 const char* strRules[] = { | |
4397 /* Note: U+FFFD..U+FFFF are not tailorable, so a range cannot include them.
*/ | |
4398 "&\\u4e00<*\\ufffb\\U00010000-\\U00010002", | |
4399 }; | |
4400 doTestOneTestCase(rangeTestcasesSupplemental, nRangeTestcasesSupplemental, str
Rules, LEN(strRules)); | |
4401 } | |
4402 | |
4403 static void TestSpecialCharacters(void) | |
4404 { | |
4405 const char* strRules[] = { | |
4406 /* Normal */ | |
4407 "&';'<'+'<','<'-'<'&'<'*'", | |
4408 | |
4409 /* List */ | |
4410 "&';'<*'+,-&*'", | |
4411 | |
4412 /* Range */ | |
4413 "&';'<*'+'-'-&*'", | |
4414 }; | |
4415 | |
4416 const static OneTestCase specialCharacterStrings[] = { | |
4417 { {0x003b}, {0x002b}, UCOL_LESS }, /* ; < + */ | |
4418 { {0x002b}, {0x002c}, UCOL_LESS }, /* + < , */ | |
4419 { {0x002c}, {0x002d}, UCOL_LESS }, /* , < - */ | |
4420 { {0x002d}, {0x0026}, UCOL_LESS }, /* - < & */ | |
4421 }; | |
4422 doTestOneTestCase(specialCharacterStrings, LEN(specialCharacterStrings), strRu
les, LEN(strRules)); | |
4423 } | |
4424 | |
4425 static void TestPrivateUseCharacters(void) | |
4426 { | |
4427 const char* strRules[] = { | |
4428 /* Normal */ | |
4429 "&'\\u5ea7'<'\\uE2D8'<'\\uE2D9'<'\\uE2DA'<'\\uE2DB'<'\\uE2DC'<'\\u4e8d'", | |
4430 "&\\u5ea7<\\uE2D8<\\uE2D9<\\uE2DA<\\uE2DB<\\uE2DC<\\u4e8d", | |
4431 }; | |
4432 | |
4433 const static OneTestCase privateUseCharacterStrings[] = { | |
4434 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, | |
4435 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, | |
4436 { {0xe2d9}, {0xe2da}, UCOL_LESS }, | |
4437 { {0xe2da}, {0xe2db}, UCOL_LESS }, | |
4438 { {0xe2db}, {0xe2dc}, UCOL_LESS }, | |
4439 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, | |
4440 }; | |
4441 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings),
strRules, LEN(strRules)); | |
4442 } | |
4443 | |
4444 static void TestPrivateUseCharactersInList(void) | |
4445 { | |
4446 const char* strRules[] = { | |
4447 /* List */ | |
4448 "&'\\u5ea7'<*'\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d'", | |
4449 /* "&'\\u5ea7'<*\\uE2D8'\\uE2D9\\uE2DA'\\uE2DB'\\uE2DC\\u4e8d'", */ | |
4450 "&\\u5ea7<*\\uE2D8\\uE2D9\\uE2DA\\uE2DB\\uE2DC\\u4e8d", | |
4451 }; | |
4452 | |
4453 const static OneTestCase privateUseCharacterStrings[] = { | |
4454 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, | |
4455 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, | |
4456 { {0xe2d9}, {0xe2da}, UCOL_LESS }, | |
4457 { {0xe2da}, {0xe2db}, UCOL_LESS }, | |
4458 { {0xe2db}, {0xe2dc}, UCOL_LESS }, | |
4459 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, | |
4460 }; | |
4461 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings),
strRules, LEN(strRules)); | |
4462 } | |
4463 | |
4464 static void TestPrivateUseCharactersInRange(void) | |
4465 { | |
4466 const char* strRules[] = { | |
4467 /* Range */ | |
4468 "&'\\u5ea7'<*'\\uE2D8'-'\\uE2DC\\u4e8d'", | |
4469 "&\\u5ea7<*\\uE2D8-\\uE2DC\\u4e8d", | |
4470 /* "&\\u5ea7<\\uE2D8'\\uE2D8'-'\\uE2D9'\\uE2DA-\\uE2DB\\uE2DC\\u4e8d", */ | |
4471 }; | |
4472 | |
4473 const static OneTestCase privateUseCharacterStrings[] = { | |
4474 { {0x5ea7}, {0xe2d8}, UCOL_LESS }, | |
4475 { {0xe2d8}, {0xe2d9}, UCOL_LESS }, | |
4476 { {0xe2d9}, {0xe2da}, UCOL_LESS }, | |
4477 { {0xe2da}, {0xe2db}, UCOL_LESS }, | |
4478 { {0xe2db}, {0xe2dc}, UCOL_LESS }, | |
4479 { {0xe2dc}, {0x4e8d}, UCOL_LESS }, | |
4480 }; | |
4481 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings),
strRules, LEN(strRules)); | |
4482 } | |
4483 | |
4484 static void TestInvalidListsAndRanges(void) | |
4485 { | |
4486 const char* invalidRules[] = { | |
4487 /* Range not in starred expression */ | |
4488 "&\\ufffe<\\uffff-\\U00010002", | |
4489 | |
4490 /* Range without start */ | |
4491 "&a<*-c", | |
4492 | |
4493 /* Range without end */ | |
4494 "&a<*b-", | |
4495 | |
4496 /* More than one hyphen */ | |
4497 "&a<*b-g-l", | |
4498 | |
4499 /* Range in the wrong order */ | |
4500 "&a<*k-b", | |
4501 | |
4502 }; | |
4503 | |
4504 UChar rule[500]; | |
4505 UErrorCode status = U_ZERO_ERROR; | |
4506 UParseError parse_error; | |
4507 int n_rules = LEN(invalidRules); | |
4508 int rule_no; | |
4509 int length; | |
4510 UCollator *myCollation; | |
4511 | |
4512 for (rule_no = 0; rule_no < n_rules; ++rule_no) { | |
4513 | |
4514 length = u_unescape(invalidRules[rule_no], rule, 500); | |
4515 if (length == 0) { | |
4516 log_err("ERROR: The rule cannot be unescaped: %s\n"); | |
4517 return; | |
4518 } | |
4519 myCollation = ucol_openRules(rule, length, UCOL_ON, UCOL_TERTIARY, &parse_er
ror, &status); | |
4520 (void)myCollation; /* Suppress set but not used warning. */ | |
4521 if(!U_FAILURE(status)){ | |
4522 log_err("ERROR: Could not cause a failure as expected: \n"); | |
4523 } | |
4524 status = U_ZERO_ERROR; | |
4525 } | |
4526 } | |
4527 | |
4528 /* | |
4529 * This test ensures that characters placed before a character in a different sc
ript have the same lead byte | |
4530 * in their collation key before and after script reordering. | |
4531 */ | |
4532 static void TestBeforeRuleWithScriptReordering(void) | |
4533 { | |
4534 UParseError error; | |
4535 UErrorCode status = U_ZERO_ERROR; | |
4536 UCollator *myCollation; | |
4537 char srules[500] = "&[before 1]\\u03b1 < \\u0e01"; | |
4538 UChar rules[500]; | |
4539 uint32_t rulesLength = 0; | |
4540 int32_t reorderCodes[1] = {USCRIPT_GREEK}; | |
4541 UCollationResult collResult; | |
4542 | |
4543 uint8_t baseKey[256]; | |
4544 uint32_t baseKeyLength; | |
4545 uint8_t beforeKey[256]; | |
4546 uint32_t beforeKeyLength; | |
4547 | |
4548 UChar base[] = { 0x03b1 }; /* base */ | |
4549 int32_t baseLen = sizeof(base)/sizeof(*base); | |
4550 | |
4551 UChar before[] = { 0x0e01 }; /* ko kai */ | |
4552 int32_t beforeLen = sizeof(before)/sizeof(*before); | |
4553 | |
4554 /*UChar *data[] = { before, base }; | |
4555 genericRulesStarter(srules, data, 2);*/ | |
4556 | |
4557 log_verbose("Testing the &[before 1] rule with [reorder grek]\n"); | |
4558 | |
4559 (void)beforeKeyLength; /* Suppress set but not used warnings. */ | |
4560 (void)baseKeyLength; | |
4561 | |
4562 /* build collator */ | |
4563 log_verbose("Testing the &[before 1] rule with [scriptReorder grek]\n"); | |
4564 | |
4565 rulesLength = u_unescape(srules, rules, LEN(rules)); | |
4566 myCollation = ucol_openRules(rules, rulesLength, UCOL_ON, UCOL_TERTIARY, &er
ror, &status); | |
4567 if(U_FAILURE(status)) { | |
4568 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
4569 return; | |
4570 } | |
4571 | |
4572 /* check collation results - before rule applied but not script reordering *
/ | |
4573 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); | |
4574 if (collResult != UCOL_GREATER) { | |
4575 log_err("Collation result not correct before script reordering = %d\n",
collResult); | |
4576 } | |
4577 | |
4578 /* check the lead byte of the collation keys before script reordering */ | |
4579 baseKeyLength = ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); | |
4580 beforeKeyLength = ucol_getSortKey(myCollation, before, beforeLen, beforeKey,
256); | |
4581 if (baseKey[0] != beforeKey[0]) { | |
4582 log_err("Different lead byte for sort keys using before rule and before sc
ript reordering. base character lead byte = %02x, before character lead byte = %
02x\n", baseKey[0], beforeKey[0]); | |
4583 } | |
4584 | |
4585 /* reorder the scripts */ | |
4586 ucol_setReorderCodes(myCollation, reorderCodes, 1, &status); | |
4587 if(U_FAILURE(status)) { | |
4588 log_err_status(status, "ERROR: while setting script order: %s\n", myErro
rName(status)); | |
4589 return; | |
4590 } | |
4591 | |
4592 /* check collation results - before rule applied and after script reordering
*/ | |
4593 collResult = ucol_strcoll(myCollation, base, baseLen, before, beforeLen); | |
4594 if (collResult != UCOL_GREATER) { | |
4595 log_err("Collation result not correct after script reordering = %d\n", c
ollResult); | |
4596 } | |
4597 | |
4598 /* check the lead byte of the collation keys after script reordering */ | |
4599 ucol_getSortKey(myCollation, base, baseLen, baseKey, 256); | |
4600 ucol_getSortKey(myCollation, before, beforeLen, beforeKey, 256); | |
4601 if (baseKey[0] != beforeKey[0]) { | |
4602 log_err("Different lead byte for sort keys using before fule and after s
cript reordering. base character lead byte = %02x, before character lead byte =
%02x\n", baseKey[0], beforeKey[0]); | |
4603 } | |
4604 | |
4605 ucol_close(myCollation); | |
4606 } | |
4607 | |
4608 /* | |
4609 * Test that in a primary-compressed sort key all bytes except the first one are
unchanged under script reordering. | |
4610 */ | |
4611 static void TestNonLeadBytesDuringCollationReordering(void) | |
4612 { | |
4613 UErrorCode status = U_ZERO_ERROR; | |
4614 UCollator *myCollation; | |
4615 int32_t reorderCodes[1] = {USCRIPT_GREEK}; | |
4616 | |
4617 uint8_t baseKey[256]; | |
4618 uint32_t baseKeyLength; | |
4619 uint8_t reorderKey[256]; | |
4620 uint32_t reorderKeyLength; | |
4621 | |
4622 UChar testString[] = { 0x03b1, 0x03b2, 0x03b3 }; | |
4623 | |
4624 uint32_t i; | |
4625 | |
4626 | |
4627 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); | |
4628 | |
4629 /* build collator tertiary */ | |
4630 myCollation = ucol_open("", &status); | |
4631 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
4632 if(U_FAILURE(status)) { | |
4633 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
4634 return; | |
4635 } | |
4636 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), ba
seKey, 256); | |
4637 | |
4638 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); | |
4639 if(U_FAILURE(status)) { | |
4640 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName
(status)); | |
4641 return; | |
4642 } | |
4643 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString),
reorderKey, 256); | |
4644 | |
4645 if (baseKeyLength != reorderKeyLength) { | |
4646 log_err("Key lengths not the same during reordering.\n"); | |
4647 return; | |
4648 } | |
4649 | |
4650 for (i = 1; i < baseKeyLength; i++) { | |
4651 if (baseKey[i] != reorderKey[i]) { | |
4652 log_err("Collation key bytes not the same at position %d.\n", i); | |
4653 return; | |
4654 } | |
4655 } | |
4656 ucol_close(myCollation); | |
4657 | |
4658 /* build collator quaternary */ | |
4659 myCollation = ucol_open("", &status); | |
4660 ucol_setStrength(myCollation, UCOL_QUATERNARY); | |
4661 if(U_FAILURE(status)) { | |
4662 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
4663 return; | |
4664 } | |
4665 baseKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString), ba
seKey, 256); | |
4666 | |
4667 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); | |
4668 if(U_FAILURE(status)) { | |
4669 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName
(status)); | |
4670 return; | |
4671 } | |
4672 reorderKeyLength = ucol_getSortKey(myCollation, testString, LEN(testString),
reorderKey, 256); | |
4673 | |
4674 if (baseKeyLength != reorderKeyLength) { | |
4675 log_err("Key lengths not the same during reordering.\n"); | |
4676 return; | |
4677 } | |
4678 | |
4679 for (i = 1; i < baseKeyLength; i++) { | |
4680 if (baseKey[i] != reorderKey[i]) { | |
4681 log_err("Collation key bytes not the same at position %d.\n", i); | |
4682 return; | |
4683 } | |
4684 } | |
4685 ucol_close(myCollation); | |
4686 } | |
4687 | |
4688 /* | |
4689 * Test reordering API. | |
4690 */ | |
4691 static void TestReorderingAPI(void) | |
4692 { | |
4693 UErrorCode status = U_ZERO_ERROR; | |
4694 UCollator *myCollation; | |
4695 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; | |
4696 int32_t duplicateReorderCodes[] = {USCRIPT_HIRAGANA, USCRIPT_GREEK, UCOL_REO
RDER_CODE_CURRENCY, USCRIPT_KATAKANA}; | |
4697 int32_t reorderCodesStartingWithDefault[] = {UCOL_REORDER_CODE_DEFAULT, USCR
IPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUNCTUATION}; | |
4698 int32_t reorderCodeNone = UCOL_REORDER_CODE_NONE; | |
4699 UCollationResult collResult; | |
4700 int32_t retrievedReorderCodesLength; | |
4701 int32_t retrievedReorderCodes[10]; | |
4702 UChar greekString[] = { 0x03b1 }; | |
4703 UChar punctuationString[] = { 0x203e }; | |
4704 int loopIndex; | |
4705 | |
4706 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); | |
4707 | |
4708 /* build collator tertiary */ | |
4709 myCollation = ucol_open("", &status); | |
4710 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
4711 if(U_FAILURE(status)) { | |
4712 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
4713 return; | |
4714 } | |
4715 | |
4716 /* set the reorderding */ | |
4717 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); | |
4718 if (U_FAILURE(status)) { | |
4719 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName
(status)); | |
4720 return; | |
4721 } | |
4722 | |
4723 /* get the reordering */ | |
4724 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st
atus); | |
4725 if (status != U_BUFFER_OVERFLOW_ERROR) { | |
4726 log_err_status(status, "ERROR: getting error codes should have returned
U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); | |
4727 return; | |
4728 } | |
4729 status = U_ZERO_ERROR; | |
4730 if (retrievedReorderCodesLength != LEN(reorderCodes)) { | |
4731 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); | |
4732 return; | |
4733 } | |
4734 /* now let's really get it */ | |
4735 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo
rderCodes, LEN(retrievedReorderCodes), &status); | |
4736 if (U_FAILURE(status)) { | |
4737 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName
(status)); | |
4738 return; | |
4739 } | |
4740 if (retrievedReorderCodesLength != LEN(reorderCodes)) { | |
4741 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); | |
4742 return; | |
4743 } | |
4744 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { | |
4745 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { | |
4746 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); | |
4747 return; | |
4748 } | |
4749 } | |
4750 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu
ationString, LEN(punctuationString)); | |
4751 if (collResult != UCOL_LESS) { | |
4752 log_err_status(status, "ERROR: collation result should have been UCOL_LE
SS\n"); | |
4753 return; | |
4754 } | |
4755 | |
4756 /* clear the reordering */ | |
4757 ucol_setReorderCodes(myCollation, NULL, 0, &status); | |
4758 if (U_FAILURE(status)) { | |
4759 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myE
rrorName(status)); | |
4760 return; | |
4761 } | |
4762 | |
4763 /* get the reordering again */ | |
4764 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st
atus); | |
4765 if (retrievedReorderCodesLength != 0) { | |
4766 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, 0); | |
4767 return; | |
4768 } | |
4769 | |
4770 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu
ationString, LEN(punctuationString)); | |
4771 if (collResult != UCOL_GREATER) { | |
4772 log_err_status(status, "ERROR: collation result should have been UCOL_GR
EATER\n"); | |
4773 return; | |
4774 } | |
4775 | |
4776 /* clear the reordering using [NONE] */ | |
4777 ucol_setReorderCodes(myCollation, &reorderCodeNone, 1, &status); | |
4778 if (U_FAILURE(status)) { | |
4779 log_err_status(status, "ERROR: setting reorder codes to [NONE]: %s\n", m
yErrorName(status)); | |
4780 return; | |
4781 } | |
4782 | |
4783 /* get the reordering again */ | |
4784 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st
atus); | |
4785 if (retrievedReorderCodesLength != 0) { | |
4786 log_err_status(status, | |
4787 "ERROR: [NONE] retrieved reorder codes length was %d but
should have been 0\n", | |
4788 retrievedReorderCodesLength); | |
4789 return; | |
4790 } | |
4791 | |
4792 /* test for error condition on duplicate reorder codes */ | |
4793 ucol_setReorderCodes(myCollation, duplicateReorderCodes, LEN(duplicateReorde
rCodes), &status); | |
4794 if (!U_FAILURE(status)) { | |
4795 log_err_status(status, "ERROR: setting duplicate reorder codes did not g
enerate a failure\n"); | |
4796 return; | |
4797 } | |
4798 | |
4799 status = U_ZERO_ERROR; | |
4800 /* test for reorder codes after a reset code */ | |
4801 ucol_setReorderCodes(myCollation, reorderCodesStartingWithDefault, LEN(reord
erCodesStartingWithDefault), &status); | |
4802 if (!U_FAILURE(status)) { | |
4803 log_err_status(status, "ERROR: reorderd code sequence starting with defa
ult and having following codes didn't cause an error\n"); | |
4804 return; | |
4805 } | |
4806 | |
4807 ucol_close(myCollation); | |
4808 } | |
4809 | |
4810 /* | |
4811 * Test the reordering API on a collator created from rules. | |
4812 */ | |
4813 static void TestReorderingAPIWithRuleCreatedCollator(void) | |
4814 { | |
4815 UErrorCode status = U_ZERO_ERROR; | |
4816 UCollator *myCollation; | |
4817 UChar rules[90]; | |
4818 static const int32_t rulesReorderCodes[2] = {USCRIPT_HAN, USCRIPT_GREEK}; | |
4819 static const int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REO
RDER_CODE_PUNCTUATION}; | |
4820 static const int32_t onlyDefault[1] = {UCOL_REORDER_CODE_DEFAULT}; | |
4821 UCollationResult collResult; | |
4822 int32_t retrievedReorderCodesLength; | |
4823 int32_t retrievedReorderCodes[10]; | |
4824 static const UChar greekString[] = { 0x03b1 }; | |
4825 static const UChar punctuationString[] = { 0x203e }; | |
4826 static const UChar hanString[] = { 0x65E5, 0x672C }; | |
4827 int loopIndex; | |
4828 | |
4829 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); | |
4830 | |
4831 /* build collator from rules */ | |
4832 u_uastrcpy(rules, "[reorder Hani Grek]"); | |
4833 myCollation = ucol_openRules(rules, u_strlen(rules), UCOL_DEFAULT, UCOL_TERT
IARY, NULL, &status); | |
4834 if(U_FAILURE(status)) { | |
4835 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
4836 return; | |
4837 } | |
4838 | |
4839 /* get the reordering */ | |
4840 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo
rderCodes, LEN(retrievedReorderCodes), &status); | |
4841 if (U_FAILURE(status)) { | |
4842 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName
(status)); | |
4843 return; | |
4844 } | |
4845 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) { | |
4846 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes)); | |
4847 return; | |
4848 } | |
4849 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { | |
4850 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { | |
4851 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); | |
4852 return; | |
4853 } | |
4854 } | |
4855 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), hanStr
ing, LEN(hanString)); | |
4856 if (collResult != UCOL_GREATER) { | |
4857 log_err_status(status, "ERROR: collation result should have been UCOL_GR
EATER\n"); | |
4858 return; | |
4859 } | |
4860 | |
4861 /* set the reordering */ | |
4862 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); | |
4863 if (U_FAILURE(status)) { | |
4864 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName
(status)); | |
4865 return; | |
4866 } | |
4867 | |
4868 /* get the reordering */ | |
4869 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st
atus); | |
4870 if (status != U_BUFFER_OVERFLOW_ERROR) { | |
4871 log_err_status(status, "ERROR: getting error codes should have returned
U_BUFFER_OVERFLOW_ERROR : %s\n", myErrorName(status)); | |
4872 return; | |
4873 } | |
4874 status = U_ZERO_ERROR; | |
4875 if (retrievedReorderCodesLength != LEN(reorderCodes)) { | |
4876 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); | |
4877 return; | |
4878 } | |
4879 /* now let's really get it */ | |
4880 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo
rderCodes, LEN(retrievedReorderCodes), &status); | |
4881 if (U_FAILURE(status)) { | |
4882 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName
(status)); | |
4883 return; | |
4884 } | |
4885 if (retrievedReorderCodesLength != LEN(reorderCodes)) { | |
4886 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); | |
4887 return; | |
4888 } | |
4889 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { | |
4890 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { | |
4891 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); | |
4892 return; | |
4893 } | |
4894 } | |
4895 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu
ationString, LEN(punctuationString)); | |
4896 if (collResult != UCOL_LESS) { | |
4897 log_err_status(status, "ERROR: collation result should have been UCOL_LE
SS\n"); | |
4898 return; | |
4899 } | |
4900 | |
4901 /* clear the reordering */ | |
4902 ucol_setReorderCodes(myCollation, NULL, 0, &status); | |
4903 if (U_FAILURE(status)) { | |
4904 log_err_status(status, "ERROR: setting reorder codes to NULL: %s\n", myE
rrorName(status)); | |
4905 return; | |
4906 } | |
4907 | |
4908 /* get the reordering again */ | |
4909 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, NULL, 0, &st
atus); | |
4910 if (retrievedReorderCodesLength != 0) { | |
4911 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, 0); | |
4912 return; | |
4913 } | |
4914 | |
4915 collResult = ucol_strcoll(myCollation, greekString, LEN(greekString), punctu
ationString, LEN(punctuationString)); | |
4916 if (collResult != UCOL_GREATER) { | |
4917 log_err_status(status, "ERROR: collation result should have been UCOL_GR
EATER\n"); | |
4918 return; | |
4919 } | |
4920 | |
4921 /* reset the reordering */ | |
4922 ucol_setReorderCodes(myCollation, onlyDefault, 1, &status); | |
4923 if (U_FAILURE(status)) { | |
4924 log_err_status(status, "ERROR: setting reorder codes to {default}: %s\n"
, myErrorName(status)); | |
4925 return; | |
4926 } | |
4927 retrievedReorderCodesLength = ucol_getReorderCodes(myCollation, retrievedReo
rderCodes, LEN(retrievedReorderCodes), &status); | |
4928 if (U_FAILURE(status)) { | |
4929 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName
(status)); | |
4930 return; | |
4931 } | |
4932 if (retrievedReorderCodesLength != LEN(rulesReorderCodes)) { | |
4933 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(rulesReorderCodes)); | |
4934 return; | |
4935 } | |
4936 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { | |
4937 if (retrievedReorderCodes[loopIndex] != rulesReorderCodes[loopIndex]) { | |
4938 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); | |
4939 return; | |
4940 } | |
4941 } | |
4942 | |
4943 ucol_close(myCollation); | |
4944 } | |
4945 | |
4946 static UBool containsExpectedScript(const int32_t scripts[], int32_t length, int
32_t expectedScript) { | |
4947 int32_t i; | |
4948 for (i = 0; i < length; ++i) { | |
4949 if (expectedScript == scripts[i]) { return TRUE; } | |
4950 } | |
4951 return FALSE; | |
4952 } | |
4953 | |
4954 static void TestEquivalentReorderingScripts(void) { | |
4955 // Beginning with ICU 55, collation reordering moves single scripts | |
4956 // rather than groups of scripts, | |
4957 // except where scripts share a range and sort primary-equal. | |
4958 UErrorCode status = U_ZERO_ERROR; | |
4959 int32_t equivalentScripts[100]; | |
4960 int32_t length; | |
4961 int i; | |
4962 int32_t prevScript; | |
4963 /* These scripts are expected to be equivalent. */ | |
4964 static const int32_t expectedScripts[] = { | |
4965 USCRIPT_HIRAGANA, | |
4966 USCRIPT_KATAKANA, | |
4967 USCRIPT_KATAKANA_OR_HIRAGANA | |
4968 }; | |
4969 | |
4970 equivalentScripts[0] = 0; | |
4971 length = ucol_getEquivalentReorderCodes( | |
4972 USCRIPT_GOTHIC, equivalentScripts, LEN(equivalentScripts), &status); | |
4973 if (U_FAILURE(status)) { | |
4974 log_err_status(status, "ERROR/Gothic: retrieving equivalent reorder code
s: %s\n", myErrorName(status)); | |
4975 return; | |
4976 } | |
4977 if (length != 1 || equivalentScripts[0] != USCRIPT_GOTHIC) { | |
4978 log_err("ERROR/Gothic: retrieved equivalent scripts wrong: " | |
4979 "length expected 1, was = %d; expected [%d] was [%d]\n", | |
4980 length, USCRIPT_GOTHIC, equivalentScripts[0]); | |
4981 } | |
4982 | |
4983 length = ucol_getEquivalentReorderCodes( | |
4984 USCRIPT_HIRAGANA, equivalentScripts, LEN(equivalentScripts), &status
); | |
4985 if (U_FAILURE(status)) { | |
4986 log_err_status(status, "ERROR/Hiragana: retrieving equivalent reorder co
des: %s\n", myErrorName(status)); | |
4987 return; | |
4988 } | |
4989 if (length != LEN(expectedScripts)) { | |
4990 log_err("ERROR/Hiragana: retrieved equivalent script length wrong: " | |
4991 "expected %d, was = %d\n", | |
4992 LEN(expectedScripts), length); | |
4993 } | |
4994 prevScript = -1; | |
4995 for (i = 0; i < length; ++i) { | |
4996 int32_t script = equivalentScripts[i]; | |
4997 if (script <= prevScript) { | |
4998 log_err("ERROR/Hiragana: equivalent scripts out of order at index %d
\n", i); | |
4999 } | |
5000 prevScript = script; | |
5001 } | |
5002 for (i = 0; i < LEN(expectedScripts); i++) { | |
5003 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { | |
5004 log_err("ERROR/Hiragana: equivalent scripts do not contain %d\n", | |
5005 expectedScripts[i]); | |
5006 } | |
5007 } | |
5008 | |
5009 length = ucol_getEquivalentReorderCodes( | |
5010 USCRIPT_KATAKANA, equivalentScripts, LEN(equivalentScripts), &status
); | |
5011 if (U_FAILURE(status)) { | |
5012 log_err_status(status, "ERROR/Katakana: retrieving equivalent reorder co
des: %s\n", myErrorName(status)); | |
5013 return; | |
5014 } | |
5015 if (length != LEN(expectedScripts)) { | |
5016 log_err("ERROR/Katakana: retrieved equivalent script length wrong: " | |
5017 "expected %d, was = %d\n", | |
5018 LEN(expectedScripts), length); | |
5019 } | |
5020 for (i = 0; i < LEN(expectedScripts); i++) { | |
5021 if (!containsExpectedScript(equivalentScripts, length, expectedScripts[i
])) { | |
5022 log_err("ERROR/Katakana: equivalent scripts do not contain %d\n", | |
5023 expectedScripts[i]); | |
5024 } | |
5025 } | |
5026 | |
5027 length = ucol_getEquivalentReorderCodes( | |
5028 USCRIPT_KATAKANA_OR_HIRAGANA, equivalentScripts, LEN(equivalentScrip
ts), &status); | |
5029 if (U_FAILURE(status) || length != LEN(expectedScripts)) { | |
5030 log_err("ERROR/Hrkt: retrieved equivalent script length wrong: " | |
5031 "expected %d, was = %d\n", | |
5032 LEN(expectedScripts), length); | |
5033 } | |
5034 | |
5035 length = ucol_getEquivalentReorderCodes( | |
5036 USCRIPT_HAN, equivalentScripts, LEN(equivalentScripts), &status); | |
5037 if (U_FAILURE(status) || length != 3) { | |
5038 log_err("ERROR/Hani: retrieved equivalent script length wrong: " | |
5039 "expected 3, was = %d\n", length); | |
5040 } | |
5041 length = ucol_getEquivalentReorderCodes( | |
5042 USCRIPT_SIMPLIFIED_HAN, equivalentScripts, LEN(equivalentScripts), &
status); | |
5043 if (U_FAILURE(status) || length != 3) { | |
5044 log_err("ERROR/Hans: retrieved equivalent script length wrong: " | |
5045 "expected 3, was = %d\n", length); | |
5046 } | |
5047 length = ucol_getEquivalentReorderCodes( | |
5048 USCRIPT_TRADITIONAL_HAN, equivalentScripts, LEN(equivalentScripts),
&status); | |
5049 if (U_FAILURE(status) || length != 3) { | |
5050 log_err("ERROR/Hant: retrieved equivalent script length wrong: " | |
5051 "expected 3, was = %d\n", length); | |
5052 } | |
5053 | |
5054 length = ucol_getEquivalentReorderCodes( | |
5055 USCRIPT_MEROITIC_CURSIVE, equivalentScripts, LEN(equivalentScripts),
&status); | |
5056 if (U_FAILURE(status) || length != 2) { | |
5057 log_err("ERROR/Merc: retrieved equivalent script length wrong: " | |
5058 "expected 2, was = %d\n", length); | |
5059 } | |
5060 length = ucol_getEquivalentReorderCodes( | |
5061 USCRIPT_MEROITIC_HIEROGLYPHS, equivalentScripts, LEN(equivalentScrip
ts), &status); | |
5062 if (U_FAILURE(status) || length != 2) { | |
5063 log_err("ERROR/Mero: retrieved equivalent script length wrong: " | |
5064 "expected 2, was = %d\n", length); | |
5065 } | |
5066 } | |
5067 | |
5068 static void TestReorderingAcrossCloning(void) | |
5069 { | |
5070 UErrorCode status = U_ZERO_ERROR; | |
5071 UCollator *myCollation; | |
5072 int32_t reorderCodes[3] = {USCRIPT_GREEK, USCRIPT_HAN, UCOL_REORDER_CODE_PUN
CTUATION}; | |
5073 UCollator *clonedCollation; | |
5074 int32_t retrievedReorderCodesLength; | |
5075 int32_t retrievedReorderCodes[10]; | |
5076 int loopIndex; | |
5077 | |
5078 log_verbose("Testing non-lead bytes in a sort key with and without reorderin
g\n"); | |
5079 | |
5080 /* build collator tertiary */ | |
5081 myCollation = ucol_open("", &status); | |
5082 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
5083 if(U_FAILURE(status)) { | |
5084 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
5085 return; | |
5086 } | |
5087 | |
5088 /* set the reorderding */ | |
5089 ucol_setReorderCodes(myCollation, reorderCodes, LEN(reorderCodes), &status); | |
5090 if (U_FAILURE(status)) { | |
5091 log_err_status(status, "ERROR: setting reorder codes: %s\n", myErrorName
(status)); | |
5092 return; | |
5093 } | |
5094 | |
5095 /* clone the collator */ | |
5096 clonedCollation = ucol_safeClone(myCollation, NULL, NULL, &status); | |
5097 if (U_FAILURE(status)) { | |
5098 log_err_status(status, "ERROR: cloning collator: %s\n", myErrorName(stat
us)); | |
5099 return; | |
5100 } | |
5101 | |
5102 /* get the reordering */ | |
5103 retrievedReorderCodesLength = ucol_getReorderCodes(clonedCollation, retrieve
dReorderCodes, LEN(retrievedReorderCodes), &status); | |
5104 if (U_FAILURE(status)) { | |
5105 log_err_status(status, "ERROR: getting reorder codes: %s\n", myErrorName
(status)); | |
5106 return; | |
5107 } | |
5108 if (retrievedReorderCodesLength != LEN(reorderCodes)) { | |
5109 log_err_status(status, "ERROR: retrieved reorder codes length was %d but
should have been %d\n", retrievedReorderCodesLength, LEN(reorderCodes)); | |
5110 return; | |
5111 } | |
5112 for (loopIndex = 0; loopIndex < retrievedReorderCodesLength; loopIndex++) { | |
5113 if (retrievedReorderCodes[loopIndex] != reorderCodes[loopIndex]) { | |
5114 log_err_status(status, "ERROR: retrieved reorder code doesn't match
set reorder code at index %d\n", loopIndex); | |
5115 return; | |
5116 } | |
5117 } | |
5118 | |
5119 /*uprv_free(buffer);*/ | |
5120 ucol_close(myCollation); | |
5121 ucol_close(clonedCollation); | |
5122 } | |
5123 | |
5124 /* | |
5125 * Utility function to test one collation reordering test case set. | |
5126 * @param testcases Array of test cases. | |
5127 * @param n_testcases Size of the array testcases. | |
5128 * @param reorderTokens Array of reordering codes. | |
5129 * @param reorderTokensLen Size of the array reorderTokens. | |
5130 */ | |
5131 static void doTestOneReorderingAPITestCase(const OneTestCase testCases[], uint32
_t testCasesLen, const int32_t reorderTokens[], int32_t reorderTokensLen) | |
5132 { | |
5133 uint32_t testCaseNum; | |
5134 UErrorCode status = U_ZERO_ERROR; | |
5135 UCollator *myCollation; | |
5136 | |
5137 myCollation = ucol_open("", &status); | |
5138 if (U_FAILURE(status)) { | |
5139 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
5140 return; | |
5141 } | |
5142 ucol_setReorderCodes(myCollation, reorderTokens, reorderTokensLen, &status); | |
5143 if(U_FAILURE(status)) { | |
5144 log_err_status(status, "ERROR: while setting script order: %s\n", myErro
rName(status)); | |
5145 return; | |
5146 } | |
5147 | |
5148 for (testCaseNum = 0; testCaseNum < testCasesLen; ++testCaseNum) { | |
5149 doTest(myCollation, | |
5150 testCases[testCaseNum].source, | |
5151 testCases[testCaseNum].target, | |
5152 testCases[testCaseNum].result | |
5153 ); | |
5154 } | |
5155 ucol_close(myCollation); | |
5156 } | |
5157 | |
5158 static void TestGreekFirstReorder(void) | |
5159 { | |
5160 const char* strRules[] = { | |
5161 "[reorder Grek]" | |
5162 }; | |
5163 | |
5164 const int32_t apiRules[] = { | |
5165 USCRIPT_GREEK | |
5166 }; | |
5167 | |
5168 const static OneTestCase privateUseCharacterStrings[] = { | |
5169 { {0x0391}, {0x0391}, UCOL_EQUAL }, | |
5170 { {0x0041}, {0x0391}, UCOL_GREATER }, | |
5171 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_GREATER }, | |
5172 { {0x0060}, {0x0391}, UCOL_LESS }, | |
5173 { {0x0391}, {0xe2dc}, UCOL_LESS }, | |
5174 { {0x0391}, {0x0060}, UCOL_GREATER }, | |
5175 }; | |
5176 | |
5177 /* Test rules creation */ | |
5178 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings
), strRules, LEN(strRules)); | |
5179 | |
5180 /* Test collation reordering API */ | |
5181 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha
racterStrings), apiRules, LEN(apiRules)); | |
5182 } | |
5183 | |
5184 static void TestGreekLastReorder(void) | |
5185 { | |
5186 const char* strRules[] = { | |
5187 "[reorder Zzzz Grek]" | |
5188 }; | |
5189 | |
5190 const int32_t apiRules[] = { | |
5191 USCRIPT_UNKNOWN, USCRIPT_GREEK | |
5192 }; | |
5193 | |
5194 const static OneTestCase privateUseCharacterStrings[] = { | |
5195 { {0x0391}, {0x0391}, UCOL_EQUAL }, | |
5196 { {0x0041}, {0x0391}, UCOL_LESS }, | |
5197 { {0x03B1, 0x0041}, {0x03B1, 0x0391}, UCOL_LESS }, | |
5198 { {0x0060}, {0x0391}, UCOL_LESS }, | |
5199 { {0x0391}, {0xe2dc}, UCOL_GREATER }, | |
5200 }; | |
5201 | |
5202 /* Test rules creation */ | |
5203 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings
), strRules, LEN(strRules)); | |
5204 | |
5205 /* Test collation reordering API */ | |
5206 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha
racterStrings), apiRules, LEN(apiRules)); | |
5207 } | |
5208 | |
5209 static void TestNonScriptReorder(void) | |
5210 { | |
5211 const char* strRules[] = { | |
5212 "[reorder Grek Symbol DIGIT Latn Punct space Zzzz cURRENCy]" | |
5213 }; | |
5214 | |
5215 const int32_t apiRules[] = { | |
5216 USCRIPT_GREEK, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIP
T_LATIN, | |
5217 UCOL_REORDER_CODE_PUNCTUATION, UCOL_REORDER_CODE_SPACE, USCRIPT_UNKNOWN,
| |
5218 UCOL_REORDER_CODE_CURRENCY | |
5219 }; | |
5220 | |
5221 const static OneTestCase privateUseCharacterStrings[] = { | |
5222 { {0x0391}, {0x0041}, UCOL_LESS }, | |
5223 { {0x0041}, {0x0391}, UCOL_GREATER }, | |
5224 { {0x0060}, {0x0041}, UCOL_LESS }, | |
5225 { {0x0060}, {0x0391}, UCOL_GREATER }, | |
5226 { {0x0024}, {0x0041}, UCOL_GREATER }, | |
5227 }; | |
5228 | |
5229 /* Test rules creation */ | |
5230 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings
), strRules, LEN(strRules)); | |
5231 | |
5232 /* Test collation reordering API */ | |
5233 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha
racterStrings), apiRules, LEN(apiRules)); | |
5234 } | |
5235 | |
5236 static void TestHaniReorder(void) | |
5237 { | |
5238 const char* strRules[] = { | |
5239 "[reorder Hani]" | |
5240 }; | |
5241 const int32_t apiRules[] = { | |
5242 USCRIPT_HAN | |
5243 }; | |
5244 | |
5245 const static OneTestCase privateUseCharacterStrings[] = { | |
5246 { {0x4e00}, {0x0041}, UCOL_LESS }, | |
5247 { {0x4e00}, {0x0060}, UCOL_GREATER }, | |
5248 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, | |
5249 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, | |
5250 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, | |
5251 { {0xfa27}, {0x0041}, UCOL_LESS }, | |
5252 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, | |
5253 }; | |
5254 | |
5255 /* Test rules creation */ | |
5256 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings
), strRules, LEN(strRules)); | |
5257 | |
5258 /* Test collation reordering API */ | |
5259 doTestOneReorderingAPITestCase(privateUseCharacterStrings, LEN(privateUseCha
racterStrings), apiRules, LEN(apiRules)); | |
5260 } | |
5261 | |
5262 static void TestHaniReorderWithOtherRules(void) | |
5263 { | |
5264 const char* strRules[] = { | |
5265 "[reorder Hani] &b<a" | |
5266 }; | |
5267 /*const int32_t apiRules[] = { | |
5268 USCRIPT_HAN | |
5269 };*/ | |
5270 | |
5271 const static OneTestCase privateUseCharacterStrings[] = { | |
5272 { {0x4e00}, {0x0041}, UCOL_LESS }, | |
5273 { {0x4e00}, {0x0060}, UCOL_GREATER }, | |
5274 { {0xD86D, 0xDF40}, {0x0041}, UCOL_LESS }, | |
5275 { {0xD86D, 0xDF40}, {0x0060}, UCOL_GREATER }, | |
5276 { {0x4e00}, {0xD86D, 0xDF40}, UCOL_LESS }, | |
5277 { {0xfa27}, {0x0041}, UCOL_LESS }, | |
5278 { {0xD869, 0xDF00}, {0x0041}, UCOL_LESS }, | |
5279 { {0x0062}, {0x0061}, UCOL_LESS }, | |
5280 }; | |
5281 | |
5282 /* Test rules creation */ | |
5283 doTestOneTestCase(privateUseCharacterStrings, LEN(privateUseCharacterStrings
), strRules, LEN(strRules)); | |
5284 } | |
5285 | |
5286 static void TestMultipleReorder(void) | |
5287 { | |
5288 const char* strRules[] = { | |
5289 "[reorder Grek Zzzz DIGIT Latn Hani]" | |
5290 }; | |
5291 | |
5292 const int32_t apiRules[] = { | |
5293 USCRIPT_GREEK, USCRIPT_UNKNOWN, UCOL_REORDER_CODE_DIGIT, USCRIPT_LATIN,
USCRIPT_HAN | |
5294 }; | |
5295 | |
5296 const static OneTestCase collationTestCases[] = { | |
5297 { {0x0391}, {0x0041}, UCOL_LESS}, | |
5298 { {0x0031}, {0x0041}, UCOL_LESS}, | |
5299 { {0x0041}, {0x4e00}, UCOL_LESS}, | |
5300 }; | |
5301 | |
5302 /* Test rules creation */ | |
5303 doTestOneTestCase(collationTestCases, LEN(collationTestCases), strRules, LEN
(strRules)); | |
5304 | |
5305 /* Test collation reordering API */ | |
5306 doTestOneReorderingAPITestCase(collationTestCases, LEN(collationTestCases),
apiRules, LEN(apiRules)); | |
5307 } | |
5308 | |
5309 /* | |
5310 * Test that covers issue reported in ticket 8814 | |
5311 */ | |
5312 static void TestReorderWithNumericCollation(void) | |
5313 { | |
5314 UErrorCode status = U_ZERO_ERROR; | |
5315 UCollator *myCollation; | |
5316 UCollator *myReorderCollation; | |
5317 int32_t reorderCodes[] = {UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUA
TION, UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_DIGIT, USCRIPT_GREEK,USCRIPT_L
ATIN, USCRIPT_HEBREW, UCOL_REORDER_CODE_OTHERS}; | |
5318 /* UChar fortyS[] = { 0x0034, 0x0030, 0x0053 }; | |
5319 UChar fortyThreeP[] = { 0x0034, 0x0033, 0x0050 }; */ | |
5320 UChar fortyS[] = { 0x0053 }; | |
5321 UChar fortyThreeP[] = { 0x0050 }; | |
5322 uint8_t fortyS_sortKey[128]; | |
5323 int32_t fortyS_sortKey_Length; | |
5324 uint8_t fortyThreeP_sortKey[128]; | |
5325 int32_t fortyThreeP_sortKey_Length; | |
5326 uint8_t fortyS_sortKey_reorder[128]; | |
5327 int32_t fortyS_sortKey_reorder_Length; | |
5328 uint8_t fortyThreeP_sortKey_reorder[128]; | |
5329 int32_t fortyThreeP_sortKey_reorder_Length; | |
5330 UCollationResult collResult; | |
5331 UCollationResult collResultReorder; | |
5332 | |
5333 log_verbose("Testing reordering with and without numeric collation\n"); | |
5334 | |
5335 /* build collator tertiary with numeric */ | |
5336 myCollation = ucol_open("", &status); | |
5337 /* | |
5338 ucol_setStrength(myCollation, UCOL_TERTIARY); | |
5339 */ | |
5340 ucol_setAttribute(myCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &status); | |
5341 if(U_FAILURE(status)) { | |
5342 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
5343 return; | |
5344 } | |
5345 | |
5346 /* build collator tertiary with numeric and reordering */ | |
5347 myReorderCollation = ucol_open("", &status); | |
5348 /* | |
5349 ucol_setStrength(myReorderCollation, UCOL_TERTIARY); | |
5350 */ | |
5351 ucol_setAttribute(myReorderCollation, UCOL_NUMERIC_COLLATION, UCOL_ON, &stat
us); | |
5352 ucol_setReorderCodes(myReorderCollation, reorderCodes, LEN(reorderCodes), &s
tatus); | |
5353 if(U_FAILURE(status)) { | |
5354 log_err_status(status, "ERROR: in creation of collator: %s\n", myErrorNa
me(status)); | |
5355 return; | |
5356 } | |
5357 | |
5358 fortyS_sortKey_Length = ucol_getSortKey(myCollation, fortyS, LEN(fortyS), fo
rtyS_sortKey, 128); | |
5359 fortyThreeP_sortKey_Length = ucol_getSortKey(myCollation, fortyThreeP, LEN(f
ortyThreeP), fortyThreeP_sortKey, 128); | |
5360 fortyS_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, fortyS,
LEN(fortyS), fortyS_sortKey_reorder, 128); | |
5361 fortyThreeP_sortKey_reorder_Length = ucol_getSortKey(myReorderCollation, for
tyThreeP, LEN(fortyThreeP), fortyThreeP_sortKey_reorder, 128); | |
5362 | |
5363 if (fortyS_sortKey_Length < 0 || fortyThreeP_sortKey_Length < 0 || fortyS_so
rtKey_reorder_Length < 0 || fortyThreeP_sortKey_reorder_Length < 0) { | |
5364 log_err_status(status, "ERROR: couldn't generate sort keys\n"); | |
5365 return; | |
5366 } | |
5367 collResult = ucol_strcoll(myCollation, fortyS, LEN(fortyS), fortyThreeP, LEN
(fortyThreeP)); | |
5368 collResultReorder = ucol_strcoll(myReorderCollation, fortyS, LEN(fortyS), fo
rtyThreeP, LEN(fortyThreeP)); | |
5369 /* | |
5370 fprintf(stderr, "\tcollResult = %x\n", collResult); | |
5371 fprintf(stderr, "\tcollResultReorder = %x\n", collResultReorder); | |
5372 fprintf(stderr, "\nfortyS\n"); | |
5373 for (i = 0; i < fortyS_sortKey_Length; i++) { | |
5374 fprintf(stderr, "%x --- %x\n", fortyS_sortKey[i], fortyS_sortKey_reorder
[i]); | |
5375 } | |
5376 fprintf(stderr, "\nfortyThreeP\n"); | |
5377 for (i = 0; i < fortyThreeP_sortKey_Length; i++) { | |
5378 fprintf(stderr, "%x --- %x\n", fortyThreeP_sortKey[i], fortyThreeP_sortK
ey_reorder[i]); | |
5379 } | |
5380 */ | |
5381 if (collResult != collResultReorder) { | |
5382 log_err_status(status, "ERROR: collation results should have been the sa
me.\n"); | |
5383 return; | |
5384 } | |
5385 | |
5386 ucol_close(myCollation); | |
5387 ucol_close(myReorderCollation); | |
5388 } | |
5389 | |
/**
 * Compares two zero-terminated byte sequences byte by byte.
 *
 * @param a First zero-terminated byte sequence.
 * @param b Second zero-terminated byte sequence.
 * @return 0 if both sequences are identical up to and including the
 *         terminating zero; otherwise -1 if the first differing byte of a is
 *         smaller than that of b, and 1 if it is larger.
 */
static int compare_uint8_t_arrays(const uint8_t* a, const uint8_t* b)
{
    const uint8_t* left = a;
    const uint8_t* right = b;

    while (*left == *right) {
        if (*left == 0) {
            /* Reached the common terminator without finding a difference. */
            return 0;
        }
        ++left;
        ++right;
    }
    if (*left < *right) {
        return -1;
    }
    return 1;
}
5399 | |
5400 static void TestImportRulesDeWithPhonebook(void) | |
5401 { | |
5402 const char* normalRules[] = { | |
5403 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc", | |
5404 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc", | |
5405 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc", | |
5406 }; | |
5407 const OneTestCase normalTests[] = { | |
5408 { {0x00e6}, {0x00c6}, UCOL_LESS}, | |
5409 { {0x00fc}, {0x00dc}, UCOL_GREATER}, | |
5410 }; | |
5411 | |
5412 const char* importRules[] = { | |
5413 "&a<\\u00e6<\\u00c6<\\u00dc<\\u00fc[import de-u-co-phonebk]", | |
5414 "&a<<\\u00e6<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]", | |
5415 "&a<<\\u00e6<<<\\u00c6<<\\u00dc<<\\u00fc[import de-u-co-phonebk]", | |
5416 }; | |
5417 const OneTestCase importTests[] = { | |
5418 { {0x00e6}, {0x00c6}, UCOL_LESS}, | |
5419 { {0x00fc}, {0x00dc}, UCOL_LESS}, | |
5420 }; | |
5421 | |
5422 doTestOneTestCase(normalTests, LEN(normalTests), normalRules, LEN(normalRules)
); | |
5423 doTestOneTestCase(importTests, LEN(importTests), importRules, LEN(importRules)
); | |
5424 } | |
5425 | |
#if 0
/*
 * Disabled test: compares the default order, the European Ordering Rules
 * import, the Finnish standard import, and the combination of both imports.
 */
static void TestImportRulesFiWithEor(void)
{
    /* DUCET. */
    const char* ducetRules[] = {
        "&a<b",                                                                 /* Dummy rule. */
    };
    const OneTestCase ducetCases[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* European Ordering rules: ignore currency characters. */
    const char* europeanRules[] = {
        "[import root-u-co-eor]",
    };
    const OneTestCase europeanCases[] = {
        { {0x0110}, {0x00F0}, UCOL_LESS},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    /* Finnish standard tailoring. */
    const char* finnishRules[] = {
        "[import fi-u-co-standard]",
    };
    const OneTestCase finnishCases[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_LESS},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_LESS},
    };

    /* Both European Ordering Rules and Fi Standard Rules. */
    const char* europeanThenFinnishRules[] = {
        "[import root-u-co-eor][import fi-u-co-standard]",
    };

    /* This is essentially same as the one before once fi.txt is updated with import. */
    const char* finnishEorRules[] = {
        "[import fi-u-co-eor]",
    };

    const OneTestCase finnishEorCases[] = {
        { {0x0110}, {0x00F0}, UCOL_GREATER},
        { {0x00a3}, {0x00a5}, UCOL_EQUAL},
        { {0x0061}, {0x0061, 0x00a3}, UCOL_EQUAL},
    };

    doTestOneTestCase(ducetCases, LEN(ducetCases), ducetRules, LEN(ducetRules));
    doTestOneTestCase(europeanCases, LEN(europeanCases), europeanRules, LEN(europeanRules));
    doTestOneTestCase(finnishCases, LEN(finnishCases), finnishRules, LEN(finnishRules));
    doTestOneTestCase(finnishEorCases, LEN(finnishEorCases), europeanThenFinnishRules, LEN(europeanThenFinnishRules));

    log_knownIssue("8962", NULL);
    /* TODO: Fix ICU ticket #8962 by uncommenting the following test after fi.txt is updated with the following rule:
        eor{
            Sequence{
                "[import root-u-co-eor][import fi-u-co-standard]"
            }
            Version{"21.0"}
        }
    */
    /* doTestOneTestCase(finnishEorCases, LEN(finnishEorCases), finnishEorRules, LEN(finnishEorRules)); */

}
#endif
5495 | |
#if 0
/*
 * This test case tests inclusion with the unihan rules, but this cannot be included now, unless
 * the resource files are built with -includeUnihanColl option.
 * TODO: Uncomment this function and make it work when unihan rules are built by default.
 */
static void TestImportRulesCJKWithUnihan(void)
{
    /* DUCET. */
    const char* ducetRules[] = {
        "&a<b",                                                                 /* Dummy rule. */
    };
    const OneTestCase ducetCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_GREATER},
    };

    /* Unihan ordering imported from the Korean collation data. */
    const char* importedUnihanRules[] = {
        "[import ko-u-co-unihan]",
    };
    const OneTestCase importedUnihanCases[] = {
        { {0x3402}, {0x4e1e}, UCOL_LESS},
    };

    doTestOneTestCase(ducetCases, LEN(ducetCases), ducetRules, LEN(ducetRules));
    doTestOneTestCase(importedUnihanCases, LEN(importedUnihanCases), importedUnihanRules, LEN(importedUnihanRules));

}
#endif
5527 | |
5528 static void TestImport(void) | |
5529 { | |
5530 UCollator* vicoll; | |
5531 UCollator* escoll; | |
5532 UCollator* viescoll; | |
5533 UCollator* importviescoll; | |
5534 UParseError error; | |
5535 UErrorCode status = U_ZERO_ERROR; | |
5536 UChar* virules; | |
5537 int32_t viruleslength; | |
5538 UChar* esrules; | |
5539 int32_t esruleslength; | |
5540 UChar* viesrules; | |
5541 int32_t viesruleslength; | |
5542 char srules[500] = "[import vi][import es]"; | |
5543 UChar rules[500]; | |
5544 uint32_t length = 0; | |
5545 int32_t itemCount; | |
5546 int32_t i, k; | |
5547 UChar32 start; | |
5548 UChar32 end; | |
5549 UChar str[500]; | |
5550 int32_t strLength; | |
5551 | |
5552 uint8_t sk1[500]; | |
5553 uint8_t sk2[500]; | |
5554 | |
5555 UBool b; | |
5556 USet* tailoredSet; | |
5557 USet* importTailoredSet; | |
5558 | |
5559 | |
5560 vicoll = ucol_open("vi", &status); | |
5561 if(U_FAILURE(status)){ | |
5562 log_err_status(status, "ERROR: Call ucol_open(\"vi\", ...): %s\n", myErr
orName(status)); | |
5563 return; | |
5564 } | |
5565 | |
5566 virules = (UChar*) ucol_getRules(vicoll, &viruleslength); | |
5567 if(viruleslength == 0) { | |
5568 log_data_err("missing vi tailoring rule string\n"); | |
5569 ucol_close(vicoll); | |
5570 return; | |
5571 } | |
5572 escoll = ucol_open("es", &status); | |
5573 esrules = (UChar*) ucol_getRules(escoll, &esruleslength); | |
5574 viesrules = (UChar*)uprv_malloc((viruleslength+esruleslength+1)*sizeof(UChar
*)); | |
5575 viesrules[0] = 0; | |
5576 u_strcat(viesrules, virules); | |
5577 u_strcat(viesrules, esrules); | |
5578 viesruleslength = viruleslength + esruleslength; | |
5579 viescoll = ucol_openRules(viesrules, viesruleslength, UCOL_ON, UCOL_TERTIARY
, &error, &status); | |
5580 | |
5581 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ | |
5582 length = u_unescape(srules, rules, 500); | |
5583 importviescoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &erro
r, &status); | |
5584 if(U_FAILURE(status)){ | |
5585 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
5586 return; | |
5587 } | |
5588 | |
5589 tailoredSet = ucol_getTailoredSet(viescoll, &status); | |
5590 importTailoredSet = ucol_getTailoredSet(importviescoll, &status); | |
5591 | |
5592 if(!uset_equals(tailoredSet, importTailoredSet)){ | |
5593 log_err("Tailored sets not equal"); | |
5594 } | |
5595 | |
5596 uset_close(importTailoredSet); | |
5597 | |
5598 itemCount = uset_getItemCount(tailoredSet); | |
5599 | |
5600 for( i = 0; i < itemCount; i++){ | |
5601 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status
); | |
5602 if(strLength < 2){ | |
5603 for (; start <= end; start++){ | |
5604 k = 0; | |
5605 U16_APPEND(str, k, 500, start, b); | |
5606 (void)b; /* Suppress set but not used warning. */ | |
5607 ucol_getSortKey(viescoll, str, 1, sk1, 500); | |
5608 ucol_getSortKey(importviescoll, str, 1, sk2, 500); | |
5609 if(compare_uint8_t_arrays(sk1, sk2) != 0){ | |
5610 log_err("Sort key for %s not equal\n", str); | |
5611 break; | |
5612 } | |
5613 } | |
5614 }else{ | |
5615 ucol_getSortKey(viescoll, str, strLength, sk1, 500); | |
5616 ucol_getSortKey(importviescoll, str, strLength, sk2, 500); | |
5617 if(compare_uint8_t_arrays(sk1, sk2) != 0){ | |
5618 log_err("ZZSort key for %s not equal\n", str); | |
5619 break; | |
5620 } | |
5621 | |
5622 } | |
5623 } | |
5624 | |
5625 uset_close(tailoredSet); | |
5626 | |
5627 uprv_free(viesrules); | |
5628 | |
5629 ucol_close(vicoll); | |
5630 ucol_close(escoll); | |
5631 ucol_close(viescoll); | |
5632 ucol_close(importviescoll); | |
5633 } | |
5634 | |
5635 static void TestImportWithType(void) | |
5636 { | |
5637 UCollator* vicoll; | |
5638 UCollator* decoll; | |
5639 UCollator* videcoll; | |
5640 UCollator* importvidecoll; | |
5641 UParseError error; | |
5642 UErrorCode status = U_ZERO_ERROR; | |
5643 const UChar* virules; | |
5644 int32_t viruleslength; | |
5645 const UChar* derules; | |
5646 int32_t deruleslength; | |
5647 UChar* viderules; | |
5648 int32_t videruleslength; | |
5649 const char srules[500] = "[import vi][import de-u-co-phonebk]"; | |
5650 UChar rules[500]; | |
5651 uint32_t length = 0; | |
5652 int32_t itemCount; | |
5653 int32_t i, k; | |
5654 UChar32 start; | |
5655 UChar32 end; | |
5656 UChar str[500]; | |
5657 int32_t strLength; | |
5658 | |
5659 uint8_t sk1[500]; | |
5660 uint8_t sk2[500]; | |
5661 | |
5662 USet* tailoredSet; | |
5663 USet* importTailoredSet; | |
5664 | |
5665 vicoll = ucol_open("vi", &status); | |
5666 if(U_FAILURE(status)){ | |
5667 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
5668 return; | |
5669 } | |
5670 virules = ucol_getRules(vicoll, &viruleslength); | |
5671 if(viruleslength == 0) { | |
5672 log_data_err("missing vi tailoring rule string\n"); | |
5673 ucol_close(vicoll); | |
5674 return; | |
5675 } | |
5676 /* decoll = ucol_open("de@collation=phonebook", &status); */ | |
5677 decoll = ucol_open("de-u-co-phonebk", &status); | |
5678 if(U_FAILURE(status)){ | |
5679 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
5680 return; | |
5681 } | |
5682 | |
5683 | |
5684 derules = ucol_getRules(decoll, &deruleslength); | |
5685 viderules = (UChar*)uprv_malloc((viruleslength+deruleslength+1)*sizeof(UChar
*)); | |
5686 viderules[0] = 0; | |
5687 u_strcat(viderules, virules); | |
5688 u_strcat(viderules, derules); | |
5689 videruleslength = viruleslength + deruleslength; | |
5690 videcoll = ucol_openRules(viderules, videruleslength, UCOL_ON, UCOL_TERTIARY
, &error, &status); | |
5691 | |
5692 /* u_strFromUTF8(rules, 500, &length, srules, strlen(srules), &status); */ | |
5693 length = u_unescape(srules, rules, 500); | |
5694 importvidecoll = ucol_openRules(rules, length, UCOL_ON, UCOL_TERTIARY, &erro
r, &status); | |
5695 if(U_FAILURE(status)){ | |
5696 log_err_status(status, "ERROR: in creation of rule based collator: %s\n"
, myErrorName(status)); | |
5697 return; | |
5698 } | |
5699 | |
5700 tailoredSet = ucol_getTailoredSet(videcoll, &status); | |
5701 importTailoredSet = ucol_getTailoredSet(importvidecoll, &status); | |
5702 | |
5703 if(!uset_equals(tailoredSet, importTailoredSet)){ | |
5704 log_err("Tailored sets not equal"); | |
5705 } | |
5706 | |
5707 uset_close(importTailoredSet); | |
5708 | |
5709 itemCount = uset_getItemCount(tailoredSet); | |
5710 | |
5711 for( i = 0; i < itemCount; i++){ | |
5712 strLength = uset_getItem(tailoredSet, i, &start, &end, str, 500, &status
); | |
5713 if(strLength < 2){ | |
5714 for (; start <= end; start++){ | |
5715 k = 0; | |
5716 U16_APPEND_UNSAFE(str, k, start); | |
5717 ucol_getSortKey(videcoll, str, 1, sk1, 500); | |
5718 ucol_getSortKey(importvidecoll, str, 1, sk2, 500); | |
5719 if(compare_uint8_t_arrays(sk1, sk2) != 0){ | |
5720 log_err("Sort key for %s not equal\n", str); | |
5721 break; | |
5722 } | |
5723 } | |
5724 }else{ | |
5725 ucol_getSortKey(videcoll, str, strLength, sk1, 500); | |
5726 ucol_getSortKey(importvidecoll, str, strLength, sk2, 500); | |
5727 if(compare_uint8_t_arrays(sk1, sk2) != 0){ | |
5728 log_err("Sort key for %s not equal\n", str); | |
5729 break; | |
5730 } | |
5731 | |
5732 } | |
5733 } | |
5734 | |
5735 uset_close(tailoredSet); | |
5736 | |
5737 uprv_free(viderules); | |
5738 | |
5739 ucol_close(videcoll); | |
5740 ucol_close(importvidecoll); | |
5741 ucol_close(vicoll); | |
5742 ucol_close(decoll); | |
5743 } | |
5744 | |
5745 /* 'IV INTERNATIONAL SCIENTIFIC - PRACTICAL CONFERENCE "GEOPOLITICS, GEOECONOMIC
S AND INTERNATIONAL RELATIONS PROBLEMS" 22-23 June 2010, St. Petersburg, Russia'
*/ | |
5746 static const UChar longUpperStr1[]= { /* 155 chars */ | |
5747 0x49, 0x56, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F
, 0x4E, 0x41, 0x4C, | |
5748 0x20, 0x53, 0x43, 0x49, 0x45, 0x4E, 0x54, 0x49, 0x46, 0x49, 0x43, 0x20, 0x2D
, 0x20, 0x50, 0x52, | |
5749 0x41, 0x43, 0x54, 0x49, 0x43, 0x41, 0x4C, 0x20, 0x43, 0x4F, 0x4E, 0x46, 0x45
, 0x52, 0x45, 0x4E, | |
5750 0x43, 0x45, 0x20, 0x22, 0x47, 0x45, 0x4F, 0x50, 0x4F, 0x4C, 0x49, 0x54, 0x49
, 0x43, 0x53, 0x2C, | |
5751 0x20, 0x47, 0x45, 0x4F, 0x45, 0x43, 0x4F, 0x4E, 0x4F, 0x4D, 0x49, 0x43, 0x53
, 0x20, 0x41, 0x4E, | |
5752 0x44, 0x20, 0x49, 0x4E, 0x54, 0x45, 0x52, 0x4E, 0x41, 0x54, 0x49, 0x4F, 0x4E
, 0x41, 0x4C, 0x20, | |
5753 0x52, 0x45, 0x4C, 0x41, 0x54, 0x49, 0x4F, 0x4E, 0x53, 0x20, 0x50, 0x52, 0x4F
, 0x42, 0x4C, 0x45, | |
5754 0x4D, 0x53, 0x22, 0x20, 0x32, 0x32, 0x2D, 0x32, 0x33, 0x20, 0x4A, 0x75, 0x6E
, 0x65, 0x20, 0x32, | |
5755 0x30, 0x31, 0x30, 0x2C, 0x20, 0x53, 0x74, 0x2E, 0x20, 0x50, 0x65, 0x74, 0x65
, 0x72, 0x73, 0x62, | |
5756 0x75, 0x72, 0x67, 0x2C, 0x20, 0x52, 0x75, 0x73, 0x73, 0x69, 0x61 | |
5757 }; | |
5758 | |
5759 /* 'BACEDIFOGUHAJEKILOMUNAPE ' with diacritics on vowels, repeated 5 times */ | |
5760 static const UChar longUpperStr2[]= { /* 125 chars, > 128 collation elements */ | |
5761 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0
xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, | |
5762 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0
xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, | |
5763 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0
xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, | |
5764 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0
xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20, | |
5765 0x42,0xC1,0x43,0xC9,0x44,0xCD,0x46,0xD3,0x47,0xDA,0x48,0xC0,0x4A,0xC8,0x4B,0
xCC,0x4C,0xD2,0x4D,0xD9,0x4E,0xC2,0x50,0xCA,0x20 | |
5766 }; | |
5767 | |
5768 /* 'ABCDEFGHIJKLMNOPQRSTUVWXYZ ' repeated 12 times */ | |
5769 static const UChar longUpperStr3[]= { /* 324 chars */ | |
5770 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5771 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5772 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5773 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5774 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5775 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5776 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5777 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5778 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5779 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5780 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20, | |
5781 0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0
x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x20 | |
5782 }; | |
5783 | |
/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesized in both places so that any array-valued expression can be
 * passed safely.
 */
#define MY_ARRAY_LEN(array) (sizeof(array)/sizeof((array)[0]))
5785 | |
5786 typedef struct { | |
5787 const UChar * longUpperStrPtr; | |
5788 int32_t longUpperStrLen; | |
5789 } LongUpperStrItem; | |
5790 | |
5791 /* String pointers must be in reverse collation order of the corresponding strin
gs */ | |
5792 static const LongUpperStrItem longUpperStrItems[] = { | |
5793 { longUpperStr1, MY_ARRAY_LEN(longUpperStr1) }, | |
5794 { longUpperStr2, MY_ARRAY_LEN(longUpperStr2) }, | |
5795 { longUpperStr3, MY_ARRAY_LEN(longUpperStr3) }, | |
5796 { NULL, 0 } | |
5797 }; | |
5798 | |
/* Sort key buffer capacity in bytes; may change with collation changes. */
enum {
    kCollKeyLenMax = 850
};
5800 | |
5801 /* Text fix for #8445; without fix, could have crash due to stack or heap corrup
tion */ | |
5802 static void TestCaseLevelBufferOverflow(void) | |
5803 { | |
5804 UErrorCode status = U_ZERO_ERROR; | |
5805 UCollator * ucol = ucol_open("root", &status); | |
5806 if ( U_SUCCESS(status) ) { | |
5807 ucol_setAttribute(ucol, UCOL_CASE_LEVEL, UCOL_ON, &status); | |
5808 if ( U_SUCCESS(status) ) { | |
5809 const LongUpperStrItem * itemPtr; | |
5810 uint8_t sortKeyA[kCollKeyLenMax], sortKeyB[kCollKeyLenMax]; | |
5811 for ( itemPtr = longUpperStrItems; itemPtr->longUpperStrPtr != NULL;
itemPtr++ ) { | |
5812 int32_t sortKeyLen; | |
5813 if (itemPtr > longUpperStrItems) { | |
5814 uprv_strcpy((char *)sortKeyB, (char *)sortKeyA); | |
5815 } | |
5816 sortKeyLen = ucol_getSortKey(ucol, itemPtr->longUpperStrPtr, ite
mPtr->longUpperStrLen, sortKeyA, kCollKeyLenMax); | |
5817 if (sortKeyLen <= 0 || sortKeyLen > kCollKeyLenMax) { | |
5818 log_err("ERROR sort key length from ucol_getSortKey is %d\n"
, sortKeyLen); | |
5819 break; | |
5820 } | |
5821 if ( itemPtr > longUpperStrItems ) { | |
5822 int compareResult = uprv_strcmp((char *)sortKeyA, (char *)so
rtKeyB); | |
5823 if (compareResult >= 0) { | |
5824 log_err("ERROR in sort key comparison result, expected -
1, got %d\n", compareResult); | |
5825 } | |
5826 } | |
5827 } | |
5828 } else { | |
5829 log_err_status(status, "ERROR in ucol_setAttribute UCOL_CASE_LEVEL o
n: %s\n", myErrorName(status)); | |
5830 } | |
5831 ucol_close(ucol); | |
5832 } else { | |
5833 log_err_status(status, "ERROR in ucol_open for root: %s\n", myErrorName(
status)); | |
5834 } | |
5835 } | |
5836 | |
5837 /* Test for #10595 */ | |
5838 static const UChar testJapaneseName[] = {0x4F50, 0x3005, 0x6728, 0x002C, 0x6B66,
0}; /* Sa sa Ki, Takeshi */ | |
5839 #define KEY_PART_SIZE 16 | |
5840 | |
5841 static void TestNextSortKeyPartJaIdentical(void) | |
5842 { | |
5843 UErrorCode status = U_ZERO_ERROR; | |
5844 UCollator *coll; | |
5845 uint8_t keyPart[KEY_PART_SIZE]; | |
5846 UCharIterator iter; | |
5847 uint32_t state[2] = {0, 0}; | |
5848 int32_t keyPartLen; | |
5849 | |
5850 coll = ucol_open("ja", &status); | |
5851 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_IDENTICAL, &status); | |
5852 if (U_FAILURE(status)) { | |
5853 log_err_status(status, "ERROR: in creation of Japanese collator with ide
ntical strength: %s\n", myErrorName(status)); | |
5854 return; | |
5855 } | |
5856 | |
5857 uiter_setString(&iter, testJapaneseName, 5); | |
5858 keyPartLen = KEY_PART_SIZE; | |
5859 while (keyPartLen == KEY_PART_SIZE) { | |
5860 keyPartLen = ucol_nextSortKeyPart(coll, &iter, state, keyPart, KEY_PART_
SIZE, &status); | |
5861 if (U_FAILURE(status)) { | |
5862 log_err_status(status, "ERROR: in iterating next sort key part: %s\n
", myErrorName(status)); | |
5863 break; | |
5864 } | |
5865 } | |
5866 | |
5867 ucol_close(coll); | |
5868 } | |
5869 | |
5870 #define TEST(x) addTest(root, &x, "tscoll/cmsccoll/" # x) | |
5871 | |
5872 void addMiscCollTest(TestNode** root) | |
5873 { | |
5874 TEST(TestRuleOptions); | |
5875 TEST(TestBeforePrefixFailure); | |
5876 TEST(TestContractionClosure); | |
5877 TEST(TestPrefixCompose); | |
5878 TEST(TestStrCollIdenticalPrefix); | |
5879 TEST(TestPrefix); | |
5880 TEST(TestNewJapanese); | |
5881 /*TEST(TestLimitations);*/ | |
5882 TEST(TestNonChars); | |
5883 TEST(TestExtremeCompression); | |
5884 TEST(TestSurrogates); | |
5885 TEST(TestVariableTopSetting); | |
5886 TEST(TestMaxVariable); | |
5887 TEST(TestBocsuCoverage); | |
5888 TEST(TestCyrillicTailoring); | |
5889 TEST(TestCase); | |
5890 TEST(IncompleteCntTest); | |
5891 TEST(BlackBirdTest); | |
5892 TEST(FunkyATest); | |
5893 TEST(BillFairmanTest); | |
5894 TEST(TestChMove); | |
5895 TEST(TestImplicitTailoring); | |
5896 TEST(TestFCDProblem); | |
5897 TEST(TestEmptyRule); | |
5898 /*TEST(TestJ784);*/ /* 'zh' locale has changed - now it is getting tested by
TestBeforePinyin */ | |
5899 TEST(TestJ815); | |
5900 /*TEST(TestJ831);*/ /* we changed lv locale */ | |
5901 TEST(TestBefore); | |
5902 TEST(TestHangulTailoring); | |
5903 TEST(TestUCARules); | |
5904 TEST(TestIncrementalNormalize); | |
5905 TEST(TestComposeDecompose); | |
5906 TEST(TestCompressOverlap); | |
5907 TEST(TestContraction); | |
5908 TEST(TestExpansion); | |
5909 /*TEST(PrintMarkDavis);*/ /* this test doesn't test - just prints sortkeys *
/ | |
5910 /*TEST(TestGetCaseBit);*/ /*this one requires internal things to be exported
*/ | |
5911 TEST(TestOptimize); | |
5912 TEST(TestSuppressContractions); | |
5913 TEST(Alexis2); | |
5914 TEST(TestHebrewUCA); | |
5915 TEST(TestPartialSortKeyTermination); | |
5916 TEST(TestSettings); | |
5917 TEST(TestEquals); | |
5918 TEST(TestJ2726); | |
5919 TEST(NullRule); | |
5920 TEST(TestNumericCollation); | |
5921 TEST(TestTibetanConformance); | |
5922 TEST(TestPinyinProblem); | |
5923 TEST(TestSeparateTrees); | |
5924 TEST(TestBeforePinyin); | |
5925 TEST(TestBeforeTightening); | |
5926 /*TEST(TestMoreBefore);*/ | |
5927 TEST(TestTailorNULL); | |
5928 TEST(TestUpperFirstQuaternary); | |
5929 TEST(TestJ4960); | |
5930 TEST(TestJ5223); | |
5931 TEST(TestJ5232); | |
5932 TEST(TestJ5367); | |
5933 TEST(TestHiragana); | |
5934 TEST(TestSortKeyConsistency); | |
5935 TEST(TestVI5913); /* VI, RO tailored rules */ | |
5936 TEST(TestCroatianSortKey); | |
5937 TEST(TestTailor6179); | |
5938 TEST(TestUCAPrecontext); | |
5939 TEST(TestOutOfBuffer5468); | |
5940 TEST(TestSameStrengthList); | |
5941 | |
5942 TEST(TestSameStrengthListQuoted); | |
5943 TEST(TestSameStrengthListSupplemental); | |
5944 TEST(TestSameStrengthListQwerty); | |
5945 TEST(TestSameStrengthListQuotedQwerty); | |
5946 TEST(TestSameStrengthListRanges); | |
5947 TEST(TestSameStrengthListSupplementalRanges); | |
5948 TEST(TestSpecialCharacters); | |
5949 TEST(TestPrivateUseCharacters); | |
5950 TEST(TestPrivateUseCharactersInList); | |
5951 TEST(TestPrivateUseCharactersInRange); | |
5952 TEST(TestInvalidListsAndRanges); | |
5953 TEST(TestImportRulesDeWithPhonebook); | |
5954 /* TEST(TestImportRulesFiWithEor); EOR rules removed from CLDR 21 */ | |
5955 /* TEST(TestImportRulesCJKWithUnihan); */ | |
5956 TEST(TestImport); | |
5957 TEST(TestImportWithType); | |
5958 | |
5959 TEST(TestBeforeRuleWithScriptReordering); | |
5960 TEST(TestNonLeadBytesDuringCollationReordering); | |
5961 TEST(TestReorderingAPI); | |
5962 TEST(TestReorderingAPIWithRuleCreatedCollator); | |
5963 TEST(TestEquivalentReorderingScripts); | |
5964 TEST(TestGreekFirstReorder); | |
5965 TEST(TestGreekLastReorder); | |
5966 TEST(TestNonScriptReorder); | |
5967 TEST(TestHaniReorder); | |
5968 TEST(TestHaniReorderWithOtherRules); | |
5969 TEST(TestMultipleReorder); | |
5970 TEST(TestReorderingAcrossCloning); | |
5971 TEST(TestReorderWithNumericCollation); | |
5972 | |
5973 TEST(TestCaseLevelBufferOverflow); | |
5974 TEST(TestNextSortKeyPartJaIdentical); | |
5975 } | |
5976 | |
5977 #endif /* #if !UCONFIG_NO_COLLATION */ | |
OLD | NEW |