OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 1997-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ |
| 6 /*******************************************************************************
* |
| 7 * |
| 8 * File CITERTST.C |
| 9 * |
| 10 * Modification History: |
| 11 * Date Name Description |
| 12 * Madhu Katragadda Ported for C API |
| 13 * 02/19/01 synwee Modified test case for new collation iterator |
| 14 ********************************************************************************
*/ |
| 15 /* |
| 16 * Collation Iterator tests. |
| 17 * (Let me reiterate my position...) |
| 18 */ |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 |
| 22 #if !UCONFIG_NO_COLLATION |
| 23 |
| 24 #include "unicode/ucol.h" |
| 25 #include "unicode/uloc.h" |
| 26 #include "unicode/uchar.h" |
| 27 #include "unicode/ustring.h" |
| 28 #include "unicode/putil.h" |
| 29 #include "callcoll.h" |
| 30 #include "cmemory.h" |
| 31 #include "cintltst.h" |
| 32 #include "citertst.h" |
| 33 #include "ccolltst.h" |
| 34 #include "filestrm.h" |
| 35 #include "cstring.h" |
| 36 #include "ucol_imp.h" |
| 37 #include "ucol_tok.h" |
| 38 #include "uparse.h" |
| 39 #include <stdio.h> |
| 40 |
| 41 extern uint8_t ucol_uprv_getCaseBits(const UChar *, uint32_t, UErrorCode *); |
| 42 |
| 43 void addCollIterTest(TestNode** root) |
| 44 { |
| 45 addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious"); |
| 46 addTest(root, &TestOffset, "tscoll/citertst/TestOffset"); |
| 47 addTest(root, &TestSetText, "tscoll/citertst/TestSetText"); |
| 48 addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion"); |
| 49 addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar"); |
| 50 addTest(root, &TestNormalizedUnicodeChar, |
| 51 "tscoll/citertst/TestNormalizedUnicodeChar"); |
| 52 addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization"); |
| 53 addTest(root, &TestBug672, "tscoll/citertst/TestBug672"); |
| 54 addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize"); |
| 55 addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer"); |
| 56 addTest(root, &TestCEs, "tscoll/citertst/TestCEs"); |
| 57 addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos"); |
| 58 addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow")
; |
| 59 addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity"); |
| 60 addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity"); |
| 61 } |
| 62 |
/* Locales iterated, in this order, by TestBug672 and TestBug672Normalize. */
static const char * LOCALES[] = {
    "en_AU",
    "en_BE",
    "en_CA"
};
| 66 |
| 67 static void TestBug672() { |
| 68 UErrorCode status = U_ZERO_ERROR; |
| 69 UChar pattern[20]; |
| 70 UChar text[50]; |
| 71 int i; |
| 72 int result[3][3]; |
| 73 |
| 74 u_uastrcpy(pattern, "resume"); |
| 75 u_uastrcpy(text, "Time to resume updating my resume."); |
| 76 |
| 77 for (i = 0; i < 3; ++ i) { |
| 78 UCollator *coll = ucol_open(LOCALES[i], &status); |
| 79 UCollationElements *pitr = ucol_openElements(coll, pattern, -1, |
| 80 &status); |
| 81 UCollationElements *titer = ucol_openElements(coll, text, -1, |
| 82 &status); |
| 83 if (U_FAILURE(status)) { |
| 84 log_err_status(status, "ERROR: in creation of either the collator or
the collation iterator :%s\n", |
| 85 myErrorName(status)); |
| 86 return; |
| 87 } |
| 88 |
| 89 log_verbose("locale tested %s\n", LOCALES[i]); |
| 90 |
| 91 while (ucol_next(pitr, &status) != UCOL_NULLORDER && |
| 92 U_SUCCESS(status)) { |
| 93 } |
| 94 if (U_FAILURE(status)) { |
| 95 log_err("ERROR: reversing collation iterator :%s\n", |
| 96 myErrorName(status)); |
| 97 return; |
| 98 } |
| 99 ucol_reset(pitr); |
| 100 |
| 101 ucol_setOffset(titer, u_strlen(pattern), &status); |
| 102 if (U_FAILURE(status)) { |
| 103 log_err("ERROR: setting offset in collator :%s\n", |
| 104 myErrorName(status)); |
| 105 return; |
| 106 } |
| 107 result[i][0] = ucol_getOffset(titer); |
| 108 log_verbose("Text iterator set to offset %d\n", result[i][0]); |
| 109 |
| 110 /* Use previous() */ |
| 111 ucol_previous(titer, &status); |
| 112 result[i][1] = ucol_getOffset(titer); |
| 113 log_verbose("Current offset %d after previous\n", result[i][1]); |
| 114 |
| 115 /* Add one to index */ |
| 116 log_verbose("Adding one to current offset...\n"); |
| 117 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); |
| 118 if (U_FAILURE(status)) { |
| 119 log_err("ERROR: setting offset in collator :%s\n", |
| 120 myErrorName(status)); |
| 121 return; |
| 122 } |
| 123 result[i][2] = ucol_getOffset(titer); |
| 124 log_verbose("Current offset in text = %d\n", result[i][2]); |
| 125 ucol_closeElements(pitr); |
| 126 ucol_closeElements(titer); |
| 127 ucol_close(coll); |
| 128 } |
| 129 |
| 130 if (uprv_memcmp(result[0], result[1], 3) != 0 || |
| 131 uprv_memcmp(result[1], result[2], 3) != 0) { |
| 132 log_err("ERROR: Different locales have different offsets at the same cha
racter\n"); |
| 133 } |
| 134 } |
| 135 |
| 136 |
| 137 |
| 138 /* Running this test with normalization enabled showed up a bug in the incremen
tal |
| 139 normalization code. */ |
| 140 static void TestBug672Normalize() { |
| 141 UErrorCode status = U_ZERO_ERROR; |
| 142 UChar pattern[20]; |
| 143 UChar text[50]; |
| 144 int i; |
| 145 int result[3][3]; |
| 146 |
| 147 u_uastrcpy(pattern, "resume"); |
| 148 u_uastrcpy(text, "Time to resume updating my resume."); |
| 149 |
| 150 for (i = 0; i < 3; ++ i) { |
| 151 UCollator *coll = ucol_open(LOCALES[i], &status); |
| 152 UCollationElements *pitr = NULL; |
| 153 UCollationElements *titer = NULL; |
| 154 |
| 155 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 156 |
| 157 pitr = ucol_openElements(coll, pattern, -1, &status); |
| 158 titer = ucol_openElements(coll, text, -1, &status); |
| 159 if (U_FAILURE(status)) { |
| 160 log_err_status(status, "ERROR: in creation of either the collator or
the collation iterator :%s\n", |
| 161 myErrorName(status)); |
| 162 return; |
| 163 } |
| 164 |
| 165 log_verbose("locale tested %s\n", LOCALES[i]); |
| 166 |
| 167 while (ucol_next(pitr, &status) != UCOL_NULLORDER && |
| 168 U_SUCCESS(status)) { |
| 169 } |
| 170 if (U_FAILURE(status)) { |
| 171 log_err("ERROR: reversing collation iterator :%s\n", |
| 172 myErrorName(status)); |
| 173 return; |
| 174 } |
| 175 ucol_reset(pitr); |
| 176 |
| 177 ucol_setOffset(titer, u_strlen(pattern), &status); |
| 178 if (U_FAILURE(status)) { |
| 179 log_err("ERROR: setting offset in collator :%s\n", |
| 180 myErrorName(status)); |
| 181 return; |
| 182 } |
| 183 result[i][0] = ucol_getOffset(titer); |
| 184 log_verbose("Text iterator set to offset %d\n", result[i][0]); |
| 185 |
| 186 /* Use previous() */ |
| 187 ucol_previous(titer, &status); |
| 188 result[i][1] = ucol_getOffset(titer); |
| 189 log_verbose("Current offset %d after previous\n", result[i][1]); |
| 190 |
| 191 /* Add one to index */ |
| 192 log_verbose("Adding one to current offset...\n"); |
| 193 ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status); |
| 194 if (U_FAILURE(status)) { |
| 195 log_err("ERROR: setting offset in collator :%s\n", |
| 196 myErrorName(status)); |
| 197 return; |
| 198 } |
| 199 result[i][2] = ucol_getOffset(titer); |
| 200 log_verbose("Current offset in text = %d\n", result[i][2]); |
| 201 ucol_closeElements(pitr); |
| 202 ucol_closeElements(titer); |
| 203 ucol_close(coll); |
| 204 } |
| 205 |
| 206 if (uprv_memcmp(result[0], result[1], 3) != 0 || |
| 207 uprv_memcmp(result[1], result[2], 3) != 0) { |
| 208 log_err("ERROR: Different locales have different offsets at the same cha
racter\n"); |
| 209 } |
| 210 } |
| 211 |
| 212 |
| 213 |
| 214 |
| 215 /** |
| 216 * Test for CollationElementIterator previous and next for the whole set of |
| 217 * unicode characters. |
| 218 */ |
| 219 static void TestUnicodeChar() |
| 220 { |
| 221 UChar source[0x100]; |
| 222 UCollator *en_us; |
| 223 UCollationElements *iter; |
| 224 UErrorCode status = U_ZERO_ERROR; |
| 225 UChar codepoint; |
| 226 |
| 227 UChar *test; |
| 228 en_us = ucol_open("en_US", &status); |
| 229 if (U_FAILURE(status)){ |
| 230 log_err_status(status, "ERROR: in creation of collation data using ucol_o
pen()\n %s\n", |
| 231 myErrorName(status)); |
| 232 return; |
| 233 } |
| 234 |
| 235 for (codepoint = 1; codepoint < 0xFFFE;) |
| 236 { |
| 237 test = source; |
| 238 |
| 239 while (codepoint % 0xFF != 0) |
| 240 { |
| 241 if (u_isdefined(codepoint)) |
| 242 *(test ++) = codepoint; |
| 243 codepoint ++; |
| 244 } |
| 245 |
| 246 if (u_isdefined(codepoint)) |
| 247 *(test ++) = codepoint; |
| 248 |
| 249 if (codepoint != 0xFFFF) |
| 250 codepoint ++; |
| 251 |
| 252 *test = 0; |
| 253 iter=ucol_openElements(en_us, source, u_strlen(source), &status); |
| 254 if(U_FAILURE(status)){ |
| 255 log_err("ERROR: in creation of collation element iterator using ucol_o
penElements()\n %s\n", |
| 256 myErrorName(status)); |
| 257 ucol_close(en_us); |
| 258 return; |
| 259 } |
| 260 /* A basic test to see if it's working at all */ |
| 261 log_verbose("codepoint testing %x\n", codepoint); |
| 262 backAndForth(iter); |
| 263 ucol_closeElements(iter); |
| 264 |
| 265 /* null termination test */ |
| 266 iter=ucol_openElements(en_us, source, -1, &status); |
| 267 if(U_FAILURE(status)){ |
| 268 log_err("ERROR: in creation of collation element iterator using ucol_o
penElements()\n %s\n", |
| 269 myErrorName(status)); |
| 270 ucol_close(en_us); |
| 271 return; |
| 272 } |
| 273 /* A basic test to see if it's working at all */ |
| 274 backAndForth(iter); |
| 275 ucol_closeElements(iter); |
| 276 } |
| 277 |
| 278 ucol_close(en_us); |
| 279 } |
| 280 |
| 281 /** |
| 282 * Test for CollationElementIterator previous and next for the whole set of |
| 283 * unicode characters with normalization on. |
| 284 */ |
| 285 static void TestNormalizedUnicodeChar() |
| 286 { |
| 287 UChar source[0x100]; |
| 288 UCollator *th_th; |
| 289 UCollationElements *iter; |
| 290 UErrorCode status = U_ZERO_ERROR; |
| 291 UChar codepoint; |
| 292 |
| 293 UChar *test; |
| 294 /* thai should have normalization on */ |
| 295 th_th = ucol_open("th_TH", &status); |
| 296 if (U_FAILURE(status)){ |
| 297 log_err_status(status, "ERROR: in creation of thai collation using ucol_
open()\n %s\n", |
| 298 myErrorName(status)); |
| 299 return; |
| 300 } |
| 301 |
| 302 for (codepoint = 1; codepoint < 0xFFFE;) |
| 303 { |
| 304 test = source; |
| 305 |
| 306 while (codepoint % 0xFF != 0) |
| 307 { |
| 308 if (u_isdefined(codepoint)) |
| 309 *(test ++) = codepoint; |
| 310 codepoint ++; |
| 311 } |
| 312 |
| 313 if (u_isdefined(codepoint)) |
| 314 *(test ++) = codepoint; |
| 315 |
| 316 if (codepoint != 0xFFFF) |
| 317 codepoint ++; |
| 318 |
| 319 *test = 0; |
| 320 iter=ucol_openElements(th_th, source, u_strlen(source), &status); |
| 321 if(U_FAILURE(status)){ |
| 322 log_err("ERROR: in creation of collation element iterator using ucol_o
penElements()\n %s\n", |
| 323 myErrorName(status)); |
| 324 ucol_close(th_th); |
| 325 return; |
| 326 } |
| 327 |
| 328 backAndForth(iter); |
| 329 ucol_closeElements(iter); |
| 330 |
| 331 iter=ucol_openElements(th_th, source, -1, &status); |
| 332 if(U_FAILURE(status)){ |
| 333 log_err("ERROR: in creation of collation element iterator using ucol_o
penElements()\n %s\n", |
| 334 myErrorName(status)); |
| 335 ucol_close(th_th); |
| 336 return; |
| 337 } |
| 338 |
| 339 backAndForth(iter); |
| 340 ucol_closeElements(iter); |
| 341 } |
| 342 |
| 343 ucol_close(th_th); |
| 344 } |
| 345 |
| 346 /** |
| 347 * Test the incremental normalization |
| 348 */ |
| 349 static void TestNormalization() |
| 350 { |
| 351 UErrorCode status = U_ZERO_ERROR; |
| 352 const char *str = |
| 353 "&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0
315B < \\u0316\\u0300\\u0315"; |
| 354 UCollator *coll; |
| 355 UChar rule[50]; |
| 356 int rulelen = u_unescape(str, rule, 50); |
| 357 int count = 0; |
| 358 const char *testdata[] = |
| 359 {"\\u1ED9", "o\\u0323\\u0302", |
| 360 "\\u0300\\u0315", "\\u0315\\u0300", |
| 361 "A\\u0300\\u0315B", "A\\u0315\\u0300B", |
| 362 "A\\u0316\\u0315B", "A\\u0315\\u0316B", |
| 363 "\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316", |
| 364 "A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B", |
| 365 "\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"}; |
| 366 int32_t srclen; |
| 367 UChar source[10]; |
| 368 UCollationElements *iter; |
| 369 |
| 370 coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status); |
| 371 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 372 if (U_FAILURE(status)){ |
| 373 log_err_status(status, "ERROR: in creation of collator using ucol_openRu
les()\n %s\n", |
| 374 myErrorName(status)); |
| 375 return; |
| 376 } |
| 377 |
| 378 srclen = u_unescape(testdata[0], source, 10); |
| 379 iter = ucol_openElements(coll, source, srclen, &status); |
| 380 backAndForth(iter); |
| 381 ucol_closeElements(iter); |
| 382 |
| 383 srclen = u_unescape(testdata[1], source, 10); |
| 384 iter = ucol_openElements(coll, source, srclen, &status); |
| 385 backAndForth(iter); |
| 386 ucol_closeElements(iter); |
| 387 |
| 388 while (count < 12) { |
| 389 srclen = u_unescape(testdata[count], source, 10); |
| 390 iter = ucol_openElements(coll, source, srclen, &status); |
| 391 |
| 392 if (U_FAILURE(status)){ |
| 393 log_err("ERROR: in creation of collator element iterator\n %s\n", |
| 394 myErrorName(status)); |
| 395 return; |
| 396 } |
| 397 backAndForth(iter); |
| 398 ucol_closeElements(iter); |
| 399 |
| 400 iter = ucol_openElements(coll, source, -1, &status); |
| 401 |
| 402 if (U_FAILURE(status)){ |
| 403 log_err("ERROR: in creation of collator element iterator\n %s\n", |
| 404 myErrorName(status)); |
| 405 return; |
| 406 } |
| 407 backAndForth(iter); |
| 408 ucol_closeElements(iter); |
| 409 count ++; |
| 410 } |
| 411 ucol_close(coll); |
| 412 } |
| 413 |
| 414 /** |
| 415 * Test for CollationElementIterator.previous() |
| 416 * |
| 417 * @bug 4108758 - Make sure it works with contracting characters |
| 418 * |
| 419 */ |
| 420 static void TestPrevious() |
| 421 { |
| 422 UCollator *coll=NULL; |
| 423 UChar rule[50]; |
| 424 UChar *source; |
| 425 UCollator *c1, *c2, *c3; |
| 426 UCollationElements *iter; |
| 427 UErrorCode status = U_ZERO_ERROR; |
| 428 UChar test1[50]; |
| 429 UChar test2[50]; |
| 430 |
| 431 u_uastrcpy(test1, "What subset of all possible test cases?"); |
| 432 u_uastrcpy(test2, "has the highest probability of detecting"); |
| 433 coll = ucol_open("en_US", &status); |
| 434 |
| 435 iter=ucol_openElements(coll, test1, u_strlen(test1), &status); |
| 436 log_verbose("English locale testing back and forth\n"); |
| 437 if(U_FAILURE(status)){ |
| 438 log_err_status(status, "ERROR: in creation of collation element iterator
using ucol_openElements()\n %s\n", |
| 439 myErrorName(status)); |
| 440 ucol_close(coll); |
| 441 return; |
| 442 } |
| 443 /* A basic test to see if it's working at all */ |
| 444 backAndForth(iter); |
| 445 ucol_closeElements(iter); |
| 446 ucol_close(coll); |
| 447 |
| 448 /* Test with a contracting character sequence */ |
| 449 u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH"); |
| 450 c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N
ULL, &status); |
| 451 |
| 452 log_verbose("Contraction rule testing back and forth with no normalization\n
"); |
| 453 |
| 454 if (c1 == NULL || U_FAILURE(status)) |
| 455 { |
| 456 log_err("Couldn't create a RuleBasedCollator with a contracting sequence
\n %s\n", |
| 457 myErrorName(status)); |
| 458 return; |
| 459 } |
| 460 source=(UChar*)malloc(sizeof(UChar) * 20); |
| 461 u_uastrcpy(source, "abchdcba"); |
| 462 iter=ucol_openElements(c1, source, u_strlen(source), &status); |
| 463 if(U_FAILURE(status)){ |
| 464 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 465 myErrorName(status)); |
| 466 return; |
| 467 } |
| 468 backAndForth(iter); |
| 469 ucol_closeElements(iter); |
| 470 ucol_close(c1); |
| 471 |
| 472 /* Test with an expanding character sequence */ |
| 473 u_uastrcpy(rule, "&a < b < c/abd < d"); |
| 474 c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, N
ULL, &status); |
| 475 log_verbose("Expansion rule testing back and forth with no normalization\n")
; |
| 476 if (c2 == NULL || U_FAILURE(status)) |
| 477 { |
| 478 log_err("Couldn't create a RuleBasedCollator with a contracting sequence
.\n %s\n", |
| 479 myErrorName(status)); |
| 480 return; |
| 481 } |
| 482 u_uastrcpy(source, "abcd"); |
| 483 iter=ucol_openElements(c2, source, u_strlen(source), &status); |
| 484 if(U_FAILURE(status)){ |
| 485 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 486 myErrorName(status)); |
| 487 return; |
| 488 } |
| 489 backAndForth(iter); |
| 490 ucol_closeElements(iter); |
| 491 ucol_close(c2); |
| 492 /* Now try both */ |
| 493 u_uastrcpy(rule, "&a < b < c/aba < d < z < ch"); |
| 494 c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENG
TH,NULL, &status); |
| 495 log_verbose("Expansion/contraction rule testing back and forth with no norma
lization\n"); |
| 496 |
| 497 if (c3 == NULL || U_FAILURE(status)) |
| 498 { |
| 499 log_err("Couldn't create a RuleBasedCollator with a contracting sequence
.\n %s\n", |
| 500 myErrorName(status)); |
| 501 return; |
| 502 } |
| 503 u_uastrcpy(source, "abcdbchdc"); |
| 504 iter=ucol_openElements(c3, source, u_strlen(source), &status); |
| 505 if(U_FAILURE(status)){ |
| 506 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 507 myErrorName(status)); |
| 508 return; |
| 509 } |
| 510 backAndForth(iter); |
| 511 ucol_closeElements(iter); |
| 512 ucol_close(c3); |
| 513 source[0] = 0x0e41; |
| 514 source[1] = 0x0e02; |
| 515 source[2] = 0x0e41; |
| 516 source[3] = 0x0e02; |
| 517 source[4] = 0x0e27; |
| 518 source[5] = 0x61; |
| 519 source[6] = 0x62; |
| 520 source[7] = 0x63; |
| 521 source[8] = 0; |
| 522 |
| 523 coll = ucol_open("th_TH", &status); |
| 524 log_verbose("Thai locale testing back and forth with normalization\n"); |
| 525 iter=ucol_openElements(coll, source, u_strlen(source), &status); |
| 526 if(U_FAILURE(status)){ |
| 527 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 528 myErrorName(status)); |
| 529 return; |
| 530 } |
| 531 backAndForth(iter); |
| 532 ucol_closeElements(iter); |
| 533 ucol_close(coll); |
| 534 |
| 535 /* prev test */ |
| 536 source[0] = 0x0061; |
| 537 source[1] = 0x30CF; |
| 538 source[2] = 0x3099; |
| 539 source[3] = 0x30FC; |
| 540 source[4] = 0; |
| 541 |
| 542 coll = ucol_open("ja_JP", &status); |
| 543 log_verbose("Japanese locale testing back and forth with normalization\n"); |
| 544 iter=ucol_openElements(coll, source, u_strlen(source), &status); |
| 545 if(U_FAILURE(status)){ |
| 546 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 547 myErrorName(status)); |
| 548 return; |
| 549 } |
| 550 backAndForth(iter); |
| 551 ucol_closeElements(iter); |
| 552 ucol_close(coll); |
| 553 |
| 554 free(source); |
| 555 } |
| 556 |
| 557 /** |
| 558 * Test for getOffset() and setOffset() |
| 559 */ |
| 560 static void TestOffset() |
| 561 { |
| 562 UErrorCode status= U_ZERO_ERROR; |
| 563 UCollator *en_us=NULL; |
| 564 UCollationElements *iter, *pristine; |
| 565 int32_t offset; |
| 566 OrderAndOffset *orders; |
| 567 int32_t orderLength=0; |
| 568 int count = 0; |
| 569 UChar test1[50]; |
| 570 UChar test2[50]; |
| 571 |
| 572 u_uastrcpy(test1, "What subset of all possible test cases?"); |
| 573 u_uastrcpy(test2, "has the highest probability of detecting"); |
| 574 en_us = ucol_open("en_US", &status); |
| 575 log_verbose("Testing getOffset and setOffset for collations\n"); |
| 576 iter = ucol_openElements(en_us, test1, u_strlen(test1), &status); |
| 577 if(U_FAILURE(status)){ |
| 578 log_err_status(status, "ERROR: in creation of collation element iterator
using ucol_openElements()\n %s\n", |
| 579 myErrorName(status)); |
| 580 ucol_close(en_us); |
| 581 return; |
| 582 } |
| 583 |
| 584 /* testing boundaries */ |
| 585 ucol_setOffset(iter, 0, &status); |
| 586 if (U_FAILURE(status) || ucol_previous(iter, &status) != UCOL_NULLORDER) { |
| 587 log_err("Error: After setting offset to 0, we should be at the end " |
| 588 "of the backwards iteration"); |
| 589 } |
| 590 ucol_setOffset(iter, u_strlen(test1), &status); |
| 591 if (U_FAILURE(status) || ucol_next(iter, &status) != UCOL_NULLORDER) { |
| 592 log_err("Error: After setting offset to end of the string, we should " |
| 593 "be at the end of the backwards iteration"); |
| 594 } |
| 595 |
| 596 /* Run all the way through the iterator, then get the offset */ |
| 597 |
| 598 orders = getOrders(iter, &orderLength); |
| 599 |
| 600 offset = ucol_getOffset(iter); |
| 601 |
| 602 if (offset != u_strlen(test1)) |
| 603 { |
| 604 log_err("offset at end != length %d vs %d\n", offset, |
| 605 u_strlen(test1) ); |
| 606 } |
| 607 |
| 608 /* Now set the offset back to the beginning and see if it works */ |
| 609 pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status); |
| 610 if(U_FAILURE(status)){ |
| 611 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 612 myErrorName(status)); |
| 613 ucol_close(en_us); |
| 614 return; |
| 615 } |
| 616 status = U_ZERO_ERROR; |
| 617 |
| 618 ucol_setOffset(iter, 0, &status); |
| 619 if (U_FAILURE(status)) |
| 620 { |
| 621 log_err("setOffset failed. %s\n", myErrorName(status)); |
| 622 } |
| 623 else |
| 624 { |
| 625 assertEqual(iter, pristine); |
| 626 } |
| 627 |
| 628 ucol_closeElements(pristine); |
| 629 ucol_closeElements(iter); |
| 630 free(orders); |
| 631 |
| 632 /* testing offsets in normalization buffer */ |
| 633 test1[0] = 0x61; |
| 634 test1[1] = 0x300; |
| 635 test1[2] = 0x316; |
| 636 test1[3] = 0x62; |
| 637 test1[4] = 0; |
| 638 ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); |
| 639 iter = ucol_openElements(en_us, test1, 4, &status); |
| 640 if(U_FAILURE(status)){ |
| 641 log_err("ERROR: in creation of collation element iterator using ucol_ope
nElements()\n %s\n", |
| 642 myErrorName(status)); |
| 643 ucol_close(en_us); |
| 644 return; |
| 645 } |
| 646 |
| 647 count = 0; |
| 648 while (ucol_next(iter, &status) != UCOL_NULLORDER && |
| 649 U_SUCCESS(status)) { |
| 650 switch (count) { |
| 651 case 0: |
| 652 if (ucol_getOffset(iter) != 1) { |
| 653 log_err("ERROR: Offset of iteration should be 1\n"); |
| 654 } |
| 655 break; |
| 656 case 3: |
| 657 if (ucol_getOffset(iter) != 4) { |
| 658 log_err("ERROR: Offset of iteration should be 4\n"); |
| 659 } |
| 660 break; |
| 661 default: |
| 662 if (ucol_getOffset(iter) != 3) { |
| 663 log_err("ERROR: Offset of iteration should be 3\n"); |
| 664 } |
| 665 } |
| 666 count ++; |
| 667 } |
| 668 |
| 669 ucol_reset(iter); |
| 670 count = 0; |
| 671 while (ucol_previous(iter, &status) != UCOL_NULLORDER && |
| 672 U_SUCCESS(status)) { |
| 673 switch (count) { |
| 674 case 0: |
| 675 case 1: |
| 676 if (ucol_getOffset(iter) != 3) { |
| 677 log_err("ERROR: Offset of iteration should be 3\n"); |
| 678 } |
| 679 break; |
| 680 case 2: |
| 681 if (ucol_getOffset(iter) != 1) { |
| 682 log_err("ERROR: Offset of iteration should be 1\n"); |
| 683 } |
| 684 break; |
| 685 default: |
| 686 if (ucol_getOffset(iter) != 0) { |
| 687 log_err("ERROR: Offset of iteration should be 0\n"); |
| 688 } |
| 689 } |
| 690 count ++; |
| 691 } |
| 692 |
| 693 if(U_FAILURE(status)){ |
| 694 log_err("ERROR: in iterating collation elements %s\n", |
| 695 myErrorName(status)); |
| 696 } |
| 697 |
| 698 ucol_closeElements(iter); |
| 699 ucol_close(en_us); |
| 700 } |
| 701 |
| 702 /** |
| 703 * Test for setText() |
| 704 */ |
| 705 static void TestSetText() |
| 706 { |
| 707 int32_t c,i; |
| 708 UErrorCode status = U_ZERO_ERROR; |
| 709 UCollator *en_us=NULL; |
| 710 UCollationElements *iter1, *iter2; |
| 711 UChar test1[50]; |
| 712 UChar test2[50]; |
| 713 |
| 714 u_uastrcpy(test1, "What subset of all possible test cases?"); |
| 715 u_uastrcpy(test2, "has the highest probability of detecting"); |
| 716 en_us = ucol_open("en_US", &status); |
| 717 log_verbose("testing setText for Collation elements\n"); |
| 718 iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status); |
| 719 if(U_FAILURE(status)){ |
| 720 log_err_status(status, "ERROR: in creation of collation element iterator
1 using ucol_openElements()\n %s\n", |
| 721 myErrorName(status)); |
| 722 ucol_close(en_us); |
| 723 return; |
| 724 } |
| 725 iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status); |
| 726 if(U_FAILURE(status)){ |
| 727 log_err("ERROR: in creation of collation element iterator2 using ucol_op
enElements()\n %s\n", |
| 728 myErrorName(status)); |
| 729 ucol_close(en_us); |
| 730 return; |
| 731 } |
| 732 |
| 733 /* Run through the second iterator just to exercise it */ |
| 734 c = ucol_next(iter2, &status); |
| 735 i = 0; |
| 736 |
| 737 while ( ++i < 10 && (c != UCOL_NULLORDER)) |
| 738 { |
| 739 if (U_FAILURE(status)) |
| 740 { |
| 741 log_err("iter2->next() returned an error. %s\n", myErrorName(status)
); |
| 742 ucol_closeElements(iter2); |
| 743 ucol_closeElements(iter1); |
| 744 ucol_close(en_us); |
| 745 return; |
| 746 } |
| 747 |
| 748 c = ucol_next(iter2, &status); |
| 749 } |
| 750 |
| 751 /* Now set it to point to the same string as the first iterator */ |
| 752 ucol_setText(iter2, test1, u_strlen(test1), &status); |
| 753 if (U_FAILURE(status)) |
| 754 { |
| 755 log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status
)); |
| 756 } |
| 757 else |
| 758 { |
| 759 assertEqual(iter1, iter2); |
| 760 } |
| 761 |
| 762 /* Now set it to point to a null string with fake length*/ |
| 763 ucol_setText(iter2, NULL, 2, &status); |
| 764 if (U_FAILURE(status)) |
| 765 { |
| 766 log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status)
); |
| 767 } |
| 768 else |
| 769 { |
| 770 if (ucol_next(iter2, &status) != UCOL_NULLORDER) { |
| 771 log_err("iter2 with null text expected to return UCOL_NULLORDER\n"); |
| 772 } |
| 773 } |
| 774 |
| 775 ucol_closeElements(iter2); |
| 776 ucol_closeElements(iter1); |
| 777 ucol_close(en_us); |
| 778 } |
| 779 |
| 780 /** @bug 4108762 |
| 781 * Test for getMaxExpansion() |
| 782 */ |
| 783 static void TestMaxExpansion() |
| 784 { |
| 785 UErrorCode status = U_ZERO_ERROR; |
| 786 UCollator *coll ;/*= ucol_open("en_US", &status);*/ |
| 787 UChar ch = 0; |
| 788 UChar32 unassigned = 0xEFFFD; |
| 789 UChar supplementary[2]; |
| 790 uint32_t stringOffset = 0; |
| 791 UBool isError = FALSE; |
| 792 uint32_t sorder = 0; |
| 793 UCollationElements *iter ;/*= ucol_openElements(coll, &ch, 1, &status);*/ |
| 794 uint32_t temporder = 0; |
| 795 |
| 796 UChar rule[256]; |
| 797 u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch"); |
| 798 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, |
| 799 UCOL_DEFAULT_STRENGTH,NULL, &status); |
| 800 if(U_SUCCESS(status) && coll) { |
| 801 iter = ucol_openElements(coll, &ch, 1, &status); |
| 802 |
| 803 while (ch < 0xFFFF && U_SUCCESS(status)) { |
| 804 int count = 1; |
| 805 uint32_t order; |
| 806 int32_t size = 0; |
| 807 |
| 808 ch ++; |
| 809 |
| 810 ucol_setText(iter, &ch, 1, &status); |
| 811 order = ucol_previous(iter, &status); |
| 812 |
| 813 /* thai management */ |
| 814 if (order == 0) |
| 815 order = ucol_previous(iter, &status); |
| 816 |
| 817 while (U_SUCCESS(status) && |
| 818 ucol_previous(iter, &status) != UCOL_NULLORDER) { |
| 819 count ++; |
| 820 } |
| 821 |
| 822 size = ucol_getMaxExpansion(iter, order); |
| 823 if (U_FAILURE(status) || size < count) { |
| 824 log_err("Failure at codepoint %d, maximum expansion count < %d\n", |
| 825 ch, count); |
| 826 } |
| 827 } |
| 828 |
| 829 /* testing for exact max expansion */ |
| 830 ch = 0; |
| 831 while (ch < 0x61) { |
| 832 uint32_t order; |
| 833 int32_t size; |
| 834 ucol_setText(iter, &ch, 1, &status); |
| 835 order = ucol_previous(iter, &status); |
| 836 size = ucol_getMaxExpansion(iter, order); |
| 837 if (U_FAILURE(status) || size != 1) { |
| 838 log_err("Failure at codepoint %d, maximum expansion count < %d\n", |
| 839 ch, 1); |
| 840 } |
| 841 ch ++; |
| 842 } |
| 843 |
| 844 ch = 0x63; |
| 845 ucol_setText(iter, &ch, 1, &status); |
| 846 temporder = ucol_previous(iter, &status); |
| 847 |
| 848 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 3) { |
| 849 log_err("Failure at codepoint %d, maximum expansion count != %d\n", |
| 850 ch, 3); |
| 851 } |
| 852 |
| 853 ch = 0x64; |
| 854 ucol_setText(iter, &ch, 1, &status); |
| 855 temporder = ucol_previous(iter, &status); |
| 856 |
| 857 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 1) { |
| 858 log_err("Failure at codepoint %d, maximum expansion count != %d\n", |
| 859 ch, 3); |
| 860 } |
| 861 |
| 862 U16_APPEND(supplementary, stringOffset, 2, unassigned, isError); |
| 863 ucol_setText(iter, supplementary, 2, &status); |
| 864 sorder = ucol_previous(iter, &status); |
| 865 |
| 866 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, sorder) != 2) { |
| 867 log_err("Failure at codepoint %d, maximum expansion count < %d\n", |
| 868 ch, 2); |
| 869 } |
| 870 |
| 871 /* testing jamo */ |
| 872 ch = 0x1165; |
| 873 |
| 874 ucol_setText(iter, &ch, 1, &status); |
| 875 temporder = ucol_previous(iter, &status); |
| 876 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) > 3) { |
| 877 log_err("Failure at codepoint %d, maximum expansion count > %d\n", |
| 878 ch, 3); |
| 879 } |
| 880 |
| 881 ucol_closeElements(iter); |
| 882 ucol_close(coll); |
| 883 |
| 884 /* testing special jamo &a<\u1160 */ |
| 885 rule[0] = 0x26; |
| 886 rule[1] = 0x71; |
| 887 rule[2] = 0x3c; |
| 888 rule[3] = 0x1165; |
| 889 rule[4] = 0x2f; |
| 890 rule[5] = 0x71; |
| 891 rule[6] = 0x71; |
| 892 rule[7] = 0x71; |
| 893 rule[8] = 0x71; |
| 894 rule[9] = 0; |
| 895 |
| 896 coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, |
| 897 UCOL_DEFAULT_STRENGTH,NULL, &status); |
| 898 iter = ucol_openElements(coll, &ch, 1, &status); |
| 899 |
| 900 temporder = ucol_previous(iter, &status); |
| 901 if (U_FAILURE(status) || ucol_getMaxExpansion(iter, temporder) != 6) { |
| 902 log_err("Failure at codepoint %d, maximum expansion count > %d\n", |
| 903 ch, 5); |
| 904 } |
| 905 |
| 906 ucol_closeElements(iter); |
| 907 ucol_close(coll); |
| 908 } else { |
| 909 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(statu
s)); |
| 910 } |
| 911 |
| 912 } |
| 913 |
| 914 |
| 915 static void assertEqual(UCollationElements *i1, UCollationElements *i2) |
| 916 { |
| 917 int32_t c1, c2; |
| 918 int32_t count = 0; |
| 919 UErrorCode status = U_ZERO_ERROR; |
| 920 |
| 921 do |
| 922 { |
| 923 c1 = ucol_next(i1, &status); |
| 924 c2 = ucol_next(i2, &status); |
| 925 |
| 926 if (c1 != c2) |
| 927 { |
| 928 log_err("Error in iteration %d assetEqual between\n %d and %d, t
hey are not equal\n", count, c1, c2); |
| 929 break; |
| 930 } |
| 931 |
| 932 count += 1; |
| 933 } |
| 934 while (c1 != UCOL_NULLORDER); |
| 935 } |
| 936 |
| 937 /** |
| 938 * Testing iterators with extremely small buffers |
| 939 */ |
| 940 static void TestSmallBuffer() |
| 941 { |
| 942 UErrorCode status = U_ZERO_ERROR; |
| 943 UCollator *coll; |
| 944 UCollationElements *testiter, |
| 945 *iter; |
| 946 int32_t count = 0; |
| 947 OrderAndOffset *testorders, |
| 948 *orders; |
| 949 |
| 950 UChar teststr[500]; |
| 951 UChar str[] = {0x300, 0x31A, 0}; |
| 952 /* |
| 953 creating a long string of decomposable characters, |
| 954 since by default the writable buffer is of size 256 |
| 955 */ |
| 956 while (count < 500) { |
| 957 if ((count & 1) == 0) { |
| 958 teststr[count ++] = 0x300; |
| 959 } |
| 960 else { |
| 961 teststr[count ++] = 0x31A; |
| 962 } |
| 963 } |
| 964 |
| 965 coll = ucol_open("th_TH", &status); |
| 966 if(U_SUCCESS(status) && coll) { |
| 967 testiter = ucol_openElements(coll, teststr, 500, &status); |
| 968 iter = ucol_openElements(coll, str, 2, &status); |
| 969 |
| 970 orders = getOrders(iter, &count); |
| 971 if (count != 2) { |
| 972 log_err("Error collation elements size is not 2 for \\u0300\\u031A\n")
; |
| 973 } |
| 974 |
| 975 /* |
| 976 this will rearrange the string data to 250 characters of 0x300 first then |
| 977 250 characters of 0x031A |
| 978 */ |
| 979 testorders = getOrders(testiter, &count); |
| 980 |
| 981 if (count != 500) { |
| 982 log_err("Error decomposition does not give the right sized collation e
lements\n"); |
| 983 } |
| 984 |
| 985 while (count != 0) { |
| 986 /* UCA collation element for 0x0F76 */ |
| 987 if ((count > 250 && testorders[-- count].order != orders[1].order) || |
| 988 (count <= 250 && testorders[-- count].order != orders[0].order)) { |
| 989 log_err("Error decomposition does not give the right collation ele
ment at %d count\n", count); |
| 990 break; |
| 991 } |
| 992 } |
| 993 |
| 994 free(testorders); |
| 995 free(orders); |
| 996 |
| 997 ucol_reset(testiter); |
| 998 |
| 999 /* ensures closing of elements done properly to clear writable buffer */ |
| 1000 ucol_next(testiter, &status); |
| 1001 ucol_next(testiter, &status); |
| 1002 ucol_closeElements(testiter); |
| 1003 ucol_closeElements(iter); |
| 1004 ucol_close(coll); |
| 1005 } else { |
| 1006 log_err_status(status, "Couldn't open collator -> %s\n", u_errorName(statu
s)); |
| 1007 } |
| 1008 } |
| 1009 |
/**
 * Snippet of code from genuca.
 * Converts one hexadecimal digit character to its numeric value.
 * @param hex character to convert, either case is accepted
 * @return value 0..15 of the digit, or 0 when hex is not a hexadecimal digit
 */
static int32_t hex2num(char hex) {
    int32_t digit = 0;

    if ('0' <= hex && hex <= '9') {
        digit = hex - '0';
    } else if ('a' <= hex && hex <= 'f') {
        digit = 10 + (hex - 'a');
    } else if ('A' <= hex && hex <= 'F') {
        digit = 10 + (hex - 'A');
    }
    return digit;
}
| 1024 |
| 1025 /** |
| 1026 * Getting codepoints from a string |
| 1027 * @param str character string contain codepoints seperated by space and ended |
| 1028 * by a semicolon |
| 1029 * @param codepoints array for storage, assuming size > 5 |
| 1030 * @return position at the end of the codepoint section |
| 1031 */ |
| 1032 static char *getCodePoints(char *str, UChar *codepoints, UChar *contextCPs) { |
| 1033 UErrorCode errorCode = U_ZERO_ERROR; |
| 1034 char *semi = uprv_strchr(str, ';'); |
| 1035 char *pipe = uprv_strchr(str, '|'); |
| 1036 char *s; |
| 1037 *codepoints = 0; |
| 1038 *contextCPs = 0; |
| 1039 if(semi == NULL) { |
| 1040 log_err("expected semicolon after code point string in FractionalUCA.txt
%s\n", str); |
| 1041 return str; |
| 1042 } |
| 1043 if(pipe != NULL) { |
| 1044 int32_t contextLength; |
| 1045 *pipe = 0; |
| 1046 contextLength = u_parseString(str, contextCPs, 99, NULL, &errorCode); |
| 1047 *pipe = '|'; |
| 1048 if(U_FAILURE(errorCode)) { |
| 1049 log_err("error parsing precontext string from FractionalUCA.txt %s\n
", str); |
| 1050 return str; |
| 1051 } |
| 1052 /* prepend the precontext string to the codepoints */ |
| 1053 u_memcpy(codepoints, contextCPs, contextLength); |
| 1054 codepoints += contextLength; |
| 1055 /* start of the code point string */ |
| 1056 s = pipe + 1; |
| 1057 } else { |
| 1058 s = str; |
| 1059 } |
| 1060 u_parseString(s, codepoints, 99, NULL, &errorCode); |
| 1061 if(U_FAILURE(errorCode)) { |
| 1062 log_err("error parsing code point string from FractionalUCA.txt %s\n", s
tr); |
| 1063 return str; |
| 1064 } |
| 1065 return semi + 1; |
| 1066 } |
| 1067 |
| 1068 /** |
| 1069 * Sniplets of code from genuca |
| 1070 */ |
| 1071 static int32_t |
| 1072 readElement(char **from, char *to, char separator, UErrorCode *status) |
| 1073 { |
| 1074 if (U_SUCCESS(*status)) { |
| 1075 char buffer[1024]; |
| 1076 int32_t i = 0; |
| 1077 while (**from != separator) { |
| 1078 if (**from != ' ') { |
| 1079 *(buffer+i++) = **from; |
| 1080 } |
| 1081 (*from)++; |
| 1082 } |
| 1083 (*from)++; |
| 1084 *(buffer + i) = 0; |
| 1085 strcpy(to, buffer); |
| 1086 return i/2; |
| 1087 } |
| 1088 |
| 1089 return 0; |
| 1090 } |
| 1091 |
| 1092 /** |
| 1093 * Sniplets of code from genuca |
| 1094 */ |
| 1095 static uint32_t |
| 1096 getSingleCEValue(char *primary, char *secondary, char *tertiary, |
| 1097 UErrorCode *status) |
| 1098 { |
| 1099 if (U_SUCCESS(*status)) { |
| 1100 uint32_t value = 0; |
| 1101 char primsave = '\0'; |
| 1102 char secsave = '\0'; |
| 1103 char tersave = '\0'; |
| 1104 char *primend = primary+4; |
| 1105 char *secend = secondary+2; |
| 1106 char *terend = tertiary+2; |
| 1107 uint32_t primvalue; |
| 1108 uint32_t secvalue; |
| 1109 uint32_t tervalue; |
| 1110 |
| 1111 if (uprv_strlen(primary) > 4) { |
| 1112 primsave = *primend; |
| 1113 *primend = '\0'; |
| 1114 } |
| 1115 |
| 1116 if (uprv_strlen(secondary) > 2) { |
| 1117 secsave = *secend; |
| 1118 *secend = '\0'; |
| 1119 } |
| 1120 |
| 1121 if (uprv_strlen(tertiary) > 2) { |
| 1122 tersave = *terend; |
| 1123 *terend = '\0'; |
| 1124 } |
| 1125 |
| 1126 primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0; |
| 1127 secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0; |
| 1128 tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0; |
| 1129 if(primvalue <= 0xFF) { |
| 1130 primvalue <<= 8; |
| 1131 } |
| 1132 |
| 1133 value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK) |
| 1134 | ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK) |
| 1135 | (tervalue & UCOL_TERTIARYORDERMASK); |
| 1136 |
| 1137 if(primsave!='\0') { |
| 1138 *primend = primsave; |
| 1139 } |
| 1140 if(secsave!='\0') { |
| 1141 *secend = secsave; |
| 1142 } |
| 1143 if(tersave!='\0') { |
| 1144 *terend = tersave; |
| 1145 } |
| 1146 return value; |
| 1147 } |
| 1148 return 0; |
| 1149 } |
| 1150 |
| 1151 /** |
| 1152 * Getting collation elements generated from a string |
| 1153 * @param str character string contain collation elements contained in [] and |
| 1154 * seperated by space |
| 1155 * @param ce array for storage, assuming size > 20 |
| 1156 * @param status error status |
| 1157 * @return position at the end of the codepoint section |
| 1158 */ |
| 1159 static char * getCEs(char *str, uint32_t *ces, UErrorCode *status) { |
| 1160 char *pStartCP = uprv_strchr(str, '['); |
| 1161 int count = 0; |
| 1162 char *pEndCP; |
| 1163 char primary[100]; |
| 1164 char secondary[100]; |
| 1165 char tertiary[100]; |
| 1166 |
| 1167 while (*pStartCP == '[') { |
| 1168 uint32_t primarycount = 0; |
| 1169 uint32_t secondarycount = 0; |
| 1170 uint32_t tertiarycount = 0; |
| 1171 uint32_t CEi = 1; |
| 1172 pEndCP = strchr(pStartCP, ']'); |
| 1173 if(pEndCP == NULL) { |
| 1174 break; |
| 1175 } |
| 1176 pStartCP ++; |
| 1177 |
| 1178 primarycount = readElement(&pStartCP, primary, ',', status); |
| 1179 secondarycount = readElement(&pStartCP, secondary, ',', status); |
| 1180 tertiarycount = readElement(&pStartCP, tertiary, ']', status); |
| 1181 |
| 1182 /* I want to get the CEs entered right here, including continuation */ |
| 1183 ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status); |
| 1184 if (U_FAILURE(*status)) { |
| 1185 break; |
| 1186 } |
| 1187 |
| 1188 while (2 * CEi < primarycount || CEi < secondarycount || |
| 1189 CEi < tertiarycount) { |
| 1190 uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */ |
| 1191 if (2 * CEi < primarycount) { |
| 1192 value |= ((hex2num(*(primary + 4 * CEi)) & 0xF) << 28); |
| 1193 value |= ((hex2num(*(primary + 4 * CEi + 1)) & 0xF) << 24); |
| 1194 } |
| 1195 |
| 1196 if (2 * CEi + 1 < primarycount) { |
| 1197 value |= ((hex2num(*(primary + 4 * CEi + 2)) & 0xF) << 20); |
| 1198 value |= ((hex2num(*(primary + 4 * CEi + 3)) &0xF) << 16); |
| 1199 } |
| 1200 |
| 1201 if (CEi < secondarycount) { |
| 1202 value |= ((hex2num(*(secondary + 2 * CEi)) & 0xF) << 12); |
| 1203 value |= ((hex2num(*(secondary + 2 * CEi + 1)) & 0xF) << 8); |
| 1204 } |
| 1205 |
| 1206 if (CEi < tertiarycount) { |
| 1207 value |= ((hex2num(*(tertiary + 2 * CEi)) & 0x3) << 4); |
| 1208 value |= (hex2num(*(tertiary + 2 * CEi + 1)) & 0xF); |
| 1209 } |
| 1210 |
| 1211 CEi ++; |
| 1212 ces[count ++] = value; |
| 1213 } |
| 1214 |
| 1215 pStartCP = pEndCP + 1; |
| 1216 } |
| 1217 ces[count] = 0; |
| 1218 return pStartCP; |
| 1219 } |
| 1220 |
| 1221 /** |
| 1222 * Getting the FractionalUCA.txt file stream |
| 1223 */ |
| 1224 static FileStream * getFractionalUCA(void) |
| 1225 { |
| 1226 char newPath[256]; |
| 1227 char backupPath[256]; |
| 1228 FileStream *result = NULL; |
| 1229 |
| 1230 /* Look inside ICU_DATA first */ |
| 1231 uprv_strcpy(newPath, ctest_dataSrcDir()); |
| 1232 uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING ); |
| 1233 uprv_strcat(newPath, "FractionalUCA.txt"); |
| 1234 |
| 1235 /* As a fallback, try to guess where the source data was located |
| 1236 * at the time ICU was built, and look there. |
| 1237 */ |
| 1238 #if defined (U_TOPSRCDIR) |
| 1239 strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); |
| 1240 #else |
| 1241 { |
| 1242 UErrorCode errorCode = U_ZERO_ERROR; |
| 1243 strcpy(backupPath, loadTestData(&errorCode)); |
| 1244 strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_
SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); |
| 1245 } |
| 1246 #endif |
| 1247 strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "Fractional
UCA.txt"); |
| 1248 |
| 1249 result = T_FileStream_open(newPath, "rb"); |
| 1250 |
| 1251 if (result == NULL) { |
| 1252 result = T_FileStream_open(backupPath, "rb"); |
| 1253 if (result == NULL) { |
| 1254 log_err("Failed to open either %s or %s\n", newPath, backupPath); |
| 1255 } |
| 1256 } |
| 1257 return result; |
| 1258 } |
| 1259 |
| 1260 /** |
| 1261 * Testing the CEs returned by the iterator |
| 1262 */ |
| 1263 static void TestCEs() { |
| 1264 FileStream *file = NULL; |
| 1265 char line[2048]; |
| 1266 char *str; |
| 1267 UChar codepoints[10]; |
| 1268 uint32_t ces[20]; |
| 1269 UErrorCode status = U_ZERO_ERROR; |
| 1270 UCollator *coll = ucol_open("", &status); |
| 1271 uint32_t lineNo = 0; |
| 1272 UChar contextCPs[5]; |
| 1273 |
| 1274 if (U_FAILURE(status)) { |
| 1275 log_err_status(status, "Error in opening root collator -> %s\n", u_error
Name(status)); |
| 1276 return; |
| 1277 } |
| 1278 |
| 1279 file = getFractionalUCA(); |
| 1280 |
| 1281 if (file == NULL) { |
| 1282 log_err("*** unable to open input FractionalUCA.txt file ***\n"); |
| 1283 return; |
| 1284 } |
| 1285 |
| 1286 |
| 1287 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
| 1288 int count = 0; |
| 1289 UCollationElements *iter; |
| 1290 int32_t preContextCeLen=0; |
| 1291 lineNo++; |
| 1292 /* skip this line if it is empty or a comment or is a return value |
| 1293 or start of some variable section */ |
| 1294 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || |
| 1295 line[0] == 0x000D || line[0] == '[') { |
| 1296 continue; |
| 1297 } |
| 1298 |
| 1299 str = getCodePoints(line, codepoints, contextCPs); |
| 1300 |
| 1301 /* these are 'fake' codepoints in the fractional UCA, and are used just |
| 1302 * for positioning of indirect values. They should not go through this |
| 1303 * test. |
| 1304 */ |
| 1305 if(*codepoints == 0xFDD0) { |
| 1306 continue; |
| 1307 } |
| 1308 if (*contextCPs != 0) { |
| 1309 iter = ucol_openElements(coll, contextCPs, -1, &status); |
| 1310 if (U_FAILURE(status)) { |
| 1311 log_err("Error in opening collation elements\n"); |
| 1312 break; |
| 1313 } |
| 1314 while((ces[preContextCeLen] = ucol_next(iter, &status)) != (uint32_t
)UCOL_NULLORDER) { |
| 1315 preContextCeLen++; |
| 1316 } |
| 1317 ucol_closeElements(iter); |
| 1318 } |
| 1319 |
| 1320 getCEs(str, ces+preContextCeLen, &status); |
| 1321 if (U_FAILURE(status)) { |
| 1322 log_err("Error in parsing collation elements in FractionalUCA.txt\n"
); |
| 1323 break; |
| 1324 } |
| 1325 iter = ucol_openElements(coll, codepoints, -1, &status); |
| 1326 if (U_FAILURE(status)) { |
| 1327 log_err("Error in opening collation elements\n"); |
| 1328 break; |
| 1329 } |
| 1330 for (;;) { |
| 1331 uint32_t ce = (uint32_t)ucol_next(iter, &status); |
| 1332 if (ce == 0xFFFFFFFF) { |
| 1333 ce = 0; |
| 1334 } |
| 1335 /* we now unconditionally reorder Thai/Lao prevowels, so this |
| 1336 * test would fail if we don't skip here. |
| 1337 */ |
| 1338 if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) { |
| 1339 continue; |
| 1340 } |
| 1341 if (ce != ces[count] || U_FAILURE(status)) { |
| 1342 log_err("Collation elements in FractionalUCA.txt and iterators d
o not match!\n"); |
| 1343 break; |
| 1344 } |
| 1345 if (ces[count] == 0) { |
| 1346 break; |
| 1347 } |
| 1348 count ++; |
| 1349 } |
| 1350 ucol_closeElements(iter); |
| 1351 } |
| 1352 |
| 1353 T_FileStream_close(file); |
| 1354 ucol_close(coll); |
| 1355 } |
| 1356 |
| 1357 /** |
| 1358 * Testing the discontigous contractions |
| 1359 */ |
| 1360 static void TestDiscontiguos() { |
| 1361 const char *rulestr = |
| 1362 "&z < AB < X\\u0300 < ABC < X\\u0300\\u0315"; |
| 1363 UChar rule[50]; |
| 1364 int rulelen = u_unescape(rulestr, rule, 50); |
| 1365 const char *src[] = { |
| 1366 "ADB", "ADBC", "A\\u0315B", "A\\u0315BC", |
| 1367 /* base character blocked */ |
| 1368 "XD\\u0300", "XD\\u0300\\u0315", |
| 1369 /* non blocking combining character */ |
| 1370 "X\\u0319\\u0300", "X\\u0319\\u0300\\u0315", |
| 1371 /* blocking combining character */ |
| 1372 "X\\u0314\\u0300", "X\\u0314\\u0300\\u0315", |
| 1373 /* contraction prefix */ |
| 1374 "ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315", |
| 1375 "X\\u0300\\u031A\\u0315", |
| 1376 /* ends not with a contraction character */ |
| 1377 "X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D", |
| 1378 "X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D" |
| 1379 }; |
| 1380 const char *tgt[] = { |
| 1381 /* non blocking combining character */ |
| 1382 "A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC", |
| 1383 /* base character blocked */ |
| 1384 "X D \\u0300", "X D \\u0300\\u0315", |
| 1385 /* non blocking combining character */ |
| 1386 "X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319", |
| 1387 /* blocking combining character */ |
| 1388 "X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315", |
| 1389 /* contraction prefix */ |
| 1390 "AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319", |
| 1391 "X\\u0300 \\u031A \\u0315", |
| 1392 /* ends not with a contraction character */ |
| 1393 "X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D", |
| 1394 "X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D" |
| 1395 }; |
| 1396 int size = 20; |
| 1397 UCollator *coll; |
| 1398 UErrorCode status = U_ZERO_ERROR; |
| 1399 int count = 0; |
| 1400 UCollationElements *iter; |
| 1401 UCollationElements *resultiter; |
| 1402 |
| 1403 coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,N
ULL, &status); |
| 1404 iter = ucol_openElements(coll, rule, 1, &status); |
| 1405 resultiter = ucol_openElements(coll, rule, 1, &status); |
| 1406 |
| 1407 if (U_FAILURE(status)) { |
| 1408 log_err_status(status, "Error opening collation rules -> %s\n", u_errorN
ame(status)); |
| 1409 return; |
| 1410 } |
| 1411 |
| 1412 while (count < size) { |
| 1413 UChar str[20]; |
| 1414 UChar tstr[20]; |
| 1415 int strLen = u_unescape(src[count], str, 20); |
| 1416 UChar *s; |
| 1417 |
| 1418 ucol_setText(iter, str, strLen, &status); |
| 1419 if (U_FAILURE(status)) { |
| 1420 log_err("Error opening collation iterator\n"); |
| 1421 return; |
| 1422 } |
| 1423 |
| 1424 u_unescape(tgt[count], tstr, 20); |
| 1425 s = tstr; |
| 1426 |
| 1427 log_verbose("count %d\n", count); |
| 1428 |
| 1429 for (;;) { |
| 1430 uint32_t ce; |
| 1431 UChar *e = u_strchr(s, 0x20); |
| 1432 if (e == 0) { |
| 1433 e = u_strchr(s, 0); |
| 1434 } |
| 1435 ucol_setText(resultiter, s, (int32_t)(e - s), &status); |
| 1436 ce = ucol_next(resultiter, &status); |
| 1437 if (U_FAILURE(status)) { |
| 1438 log_err("Error manipulating collation iterator\n"); |
| 1439 return; |
| 1440 } |
| 1441 while (ce != UCOL_NULLORDER) { |
| 1442 if (ce != (uint32_t)ucol_next(iter, &status) || |
| 1443 U_FAILURE(status)) { |
| 1444 log_err("Discontiguos contraction test mismatch\n"); |
| 1445 return; |
| 1446 } |
| 1447 ce = ucol_next(resultiter, &status); |
| 1448 if (U_FAILURE(status)) { |
| 1449 log_err("Error getting next collation element\n"); |
| 1450 return; |
| 1451 } |
| 1452 } |
| 1453 s = e + 1; |
| 1454 if (*e == 0) { |
| 1455 break; |
| 1456 } |
| 1457 } |
| 1458 ucol_reset(iter); |
| 1459 backAndForth(iter); |
| 1460 count ++; |
| 1461 } |
| 1462 ucol_closeElements(resultiter); |
| 1463 ucol_closeElements(iter); |
| 1464 ucol_close(coll); |
| 1465 } |
| 1466 |
| 1467 static void TestCEBufferOverflow() |
| 1468 { |
| 1469 UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1]; |
| 1470 UErrorCode status = U_ZERO_ERROR; |
| 1471 UChar rule[10]; |
| 1472 UCollator *coll; |
| 1473 UCollationElements *iter; |
| 1474 |
| 1475 u_uastrcpy(rule, "&z < AB"); |
| 1476 coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH,
NULL,&status); |
| 1477 if (U_FAILURE(status)) { |
| 1478 log_err_status(status, "Rule based collator not created for testing ce b
uffer overflow -> %s\n", u_errorName(status)); |
| 1479 return; |
| 1480 } |
| 1481 |
| 1482 /* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic |
| 1483 test. this will cause an overflow in getPrev */ |
| 1484 str[0] = 0x0041; /* 'A' */ |
| 1485 /*uprv_memset(str + 1, 0xE0, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);*/ |
| 1486 uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE); |
| 1487 str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */ |
| 1488 iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1, |
| 1489 &status); |
| 1490 if (ucol_previous(iter, &status) == UCOL_NULLORDER || |
| 1491 status == U_BUFFER_OVERFLOW_ERROR) { |
| 1492 log_err("CE buffer should not overflow with long string of trail surroga
tes\n"); |
| 1493 } |
| 1494 ucol_closeElements(iter); |
| 1495 ucol_close(coll); |
| 1496 } |
| 1497 |
| 1498 /** |
| 1499 * Checking collation element validity. |
| 1500 */ |
| 1501 #define MAX_CODEPOINTS_TO_SHOW 10 |
| 1502 static void showCodepoints(const UChar *codepoints, int length, char * codepoint
Text) { |
| 1503 int i, lengthToUse = length; |
| 1504 if (lengthToUse > MAX_CODEPOINTS_TO_SHOW) { |
| 1505 lengthToUse = MAX_CODEPOINTS_TO_SHOW; |
| 1506 } |
| 1507 for (i = 0; i < lengthToUse; ++i) { |
| 1508 int bytesWritten = sprintf(codepointText, " %04X", *codepoints++); |
| 1509 if (bytesWritten <= 0) { |
| 1510 break; |
| 1511 } |
| 1512 codepointText += bytesWritten; |
| 1513 } |
| 1514 if (i < length) { |
| 1515 sprintf(codepointText, " ..."); |
| 1516 } |
| 1517 } |
| 1518 |
| 1519 static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints, |
| 1520 int length) |
| 1521 { |
| 1522 UErrorCode status = U_ZERO_ERROR; |
| 1523 UCollationElements *iter = ucol_openElements(coll, codepoints, length, |
| 1524 &status); |
| 1525 UBool result = FALSE; |
| 1526 UBool primaryDone = FALSE, secondaryDone = FALSE, tertiaryDone = FALSE; |
| 1527 const char * collLocale; |
| 1528 |
| 1529 if (U_FAILURE(status)) { |
| 1530 log_err("Error creating iterator for testing validity\n"); |
| 1531 return FALSE; |
| 1532 } |
| 1533 collLocale = ucol_getLocale(coll, ULOC_VALID_LOCALE, &status); |
| 1534 if (U_FAILURE(status) || collLocale==NULL) { |
| 1535 status = U_ZERO_ERROR; |
| 1536 collLocale = "?"; |
| 1537 } |
| 1538 |
| 1539 for (;;) { |
| 1540 uint32_t ce = ucol_next(iter, &status); |
| 1541 uint32_t primary, p1, p2, secondary, tertiary; |
| 1542 if (ce == UCOL_NULLORDER) { |
| 1543 result = TRUE; |
| 1544 break; |
| 1545 } |
| 1546 if (ce == 0) { |
| 1547 continue; |
| 1548 } |
| 1549 if (ce == 0x02000202) { |
| 1550 /* special CE for merge-sort character */ |
| 1551 if (*codepoints == 0xFFFE /* && length == 1 */) { |
| 1552 /* |
| 1553 * Note: We should check for length==1 but the token parser appe
ars |
| 1554 * to give us trailing NUL characters. |
| 1555 * TODO: Ticket #8047: Change TestCEValidity to use ucol_getTail
oredSet() |
| 1556 * rather than the internal collation rule p
arser |
| 1557 */ |
| 1558 continue; |
| 1559 } else { |
| 1560 log_err("Special 02/02/02 weight for code point U+%04X [len %d]
!= U+FFFE\n", |
| 1561 (int)*codepoints, (int)length); |
| 1562 break; |
| 1563 } |
| 1564 } |
| 1565 primary = UCOL_PRIMARYORDER(ce); |
| 1566 p1 = primary >> 8; |
| 1567 p2 = primary & 0xFF; |
| 1568 secondary = UCOL_SECONDARYORDER(ce); |
| 1569 tertiary = UCOL_TERTIARYORDER(ce) & UCOL_REMOVE_CONTINUATION; |
| 1570 |
| 1571 if (!isContinuation(ce)) { |
| 1572 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { |
| 1573 log_err("Empty CE %08lX except for case bits\n", (long)ce); |
| 1574 break; |
| 1575 } |
| 1576 if (p1 == 0) { |
| 1577 if (p2 != 0) { |
| 1578 log_err("Primary 00 xx in %08lX\n", (long)ce); |
| 1579 break; |
| 1580 } |
| 1581 primaryDone = TRUE; |
| 1582 } else { |
| 1583 if (p1 <= 2 || p1 >= 0xF0) { |
| 1584 /* Primary first bytes F0..FF are specials. */ |
| 1585 log_err("Primary first byte of %08lX out of range\n", (long)
ce); |
| 1586 break; |
| 1587 } |
| 1588 if (p2 == 0) { |
| 1589 primaryDone = TRUE; |
| 1590 } else { |
| 1591 if (p2 <= 3 || p2 >= 0xFF) { |
| 1592 /* Primary second bytes 03 and FF are sort key compressi
on terminators. */ |
| 1593 log_err("Primary second byte of %08lX out of range\n", (
long)ce); |
| 1594 break; |
| 1595 } |
| 1596 primaryDone = FALSE; |
| 1597 } |
| 1598 } |
| 1599 if (secondary == 0) { |
| 1600 if (primary != 0) { |
| 1601 log_err("Primary!=0 secondary==0 in %08lX\n", (long)ce); |
| 1602 break; |
| 1603 } |
| 1604 secondaryDone = TRUE; |
| 1605 } else { |
| 1606 if (secondary <= 2 || |
| 1607 (UCOL_BYTE_COMMON < secondary && secondary <= (UCOL_BYTE_COM
MON + 0x80)) |
| 1608 ) { |
| 1609 /* Secondary first bytes common+1..+0x80 are used for sort k
ey compression. */ |
| 1610 log_err("Secondary byte of %08lX out of range\n", (long)ce); |
| 1611 break; |
| 1612 } |
| 1613 secondaryDone = FALSE; |
| 1614 } |
| 1615 if (tertiary == 0) { |
| 1616 /* We know that ce != 0. */ |
| 1617 log_err("Primary!=0 or secondary!=0 but tertiary==0 in %08lX\n",
(long)ce); |
| 1618 break; |
| 1619 } |
| 1620 if (tertiary <= 2) { |
| 1621 log_err("Tertiary byte of %08lX out of range\n", (long)ce); |
| 1622 break; |
| 1623 } |
| 1624 tertiaryDone = FALSE; |
| 1625 } else { |
| 1626 if ((ce & UCOL_REMOVE_CONTINUATION) == 0) { |
| 1627 log_err("Empty continuation %08lX\n", (long)ce); |
| 1628 break; |
| 1629 } |
| 1630 if (primaryDone && primary != 0) { |
| 1631 log_err("Primary was done but continues in %08lX\n", (long)ce); |
| 1632 break; |
| 1633 } |
| 1634 if (p1 == 0) { |
| 1635 if (p2 != 0) { |
| 1636 log_err("Primary 00 xx in %08lX\n", (long)ce); |
| 1637 break; |
| 1638 } |
| 1639 primaryDone = TRUE; |
| 1640 } else { |
| 1641 if (p1 <= 2) { |
| 1642 log_err("Primary first byte of %08lX out of range\n", (long)
ce); |
| 1643 break; |
| 1644 } |
| 1645 if (p2 == 0) { |
| 1646 primaryDone = TRUE; |
| 1647 } else { |
| 1648 if (p2 <= 3) { |
| 1649 log_err("Primary second byte of %08lX out of range\n", (
long)ce); |
| 1650 break; |
| 1651 } |
| 1652 } |
| 1653 } |
| 1654 if (secondaryDone && secondary != 0) { |
| 1655 log_err("Secondary was done but continues in %08lX\n", (long)ce)
; |
| 1656 break; |
| 1657 } |
| 1658 if (secondary == 0) { |
| 1659 secondaryDone = TRUE; |
| 1660 } else { |
| 1661 if (secondary <= 2) { |
| 1662 log_err("Secondary byte of %08lX out of range\n", (long)ce); |
| 1663 break; |
| 1664 } |
| 1665 } |
| 1666 if (tertiaryDone && tertiary != 0) { |
| 1667 log_err("Tertiary was done but continues in %08lX\n", (long)ce); |
| 1668 break; |
| 1669 } |
| 1670 if (tertiary == 0) { |
| 1671 tertiaryDone = TRUE; |
| 1672 } else if (tertiary <= 2) { |
| 1673 log_err("Tertiary byte of %08lX out of range\n", (long)ce); |
| 1674 break; |
| 1675 } |
| 1676 } |
| 1677 } |
| 1678 if (!result) { |
| 1679 char codepointText[5*MAX_CODEPOINTS_TO_SHOW + 5]; |
| 1680 showCodepoints(codepoints, length, codepointText); |
| 1681 log_err("Locale: %s Code point string: %s\n", collLocale, codepointText
); |
| 1682 } |
| 1683 ucol_closeElements(iter); |
| 1684 return result; |
| 1685 } |
| 1686 |
| 1687 static void TestCEValidity() |
| 1688 { |
| 1689 /* testing UCA collation elements */ |
| 1690 UErrorCode status = U_ZERO_ERROR; |
| 1691 /* en_US has no tailorings */ |
| 1692 UCollator *coll = ucol_open("root", &status); |
| 1693 /* tailored locales */ |
| 1694 char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh
__PINYIN"}; |
| 1695 const char *loc; |
| 1696 FileStream *file = NULL; |
| 1697 char line[2048]; |
| 1698 UChar codepoints[11]; |
| 1699 int count = 0; |
| 1700 int maxCount = 0; |
| 1701 UChar contextCPs[3]; |
| 1702 UChar32 c; |
| 1703 UParseError parseError; |
| 1704 if (U_FAILURE(status)) { |
| 1705 log_err_status(status, "en_US collator creation failed -> %s\n", u_error
Name(status)); |
| 1706 return; |
| 1707 } |
| 1708 log_verbose("Testing UCA elements\n"); |
| 1709 file = getFractionalUCA(); |
| 1710 if (file == NULL) { |
| 1711 log_err("Fractional UCA data can not be opened\n"); |
| 1712 return; |
| 1713 } |
| 1714 |
| 1715 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
| 1716 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || |
| 1717 line[0] == 0x000D || line[0] == '[') { |
| 1718 continue; |
| 1719 } |
| 1720 |
| 1721 getCodePoints(line, codepoints, contextCPs); |
| 1722 checkCEValidity(coll, codepoints, u_strlen(codepoints)); |
| 1723 } |
| 1724 |
| 1725 log_verbose("Testing UCA elements for the whole range of unicode characters\
n"); |
| 1726 for (c = 0; c <= 0xffff; ++c) { |
| 1727 if (u_isdefined(c)) { |
| 1728 codepoints[0] = (UChar)c; |
| 1729 checkCEValidity(coll, codepoints, 1); |
| 1730 } |
| 1731 } |
| 1732 for (; c <= 0x10ffff; ++c) { |
| 1733 if (u_isdefined(c)) { |
| 1734 int32_t i = 0; |
| 1735 U16_APPEND_UNSAFE(codepoints, i, c); |
| 1736 checkCEValidity(coll, codepoints, i); |
| 1737 } |
| 1738 } |
| 1739 |
| 1740 ucol_close(coll); |
| 1741 |
| 1742 /* testing tailored collation elements */ |
| 1743 log_verbose("Testing tailored elements\n"); |
| 1744 if(getTestOption(QUICK_OPTION)) { |
| 1745 maxCount = sizeof(locale)/sizeof(locale[0]); |
| 1746 } else { |
| 1747 maxCount = uloc_countAvailable(); |
| 1748 } |
| 1749 while (count < maxCount) { |
| 1750 const UChar *rules = NULL, |
| 1751 *current = NULL; |
| 1752 UChar *rulesCopy = NULL; |
| 1753 int32_t ruleLen = 0; |
| 1754 |
| 1755 uint32_t chOffset = 0; |
| 1756 uint32_t chLen = 0; |
| 1757 uint32_t exOffset = 0; |
| 1758 uint32_t exLen = 0; |
| 1759 uint32_t prefixOffset = 0; |
| 1760 uint32_t prefixLen = 0; |
| 1761 UBool startOfRules = TRUE; |
| 1762 UColOptionSet opts; |
| 1763 |
| 1764 UColTokenParser src; |
| 1765 uint32_t strength = 0; |
| 1766 uint16_t specs = 0; |
| 1767 if(getTestOption(QUICK_OPTION)) { |
| 1768 loc = locale[count]; |
| 1769 } else { |
| 1770 loc = uloc_getAvailable(count); |
| 1771 if(!hasCollationElements(loc)) { |
| 1772 count++; |
| 1773 continue; |
| 1774 } |
| 1775 } |
| 1776 |
| 1777 uprv_memset(&src, 0, sizeof(UColTokenParser)); |
| 1778 |
| 1779 log_verbose("Testing CEs for %s\n", loc); |
| 1780 |
| 1781 coll = ucol_open(loc, &status); |
| 1782 if (U_FAILURE(status)) { |
| 1783 log_err("%s collator creation failed\n", loc); |
| 1784 return; |
| 1785 } |
| 1786 |
| 1787 src.opts = &opts; |
| 1788 rules = ucol_getRules(coll, &ruleLen); |
| 1789 |
| 1790 if (ruleLen > 0) { |
| 1791 rulesCopy = (UChar *)uprv_malloc((ruleLen + |
| 1792 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); |
| 1793 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); |
| 1794 src.current = src.source = rulesCopy; |
| 1795 src.end = rulesCopy + ruleLen; |
| 1796 src.extraCurrent = src.end; |
| 1797 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
| 1798 |
| 1799 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse
NextToken can cause the pointer to |
| 1800 the rules copy in src.source to get reallocated, freeing the
original pointer in rulesCopy */ |
| 1801 while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parse
Error,&status)) != NULL) { |
| 1802 strength = src.parsedToken.strength; |
| 1803 chOffset = src.parsedToken.charsOffset; |
| 1804 chLen = src.parsedToken.charsLen; |
| 1805 exOffset = src.parsedToken.extensionOffset; |
| 1806 exLen = src.parsedToken.extensionLen; |
| 1807 prefixOffset = src.parsedToken.prefixOffset; |
| 1808 prefixLen = src.parsedToken.prefixLen; |
| 1809 specs = src.parsedToken.flags; |
| 1810 |
| 1811 startOfRules = FALSE; |
| 1812 uprv_memcpy(codepoints, src.source + chOffset, |
| 1813 chLen * sizeof(UChar)); |
| 1814 codepoints[chLen] = 0; |
| 1815 checkCEValidity(coll, codepoints, chLen); |
| 1816 } |
| 1817 uprv_free(src.source); |
| 1818 } |
| 1819 |
| 1820 ucol_close(coll); |
| 1821 count ++; |
| 1822 } |
| 1823 T_FileStream_close(file); |
| 1824 } |
| 1825 |
| 1826 static void printSortKeyError(const UChar *codepoints, int length, |
| 1827 uint8_t *sortkey, int sklen) |
| 1828 { |
| 1829 int count = 0; |
| 1830 log_err("Sortkey not valid for "); |
| 1831 while (length > 0) { |
| 1832 log_err("0x%04x ", *codepoints); |
| 1833 length --; |
| 1834 codepoints ++; |
| 1835 } |
| 1836 log_err("\nSortkey : "); |
| 1837 while (count < sklen) { |
| 1838 log_err("0x%02x ", sortkey[count]); |
| 1839 count ++; |
| 1840 } |
| 1841 log_err("\n"); |
| 1842 } |
| 1843 |
| 1844 /** |
| 1845 * Checking sort key validity for all levels |
| 1846 */ |
| 1847 static UBool checkSortKeyValidity(UCollator *coll, |
| 1848 const UChar *codepoints, |
| 1849 int length) |
| 1850 { |
| 1851 UErrorCode status = U_ZERO_ERROR; |
| 1852 UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY, |
| 1853 UCOL_TERTIARY, UCOL_QUATERNARY, |
| 1854 UCOL_IDENTICAL}; |
| 1855 int strengthlen = 5; |
| 1856 int strengthIndex = 0; |
| 1857 int caselevel = 0; |
| 1858 |
| 1859 while (caselevel < 1) { |
| 1860 if (caselevel == 0) { |
| 1861 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status); |
| 1862 } |
| 1863 else { |
| 1864 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status); |
| 1865 } |
| 1866 |
| 1867 while (strengthIndex < strengthlen) { |
| 1868 int count01 = 0; |
| 1869 uint32_t count = 0; |
| 1870 uint8_t sortkey[128]; |
| 1871 uint32_t sklen; |
| 1872 |
| 1873 ucol_setStrength(coll, strength[strengthIndex]); |
| 1874 sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128); |
| 1875 while (sortkey[count] != 0) { |
| 1876 if (sortkey[count] == 2 || (sortkey[count] == 3 && count01 > 0 &
& strengthIndex != 4)) { |
| 1877 printSortKeyError(codepoints, length, sortkey, sklen); |
| 1878 return FALSE; |
| 1879 } |
| 1880 if (sortkey[count] == 1) { |
| 1881 count01 ++; |
| 1882 } |
| 1883 count ++; |
| 1884 } |
| 1885 |
| 1886 if (count + 1 != sklen || (count01 != strengthIndex + caselevel)) { |
| 1887 printSortKeyError(codepoints, length, sortkey, sklen); |
| 1888 return FALSE; |
| 1889 } |
| 1890 strengthIndex ++; |
| 1891 } |
| 1892 caselevel ++; |
| 1893 } |
| 1894 return TRUE; |
| 1895 } |
| 1896 |
| 1897 static void TestSortKeyValidity(void) |
| 1898 { |
| 1899 /* testing UCA collation elements */ |
| 1900 UErrorCode status = U_ZERO_ERROR; |
| 1901 /* en_US has no tailorings */ |
| 1902 UCollator *coll = ucol_open("en_US", &status); |
| 1903 /* tailored locales */ |
| 1904 char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"}; |
| 1905 FileStream *file = NULL; |
| 1906 char line[2048]; |
| 1907 UChar codepoints[10]; |
| 1908 int count = 0; |
| 1909 UChar contextCPs[5]; |
| 1910 UParseError parseError; |
| 1911 if (U_FAILURE(status)) { |
| 1912 log_err_status(status, "en_US collator creation failed -> %s\n", u_error
Name(status)); |
| 1913 return; |
| 1914 } |
| 1915 log_verbose("Testing UCA elements\n"); |
| 1916 file = getFractionalUCA(); |
| 1917 if (file == NULL) { |
| 1918 log_err("Fractional UCA data can not be opened\n"); |
| 1919 return; |
| 1920 } |
| 1921 |
| 1922 while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) { |
| 1923 if(line[0] == 0 || line[0] == '#' || line[0] == '\n' || |
| 1924 line[0] == 0x000D || line[0] == '[') { |
| 1925 continue; |
| 1926 } |
| 1927 |
| 1928 getCodePoints(line, codepoints, contextCPs); |
| 1929 if(codepoints[0] == 0xFFFE) { |
| 1930 /* Skip special merge-sort character U+FFFE which has otherwise ille
gal 02 weight bytes. */ |
| 1931 continue; |
| 1932 } |
| 1933 checkSortKeyValidity(coll, codepoints, u_strlen(codepoints)); |
| 1934 } |
| 1935 |
| 1936 log_verbose("Testing UCA elements for the whole range of unicode characters\
n"); |
| 1937 codepoints[0] = 0; |
| 1938 |
| 1939 while (codepoints[0] < 0xFFFF) { |
| 1940 if (u_isdefined((UChar32)codepoints[0])) { |
| 1941 checkSortKeyValidity(coll, codepoints, 1); |
| 1942 } |
| 1943 codepoints[0] ++; |
| 1944 } |
| 1945 |
| 1946 ucol_close(coll); |
| 1947 |
| 1948 /* testing tailored collation elements */ |
| 1949 log_verbose("Testing tailored elements\n"); |
| 1950 while (count < 5) { |
| 1951 const UChar *rules = NULL, |
| 1952 *current = NULL; |
| 1953 UChar *rulesCopy = NULL; |
| 1954 int32_t ruleLen = 0; |
| 1955 |
| 1956 uint32_t chOffset = 0; |
| 1957 uint32_t chLen = 0; |
| 1958 uint32_t exOffset = 0; |
| 1959 uint32_t exLen = 0; |
| 1960 uint32_t prefixOffset = 0; |
| 1961 uint32_t prefixLen = 0; |
| 1962 UBool startOfRules = TRUE; |
| 1963 UColOptionSet opts; |
| 1964 |
| 1965 UColTokenParser src; |
| 1966 uint32_t strength = 0; |
| 1967 uint16_t specs = 0; |
| 1968 |
| 1969 uprv_memset(&src, 0, sizeof(UColTokenParser)); |
| 1970 |
| 1971 coll = ucol_open(locale[count], &status); |
| 1972 if (U_FAILURE(status)) { |
| 1973 log_err("%s collator creation failed\n", locale[count]); |
| 1974 return; |
| 1975 } |
| 1976 |
| 1977 src.opts = &opts; |
| 1978 rules = ucol_getRules(coll, &ruleLen); |
| 1979 |
| 1980 if (ruleLen > 0) { |
| 1981 rulesCopy = (UChar *)uprv_malloc((ruleLen + |
| 1982 UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar)); |
| 1983 uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar)); |
| 1984 src.current = src.source = rulesCopy; |
| 1985 src.end = rulesCopy + ruleLen; |
| 1986 src.extraCurrent = src.end; |
| 1987 src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE; |
| 1988 |
| 1989 /* Note that as a result of tickets 7015 or 6912, ucol_tok_parse
NextToken can cause the pointer to |
| 1990 the rules copy in src.source to get reallocated, freeing the
original pointer in rulesCopy */ |
| 1991 while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseE
rror, &status)) != NULL) { |
| 1992 strength = src.parsedToken.strength; |
| 1993 chOffset = src.parsedToken.charsOffset; |
| 1994 chLen = src.parsedToken.charsLen; |
| 1995 exOffset = src.parsedToken.extensionOffset; |
| 1996 exLen = src.parsedToken.extensionLen; |
| 1997 prefixOffset = src.parsedToken.prefixOffset; |
| 1998 prefixLen = src.parsedToken.prefixLen; |
| 1999 specs = src.parsedToken.flags; |
| 2000 |
| 2001 startOfRules = FALSE; |
| 2002 uprv_memcpy(codepoints, src.source + chOffset, |
| 2003 chLen * sizeof(UChar)); |
| 2004 codepoints[chLen] = 0; |
| 2005 if(codepoints[0] == 0xFFFE) { |
| 2006 /* Skip special merge-sort character U+FFFE which has otherw
ise illegal 02 weight bytes. */ |
| 2007 continue; |
| 2008 } |
| 2009 checkSortKeyValidity(coll, codepoints, chLen); |
| 2010 } |
| 2011 uprv_free(src.source); |
| 2012 } |
| 2013 |
| 2014 ucol_close(coll); |
| 2015 count ++; |
| 2016 } |
| 2017 T_FileStream_close(file); |
| 2018 } |
| 2019 |
| 2020 #endif /* #if !UCONFIG_NO_COLLATION */ |
OLD | NEW |