OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * Copyright (c) 1997-2010, International Business Machines |
| 3 * Corporation and others. All Rights Reserved. |
| 4 ******************************************************************** |
| 5 * |
| 6 * File UCNVSELTST.C |
| 7 * |
| 8 * Modification History: |
| 9 * Name Description |
| 10 * MOHAMED ELDAWY Creation |
| 11 ******************************************************************** |
| 12 */ |
| 13 |
| 14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ |
| 15 |
| 16 #include "ucnvseltst.h" |
| 17 |
| 18 #include <stdio.h> |
| 19 |
| 20 #include "unicode/utypes.h" |
| 21 #include "unicode/ucnvsel.h" |
| 22 #include "unicode/ustring.h" |
| 23 #include "cmemory.h" |
| 24 #include "cstring.h" |
| 25 #include "propsvec.h" |
| 26 |
| 27 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) |
| 28 |
| 29 #define FILENAME_BUFFER 1024 |
| 30 |
| 31 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_
FILE_SEP_STRING |
| 32 |
| 33 static void TestSelector(void); |
| 34 static void TestUPropsVector(void); |
| 35 void addCnvSelTest(TestNode** root); /* Declaration required to suppress compil
er warnings. */ |
| 36 |
| 37 void addCnvSelTest(TestNode** root) |
| 38 { |
| 39 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); |
| 40 addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); |
| 41 } |
| 42 |
| 43 static const char **gAvailableNames = NULL; |
| 44 static int32_t gCountAvailable = 0; |
| 45 |
| 46 static UBool |
| 47 getAvailableNames() { |
| 48 int32_t i; |
| 49 if (gAvailableNames != NULL) { |
| 50 return TRUE; |
| 51 } |
| 52 gCountAvailable = ucnv_countAvailable(); |
| 53 if (gCountAvailable == 0) { |
| 54 log_data_err("No converters available.\n"); |
| 55 return FALSE; |
| 56 } |
| 57 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const ch
ar *)); |
| 58 if (gAvailableNames == NULL) { |
| 59 log_err("unable to allocate memory for %ld available converter names\n", |
| 60 (long)gCountAvailable); |
| 61 return FALSE; |
| 62 } |
| 63 for (i = 0; i < gCountAvailable; ++i) { |
| 64 gAvailableNames[i] = ucnv_getAvailableName(i); |
| 65 } |
| 66 return TRUE; |
| 67 } |
| 68 |
| 69 static void |
| 70 releaseAvailableNames() { |
| 71 uprv_free((void *)gAvailableNames); |
| 72 gAvailableNames = NULL; |
| 73 gCountAvailable = 0; |
| 74 } |
| 75 |
| 76 static const char ** |
| 77 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { |
| 78 const char **names; |
| 79 int32_t i; |
| 80 |
| 81 *pCount = 0; |
| 82 if (count <= 0) { |
| 83 return NULL; |
| 84 } |
| 85 names = (const char **)uprv_malloc(count * sizeof(char *)); |
| 86 if (names == NULL) { |
| 87 log_err("memory allocation error for %ld pointers\n", (long)count); |
| 88 return NULL; |
| 89 } |
| 90 if (step == 0 && count > 0) { |
| 91 step = 1; |
| 92 } |
| 93 for (i = 0; i < count; ++i) { |
| 94 if (0 <= start && start < gCountAvailable) { |
| 95 names[i] = gAvailableNames[start]; |
| 96 start += step; |
| 97 ++*pCount; |
| 98 } |
| 99 } |
| 100 return names; |
| 101 } |
| 102 |
#if 0
/*
 * Disabled because ucnvsel_open() does not support "no encodings":
 * Given 0 encodings it will open a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
  const char **none = NULL;

  *pCount = 0;
  return none;
}
#endif
| 114 |
/* Picks a single converter name: the one at index 1 of the available list. */
static const char **
getOneEncoding(int32_t *pCount) {
  const int32_t first = 1, stride = 0, howMany = 1;

  return getEncodings(first, stride, howMany, pCount);
}
| 119 |
/* Picks up to 25 converter names from the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
  const int32_t first = 0, stride = 2, howMany = 25;

  return getEncodings(first, stride, howMany, pCount);
}
| 124 |
| 125 static const char ** |
| 126 getMiddleEncodings(int32_t *pCount) { |
| 127 return getEncodings(gCountAvailable - 12, 1, 22, pCount); |
| 128 } |
| 129 |
| 130 static const char ** |
| 131 getLastEncodings(int32_t *pCount) { |
| 132 return getEncodings(gCountAvailable - 1, -1, 25, pCount); |
| 133 } |
| 134 |
| 135 static const char ** |
| 136 getSomeEncodings(int32_t *pCount) { |
| 137 /* 20 evenly distributed */ |
| 138 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); |
| 139 } |
| 140 |
| 141 static const char ** |
| 142 getEveryThirdEncoding(int32_t *pCount) { |
| 143 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); |
| 144 } |
| 145 |
| 146 static const char ** |
| 147 getAllEncodings(int32_t *pCount) { |
| 148 return getEncodings(0, 1, gCountAvailable, pCount); |
| 149 } |
| 150 |
| 151 typedef const char **GetEncodingsFn(int32_t *); |
| 152 |
| 153 static GetEncodingsFn *const getEncodingsFns[] = { |
| 154 getOneEncoding, |
| 155 getFirstEvenEncodings, |
| 156 getMiddleEncodings, |
| 157 getLastEncodings, |
| 158 getSomeEncodings, |
| 159 getEveryThirdEncoding, |
| 160 getAllEncodings |
| 161 }; |
| 162 |
| 163 static FILE *fopenOrError(const char *filename) { |
| 164 int32_t needLen; |
| 165 FILE *f; |
| 166 char fnbuf[FILENAME_BUFFER]; |
| 167 const char* directory= ctest_dataSrcDir(); |
| 168 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename
)+1; |
| 169 if(needLen > FILENAME_BUFFER) { |
| 170 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d bu
t buffer is %d\n", |
| 171 filename, needLen, FILENAME_BUFFER); |
| 172 return NULL; |
| 173 } |
| 174 |
| 175 strcpy(fnbuf, directory); |
| 176 strcat(fnbuf, TDSRCPATH); |
| 177 strcat(fnbuf, filename); |
| 178 |
| 179 f = fopen(fnbuf, "rb"); |
| 180 |
| 181 if(f == NULL) { |
| 182 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); |
| 183 } |
| 184 return f; |
| 185 } |
| 186 |
/* The test text: one buffer holding several NUL-separated strings, plus an iteration cursor. */
typedef struct TestText {
  char *text;       /* start of the text buffer */
  char *textLimit;  /* end of the text; text_open() stores a NUL here */
  char *limit;      /* end of the string delivered last; equals text before the first one */
  int32_t number;   /* zero-based index of the string delivered last */
} TestText;
| 192 |
| 193 static void |
| 194 text_reset(TestText *tt) { |
| 195 tt->limit = tt->text; |
| 196 tt->number = 0; |
| 197 } |
| 198 |
| 199 static char * |
| 200 text_nextString(TestText *tt, int32_t *pLength) { |
| 201 char *s = tt->limit; |
| 202 if (s == tt->textLimit) { |
| 203 /* we already delivered the last string */ |
| 204 return NULL; |
| 205 } else if (s == tt->text) { |
| 206 /* first string */ |
| 207 if ((tt->textLimit - tt->text) >= 3 && |
| 208 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf |
| 209 ) { |
| 210 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ |
| 211 } |
| 212 } else { |
| 213 /* skip the string terminator */ |
| 214 ++s; |
| 215 ++tt->number; |
| 216 } |
| 217 |
| 218 /* find the end of this string */ |
| 219 tt->limit = uprv_strchr(s, 0); |
| 220 *pLength = (int32_t)(tt->limit - s); |
| 221 return s; |
| 222 } |
| 223 |
| 224 static UBool |
| 225 text_open(TestText *tt) { |
| 226 FILE *f; |
| 227 char *s; |
| 228 int32_t length; |
| 229 uprv_memset(tt, 0, sizeof(TestText)); |
| 230 f = fopenOrError("ConverterSelectorTestUTF8.txt"); |
| 231 if(!f) { |
| 232 return FALSE; |
| 233 } |
| 234 fseek(f, 0, SEEK_END); |
| 235 length = (int32_t)ftell(f); |
| 236 fseek(f, 0, SEEK_SET); |
| 237 tt->text = (char *)uprv_malloc(length + 1); |
| 238 if (tt->text == NULL) { |
| 239 fclose(f); |
| 240 return FALSE; |
| 241 } |
| 242 if (length != fread(tt->text, 1, length, f)) { |
| 243 log_err("error reading %ld bytes from test text file\n", (long)length); |
| 244 length = 0; |
| 245 uprv_free(tt->text); |
| 246 } |
| 247 fclose(f); |
| 248 tt->textLimit = tt->text + length; |
| 249 *tt->textLimit = 0; |
| 250 /* replace all Unicode '#' (U+0023) with NUL */ |
| 251 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} |
| 252 text_reset(tt); |
| 253 return TRUE; |
| 254 } |
| 255 |
| 256 static void |
| 257 text_close(TestText *tt) { |
| 258 uprv_free(tt->text); |
| 259 } |
| 260 |
| 261 static int32_t findIndex(const char* converterName) { |
| 262 int32_t i; |
| 263 for (i = 0 ; i < gCountAvailable; i++) { |
| 264 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { |
| 265 return i; |
| 266 } |
| 267 } |
| 268 return -1; |
| 269 } |
| 270 |
| 271 static UBool * |
| 272 getResultsManually(const char** encodings, int32_t num_encodings, |
| 273 const char *utf8, int32_t length, |
| 274 const USet* excludedCodePoints, const UConverterUnicodeSet wh
ichSet) { |
| 275 UBool* resultsManually; |
| 276 int32_t i; |
| 277 |
| 278 resultsManually = (UBool*) uprv_malloc(gCountAvailable); |
| 279 uprv_memset(resultsManually, 0, gCountAvailable); |
| 280 |
| 281 for(i = 0 ; i < num_encodings ; i++) { |
| 282 UErrorCode status = U_ZERO_ERROR; |
| 283 /* get unicode set for that converter */ |
| 284 USet* set; |
| 285 UConverter* test_converter; |
| 286 UChar32 cp; |
| 287 int32_t encIndex, offset; |
| 288 |
| 289 set = uset_openEmpty(); |
| 290 test_converter = ucnv_open(encodings[i], &status); |
| 291 ucnv_getUnicodeSet(test_converter, set, |
| 292 whichSet, &status); |
| 293 if (excludedCodePoints != NULL) { |
| 294 uset_addAll(set, excludedCodePoints); |
| 295 } |
| 296 uset_freeze(set); |
| 297 offset = 0; |
| 298 cp = 0; |
| 299 |
| 300 encIndex = findIndex(encodings[i]); |
| 301 /* |
| 302 * The following is almost, but not entirely, the same as |
| 303 * resultsManually[encIndex] = |
| 304 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); |
| 305 * They might be different if the set contains strings, |
| 306 * or if the utf8 string contains an illegal sequence. |
| 307 * |
| 308 * The UConverterSelector does not currently handle strings that can be |
| 309 * converted, and it treats an illegal sequence as convertible |
| 310 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. |
| 311 */ |
| 312 resultsManually[encIndex] = TRUE; |
| 313 while(offset<length) { |
| 314 U8_NEXT(utf8, offset, length, cp); |
| 315 if (cp >= 0 && !uset_contains(set, cp)) { |
| 316 resultsManually[encIndex] = FALSE; |
| 317 break; |
| 318 } |
| 319 } |
| 320 uset_close(set); |
| 321 ucnv_close(test_converter); |
| 322 } |
| 323 return resultsManually; |
| 324 } |
| 325 |
| 326 /* closes res but does not free resultsManually */ |
| 327 static void verifyResult(UEnumeration* res, const UBool *resultsManually) { |
| 328 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool
)); |
| 329 const char* name; |
| 330 UErrorCode status = U_ZERO_ERROR; |
| 331 int32_t i; |
| 332 |
| 333 /* fill the bool for the selector results! */ |
| 334 uprv_memset(resultsFromSystem, 0, gCountAvailable); |
| 335 while ((name = uenum_next(res,NULL, &status)) != NULL) { |
| 336 resultsFromSystem[findIndex(name)] = TRUE; |
| 337 } |
| 338 for(i = 0 ; i < gCountAvailable; i++) { |
| 339 if(resultsManually[i] != resultsFromSystem[i]) { |
| 340 log_err("failure in converter selector\n" |
| 341 "converter %s had conflicting results -- manual: %d, system %d\n", |
| 342 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); |
| 343 } |
| 344 } |
| 345 uprv_free(resultsFromSystem); |
| 346 uenum_close(res); |
| 347 } |
| 348 |
| 349 static UConverterSelector * |
| 350 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *stat
us) { |
| 351 char *new_buffer; |
| 352 int32_t ser_len, ser_len2; |
| 353 /* preflight */ |
| 354 ser_len = ucnvsel_serialize(sel, NULL, 0, status); |
| 355 if (*status != U_BUFFER_OVERFLOW_ERROR) { |
| 356 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)
); |
| 357 return sel; |
| 358 } |
| 359 new_buffer = (char *)uprv_malloc(ser_len); |
| 360 *status = U_ZERO_ERROR; |
| 361 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); |
| 362 if (U_FAILURE(*status) || ser_len != ser_len2) { |
| 363 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); |
| 364 uprv_free(new_buffer); |
| 365 return sel; |
| 366 } |
| 367 ucnvsel_close(sel); |
| 368 uprv_free(*buffer); |
| 369 *buffer = new_buffer; |
| 370 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); |
| 371 if (U_FAILURE(*status)) { |
| 372 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); |
| 373 return NULL; |
| 374 } |
| 375 return sel; |
| 376 } |
| 377 |
| 378 static void TestSelector() |
| 379 { |
| 380 TestText text; |
| 381 USet* excluded_sets[3] = { NULL }; |
| 382 int32_t i, testCaseIdx; |
| 383 |
| 384 if (!getAvailableNames()) { |
| 385 return; |
| 386 } |
| 387 if (!text_open(&text)) { |
| 388 releaseAvailableNames();; |
| 389 } |
| 390 |
| 391 excluded_sets[0] = uset_openEmpty(); |
| 392 for(i = 1 ; i < 3 ; i++) { |
| 393 excluded_sets[i] = uset_open(i*30, i*30+500); |
| 394 } |
| 395 |
| 396 for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++) |
| 397 { |
| 398 int32_t excluded_set_id; |
| 399 int32_t num_encodings; |
| 400 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); |
| 401 if (getTestOption(QUICK_OPTION) && num_encodings > 25) { |
| 402 uprv_free((void *)encodings); |
| 403 continue; |
| 404 } |
| 405 |
| 406 /* |
| 407 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) |
| 408 * |
| 409 * This loop was replaced by the following statement because |
| 410 * the loop made the test run longer without adding to the code coverage. |
| 411 * The handling of the exclusion set is independent of the |
| 412 * set of encodings, so there is no need to test every combination. |
| 413 */ |
| 414 excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets); |
| 415 { |
| 416 UConverterSelector *sel_rt, *sel_fb; |
| 417 char *buffer_fb = NULL; |
| 418 UErrorCode status = U_ZERO_ERROR; |
| 419 sel_rt = ucnvsel_open(encodings, num_encodings, |
| 420 excluded_sets[excluded_set_id], |
| 421 UCNV_ROUNDTRIP_SET, &status); |
| 422 if (num_encodings == gCountAvailable) { |
| 423 /* test the special "all converters" parameter values */ |
| 424 sel_fb = ucnvsel_open(NULL, 0, |
| 425 excluded_sets[excluded_set_id], |
| 426 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); |
| 427 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { |
| 428 /* test that a NULL set gives the same results as an empty set */ |
| 429 sel_fb = ucnvsel_open(encodings, num_encodings, |
| 430 NULL, |
| 431 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); |
| 432 } else { |
| 433 sel_fb = ucnvsel_open(encodings, num_encodings, |
| 434 excluded_sets[excluded_set_id], |
| 435 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); |
| 436 } |
| 437 if (U_FAILURE(status)) { |
| 438 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_err
orName(status)); |
| 439 ucnvsel_close(sel_rt); |
| 440 uprv_free((void *)encodings); |
| 441 continue; |
| 442 } |
| 443 |
| 444 text_reset(&text); |
| 445 for (;;) { |
| 446 UBool *manual_rt, *manual_fb; |
| 447 static UChar utf16[10000]; |
| 448 char *s; |
| 449 int32_t length8, length16; |
| 450 |
| 451 s = text_nextString(&text, &length8); |
| 452 if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { |
| 453 break; |
| 454 } |
| 455 |
| 456 manual_rt = getResultsManually(encodings, num_encodings, |
| 457 s, length8, |
| 458 excluded_sets[excluded_set_id], |
| 459 UCNV_ROUNDTRIP_SET); |
| 460 manual_fb = getResultsManually(encodings, num_encodings, |
| 461 s, length8, |
| 462 excluded_sets[excluded_set_id], |
| 463 UCNV_ROUNDTRIP_AND_FALLBACK_SET); |
| 464 /* UTF-8 with length */ |
| 465 status = U_ZERO_ERROR; |
| 466 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_
rt); |
| 467 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_
fb); |
| 468 /* UTF-8 NUL-terminated */ |
| 469 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); |
| 470 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); |
| 471 |
| 472 u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status); |
| 473 if (U_FAILURE(status)) { |
| 474 log_err("error converting the test text (string %ld) to UTF-16 - %s\n"
, |
| 475 (long)text.number, u_errorName(status)); |
| 476 } else { |
| 477 if (text.number == 0) { |
| 478 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); |
| 479 } |
| 480 if (U_SUCCESS(status)) { |
| 481 /* UTF-16 with length */ |
| 482 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &statu
s), manual_rt); |
| 483 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &statu
s), manual_fb); |
| 484 /* UTF-16 NUL-terminated */ |
| 485 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), ma
nual_rt); |
| 486 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), ma
nual_fb); |
| 487 } |
| 488 } |
| 489 |
| 490 uprv_free(manual_rt); |
| 491 uprv_free(manual_fb); |
| 492 } |
| 493 ucnvsel_close(sel_rt); |
| 494 ucnvsel_close(sel_fb); |
| 495 uprv_free(buffer_fb); |
| 496 } |
| 497 uprv_free((void *)encodings); |
| 498 } |
| 499 |
| 500 releaseAvailableNames(); |
| 501 text_close(&text); |
| 502 for(i = 0 ; i < 3 ; i++) { |
| 503 uset_close(excluded_sets[i]); |
| 504 } |
| 505 } |
| 506 |
| 507 /* Improve code coverage of UPropsVectors */ |
| 508 static void TestUPropsVector() { |
| 509 uint32_t value; |
| 510 UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 511 UPropsVectors *pv = upvec_open(100, &errorCode); |
| 512 if (pv != NULL) { |
| 513 log_err("Should have returned NULL if UErrorCode is an error."); |
| 514 return; |
| 515 } |
| 516 errorCode = U_ZERO_ERROR; |
| 517 pv = upvec_open(-1, &errorCode); |
| 518 if (pv != NULL || U_SUCCESS(errorCode)) { |
| 519 log_err("Should have returned NULL if column is less than 0.\n"); |
| 520 return; |
| 521 } |
| 522 errorCode = U_ZERO_ERROR; |
| 523 pv = upvec_open(100, &errorCode); |
| 524 if (pv == NULL || U_FAILURE(errorCode)) { |
| 525 log_err("Unable to open UPropsVectors.\n"); |
| 526 return; |
| 527 } |
| 528 |
| 529 if (upvec_getValue(pv, 0, 1) != 0) { |
| 530 log_err("upvec_getValue should return 0.\n"); |
| 531 } |
| 532 if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { |
| 533 log_err("upvec_getRow should not return NULL.\n"); |
| 534 } |
| 535 if (upvec_getArray(pv, NULL, NULL) != NULL) { |
| 536 log_err("upvec_getArray should return NULL.\n"); |
| 537 } |
| 538 |
| 539 upvec_close(pv); |
| 540 } |
OLD | NEW |