OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * Copyright (c) 1997-2014, International Business Machines | |
3 * Corporation and others. All Rights Reserved. | |
4 ******************************************************************** | |
5 * | |
6 * File UCNVSELTST.C | |
7 * | |
8 * Modification History: | |
9 * Name Description | |
10 * MOHAMED ELDAWY Creation | |
11 ******************************************************************** | |
12 */ | |
13 | |
14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ | |
15 | |
16 #include "ucnvseltst.h" | |
17 | |
18 #include <stdio.h> | |
19 | |
20 #include "unicode/utypes.h" | |
21 #include "unicode/ucnvsel.h" | |
22 #include "unicode/ustring.h" | |
23 #include "cmemory.h" | |
24 #include "cstring.h" | |
25 #include "propsvec.h" | |
26 | |
27 #define FILENAME_BUFFER 1024 | |
28 | |
29 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_
FILE_SEP_STRING | |
30 | |
31 static void TestSelector(void); | |
32 static void TestUPropsVector(void); | |
33 void addCnvSelTest(TestNode** root); /* Declaration required to suppress compil
er warnings. */ | |
34 | |
35 void addCnvSelTest(TestNode** root) | |
36 { | |
37 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); | |
38 addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); | |
39 } | |
40 | |
41 static const char **gAvailableNames = NULL; | |
42 static int32_t gCountAvailable = 0; | |
43 | |
44 static UBool | |
45 getAvailableNames() { | |
46 int32_t i; | |
47 if (gAvailableNames != NULL) { | |
48 return TRUE; | |
49 } | |
50 gCountAvailable = ucnv_countAvailable(); | |
51 if (gCountAvailable == 0) { | |
52 log_data_err("No converters available.\n"); | |
53 return FALSE; | |
54 } | |
55 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const ch
ar *)); | |
56 if (gAvailableNames == NULL) { | |
57 log_err("unable to allocate memory for %ld available converter names\n", | |
58 (long)gCountAvailable); | |
59 return FALSE; | |
60 } | |
61 for (i = 0; i < gCountAvailable; ++i) { | |
62 gAvailableNames[i] = ucnv_getAvailableName(i); | |
63 } | |
64 return TRUE; | |
65 } | |
66 | |
67 static void | |
68 releaseAvailableNames() { | |
69 uprv_free((void *)gAvailableNames); | |
70 gAvailableNames = NULL; | |
71 gCountAvailable = 0; | |
72 } | |
73 | |
74 static const char ** | |
75 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { | |
76 const char **names; | |
77 int32_t i; | |
78 | |
79 *pCount = 0; | |
80 if (count <= 0) { | |
81 return NULL; | |
82 } | |
83 names = (const char **)uprv_malloc(count * sizeof(char *)); | |
84 if (names == NULL) { | |
85 log_err("memory allocation error for %ld pointers\n", (long)count); | |
86 return NULL; | |
87 } | |
88 if (step == 0 && count > 0) { | |
89 step = 1; | |
90 } | |
91 for (i = 0; i < count; ++i) { | |
92 if (0 <= start && start < gCountAvailable) { | |
93 names[i] = gAvailableNames[start]; | |
94 start += step; | |
95 ++*pCount; | |
96 } | |
97 } | |
98 return names; | |
99 } | |
100 | |
#if 0
/*
 * Disabled: ucnvsel_open() does not support "no encodings".
 * Given 0 encodings it will open a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
    const char **none = NULL;

    *pCount = 0;
    return none;
}
#endif
112 | |
/* Picks a single name: the one at index 1 of the available list. */
static const char **
getOneEncoding(int32_t *pCount) {
    const int32_t firstIndex = 1;
    const int32_t stride = 0;   /* getEncodings() treats 0 as 1 */
    const int32_t capacity = 1;

    return getEncodings(firstIndex, stride, capacity, pCount);
}
117 | |
/* Picks up to 25 names at the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
    const int32_t firstIndex = 0;
    const int32_t stride = 2;
    const int32_t capacity = 25;

    return getEncodings(firstIndex, stride, capacity, pCount);
}
122 | |
123 static const char ** | |
124 getMiddleEncodings(int32_t *pCount) { | |
125 return getEncodings(gCountAvailable - 12, 1, 22, pCount); | |
126 } | |
127 | |
128 static const char ** | |
129 getLastEncodings(int32_t *pCount) { | |
130 return getEncodings(gCountAvailable - 1, -1, 25, pCount); | |
131 } | |
132 | |
133 static const char ** | |
134 getSomeEncodings(int32_t *pCount) { | |
135 /* 20 evenly distributed */ | |
136 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); | |
137 } | |
138 | |
139 static const char ** | |
140 getEveryThirdEncoding(int32_t *pCount) { | |
141 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); | |
142 } | |
143 | |
144 static const char ** | |
145 getAllEncodings(int32_t *pCount) { | |
146 return getEncodings(0, 1, gCountAvailable, pCount); | |
147 } | |
148 | |
149 typedef const char **GetEncodingsFn(int32_t *); | |
150 | |
151 static GetEncodingsFn *const getEncodingsFns[] = { | |
152 getOneEncoding, | |
153 getFirstEvenEncodings, | |
154 getMiddleEncodings, | |
155 getLastEncodings, | |
156 getSomeEncodings, | |
157 getEveryThirdEncoding, | |
158 getAllEncodings | |
159 }; | |
160 | |
161 static FILE *fopenOrError(const char *filename) { | |
162 int32_t needLen; | |
163 FILE *f; | |
164 char fnbuf[FILENAME_BUFFER]; | |
165 const char* directory= ctest_dataSrcDir(); | |
166 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename
)+1; | |
167 if(needLen > FILENAME_BUFFER) { | |
168 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d bu
t buffer is %d\n", | |
169 filename, needLen, FILENAME_BUFFER); | |
170 return NULL; | |
171 } | |
172 | |
173 strcpy(fnbuf, directory); | |
174 strcat(fnbuf, TDSRCPATH); | |
175 strcat(fnbuf, filename); | |
176 | |
177 f = fopen(fnbuf, "rb"); | |
178 | |
179 if(f == NULL) { | |
180 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); | |
181 } | |
182 return f; | |
183 } | |
184 | |
/*
 * The test text file loaded into memory, plus an iteration cursor over
 * the NUL-separated strings inside it.
 */
typedef struct TestText {
    char *text;       /* start of the loaded text buffer */
    char *textLimit;  /* one past the last byte of the text */
    char *limit;      /* end of the string delivered most recently */
    int32_t number;   /* zero-based index of the string delivered most recently */
} TestText;
190 | |
191 static void | |
192 text_reset(TestText *tt) { | |
193 tt->limit = tt->text; | |
194 tt->number = 0; | |
195 } | |
196 | |
197 static char * | |
198 text_nextString(TestText *tt, int32_t *pLength) { | |
199 char *s = tt->limit; | |
200 if (s == tt->textLimit) { | |
201 /* we already delivered the last string */ | |
202 return NULL; | |
203 } else if (s == tt->text) { | |
204 /* first string */ | |
205 if ((tt->textLimit - tt->text) >= 3 && | |
206 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf | |
207 ) { | |
208 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ | |
209 } | |
210 } else { | |
211 /* skip the string terminator */ | |
212 ++s; | |
213 ++tt->number; | |
214 } | |
215 | |
216 /* find the end of this string */ | |
217 tt->limit = uprv_strchr(s, 0); | |
218 *pLength = (int32_t)(tt->limit - s); | |
219 return s; | |
220 } | |
221 | |
222 static UBool | |
223 text_open(TestText *tt) { | |
224 FILE *f; | |
225 char *s; | |
226 int32_t length; | |
227 uprv_memset(tt, 0, sizeof(TestText)); | |
228 f = fopenOrError("ConverterSelectorTestUTF8.txt"); | |
229 if(!f) { | |
230 return FALSE; | |
231 } | |
232 fseek(f, 0, SEEK_END); | |
233 length = (int32_t)ftell(f); | |
234 fseek(f, 0, SEEK_SET); | |
235 tt->text = (char *)uprv_malloc(length + 1); | |
236 if (tt->text == NULL) { | |
237 fclose(f); | |
238 return FALSE; | |
239 } | |
240 if (length != fread(tt->text, 1, length, f)) { | |
241 log_err("error reading %ld bytes from test text file\n", (long)length); | |
242 length = 0; | |
243 uprv_free(tt->text); | |
244 } | |
245 fclose(f); | |
246 tt->textLimit = tt->text + length; | |
247 *tt->textLimit = 0; | |
248 /* replace all Unicode '#' (U+0023) with NUL */ | |
249 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} | |
250 text_reset(tt); | |
251 return TRUE; | |
252 } | |
253 | |
254 static void | |
255 text_close(TestText *tt) { | |
256 uprv_free(tt->text); | |
257 } | |
258 | |
259 static int32_t findIndex(const char* converterName) { | |
260 int32_t i; | |
261 for (i = 0 ; i < gCountAvailable; i++) { | |
262 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { | |
263 return i; | |
264 } | |
265 } | |
266 return -1; | |
267 } | |
268 | |
269 static UBool * | |
270 getResultsManually(const char** encodings, int32_t num_encodings, | |
271 const char *utf8, int32_t length, | |
272 const USet* excludedCodePoints, const UConverterUnicodeSet wh
ichSet) { | |
273 UBool* resultsManually; | |
274 int32_t i; | |
275 | |
276 resultsManually = (UBool*) uprv_malloc(gCountAvailable); | |
277 uprv_memset(resultsManually, 0, gCountAvailable); | |
278 | |
279 for(i = 0 ; i < num_encodings ; i++) { | |
280 UErrorCode status = U_ZERO_ERROR; | |
281 /* get unicode set for that converter */ | |
282 USet* set; | |
283 UConverter* test_converter; | |
284 UChar32 cp; | |
285 int32_t encIndex, offset; | |
286 | |
287 set = uset_openEmpty(); | |
288 test_converter = ucnv_open(encodings[i], &status); | |
289 ucnv_getUnicodeSet(test_converter, set, | |
290 whichSet, &status); | |
291 if (excludedCodePoints != NULL) { | |
292 uset_addAll(set, excludedCodePoints); | |
293 } | |
294 uset_freeze(set); | |
295 offset = 0; | |
296 cp = 0; | |
297 | |
298 encIndex = findIndex(encodings[i]); | |
299 /* | |
300 * The following is almost, but not entirely, the same as | |
301 * resultsManually[encIndex] = | |
302 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); | |
303 * They might be different if the set contains strings, | |
304 * or if the utf8 string contains an illegal sequence. | |
305 * | |
306 * The UConverterSelector does not currently handle strings that can be | |
307 * converted, and it treats an illegal sequence as convertible | |
308 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. | |
309 */ | |
310 resultsManually[encIndex] = TRUE; | |
311 while(offset<length) { | |
312 U8_NEXT(utf8, offset, length, cp); | |
313 if (cp >= 0 && !uset_contains(set, cp)) { | |
314 resultsManually[encIndex] = FALSE; | |
315 break; | |
316 } | |
317 } | |
318 uset_close(set); | |
319 ucnv_close(test_converter); | |
320 } | |
321 return resultsManually; | |
322 } | |
323 | |
324 /* closes res but does not free resultsManually */ | |
325 static void verifyResult(UEnumeration* res, const UBool *resultsManually) { | |
326 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool
)); | |
327 const char* name; | |
328 UErrorCode status = U_ZERO_ERROR; | |
329 int32_t i; | |
330 | |
331 /* fill the bool for the selector results! */ | |
332 uprv_memset(resultsFromSystem, 0, gCountAvailable); | |
333 while ((name = uenum_next(res,NULL, &status)) != NULL) { | |
334 resultsFromSystem[findIndex(name)] = TRUE; | |
335 } | |
336 for(i = 0 ; i < gCountAvailable; i++) { | |
337 if(resultsManually[i] != resultsFromSystem[i]) { | |
338 log_err("failure in converter selector\n" | |
339 "converter %s had conflicting results -- manual: %d, system %d\n", | |
340 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); | |
341 } | |
342 } | |
343 uprv_free(resultsFromSystem); | |
344 uenum_close(res); | |
345 } | |
346 | |
347 static UConverterSelector * | |
348 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *stat
us) { | |
349 char *new_buffer; | |
350 int32_t ser_len, ser_len2; | |
351 /* preflight */ | |
352 ser_len = ucnvsel_serialize(sel, NULL, 0, status); | |
353 if (*status != U_BUFFER_OVERFLOW_ERROR) { | |
354 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)
); | |
355 return sel; | |
356 } | |
357 new_buffer = (char *)uprv_malloc(ser_len); | |
358 *status = U_ZERO_ERROR; | |
359 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); | |
360 if (U_FAILURE(*status) || ser_len != ser_len2) { | |
361 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); | |
362 uprv_free(new_buffer); | |
363 return sel; | |
364 } | |
365 ucnvsel_close(sel); | |
366 uprv_free(*buffer); | |
367 *buffer = new_buffer; | |
368 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); | |
369 if (U_FAILURE(*status)) { | |
370 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); | |
371 return NULL; | |
372 } | |
373 return sel; | |
374 } | |
375 | |
376 static void TestSelector() | |
377 { | |
378 TestText text; | |
379 USet* excluded_sets[3] = { NULL }; | |
380 int32_t i, testCaseIdx; | |
381 | |
382 if (!getAvailableNames()) { | |
383 return; | |
384 } | |
385 if (!text_open(&text)) { | |
386 releaseAvailableNames();; | |
387 } | |
388 | |
389 excluded_sets[0] = uset_openEmpty(); | |
390 for(i = 1 ; i < 3 ; i++) { | |
391 excluded_sets[i] = uset_open(i*30, i*30+500); | |
392 } | |
393 | |
394 for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx
++) | |
395 { | |
396 int32_t excluded_set_id; | |
397 int32_t num_encodings; | |
398 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); | |
399 if (getTestOption(QUICK_OPTION) && num_encodings > 25) { | |
400 uprv_free((void *)encodings); | |
401 continue; | |
402 } | |
403 | |
404 /* | |
405 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) | |
406 * | |
407 * This loop was replaced by the following statement because | |
408 * the loop made the test run longer without adding to the code coverage. | |
409 * The handling of the exclusion set is independent of the | |
410 * set of encodings, so there is no need to test every combination. | |
411 */ | |
412 excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets); | |
413 { | |
414 UConverterSelector *sel_rt, *sel_fb; | |
415 char *buffer_fb = NULL; | |
416 UErrorCode status = U_ZERO_ERROR; | |
417 sel_rt = ucnvsel_open(encodings, num_encodings, | |
418 excluded_sets[excluded_set_id], | |
419 UCNV_ROUNDTRIP_SET, &status); | |
420 if (num_encodings == gCountAvailable) { | |
421 /* test the special "all converters" parameter values */ | |
422 sel_fb = ucnvsel_open(NULL, 0, | |
423 excluded_sets[excluded_set_id], | |
424 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
425 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { | |
426 /* test that a NULL set gives the same results as an empty set */ | |
427 sel_fb = ucnvsel_open(encodings, num_encodings, | |
428 NULL, | |
429 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
430 } else { | |
431 sel_fb = ucnvsel_open(encodings, num_encodings, | |
432 excluded_sets[excluded_set_id], | |
433 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); | |
434 } | |
435 if (U_FAILURE(status)) { | |
436 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_err
orName(status)); | |
437 ucnvsel_close(sel_rt); | |
438 uprv_free((void *)encodings); | |
439 continue; | |
440 } | |
441 | |
442 text_reset(&text); | |
443 for (;;) { | |
444 UBool *manual_rt, *manual_fb; | |
445 static UChar utf16[10000]; | |
446 char *s; | |
447 int32_t length8, length16; | |
448 | |
449 s = text_nextString(&text, &length8); | |
450 if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { | |
451 break; | |
452 } | |
453 | |
454 manual_rt = getResultsManually(encodings, num_encodings, | |
455 s, length8, | |
456 excluded_sets[excluded_set_id], | |
457 UCNV_ROUNDTRIP_SET); | |
458 manual_fb = getResultsManually(encodings, num_encodings, | |
459 s, length8, | |
460 excluded_sets[excluded_set_id], | |
461 UCNV_ROUNDTRIP_AND_FALLBACK_SET); | |
462 /* UTF-8 with length */ | |
463 status = U_ZERO_ERROR; | |
464 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_
rt); | |
465 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_
fb); | |
466 /* UTF-8 NUL-terminated */ | |
467 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); | |
468 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); | |
469 | |
470 u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &statu
s); | |
471 if (U_FAILURE(status)) { | |
472 log_err("error converting the test text (string %ld) to UTF-16 - %s\n"
, | |
473 (long)text.number, u_errorName(status)); | |
474 } else { | |
475 if (text.number == 0) { | |
476 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); | |
477 } | |
478 if (U_SUCCESS(status)) { | |
479 /* UTF-16 with length */ | |
480 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &statu
s), manual_rt); | |
481 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &statu
s), manual_fb); | |
482 /* UTF-16 NUL-terminated */ | |
483 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), ma
nual_rt); | |
484 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), ma
nual_fb); | |
485 } | |
486 } | |
487 | |
488 uprv_free(manual_rt); | |
489 uprv_free(manual_fb); | |
490 } | |
491 ucnvsel_close(sel_rt); | |
492 ucnvsel_close(sel_fb); | |
493 uprv_free(buffer_fb); | |
494 } | |
495 uprv_free((void *)encodings); | |
496 } | |
497 | |
498 releaseAvailableNames(); | |
499 text_close(&text); | |
500 for(i = 0 ; i < 3 ; i++) { | |
501 uset_close(excluded_sets[i]); | |
502 } | |
503 } | |
504 | |
505 /* Improve code coverage of UPropsVectors */ | |
506 static void TestUPropsVector() { | |
507 UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; | |
508 UPropsVectors *pv = upvec_open(100, &errorCode); | |
509 if (pv != NULL) { | |
510 log_err("Should have returned NULL if UErrorCode is an error."); | |
511 return; | |
512 } | |
513 errorCode = U_ZERO_ERROR; | |
514 pv = upvec_open(-1, &errorCode); | |
515 if (pv != NULL || U_SUCCESS(errorCode)) { | |
516 log_err("Should have returned NULL if column is less than 0.\n"); | |
517 return; | |
518 } | |
519 errorCode = U_ZERO_ERROR; | |
520 pv = upvec_open(100, &errorCode); | |
521 if (pv == NULL || U_FAILURE(errorCode)) { | |
522 log_err("Unable to open UPropsVectors.\n"); | |
523 return; | |
524 } | |
525 | |
526 if (upvec_getValue(pv, 0, 1) != 0) { | |
527 log_err("upvec_getValue should return 0.\n"); | |
528 } | |
529 if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { | |
530 log_err("upvec_getRow should not return NULL.\n"); | |
531 } | |
532 if (upvec_getArray(pv, NULL, NULL) != NULL) { | |
533 log_err("upvec_getArray should return NULL.\n"); | |
534 } | |
535 | |
536 upvec_close(pv); | |
537 } | |
OLD | NEW |