OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * COPYRIGHT: | |
3 * Copyright (c) 1997-2015, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ********************************************************************/ | |
6 /******************************************************************************* | |
7 * | |
8 * File nucnvtst.c | |
9 * | |
10 * Modification History: | |
11 * Name Description | |
12 * Steven R. Loomis 7/8/1999 Adding input buffer test | |
13 ******************************************************************************** | |
14 */ | |
15 #include <stdio.h> | |
16 #include "cstring.h" | |
17 #include "unicode/uloc.h" | |
18 #include "unicode/ucnv.h" | |
19 #include "unicode/ucnv_err.h" | |
20 #include "unicode/ucnv_cb.h" | |
21 #include "cintltst.h" | |
22 #include "unicode/utypes.h" | |
23 #include "unicode/ustring.h" | |
24 #include "unicode/ucol.h" | |
25 #include "unicode/utf16.h" | |
26 #include "cmemory.h" | |
27 #include "nucnvtst.h" | |
28 | |
29 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit
, const int32_t results[], const char* message); | |
30 static void TestNextUCharError(UConverter* cnv, const char* source, const char*
limit, UErrorCode expected, const char* message); | |
31 #if !UCONFIG_NO_COLLATION | |
32 static void TestJitterbug981(void); | |
33 #endif | |
34 #if !UCONFIG_NO_LEGACY_CONVERSION | |
35 static void TestJitterbug1293(void); | |
36 #endif | |
37 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; | |
38 static void TestConverterTypesAndStarters(void); | |
39 static void TestAmbiguous(void); | |
40 static void TestSignatureDetection(void); | |
41 static void TestUTF7(void); | |
42 static void TestIMAP(void); | |
43 static void TestUTF8(void); | |
44 static void TestCESU8(void); | |
45 static void TestUTF16(void); | |
46 static void TestUTF16BE(void); | |
47 static void TestUTF16LE(void); | |
48 static void TestUTF32(void); | |
49 static void TestUTF32BE(void); | |
50 static void TestUTF32LE(void); | |
51 static void TestLATIN1(void); | |
52 | |
53 #if !UCONFIG_NO_LEGACY_CONVERSION | |
54 static void TestSBCS(void); | |
55 static void TestDBCS(void); | |
56 static void TestMBCS(void); | |
57 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
58 static void TestICCRunout(void); | |
59 #endif | |
60 | |
61 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
62 static void TestISO_2022(void); | |
63 #endif | |
64 | |
65 static void TestISO_2022_JP(void); | |
66 static void TestISO_2022_JP_1(void); | |
67 static void TestISO_2022_JP_2(void); | |
68 static void TestISO_2022_KR(void); | |
69 static void TestISO_2022_KR_1(void); | |
70 static void TestISO_2022_CN(void); | |
71 #if 0 | |
72 /* | |
73 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
74 */ | |
75 static void TestISO_2022_CN_EXT(void); | |
76 #endif | |
77 static void TestJIS(void); | |
78 static void TestHZ(void); | |
79 #endif | |
80 | |
81 static void TestSCSU(void); | |
82 | |
83 #if !UCONFIG_NO_LEGACY_CONVERSION | |
84 static void TestEBCDIC_STATEFUL(void); | |
85 static void TestGB18030(void); | |
86 static void TestLMBCS(void); | |
87 static void TestJitterbug255(void); | |
88 static void TestEBCDICUS4XML(void); | |
89 #if 0 | |
90 /* | |
91 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
92 */ | |
93 static void TestJitterbug915(void); | |
94 #endif | |
95 static void TestISCII(void); | |
96 | |
97 static void TestCoverageMBCS(void); | |
98 static void TestJitterbug2346(void); | |
99 static void TestJitterbug2411(void); | |
100 static void TestJB5275(void); | |
101 static void TestJB5275_1(void); | |
102 static void TestJitterbug6175(void); | |
103 | |
104 static void TestIsFixedWidth(void); | |
105 #endif | |
106 | |
107 static void TestInBufSizes(void); | |
108 | |
109 static void TestRoundTrippingAllUTF(void); | |
110 static void TestConv(const uint16_t in[], | |
111 int len, | |
112 const char* conv, | |
113 const char* lang, | |
114 char byteArr[], | |
115 int byteArrLen); | |
116 | |
117 /* open a converter, using test data if it begins with '@' */ | |
118 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); | |
119 | |
120 | |
121 #define NEW_MAX_BUFFER 999 | |
122 | |
123 static int32_t gInBufferSize = NEW_MAX_BUFFER; | |
124 static int32_t gOutBufferSize = NEW_MAX_BUFFER; | |
125 static char gNuConvTestName[1024]; | |
126 | |
/*
 * Minimum of two values; works for integers and for pointers into the same
 * array. Every argument use is parenthesized so that arguments containing
 * operators of lower precedence than '<' (for example '&' or '?:') are
 * compared as a whole.
 * Note: the selected argument is evaluated twice, so do not pass expressions
 * with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
128 | |
129 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) | |
130 { | |
131 if(cnv && cnv[0] == '@') { | |
132 return ucnv_openPackage(loadTestData(err), cnv+1, err); | |
133 } else { | |
134 return ucnv_open(cnv, err); | |
135 } | |
136 } | |
137 | |
/* Writes len bytes of a to the verbose log as "{0x.. 0x.. }" plus a newline. */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for(idx = 0; idx < len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
146 | |
147 static void printUSeq(const UChar* a, int len) | |
148 { | |
149 int i=0; | |
150 log_verbose("{U+"); | |
151 while (i<len) log_verbose("0x%04x ", a[i++]); | |
152 log_verbose("}\n"); | |
153 } | |
154 | |
/* Writes len bytes of a to stderr as "{0x.. 0x.. }" plus a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for(idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
163 | |
164 static void printUSeqErr(const UChar* a, int len) | |
165 { | |
166 int i=0; | |
167 fprintf(stderr, "{U+"); | |
168 while (i<len) | |
169 fprintf(stderr, "0x%04x ", a[i++]); | |
170 fprintf(stderr,"}\n"); | |
171 } | |
172 | |
173 static void | |
174 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int3
2_t results[], const char* message) | |
175 { | |
176 const char* s0; | |
177 const char* s=(char*)source; | |
178 const int32_t *r=results; | |
179 UErrorCode errorCode=U_ZERO_ERROR; | |
180 UChar32 c; | |
181 | |
182 while(s<limit) { | |
183 s0=s; | |
184 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
185 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
186 break; /* no more significant input */ | |
187 } else if(U_FAILURE(errorCode)) { | |
188 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(
errorCode)); | |
189 break; | |
190 } else if( | |
191 /* test the expected number of input bytes only if >=0 */ | |
192 (*r>=0 && (int32_t)(s-s0)!=*r) || | |
193 c!=*(r+1) | |
194 ) { | |
195 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should hav
e been %lx from %d bytes.\n", | |
196 message, c, (s-s0), *(r+1), *r); | |
197 break; | |
198 } | |
199 r+=2; | |
200 } | |
201 } | |
202 | |
203 static void | |
204 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErro
rCode expected, const char* message) | |
205 { | |
206 const char* s=(char*)source; | |
207 UErrorCode errorCode=U_ZERO_ERROR; | |
208 uint32_t c; | |
209 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
210 if(errorCode != expected){ | |
211 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected),
message, myErrorName(errorCode)); | |
212 } | |
213 if(c != 0xFFFD && c != 0xffff){ | |
214 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got
0x%lx\n", message, c); | |
215 } | |
216 | |
217 } | |
218 | |
219 static void TestInBufSizes(void) | |
220 { | |
221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); | |
222 #if 1 | |
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); | |
224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); | |
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); | |
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); | |
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); | |
228 TestNewConvertWithBufferSizes(1,1); | |
229 TestNewConvertWithBufferSizes(2,3); | |
230 TestNewConvertWithBufferSizes(3,2); | |
231 #endif | |
232 } | |
233 | |
234 static void TestOutBufSizes(void) | |
235 { | |
236 #if 1 | |
237 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); | |
238 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); | |
239 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); | |
240 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); | |
241 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); | |
242 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); | |
243 | |
244 #endif | |
245 } | |
246 | |
247 | |
248 void addTestNewConvert(TestNode** root) | |
249 { | |
250 #if !UCONFIG_NO_FILE_IO | |
251 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); | |
252 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); | |
253 #endif | |
254 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterT
ypesAndStarters"); | |
255 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); | |
256 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetectio
n"); | |
257 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); | |
258 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); | |
259 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); | |
260 | |
261 /* test ucnv_getNextUChar() for charsets that encode single surrogates with c
omplete byte sequences */ | |
262 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); | |
263 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); | |
264 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); | |
265 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); | |
266 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); | |
267 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); | |
268 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); | |
269 | |
270 #if !UCONFIG_NO_LEGACY_CONVERSION | |
271 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); | |
272 #endif | |
273 | |
274 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); | |
275 | |
276 #if !UCONFIG_NO_LEGACY_CONVERSION | |
277 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); | |
278 #if !UCONFIG_NO_FILE_IO | |
279 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); | |
280 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); | |
281 #endif | |
282 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); | |
283 | |
284 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
285 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); | |
286 #endif | |
287 | |
288 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); | |
289 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); | |
290 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); | |
291 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); | |
292 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); | |
293 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); | |
294 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); | |
295 /* | |
296 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 | |
297 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); | |
298 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); | |
299 */ | |
300 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); | |
301 #endif | |
302 | |
303 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); | |
304 | |
305 #if !UCONFIG_NO_LEGACY_CONVERSION | |
306 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); | |
307 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); | |
308 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); | |
309 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); | |
310 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); | |
311 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); | |
312 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); | |
313 #if !UCONFIG_NO_COLLATION | |
314 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); | |
315 #endif | |
316 | |
317 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); | |
318 #endif | |
319 | |
320 | |
321 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
322 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); | |
323 #endif | |
324 | |
325 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAll
UTF"); | |
326 | |
327 #if !UCONFIG_NO_LEGACY_CONVERSION | |
328 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); | |
329 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); | |
330 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); | |
331 | |
332 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); | |
333 #endif | |
334 } | |
335 | |
336 | |
337 /* Note that this test already makes use of statics, so it's not really | |
338 multithread safe. | |
339 This convenience function lets us make the error messages actually useful. | |
340 */ | |
341 | |
342 static void setNuConvTestName(const char *codepage, const char *direction) | |
343 { | |
344 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufS
iz=%d]", | |
345 codepage, | |
346 direction, | |
347 (int)gInBufferSize, | |
348 (int)gOutBufferSize); | |
349 } | |
350 | |
/* Result codes returned by testConvertFromU() and testConvertToU(). */
typedef enum
{
    TC_OK,        /* 0: test was OK */
    TC_MISMATCH,  /* 1: Match failed - err was printed */
    TC_FAIL       /* 2: Test failed, don't print an err because it was already printed. */
} ETestConvertResult;
357 | |
358 /* Note: This function uses global variables and it will not do offset | |
359 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ | |
360 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,
const uint8_t *expect, int expectLen, | |
361 const char *codepage, const int32_t *expectOffsets , UBool useFa
llback) | |
362 { | |
363 UErrorCode status = U_ZERO_ERROR; | |
364 UConverter *conv = 0; | |
365 char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
366 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
367 char *p; | |
368 const UChar *src; | |
369 char *end; | |
370 char *targ; | |
371 int32_t *offs; | |
372 int i; | |
373 int32_t realBufferSize; | |
374 char *realBufferEnd; | |
375 const UChar *realSourceEnd; | |
376 const UChar *sourceLimit; | |
377 UBool checkOffsets = TRUE; | |
378 UBool doFlush; | |
379 | |
380 for(i=0;i<NEW_MAX_BUFFER;i++) | |
381 junkout[i] = (char)0xF0; | |
382 for(i=0;i<NEW_MAX_BUFFER;i++) | |
383 junokout[i] = 0xFF; | |
384 | |
385 setNuConvTestName(codepage, "FROM"); | |
386 | |
387 log_verbose("\n========= %s\n", gNuConvTestName); | |
388 | |
389 conv = my_ucnv_open(codepage, &status); | |
390 | |
391 if(U_FAILURE(status)) | |
392 { | |
393 log_data_err("Couldn't open converter %s\n",codepage); | |
394 return TC_FAIL; | |
395 } | |
396 if(useFallback){ | |
397 ucnv_setFallback(conv,useFallback); | |
398 } | |
399 | |
400 log_verbose("Converter opened..\n"); | |
401 | |
402 src = source; | |
403 targ = junkout; | |
404 offs = junokout; | |
405 | |
406 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
407 realBufferEnd = junkout + realBufferSize; | |
408 realSourceEnd = source + sourceLen; | |
409 | |
410 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) | |
411 checkOffsets = FALSE; | |
412 | |
413 do | |
414 { | |
415 end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
416 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
417 | |
418 doFlush = (UBool)(sourceLimit == realSourceEnd); | |
419 | |
420 if(targ == realBufferEnd) { | |
421 log_err("Error, overflowed the real buffer while about to call fromUnico
de! targ=%08lx %s", targ, gNuConvTestName); | |
422 return TC_FAIL; | |
423 } | |
424 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to
%08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
425 | |
426 | |
427 status = U_ZERO_ERROR; | |
428 | |
429 ucnv_fromUnicode (conv, | |
430 &targ, | |
431 end, | |
432 &src, | |
433 sourceLimit, | |
434 checkOffsets ? offs : NULL, | |
435 doFlush, /* flush if we're at the end of the input data
*/ | |
436 &status); | |
437 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourc
eLimit < realSourceEnd) ); | |
438 | |
439 if(U_FAILURE(status)) { | |
440 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myEr
rorName(status), gNuConvTestName); | |
441 return TC_FAIL; | |
442 } | |
443 | |
444 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
445 sourceLen, targ-junkout); | |
446 | |
447 if(getTestOption(VERBOSITY_OPTION)) | |
448 { | |
449 char junk[9999]; | |
450 char offset_str[9999]; | |
451 char *ptr; | |
452 | |
453 junk[0] = 0; | |
454 offset_str[0] = 0; | |
455 for(ptr = junkout;ptr<targ;ptr++) { | |
456 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); | |
457 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junoko
ut[ptr-junkout])); | |
458 } | |
459 | |
460 log_verbose(junk); | |
461 printSeq((const uint8_t *)expect, expectLen); | |
462 if ( checkOffsets ) { | |
463 log_verbose("\nOffsets:"); | |
464 log_verbose(offset_str); | |
465 } | |
466 log_verbose("\n"); | |
467 } | |
468 ucnv_close(conv); | |
469 | |
470 if(expectLen != targ-junkout) { | |
471 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNu
ConvTestName); | |
472 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout,
gNuConvTestName); | |
473 fprintf(stderr, "Got:\n"); | |
474 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); | |
475 fprintf(stderr, "Expected:\n"); | |
476 printSeqErr((const unsigned char*)expect, expectLen); | |
477 return TC_MISMATCH; | |
478 } | |
479 | |
480 if (checkOffsets && (expectOffsets != 0) ) { | |
481 log_verbose("comparing %d offsets..\n", targ-junkout); | |
482 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
483 log_err("did not get the expected offsets. %s\n", gNuConvTestName); | |
484 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); | |
485 log_err("\n"); | |
486 log_err("Got : "); | |
487 for(p=junkout;p<targ;p++) { | |
488 log_err("%d,", junokout[p-junkout]); | |
489 } | |
490 log_err("\n"); | |
491 log_err("Expected: "); | |
492 for(i=0; i<(targ-junkout); i++) { | |
493 log_err("%d,", expectOffsets[i]); | |
494 } | |
495 log_err("\n"); | |
496 } | |
497 } | |
498 | |
499 log_verbose("comparing..\n"); | |
500 if(!memcmp(junkout, expect, expectLen)) { | |
501 log_verbose("Matches!\n"); | |
502 return TC_OK; | |
503 } else { | |
504 log_err("String does not match u->%s\n", gNuConvTestName); | |
505 printUSeqErr(source, sourceLen); | |
506 fprintf(stderr, "Got:\n"); | |
507 printSeqErr((const unsigned char *)junkout, expectLen); | |
508 fprintf(stderr, "Expected:\n"); | |
509 printSeqErr((const unsigned char *)expect, expectLen); | |
510 | |
511 return TC_MISMATCH; | |
512 } | |
513 } | |
514 | |
515 /* Note: This function uses global variables and it will not do offset | |
516 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ | |
517 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen,
const UChar *expect, int expectlen, | |
518 const char *codepage, const int32_t *e
xpectOffsets, UBool useFallback) | |
519 { | |
520 UErrorCode status = U_ZERO_ERROR; | |
521 UConverter *conv = 0; | |
522 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
523 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
524 const char *src; | |
525 const char *realSourceEnd; | |
526 const char *srcLimit; | |
527 UChar *p; | |
528 UChar *targ; | |
529 UChar *end; | |
530 int32_t *offs; | |
531 int i; | |
532 UBool checkOffsets = TRUE; | |
533 | |
534 int32_t realBufferSize; | |
535 UChar *realBufferEnd; | |
536 | |
537 | |
538 for(i=0;i<NEW_MAX_BUFFER;i++) | |
539 junkout[i] = 0xFFFE; | |
540 | |
541 for(i=0;i<NEW_MAX_BUFFER;i++) | |
542 junokout[i] = -1; | |
543 | |
544 setNuConvTestName(codepage, "TO"); | |
545 | |
546 log_verbose("\n========= %s\n", gNuConvTestName); | |
547 | |
548 conv = my_ucnv_open(codepage, &status); | |
549 | |
550 if(U_FAILURE(status)) | |
551 { | |
552 log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
553 return TC_FAIL; | |
554 } | |
555 if(useFallback){ | |
556 ucnv_setFallback(conv,useFallback); | |
557 } | |
558 log_verbose("Converter opened..\n"); | |
559 | |
560 src = (const char *)source; | |
561 targ = junkout; | |
562 offs = junokout; | |
563 | |
564 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
565 realBufferEnd = junkout + realBufferSize; | |
566 realSourceEnd = src + sourcelen; | |
567 | |
568 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) | |
569 checkOffsets = FALSE; | |
570 | |
571 do | |
572 { | |
573 end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
574 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
575 | |
576 if(targ == realBufferEnd) | |
577 { | |
578 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); | |
579 return TC_FAIL; | |
580 } | |
581 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
582 | |
583 /* oldTarg = targ; */ | |
584 | |
585 status = U_ZERO_ERROR; | |
586 | |
587 ucnv_toUnicode (conv, | |
588 &targ, | |
589 end, | |
590 &src, | |
591 srcLimit, | |
592 checkOffsets ? offs : NULL, | |
593 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of hte source data */ | |
594 &status); | |
595 | |
596 /* offs += (targ-oldTarg); */ | |
597 | |
598 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sr
cLimit < realSourceEnd)) ); /* while we just need another buffer */ | |
599 | |
600 if(U_FAILURE(status)) | |
601 { | |
602 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myError
Name(status), gNuConvTestName); | |
603 return TC_FAIL; | |
604 } | |
605 | |
606 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
607 sourcelen, targ-junkout); | |
608 if(getTestOption(VERBOSITY_OPTION)) | |
609 { | |
610 char junk[9999]; | |
611 char offset_str[9999]; | |
612 UChar *ptr; | |
613 | |
614 junk[0] = 0; | |
615 offset_str[0] = 0; | |
616 | |
617 for(ptr = junkout;ptr<targ;ptr++) | |
618 { | |
619 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
tr); | |
620 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[ptr-junkout]); | |
621 } | |
622 | |
623 log_verbose(junk); | |
624 printUSeq(expect, expectlen); | |
625 if ( checkOffsets ) | |
626 { | |
627 log_verbose("\nOffsets:"); | |
628 log_verbose(offset_str); | |
629 } | |
630 log_verbose("\n"); | |
631 } | |
632 ucnv_close(conv); | |
633 | |
634 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
635 | |
636 if (checkOffsets && (expectOffsets != 0)) | |
637 { | |
638 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ | |
639 log_err("did not get the expected offsets. %s\n",gNuConvTestName); | |
640 log_err("Got: "); | |
641 for(p=junkout;p<targ;p++) { | |
642 log_err("%d,", junokout[p-junkout]); | |
643 } | |
644 log_err("\n"); | |
645 log_err("Expected: "); | |
646 for(i=0; i<(targ-junkout); i++) { | |
647 log_err("%d,", expectOffsets[i]); | |
648 } | |
649 log_err("\n"); | |
650 log_err("output: "); | |
651 for(i=0; i<(targ-junkout); i++) { | |
652 log_err("%X,", junkout[i]); | |
653 } | |
654 log_err("\n"); | |
655 log_err("input: "); | |
656 for(i=0; i<(src-(const char *)source); i++) { | |
657 log_err("%X,", (unsigned char)source[i]); | |
658 } | |
659 log_err("\n"); | |
660 } | |
661 } | |
662 | |
663 if(!memcmp(junkout, expect, expectlen*2)) | |
664 { | |
665 log_verbose("Matches!\n"); | |
666 return TC_OK; | |
667 } | |
668 else | |
669 { | |
670 log_err("String does not match. %s\n", gNuConvTestName); | |
671 log_verbose("String does not match. %s\n", gNuConvTestName); | |
672 printf("\nGot:"); | |
673 printUSeqErr(junkout, expectlen); | |
674 printf("\nExpected:"); | |
675 printUSeqErr(expect, expectlen); | |
676 return TC_MISMATCH; | |
677 } | |
678 } | |
679 | |
680 | |
681 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) | |
682 { | |
683 /** test chars #1 */ | |
684 /* 1 2 3 1Han 2Han 3Han . */ | |
685 static const UChar sampleText[] = | |
686 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0
xDC21 }; | |
687 static const UChar sampleTextRoundTripUnmappable[] = | |
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; | |
689 | |
690 | |
691 static const uint8_t expectedUTF8[] = | |
692 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0
x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; | |
693 static const int32_t toUTF8Offs[] = | |
694 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0
x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
695 static const int32_t fmUTF8Offs[] = | |
696 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0
x000e }; | |
697 | |
698 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
699 /* Same as UTF8, but with ^[%B preceeding */ | |
700 static const const uint8_t expectedISO2022[] = | |
701 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0
x8c, 0xe4, 0xb8, 0x89, 0x2E }; | |
702 static const int32_t toISO2022Offs[] = | |
703 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, | |
704 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ | |
705 static const int32_t fmISO2022Offs[] = | |
706 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is t
his right? */ | |
707 #endif | |
708 | |
709 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ | |
710 static const uint8_t expectedIBM930[] = | |
711 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0
x4B, 0x0e, 0xfe, 0xfe, 0x0f }; | |
712 static const int32_t toIBM930Offs[] = | |
713 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0
x07, 0x08, 0x08, 0x08, -1 }; | |
714 static const int32_t fmIBM930Offs[] = | |
715 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; | |
716 | |
717 /* 1 2 3 0 h1 h2 h3 . MBCS*/ | |
718 static const uint8_t expectedIBM943[] = | |
719 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc,
0xfc }; | |
720 static const int32_t toIBM943Offs [] = | |
721 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08,
0x08 }; | |
722 static const int32_t fmIBM943Offs[] = | |
723 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; | |
724 | |
725 /* 1 2 3 0 h1 h2 h3 . DBCS*/ | |
726 static const uint8_t expectedIBM9027[] = | |
727 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48,
0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; | |
728 static const int32_t toIBM9027Offs [] = | |
729 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05,
0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; | |
730 | |
731 /* 1 2 3 0 <?> <?> <?> . SBCS*/ | |
732 static const uint8_t expectedIBM920[] = | |
733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; | |
734 static const int32_t toIBM920Offs [] = | |
735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
736 | |
737 /* 1 2 3 0 <?> <?> <?> . SBCS*/ | |
738 static const uint8_t expectedISO88593[] = | |
739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; | |
740 static const int32_t toISO88593Offs[] = | |
741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
742 | |
743 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ | |
744 static const uint8_t expectedLATIN1[] = | |
745 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; | |
746 static const int32_t toLATIN1Offs[] = | |
747 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; | |
748 | |
749 | |
750 /* etc */ | |
751 static const uint8_t expectedUTF16BE[] = | |
752 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0
x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; | |
753 static const int32_t toUTF16BEOffs[]= | |
754 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0
x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
755 static const int32_t fmUTF16BEOffs[] = | |
756 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010,
0x0010 }; | |
757 | |
758 static const uint8_t expectedUTF16LE[] = | |
759 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0
x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; | |
760 static const int32_t toUTF16LEOffs[]= | |
761 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0
x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; | |
762 static const int32_t fmUTF16LEOffs[] = | |
763 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0
x0010 }; | |
764 | |
765 static const uint8_t expectedUTF32BE[] = | |
766 { 0x00, 0x00, 0x00, 0x31, | |
767 0x00, 0x00, 0x00, 0x32, | |
768 0x00, 0x00, 0x00, 0x33, | |
769 0x00, 0x00, 0x00, 0x00, | |
770 0x00, 0x00, 0x4e, 0x00, | |
771 0x00, 0x00, 0x4e, 0x8c, | |
772 0x00, 0x00, 0x4e, 0x09, | |
773 0x00, 0x00, 0x00, 0x2e, | |
774 0x00, 0x02, 0x00, 0x21 }; | |
775 static const int32_t toUTF32BEOffs[]= | |
776 { 0x00, 0x00, 0x00, 0x00, | |
777 0x01, 0x01, 0x01, 0x01, | |
778 0x02, 0x02, 0x02, 0x02, | |
779 0x03, 0x03, 0x03, 0x03, | |
780 0x04, 0x04, 0x04, 0x04, | |
781 0x05, 0x05, 0x05, 0x05, | |
782 0x06, 0x06, 0x06, 0x06, | |
783 0x07, 0x07, 0x07, 0x07, | |
784 0x08, 0x08, 0x08, 0x08, | |
785 0x08, 0x08, 0x08, 0x08 }; | |
786 static const int32_t fmUTF32BEOffs[] = | |
787 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020,
0x0020 }; | |
788 | |
789 static const uint8_t expectedUTF32LE[] = | |
790 { 0x31, 0x00, 0x00, 0x00, | |
791 0x32, 0x00, 0x00, 0x00, | |
792 0x33, 0x00, 0x00, 0x00, | |
793 0x00, 0x00, 0x00, 0x00, | |
794 0x00, 0x4e, 0x00, 0x00, | |
795 0x8c, 0x4e, 0x00, 0x00, | |
796 0x09, 0x4e, 0x00, 0x00, | |
797 0x2e, 0x00, 0x00, 0x00, | |
798 0x21, 0x00, 0x02, 0x00 }; | |
799 static const int32_t toUTF32LEOffs[]= | |
800 { 0x00, 0x00, 0x00, 0x00, | |
801 0x01, 0x01, 0x01, 0x01, | |
802 0x02, 0x02, 0x02, 0x02, | |
803 0x03, 0x03, 0x03, 0x03, | |
804 0x04, 0x04, 0x04, 0x04, | |
805 0x05, 0x05, 0x05, 0x05, | |
806 0x06, 0x06, 0x06, 0x06, | |
807 0x07, 0x07, 0x07, 0x07, | |
808 0x08, 0x08, 0x08, 0x08, | |
809 0x08, 0x08, 0x08, 0x08 }; | |
810 static const int32_t fmUTF32LEOffs[] = | |
811 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0
x0020 }; | |
812 | |
813 | |
814 | |
815 | |
816 /** Test chars #2 **/ | |
817 | |
818 /* Sahha [health], slashed h's */ | |
819 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x006
1 }; | |
820 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }
; | |
821 | |
822 /* LMBCS */ | |
823 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2
666, 0x0220 }; | |
824 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73,
0x01, 0x04, 0x14, 0x02, 0x20 }; | |
825 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03,
0x04, 0x04 , 0x05, 0x05, 0x05 }; | |
826 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0
006, 0x0008}; | |
827 /*********************************** START OF CODE finally *************/ | |
828 | |
829 gInBufferSize = insize; | |
830 gOutBufferSize = outsize; | |
831 | |
832 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBuff
erSize = %d\n", gInBufferSize, gOutBufferSize); | |
833 | |
834 | |
835 /*UTF-8*/ | |
836 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
837 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); | |
838 | |
839 log_verbose("Test surrogate behaviour for UTF8\n"); | |
840 { | |
841 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; | |
842 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, | |
843 0xf0, 0x90, 0x90, 0x81, | |
844 0xef, 0xbf, 0xbd | |
845 }; | |
846 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; | |
847 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), | |
848 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", o
ffsets,FALSE ); | |
849 | |
850 | |
851 } | |
852 | |
853 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) | |
854 /*ISO-2022*/ | |
855 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
856 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALS
E ); | |
857 #endif | |
858 | |
859 /*UTF16 LE*/ | |
860 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
861 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALS
E ); | |
862 /*UTF16 BE*/ | |
863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
864 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALS
E ); | |
865 /*UTF32 LE*/ | |
866 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
867 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALS
E ); | |
868 /*UTF32 BE*/ | |
869 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
870 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALS
E ); | |
871 | |
872 /*LATIN_1*/ | |
873 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
874 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); | |
875 | |
876 #if !UCONFIG_NO_LEGACY_CONVERSION | |
877 /*EBCDIC_STATEFUL*/ | |
878 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
879 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); | |
880 | |
881 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
882 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs
,FALSE ); | |
883 | |
884 /*MBCS*/ | |
885 | |
886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
887 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); | |
888 /*DBCS*/ | |
889 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
890 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALS
E ); | |
891 /*SBCS*/ | |
892 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
893 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); | |
894 /*SBCS*/ | |
895 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), | |
896 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs
,FALSE ); | |
897 #endif | |
898 | |
899 | |
900 /****/ | |
901 | |
902 /*UTF-8*/ | |
903 testConvertToU(expectedUTF8, sizeof(expectedUTF8), | |
904 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs
,FALSE); | |
905 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) | |
906 /*ISO-2022*/ | |
907 testConvertToU(expectedISO2022, sizeof(expectedISO2022), | |
908 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2
022Offs,FALSE); | |
909 #endif | |
910 | |
911 /*UTF16 LE*/ | |
912 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), | |
913 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF1
6LEOffs,FALSE); | |
914 /*UTF16 BE*/ | |
915 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), | |
916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF1
6BEOffs,FALSE); | |
917 /*UTF32 LE*/ | |
918 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), | |
919 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF3
2LEOffs,FALSE); | |
920 /*UTF32 BE*/ | |
921 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), | |
922 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF3
2BEOffs,FALSE); | |
923 | |
924 #if !UCONFIG_NO_LEGACY_CONVERSION | |
925 /*EBCDIC_STATEFUL*/ | |
926 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUn
mappable, | |
927 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnma
ppable[0]), "ibm-930", fmIBM930Offs,FALSE); | |
928 /*MBCS*/ | |
929 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnm
appable, | |
930 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnma
ppable[0]), "ibm-943", fmIBM943Offs,FALSE); | |
931 #endif | |
932 | |
933 /* Try it again to make sure it still works */ | |
934 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), | |
935 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF1
6LEOffs,FALSE); | |
936 | |
937 #if !UCONFIG_NO_LEGACY_CONVERSION | |
938 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), | |
939 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3",
NULL,FALSE); | |
940 | |
941 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0
]), | |
942 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE
); | |
943 | |
944 /*LMBCS*/ | |
945 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), | |
946 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); | |
947 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), | |
948 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLM
BCSOffs,FALSE); | |
949 #endif | |
950 | |
951 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ | |
952 { | |
953 /* encode directly set D and set O */ | |
954 static const uint8_t utf7[] = { | |
955 /* | |
956 Hi Mom -+Jjo--! | |
957 A+ImIDkQ. | |
958 +- | |
959 +ZeVnLIqe- | |
960 */ | |
961 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x21, | |
962 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, | |
963 0x2b, 0x2d, | |
964 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d | |
965 }; | |
966 static const UChar unicode[] = { | |
967 /* | |
968 Hi Mom -<WHITE SMILING FACE>-! | |
969 A<NOT IDENTICAL TO><ALPHA>. | |
970 + | |
971 [Japanese word "nihongo"] | |
972 */ | |
973 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, | |
974 0x41, 0x2262, 0x0391, 0x2e, | |
975 0x2b, | |
976 0x65e5, 0x672c, 0x8a9e | |
977 }; | |
978 static const int32_t toUnicodeOffsets[] = { | |
979 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, | |
980 15, 17, 19, 23, | |
981 24, | |
982 27, 29, 32 | |
983 }; | |
984 static const int32_t fromUnicodeOffsets[] = { | |
985 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, | |
986 11, 12, 12, 12, 13, 13, 13, 13, 14, | |
987 15, 15, | |
988 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 | |
989 }; | |
990 | |
991 /* same but escaping set O (the exclamation mark) */ | |
992 static const uint8_t utf7Restricted[] = { | |
993 /* | |
994 Hi Mom -+Jjo--+ACE- | |
995 A+ImIDkQ. | |
996 +- | |
997 +ZeVnLIqe- | |
998 */ | |
999 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, | |
1000 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, | |
1001 0x2b, 0x2d, | |
1002 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d | |
1003 }; | |
1004 static const int32_t toUnicodeOffsetsR[] = { | |
1005 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, | |
1006 19, 21, 23, 27, | |
1007 28, | |
1008 31, 33, 36 | |
1009 }; | |
1010 static const int32_t fromUnicodeOffsetsR[] = { | |
1011 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, | |
1012 11, 12, 12, 12, 13, 13, 13, 13, 14, | |
1013 15, 15, | |
1014 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 | |
1015 }; | |
1016 | |
1017 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(u
tf7), "UTF-7", fromUnicodeOffsets,FALSE); | |
1018 | |
1019 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCH
AR, "UTF-7", toUnicodeOffsets,FALSE); | |
1020 | |
1021 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted
, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); | |
1022 | |
1023 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(u
nicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); | |
1024 } | |
1025 | |
1026 /* | |
1027 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, | |
1028 * modified according to RFC 2060, | |
1029 * and supplemented with the one example in RFC 2060 itself. | |
1030 */ | |
1031 { | |
1032 static const uint8_t imap[] = { | |
1033 /* Hi Mom -&Jjo--! | |
1034 A&ImIDkQ-. | |
1035 &- | |
1036 &ZeVnLIqe- | |
1037 \ | |
1038 ~peter | |
1039 /mail | |
1040 /&ZeVnLIqe- | |
1041 /&U,BTFw- | |
1042 */ | |
1043 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x
6f, 0x2d, 0x2d, 0x21, | |
1044 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, | |
1045 0x26, 0x2d, | |
1046 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, | |
1047 0x5c, | |
1048 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, | |
1049 0x2f, 0x6d, 0x61, 0x69, 0x6c, | |
1050 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, | |
1051 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d | |
1052 }; | |
1053 static const UChar unicode[] = { | |
1054 /* Hi Mom -<WHITE SMILING FACE>-! | |
1055 A<NOT IDENTICAL TO><ALPHA>. | |
1056 & | |
1057 [Japanese word "nihongo"] | |
1058 \ | |
1059 ~peter | |
1060 /mail | |
1061 /<65e5, 672c, 8a9e> | |
1062 /<53f0, 5317> | |
1063 */ | |
1064 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, | |
1065 0x41, 0x2262, 0x0391, 0x2e, | |
1066 0x26, | |
1067 0x65e5, 0x672c, 0x8a9e, | |
1068 0x5c, | |
1069 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, | |
1070 0x2f, 0x6d, 0x61, 0x69, 0x6c, | |
1071 0x2f, 0x65e5, 0x672c, 0x8a9e, | |
1072 0x2f, 0x53f0, 0x5317 | |
1073 }; | |
1074 static const int32_t toUnicodeOffsets[] = { | |
1075 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, | |
1076 15, 17, 19, 24, | |
1077 25, | |
1078 28, 30, 33, | |
1079 37, | |
1080 38, 39, 40, 41, 42, 43, | |
1081 44, 45, 46, 47, 48, | |
1082 49, 51, 53, 56, | |
1083 60, 62, 64 | |
1084 }; | |
1085 static const int32_t fromUnicodeOffsets[] = { | |
1086 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, | |
1087 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, | |
1088 15, 15, | |
1089 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, | |
1090 19, | |
1091 20, 21, 22, 23, 24, 25, | |
1092 26, 27, 28, 29, 30, | |
1093 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, | |
1094 35, 36, 36, 36, 37, 37, 37, 37, 37 | |
1095 }; | |
1096 | |
1097 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(i
map), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); | |
1098 | |
1099 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCH
AR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); | |
1100 } | |
1101 | |
1102 /* Test UTF-8 bad data handling*/ | |
1103 { | |
1104 static const uint8_t utf8[]={ | |
1105 0x61, | |
1106 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
1107 0x00, | |
1108 0x62, | |
1109 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ | |
1111 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ | |
1112 0xdf, 0xbf, /* 7ff */ | |
1113 0xbf, /* truncated tail */ | |
1114 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ | |
1115 0x02 | |
1116 }; | |
1117 | |
1118 static const uint16_t utf8Expected[]={ | |
1119 0x0061, | |
1120 0xfffd, | |
1121 0x0000, | |
1122 0x0062, | |
1123 0xfffd, | |
1124 0xfffd, | |
1125 0xdbff, 0xdfff, | |
1126 0x07ff, | |
1127 0xfffd, | |
1128 0xfffd, | |
1129 0x0002 | |
1130 }; | |
1131 | |
1132 static const int32_t utf8Offsets[]={ | |
1133 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 | |
1134 }; | |
1135 testConvertToU(utf8, sizeof(utf8), | |
1136 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]
), "utf-8", utf8Offsets ,FALSE); | |
1137 | |
1138 } | |
1139 | |
1140 /* Test UTF-32BE bad data handling*/ | |
1141 { | |
1142 static const uint8_t utf32[]={ | |
1143 0x00, 0x00, 0x00, 0x61, | |
1144 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ | |
1145 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ | |
1146 0x00, 0x00, 0x00, 0x62, | |
1147 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
1148 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ | |
1149 0x00, 0x00, 0x01, 0x62, | |
1150 0x00, 0x00, 0x02, 0x62 | |
1151 }; | |
1152 static const uint16_t utf32Expected[]={ | |
1153 0x0061, | |
1154 0xfffd, /* 0x110000 out of range */ | |
1155 0xDBFF, /* 0x10FFFF in range */ | |
1156 0xDFFF, | |
1157 0x0062, | |
1158 0xfffd, /* 0xffffffff out of range */ | |
1159 0xfffd, /* 0x7fffffff out of range */ | |
1160 0x0162, | |
1161 0x0262 | |
1162 }; | |
1163 static const int32_t utf32Offsets[]={ | |
1164 0, 4, 8, 8, 12, 16, 20, 24, 28 | |
1165 }; | |
1166 static const uint8_t utf32ExpectedBack[]={ | |
1167 0x00, 0x00, 0x00, 0x61, | |
1168 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ | |
1169 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ | |
1170 0x00, 0x00, 0x00, 0x62, | |
1171 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ | |
1172 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ | |
1173 0x00, 0x00, 0x01, 0x62, | |
1174 0x00, 0x00, 0x02, 0x62 | |
1175 }; | |
1176 static const int32_t utf32OffsetsBack[]={ | |
1177 0,0,0,0, | |
1178 1,1,1,1, | |
1179 2,2,2,2, | |
1180 4,4,4,4, | |
1181 5,5,5,5, | |
1182 6,6,6,6, | |
1183 7,7,7,7, | |
1184 8,8,8,8 | |
1185 }; | |
1186 | |
1187 testConvertToU(utf32, sizeof(utf32), | |
1188 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected
[0]), "utf-32be", utf32Offsets ,FALSE); | |
1189 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expect
ed[0]), | |
1190 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32Offse
tsBack, FALSE); | |
1191 } | |
1192 | |
1193 /* Test UTF-32LE bad data handling*/ | |
1194 { | |
1195 static const uint8_t utf32[]={ | |
1196 0x61, 0x00, 0x00, 0x00, | |
1197 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ | |
1198 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ | |
1199 0x62, 0x00, 0x00, 0x00, | |
1200 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
1201 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ | |
1202 0x62, 0x01, 0x00, 0x00, | |
1203 0x62, 0x02, 0x00, 0x00, | |
1204 }; | |
1205 | |
1206 static const uint16_t utf32Expected[]={ | |
1207 0x0061, | |
1208 0xfffd, /* 0x110000 out of range */ | |
1209 0xDBFF, /* 0x10FFFF in range */ | |
1210 0xDFFF, | |
1211 0x0062, | |
1212 0xfffd, /* 0xffffffff out of range */ | |
1213 0xfffd, /* 0x7fffffff out of range */ | |
1214 0x0162, | |
1215 0x0262 | |
1216 }; | |
1217 static const int32_t utf32Offsets[]={ | |
1218 0, 4, 8, 8, 12, 16, 20, 24, 28 | |
1219 }; | |
1220 static const uint8_t utf32ExpectedBack[]={ | |
1221 0x61, 0x00, 0x00, 0x00, | |
1222 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ | |
1223 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ | |
1224 0x62, 0x00, 0x00, 0x00, | |
1225 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ | |
1226 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ | |
1227 0x62, 0x01, 0x00, 0x00, | |
1228 0x62, 0x02, 0x00, 0x00 | |
1229 }; | |
1230 static const int32_t utf32OffsetsBack[]={ | |
1231 0,0,0,0, | |
1232 1,1,1,1, | |
1233 2,2,2,2, | |
1234 4,4,4,4, | |
1235 5,5,5,5, | |
1236 6,6,6,6, | |
1237 7,7,7,7, | |
1238 8,8,8,8 | |
1239 }; | |
1240 testConvertToU(utf32, sizeof(utf32), | |
1241 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-
32le", utf32Offsets,FALSE ); | |
1242 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expect
ed[0]), | |
1243 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32Offse
tsBack, FALSE); | |
1244 } | |
1245 } | |
1246 | |
1247 static void TestCoverageMBCS(){ | |
1248 #if 0 | |
1249 UErrorCode status = U_ZERO_ERROR; | |
1250 const char *directory = loadTestData(&status); | |
1251 char* tdpath = NULL; | |
1252 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory
())+1)); | |
1253 int len = strlen(directory); | |
1254 char* index=NULL; | |
1255 | |
1256 tdpath = (char*) malloc(sizeof(char) * (len * 2)); | |
1257 uprv_strcpy(saveDirectory,u_getDataDirectory()); | |
1258 log_verbose("Retrieved data directory %s \n",saveDirectory); | |
1259 uprv_strcpy(tdpath,directory); | |
1260 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); | |
1261 | |
1262 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ | |
1263 *(index+1)=0; | |
1264 } | |
1265 u_setDataDirectory(tdpath); | |
1266 log_verbose("ICU data directory is set to: %s \n" ,tdpath); | |
1267 #endif | |
1268 | |
1269 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test1.ucm | |
1270 which is test file for MBCS conversion with single-byte codepage data.*/ | |
1271 { | |
1272 | |
1273 /* MBCS with single byte codepage data test1.ucm*/ | |
1274 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34
, 0x0003}; | |
1275 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; | |
1276 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; | |
1277 | |
1278 /*from Unicode*/ | |
1279 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), | |
1280 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); | |
1281 } | |
1282 | |
1283 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test3.ucm | |
1284 which is test file for MBCS conversion with three-byte codepage data.*/ | |
1285 { | |
1286 | |
1287 /* MBCS with three byte codepage data test3.ucm*/ | |
1288 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4
, 0xde34, 0xd84d, 0xdc56, 0x000e}; | |
1289 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0
x07, 0x01, 0x02, 0x0a, 0xff,}; | |
1290 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; | |
1291 | |
1292 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0
x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; | |
1293 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4
, 0xde34, 0xd84d, 0xdc56, 0xfffd}; | |
1294 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; | |
1295 | |
1296 /*from Unicode*/ | |
1297 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), | |
1298 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); | |
1299 | |
1300 /*to Unicode*/ | |
1301 testConvertToU(test3input, sizeof(test3input), | |
1302 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]),
"@test3", fromtest3Offs ,FALSE); | |
1303 | |
1304 } | |
1305 | |
1306 /*some more test to increase the code coverage in MBCS. Create an test conv
erter from test4.ucm | |
1307 which is test file for MBCS conversion with four-byte codepage data.*/ | |
1308 { | |
1309 | |
1310 /* MBCS with three byte codepage data test4.ucm*/ | |
1311 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b,
0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; | |
1312 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0
x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; | |
1313 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6
, 6, 8,}; | |
1314 | |
1315 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0
x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; | |
1316 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b,
0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; | |
1317 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; | |
1318 | |
1319 /*from Unicode*/ | |
1320 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[
0]), | |
1321 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); | |
1322 | |
1323 /*to Unicode*/ | |
1324 testConvertToU(test4input, sizeof(test4input), | |
1325 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]),
"@test4", fromtest4Offs,FALSE ); | |
1326 | |
1327 } | |
1328 #if 0 | |
1329 free(tdpath); | |
1330 /* restore the original data directory */ | |
1331 log_verbose("Setting the data directory to %s \n", saveDirectory); | |
1332 u_setDataDirectory(saveDirectory); | |
1333 free(saveDirectory); | |
1334 #endif | |
1335 | |
1336 } | |
1337 | |
1338 static void TestConverterType(const char *convName, UConverterType convType) { | |
1339 UConverter* myConverter; | |
1340 UErrorCode err = U_ZERO_ERROR; | |
1341 | |
1342 myConverter = my_ucnv_open(convName, &err); | |
1343 | |
1344 if (U_FAILURE(err)) { | |
1345 log_data_err("Failed to create an %s converter\n", convName); | |
1346 return; | |
1347 } | |
1348 else | |
1349 { | |
1350 if (ucnv_getType(myConverter)!=convType) { | |
1351 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", | |
1352 convName, convType); | |
1353 } | |
1354 else { | |
1355 log_verbose("ucnv_getType %s ok\n", convName); | |
1356 } | |
1357 } | |
1358 ucnv_close(myConverter); | |
1359 } | |
1360 | |
1361 static void TestConverterTypesAndStarters() | |
1362 { | |
1363 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1364 UConverter* myConverter; | |
1365 UErrorCode err = U_ZERO_ERROR; | |
1366 UBool mystarters[256]; | |
1367 | |
1368 /* const UBool expectedKSCstarters[256] = { | |
1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, | |
1383 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1386 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, | |
1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ | |
1395 | |
1396 | |
1397 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversio
n types."); | |
1398 | |
1399 myConverter = ucnv_open("ksc", &err); | |
1400 if (U_FAILURE(err)) { | |
1401 log_data_err("Failed to create an ibm-ksc converter\n"); | |
1402 return; | |
1403 } | |
1404 else | |
1405 { | |
1406 if (ucnv_getType(myConverter)!=UCNV_MBCS) | |
1407 log_err("ucnv_getType Failed for ibm-949\n"); | |
1408 else | |
1409 log_verbose("ucnv_getType ibm-949 ok\n"); | |
1410 | |
1411 if(myConverter!=NULL) | |
1412 ucnv_getStarters(myConverter, mystarters, &err); | |
1413 | |
1414 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters
))) | |
1415 log_err("Failed ucnv_getStarters for ksc\n"); | |
1416 else | |
1417 log_verbose("ucnv_getStarters ok\n");*/ | |
1418 | |
1419 } | |
1420 ucnv_close(myConverter); | |
1421 | |
1422 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); | |
1423 TestConverterType("ibm-878", UCNV_SBCS); | |
1424 #endif | |
1425 | |
1426 TestConverterType("iso-8859-1", UCNV_LATIN_1); | |
1427 | |
1428 TestConverterType("ibm-1208", UCNV_UTF8); | |
1429 | |
1430 TestConverterType("utf-8", UCNV_UTF8); | |
1431 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); | |
1432 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); | |
1433 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); | |
1434 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); | |
1435 | |
1436 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1437 | |
1438 #if defined(U_ENABLE_GENERIC_ISO_2022) | |
1439 TestConverterType("iso-2022", UCNV_ISO_2022); | |
1440 #endif | |
1441 | |
1442 TestConverterType("hz", UCNV_HZ); | |
1443 #endif | |
1444 | |
1445 TestConverterType("scsu", UCNV_SCSU); | |
1446 | |
1447 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1448 TestConverterType("x-iscii-de", UCNV_ISCII); | |
1449 #endif | |
1450 | |
1451 TestConverterType("ascii", UCNV_US_ASCII); | |
1452 TestConverterType("utf-7", UCNV_UTF7); | |
1453 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); | |
1454 TestConverterType("bocu-1", UCNV_BOCU1); | |
1455 } | |
1456 | |
1457 static void | |
1458 TestAmbiguousConverter(UConverter *cnv) { | |
1459 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; | |
1460 UChar outUnicode[20]={ 0, 0, 0, 0 }; | |
1461 | |
1462 const char *s; | |
1463 UChar *u; | |
1464 UErrorCode errorCode; | |
1465 UBool isAmbiguous; | |
1466 | |
1467 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ | |
1468 errorCode=U_ZERO_ERROR; | |
1469 s=inBytes; | |
1470 u=outUnicode; | |
1471 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); | |
1472 if(U_FAILURE(errorCode)) { | |
1473 /* we do not care about general failures in this test; the input may jus
t not be mappable */ | |
1474 return; | |
1475 } | |
1476 | |
1477 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { | |
1478 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: thi
s test is not applicable */ | |
1479 /* There are some encodings that are partially ASCII based, | |
1480 like the ISO-7 and GSM series of codepages, which we ignore. */ | |
1481 return; | |
1482 } | |
1483 | |
1484 isAmbiguous=ucnv_isAmbiguous(cnv); | |
1485 | |
1486 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous()
*/ | |
1487 if((outUnicode[2]!=0x5c)!=isAmbiguous) { | |
1488 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAm
biguous()==%d\n", | |
1489 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); | |
1490 return; | |
1491 } | |
1492 | |
1493 if(outUnicode[2]!=0x5c) { | |
1494 /* needs fixup, fix it */ | |
1495 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); | |
1496 if(outUnicode[2]!=0x5c) { | |
1497 /* the fix failed */ | |
1498 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cn
v, &errorCode)); | |
1499 return; | |
1500 } | |
1501 } | |
1502 } | |
1503 | |
1504 static void TestAmbiguous() | |
1505 { | |
1506 UErrorCode status = U_ZERO_ERROR; | |
1507 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; | |
1508 static const char target[] = { | |
1509 /* "\\usr\\local\\share\\data\\icutest.txt" */ | |
1510 0x5c, 0x75, 0x73, 0x72, | |
1511 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, | |
1512 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, | |
1513 0x5c, 0x64, 0x61, 0x74, 0x61, | |
1514 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, | |
1515 0 | |
1516 }; | |
1517 UChar asciiResult[200], sjisResult[200]; | |
1518 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; | |
1519 const char *name; | |
1520 | |
1521 /* enumerate all converters */ | |
1522 status=U_ZERO_ERROR; | |
1523 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { | |
1524 cnv=ucnv_open(name, &status); | |
1525 if(U_SUCCESS(status)) { | |
1526 TestAmbiguousConverter(cnv); | |
1527 ucnv_close(cnv); | |
1528 } else { | |
1529 log_err("error: unable to open available converter \"%s\"\n", name); | |
1530 status=U_ZERO_ERROR; | |
1531 } | |
1532 } | |
1533 | |
1534 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1535 sjis_cnv = ucnv_open("ibm-943", &status); | |
1536 if (U_FAILURE(status)) | |
1537 { | |
1538 log_data_err("Failed to create a SJIS converter\n"); | |
1539 return; | |
1540 } | |
1541 ascii_cnv = ucnv_open("LATIN-1", &status); | |
1542 if (U_FAILURE(status)) | |
1543 { | |
1544 log_data_err("Failed to create a LATIN-1 converter\n"); | |
1545 ucnv_close(sjis_cnv); | |
1546 return; | |
1547 } | |
1548 /* convert target from SJIS to Unicode */ | |
1549 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF
_UCHAR, target, (int32_t)strlen(target), &status); | |
1550 if (U_FAILURE(status)) | |
1551 { | |
1552 log_err("Failed to convert the SJIS string.\n"); | |
1553 ucnv_close(sjis_cnv); | |
1554 ucnv_close(ascii_cnv); | |
1555 return; | |
1556 } | |
1557 /* convert target from Latin-1 to Unicode */ | |
1558 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/
U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); | |
1559 if (U_FAILURE(status)) | |
1560 { | |
1561 log_err("Failed to convert the Latin-1 string.\n"); | |
1562 ucnv_close(sjis_cnv); | |
1563 ucnv_close(ascii_cnv); | |
1564 return; | |
1565 } | |
1566 if (!ucnv_isAmbiguous(sjis_cnv)) | |
1567 { | |
1568 log_err("SJIS converter should contain ambiguous character mappings.\n")
; | |
1569 ucnv_close(sjis_cnv); | |
1570 ucnv_close(ascii_cnv); | |
1571 return; | |
1572 } | |
1573 if (u_strcmp(sjisResult, asciiResult) == 0) | |
1574 { | |
1575 log_err("File separators for SJIS don't need to be fixed.\n"); | |
1576 } | |
1577 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); | |
1578 if (u_strcmp(sjisResult, asciiResult) != 0) | |
1579 { | |
1580 log_err("Fixing file separator for SJIS failed.\n"); | |
1581 } | |
1582 ucnv_close(sjis_cnv); | |
1583 ucnv_close(ascii_cnv); | |
1584 #endif | |
1585 } | |
1586 | |
1587 static void | |
1588 TestSignatureDetection(){ | |
1589 /* with null terminated strings */ | |
1590 { | |
1591 static const char* data[] = { | |
1592 "\xFE\xFF\x00\x00", /* UTF-16BE */ | |
1593 "\xFF\xFE\x00\x00", /* UTF-16LE */ | |
1594 "\xEF\xBB\xBF\x00", /* UTF-8 */ | |
1595 "\x0E\xFE\xFF\x00", /* SCSU */ | |
1596 | |
1597 "\xFE\xFF", /* UTF-16BE */ | |
1598 "\xFF\xFE", /* UTF-16LE */ | |
1599 "\xEF\xBB\xBF", /* UTF-8 */ | |
1600 "\x0E\xFE\xFF", /* SCSU */ | |
1601 | |
1602 "\xFE\xFF\x41\x42", /* UTF-16BE */ | |
1603 "\xFF\xFE\x41\x41", /* UTF-16LE */ | |
1604 "\xEF\xBB\xBF\x41", /* UTF-8 */ | |
1605 "\x0E\xFE\xFF\x41", /* SCSU */ | |
1606 | |
1607 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ | |
1608 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ | |
1609 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ | |
1610 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ | |
1611 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ | |
1612 | |
1613 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ | |
1614 }; | |
1615 static const char* expected[] = { | |
1616 "UTF-16BE", | |
1617 "UTF-16LE", | |
1618 "UTF-8", | |
1619 "SCSU", | |
1620 | |
1621 "UTF-16BE", | |
1622 "UTF-16LE", | |
1623 "UTF-8", | |
1624 "SCSU", | |
1625 | |
1626 "UTF-16BE", | |
1627 "UTF-16LE", | |
1628 "UTF-8", | |
1629 "SCSU", | |
1630 | |
1631 "UTF-7", | |
1632 "UTF-7", | |
1633 "UTF-7", | |
1634 "UTF-7", | |
1635 "UTF-7", | |
1636 "UTF-EBCDIC" | |
1637 }; | |
1638 static const int32_t expectedLength[] ={ | |
1639 2, | |
1640 2, | |
1641 3, | |
1642 3, | |
1643 | |
1644 2, | |
1645 2, | |
1646 3, | |
1647 3, | |
1648 | |
1649 2, | |
1650 2, | |
1651 3, | |
1652 3, | |
1653 | |
1654 5, | |
1655 4, | |
1656 4, | |
1657 4, | |
1658 4, | |
1659 4 | |
1660 }; | |
1661 int i=0; | |
1662 UErrorCode err; | |
1663 int32_t signatureLength = -1; | |
1664 const char* source = NULL; | |
1665 const char* enc = NULL; | |
1666 for( ; i<sizeof(data)/sizeof(char*); i++){ | |
1667 err = U_ZERO_ERROR; | |
1668 source = data[i]; | |
1669 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &er
r); | |
1670 if(U_FAILURE(err)){ | |
1671 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i. Error: %s\n", source,i,u_errorName(err)); | |
1672 continue; | |
1673 } | |
1674 if(enc == NULL || strcmp(enc,expected[i]) !=0){ | |
1675 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); | |
1676 continue; | |
1677 } | |
1678 if(signatureLength != expectedLength[i]){ | |
1679 log_err("ucnv_detectUnicodeSignature failed for source : %s at i
ndex :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expecte
dLength[i]); | |
1680 } | |
1681 } | |
1682 } | |
1683 { | |
1684 static const char* data[] = { | |
1685 "\xFE\xFF\x00", /* UTF-16BE */ | |
1686 "\xFF\xFE\x00", /* UTF-16LE */ | |
1687 "\xEF\xBB\xBF\x00", /* UTF-8 */ | |
1688 "\x0E\xFE\xFF\x00", /* SCSU */ | |
1689 "\x00\x00\xFE\xFF", /* UTF-32BE */ | |
1690 "\xFF\xFE\x00\x00", /* UTF-32LE */ | |
1691 "\xFE\xFF", /* UTF-16BE */ | |
1692 "\xFF\xFE", /* UTF-16LE */ | |
1693 "\xEF\xBB\xBF", /* UTF-8 */ | |
1694 "\x0E\xFE\xFF", /* SCSU */ | |
1695 "\x00\x00\xFE\xFF", /* UTF-32BE */ | |
1696 "\xFF\xFE\x00\x00", /* UTF-32LE */ | |
1697 "\xFE\xFF\x41\x42", /* UTF-16BE */ | |
1698 "\xFF\xFE\x41\x41", /* UTF-16LE */ | |
1699 "\xEF\xBB\xBF\x41", /* UTF-8 */ | |
1700 "\x0E\xFE\xFF\x41", /* SCSU */ | |
1701 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ | |
1702 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ | |
1703 "\xFB\xEE\x28", /* BOCU-1 */ | |
1704 "\xFF\x41\x42" /* NULL */ | |
1705 }; | |
1706 static const int len[] = { | |
1707 3, | |
1708 3, | |
1709 4, | |
1710 4, | |
1711 4, | |
1712 4, | |
1713 2, | |
1714 2, | |
1715 3, | |
1716 3, | |
1717 4, | |
1718 4, | |
1719 4, | |
1720 4, | |
1721 4, | |
1722 4, | |
1723 5, | |
1724 5, | |
1725 3, | |
1726 3 | |
1727 }; | |
1728 | |
1729 static const char* expected[] = { | |
1730 "UTF-16BE", | |
1731 "UTF-16LE", | |
1732 "UTF-8", | |
1733 "SCSU", | |
1734 "UTF-32BE", | |
1735 "UTF-32LE", | |
1736 "UTF-16BE", | |
1737 "UTF-16LE", | |
1738 "UTF-8", | |
1739 "SCSU", | |
1740 "UTF-32BE", | |
1741 "UTF-32LE", | |
1742 "UTF-16BE", | |
1743 "UTF-16LE", | |
1744 "UTF-8", | |
1745 "SCSU", | |
1746 "UTF-32BE", | |
1747 "UTF-32LE", | |
1748 "BOCU-1", | |
1749 NULL | |
1750 }; | |
1751 static const int32_t expectedLength[] ={ | |
1752 2, | |
1753 2, | |
1754 3, | |
1755 3, | |
1756 4, | |
1757 4, | |
1758 2, | |
1759 2, | |
1760 3, | |
1761 3, | |
1762 4, | |
1763 4, | |
1764 2, | |
1765 2, | |
1766 3, | |
1767 3, | |
1768 4, | |
1769 4, | |
1770 3, | |
1771 0 | |
1772 }; | |
1773 int i=0; | |
1774 UErrorCode err; | |
1775 int32_t signatureLength = -1; | |
1776 int32_t sourceLength=-1; | |
1777 const char* source = NULL; | |
1778 const char* enc = NULL; | |
1779 for( ; i<sizeof(data)/sizeof(char*); i++){ | |
1780 err = U_ZERO_ERROR; | |
1781 source = data[i]; | |
1782 sourceLength = len[i]; | |
1783 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureL
ength, &err); | |
1784 if(U_FAILURE(err)){ | |
1785 log_err("ucnv_detectUnicodeSignature test2 failed for source : %
s at index :%i. Error: %s\n", source,i,u_errorName(err)); | |
1786 continue; | |
1787 } | |
1788 if(enc == NULL || strcmp(enc,expected[i]) !=0){ | |
1789 if(expected[i] !=NULL){ | |
1790 log_err("ucnv_detectUnicodeSignature test2 failed for source :
%s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); | |
1791 continue; | |
1792 } | |
1793 } | |
1794 if(signatureLength != expectedLength[i]){ | |
1795 log_err("ucnv_detectUnicodeSignature test2 failed for source : %
s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,e
xpectedLength[i]); | |
1796 } | |
1797 } | |
1798 } | |
1799 } | |
1800 | |
1801 static void TestUTF7() { | |
1802 /* test input */ | |
1803 static const uint8_t in[]={ | |
1804 /* H - +Jjo- - ! +- +2AHcAQ */ | |
1805 0x48, | |
1806 0x2d, | |
1807 0x2b, 0x4a, 0x6a, 0x6f, | |
1808 0x2d, 0x2d, | |
1809 0x21, | |
1810 0x2b, 0x2d, | |
1811 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 | |
1812 }; | |
1813 | |
1814 /* expected test results */ | |
1815 static const int32_t results[]={ | |
1816 /* number of bytes read, code point */ | |
1817 1, 0x48, | |
1818 1, 0x2d, | |
1819 4, 0x263a, /* <WHITE SMILING FACE> */ | |
1820 2, 0x2d, | |
1821 1, 0x21, | |
1822 2, 0x2b, | |
1823 7, 0x10401 | |
1824 }; | |
1825 | |
1826 const char *cnvName; | |
1827 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
1828 UErrorCode errorCode=U_ZERO_ERROR; | |
1829 UConverter *cnv=ucnv_open("UTF-7", &errorCode); | |
1830 if(U_FAILURE(errorCode)) { | |
1831 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(error
Code)); | |
1832 return; | |
1833 } | |
1834 TestNextUChar(cnv, source, limit, results, "UTF-7"); | |
1835 /* Test the condition when source >= sourceLimit */ | |
1836 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
1837 cnvName = ucnv_getName(cnv, &errorCode); | |
1838 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { | |
1839 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(error
Code)); | |
1840 } | |
1841 ucnv_close(cnv); | |
1842 } | |
1843 | |
1844 static void TestIMAP() { | |
1845 /* test input */ | |
1846 static const uint8_t in[]={ | |
1847 /* H - &Jjo- - ! &- &2AHcAQ- \ */ | |
1848 0x48, | |
1849 0x2d, | |
1850 0x26, 0x4a, 0x6a, 0x6f, | |
1851 0x2d, 0x2d, | |
1852 0x21, | |
1853 0x26, 0x2d, | |
1854 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d | |
1855 }; | |
1856 | |
1857 /* expected test results */ | |
1858 static const int32_t results[]={ | |
1859 /* number of bytes read, code point */ | |
1860 1, 0x48, | |
1861 1, 0x2d, | |
1862 4, 0x263a, /* <WHITE SMILING FACE> */ | |
1863 2, 0x2d, | |
1864 1, 0x21, | |
1865 2, 0x26, | |
1866 7, 0x10401 | |
1867 }; | |
1868 | |
1869 const char *cnvName; | |
1870 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
1871 UErrorCode errorCode=U_ZERO_ERROR; | |
1872 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); | |
1873 if(U_FAILURE(errorCode)) { | |
1874 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_err
orName(errorCode)); | |
1875 return; | |
1876 } | |
1877 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); | |
1878 /* Test the condition when source >= sourceLimit */ | |
1879 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
1880 cnvName = ucnv_getName(cnv, &errorCode); | |
1881 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0)
{ | |
1882 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_err
orName(errorCode)); | |
1883 } | |
1884 ucnv_close(cnv); | |
1885 } | |
1886 | |
1887 static void TestUTF8() { | |
1888 /* test input */ | |
1889 static const uint8_t in[]={ | |
1890 0x61, | |
1891 0xc2, 0x80, | |
1892 0xe0, 0xa0, 0x80, | |
1893 0xf0, 0x90, 0x80, 0x80, | |
1894 0xf4, 0x84, 0x8c, 0xa1, | |
1895 0xf0, 0x90, 0x90, 0x81 | |
1896 }; | |
1897 | |
1898 /* expected test results */ | |
1899 static const int32_t results[]={ | |
1900 /* number of bytes read, code point */ | |
1901 1, 0x61, | |
1902 2, 0x80, | |
1903 3, 0x800, | |
1904 4, 0x10000, | |
1905 4, 0x104321, | |
1906 4, 0x10401 | |
1907 }; | |
1908 | |
1909 /* error test input */ | |
1910 static const uint8_t in2[]={ | |
1911 0x61, | |
1912 0xc0, 0x80, /* illegal non-shortest form */ | |
1913 0xe0, 0x80, 0x80, /* illegal non-shortest form */ | |
1914 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ | |
1915 0xc0, 0xc0, /* illegal trail byte */ | |
1916 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ | |
1917 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ | |
1918 0xfe, /* illegal byte altogether */ | |
1919 0x62 | |
1920 }; | |
1921 | |
1922 /* expected error test results */ | |
1923 static const int32_t results2[]={ | |
1924 /* number of bytes read, code point */ | |
1925 1, 0x61, | |
1926 22, 0x62 | |
1927 }; | |
1928 | |
1929 UConverterToUCallback cb; | |
1930 const void *p; | |
1931 | |
1932 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); | |
1933 UErrorCode errorCode=U_ZERO_ERROR; | |
1934 UConverter *cnv=ucnv_open("UTF-8", &errorCode); | |
1935 if(U_FAILURE(errorCode)) { | |
1936 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)
); | |
1937 return; | |
1938 } | |
1939 TestNextUChar(cnv, source, limit, results, "UTF-8"); | |
1940 /* Test the condition when source >= sourceLimit */ | |
1941 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
1942 | |
1943 /* test error behavior with a skip callback */ | |
1944 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
1945 source=(const char *)in2; | |
1946 limit=(const char *)(in2+sizeof(in2)); | |
1947 TestNextUChar(cnv, source, limit, results2, "UTF-8"); | |
1948 | |
1949 ucnv_close(cnv); | |
1950 } | |
1951 | |
1952 static void TestCESU8() { | |
1953 /* test input */ | |
1954 static const uint8_t in[]={ | |
1955 0x61, | |
1956 0xc2, 0x80, | |
1957 0xe0, 0xa0, 0x80, | |
1958 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, | |
1959 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, | |
1960 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, | |
1961 0xef, 0xbf, 0xbc | |
1962 }; | |
1963 | |
1964 /* expected test results */ | |
1965 static const int32_t results[]={ | |
1966 /* number of bytes read, code point */ | |
1967 1, 0x61, | |
1968 2, 0x80, | |
1969 3, 0x800, | |
1970 6, 0x10000, | |
1971 3, 0xdc01, | |
1972 -1,0xd802, /* may read 3 or 6 bytes */ | |
1973 -1,0x10ffff,/* may read 0 or 3 bytes */ | |
1974 3, 0xfffc | |
1975 }; | |
1976 | |
1977 /* error test input */ | |
1978 static const uint8_t in2[]={ | |
1979 0x61, | |
1980 0xc0, 0x80, /* illegal non-shortest form */ | |
1981 0xe0, 0x80, 0x80, /* illegal non-shortest form */ | |
1982 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ | |
1983 0xc0, 0xc0, /* illegal trail byte */ | |
1984 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code poi
nt */ | |
1985 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code poi
nt */ | |
1986 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code poi
nt */ | |
1987 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ | |
1988 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ | |
1989 0xfe, /* illegal byte altogether */ | |
1990 0x62 | |
1991 }; | |
1992 | |
1993 /* expected error test results */ | |
1994 static const int32_t results2[]={ | |
1995 /* number of bytes read, code point */ | |
1996 1, 0x61, | |
1997 34, 0x62 | |
1998 }; | |
1999 | |
2000 UConverterToUCallback cb; | |
2001 const void *p; | |
2002 | |
2003 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); | |
2004 UErrorCode errorCode=U_ZERO_ERROR; | |
2005 UConverter *cnv=ucnv_open("CESU-8", &errorCode); | |
2006 if(U_FAILURE(errorCode)) { | |
2007 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(erro
rCode)); | |
2008 return; | |
2009 } | |
2010 TestNextUChar(cnv, source, limit, results, "CESU-8"); | |
2011 /* Test the condition when source >= sourceLimit */ | |
2012 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2013 | |
2014 /* test error behavior with a skip callback */ | |
2015 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
2016 source=(const char *)in2; | |
2017 limit=(const char *)(in2+sizeof(in2)); | |
2018 TestNextUChar(cnv, source, limit, results2, "CESU-8"); | |
2019 | |
2020 ucnv_close(cnv); | |
2021 } | |
2022 | |
2023 static void TestUTF16() { | |
2024 /* test input */ | |
2025 static const uint8_t in1[]={ | |
2026 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff | |
2027 }; | |
2028 static const uint8_t in2[]={ | |
2029 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff | |
2030 }; | |
2031 static const uint8_t in3[]={ | |
2032 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 | |
2033 }; | |
2034 | |
2035 /* expected test results */ | |
2036 static const int32_t results1[]={ | |
2037 /* number of bytes read, code point */ | |
2038 4, 0x4e00, | |
2039 2, 0xfeff | |
2040 }; | |
2041 static const int32_t results2[]={ | |
2042 /* number of bytes read, code point */ | |
2043 4, 0x004e, | |
2044 2, 0xfffe | |
2045 }; | |
2046 static const int32_t results3[]={ | |
2047 /* number of bytes read, code point */ | |
2048 2, 0xfefe, | |
2049 2, 0x4e00, | |
2050 2, 0xfeff, | |
2051 4, 0x20001 | |
2052 }; | |
2053 | |
2054 const char *source, *limit; | |
2055 | |
2056 UErrorCode errorCode=U_ZERO_ERROR; | |
2057 UConverter *cnv=ucnv_open("UTF-16", &errorCode); | |
2058 if(U_FAILURE(errorCode)) { | |
2059 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode
)); | |
2060 return; | |
2061 } | |
2062 | |
2063 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); | |
2064 TestNextUChar(cnv, source, limit, results1, "UTF-16"); | |
2065 | |
2066 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); | |
2067 ucnv_resetToUnicode(cnv); | |
2068 TestNextUChar(cnv, source, limit, results2, "UTF-16"); | |
2069 | |
2070 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); | |
2071 ucnv_resetToUnicode(cnv); | |
2072 TestNextUChar(cnv, source, limit, results3, "UTF-16"); | |
2073 | |
2074 /* Test the condition when source >= sourceLimit */ | |
2075 ucnv_resetToUnicode(cnv); | |
2076 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2077 | |
2078 ucnv_close(cnv); | |
2079 } | |
2080 | |
2081 static void TestUTF16BE() { | |
2082 /* test input */ | |
2083 static const uint8_t in[]={ | |
2084 0x00, 0x61, | |
2085 0x00, 0xc0, | |
2086 0x00, 0x31, | |
2087 0x00, 0xf4, | |
2088 0xce, 0xfe, | |
2089 0xd8, 0x01, 0xdc, 0x01 | |
2090 }; | |
2091 | |
2092 /* expected test results */ | |
2093 static const int32_t results[]={ | |
2094 /* number of bytes read, code point */ | |
2095 2, 0x61, | |
2096 2, 0xc0, | |
2097 2, 0x31, | |
2098 2, 0xf4, | |
2099 2, 0xcefe, | |
2100 4, 0x10401 | |
2101 }; | |
2102 | |
2103 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2104 UErrorCode errorCode=U_ZERO_ERROR; | |
2105 UConverter *cnv=ucnv_open("utf-16be", &errorCode); | |
2106 if(U_FAILURE(errorCode)) { | |
2107 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCo
de)); | |
2108 return; | |
2109 } | |
2110 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); | |
2111 /* Test the condition when source >= sourceLimit */ | |
2112 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2113 /*Test for the condition where there is an invalid character*/ | |
2114 { | |
2115 static const uint8_t source2[]={0x61}; | |
2116 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
2117 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); | |
2118 } | |
2119 #if 0 | |
2120 /* | |
2121 * Test disabled because currently the UTF-16BE/LE converters are supposed | |
2122 * to not set errors for unpaired surrogates. | |
2123 * This may change with | |
2124 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 | |
2125 */ | |
2126 | |
2127 /*Test for the condition where there is a surrogate pair*/ | |
2128 { | |
2129 const uint8_t source2[]={0xd8, 0x01}; | |
2130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); | |
2131 } | |
2132 #endif | |
2133 ucnv_close(cnv); | |
2134 } | |
2135 | |
2136 static void | |
2137 TestUTF16LE() { | |
2138 /* test input */ | |
2139 static const uint8_t in[]={ | |
2140 0x61, 0x00, | |
2141 0x31, 0x00, | |
2142 0x4e, 0x2e, | |
2143 0x4e, 0x00, | |
2144 0x01, 0xd8, 0x01, 0xdc | |
2145 }; | |
2146 | |
2147 /* expected test results */ | |
2148 static const int32_t results[]={ | |
2149 /* number of bytes read, code point */ | |
2150 2, 0x61, | |
2151 2, 0x31, | |
2152 2, 0x2e4e, | |
2153 2, 0x4e, | |
2154 4, 0x10401 | |
2155 }; | |
2156 | |
2157 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2158 UErrorCode errorCode=U_ZERO_ERROR; | |
2159 UConverter *cnv=ucnv_open("utf-16le", &errorCode); | |
2160 if(U_FAILURE(errorCode)) { | |
2161 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCo
de)); | |
2162 return; | |
2163 } | |
2164 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); | |
2165 /* Test the condition when source >= sourceLimit */ | |
2166 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2167 /*Test for the condition where there is an invalid character*/ | |
2168 { | |
2169 static const uint8_t source2[]={0x61}; | |
2170 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
2171 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); | |
2172 } | |
2173 #if 0 | |
2174 /* | |
2175 * Test disabled because currently the UTF-16BE/LE converters are supposed | |
2176 * to not set errors for unpaired surrogates. | |
2177 * This may change with | |
2178 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 | |
2179 */ | |
2180 | |
2181 /*Test for the condition where there is a surrogate character*/ | |
2182 { | |
2183 static const uint8_t source2[]={0x01, 0xd8}; | |
2184 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); | |
2185 } | |
2186 #endif | |
2187 | |
2188 ucnv_close(cnv); | |
2189 } | |
2190 | |
2191 static void TestUTF32() { | |
2192 /* test input */ | |
2193 static const uint8_t in1[]={ | |
2194 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0x
ff | |
2195 }; | |
2196 static const uint8_t in2[]={ | |
2197 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x
00 | |
2198 }; | |
2199 static const uint8_t in3[]={ | |
2200 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x
40, 0x00, 0x00, 0xdc, 0x01 | |
2201 }; | |
2202 | |
2203 /* expected test results */ | |
2204 static const int32_t results1[]={ | |
2205 /* number of bytes read, code point */ | |
2206 8, 0x100f00, | |
2207 4, 0xfeff | |
2208 }; | |
2209 static const int32_t results2[]={ | |
2210 /* number of bytes read, code point */ | |
2211 8, 0x0f1000, | |
2212 4, 0xfffe | |
2213 }; | |
2214 static const int32_t results3[]={ | |
2215 /* number of bytes read, code point */ | |
2216 4, 0xfefe, | |
2217 4, 0x100f00, | |
2218 4, 0xfffd, /* unmatched surrogate */ | |
2219 4, 0xfffd /* unmatched surrogate */ | |
2220 }; | |
2221 | |
2222 const char *source, *limit; | |
2223 | |
2224 UErrorCode errorCode=U_ZERO_ERROR; | |
2225 UConverter *cnv=ucnv_open("UTF-32", &errorCode); | |
2226 if(U_FAILURE(errorCode)) { | |
2227 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(erro
rCode)); | |
2228 return; | |
2229 } | |
2230 | |
2231 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); | |
2232 TestNextUChar(cnv, source, limit, results1, "UTF-32"); | |
2233 | |
2234 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); | |
2235 ucnv_resetToUnicode(cnv); | |
2236 TestNextUChar(cnv, source, limit, results2, "UTF-32"); | |
2237 | |
2238 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); | |
2239 ucnv_resetToUnicode(cnv); | |
2240 TestNextUChar(cnv, source, limit, results3, "UTF-32"); | |
2241 | |
2242 /* Test the condition when source >= sourceLimit */ | |
2243 ucnv_resetToUnicode(cnv); | |
2244 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2245 | |
2246 ucnv_close(cnv); | |
2247 } | |
2248 | |
2249 static void | |
2250 TestUTF32BE() { | |
2251 /* test input */ | |
2252 static const uint8_t in[]={ | |
2253 0x00, 0x00, 0x00, 0x61, | |
2254 0x00, 0x00, 0x30, 0x61, | |
2255 0x00, 0x00, 0xdc, 0x00, | |
2256 0x00, 0x00, 0xd8, 0x00, | |
2257 0x00, 0x00, 0xdf, 0xff, | |
2258 0x00, 0x00, 0xff, 0xfe, | |
2259 0x00, 0x10, 0xab, 0xcd, | |
2260 0x00, 0x10, 0xff, 0xff | |
2261 }; | |
2262 | |
2263 /* expected test results */ | |
2264 static const int32_t results[]={ | |
2265 /* number of bytes read, code point */ | |
2266 4, 0x61, | |
2267 4, 0x3061, | |
2268 4, 0xfffd, | |
2269 4, 0xfffd, | |
2270 4, 0xfffd, | |
2271 4, 0xfffe, | |
2272 4, 0x10abcd, | |
2273 4, 0x10ffff | |
2274 }; | |
2275 | |
2276 /* error test input */ | |
2277 static const uint8_t in2[]={ | |
2278 0x00, 0x00, 0x00, 0x61, | |
2279 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ | |
2280 0x00, 0x00, 0x00, 0x62, | |
2281 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
2282 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ | |
2283 0x00, 0x00, 0x01, 0x62, | |
2284 0x00, 0x00, 0x02, 0x62 | |
2285 }; | |
2286 | |
2287 /* expected error test results */ | |
2288 static const int32_t results2[]={ | |
2289 /* number of bytes read, code point */ | |
2290 4, 0x61, | |
2291 8, 0x62, | |
2292 12, 0x162, | |
2293 4, 0x262 | |
2294 }; | |
2295 | |
2296 UConverterToUCallback cb; | |
2297 const void *p; | |
2298 | |
2299 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2300 UErrorCode errorCode=U_ZERO_ERROR; | |
2301 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); | |
2302 if(U_FAILURE(errorCode)) { | |
2303 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(er
rorCode)); | |
2304 return; | |
2305 } | |
2306 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); | |
2307 | |
2308 /* Test the condition when source >= sourceLimit */ | |
2309 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2310 | |
2311 /* test error behavior with a skip callback */ | |
2312 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
2313 source=(const char *)in2; | |
2314 limit=(const char *)(in2+sizeof(in2)); | |
2315 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); | |
2316 | |
2317 ucnv_close(cnv); | |
2318 } | |
2319 | |
2320 static void | |
2321 TestUTF32LE() { | |
2322 /* test input */ | |
2323 static const uint8_t in[]={ | |
2324 0x61, 0x00, 0x00, 0x00, | |
2325 0x61, 0x30, 0x00, 0x00, | |
2326 0x00, 0xdc, 0x00, 0x00, | |
2327 0x00, 0xd8, 0x00, 0x00, | |
2328 0xff, 0xdf, 0x00, 0x00, | |
2329 0xfe, 0xff, 0x00, 0x00, | |
2330 0xcd, 0xab, 0x10, 0x00, | |
2331 0xff, 0xff, 0x10, 0x00 | |
2332 }; | |
2333 | |
2334 /* expected test results */ | |
2335 static const int32_t results[]={ | |
2336 /* number of bytes read, code point */ | |
2337 4, 0x61, | |
2338 4, 0x3061, | |
2339 4, 0xfffd, | |
2340 4, 0xfffd, | |
2341 4, 0xfffd, | |
2342 4, 0xfffe, | |
2343 4, 0x10abcd, | |
2344 4, 0x10ffff | |
2345 }; | |
2346 | |
2347 /* error test input */ | |
2348 static const uint8_t in2[]={ | |
2349 0x61, 0x00, 0x00, 0x00, | |
2350 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ | |
2351 0x62, 0x00, 0x00, 0x00, | |
2352 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ | |
2353 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ | |
2354 0x62, 0x01, 0x00, 0x00, | |
2355 0x62, 0x02, 0x00, 0x00, | |
2356 }; | |
2357 | |
2358 /* expected error test results */ | |
2359 static const int32_t results2[]={ | |
2360 /* number of bytes read, code point */ | |
2361 4, 0x61, | |
2362 8, 0x62, | |
2363 12, 0x162, | |
2364 4, 0x262, | |
2365 }; | |
2366 | |
2367 UConverterToUCallback cb; | |
2368 const void *p; | |
2369 | |
2370 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2371 UErrorCode errorCode=U_ZERO_ERROR; | |
2372 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); | |
2373 if(U_FAILURE(errorCode)) { | |
2374 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(er
rorCode)); | |
2375 return; | |
2376 } | |
2377 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); | |
2378 | |
2379 /* Test the condition when source >= sourceLimit */ | |
2380 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2381 | |
2382 /* test error behavior with a skip callback */ | |
2383 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode)
; | |
2384 source=(const char *)in2; | |
2385 limit=(const char *)(in2+sizeof(in2)); | |
2386 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); | |
2387 | |
2388 ucnv_close(cnv); | |
2389 } | |
2390 | |
2391 static void | |
2392 TestLATIN1() { | |
2393 /* test input */ | |
2394 static const uint8_t in[]={ | |
2395 0x61, | |
2396 0x31, | |
2397 0x32, | |
2398 0xc0, | |
2399 0xf0, | |
2400 0xf4, | |
2401 }; | |
2402 | |
2403 /* expected test results */ | |
2404 static const int32_t results[]={ | |
2405 /* number of bytes read, code point */ | |
2406 1, 0x61, | |
2407 1, 0x31, | |
2408 1, 0x32, | |
2409 1, 0xc0, | |
2410 1, 0xf0, | |
2411 1, 0xf4, | |
2412 }; | |
2413 static const uint16_t in1[] = { | |
2414 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, | |
2415 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, | |
2416 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, | |
2417 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, | |
2418 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, | |
2419 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, | |
2420 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, | |
2421 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, | |
2422 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, | |
2423 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, | |
2424 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, | |
2425 0xcb, 0x82 | |
2426 }; | |
2427 static const uint8_t out1[] = { | |
2428 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, | |
2429 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, | |
2430 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, | |
2431 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, | |
2432 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, | |
2433 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, | |
2434 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, | |
2435 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, | |
2436 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, | |
2437 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, | |
2438 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, | |
2439 0xcb, 0x82 | |
2440 }; | |
2441 static const uint16_t in2[]={ | |
2442 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
2443 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
2444 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
2445 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
2446 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
2447 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
2448 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, | |
2449 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
2450 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
2451 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
2452 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
2453 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
2454 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
2455 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
2456 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2457 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2458 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
2459 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
2460 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2461 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2462 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
2463 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
2464 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
2465 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
2466 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
2467 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
2468 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
2469 0x37, 0x20, 0x2A, 0x2F, | |
2470 }; | |
2471 static const unsigned char out2[]={ | |
2472 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
2473 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
2474 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
2475 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
2476 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
2477 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
2478 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, | |
2479 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
2480 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
2481 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
2482 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
2483 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
2484 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
2485 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
2486 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2487 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2488 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
2489 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
2490 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
2491 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
2492 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
2493 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
2494 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
2495 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
2496 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
2497 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
2498 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
2499 0x37, 0x20, 0x2A, 0x2F, | |
2500 }; | |
2501 const char *source=(const char *)in; | |
2502 const char *limit=(const char *)in+sizeof(in); | |
2503 | |
2504 UErrorCode errorCode=U_ZERO_ERROR; | |
2505 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); | |
2506 if(U_FAILURE(errorCode)) { | |
2507 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(err
orCode)); | |
2508 return; | |
2509 } | |
2510 TestNextUChar(cnv, source, limit, results, "LATIN_1"); | |
2511 /* Test the condition when source >= sourceLimit */ | |
2512 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2513 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof
(out1)); | |
2514 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out
2)); | |
2515 | |
2516 ucnv_close(cnv); | |
2517 } | |
2518 | |
2519 static void | |
2520 TestSBCS() { | |
2521 /* test input */ | |
2522 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; | |
2523 /* expected test results */ | |
2524 static const int32_t results[]={ | |
2525 /* number of bytes read, code point */ | |
2526 1, 0x61, | |
2527 1, 0xbf, | |
2528 1, 0xc4, | |
2529 1, 0x2021, | |
2530 1, 0xf8ff, | |
2531 1, 0x00d9 | |
2532 }; | |
2533 | |
2534 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2535 UErrorCode errorCode=U_ZERO_ERROR; | |
2536 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); | |
2537 if(U_FAILURE(errorCode)) { | |
2538 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_e
rrorName(errorCode)); | |
2539 return; | |
2540 } | |
2541 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); | |
2542 /* Test the condition when source >= sourceLimit */ | |
2543 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2544 /*Test for Illegal character */ /* | |
2545 { | |
2546 static const uint8_t input1[]={ 0xA1 }; | |
2547 const char* illegalsource=(const char*)input1; | |
2548 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource),
U_INVALID_CHAR_FOUND, "source has a illegal characte"); | |
2549 } | |
2550 */ | |
2551 ucnv_close(cnv); | |
2552 } | |
2553 | |
2554 static void | |
2555 TestDBCS() { | |
2556 /* test input */ | |
2557 static const uint8_t in[]={ | |
2558 0x44, 0x6a, | |
2559 0xc4, 0x9c, | |
2560 0x7a, 0x74, | |
2561 0x46, 0xab, | |
2562 0x42, 0x5b, | |
2563 | |
2564 }; | |
2565 | |
2566 /* expected test results */ | |
2567 static const int32_t results[]={ | |
2568 /* number of bytes read, code point */ | |
2569 2, 0x00a7, | |
2570 2, 0xe1d2, | |
2571 2, 0x6962, | |
2572 2, 0xf842, | |
2573 2, 0xffe5, | |
2574 }; | |
2575 | |
2576 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2577 UErrorCode errorCode=U_ZERO_ERROR; | |
2578 | |
2579 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); | |
2580 if(U_FAILURE(errorCode)) { | |
2581 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorN
ame(errorCode)); | |
2582 return; | |
2583 } | |
2584 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); | |
2585 /* Test the condition when source >= sourceLimit */ | |
2586 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2587 /*Test for the condition where there is an invalid character*/ | |
2588 { | |
2589 static const uint8_t source2[]={0x1a, 0x1b}; | |
2590 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character"); | |
2591 } | |
2592 /*Test for the condition where we have a truncated char*/ | |
2593 { | |
2594 static const uint8_t source1[]={0xc4}; | |
2595 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
2596 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
2597 } | |
2598 ucnv_close(cnv); | |
2599 } | |
2600 | |
2601 static void | |
2602 TestMBCS() { | |
2603 /* test input */ | |
2604 static const uint8_t in[]={ | |
2605 0x01, | |
2606 0xa6, 0xa3, | |
2607 0x00, | |
2608 0xa6, 0xa1, | |
2609 0x08, | |
2610 0xc2, 0x76, | |
2611 0xc2, 0x78, | |
2612 | |
2613 }; | |
2614 | |
2615 /* expected test results */ | |
2616 static const int32_t results[]={ | |
2617 /* number of bytes read, code point */ | |
2618 1, 0x0001, | |
2619 2, 0x250c, | |
2620 1, 0x0000, | |
2621 2, 0x2500, | |
2622 1, 0x0008, | |
2623 2, 0xd60c, | |
2624 2, 0xd60e, | |
2625 }; | |
2626 | |
2627 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2628 UErrorCode errorCode=U_ZERO_ERROR; | |
2629 | |
2630 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); | |
2631 if(U_FAILURE(errorCode)) { | |
2632 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorN
ame(errorCode)); | |
2633 return; | |
2634 } | |
2635 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); | |
2636 /* Test the condition when source >= sourceLimit */ | |
2637 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2638 /*Test for the condition where there is an invalid character*/ | |
2639 { | |
2640 static const uint8_t source2[]={0xa1, 0x80}; | |
2641 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character"); | |
2642 } | |
2643 /*Test for the condition where we have a truncated char*/ | |
2644 { | |
2645 static const uint8_t source1[]={0xc4}; | |
2646 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
2647 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
2648 } | |
2649 ucnv_close(cnv); | |
2650 | |
2651 } | |
2652 | |
2653 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO | |
2654 static void | |
2655 TestICCRunout() { | |
2656 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1},
:int{0}, "\", "?", :bin{""} } */ | |
2657 | |
2658 const char *cnvName = "ibm-1363"; | |
2659 UErrorCode status = U_ZERO_ERROR; | |
2660 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; | |
2661 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ | |
2662 const char *source = sourceData; | |
2663 const char *sourceLim = sourceData+sizeof(sourceData); | |
2664 UChar c1, c2, c3; | |
2665 UConverter *cnv=ucnv_open(cnvName, &status); | |
2666 if(U_FAILURE(status)) { | |
2667 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(s
tatus)); | |
2668 return; | |
2669 } | |
2670 | |
2671 #if 0 | |
2672 { | |
2673 UChar targetBuf[256]; | |
2674 UChar *target = targetBuf; | |
2675 UChar *targetLim = target+256; | |
2676 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &sta
tus); | |
2677 | |
2678 log_info("After convert: target@%d, source@%d, status%s\n", | |
2679 target-targetBuf, source-sourceData, u_errorName(status)); | |
2680 | |
2681 if(U_FAILURE(status)) { | |
2682 log_err("Failed to convert: %s\n", u_errorName(status)); | |
2683 } else { | |
2684 | |
2685 } | |
2686 } | |
2687 #endif | |
2688 | |
2689 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
2690 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_e
rrorName(status)); | |
2691 | |
2692 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
2693 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_e
rrorName(status)); | |
2694 | |
2695 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); | |
2696 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_e
rrorName(status)); | |
2697 | |
2698 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { | |
2699 log_verbose("OK\n"); | |
2700 } else { | |
2701 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n
"); | |
2702 } | |
2703 | |
2704 ucnv_close(cnv); | |
2705 | |
2706 } | |
2707 #endif | |
2708 | |
2709 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
2710 | |
2711 static void | |
2712 TestISO_2022() { | |
2713 /* test input */ | |
2714 static const uint8_t in[]={ | |
2715 0x1b, 0x25, 0x42, | |
2716 0x31, | |
2717 0x32, | |
2718 0x61, | |
2719 0xc2, 0x80, | |
2720 0xe0, 0xa0, 0x80, | |
2721 0xf0, 0x90, 0x80, 0x80 | |
2722 }; | |
2723 | |
2724 | |
2725 | |
2726 /* expected test results */ | |
2727 static const int32_t results[]={ | |
2728 /* number of bytes read, code point */ | |
2729 4, 0x0031, /* 4 bytes including the escape sequence */ | |
2730 1, 0x0032, | |
2731 1, 0x61, | |
2732 2, 0x80, | |
2733 3, 0x800, | |
2734 4, 0x10000 | |
2735 }; | |
2736 | |
2737 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
2738 UErrorCode errorCode=U_ZERO_ERROR; | |
2739 UConverter *cnv; | |
2740 | |
2741 cnv=ucnv_open("ISO_2022", &errorCode); | |
2742 if(U_FAILURE(errorCode)) { | |
2743 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
2744 return; | |
2745 } | |
2746 TestNextUChar(cnv, source, limit, results, "ISO_2022"); | |
2747 | |
2748 /* Test the condition when source >= sourceLimit */ | |
2749 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceL
imit < source"); | |
2750 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
2751 /*Test for the condition where we have a truncated char*/ | |
2752 { | |
2753 static const uint8_t source1[]={0xc4}; | |
2754 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
2755 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); | |
2756 } | |
2757 /*Test for the condition where there is an invalid character*/ | |
2758 { | |
2759 static const uint8_t source2[]={0xa1, 0x01}; | |
2760 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); | |
2761 } | |
2762 ucnv_close(cnv); | |
2763 } | |
2764 | |
2765 #endif | |
2766 | |
2767 static void | |
2768 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverte
r* cnv){ | |
2769 const UChar* uSource; | |
2770 const UChar* uSourceLimit; | |
2771 const char* cSource; | |
2772 const char* cSourceLimit; | |
2773 UChar *uTargetLimit =NULL; | |
2774 UChar *uTarget; | |
2775 char *cTarget; | |
2776 const char *cTargetLimit; | |
2777 char *cBuf; | |
2778 UChar *uBuf; /*,*test;*/ | |
2779 int32_t uBufSize = 120; | |
2780 int len=0; | |
2781 int i=2; | |
2782 UErrorCode errorCode=U_ZERO_ERROR; | |
2783 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
2784 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
2785 ucnv_reset(cnv); | |
2786 for(;--i>0; ){ | |
2787 uSource = (UChar*) source; | |
2788 uSourceLimit=(const UChar*)sourceLimit; | |
2789 cTarget = cBuf; | |
2790 uTarget = uBuf; | |
2791 cSource = cBuf; | |
2792 cTargetLimit = cBuf; | |
2793 uTargetLimit = uBuf; | |
2794 | |
2795 do{ | |
2796 | |
2797 cTargetLimit = cTargetLimit+ i; | |
2798 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit
,NULL,FALSE, &errorCode); | |
2799 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2800 errorCode=U_ZERO_ERROR; | |
2801 continue; | |
2802 } | |
2803 | |
2804 if(U_FAILURE(errorCode)){ | |
2805 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); | |
2806 return; | |
2807 } | |
2808 | |
2809 }while (uSource<uSourceLimit); | |
2810 | |
2811 cSourceLimit =cTarget; | |
2812 do{ | |
2813 uTargetLimit=uTargetLimit+i; | |
2814 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,
FALSE,&errorCode); | |
2815 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2816 errorCode=U_ZERO_ERROR; | |
2817 continue; | |
2818 } | |
2819 if(U_FAILURE(errorCode)){ | |
2820 log_err("ucnv_toUnicode conversion failed reason %s\n", u_err
orName(errorCode)); | |
2821 return; | |
2822 } | |
2823 }while(cSource<cSourceLimit); | |
2824 | |
2825 uSource = source; | |
2826 /*test =uBuf;*/ | |
2827 for(len=0;len<(int)(source - sourceLimit);len++){ | |
2828 if(uBuf[len]!=uSource[len]){ | |
2829 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int
)uBuf[len]) ; | |
2830 } | |
2831 } | |
2832 } | |
2833 free(uBuf); | |
2834 free(cBuf); | |
2835 } | |
2836 /* Test for Jitterbug 778 */ | |
2837 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit
,UConverter* cnv){ | |
2838 const UChar* uSource; | |
2839 const UChar* uSourceLimit; | |
2840 const char* cSource; | |
2841 UChar *uTargetLimit =NULL; | |
2842 UChar *uTarget; | |
2843 char *cTarget; | |
2844 const char *cTargetLimit; | |
2845 char *cBuf; | |
2846 UChar *uBuf,*test; | |
2847 int32_t uBufSize = 120; | |
2848 int numCharsInTarget=0; | |
2849 UErrorCode errorCode=U_ZERO_ERROR; | |
2850 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
2851 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
2852 uSource = source; | |
2853 uSourceLimit=sourceLimit; | |
2854 cTarget = cBuf; | |
2855 cTargetLimit = cBuf +uBufSize*5; | |
2856 uTarget = uBuf; | |
2857 uTargetLimit = uBuf+ uBufSize*5; | |
2858 ucnv_reset(cnv); | |
2859 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarge
t), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); | |
2860 if(U_FAILURE(errorCode)){ | |
2861 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
2862 return; | |
2863 } | |
2864 cSource = cBuf; | |
2865 test =uBuf; | |
2866 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsIn
Target,&errorCode); | |
2867 if(U_FAILURE(errorCode)){ | |
2868 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(erro
rCode)); | |
2869 return; | |
2870 } | |
2871 uSource = source; | |
2872 while(uSource<uSourceLimit){ | |
2873 if(*test!=*uSource){ | |
2874 | |
2875 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
2876 } | |
2877 uSource++; | |
2878 test++; | |
2879 } | |
2880 free(uBuf); | |
2881 free(cBuf); | |
2882 } | |
2883 | |
2884 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLim
it,UConverter* cnv){ | |
2885 const UChar* uSource; | |
2886 const UChar* uSourceLimit; | |
2887 const char* cSource; | |
2888 const char* cSourceLimit; | |
2889 UChar *uTargetLimit =NULL; | |
2890 UChar *uTarget; | |
2891 char *cTarget; | |
2892 const char *cTargetLimit; | |
2893 char *cBuf; | |
2894 UChar *uBuf; /*,*test;*/ | |
2895 int32_t uBufSize = 120; | |
2896 int len=0; | |
2897 int i=2; | |
2898 const UChar *temp = sourceLimit; | |
2899 UErrorCode errorCode=U_ZERO_ERROR; | |
2900 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
2901 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
2902 | |
2903 ucnv_reset(cnv); | |
2904 for(;--i>0;){ | |
2905 uSource = (UChar*) source; | |
2906 cTarget = cBuf; | |
2907 uTarget = uBuf; | |
2908 cSource = cBuf; | |
2909 cTargetLimit = cBuf; | |
2910 uTargetLimit = uBuf+uBufSize*5; | |
2911 cTargetLimit = cTargetLimit+uBufSize*10; | |
2912 uSourceLimit=uSource; | |
2913 do{ | |
2914 | |
2915 if (uSourceLimit < sourceLimit) { | |
2916 uSourceLimit = uSourceLimit+1; | |
2917 } | |
2918 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit
,NULL,FALSE, &errorCode); | |
2919 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2920 errorCode=U_ZERO_ERROR; | |
2921 continue; | |
2922 } | |
2923 | |
2924 if(U_FAILURE(errorCode)){ | |
2925 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); | |
2926 return; | |
2927 } | |
2928 | |
2929 }while (uSource<temp); | |
2930 | |
2931 cSourceLimit =cBuf; | |
2932 do{ | |
2933 if (cSourceLimit < cBuf + (cTarget - cBuf)) { | |
2934 cSourceLimit = cSourceLimit+1; | |
2935 } | |
2936 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,
FALSE,&errorCode); | |
2937 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ | |
2938 errorCode=U_ZERO_ERROR; | |
2939 continue; | |
2940 } | |
2941 if(U_FAILURE(errorCode)){ | |
2942 log_err("ucnv_toUnicode conversion failed reason %s\n", u_err
orName(errorCode)); | |
2943 return; | |
2944 } | |
2945 }while(cSource<cTarget); | |
2946 | |
2947 uSource = source; | |
2948 /*test =uBuf;*/ | |
2949 for(;len<(int)(source - sourceLimit);len++){ | |
2950 if(uBuf[len]!=uSource[len]){ | |
2951 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int
)uBuf[len]) ; | |
2952 } | |
2953 } | |
2954 } | |
2955 free(uBuf); | |
2956 free(cBuf); | |
2957 } | |
2958 static void | |
2959 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, | |
2960 const uint16_t results[], const char* message){ | |
2961 /* const char* s0; */ | |
2962 const char* s=(char*)source; | |
2963 const uint16_t *r=results; | |
2964 UErrorCode errorCode=U_ZERO_ERROR; | |
2965 uint32_t c,exC; | |
2966 ucnv_reset(cnv); | |
2967 while(s<limit) { | |
2968 /* s0=s; */ | |
2969 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); | |
2970 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { | |
2971 break; /* no more significant input */ | |
2972 } else if(U_FAILURE(errorCode)) { | |
2973 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(
errorCode)); | |
2974 break; | |
2975 } else { | |
2976 if(U16_IS_LEAD(*r)){ | |
2977 int i =0, len = 2; | |
2978 U16_NEXT(r, i, len, exC); | |
2979 r++; | |
2980 }else{ | |
2981 exC = *r; | |
2982 } | |
2983 if(c!=(uint32_t)(exC)) | |
2984 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X
\n",message,(uint32_t) (*r),c); | |
2985 } | |
2986 r++; | |
2987 } | |
2988 } | |
2989 | |
2990 static int TestJitterbug930(const char* enc){ | |
2991 UErrorCode err = U_ZERO_ERROR; | |
2992 UConverter*converter; | |
2993 char out[80]; | |
2994 char*target = out; | |
2995 UChar in[4]; | |
2996 const UChar*source = in; | |
2997 int32_t off[80]; | |
2998 int32_t* offsets = off; | |
2999 int numOffWritten=0; | |
3000 UBool flush = 0; | |
3001 converter = my_ucnv_open(enc, &err); | |
3002 | |
3003 in[0] = 0x41; /* 0x4E00;*/ | |
3004 in[1] = 0x4E01; | |
3005 in[2] = 0x4E02; | |
3006 in[3] = 0x4E03; | |
3007 | |
3008 memset(off, '*', sizeof(off)); | |
3009 | |
3010 ucnv_fromUnicode (converter, | |
3011 &target, | |
3012 target+2, | |
3013 &source, | |
3014 source+3, | |
3015 offsets, | |
3016 flush, | |
3017 &err); | |
3018 | |
3019 /* writes three bytes into the output buffer: 41 1B 24 | |
3020 * but offsets contains 0 1 1 | |
3021 */ | |
3022 while(*offsets< off[10]){ | |
3023 numOffWritten++; | |
3024 offsets++; | |
3025 } | |
3026 log_verbose("Testing Jitterbug 930 for encoding %s",enc); | |
3027 if(numOffWritten!= (int)(target-out)){ | |
3028 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",en
c, (int)(target-out),numOffWritten); | |
3029 } | |
3030 | |
3031 err = U_ZERO_ERROR; | |
3032 | |
3033 memset(off,'*' , sizeof(off)); | |
3034 | |
3035 flush = 1; | |
3036 offsets=off; | |
3037 ucnv_fromUnicode (converter, | |
3038 &target, | |
3039 target+4, | |
3040 &source, | |
3041 source, | |
3042 offsets, | |
3043 flush, | |
3044 &err); | |
3045 numOffWritten=0; | |
3046 while(*offsets< off[10]){ | |
3047 numOffWritten++; | |
3048 if(*offsets!= -1){ | |
3049 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i
",enc,-1,*offsets) ; | |
3050 } | |
3051 offsets++; | |
3052 } | |
3053 | |
3054 /* writes 42 43 7A into output buffer, | |
3055 * offsets contains -1 -1 -1 | |
3056 */ | |
3057 ucnv_close(converter); | |
3058 return 0; | |
3059 } | |
3060 | |
3061 static void | |
3062 TestHZ() { | |
3063 /* test input */ | |
3064 static const uint16_t in[]={ | |
3065 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x30
05, 0x2014, | |
3066 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73
BB, 0x83E0, | |
3067 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94
C2, 0x7B94, | |
3068 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A
73, 0x6355, | |
3069 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C
3F, 0x90E8, | |
3070 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x24
95, 0x2496, | |
3071 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x24
76, 0x2477, | |
3072 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x24
7F, 0x2480, | |
3073 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x00
46, 0x007E, | |
3074 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x00
4F, 0x0050, | |
3075 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x00
58, 0x0059, | |
3076 0x005A, 0x005B, 0x005C, 0x000A | |
3077 }; | |
3078 const UChar* uSource; | |
3079 const UChar* uSourceLimit; | |
3080 const char* cSource; | |
3081 const char* cSourceLimit; | |
3082 UChar *uTargetLimit =NULL; | |
3083 UChar *uTarget; | |
3084 char *cTarget; | |
3085 const char *cTargetLimit; | |
3086 char *cBuf; | |
3087 UChar *uBuf,*test; | |
3088 int32_t uBufSize = 120; | |
3089 UErrorCode errorCode=U_ZERO_ERROR; | |
3090 UConverter *cnv; | |
3091 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
3092 int32_t* myOff= offsets; | |
3093 cnv=ucnv_open("HZ", &errorCode); | |
3094 if(U_FAILURE(errorCode)) { | |
3095 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)
); | |
3096 return; | |
3097 } | |
3098 | |
3099 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3100 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3101 uSource = (const UChar*)in; | |
3102 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
3103 cTarget = cBuf; | |
3104 cTargetLimit = cBuf +uBufSize*5; | |
3105 uTarget = uBuf; | |
3106 uTargetLimit = uBuf+ uBufSize*5; | |
3107 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
3108 if(U_FAILURE(errorCode)){ | |
3109 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
3110 return; | |
3111 } | |
3112 cSource = cBuf; | |
3113 cSourceLimit =cTarget; | |
3114 test =uBuf; | |
3115 myOff=offsets; | |
3116 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
3117 if(U_FAILURE(errorCode)){ | |
3118 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
3119 return; | |
3120 } | |
3121 uSource = (const UChar*)in; | |
3122 while(uSource<uSourceLimit){ | |
3123 if(*test!=*uSource){ | |
3124 | |
3125 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
3126 } | |
3127 uSource++; | |
3128 test++; | |
3129 } | |
3130 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); | |
3131 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3132 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3133 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3134 TestJitterbug930("csISO2022JP"); | |
3135 ucnv_close(cnv); | |
3136 free(offsets); | |
3137 free(uBuf); | |
3138 free(cBuf); | |
3139 } | |
3140 | |
3141 static void | |
3142 TestISCII(){ | |
3143 /* test input */ | |
3144 static const uint16_t in[]={ | |
3145 /* test full range of Devanagari */ | |
3146 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, | |
3147 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, | |
3148 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, | |
3149 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, | |
3150 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, | |
3151 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, | |
3152 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, | |
3153 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, | |
3154 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, | |
3155 0x096D,0x096E,0x096F, | |
3156 /* test Soft halant*/ | |
3157 0x0915,0x094d, 0x200D, | |
3158 /* test explicit halant */ | |
3159 0x0915,0x094d, 0x200c, | |
3160 /* test double danda */ | |
3161 0x965, | |
3162 /* test ASCII */ | |
3163 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
3164 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
3165 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
3166 /* tests from Lotus */ | |
3167 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, | |
3168 0x0930,0x094D,0x200D, | |
3169 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, | |
3170 0x0915,0x0921,0x002B,0x095F, | |
3171 /* tamil range */ | |
3172 0x0B86, 0xB87, 0xB88, | |
3173 /* telugu range */ | |
3174 0x0C05, 0x0C02, 0x0C03,0x0c31, | |
3175 /* kannada range */ | |
3176 0x0C85, 0xC82, 0x0C83, | |
3177 /* test Abbr sign and Anudatta */ | |
3178 0x0970, 0x952, | |
3179 /* 0x0958, | |
3180 0x0959, | |
3181 0x095A, | |
3182 0x095B, | |
3183 0x095C, | |
3184 0x095D, | |
3185 0x095E, | |
3186 0x095F,*/ | |
3187 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, | |
3188 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, | |
3189 0x090C , | |
3190 0x0962, | |
3191 0x0961 /* Vocallic LL 0xa6, 0xE9 */, | |
3192 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, | |
3193 0x0950 /* OM Symbol 0xa1, 0xE9,*/, | |
3194 0x093D /* Avagraha 0xEA, 0xE9*/, | |
3195 0x0958, | |
3196 0x0959, | |
3197 0x095A, | |
3198 0x095B, | |
3199 0x095C, | |
3200 0x095D, | |
3201 0x095E, | |
3202 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 | |
3203 }; | |
3204 static const unsigned char byteArr[]={ | |
3205 | |
3206 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, | |
3207 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, | |
3208 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, | |
3209 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, | |
3210 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, | |
3211 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, | |
3212 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, | |
3213 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, | |
3214 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, | |
3215 0xf8,0xf9,0xfa, | |
3216 /* test soft halant */ | |
3217 0xb3, 0xE8, 0xE9, | |
3218 /* test explicit halant */ | |
3219 0xb3, 0xE8, 0xE8, | |
3220 /* test double danda */ | |
3221 0xea, 0xea, | |
3222 /* test ASCII */ | |
3223 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
3224 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
3225 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
3226 /* test ATR code */ | |
3227 | |
3228 /* tests from Lotus */ | |
3229 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, | |
3230 0xEF,0x42,0xCF,0xE8,0xD9, | |
3231 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, | |
3232 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, | |
3233 /* tamil range */ | |
3234 0xEF, 0x44, 0xa5, 0xa6, 0xa7, | |
3235 /* telugu range */ | |
3236 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, | |
3237 /* kannada range */ | |
3238 0xEF, 0x48,0xa4, 0xa2, 0xa3, | |
3239 /* anudatta and abbreviation sign */ | |
3240 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, | |
3241 | |
3242 | |
3243 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ | |
3244 | |
3245 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ | |
3246 | |
3247 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ | |
3248 | |
3249 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ | |
3250 | |
3251 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ | |
3252 | |
3253 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ | |
3254 | |
3255 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ | |
3256 | |
3257 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ | |
3258 | |
3259 0xB3, 0xE9, /* Ka + NUKTA */ | |
3260 | |
3261 0xB4, 0xE9, /* Kha + NUKTA */ | |
3262 | |
3263 0xB5, 0xE9, /* Ga + NUKTA */ | |
3264 | |
3265 0xBA, 0xE9, | |
3266 | |
3267 0xBF, 0xE9, | |
3268 | |
3269 0xC0, 0xE9, | |
3270 | |
3271 0xC9, 0xE9, | |
3272 /* INV halant RA */ | |
3273 0xD9, 0xE8, 0xCF, | |
3274 0x00, 0x00A0, | |
3275 /* just consume unhandled codepoints */ | |
3276 0xEF, 0x30, | |
3277 | |
3278 }; | |
3279 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-i
scii-de",NULL,TRUE); | |
3280 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof
(byteArr)); | |
3281 | |
3282 } | |
3283 | |
3284 static void | |
3285 TestISO_2022_JP() { | |
3286 /* test input */ | |
3287 static const uint16_t in[]={ | |
3288 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, | |
3289 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
3290 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
3291 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D,
0x000A, | |
3292 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, | |
3293 0x201D, 0x3014, 0x000D, 0x000A, | |
3294 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
3295 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
3296 }; | |
3297 const UChar* uSource; | |
3298 const UChar* uSourceLimit; | |
3299 const char* cSource; | |
3300 const char* cSourceLimit; | |
3301 UChar *uTargetLimit =NULL; | |
3302 UChar *uTarget; | |
3303 char *cTarget; | |
3304 const char *cTargetLimit; | |
3305 char *cBuf; | |
3306 UChar *uBuf,*test; | |
3307 int32_t uBufSize = 120; | |
3308 UErrorCode errorCode=U_ZERO_ERROR; | |
3309 UConverter *cnv; | |
3310 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
3311 int32_t* myOff= offsets; | |
3312 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); | |
3313 if(U_FAILURE(errorCode)) { | |
3314 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorN
ame(errorCode)); | |
3315 return; | |
3316 } | |
3317 | |
3318 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3319 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3320 uSource = (const UChar*)in; | |
3321 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
3322 cTarget = cBuf; | |
3323 cTargetLimit = cBuf +uBufSize*5; | |
3324 uTarget = uBuf; | |
3325 uTargetLimit = uBuf+ uBufSize*5; | |
3326 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
3327 if(U_FAILURE(errorCode)){ | |
3328 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
3329 return; | |
3330 } | |
3331 cSource = cBuf; | |
3332 cSourceLimit =cTarget; | |
3333 test =uBuf; | |
3334 myOff=offsets; | |
3335 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
3336 if(U_FAILURE(errorCode)){ | |
3337 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
3338 return; | |
3339 } | |
3340 | |
3341 uSource = (const UChar*)in; | |
3342 while(uSource<uSourceLimit){ | |
3343 if(*test!=*uSource){ | |
3344 | |
3345 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
3346 } | |
3347 uSource++; | |
3348 test++; | |
3349 } | |
3350 | |
3351 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3352 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3353 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); | |
3354 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3355 TestJitterbug930("csISO2022JP"); | |
3356 ucnv_close(cnv); | |
3357 free(uBuf); | |
3358 free(cBuf); | |
3359 free(offsets); | |
3360 } | |
3361 | |
3362 static void TestConv(const uint16_t in[],int len, const char* conv, const char*
lang, char byteArr[],int byteArrLen){ | |
3363 const UChar* uSource; | |
3364 const UChar* uSourceLimit; | |
3365 const char* cSource; | |
3366 const char* cSourceLimit; | |
3367 UChar *uTargetLimit =NULL; | |
3368 UChar *uTarget; | |
3369 char *cTarget; | |
3370 const char *cTargetLimit; | |
3371 char *cBuf; | |
3372 UChar *uBuf,*test; | |
3373 int32_t uBufSize = 120*10; | |
3374 UErrorCode errorCode=U_ZERO_ERROR; | |
3375 UConverter *cnv; | |
3376 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); | |
3377 int32_t* myOff= offsets; | |
3378 cnv=my_ucnv_open(conv, &errorCode); | |
3379 if(U_FAILURE(errorCode)) { | |
3380 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(er
rorCode)); | |
3381 return; | |
3382 } | |
3383 | |
3384 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); | |
3385 cBuf =(char*)malloc(uBufSize * sizeof(char)); | |
3386 uSource = (const UChar*)in; | |
3387 uSourceLimit=uSource+len; | |
3388 cTarget = cBuf; | |
3389 cTargetLimit = cBuf +uBufSize; | |
3390 uTarget = uBuf; | |
3391 uTargetLimit = uBuf+ uBufSize; | |
3392 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
3393 if(U_FAILURE(errorCode)){ | |
3394 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
3395 return; | |
3396 } | |
3397 /*log_verbose("length of compressed string for language %s using %s:%i \n",c
onv,lang,(cTarget-cBuf));*/ | |
3398 cSource = cBuf; | |
3399 cSourceLimit =cTarget; | |
3400 test =uBuf; | |
3401 myOff=offsets; | |
3402 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
3403 if(U_FAILURE(errorCode)){ | |
3404 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(er
rorCode)); | |
3405 return; | |
3406 } | |
3407 | |
3408 uSource = (const UChar*)in; | |
3409 while(uSource<uSourceLimit){ | |
3410 if(*test!=*uSource){ | |
3411 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",con
v,*uSource,(int)*test) ; | |
3412 } | |
3413 uSource++; | |
3414 test++; | |
3415 } | |
3416 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); | |
3417 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); | |
3418 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); | |
3419 if(byteArr && byteArrLen!=0){ | |
3420 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); | |
3421 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); | |
3422 { | |
3423 cSource = byteArr; | |
3424 cSourceLimit = cSource+byteArrLen; | |
3425 test=uBuf; | |
3426 myOff = offsets; | |
3427 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff
,TRUE,&errorCode); | |
3428 if(U_FAILURE(errorCode)){ | |
3429 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorN
ame(errorCode)); | |
3430 return; | |
3431 } | |
3432 | |
3433 uSource = (const UChar*)in; | |
3434 while(uSource<uSourceLimit){ | |
3435 if(*test!=*uSource){ | |
3436 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int
)*test) ; | |
3437 } | |
3438 uSource++; | |
3439 test++; | |
3440 } | |
3441 } | |
3442 } | |
3443 | |
3444 ucnv_close(cnv); | |
3445 free(uBuf); | |
3446 free(cBuf); | |
3447 free(offsets); | |
3448 } | |
3449 static UChar U_CALLCONV | |
3450 _charAt(int32_t offset, void *context) { | |
3451 return ((char*)context)[offset]; | |
3452 } | |
3453 | |
3454 static int32_t | |
3455 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *s
tatus){ | |
3456 int32_t srcIndex=0; | |
3457 int32_t dstIndex=0; | |
3458 if(U_FAILURE(*status)){ | |
3459 return 0; | |
3460 } | |
3461 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ | |
3462 *status = U_ILLEGAL_ARGUMENT_ERROR; | |
3463 return 0; | |
3464 } | |
3465 if(srcLen==-1){ | |
3466 srcLen = (int32_t)uprv_strlen(src); | |
3467 } | |
3468 | |
3469 for (; srcIndex<srcLen; ) { | |
3470 UChar32 c = src[srcIndex++]; | |
3471 if (c == 0x005C /*'\\'*/) { | |
3472 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i
*/ | |
3473 if (c == (UChar32)0xFFFFFFFF) { | |
3474 *status=U_INVALID_CHAR_FOUND; /* return empty string */ | |
3475 break; /* invalid escape sequence */ | |
3476 } | |
3477 } | |
3478 if(dstIndex < dstLen){ | |
3479 if(c>0xFFFF){ | |
3480 dst[dstIndex++] = U16_LEAD(c); | |
3481 if(dstIndex<dstLen){ | |
3482 dst[dstIndex]=U16_TRAIL(c); | |
3483 }else{ | |
3484 *status=U_BUFFER_OVERFLOW_ERROR; | |
3485 } | |
3486 }else{ | |
3487 dst[dstIndex]=(UChar)c; | |
3488 } | |
3489 | |
3490 }else{ | |
3491 *status = U_BUFFER_OVERFLOW_ERROR; | |
3492 } | |
3493 dstIndex++; /* for preflighting */ | |
3494 } | |
3495 return dstIndex; | |
3496 } | |
3497 | |
3498 static void | |
3499 TestFullRoundtrip(const char* cp){ | |
3500 UChar usource[10] ={0}; | |
3501 UChar nsrc[10] = {0}; | |
3502 uint32_t i=1; | |
3503 int len=0, ulen; | |
3504 nsrc[0]=0x0061; | |
3505 /* Test codepoint 0 */ | |
3506 TestConv(usource,1,cp,"",NULL,0); | |
3507 TestConv(usource,2,cp,"",NULL,0); | |
3508 nsrc[2]=0x5555; | |
3509 TestConv(nsrc,3,cp,"",NULL,0); | |
3510 | |
3511 for(;i<=0x10FFFF;i++){ | |
3512 if(i==0xD800){ | |
3513 i=0xDFFF; | |
3514 continue; | |
3515 } | |
3516 if(i<=0xFFFF){ | |
3517 usource[0] =(UChar) i; | |
3518 len=1; | |
3519 }else{ | |
3520 usource[0]=U16_LEAD(i); | |
3521 usource[1]=U16_TRAIL(i); | |
3522 len=2; | |
3523 } | |
3524 ulen=len; | |
3525 if(i==0x80) { | |
3526 usource[2]=0; | |
3527 } | |
3528 /* Test only single code points */ | |
3529 TestConv(usource,ulen,cp,"",NULL,0); | |
3530 /* Test codepoint repeated twice */ | |
3531 usource[ulen]=usource[0]; | |
3532 usource[ulen+1]=usource[1]; | |
3533 ulen+=len; | |
3534 TestConv(usource,ulen,cp,"",NULL,0); | |
3535 /* Test codepoint repeated 3 times */ | |
3536 usource[ulen]=usource[0]; | |
3537 usource[ulen+1]=usource[1]; | |
3538 ulen+=len; | |
3539 TestConv(usource,ulen,cp,"",NULL,0); | |
3540 /* Test codepoint in between 2 codepoints */ | |
3541 nsrc[1]=usource[0]; | |
3542 nsrc[2]=usource[1]; | |
3543 nsrc[len+1]=0x5555; | |
3544 TestConv(nsrc,len+2,cp,"",NULL,0); | |
3545 uprv_memset(usource,0,sizeof(UChar)*10); | |
3546 } | |
3547 } | |
3548 | |
3549 static void | |
3550 TestRoundTrippingAllUTF(void){ | |
3551 if(!getTestOption(QUICK_OPTION)){ | |
3552 log_verbose("Running exhaustive round trip test for BOCU-1\n"); | |
3553 TestFullRoundtrip("BOCU-1"); | |
3554 log_verbose("Running exhaustive round trip test for SCSU\n"); | |
3555 TestFullRoundtrip("SCSU"); | |
3556 log_verbose("Running exhaustive round trip test for UTF-8\n"); | |
3557 TestFullRoundtrip("UTF-8"); | |
3558 log_verbose("Running exhaustive round trip test for CESU-8\n"); | |
3559 TestFullRoundtrip("CESU-8"); | |
3560 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); | |
3561 TestFullRoundtrip("UTF-16BE"); | |
3562 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); | |
3563 TestFullRoundtrip("UTF-16LE"); | |
3564 log_verbose("Running exhaustive round trip test for UTF-16\n"); | |
3565 TestFullRoundtrip("UTF-16"); | |
3566 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); | |
3567 TestFullRoundtrip("UTF-32BE"); | |
3568 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); | |
3569 TestFullRoundtrip("UTF-32LE"); | |
3570 log_verbose("Running exhaustive round trip test for UTF-32\n"); | |
3571 TestFullRoundtrip("UTF-32"); | |
3572 log_verbose("Running exhaustive round trip test for UTF-7\n"); | |
3573 TestFullRoundtrip("UTF-7"); | |
3574 log_verbose("Running exhaustive round trip test for UTF-7\n"); | |
3575 TestFullRoundtrip("UTF-7,version=1"); | |
3576 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"
); | |
3577 TestFullRoundtrip("IMAP-mailbox-name"); | |
3578 /* | |
3579 * | |
3580 * With the update to GB18030 2005 (Ticket #8274), this test will fail b
ecause the 2005 version of | |
3581 * GB18030 contains mappings to actual Unicode codepoints (which were pr
eviously mapped to PUA). | |
3582 * The old mappings remain as fallbacks. | |
3583 * This test may be reintroduced at a later time. | |
3584 * | |
3585 * 110118 - mow | |
3586 */ | |
3587 /* | |
3588 log_verbose("Running exhaustive round trip test for GB18030\n"); | |
3589 TestFullRoundtrip("GB18030"); | |
3590 */ | |
3591 } | |
3592 } | |
3593 | |
3594 static void | |
3595 TestSCSU() { | |
3596 | |
3597 static const uint16_t germanUTF16[]={ | |
3598 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 | |
3599 }; | |
3600 | |
3601 static const uint8_t germanSCSU[]={ | |
3602 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 | |
3603 }; | |
3604 | |
3605 static const uint16_t russianUTF16[]={ | |
3606 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 | |
3607 }; | |
3608 | |
3609 static const uint8_t russianSCSU[]={ | |
3610 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 | |
3611 }; | |
3612 | |
3613 static const uint16_t japaneseUTF16[]={ | |
3614 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, | |
3615 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, | |
3616 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, | |
3617 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, | |
3618 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, | |
3619 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, | |
3620 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, | |
3621 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, | |
3622 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, | |
3623 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, | |
3624 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, | |
3625 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, | |
3626 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, | |
3627 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, | |
3628 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 | |
3629 }; | |
3630 | |
3631 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of
one different choice: | |
3632 it uses an SQn once where a longer look-ahead could have shown that SCn is
more efficient */ | |
3633 static const uint8_t japaneseSCSU[]={ | |
3634 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef,
0x61, 0x1b, 0xe5, 0x84, | |
3635 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3,
0x94, 0x08, 0x02, 0x0f, | |
3636 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b,
0x6d, 0x41, 0x88, 0x4c, | |
3637 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e,
0x6b, 0x4c, 0x08, 0x0d, | |
3638 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa,
0x84, 0x08, 0x02, 0x0e, | |
3639 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc,
0x9f, 0x0e, 0x79, 0x3e, | |
3640 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08,
0x88, 0xbe, 0xa3, 0x8d, | |
3641 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08,
0x01, 0x93, 0xc8, 0xaa, | |
3642 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae,
0x93, 0xa8, 0xa0, 0x08, | |
3643 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80,
0x05, 0xec, 0x60, 0x8d, | |
3644 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4,
0xfe, 0xe7, 0xc2, 0x06, | |
3645 0xcb, 0x82 | |
3646 }; | |
3647 | |
3648 static const uint16_t allFeaturesUTF16[]={ | |
3649 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, | |
3650 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, | |
3651 0x01df, 0xf000, 0xdbff, 0xdfff | |
3652 }; | |
3653 | |
3654 /* see comment at japaneseSCSU: the same kind of different choice yields a s
lightly shorter | |
3655 * result here (34B vs. 35B) | |
3656 */ | |
3657 static const uint8_t allFeaturesSCSU[]={ | |
3658 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, | |
3659 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, | |
3660 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, | |
3661 0xdf, 0x14, 0x80, 0x15, 0xff | |
3662 }; | |
3663 static const uint16_t monkeyIn[]={ | |
3664 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D,
0x000A, | |
3665 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D,
0x000A, | |
3666 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D,
0x000A, | |
3667 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D,
0x000A, | |
3668 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D,
0x000A, | |
3669 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D,
0x000A, | |
3670 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D,
0x000A, | |
3671 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D,
0x000A, | |
3672 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D,
0x000A, | |
3673 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D,
0x000A, | |
3674 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D,
0x000A, | |
3675 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
3676 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
3677 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
3678 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D,
0x000A, | |
3679 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
3680 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D,
0x000A, | |
3681 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D,
0x000A, | |
3682 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D,
0x000A, | |
3683 /* test non-BMP code points */ | |
3684 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869,
0xDE9F, | |
3685 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869,
0xDEA8, | |
3686 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869,
0xDEAF, | |
3687 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869,
0xDEB6, | |
3688 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869,
0xDEBB, | |
3689 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869,
0xDEC0, | |
3690 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869,
0xDEC8, | |
3691 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869,
0xDECF, | |
3692 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869,
0xDED4, | |
3693 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF,
0xDFFF, | |
3694 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, | |
3695 | |
3696 | |
3697 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D,
0x000A, | |
3698 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D,
0x000A, | |
3699 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D,
0x000A, | |
3700 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, | |
3701 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, | |
3702 }; | |
3703 static const char *fTestCases [] = { | |
3704 "\\ud800\\udc00", /* smallest surrogate*/ | |
3705 "\\ud8ff\\udcff", | |
3706 "\\udBff\\udFff", /* largest surrogate pair*/ | |
3707 "\\ud834\\udc00", | |
3708 "\\U0010FFFF", | |
3709 "Hello \\u9292 \\u9192 World!", | |
3710 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", | |
3711 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", | |
3712 | |
3713 "\\u0648\\u06c8", /* catch missing reset*/ | |
3714 "\\u0648\\u06c8", | |
3715 | |
3716 "\\u4444\\uE001", /* lowest quotable*/ | |
3717 "\\u4444\\uf2FF", /* highest quotable*/ | |
3718 "\\u4444\\uf188\\u4444", | |
3719 "\\u4444\\uf188\\uf288", | |
3720 "\\u4444\\uf188abc\\u0429\\uf288", | |
3721 "\\u9292\\u2222", | |
3722 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", | |
3723 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", | |
3724 "Hello World!123456", | |
3725 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ | |
3726 | |
3727 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ | |
3728 "abc\\u4411d", /* uses SQU*/ | |
3729 "abc\\u4411\\u4412d",/* uses SCU*/ | |
3730 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ | |
3731 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data
*/ | |
3732 "\\u9292\\u2222", | |
3733 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", | |
3734 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u306
5\\u300c", | |
3735 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53e
f\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", | |
3736 | |
3737 "", /* empty input*/ | |
3738 "\\u0000", /* smallest BMP character*/ | |
3739 "\\uFFFF", /* largest BMP character*/ | |
3740 | |
3741 /* regression tests*/ | |
3742 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49f
d\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", | |
3743 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u0
15f\\u00df\\u01df\\uf000\\udbff\\udfff", | |
3744 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e
1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", | |
3745 "\\u0041\\u00df\\u0401\\u015f", | |
3746 "\\u9066\\u2123abc", | |
3747 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u
539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf51
3\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\
u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\ucc
d8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\
\u0bc0\\u06c5", | |
3748 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b
5\\u0cf3\\u6059\\u7489", | |
3749 }; | |
3750 int i=0; | |
3751 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ | |
3752 const char* cSrc = fTestCases[i]; | |
3753 UErrorCode status = U_ZERO_ERROR; | |
3754 int32_t cSrcLen,srcLen; | |
3755 UChar* src; | |
3756 /* UConverter* cnv = ucnv_open("SCSU",&status); */ | |
3757 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); | |
3758 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); | |
3759 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); | |
3760 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); | |
3761 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); | |
3762 free(src); | |
3763 } | |
3764 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features"
, (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); | |
3765 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features"
,(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); | |
3766 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)
japaneseSCSU,sizeof(japaneseSCSU)); | |
3767 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese
",(char *)japaneseSCSU,sizeof(japaneseSCSU)); | |
3768 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanS
CSU,sizeof(germanSCSU)); | |
3769 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)rus
sianSCSU,sizeof(russianSCSU)); | |
3770 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); | |
3771 } | |
3772 | |
3773 #if !UCONFIG_NO_LEGACY_CONVERSION | |
3774 static void TestJitterbug2346(){ | |
3775 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, | |
3776 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; | |
3777 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; | |
3778 | |
3779 UChar uTarget[500]={'\0'}; | |
3780 UChar* utarget=uTarget; | |
3781 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; | |
3782 | |
3783 char cTarget[500]={'\0'}; | |
3784 char* ctarget=cTarget; | |
3785 char* ctargetLimit=cTarget+sizeof(cTarget); | |
3786 const char* csource=source; | |
3787 UChar* temp = expected; | |
3788 UErrorCode err=U_ZERO_ERROR; | |
3789 | |
3790 UConverter* conv =ucnv_open("ISO_2022_JP",&err); | |
3791 if(U_FAILURE(err)) { | |
3792 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
r)); | |
3793 return; | |
3794 } | |
3795 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NU
LL,TRUE,&err); | |
3796 if(U_FAILURE(err)) { | |
3797 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(er
r)); | |
3798 return; | |
3799 } | |
3800 utargetLimit=utarget; | |
3801 utarget = uTarget; | |
3802 while(utarget<utargetLimit){ | |
3803 if(*temp!=*utarget){ | |
3804 | |
3805 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp)
; | |
3806 } | |
3807 utarget++; | |
3808 temp++; | |
3809 } | |
3810 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetL
imit,NULL,TRUE,&err); | |
3811 if(U_FAILURE(err)) { | |
3812 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(
err)); | |
3813 return; | |
3814 } | |
3815 ctargetLimit=ctarget; | |
3816 ctarget =cTarget; | |
3817 ucnv_close(conv); | |
3818 | |
3819 | |
3820 } | |
3821 | |
3822 static void | |
3823 TestISO_2022_JP_1() { | |
3824 /* test input */ | |
3825 static const uint16_t in[]={ | |
3826 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D,
0x000A, | |
3827 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
3828 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D,
0x000A, | |
3829 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
3830 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D,
0x000A, | |
3831 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, | |
3832 0x201D, 0x000D, 0x000A, | |
3833 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
3834 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D,
0x000A, | |
3835 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
3836 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D,
0x000A, | |
3837 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D,
0x000A, | |
3838 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A | |
3839 }; | |
3840 const UChar* uSource; | |
3841 const UChar* uSourceLimit; | |
3842 const char* cSource; | |
3843 const char* cSourceLimit; | |
3844 UChar *uTargetLimit =NULL; | |
3845 UChar *uTarget; | |
3846 char *cTarget; | |
3847 const char *cTargetLimit; | |
3848 char *cBuf; | |
3849 UChar *uBuf,*test; | |
3850 int32_t uBufSize = 120; | |
3851 UErrorCode errorCode=U_ZERO_ERROR; | |
3852 UConverter *cnv; | |
3853 | |
3854 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); | |
3855 if(U_FAILURE(errorCode)) { | |
3856 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
3857 return; | |
3858 } | |
3859 | |
3860 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3861 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3862 uSource = (const UChar*)in; | |
3863 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
3864 cTarget = cBuf; | |
3865 cTargetLimit = cBuf +uBufSize*5; | |
3866 uTarget = uBuf; | |
3867 uTargetLimit = uBuf+ uBufSize*5; | |
3868 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TR
UE, &errorCode); | |
3869 if(U_FAILURE(errorCode)){ | |
3870 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
3871 return; | |
3872 } | |
3873 cSource = cBuf; | |
3874 cSourceLimit =cTarget; | |
3875 test =uBuf; | |
3876 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&er
rorCode); | |
3877 if(U_FAILURE(errorCode)){ | |
3878 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
3879 return; | |
3880 } | |
3881 uSource = (const UChar*)in; | |
3882 while(uSource<uSourceLimit){ | |
3883 if(*test!=*uSource){ | |
3884 | |
3885 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
3886 } | |
3887 uSource++; | |
3888 test++; | |
3889 } | |
3890 /*ucnv_close(cnv); | |
3891 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ | |
3892 /*Test for the condition where there is an invalid character*/ | |
3893 ucnv_reset(cnv); | |
3894 { | |
3895 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
3896 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); | |
3897 } | |
3898 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3899 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3900 ucnv_close(cnv); | |
3901 free(uBuf); | |
3902 free(cBuf); | |
3903 } | |
3904 | |
3905 static void | |
3906 TestISO_2022_JP_2() { | |
3907 /* test input */ | |
3908 static const uint16_t in[]={ | |
3909 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D,
0x000A, | |
3910 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D,
0x000A, | |
3911 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D,
0x000A, | |
3912 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D,
0x000A, | |
3913 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D,
0x000A, | |
3914 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D,
0x000A, | |
3915 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D,
0x000A, | |
3916 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D,
0x000A, | |
3917 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D,
0x000A, | |
3918 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D,
0x000A, | |
3919 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D,
0x000A, | |
3920 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D,
0x000A, | |
3921 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D,
0x000A, | |
3922 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D,
0x000A, | |
3923 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D,
0x000A, | |
3924 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
3925 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D,
0x000A, | |
3926 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D,
0x000A, | |
3927 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D,
0x000A | |
3928 }; | |
3929 const UChar* uSource; | |
3930 const UChar* uSourceLimit; | |
3931 const char* cSource; | |
3932 const char* cSourceLimit; | |
3933 UChar *uTargetLimit =NULL; | |
3934 UChar *uTarget; | |
3935 char *cTarget; | |
3936 const char *cTargetLimit; | |
3937 char *cBuf; | |
3938 UChar *uBuf,*test; | |
3939 int32_t uBufSize = 120; | |
3940 UErrorCode errorCode=U_ZERO_ERROR; | |
3941 UConverter *cnv; | |
3942 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
3943 int32_t* myOff= offsets; | |
3944 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); | |
3945 if(U_FAILURE(errorCode)) { | |
3946 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
3947 return; | |
3948 } | |
3949 | |
3950 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
3951 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
3952 uSource = (const UChar*)in; | |
3953 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
3954 cTarget = cBuf; | |
3955 cTargetLimit = cBuf +uBufSize*5; | |
3956 uTarget = uBuf; | |
3957 uTargetLimit = uBuf+ uBufSize*5; | |
3958 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
3959 if(U_FAILURE(errorCode)){ | |
3960 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
3961 return; | |
3962 } | |
3963 cSource = cBuf; | |
3964 cSourceLimit =cTarget; | |
3965 test =uBuf; | |
3966 myOff=offsets; | |
3967 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
3968 if(U_FAILURE(errorCode)){ | |
3969 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
3970 return; | |
3971 } | |
3972 uSource = (const UChar*)in; | |
3973 while(uSource<uSourceLimit){ | |
3974 if(*test!=*uSource){ | |
3975 | |
3976 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
3977 } | |
3978 uSource++; | |
3979 test++; | |
3980 } | |
3981 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3982 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3983 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
3984 /*Test for the condition where there is an invalid character*/ | |
3985 ucnv_reset(cnv); | |
3986 { | |
3987 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
3988 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); | |
3989 } | |
3990 ucnv_close(cnv); | |
3991 free(uBuf); | |
3992 free(cBuf); | |
3993 free(offsets); | |
3994 } | |
3995 | |
3996 static void | |
3997 TestISO_2022_KR() { | |
3998 /* test input */ | |
3999 static const uint16_t in[]={ | |
4000 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x00
0D | |
4001 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC
04 | |
4002 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x00
28,0x0029 | |
4003 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53
CA,0x53CB | |
4004 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53
E2 | |
4005 ,0x53E3,0x53E4,0x000A,0x000D}; | |
4006 const UChar* uSource; | |
4007 const UChar* uSourceLimit; | |
4008 const char* cSource; | |
4009 const char* cSourceLimit; | |
4010 UChar *uTargetLimit =NULL; | |
4011 UChar *uTarget; | |
4012 char *cTarget; | |
4013 const char *cTargetLimit; | |
4014 char *cBuf; | |
4015 UChar *uBuf,*test; | |
4016 int32_t uBufSize = 120; | |
4017 UErrorCode errorCode=U_ZERO_ERROR; | |
4018 UConverter *cnv; | |
4019 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4020 int32_t* myOff= offsets; | |
4021 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); | |
4022 if(U_FAILURE(errorCode)) { | |
4023 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
4024 return; | |
4025 } | |
4026 | |
4027 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4028 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
4029 uSource = (const UChar*)in; | |
4030 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
4031 cTarget = cBuf; | |
4032 cTargetLimit = cBuf +uBufSize*5; | |
4033 uTarget = uBuf; | |
4034 uTargetLimit = uBuf+ uBufSize*5; | |
4035 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
4036 if(U_FAILURE(errorCode)){ | |
4037 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
4038 return; | |
4039 } | |
4040 cSource = cBuf; | |
4041 cSourceLimit =cTarget; | |
4042 test =uBuf; | |
4043 myOff=offsets; | |
4044 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
4045 if(U_FAILURE(errorCode)){ | |
4046 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
4047 return; | |
4048 } | |
4049 uSource = (const UChar*)in; | |
4050 while(uSource<uSourceLimit){ | |
4051 if(*test!=*uSource){ | |
4052 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; | |
4053 } | |
4054 uSource++; | |
4055 test++; | |
4056 } | |
4057 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); | |
4058 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4059 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4060 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4061 TestJitterbug930("csISO2022KR"); | |
4062 /*Test for the condition where there is an invalid character*/ | |
4063 ucnv_reset(cnv); | |
4064 { | |
4065 static const uint8_t source2[]={0x1b,0x24,0x053}; | |
4066 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
4067 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); | |
4068 } | |
4069 ucnv_close(cnv); | |
4070 free(uBuf); | |
4071 free(cBuf); | |
4072 free(offsets); | |
4073 } | |
4074 | |
4075 static void | |
4076 TestISO_2022_KR_1() { | |
4077 /* test input */ | |
4078 static const uint16_t in[]={ | |
4079 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x00
0D | |
4080 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC
04 | |
4081 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x00
28,0x0029 | |
4082 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53
CA,0x53CB | |
4083 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53
E2 | |
4084 ,0x53E3,0x53E4,0x000A,0x000D}; | |
4085 const UChar* uSource; | |
4086 const UChar* uSourceLimit; | |
4087 const char* cSource; | |
4088 const char* cSourceLimit; | |
4089 UChar *uTargetLimit =NULL; | |
4090 UChar *uTarget; | |
4091 char *cTarget; | |
4092 const char *cTargetLimit; | |
4093 char *cBuf; | |
4094 UChar *uBuf,*test; | |
4095 int32_t uBufSize = 120; | |
4096 UErrorCode errorCode=U_ZERO_ERROR; | |
4097 UConverter *cnv; | |
4098 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4099 int32_t* myOff= offsets; | |
4100 cnv=ucnv_open("ibm-25546", &errorCode); | |
4101 if(U_FAILURE(errorCode)) { | |
4102 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
4103 return; | |
4104 } | |
4105 | |
4106 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4107 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); | |
4108 uSource = (const UChar*)in; | |
4109 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
4110 cTarget = cBuf; | |
4111 cTargetLimit = cBuf +uBufSize*5; | |
4112 uTarget = uBuf; | |
4113 uTargetLimit = uBuf+ uBufSize*5; | |
4114 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
4115 if(U_FAILURE(errorCode)){ | |
4116 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
4117 return; | |
4118 } | |
4119 cSource = cBuf; | |
4120 cSourceLimit =cTarget; | |
4121 test =uBuf; | |
4122 myOff=offsets; | |
4123 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
4124 if(U_FAILURE(errorCode)){ | |
4125 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
4126 return; | |
4127 } | |
4128 uSource = (const UChar*)in; | |
4129 while(uSource<uSourceLimit){ | |
4130 if(*test!=*uSource){ | |
4131 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; | |
4132 } | |
4133 uSource++; | |
4134 test++; | |
4135 } | |
4136 ucnv_reset(cnv); | |
4137 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); | |
4138 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4139 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4140 ucnv_reset(cnv); | |
4141 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4142 /*Test for the condition where there is an invalid character*/ | |
4143 ucnv_reset(cnv); | |
4144 { | |
4145 static const uint8_t source2[]={0x1b,0x24,0x053}; | |
4146 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &err
orCode); | |
4147 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); | |
4148 } | |
4149 ucnv_close(cnv); | |
4150 free(uBuf); | |
4151 free(cBuf); | |
4152 free(offsets); | |
4153 } | |
4154 | |
4155 static void TestJitterbug2411(){ | |
4156 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6
f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" | |
4157 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x
66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; | |
4158 UConverter* kr=NULL, *kr1=NULL; | |
4159 UErrorCode errorCode = U_ZERO_ERROR; | |
4160 UChar tgt[100]={'\0'}; | |
4161 UChar* target = tgt; | |
4162 UChar* targetLimit = target+100; | |
4163 kr=ucnv_open("iso-2022-kr", &errorCode); | |
4164 if(U_FAILURE(errorCode)) { | |
4165 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName
(errorCode)); | |
4166 return; | |
4167 } | |
4168 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NUL
L,TRUE,&errorCode); | |
4169 if(U_FAILURE(errorCode)) { | |
4170 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_
errorName(errorCode)); | |
4171 return; | |
4172 } | |
4173 kr1 = ucnv_open("ibm-25546", &errorCode); | |
4174 if(U_FAILURE(errorCode)) { | |
4175 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorNa
me(errorCode)); | |
4176 return; | |
4177 } | |
4178 target = tgt; | |
4179 targetLimit = target+100; | |
4180 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NUL
L,TRUE,&errorCode); | |
4181 | |
4182 if(U_FAILURE(errorCode)) { | |
4183 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n",
u_errorName(errorCode)); | |
4184 return; | |
4185 } | |
4186 | |
4187 ucnv_close(kr); | |
4188 ucnv_close(kr1); | |
4189 | |
4190 } | |
4191 | |
4192 static void | |
4193 TestJIS(){ | |
4194 /* From Unicode moved to testdata/conversion.txt */ | |
4195 /*To Unicode*/ | |
4196 { | |
4197 static const uint8_t sampleTextJIS[] = { | |
4198 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ | |
4199 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ | |
4200 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>
&@*/ | |
4201 }; | |
4202 static const uint16_t expectedISO2022JIS[] = { | |
4203 0x0041, 0x0042, | |
4204 0xFF81, 0xFF82, | |
4205 0x3000 | |
4206 }; | |
4207 static const int32_t toISO2022JISOffs[]={ | |
4208 3,4, | |
4209 8,9, | |
4210 16 | |
4211 }; | |
4212 | |
4213 static const uint8_t sampleTextJIS7[] = { | |
4214 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ | |
4215 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ | |
4216 0x1b,0x24,0x42,0x21,0x21, | |
4217 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ | |
4218 0x21,0x22, | |
4219 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>
&@*/ | |
4220 }; | |
4221 static const uint16_t expectedISO2022JIS7[] = { | |
4222 0x0041, 0x0042, | |
4223 0xFF81, 0xFF82, | |
4224 0x3000, | |
4225 0xFF81, 0xFF82, | |
4226 0x3001, | |
4227 0x3000 | |
4228 }; | |
4229 static const int32_t toISO2022JIS7Offs[]={ | |
4230 3,4, | |
4231 8,9, | |
4232 13,16, | |
4233 17, | |
4234 19,27 | |
4235 }; | |
4236 static const uint8_t sampleTextJIS8[] = { | |
4237 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ | |
4238 0xa1,0xc8,0xd9,/*Katakana Set*/ | |
4239 0x1b,0x28,0x42, | |
4240 0x41,0x42, | |
4241 0xb1,0xc3, /*Katakana Set*/ | |
4242 0x1b,0x24,0x42,0x21,0x21 | |
4243 }; | |
4244 static const uint16_t expectedISO2022JIS8[] = { | |
4245 0x0041, 0x0042, | |
4246 0xff61, 0xff88, 0xff99, | |
4247 0x0041, 0x0042, | |
4248 0xff71, 0xff83, | |
4249 0x3000 | |
4250 }; | |
4251 static const int32_t toISO2022JIS8Offs[]={ | |
4252 3, 4, 5, 6, | |
4253 7, 11, 12, 13, | |
4254 14, 18, | |
4255 }; | |
4256 | |
4257 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, | |
4258 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toIS
O2022JISOffs,TRUE); | |
4259 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7
, | |
4260 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", t
oISO2022JIS7Offs,TRUE); | |
4261 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8
, | |
4262 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", t
oISO2022JIS8Offs,TRUE); | |
4263 } | |
4264 | |
4265 } | |
4266 | |
4267 | |
4268 #if 0 | |
/* ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7; the code up to the matching #endif is compiled out. */
4270 | |
4271 static void TestJitterbug915(){ | |
4272 /* tests for roundtripping of the below sequence | |
4273 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / | |
4274 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / | |
4275 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / | |
4276 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / | |
4277 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / | |
4278 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / | |
4279 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / | |
4280 */ | |
4281 static const char cSource[]={ | |
4282 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, | |
4283 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, | |
4284 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, | |
4285 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, | |
4286 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, | |
4287 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, | |
4288 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, | |
4289 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, | |
4290 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, | |
4291 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, | |
4292 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, | |
4293 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, | |
4294 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, | |
4295 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, | |
4296 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
4297 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
4298 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, | |
4299 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, | |
4300 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, | |
4301 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, | |
4302 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, | |
4303 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, | |
4304 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, | |
4305 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, | |
4306 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, | |
4307 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, | |
4308 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, | |
4309 0x37, 0x20, 0x2A, 0x2F | |
4310 }; | |
4311 UChar uTarget[500]={'\0'}; | |
4312 UChar* utarget=uTarget; | |
4313 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; | |
4314 | |
4315 char cTarget[500]={'\0'}; | |
4316 char* ctarget=cTarget; | |
4317 char* ctargetLimit=cTarget+sizeof(cTarget); | |
4318 const char* csource=cSource; | |
4319 const char* tempSrc = cSource; | |
4320 UErrorCode err=U_ZERO_ERROR; | |
4321 | |
4322 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); | |
4323 if(U_FAILURE(err)) { | |
4324 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
r)); | |
4325 return; | |
4326 } | |
4327 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),N
ULL,TRUE,&err); | |
4328 if(U_FAILURE(err)) { | |
4329 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(er
r)); | |
4330 return; | |
4331 } | |
4332 utargetLimit=utarget; | |
4333 utarget = uTarget; | |
4334 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetL
imit,NULL,TRUE,&err); | |
4335 if(U_FAILURE(err)) { | |
4336 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(
err)); | |
4337 return; | |
4338 } | |
4339 ctargetLimit=ctarget; | |
4340 ctarget =cTarget; | |
4341 while(ctarget<ctargetLimit){ | |
4342 if(*ctarget != *tempSrc){ | |
4343 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarg
et-cTarget), *ctarget,(int)*tempSrc) ; | |
4344 } | |
4345 ++ctarget; | |
4346 ++tempSrc; | |
4347 } | |
4348 | |
4349 ucnv_close(conv); | |
4350 } | |
4351 | |
4352 static void | |
4353 TestISO_2022_CN_EXT() { | |
4354 /* test input */ | |
4355 static const uint16_t in[]={ | |
4356 /* test Non-BMP code points */ | |
4357 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869,
0xDE9F, | |
4358 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869,
0xDEA8, | |
4359 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869,
0xDEAF, | |
4360 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869,
0xDEB6, | |
4361 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869,
0xDEBB, | |
4362 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869,
0xDEC0, | |
4363 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869,
0xDEC8, | |
4364 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869,
0xDECF, | |
4365 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869,
0xDED4, | |
4366 0xD869, 0xDED5, | |
4367 | |
4368 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D,
0x000A, | |
4369 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D,
0x000A, | |
4370 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D,
0x000A, | |
4371 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, | |
4372 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, | |
4373 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D,
0x000A, | |
4374 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D,
0x000A, | |
4375 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D,
0x000A, | |
4376 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
4377 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D,
0x000A, | |
4378 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D,
0x000A, | |
4379 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D,
0x000A, | |
4380 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D,
0x000A, | |
4381 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D,
0x000A, | |
4382 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D,
0x000A, | |
4383 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D,
0x000A, | |
4384 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D,
0x000A, | |
4385 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D,
0x000A, | |
4386 | |
4387 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A | |
4388 | |
4389 }; | |
4390 | |
4391 const UChar* uSource; | |
4392 const UChar* uSourceLimit; | |
4393 const char* cSource; | |
4394 const char* cSourceLimit; | |
4395 UChar *uTargetLimit =NULL; | |
4396 UChar *uTarget; | |
4397 char *cTarget; | |
4398 const char *cTargetLimit; | |
4399 char *cBuf; | |
4400 UChar *uBuf,*test; | |
4401 int32_t uBufSize = 180; | |
4402 UErrorCode errorCode=U_ZERO_ERROR; | |
4403 UConverter *cnv; | |
4404 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4405 int32_t* myOff= offsets; | |
4406 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); | |
4407 if(U_FAILURE(errorCode)) { | |
4408 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
4409 return; | |
4410 } | |
4411 | |
4412 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4413 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
4414 uSource = (const UChar*)in; | |
4415 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
4416 cTarget = cBuf; | |
4417 cTargetLimit = cBuf +uBufSize*5; | |
4418 uTarget = uBuf; | |
4419 uTargetLimit = uBuf+ uBufSize*5; | |
4420 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
4421 if(U_FAILURE(errorCode)){ | |
4422 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
4423 return; | |
4424 } | |
4425 cSource = cBuf; | |
4426 cSourceLimit =cTarget; | |
4427 test =uBuf; | |
4428 myOff=offsets; | |
4429 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
4430 if(U_FAILURE(errorCode)){ | |
4431 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
4432 return; | |
4433 } | |
4434 uSource = (const UChar*)in; | |
4435 while(uSource<uSourceLimit){ | |
4436 if(*test!=*uSource){ | |
4437 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
4438 } | |
4439 else{ | |
4440 log_verbose(" Got: \\u%04X\n",(int)*test) ; | |
4441 } | |
4442 uSource++; | |
4443 test++; | |
4444 } | |
4445 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4446 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4447 /*Test for the condition where there is an invalid character*/ | |
4448 ucnv_reset(cnv); | |
4449 { | |
4450 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
4451 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); | |
4452 } | |
4453 ucnv_close(cnv); | |
4454 free(uBuf); | |
4455 free(cBuf); | |
4456 free(offsets); | |
4457 } | |
4458 #endif | |
4459 | |
4460 static void | |
4461 TestISO_2022_CN() { | |
4462 /* test input */ | |
4463 static const uint16_t in[]={ | |
4464 /* jitterbug 951 */ | |
4465 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41,
0xFF52, | |
4466 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16,
0xFF17, | |
4467 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45,
0xFF52, | |
4468 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E,
0xFF45, | |
4469 0x0020, 0x0045, 0x004e, 0x0044, | |
4470 /**/ | |
4471 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D,
0x000A, | |
4472 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D,
0x000A, | |
4473 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D,
0x000A, | |
4474 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D,
0x000A, | |
4475 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D,
0x000A, | |
4476 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D,
0x000A, | |
4477 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D,
0x000A, | |
4478 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D,
0x000A, | |
4479 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D,
0x000A, | |
4480 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D,
0x000A, | |
4481 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D,
0x000A, | |
4482 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D,
0x000A, | |
4483 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D,
0x000A, | |
4484 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D,
0x000A, | |
4485 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D,
0x000A, | |
4486 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485,
0x2486, | |
4487 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D,
0x000A, | |
4488 | |
4489 }; | |
4490 const UChar* uSource; | |
4491 const UChar* uSourceLimit; | |
4492 const char* cSource; | |
4493 const char* cSourceLimit; | |
4494 UChar *uTargetLimit =NULL; | |
4495 UChar *uTarget; | |
4496 char *cTarget; | |
4497 const char *cTargetLimit; | |
4498 char *cBuf; | |
4499 UChar *uBuf,*test; | |
4500 int32_t uBufSize = 180; | |
4501 UErrorCode errorCode=U_ZERO_ERROR; | |
4502 UConverter *cnv; | |
4503 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); | |
4504 int32_t* myOff= offsets; | |
4505 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); | |
4506 if(U_FAILURE(errorCode)) { | |
4507 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(er
rorCode)); | |
4508 return; | |
4509 } | |
4510 | |
4511 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); | |
4512 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); | |
4513 uSource = (const UChar*)in; | |
4514 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); | |
4515 cTarget = cBuf; | |
4516 cTargetLimit = cBuf +uBufSize*5; | |
4517 uTarget = uBuf; | |
4518 uTargetLimit = uBuf+ uBufSize*5; | |
4519 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,T
RUE, &errorCode); | |
4520 if(U_FAILURE(errorCode)){ | |
4521 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(er
rorCode)); | |
4522 return; | |
4523 } | |
4524 cSource = cBuf; | |
4525 cSourceLimit =cTarget; | |
4526 test =uBuf; | |
4527 myOff=offsets; | |
4528 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&e
rrorCode); | |
4529 if(U_FAILURE(errorCode)){ | |
4530 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(erro
rCode)); | |
4531 return; | |
4532 } | |
4533 uSource = (const UChar*)in; | |
4534 while(uSource<uSourceLimit){ | |
4535 if(*test!=*uSource){ | |
4536 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test)
; | |
4537 } | |
4538 else{ | |
4539 log_verbose(" Got: \\u%04X\n",(int)*test) ; | |
4540 } | |
4541 uSource++; | |
4542 test++; | |
4543 } | |
4544 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); | |
4545 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4546 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4547 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); | |
4548 TestJitterbug930("csISO2022CN"); | |
4549 /*Test for the condition where there is an invalid character*/ | |
4550 ucnv_reset(cnv); | |
4551 { | |
4552 static const uint8_t source2[]={0x0e,0x24,0x053}; | |
4553 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); | |
4554 } | |
4555 | |
4556 ucnv_close(cnv); | |
4557 free(uBuf); | |
4558 free(cBuf); | |
4559 free(offsets); | |
4560 } | |
4561 | |
4562 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallb
ackReason is UCNV_IRREGULAR */ | |
/* One table entry for TestJitterbug6175: a converter and the bytes to feed it. */
typedef struct {
    const char *converterName;   /* name passed to ucnv_open(); NULL marks the end of the table */
    const char *inputText;       /* bytes to convert to Unicode */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4568 | |
4569 /* Callback for TestJitterbug6175, should only get called for empty segment erro
rs */ | |
4570 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUn
icodeArgs *toArgs, const char* codeUnits, | |
4571 int32_t length, UConverterCallbackR
eason reason, UErrorCode * err ) { | |
4572 if (reason > UCNV_IRREGULAR) { | |
4573 return; | |
4574 } | |
4575 if (reason != UCNV_IRREGULAR) { | |
4576 log_err("toUnicode callback invoked for empty segment but reason is not
UCNV_IRREGULAR\n"); | |
4577 } | |
4578 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ | |
4579 *err = U_ZERO_ERROR; | |
4580 ucnv_cbToUWriteSub(toArgs,0,err); | |
4581 } | |
4582 | |
4583 enum { kEmptySegmentToUCharsMax = 64 }; | |
4584 static void TestJitterbug6175(void) { | |
4585 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0
x42, 0x63, 0x64, 0x0D, 0x0A }; | |
4586 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F,
0x62, 0x0D, 0x0A }; | |
4587 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E,
0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; | |
4588 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E,
0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; | |
4589 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63,
0x64 }; | |
4590 static const EmptySegmentTest emptySegmentTests[] = { | |
4591 /* converterName inputText inputTextLength */ | |
4592 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, | |
4593 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, | |
4594 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, | |
4595 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, | |
4596 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, | |
4597 /* terminator: */ | |
4598 { NULL, NULL, 0, } | |
4599 }; | |
4600 const EmptySegmentTest * testPtr; | |
4601 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr)
{ | |
4602 UErrorCode err = U_ZERO_ERROR; | |
4603 UConverter * cnv = ucnv_open(testPtr->converterName, &err); | |
4604 if (U_FAILURE(err)) { | |
4605 log_data_err("Unable to open %s converter: %s\n", testPtr->converter
Name, u_errorName(err)); | |
4606 return; | |
4607 } | |
4608 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NU
LL, &err); | |
4609 if (U_FAILURE(err)) { | |
4610 log_data_err("Unable to setToUCallBack for %s converter: %s\n", test
Ptr->converterName, u_errorName(err)); | |
4611 ucnv_close(cnv); | |
4612 return; | |
4613 } | |
4614 { | |
4615 UChar toUChars[kEmptySegmentToUCharsMax]; | |
4616 UChar * toUCharsPtr = toUChars; | |
4617 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax
; | |
4618 const char * inCharsPtr = testPtr->inputText; | |
4619 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; | |
4620 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inChar
sLimit, NULL, TRUE, &err); | |
4621 } | |
4622 ucnv_close(cnv); | |
4623 } | |
4624 } | |
4625 | |
4626 static void | |
4627 TestEBCDIC_STATEFUL() { | |
4628 /* test input */ | |
4629 static const uint8_t in[]={ | |
4630 0x61, | |
4631 0x1a, | |
4632 0x0f, 0x4b, | |
4633 0x42, | |
4634 0x40, | |
4635 0x36, | |
4636 }; | |
4637 | |
4638 /* expected test results */ | |
4639 static const int32_t results[]={ | |
4640 /* number of bytes read, code point */ | |
4641 1, 0x002f, | |
4642 1, 0x0092, | |
4643 2, 0x002e, | |
4644 1, 0xff62, | |
4645 1, 0x0020, | |
4646 1, 0x0096, | |
4647 | |
4648 }; | |
4649 static const uint8_t in2[]={ | |
4650 0x0f, | |
4651 0xa1, | |
4652 0x01 | |
4653 }; | |
4654 | |
4655 /* expected test results */ | |
4656 static const int32_t results2[]={ | |
4657 /* number of bytes read, code point */ | |
4658 2, 0x203E, | |
4659 1, 0x0001, | |
4660 }; | |
4661 | |
4662 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); | |
4663 UErrorCode errorCode=U_ZERO_ERROR; | |
4664 UConverter *cnv=ucnv_open("ibm-930", &errorCode); | |
4665 if(U_FAILURE(errorCode)) { | |
4666 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n"
, u_errorName(errorCode)); | |
4667 return; | |
4668 } | |
4669 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); | |
4670 ucnv_reset(cnv); | |
4671 /* Test the condition when source >= sourceLimit */ | |
4672 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLi
mit <= source"); | |
4673 ucnv_reset(cnv); | |
4674 /*Test for the condition where source > sourcelimit after consuming the shif
t chracter */ | |
4675 { | |
4676 static const uint8_t source1[]={0x0f}; | |
4677 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeo
f(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); | |
4678 } | |
4679 /*Test for the condition where there is an invalid character*/ | |
4680 ucnv_reset(cnv); | |
4681 { | |
4682 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; | |
4683 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeo
f(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); | |
4684 } | |
4685 ucnv_reset(cnv); | |
4686 source=(const char*)in2; | |
4687 limit=(const char*)in2+sizeof(in2); | |
4688 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); | |
4689 ucnv_close(cnv); | |
4690 | |
4691 } | |
4692 | |
4693 static void | |
4694 TestGB18030() { | |
4695 /* test input */ | |
4696 static const uint8_t in[]={ | |
4697 0x24, | |
4698 0x7f, | |
4699 0x81, 0x30, 0x81, 0x30, | |
4700 0xa8, 0xbf, | |
4701 0xa2, 0xe3, | |
4702 0xd2, 0xbb, | |
4703 0x82, 0x35, 0x8f, 0x33, | |
4704 0x84, 0x31, 0xa4, 0x39, | |
4705 0x90, 0x30, 0x81, 0x30, | |
4706 0xe3, 0x32, 0x9a, 0x35 | |
4707 #if 0 | |
4708 /* | |
4709 * Feature removed markus 2000-oct-26 | |
4710 * Only some codepages must match surrogate pairs into supplementary cod
e points - | |
4711 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvm
bcs.c . | |
4712 * GB 18030 provides direct encodings for supplementary code points, the
refore | |
4713 * it must not combine two single-encoded surrogates into one code point
. | |
4714 */ | |
4715 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded sur
rogates */ | |
4716 #endif | |
4717 }; | |
4718 | |
4719 /* expected test results */ | |
4720 static const int32_t results[]={ | |
4721 /* number of bytes read, code point */ | |
4722 1, 0x24, | |
4723 1, 0x7f, | |
4724 4, 0x80, | |
4725 2, 0x1f9, | |
4726 2, 0x20ac, | |
4727 2, 0x4e00, | |
4728 4, 0x9fa6, | |
4729 4, 0xffff, | |
4730 4, 0x10000, | |
4731 4, 0x10ffff | |
4732 #if 0 | |
4733 /* Feature removed. See comment above. */ | |
4734 8, 0x10000 | |
4735 #endif | |
4736 }; | |
4737 | |
4738 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ | |
4739 UErrorCode errorCode=U_ZERO_ERROR; | |
4740 UConverter *cnv=ucnv_open("gb18030", &errorCode); | |
4741 if(U_FAILURE(errorCode)) { | |
4742 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(err
orCode)); | |
4743 return; | |
4744 } | |
4745 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "
gb18030"); | |
4746 ucnv_close(cnv); | |
4747 } | |
4748 | |
4749 static void | |
4750 TestLMBCS() { | |
4751 /* LMBCS-1 string */ | |
4752 static const uint8_t pszLMBCS[]={ | |
4753 0x61, | |
4754 0x01, 0x29, | |
4755 0x81, | |
4756 0xA0, | |
4757 0x0F, 0x27, | |
4758 0x0F, 0x91, | |
4759 0x14, 0x0a, 0x74, | |
4760 0x14, 0xF6, 0x02, | |
4761 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ | |
4762 0x10, 0x88, 0xA0, | |
4763 }; | |
4764 | |
4765 /* Unicode UChar32 equivalents */ | |
4766 static const UChar32 pszUnicode32[]={ | |
4767 /* code point */ | |
4768 0x00000061, | |
4769 0x00002013, | |
4770 0x000000FC, | |
4771 0x000000E1, | |
4772 0x00000007, | |
4773 0x00000091, | |
4774 0x00000a74, | |
4775 0x00000200, | |
4776 0x00023456, /* code point for surrogate pair */ | |
4777 0x00005516 | |
4778 }; | |
4779 | |
4780 /* Unicode UChar equivalents */ | |
4781 static const UChar pszUnicode[]={ | |
4782 /* code point */ | |
4783 0x0061, | |
4784 0x2013, | |
4785 0x00FC, | |
4786 0x00E1, | |
4787 0x0007, | |
4788 0x0091, | |
4789 0x0a74, | |
4790 0x0200, | |
4791 0xD84D, /* low surrogate */ | |
4792 0xDC56, /* high surrogate */ | |
4793 0x5516 | |
4794 }; | |
4795 | |
4796 /* expected test results */ | |
4797 static const int offsets32[]={ | |
4798 /* number of bytes read, code point */ | |
4799 0, | |
4800 1, | |
4801 3, | |
4802 4, | |
4803 5, | |
4804 7, | |
4805 9, | |
4806 12, | |
4807 15, | |
4808 21, | |
4809 24 | |
4810 }; | |
4811 | |
4812 /* expected test results */ | |
4813 static const int offsets[]={ | |
4814 /* number of bytes read, code point */ | |
4815 0, | |
4816 1, | |
4817 3, | |
4818 4, | |
4819 5, | |
4820 7, | |
4821 9, | |
4822 12, | |
4823 15, | |
4824 18, | |
4825 21, | |
4826 24 | |
4827 }; | |
4828 | |
4829 | |
4830 UConverter *cnv; | |
4831 | |
4832 #define NAME_LMBCS_1 "LMBCS-1" | |
4833 #define NAME_LMBCS_2 "LMBCS-2" | |
4834 | |
4835 | |
4836 /* Some basic open/close/property tests on some LMBCS converters */ | |
4837 { | |
4838 | |
4839 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ | |
4840 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ | |
4841 char get_subchars [1]; | |
4842 const char * get_name; | |
4843 UConverter *cnv1; | |
4844 UConverter *cnv2; | |
4845 | |
4846 int8_t len = sizeof(get_subchars); | |
4847 | |
4848 UErrorCode errorCode=U_ZERO_ERROR; | |
4849 | |
4850 /* Open */ | |
4851 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); | |
4852 if(U_FAILURE(errorCode)) { | |
4853 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(er
rorCode)); | |
4854 return; | |
4855 } | |
4856 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); | |
4857 if(U_FAILURE(errorCode)) { | |
4858 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(er
rorCode)); | |
4859 return; | |
4860 } | |
4861 | |
4862 /* Name */ | |
4863 get_name = ucnv_getName (cnv1, &errorCode); | |
4864 if (strcmp(NAME_LMBCS_1,get_name)){ | |
4865 log_err("Unexpected converter name: %s\n", get_name); | |
4866 } | |
4867 get_name = ucnv_getName (cnv2, &errorCode); | |
4868 if (strcmp(NAME_LMBCS_2,get_name)){ | |
4869 log_err("Unexpected converter name: %s\n", get_name); | |
4870 } | |
4871 | |
4872 /* substitution chars */ | |
4873 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); | |
4874 if(U_FAILURE(errorCode)) { | |
4875 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); | |
4876 } | |
4877 if (len!=1){ | |
4878 log_err("Unexpected length of sub chars\n"); | |
4879 } | |
4880 if (get_subchars[0] != expected_subchars[0]){ | |
4881 log_err("Unexpected value of sub chars\n"); | |
4882 } | |
4883 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); | |
4884 if(U_FAILURE(errorCode)) { | |
4885 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); | |
4886 } | |
4887 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); | |
4888 if(U_FAILURE(errorCode)) { | |
4889 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); | |
4890 } | |
4891 if (len!=1){ | |
4892 log_err("Unexpected length of sub chars\n"); | |
4893 } | |
4894 if (get_subchars[0] != new_subchars[0]){ | |
4895 log_err("Unexpected value of sub chars\n"); | |
4896 } | |
4897 ucnv_close(cnv1); | |
4898 ucnv_close(cnv2); | |
4899 | |
4900 } | |
4901 | |
4902 /* LMBCS to Unicode - offsets */ | |
4903 { | |
4904 UErrorCode errorCode=U_ZERO_ERROR; | |
4905 | |
4906 const char * pSource = (const char *)pszLMBCS; | |
4907 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); | |
4908 | |
4909 UChar Out [sizeof(pszUnicode) + 1]; | |
4910 UChar * pOut = Out; | |
4911 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); | |
4912 | |
4913 int32_t off [sizeof(offsets)]; | |
4914 | |
4915 /* last 'offset' in expected results is just the final size. | |
4916 (Makes other tests easier). Compensate here: */ | |
4917 | |
4918 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); | |
4919 | |
4920 | |
4921 | |
4922 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ | |
4923 if(U_FAILURE(errorCode)) { | |
4924 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(er
rorCode)); | |
4925 return; | |
4926 } | |
4927 | |
4928 | |
4929 | |
4930 ucnv_toUnicode (cnv, | |
4931 &pOut, | |
4932 OutLimit, | |
4933 &pSource, | |
4934 sourceLimit, | |
4935 off, | |
4936 TRUE, | |
4937 &errorCode); | |
4938 | |
4939 | |
4940 if (memcmp(off,offsets,sizeof(offsets))) | |
4941 { | |
4942 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n
"); | |
4943 } | |
4944 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) | |
4945 { | |
4946 log_err("LMBCS->Uni: Calculated codepoints do not match expected result
s\n"); | |
4947 } | |
4948 ucnv_close(cnv); | |
4949 } | |
4950 { | |
4951 /* LMBCS to Unicode - getNextUChar */ | |
4952 const char * sourceStart; | |
4953 const char *source=(const char *)pszLMBCS; | |
4954 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); | |
4955 const UChar32 *results= pszUnicode32; | |
4956 const int *off = offsets32; | |
4957 | |
4958 UErrorCode errorCode=U_ZERO_ERROR; | |
4959 UChar32 uniChar; | |
4960 | |
4961 cnv=ucnv_open("LMBCS-1", &errorCode); | |
4962 if(U_FAILURE(errorCode)) { | |
4963 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(
errorCode)); | |
4964 return; | |
4965 } | |
4966 else | |
4967 { | |
4968 | |
4969 while(source<limit) { | |
4970 sourceStart=source; | |
4971 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]),
&errorCode); | |
4972 if(U_FAILURE(errorCode)) { | |
4973 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorNam
e(errorCode)); | |
4974 break; | |
4975 } else if(source-sourceStart != off[1] - off[0] || uniChar != *resul
ts) { | |
4976 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, sh
ould have been %lx from %d bytes.\n", | |
4977 uniChar, (source-sourceStart), *results, *off); | |
4978 break; | |
4979 } | |
4980 results++; | |
4981 off++; | |
4982 } | |
4983 } | |
4984 ucnv_close(cnv); | |
4985 } | |
4986 { /* test locale & optimization group operations: Unicode to LMBCS */ | |
4987 | |
4988 UErrorCode errorCode=U_ZERO_ERROR; | |
4989 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); | |
4990 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); | |
4991 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); | |
4992 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ | |
4993 const UChar * pUniOut = uniString; | |
4994 UChar * pUniIn = uniString; | |
4995 uint8_t lmbcsString [4]; | |
4996 const char * pLMBCSOut = (const char *)lmbcsString; | |
4997 char * pLMBCSIn = (char *)lmbcsString; | |
4998 | |
4999 /* 0192 (hook) converts to both group 3 & group 1. input locale should dif
ferentiate */ | |
5000 ucnv_fromUnicode (cnv16he, | |
5001 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsS
tring[0])), | |
5002 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0
]), | |
5003 NULL, 1, &errorCode); | |
5004 | |
5005 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) | |
5006 { | |
5007 log_err("LMBCS-16,locale=he gives unexpected translation\n"); | |
5008 } | |
5009 | |
5010 pLMBCSIn= (char *)lmbcsString; | |
5011 pUniOut = uniString; | |
5012 ucnv_fromUnicode (cnv01us, | |
5013 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsStri
ng)/sizeof(lmbcsString[0])), | |
5014 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0
]), | |
5015 NULL, 1, &errorCode); | |
5016 | |
5017 if (lmbcsString[0] != 0x9F) | |
5018 { | |
5019 log_err("LMBCS-1,locale=US gives unexpected translation\n"); | |
5020 } | |
5021 | |
5022 /* single byte char from mbcs char set */ | |
5023 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ | |
5024 pLMBCSOut = (const char *)lmbcsString; | |
5025 pUniIn = uniString; | |
5026 ucnv_toUnicode (cnv16jp, | |
5027 &pUniIn, pUniIn + 1, | |
5028 &pLMBCSOut, (pLMBCSOut + 1), | |
5029 NULL, 1, &errorCode); | |
5030 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pU
niIn != uniString+1 || uniString[0] != 0xFF6E) | |
5031 { | |
5032 log_err("Unexpected results from LMBCS-16 single byte char\n"); | |
5033 } | |
5034 /* convert to group 1: should be 3 bytes */ | |
5035 pLMBCSIn = (char *)lmbcsString; | |
5036 pUniOut = uniString; | |
5037 ucnv_fromUnicode (cnv01us, | |
5038 &pLMBCSIn, (const char *)(pLMBCSIn + 3), | |
5039 &pUniOut, pUniOut + 1, | |
5040 NULL, 1, &errorCode); | |
5041 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUn
iOut != uniString+1 | |
5042 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] !
= 0xAE) | |
5043 { | |
5044 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); | |
5045 } | |
5046 pLMBCSOut = (const char *)lmbcsString; | |
5047 pUniIn = uniString; | |
5048 ucnv_toUnicode (cnv01us, | |
5049 &pUniIn, pUniIn + 1, | |
5050 &pLMBCSOut, (const char *)(pLMBCSOut + 3), | |
5051 NULL, 1, &errorCode); | |
5052 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pU
niIn != uniString+1 || uniString[0] != 0xFF6E) | |
5053 { | |
5054 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); | |
5055 } | |
5056 pLMBCSIn = (char *)lmbcsString; | |
5057 pUniOut = uniString; | |
5058 ucnv_fromUnicode (cnv16jp, | |
5059 &pLMBCSIn, (const char *)(pLMBCSIn + 1), | |
5060 &pUniOut, pUniOut + 1, | |
5061 NULL, 1, &errorCode); | |
5062 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUn
iOut != uniString+1 || lmbcsString[0] != 0xAE) | |
5063 { | |
5064 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); | |
5065 } | |
5066 ucnv_close(cnv16he); | |
5067 ucnv_close(cnv16jp); | |
5068 ucnv_close(cnv01us); | |
5069 } | |
5070 { | |
5071 /* Small source buffer testing, LMBCS -> Unicode */ | |
5072 | |
5073 UErrorCode errorCode=U_ZERO_ERROR; | |
5074 | |
5075 const char * pSource = (const char *)pszLMBCS; | |
5076 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); | |
5077 int codepointCount = 0; | |
5078 | |
5079 UChar Out [sizeof(pszUnicode) + 1]; | |
5080 UChar * pOut = Out; | |
5081 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); | |
5082 | |
5083 | |
5084 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); | |
5085 if(U_FAILURE(errorCode)) { | |
5086 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(error
Code)); | |
5087 return; | |
5088 } | |
5089 | |
5090 | |
5091 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) | |
5092 { | |
5093 ucnv_toUnicode (cnv, | |
5094 &pOut, | |
5095 OutLimit, | |
5096 &pSource, | |
5097 (pSource+1), /* claim that this is a 1- byte buffer */ | |
5098 NULL, | |
5099 FALSE, /* FALSE means there might be more chars in the next bu
ffer */ | |
5100 &errorCode); | |
5101 | |
5102 if (U_SUCCESS (errorCode)) | |
5103 { | |
5104 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount
+1]) | |
5105 { | |
5106 /* we are on to the next code point: check value */ | |
5107 | |
5108 if (Out[0] != pszUnicode[codepointCount]){ | |
5109 log_err("LMBCS->Uni result %lx should have been %lx \n", | |
5110 Out[0], pszUnicode[codepointCount]); | |
5111 } | |
5112 | |
5113 pOut = Out; /* reset for accumulating next code point */ | |
5114 codepointCount++; | |
5115 } | |
5116 } | |
5117 else | |
5118 { | |
5119 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorC
ode)); | |
5120 } | |
5121 } | |
5122 { | |
5123 /* limits & surrogate error testing */ | |
5124 char LIn [sizeof(pszLMBCS)]; | |
5125 const char * pLIn = LIn; | |
5126 | |
5127 char LOut [sizeof(pszLMBCS)]; | |
5128 char * pLOut = LOut; | |
5129 | |
5130 UChar UOut [sizeof(pszUnicode)]; | |
5131 UChar * pUOut = UOut; | |
5132 | |
5133 UChar UIn [sizeof(pszUnicode)]; | |
5134 const UChar * pUIn = UIn; | |
5135 | |
5136 int32_t off [sizeof(offsets)]; | |
5137 UChar32 uniChar; | |
5138 | |
5139 errorCode=U_ZERO_ERROR; | |
5140 | |
5141 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERRO
R */ | |
5142 pUIn++; | |
5143 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &erro
rCode); | |
5144 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
5145 { | |
5146 log_err("Unexpected Error on negative source request to ucnv_fromUni
code: %s\n", u_errorName(errorCode)); | |
5147 } | |
5148 pUIn--; | |
5149 | |
5150 errorCode=U_ZERO_ERROR; | |
5151 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(
pLIn-1),off,FALSE, &errorCode); | |
5152 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
5153 { | |
5154 log_err("Unexpected Error on negative source request to ucnv_toUnico
de: %s\n", u_errorName(errorCode)); | |
5155 } | |
5156 errorCode=U_ZERO_ERROR; | |
5157 | |
5158 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(p
LIn-1), &errorCode); | |
5159 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) | |
5160 { | |
5161 log_err("Unexpected Error on negative source request to ucnv_getNext
UChar: %s\n", u_errorName(errorCode)); | |
5162 } | |
5163 errorCode=U_ZERO_ERROR; | |
5164 | |
5165 /* 0 byte source request - no error, no pointer movement */ | |
5166 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)p
LIn,off,FALSE, &errorCode); | |
5167 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); | |
5168 if(U_FAILURE(errorCode)) { | |
5169 log_err("0 byte source request: unexpected error: %s\n", u_errorName
(errorCode)); | |
5170 } | |
5171 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn
)) | |
5172 { | |
5173 log_err("Unexpected pointer move in 0 byte source request \n"); | |
5174 } | |
5175 /*0 byte source request - GetNextUChar : error & value == fffe or ffff
*/ | |
5176 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pL
In, &errorCode); | |
5177 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) | |
5178 { | |
5179 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUC
har: %s\n", u_errorName(errorCode)); | |
5180 } | |
5181 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ | |
5182 { | |
5183 log_err("Unexpected value on 0-byte source request to ucnv_getnextUC
har \n"); | |
5184 } | |
5185 errorCode = U_ZERO_ERROR; | |
5186 | |
5187 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ | |
5188 | |
5189 pUIn = pszUnicode; | |
5190 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnic
ode)/sizeof(UChar),off,FALSE, &errorCode); | |
5191 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4]
|| pUIn != pszUnicode+4 ) | |
5192 { | |
5193 log_err("Unexpected results on out of target room to ucnv_fromUnicod
e\n"); | |
5194 } | |
5195 | |
5196 errorCode = U_ZERO_ERROR; | |
5197 | |
5198 pLIn = (const char *)pszLMBCS; | |
5199 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FA
LSE, &errorCode); | |
5200 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn !
= (const char *)pszLMBCS+offsets[4]) | |
5201 { | |
5202 log_err("Unexpected results on out of target room to ucnv_toUnicode\
n"); | |
5203 } | |
5204 | |
5205 /* unpaired or chopped LMBCS surrogates */ | |
5206 | |
5207 /* OK high surrogate, Low surrogate is chopped */ | |
5208 LIn [0] = (char)0x14; | |
5209 LIn [1] = (char)0xD8; | |
5210 LIn [2] = (char)0x01; | |
5211 LIn [3] = (char)0x14; | |
5212 LIn [4] = (char)0xDC; | |
5213 pLIn = LIn; | |
5214 errorCode = U_ZERO_ERROR; | |
5215 pUOut = UOut; | |
5216 | |
5217 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &er
rorCode); | |
5218 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
5219 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 5) | |
5220 { | |
5221 log_err("Unexpected results on chopped low surrogate\n"); | |
5222 } | |
5223 | |
5224 /* chopped at surrogate boundary */ | |
5225 LIn [0] = (char)0x14; | |
5226 LIn [1] = (char)0xD8; | |
5227 LIn [2] = (char)0x01; | |
5228 pLIn = LIn; | |
5229 errorCode = U_ZERO_ERROR; | |
5230 pUOut = UOut; | |
5231 | |
5232 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); | |
5233 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || p
LIn != LIn + 3) | |
5234 { | |
5235 log_err("Unexpected results on chopped at surrogate boundary \n"); | |
5236 } | |
5237 | |
5238 /* unpaired surrogate plus valid Unichar */ | |
5239 LIn [0] = (char)0x14; | |
5240 LIn [1] = (char)0xD8; | |
5241 LIn [2] = (char)0x01; | |
5242 LIn [3] = (char)0x14; | |
5243 LIn [4] = (char)0xC9; | |
5244 LIn [5] = (char)0xD0; | |
5245 pLIn = LIn; | |
5246 errorCode = U_ZERO_ERROR; | |
5247 pUOut = UOut; | |
5248 | |
5249 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); | |
5250 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || p
UOut != UOut + 2 || pLIn != LIn + 6) | |
5251 { | |
5252 log_err("Unexpected results after unpaired surrogate plus valid Unic
har \n"); | |
5253 } | |
5254 | |
5255 /* unpaired surrogate plus chopped Unichar */ | |
5256 LIn [0] = (char)0x14; | |
5257 LIn [1] = (char)0xD8; | |
5258 LIn [2] = (char)0x01; | |
5259 LIn [3] = (char)0x14; | |
5260 LIn [4] = (char)0xC9; | |
5261 | |
5262 pLIn = LIn; | |
5263 errorCode = U_ZERO_ERROR; | |
5264 pUOut = UOut; | |
5265 | |
5266 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
5267 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 5) | |
5268 { | |
5269 log_err("Unexpected results after unpaired surrogate plus chopped Un
ichar \n"); | |
5270 } | |
5271 | |
5272 /* unpaired surrogate plus valid non-Unichar */ | |
5273 LIn [0] = (char)0x14; | |
5274 LIn [1] = (char)0xD8; | |
5275 LIn [2] = (char)0x01; | |
5276 LIn [3] = (char)0x0F; | |
5277 LIn [4] = (char)0x3B; | |
5278 | |
5279 pLIn = LIn; | |
5280 errorCode = U_ZERO_ERROR; | |
5281 pUOut = UOut; | |
5282 | |
5283 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); | |
5284 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUO
ut != UOut + 2 || pLIn != LIn + 5) | |
5285 { | |
5286 log_err("Unexpected results after unpaired surrogate plus valid non-
Unichar\n"); | |
5287 } | |
5288 | |
5289 /* unpaired surrogate plus chopped non-Unichar */ | |
5290 LIn [0] = (char)0x14; | |
5291 LIn [1] = (char)0xD8; | |
5292 LIn [2] = (char)0x01; | |
5293 LIn [3] = (char)0x0F; | |
5294 | |
5295 pLIn = LIn; | |
5296 errorCode = U_ZERO_ERROR; | |
5297 pUOut = UOut; | |
5298 | |
5299 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char
**)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); | |
5300 | |
5301 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut !
= UOut + 1 || pLIn != LIn + 4) | |
5302 { | |
5303 log_err("Unexpected results after unpaired surrogate plus chopped no
n-Unichar\n"); | |
5304 } | |
5305 } | |
5306 } | |
5307 ucnv_close(cnv); /* final cleanup */ | |
5308 } | |
5309 | |
5310 | |
5311 static void TestJitterbug255() | |
5312 { | |
5313 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x0
0 }; | |
5314 const char *testBuffer = (const char *)testBytes; | |
5315 const char *testEnd = (const char *)testBytes + sizeof(testBytes); | |
5316 UErrorCode status = U_ZERO_ERROR; | |
5317 /*UChar32 result;*/ | |
5318 UConverter *cnv = 0; | |
5319 | |
5320 cnv = ucnv_open("shift-jis", &status); | |
5321 if (U_FAILURE(status) || cnv == 0) { | |
5322 log_data_err("Failed to open the converter for SJIS.\n"); | |
5323 return; | |
5324 } | |
5325 while (testBuffer != testEnd) | |
5326 { | |
5327 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); | |
5328 if (U_FAILURE(status)) | |
5329 { | |
5330 log_err("Failed to convert the next UChar for SJIS.\n"); | |
5331 break; | |
5332 } | |
5333 } | |
5334 ucnv_close(cnv); | |
5335 } | |
5336 | |
5337 static void TestEBCDICUS4XML() | |
5338 { | |
5339 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; | |
5340 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; | |
5341 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; | |
5342 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; | |
5343 char target_x[] = {0x00, 0x00, 0x00, 0x00}; | |
5344 UChar *unicodes = unicodes_x; | |
5345 const UChar *toUnicodeMaps = toUnicodeMaps_x; | |
5346 char *target = target_x; | |
5347 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; | |
5348 UErrorCode status = U_ZERO_ERROR; | |
5349 UConverter *cnv = 0; | |
5350 | |
5351 cnv = ucnv_open("ebcdic-xml-us", &status); | |
5352 if (U_FAILURE(status) || cnv == 0) { | |
5353 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); | |
5354 return; | |
5355 } | |
5356 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines
+3, NULL, TRUE, &status); | |
5357 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3)
!= 0) { | |
5358 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", | |
5359 u_errorName(status)); | |
5360 printUSeqErr(unicodes_x, 3); | |
5361 printUSeqErr(toUnicodeMaps, 3); | |
5362 } | |
5363 status = U_ZERO_ERROR; | |
5364 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUn
icodeMaps+3, NULL, TRUE, &status); | |
5365 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) !
= 0) { | |
5366 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", | |
5367 u_errorName(status)); | |
5368 printSeqErr((const unsigned char*)target_x, 3); | |
5369 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); | |
5370 } | |
5371 ucnv_close(cnv); | |
5372 } | |
5373 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
5374 | |
5375 #if !UCONFIG_NO_COLLATION | |
5376 | |
5377 static void TestJitterbug981(){ | |
5378 const UChar* rules; | |
5379 int32_t rules_length, target_cap, bytes_needed, buff_size; | |
5380 UErrorCode status = U_ZERO_ERROR; | |
5381 UConverter *utf8cnv; | |
5382 UCollator* myCollator; | |
5383 char *buff; | |
5384 int numNeeded=0; | |
5385 utf8cnv = ucnv_open ("utf8", &status); | |
5386 if(U_FAILURE(status)){ | |
5387 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(statu
s)); | |
5388 return; | |
5389 } | |
5390 myCollator = ucol_open("zh", &status); | |
5391 if(U_FAILURE(status)){ | |
5392 log_data_err("Could not open collator for zh locale. Error: %s\n", u_err
orName(status)); | |
5393 ucnv_close(utf8cnv); | |
5394 return; | |
5395 } | |
5396 | |
5397 rules = ucol_getRules(myCollator, &rules_length); | |
5398 if(rules_length == 0) { | |
5399 log_data_err("missing zh tailoring rule string\n"); | |
5400 ucol_close(myCollator); | |
5401 ucnv_close(utf8cnv); | |
5402 return; | |
5403 } | |
5404 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); | |
5405 buff = malloc(buff_size); | |
5406 | |
5407 target_cap = 0; | |
5408 do { | |
5409 ucnv_reset(utf8cnv); | |
5410 status = U_ZERO_ERROR; | |
5411 if(target_cap >= buff_size) { | |
5412 log_err("wanted %d bytes, only %d available\n", target_cap, buff_siz
e); | |
5413 break; | |
5414 } | |
5415 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, | |
5416 rules, rules_length, &status); | |
5417 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; | |
5418 if(numNeeded!=0 && numNeeded!= bytes_needed){ | |
5419 log_err("ucnv_fromUChars returns different values for required capac
ity in pre-flight and conversion modes"); | |
5420 break; | |
5421 } | |
5422 numNeeded = bytes_needed; | |
5423 } while (status == U_BUFFER_OVERFLOW_ERROR); | |
5424 ucol_close(myCollator); | |
5425 ucnv_close(utf8cnv); | |
5426 free(buff); | |
5427 } | |
5428 | |
5429 #endif | |
5430 | |
5431 #if !UCONFIG_NO_LEGACY_CONVERSION | |
5432 static void TestJitterbug1293(){ | |
5433 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4,
0x30D7,0x000}; | |
5434 char target[256]; | |
5435 UErrorCode status = U_ZERO_ERROR; | |
5436 UConverter* conv=NULL; | |
5437 int32_t target_cap, bytes_needed, numNeeded = 0; | |
5438 conv = ucnv_open("shift-jis",&status); | |
5439 if(U_FAILURE(status)){ | |
5440 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(
status)); | |
5441 return; | |
5442 } | |
5443 | |
5444 do{ | |
5445 target_cap =0; | |
5446 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status
); | |
5447 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; | |
5448 if(numNeeded!=0 && numNeeded!= bytes_needed){ | |
5449 log_err("ucnv_fromUChars returns different values for required capacit
y in pre-flight and conversion modes"); | |
5450 } | |
5451 numNeeded = bytes_needed; | |
5452 } while (status == U_BUFFER_OVERFLOW_ERROR); | |
5453 if(U_FAILURE(status)){ | |
5454 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(stat
us)); | |
5455 return; | |
5456 } | |
5457 ucnv_close(conv); | |
5458 } | |
5459 #endif | |
5460 | |
5461 static void TestJB5275_1(){ | |
5462 | |
5463 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ | |
5464 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test
*/ | |
5465 /* Switch script: */ | |
5466 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengal
i test */ | |
5467 "\x3B\xB3\x0A" /* Easy characters - new line, so
should default!*/ | |
5468 "\xEF\x40\x3B\xB3\x0A"; | |
5469 static const UChar expected[] ={ | |
5470 0x003b, 0x0a15, 0x000a, /* Easy characters */ | |
5471 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi
test */ | |
5472 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali
*/ | |
5473 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should def
ault!*/ | |
5474 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ | |
5475 }; | |
5476 | |
5477 UErrorCode status = U_ZERO_ERROR; | |
5478 UConverter* conv = ucnv_open("iscii-gur", &status); | |
5479 UChar dest[100] = {'\0'}; | |
5480 UChar* target = dest; | |
5481 UChar* targetLimit = dest+100; | |
5482 const char* source = data; | |
5483 const char* sourceLimit = data+strlen(data); | |
5484 const UChar* exp = expected; | |
5485 | |
5486 if (U_FAILURE(status)) { | |
5487 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n",
u_errorName(status)); | |
5488 return; | |
5489 } | |
5490 | |
5491 log_verbose("Testing switching back to default script when new line is encou
ntered.\n"); | |
5492 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE,
&status); | |
5493 if(U_FAILURE(status)){ | |
5494 log_err("conversion failed: %s \n", u_errorName(status)); | |
5495 } | |
5496 targetLimit = target; | |
5497 target = dest; | |
5498 printUSeq(target, targetLimit-target); | |
5499 while(target<targetLimit){ | |
5500 if(*exp!=*target){ | |
5501 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n
", *exp, *target); | |
5502 } | |
5503 target++; | |
5504 exp++; | |
5505 } | |
5506 ucnv_close(conv); | |
5507 } | |
5508 | |
5509 static void TestJB5275(){ | |
5510 static const char* data = | |
5511 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41
*/ | |
5512 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41
*/ | |
5513 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsuppor
ted sequence \xEF\x41 */ | |
5514 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ | |
5515 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ | |
5516 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ | |
5517 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ | |
5518 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ | |
5519 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ | |
5520 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; | |
5521 static const UChar expected[] ={ | |
5522 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test
*/ | |
5523 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati
test */ | |
5524 0x0038, 0x0C95, 0x000A, /* Kannada test */ | |
5525 0x0039, 0x0D15, 0x000A, /* Malayalam test */ | |
5526 0x003A, 0x0A95, 0x000A, /* Gujarati test */ | |
5527 0x003B, 0x0A15, 0x000A, /* Punjabi test */ | |
5528 }; | |
5529 | |
5530 UErrorCode status = U_ZERO_ERROR; | |
5531 UConverter* conv = ucnv_open("iscii", &status); | |
5532 UChar dest[100] = {'\0'}; | |
5533 UChar* target = dest; | |
5534 UChar* targetLimit = dest+100; | |
5535 const char* source = data; | |
5536 const char* sourceLimit = data+strlen(data); | |
5537 const UChar* exp = expected; | |
5538 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE,
&status); | |
5539 if(U_FAILURE(status)){ | |
5540 log_data_err("conversion failed: %s \n", u_errorName(status)); | |
5541 } | |
5542 targetLimit = target; | |
5543 target = dest; | |
5544 | |
5545 printUSeq(target, targetLimit-target); | |
5546 | |
5547 while(target<targetLimit){ | |
5548 if(*exp!=*target){ | |
5549 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n
", *exp, *target); | |
5550 } | |
5551 target++; | |
5552 exp++; | |
5553 } | |
5554 ucnv_close(conv); | |
5555 } | |
5556 | |
5557 static void | |
5558 TestIsFixedWidth() { | |
5559 UErrorCode status = U_ZERO_ERROR; | |
5560 UConverter *cnv = NULL; | |
5561 int32_t i; | |
5562 | |
5563 const char *fixedWidth[] = { | |
5564 "US-ASCII", | |
5565 "UTF32", | |
5566 "ibm-5478_P100-1995" | |
5567 }; | |
5568 | |
5569 const char *notFixedWidth[] = { | |
5570 "GB18030", | |
5571 "UTF8", | |
5572 "windows-949-2000", | |
5573 "UTF16" | |
5574 }; | |
5575 | |
5576 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) { | |
5577 cnv = ucnv_open(fixedWidth[i], &status); | |
5578 if (cnv == NULL || U_FAILURE(status)) { | |
5579 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_er
rorName(status)); | |
5580 continue; | |
5581 } | |
5582 | |
5583 if (!ucnv_isFixedWidth(cnv, &status)) { | |
5584 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedW
idth[i]); | |
5585 } | |
5586 ucnv_close(cnv); | |
5587 } | |
5588 | |
5589 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) { | |
5590 cnv = ucnv_open(notFixedWidth[i], &status); | |
5591 if (cnv == NULL || U_FAILURE(status)) { | |
5592 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u
_errorName(status)); | |
5593 continue; | |
5594 } | |
5595 | |
5596 if (ucnv_isFixedWidth(cnv, &status)) { | |
5597 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", not
FixedWidth[i]); | |
5598 } | |
5599 ucnv_close(cnv); | |
5600 } | |
5601 } | |
OLD | NEW |