OLD | NEW |
| (Empty) |
1 /******************************************************************** | |
2 * COPYRIGHT: | |
3 * Copyright (c) 1997-2013, International Business Machines Corporation and | |
4 * others. All Rights Reserved. | |
5 ********************************************************************/ | |
6 /* | |
7 ******************************************************************************** | |
8 * File NCCBTST.C | |
9 * | |
10 * Modification History: | |
11 * Name Description | |
12 * Madhu Katragadda 7/21/1999 Testing error callback routines | |
13 ******************************************************************************** | |
14 */ | |
15 #include <stdio.h> | |
16 #include <stdlib.h> | |
17 #include <string.h> | |
18 #include <ctype.h> | |
19 #include "cstring.h" | |
20 #include "unicode/uloc.h" | |
21 #include "unicode/ucnv.h" | |
22 #include "unicode/ucnv_err.h" | |
23 #include "cintltst.h" | |
24 #include "unicode/utypes.h" | |
25 #include "unicode/ustring.h" | |
26 #include "nccbtst.h" | |
27 #include "unicode/ucnv_cb.h" | |
28 #include "unicode/utf16.h" | |
29 | |
/* Large buffer size used by the callback tests alongside a size of 1. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every use of an argument is parenthesized so that arguments containing
 * operators with lower precedence than '<' or '?:' are grouped correctly.
 * Note: each argument may be evaluated twice; do not pass expressions
 * with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Input/output buffer sizes of the test run in progress; reported by setNuConvTestName(). */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Human-readable description of the current test, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];
38 | |
/*
 * Writes the first `len` bytes of `a` through log_verbose() as a
 * brace-delimited list of two-digit upper-case hex values.
 * Nothing is printed between the braces when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
47 | |
48 static void printUSeq(const UChar* a, int len) | |
49 { | |
50 int i=0; | |
51 log_verbose("{"); | |
52 while (i<len) | |
53 log_verbose(" 0x%04x, ", a[i++]); | |
54 log_verbose("}\n"); | |
55 } | |
56 | |
/*
 * Writes the first `len` bytes of `a` to stderr as a brace-delimited
 * list of two-digit lower-case hex values.
 * Nothing is printed between the braces when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "  0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
65 | |
66 static void printUSeqErr(const UChar* a, int len) | |
67 { | |
68 int i=0; | |
69 fprintf(stderr, "{"); | |
70 while (i<len) | |
71 fprintf(stderr, "0x%04x, ", a[i++]); | |
72 fprintf(stderr,"}\n"); | |
73 } | |
74 | |
75 static void setNuConvTestName(const char *codepage, const char *direction) | |
76 { | |
77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufS
iz=%d]", | |
78 codepage, | |
79 direction, | |
80 (int)gInBufferSize, | |
81 (int)gOutBufferSize); | |
82 } | |
83 | |
84 | |
85 static void TestCallBackFailure(void); | |
86 | |
87 void addTestConvertErrorCallBack(TestNode** root); | |
88 | |
89 void addTestConvertErrorCallBack(TestNode** root) | |
90 { | |
91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); | |
92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); | |
93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); | |
94 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCal
lBack"); | |
95 | |
96 #if !UCONFIG_NO_LEGACY_CONVERSION | |
97 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOther
CallBack"); | |
98 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBa
ck"); | |
99 #endif | |
100 | |
101 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); | |
102 } | |
103 | |
104 static void TestSkipCallBack() | |
105 { | |
106 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
107 TestSkip(1,NEW_MAX_BUFFER); | |
108 TestSkip(1,1); | |
109 TestSkip(NEW_MAX_BUFFER, 1); | |
110 } | |
111 | |
112 static void TestStopCallBack() | |
113 { | |
114 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
115 TestStop(1,NEW_MAX_BUFFER); | |
116 TestStop(1,1); | |
117 TestStop(NEW_MAX_BUFFER, 1); | |
118 } | |
119 | |
120 static void TestSubCallBack() | |
121 { | |
122 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
123 TestSub(1,NEW_MAX_BUFFER); | |
124 TestSub(1,1); | |
125 TestSub(NEW_MAX_BUFFER, 1); | |
126 | |
127 #if !UCONFIG_NO_LEGACY_CONVERSION | |
128 TestEBCDIC_STATEFUL_Sub(1, 1); | |
129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); | |
130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); | |
131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
132 #endif | |
133 } | |
134 | |
135 static void TestSubWithValueCallBack() | |
136 { | |
137 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
138 TestSubWithValue(1,NEW_MAX_BUFFER); | |
139 TestSubWithValue(1,1); | |
140 TestSubWithValue(NEW_MAX_BUFFER, 1); | |
141 } | |
142 | |
143 #if !UCONFIG_NO_LEGACY_CONVERSION | |
144 static void TestLegalAndOtherCallBack() | |
145 { | |
146 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
147 TestLegalAndOthers(1,NEW_MAX_BUFFER); | |
148 TestLegalAndOthers(1,1); | |
149 TestLegalAndOthers(NEW_MAX_BUFFER, 1); | |
150 } | |
151 | |
152 static void TestSingleByteCallBack() | |
153 { | |
154 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
155 TestSingleByte(1,NEW_MAX_BUFFER); | |
156 TestSingleByte(1,1); | |
157 TestSingleByte(NEW_MAX_BUFFER, 1); | |
158 } | |
159 #endif | |
160 | |
161 static void TestSkip(int32_t inputsize, int32_t outputsize) | |
162 { | |
163 static const uint8_t expskipIBM_949[]= { | |
164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
165 | |
166 static const uint8_t expskipIBM_943[] = { | |
167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; | |
168 | |
169 static const uint8_t expskipIBM_930[] = { | |
170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; | |
171 | |
172 gInBufferSize = inputsize; | |
173 gOutBufferSize = outputsize; | |
174 | |
175 /*From Unicode*/ | |
176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); | |
177 | |
178 #if !UCONFIG_NO_LEGACY_CONVERSION | |
179 { | |
180 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0
xD700 }; | |
181 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
182 | |
183 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; | |
184 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; | |
185 | |
186 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleT
ext[0]), | |
187 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", | |
188 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) | |
189 log_err("u-> ibm-949 with skip did not match.\n"); | |
190 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampl
eText2[0]), | |
191 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", | |
192 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) | |
193 log_err("u-> ibm-943 with skip did not match.\n"); | |
194 } | |
195 | |
196 { | |
197 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d6
4, 0x63, 0xff5e, 0x6d66 }; | |
198 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d
, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; | |
199 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8,
8, 8 }; | |
200 | |
201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to c
heck correct state transitions */ | |
202 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, | |
203 fromUBytes, sizeof(fromUBytes), | |
204 "ibm-930", | |
205 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, | |
206 NULL, 0) | |
207 ) { | |
208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\
n"); | |
209 } | |
210 } | |
211 #endif | |
212 | |
213 { | |
214 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800,
0xdfff, 0x39 }; | |
215 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; | |
216 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; | |
217 | |
218 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0
xdfff, 0x39 }; | |
219 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; | |
220 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; | |
221 | |
222 /* US-ASCII */ | |
223 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_U
CHAR, | |
224 usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
225 "US-ASCII", | |
226 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffset
s, | |
227 NULL, 0) | |
228 ) { | |
229 log_err("u->US-ASCII with skip did not match.\n"); | |
230 } | |
231 | |
232 #if !UCONFIG_NO_LEGACY_CONVERSION | |
233 /* SBCS NLTC codepage 367 for US-ASCII */ | |
234 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_U
CHAR, | |
235 usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
236 "ibm-367", | |
237 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffset
s, | |
238 NULL, 0) | |
239 ) { | |
240 log_err("u->ibm-367 with skip did not match.\n"); | |
241 } | |
242 #endif | |
243 | |
244 /* ISO-Latin-1 */ | |
245 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCH
AR, | |
246 latin1FromUBytes, sizeof(latin1FromUBytes), | |
247 "LATIN_1", | |
248 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets
, | |
249 NULL, 0) | |
250 ) { | |
251 log_err("u->LATIN_1 with skip did not match.\n"); | |
252 } | |
253 | |
254 #if !UCONFIG_NO_LEGACY_CONVERSION | |
255 /* windows-1252 */ | |
256 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCH
AR, | |
257 latin1FromUBytes, sizeof(latin1FromUBytes), | |
258 "windows-1252", | |
259 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets
, | |
260 NULL, 0) | |
261 ) { | |
262 log_err("u->windows-1252 with skip did not match.\n"); | |
263 } | |
264 } | |
265 | |
266 { | |
267 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
268 static const uint8_t toIBM943[]= { 0x61, 0x61 }; | |
269 static const int32_t offset[]= {0, 4}; | |
270 | |
271 /* EUC_JP*/ | |
272 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
273 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
274 0x61, 0x8e, 0xe0, | |
275 }; | |
276 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; | |
277 | |
278 /*EUC_TW*/ | |
279 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
280 static const uint8_t to_euc_tw[]={ | |
281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
282 0x61, 0xe6, 0xca, 0x8a, | |
283 }; | |
284 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7,
8,}; | |
285 | |
286 /*ISO-2022-JP*/ | |
287 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/
,0x0042, }; | |
288 static const uint8_t to_iso_2022_jp[]={ | |
289 0x41, | |
290 0x42, | |
291 | |
292 }; | |
293 static const int32_t from_iso_2022_jpOffs [] ={0,2}; | |
294 | |
295 /*ISO-2022-JP*/ | |
296 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,
0xd800/*illegal*/,0x0042, }; | |
297 static const uint8_t to_iso_2022_jp2[]={ | |
298 0x41, | |
299 0x43, | |
300 | |
301 }; | |
302 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; | |
303 | |
304 /*ISO-2022-cn*/ | |
305 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*
/, 0x0042, }; | |
306 static const uint8_t to_iso_2022_cn[]={ | |
307 0x41, 0x42 | |
308 }; | |
309 static const int32_t from_iso_2022_cnOffs [] ={ | |
310 0, 2 | |
311 }; | |
312 | |
313 /*ISO-2022-CN*/ | |
314 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*
/,0x43,0xd800/*illegal*/,0x0042, }; | |
315 static const uint8_t to_iso_2022_cn1[]={ | |
316 0x41, 0x43 | |
317 | |
318 }; | |
319 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; | |
320 | |
321 /*ISO-2022-kr*/ | |
322 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042, }; | |
323 static const uint8_t to_iso_2022_kr[]={ | |
324 0x1b, 0x24, 0x29, 0x43, | |
325 0x41, | |
326 0x0e, 0x25, 0x50, | |
327 0x25, 0x50, | |
328 0x0f, 0x42, | |
329 }; | |
330 static const int32_t from_iso_2022_krOffs [] ={ | |
331 -1,-1,-1,-1, | |
332 0, | |
333 1,1,1, | |
334 3,3, | |
335 4,4 | |
336 }; | |
337 | |
338 /*ISO-2022-kr*/ | |
339 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*una
ssigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
340 static const uint8_t to_iso_2022_kr1[]={ | |
341 0x1b, 0x24, 0x29, 0x43, | |
342 0x41, | |
343 0x0e, 0x25, 0x50, | |
344 0x25, 0x50, | |
345 | |
346 }; | |
347 static const int32_t from_iso_2022_krOffs1 [] ={ | |
348 -1,-1,-1,-1, | |
349 0, | |
350 1,1,1, | |
351 3,3, | |
352 | |
353 }; | |
354 /* HZ encoding */ | |
355 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; | |
356 | |
357 static const uint8_t to_hz[]={ | |
358 0x7e, 0x7d, 0x41, | |
359 0x7e, 0x7b, 0x26, 0x30, | |
360 0x26, 0x30, | |
361 0x7e, 0x7d, 0x42, | |
362 | |
363 }; | |
364 static const int32_t from_hzOffs [] ={ | |
365 0,0,0, | |
366 1,1,1,1, | |
367 3,3, | |
368 4,4,4,4 | |
369 }; | |
370 | |
371 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/
,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
372 | |
373 static const uint8_t to_hz1[]={ | |
374 0x7e, 0x7d, 0x41, | |
375 0x7e, 0x7b, 0x26, 0x30, | |
376 0x26, 0x30, | |
377 | |
378 | |
379 }; | |
380 static const int32_t from_hzOffs1 [] ={ | |
381 0,0,0, | |
382 1,1,1,1, | |
383 3,3, | |
384 | |
385 }; | |
386 | |
387 #endif | |
388 | |
389 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042,
}; | |
390 | |
391 static const uint8_t to_SCSU[]={ | |
392 0x41, | |
393 0x42 | |
394 | |
395 | |
396 }; | |
397 static const int32_t from_SCSUOffs [] ={ | |
398 0, | |
399 2, | |
400 | |
401 }; | |
402 | |
403 #if !UCONFIG_NO_LEGACY_CONVERSION | |
404 /* ISCII */ | |
405 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0
042, }; | |
406 static const uint8_t to_iscii[]={ | |
407 0x41, | |
408 0x42, | |
409 }; | |
410 static const int32_t from_isciiOffs [] ={ | |
411 0,2, | |
412 | |
413 }; | |
414 /*ISCII*/ | |
415 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43
,0xd800/*illegal*/,0x0042, }; | |
416 static const uint8_t to_iscii1[]={ | |
417 0x44, | |
418 0x43, | |
419 | |
420 }; | |
421 static const int32_t from_isciiOffs1 [] ={0,2}; | |
422 | |
423 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
424 toIBM943, sizeof(toIBM943), "ibm-943", | |
425 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) | |
426 log_err("u-> ibm-943 with skip did not match.\n"); | |
427 | |
428 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
429 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
430 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) | |
431 log_err("u-> euc-jp with skip did not match.\n"); | |
432 | |
433 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
434 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
435 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) | |
436 log_err("u-> euc-tw with skip did not match.\n"); | |
437 | |
438 /*iso_2022_jp*/ | |
439 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), | |
440 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
441 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) | |
442 log_err("u-> iso-2022-jp with skip did not match.\n"); | |
443 | |
444 /* with context */ | |
445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso
_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), | |
446 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); | |
449 | |
450 /*iso_2022_cn*/ | |
451 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), | |
452 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) | |
454 log_err("u-> iso-2022-cn with skip did not match.\n"); | |
455 /*with context*/ | |
456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso
_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), | |
457 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", | |
458 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); | |
460 | |
461 /*iso_2022_kr*/ | |
462 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), | |
463 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) | |
465 log_err("u-> iso-2022-kr with skip did not match.\n"); | |
466 /*with context*/ | |
467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso
_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), | |
468 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", | |
469 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); | |
471 | |
472 /*hz*/ | |
473 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), | |
474 to_hz, sizeof(to_hz), "HZ", | |
475 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) | |
476 log_err("u-> HZ with skip did not match.\n"); | |
477 /*with context*/ | |
478 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText
1)/sizeof(hz_inputText1[0]), | |
479 to_hz1, sizeof(to_hz1), "hz", | |
480 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_
ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.
\n"); | |
482 #endif | |
483 | |
484 /*SCSU*/ | |
485 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), | |
486 to_SCSU, sizeof(to_SCSU), "SCSU", | |
487 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) | |
488 log_err("u-> SCSU with skip did not match.\n"); | |
489 | |
490 #if !UCONFIG_NO_LEGACY_CONVERSION | |
491 /*ISCII*/ | |
492 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), | |
493 to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
494 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) | |
495 log_err("u-> iscii with skip did not match.\n"); | |
496 /*with context*/ | |
497 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inp
utText1)/sizeof(iscii_inputText1[0]), | |
498 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", | |
499 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_ST
OP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not mat
ch.\n"); | |
501 #endif | |
502 } | |
503 | |
504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n")
; | |
505 { | |
506 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU
1 text 1 */ | |
507 0xFB, 0xEE, 0x28, /* from source offset 0 */ | |
508 0x24, 0x1E, 0x52, | |
509 0xB2, | |
510 0x20, | |
511 0xB3, | |
512 0xB1, | |
513 0x0D, | |
514 0x0A, | |
515 | |
516 0x20, /* from 8 */ | |
517 0x00, | |
518 0xD0, 0x6C, | |
519 0xB6, | |
520 0xD8, 0xA5, | |
521 0x20, | |
522 0x68, | |
523 0x59, | |
524 | |
525 0xF9, 0x28, /* from 16 */ | |
526 0x6D, | |
527 0x20, | |
528 0x73, | |
529 0xE0, 0x2D, | |
530 0xDE, 0x43, | |
531 0xD0, 0x33, | |
532 0x20, | |
533 | |
534 0xFA, 0x83, /* from 24 */ | |
535 0x25, 0x01, | |
536 0xFB, 0x16, 0x87, | |
537 0x4B, 0x16, | |
538 0x20, | |
539 0xE6, 0xBD, | |
540 0xEB, 0x5B, | |
541 0x4B, 0xCC, | |
542 | |
543 0xF9, 0xA2, /* from 32 */ | |
544 0xFC, 0x10, 0x3E, | |
545 0xFE, 0x16, 0x3A, 0x8C, | |
546 0x20, | |
547 0xFC, 0x03, 0xAC, | |
548 | |
549 0x01, /* from 41 */ | |
550 0xDE, 0x83, | |
551 0x20, | |
552 0x09 | |
553 }; | |
554 static const UChar expected[]={ | |
555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ | |
556 0x0063, 0x0061, 0x000D, 0x000A, | |
557 | |
558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ | |
559 0x0930, 0x0020, 0x0918, 0x0909, | |
560 | |
561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ | |
562 0x4000, 0x4E00, 0x7777, 0x0020, | |
563 | |
564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ | |
565 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
566 | |
567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ | |
568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
569 | |
570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ | |
571 0x0009 | |
572 }; | |
573 static const int32_t offsets[]={ | |
574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, | |
575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, | |
576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, | |
577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, | |
578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, | |
579 41, 42, 42, 43, 44 | |
580 }; | |
581 | |
582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-b
yte and offsets behavior */ | |
583 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
584 sampleText, sizeof(sampleText), | |
585 "BOCU-1", | |
586 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
587 ) { | |
588 log_err("u->BOCU-1 with skip did not match.\n"); | |
589 } | |
590 } | |
591 | |
592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n")
; | |
593 { | |
594 const uint8_t sampleText[]={ | |
595 0x61, /* 'a' */ | |
596 0xc4, 0xb5, /* U+0135 */ | |
597 0xed, 0x80, 0xa0, /* Hangul U+d020 */ | |
598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ | |
599 0xee, 0x80, 0x80, /* PUA U+e000 */ | |
600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc
01 */ | |
601 0x62, /* 'b' */ | |
602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d80
1 */ | |
603 0xd0, 0x80 /* U+0400 */ | |
604 }; | |
605 UChar expected[]={ | |
606 0x0061, | |
607 0x0135, | |
608 0xd020, | |
609 0xd801, 0xdc01, | |
610 0xe000, | |
611 0xdc01, | |
612 0x0062, | |
613 0xd801, | |
614 0x0400 | |
615 }; | |
616 int32_t offsets[]={ | |
617 0, | |
618 1, 1, | |
619 2, 2, 2, | |
620 3, 3, 3, 4, 4, 4, | |
621 5, 5, 5, | |
622 6, 6, 6, | |
623 7, | |
624 8, 8, 8, | |
625 9, 9 | |
626 }; | |
627 | |
628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversi
on and offsets behavior */ | |
629 | |
630 /* without offsets */ | |
631 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
632 sampleText, sizeof(sampleText), | |
633 "CESU-8", | |
634 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) | |
635 ) { | |
636 log_err("u->CESU-8 with skip did not match.\n"); | |
637 } | |
638 | |
639 /* with offsets */ | |
640 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
641 sampleText, sizeof(sampleText), | |
642 "CESU-8", | |
643 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
644 ) { | |
645 log_err("u->CESU-8 with skip did not match.\n"); | |
646 } | |
647 } | |
648 | |
649 /*to Unicode*/ | |
650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); | |
651 | |
652 #if !UCONFIG_NO_LEGACY_CONVERSION | |
653 { | |
654 | |
655 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD7
00 }; | |
656 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
657 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
658 | |
659 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; | |
660 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; | |
661 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; | |
662 | |
663 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), | |
664 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_9
49skiptoUnicode),"ibm-949", | |
665 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) | |
666 log_err("ibm-949->u with skip did not match.\n"); | |
667 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), | |
668 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_9
43skiptoUnicode[0]),"ibm-943", | |
669 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) | |
670 log_err("ibm-943->u with skip did not match.\n"); | |
671 | |
672 | |
673 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), | |
674 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_9
30skiptoUnicode[0]),"ibm-930", | |
675 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) | |
676 log_err("ibm-930->u with skip did not match.\n"); | |
677 | |
678 | |
679 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_93
0), | |
680 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_9
30skiptoUnicode[0]),"ibm-930", | |
681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_C
HAR_FOUND )) | |
682 log_err("ibm-930->u with skip did not match.\n"); | |
683 } | |
684 #endif | |
685 | |
686 { | |
687 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; | |
688 static const UChar usasciiToU[] = { 0x61, 0x31 }; | |
689 static const int32_t usasciiToUOffsets[] = { 0, 2 }; | |
690 | |
691 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; | |
692 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; | |
693 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; | |
694 | |
695 /* US-ASCII */ | |
696 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
697 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
698 "US-ASCII", | |
699 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
700 NULL, 0) | |
701 ) { | |
702 log_err("US-ASCII->u with skip did not match.\n"); | |
703 } | |
704 | |
705 #if !UCONFIG_NO_LEGACY_CONVERSION | |
706 /* SBCS NLTC codepage 367 for US-ASCII */ | |
707 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
708 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
709 "ibm-367", | |
710 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
711 NULL, 0) | |
712 ) { | |
713 log_err("ibm-367->u with skip did not match.\n"); | |
714 } | |
715 #endif | |
716 | |
717 /* ISO-Latin-1 */ | |
718 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
719 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
720 "LATIN_1", | |
721 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
722 NULL, 0) | |
723 ) { | |
724 log_err("LATIN_1->u with skip did not match.\n"); | |
725 } | |
726 | |
727 #if !UCONFIG_NO_LEGACY_CONVERSION | |
728 /* windows-1252 */ | |
729 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
730 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
731 "windows-1252", | |
732 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
733 NULL, 0) | |
734 ) { | |
735 log_err("windows-1252->u with skip did not match.\n"); | |
736 } | |
737 #endif | |
738 } | |
739 | |
740 #if !UCONFIG_NO_LEGACY_CONVERSION | |
741 { | |
742 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
744 }; | |
745 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 | |
746 }; | |
747 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; | |
748 | |
749 | |
750 /* euc-jp*/ | |
751 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, | |
752 0x8f, 0xda, 0xa1, /*unassigned*/ | |
753 0x8e, 0xe0, | |
754 }; | |
755 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; | |
756 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; | |
757 | |
758 /*EUC_TW*/ | |
759 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2,
0xdc, 0xe5, | |
760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
761 0xe6, 0xca, 0x8a, | |
762 }; | |
763 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0
x8a, }; | |
764 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; | |
765 /*iso-2022-jp*/ | |
766 static const uint8_t sampleTxt_iso_2022_jp[]={ | |
767 0x41, | |
768 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ | |
769 0x1b, 0x28, 0x42, 0x42, | |
770 | |
771 }; | |
772 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; | |
773 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; | |
774 | |
775 /*iso-2022-cn*/ | |
776 static const uint8_t sampleTxt_iso_2022_cn[]={ | |
777 0x0f, 0x41, 0x44, | |
778 0x1B, 0x24, 0x29, 0x47, | |
779 0x0E, 0x40, 0x6f, /*unassigned*/ | |
780 0x0f, 0x42, | |
781 | |
782 }; | |
783 | |
784 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; | |
785 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; | |
786 | |
787 /*iso-2022-kr*/ | |
788 static const uint8_t sampleTxt_iso_2022_kr[]={ | |
789 0x1b, 0x24, 0x29, 0x43, | |
790 0x41, | |
791 0x0E, 0x7f, 0x1E, | |
792 0x0e, 0x25, 0x50, | |
793 0x0f, 0x51, | |
794 0x42, 0x43, | |
795 | |
796 }; | |
797 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0
x43}; | |
798 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 ,
14 }; | |
799 | |
800 /*hz*/ | |
801 static const uint8_t sampleTxt_hz[]={ | |
802 0x41, | |
803 0x7e, 0x7b, 0x26, 0x30, | |
804 0x7f, 0x1E, /*unassigned*/ | |
805 0x26, 0x30, | |
806 0x7e, 0x7d, 0x42, | |
807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
808 0x7e, 0x7d, 0x42, | |
809 }; | |
810 static const UChar hztoUnicode[]={ | |
811 0x41, | |
812 0x03a0, | |
813 0x03A0, | |
814 0x42, | |
815 0x42,}; | |
816 | |
817 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; | |
818 | |
819 /*ISCII*/ | |
820 static const uint8_t sampleTxt_iscii[]={ | |
821 0x41, | |
822 0xa1, | |
823 0xEB, /*unassigned*/ | |
824 0x26, | |
825 0x30, | |
826 0xa2, | |
827 0xEC, /*unassigned*/ | |
828 0x42, | |
829 }; | |
830 static const UChar isciitoUnicode[]={ | |
831 0x41, | |
832 0x0901, | |
833 0x26, | |
834 0x30, | |
835 0x0902, | |
836 0x42, | |
837 }; | |
838 | |
839 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; | |
840 | |
841 /*LMBCS*/ | |
842 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, | |
843 0x12, 0x92, 0xa0, /*unassigned*/ | |
844 0x12, 0x92, 0xA1, | |
845 }; | |
846 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; | |
847 static const int32_t fromLMBCS[] = {0, 6}; | |
848 | |
849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCI
DIC_STATEFUL), | |
850 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
851 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
853 | |
854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sa
mpleTxtEBCIDIC_STATEFUL), | |
855 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
856 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U
_ILLEGAL_CHAR_FOUND )) | |
857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
858 | |
859 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
860 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode
[0]),"IBM-eucJP", | |
861 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) | |
862 log_err("euc-jp->u with skip did not match.\n"); | |
863 | |
864 | |
865 | |
866 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
867 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", | |
868 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) | |
869 log_err("euc-tw->u with skip did not match.\n"); | |
870 | |
871 | |
872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_202
2_jp), | |
873 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", | |
874 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) | |
875 log_err("iso-2022-jp->u with skip did not match.\n"); | |
876 | |
877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_202
2_cn), | |
878 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2
022_cntoUnicode[0]),"iso-2022-cn", | |
879 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) | |
880 log_err("iso-2022-cn->u with skip did not match.\n"); | |
881 | |
882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_202
2_kr), | |
883 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2
022_krtoUnicode[0]),"iso-2022-kr", | |
884 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) | |
885 log_err("iso-2022-kr->u with skip did not match.\n"); | |
886 | |
887 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
888 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
889 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) | |
890 log_err("HZ->u with skip did not match.\n"); | |
891 | |
892 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
893 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]
),"ISCII,version=0", | |
894 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) | |
895 log_err("iscii->u with skip did not match.\n"); | |
896 | |
897 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), | |
898 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0])
,"LMBCS-1", | |
899 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) | |
900 log_err("LMBCS->u with skip did not match.\n"); | |
901 | |
902 } | |
903 #endif | |
904 | |
905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); | |
906 { | |
907 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
908 0xe0, 0x80, 0x61,}; | |
909 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; | |
910 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; | |
911 | |
912 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
913 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
914 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
915 log_err("utf8->u with skip did not match.\n");; | |
916 } | |
917 | |
918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); | |
919 { | |
920 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
921 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfff
e}; | |
922 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
923 | |
924 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
925 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
926 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
927 log_err("scsu->u with skip did not match.\n"); | |
928 } | |
929 | |
930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
931 { | |
932 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBO
CU1 text 1 */ | |
933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ | |
934 0x24, 0x1E, 0x52, /* 3 */ | |
935 0xB2, /* 6 */ | |
936 0x20, /* 7 */ | |
937 0x40, 0x07, /* 8 - wrong trail byte */ | |
938 0xB3, /* 10 */ | |
939 0xB1, /* 11 */ | |
940 0xD0, 0x20, /* 12 - wrong trail byte */ | |
941 0x0D, /* 14 */ | |
942 0x0A, /* 15 */ | |
943 0x20, /* 16 */ | |
944 0x00, /* 17 */ | |
945 0xD0, 0x6C, /* 18 */ | |
946 0xB6, /* 20 */ | |
947 0xD8, 0xA5, /* 21 */ | |
948 0x20, /* 23 */ | |
949 0x68, /* 24 */ | |
950 0x59, /* 25 */ | |
951 0xF9, 0x28, /* 26 */ | |
952 0x6D, /* 28 */ | |
953 0x20, /* 29 */ | |
954 0x73, /* 30 */ | |
955 0xE0, 0x2D, /* 31 */ | |
956 0xDE, 0x43, /* 33 */ | |
957 0xD0, 0x33, /* 35 */ | |
958 0x20, /* 37 */ | |
959 0xFA, 0x83, /* 38 */ | |
960 0x25, 0x01, /* 40 */ | |
961 0xFB, 0x16, 0x87, /* 42 */ | |
962 0x4B, 0x16, /* 45 */ | |
963 0x20, /* 47 */ | |
964 0xE6, 0xBD, /* 48 */ | |
965 0xEB, 0x5B, /* 50 */ | |
966 0x4B, 0xCC, /* 52 */ | |
967 0xF9, 0xA2, /* 54 */ | |
968 0xFC, 0x10, 0x3E, /* 56 */ | |
969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ | |
970 0x20, /* 63 */ | |
971 0xFC, 0x03, 0xAC, /* 64 */ | |
972 0xFF, /* 67 - FF just resets the state without enc
oding anything */ | |
973 0x01, /* 68 */ | |
974 0xDE, 0x83, /* 69 */ | |
975 0x20, /* 71 */ | |
976 0x09 /* 72 */ | |
977 }; | |
978 UChar expected[]={ | |
979 0xFEFF, 0x0061, 0x0062, 0x0020, | |
980 0x0063, 0x0061, 0x000D, 0x000A, | |
981 0x0020, 0x0000, 0x00DF, 0x00E6, | |
982 0x0930, 0x0020, 0x0918, 0x0909, | |
983 0x3086, 0x304D, 0x0020, 0x3053, | |
984 0x4000, 0x4E00, 0x7777, 0x0020, | |
985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, | |
986 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
987 0xD800, 0xDC00, 0xD845, 0xDDDD, | |
988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
989 0xDFFF, 0x0001, 0x0E40, 0x0020, | |
990 0x0009 | |
991 }; | |
992 int32_t offsets[]={ | |
993 0, 3, 6, 7, /* skip 8, */ | |
994 10, 11, /* skip 12, */ | |
995 14, 15, 16, 17, 18, | |
996 20, 21, 23, 24, 25, 26, 28, 29, | |
997 30, 31, 33, 35, 37, 38, | |
998 40, 42, 45, 47, 48, | |
999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, | |
1000 63, 64, /* trail */ 64, /* reset only 67, */ | |
1001 68, 69, | |
1002 71, 72 | |
1003 }; | |
1004 | |
1005 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1006 expected, ARRAY_LENGTH(expected), "BOCU-1", | |
1007 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1008 ) { | |
1009 log_err("BOCU-1->u with skip did not match.\n"); | |
1010 } | |
1011 } | |
1012 | |
1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
1014 { | |
1015 const uint8_t sampleText[]={ | |
1016 0x61, /* 0 'a' */ | |
1017 0xc0, 0x80, /* 1 non-shortest form */ | |
1018 0xc4, 0xb5, /* 3 U+0135 */ | |
1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ | |
1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401
*/ | |
1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ | |
1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U
+dc01 */ | |
1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+
10000 */ | |
1024 0x62, /* 24 'b' */ | |
1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+
d801 */ | |
1026 0xed, 0xa0, /* 28 incomplete sequence */ | |
1027 0xd0, 0x80 /* 30 U+0400 */ | |
1028 }; | |
1029 UChar expected[]={ | |
1030 0x0061, | |
1031 /* skip */ | |
1032 0x0135, | |
1033 0xd020, | |
1034 0xd801, 0xdc01, | |
1035 0xe000, | |
1036 0xdc01, | |
1037 /* skip */ | |
1038 0x0062, | |
1039 0xd801, | |
1040 0x0400 | |
1041 }; | |
1042 int32_t offsets[]={ | |
1043 0, | |
1044 /* skip 1, */ | |
1045 3, | |
1046 5, | |
1047 8, 11, | |
1048 14, | |
1049 17, | |
1050 /* skip 20, 20, */ | |
1051 24, | |
1052 25, | |
1053 /* skip 28 */ | |
1054 30 | |
1055 }; | |
1056 | |
1057 /* without offsets */ | |
1058 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1059 expected, ARRAY_LENGTH(expected), "CESU-8", | |
1060 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) | |
1061 ) { | |
1062 log_err("CESU-8->u with skip did not match.\n"); | |
1063 } | |
1064 | |
1065 /* with offsets */ | |
1066 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
1067 expected, ARRAY_LENGTH(expected), "CESU-8", | |
1068 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
1069 ) { | |
1070 log_err("CESU-8->u with skip did not match.\n"); | |
1071 } | |
1072 } | |
1073 } | |
1074 | |
1075 static void TestStop(int32_t inputsize, int32_t outputsize) | |
1076 { | |
1077 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD70
0 }; | |
1078 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1079 | |
1080 static const uint8_t expstopIBM_949[]= { | |
1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; | |
1082 | |
1083 static const uint8_t expstopIBM_943[] = { | |
1084 0x9f, 0xaf, 0x9f, 0xb1}; | |
1085 | |
1086 static const uint8_t expstopIBM_930[] = { | |
1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; | |
1088 | |
1089 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; | |
1090 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1091 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
1092 | |
1093 | |
1094 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; | |
1095 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; | |
1096 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; | |
1097 | |
1098 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; | |
1099 static const int32_t fromIBM943Offs [] = { 0, 2}; | |
1100 static const int32_t fromIBM930Offs [] = { 1, 3}; | |
1101 | |
1102 gInBufferSize = inputsize; | |
1103 gOutBufferSize = outputsize; | |
1104 | |
1105 /*From Unicode*/ | |
1106 | |
1107 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1108 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), | |
1109 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", | |
1110 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) | |
1111 log_err("u-> ibm-949 with stop did not match.\n"); | |
1112 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
1113 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", | |
1114 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) | |
1115 log_err("u-> ibm-943 with stop did not match.\n"); | |
1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
1117 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", | |
1118 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) | |
1119 log_err("u-> ibm-930 with stop did not match.\n"); | |
1120 | |
1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); | |
1122 { | |
1123 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
1124 static const uint8_t toIBM943[]= { 0x61,}; | |
1125 static const int32_t offset[]= {0,} ; | |
1126 | |
1127 /*EUC_JP*/ | |
1128 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1129 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; | |
1130 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; | |
1131 | |
1132 /*EUC_TW*/ | |
1133 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1134 static const uint8_t to_euc_tw[]={ | |
1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; | |
1136 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; | |
1137 | |
1138 /*ISO-2022-JP*/ | |
1139 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; | |
1140 static const uint8_t to_iso_2022_jp[]={ | |
1141 0x41, | |
1142 | |
1143 }; | |
1144 static const int32_t from_iso_2022_jpOffs [] ={0,}; | |
1145 | |
1146 /*ISO-2022-cn*/ | |
1147 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1148 static const uint8_t to_iso_2022_cn[]={ | |
1149 0x41, | |
1150 | |
1151 }; | |
1152 static const int32_t from_iso_2022_cnOffs [] ={ | |
1153 0,0, | |
1154 2,2, | |
1155 }; | |
1156 | |
1157 /*ISO-2022-kr*/ | |
1158 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042, }; | |
1159 static const uint8_t to_iso_2022_kr[]={ | |
1160 0x1b, 0x24, 0x29, 0x43, | |
1161 0x41, | |
1162 0x0e, 0x25, 0x50, | |
1163 }; | |
1164 static const int32_t from_iso_2022_krOffs [] ={ | |
1165 -1,-1,-1,-1, | |
1166 0, | |
1167 1,1,1, | |
1168 }; | |
1169 | |
1170 /* HZ encoding */ | |
1171 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; | |
1172 | |
1173 static const uint8_t to_hz[]={ | |
1174 0x7e, 0x7d, 0x41, | |
1175 0x7e, 0x7b, 0x26, 0x30, | |
1176 | |
1177 }; | |
1178 static const int32_t from_hzOffs [] ={ | |
1179 0, 0,0, | |
1180 1,1,1,1, | |
1181 }; | |
1182 | |
1183 /*ISCII*/ | |
1184 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1185 static const uint8_t to_iscii[]={ | |
1186 0x41, | |
1187 }; | |
1188 static const int32_t from_isciiOffs [] ={ | |
1189 0, | |
1190 }; | |
1191 | |
1192 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
1193 toIBM943, sizeof(toIBM943), "ibm-943", | |
1194 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) | |
1195 log_err("u-> ibm-943 with stop did not match.\n"); | |
1196 | |
1197 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
1198 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
1199 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) | |
1200 log_err("u-> euc-jp with stop did not match.\n"); | |
1201 | |
1202 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
1203 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
1204 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1205 log_err("u-> euc-tw with stop did not match.\n"); | |
1206 | |
1207 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), | |
1208 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
1209 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1210 log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1211 | |
1212 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), | |
1213 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
1214 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
1215 log_err("u-> iso-2022-jp with stop did not match.\n"); | |
1216 | |
1217 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), | |
1218 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
1219 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) | |
1220 log_err("u-> iso-2022-cn with stop did not match.\n"); | |
1221 | |
1222 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), | |
1223 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
1224 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) | |
1225 log_err("u-> iso-2022-kr with stop did not match.\n"); | |
1226 | |
1227 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), | |
1228 to_hz, sizeof(to_hz), "HZ", | |
1229 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) | |
1230 log_err("u-> HZ with stop did not match.\n");\ | |
1231 | |
1232 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), | |
1233 to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
1234 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) | |
1235 log_err("u-> iscii with stop did not match.\n"); | |
1236 | |
1237 | |
1238 } | |
1239 #endif | |
1240 | |
1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"
); | |
1242 { | |
1243 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042,
}; | |
1244 | |
1245 static const uint8_t to_SCSU[]={ | |
1246 0x41, | |
1247 | |
1248 }; | |
1249 int32_t from_SCSUOffs [] ={ | |
1250 0, | |
1251 | |
1252 }; | |
1253 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), | |
1254 to_SCSU, sizeof(to_SCSU), "SCSU", | |
1255 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) | |
1256 log_err("u-> SCSU with skip did not match.\n"); | |
1257 | |
1258 } | |
1259 | |
1260 /*to Unicode*/ | |
1261 | |
1262 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1263 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), | |
1264 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949st
optoUnicode[0]),"ibm-949", | |
1265 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) | |
1266 log_err("ibm-949->u with stop did not match.\n"); | |
1267 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), | |
1268 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943st
optoUnicode[0]),"ibm-943", | |
1269 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) | |
1270 log_err("ibm-943->u with stop did not match.\n"); | |
1271 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), | |
1272 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930st
optoUnicode[0]),"ibm-930", | |
1273 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) | |
1274 log_err("ibm-930->u with stop did not match.\n"); | |
1275 | |
1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); | |
1277 { | |
1278 | |
1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1281 }; | |
1282 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; | |
1283 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; | |
1284 | |
1285 | |
1286 /*EUC-JP*/ | |
1287 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, | |
1288 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1289 0x8e, 0xe0, | |
1290 }; | |
1291 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; | |
1292 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; | |
1293 | |
1294 /*EUC_TW*/ | |
1295 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2,
0xdc, 0xe5, | |
1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1297 0xe6, 0xca, 0x8a, | |
1298 }; | |
1299 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; | |
1300 int32_t from_euc_twOffs [] ={ 0, 1, 3}; | |
1301 | |
1302 | |
1303 | |
1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBC
IDIC_STATEFUL), | |
1305 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
1306 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); | |
1308 | |
1309 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1310 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0])
,"IBM-eucJP", | |
1311 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) | |
1312 log_err("euc-jp->u with stop did not match.\n"); | |
1313 | |
1314 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
1315 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", | |
1316 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
1317 log_err("euc-tw->u with stop did not match.\n"); | |
1318 } | |
1319 #endif | |
1320 | |
1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); | |
1322 { | |
1323 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1324 0xe0, 0x80, 0x61,}; | |
1325 static const UChar expected1[] = { 0x0031, 0x4e8c,}; | |
1326 static const int32_t offsets1[] = { 0x0000, 0x0001}; | |
1327 | |
1328 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1329 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
1330 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1331 log_err("utf8->u with stop did not match.\n");; | |
1332 } | |
1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); | |
1334 { | |
1335 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c
,0x04}; | |
1336 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061
}; | |
1337 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; | |
1338 | |
1339 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1340 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
1341 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
1342 log_err("scsu->u with stop did not match.\n");; | |
1343 } | |
1344 | |
1345 } | |
1346 | |
1347 static void TestSub(int32_t inputsize, int32_t outputsize) | |
1348 { | |
1349 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD70
0 }; | |
1350 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1351 | |
1352 static const uint8_t expsubIBM_949[] = | |
1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; | |
1354 | |
1355 static const uint8_t expsubIBM_943[] = { | |
1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; | |
1357 | |
1358 static const uint8_t expsubIBM_930[] = { | |
1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; | |
1360 | |
1361 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0
xD700 }; | |
1362 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1363 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
1364 | |
1365 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
1366 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; | |
1367 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; | |
1368 | |
1369 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; | |
1370 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; | |
1371 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; | |
1372 | |
1373 gInBufferSize = inputsize; | |
1374 gOutBufferSize = outputsize; | |
1375 | |
1376 /*from unicode*/ | |
1377 | |
1378 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1379 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), | |
1380 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", | |
1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) | |
1382 log_err("u-> ibm-949 with subst did not match.\n"); | |
1383 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
1384 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", | |
1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) | |
1386 log_err("u-> ibm-943 with subst did not match.\n"); | |
1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
1388 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", | |
1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) | |
1390 log_err("u-> ibm-930 with subst did not match.\n"); | |
1391 | |
1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
1393 { | |
1394 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
1395 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; | |
1396 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; | |
1397 | |
1398 | |
1399 /* EUC_JP*/ | |
1400 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
1401 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1402 0xf4, 0xfe, 0xf4, 0xfe, | |
1403 0x61, 0x8e, 0xe0, | |
1404 }; | |
1405 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5,
6, 7, 7}; | |
1406 | |
1407 /*EUC_TW*/ | |
1408 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1409 static const uint8_t to_euc_tw[]={ | |
1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1411 0xfd, 0xfe, 0xfd, 0xfe, | |
1412 0x61, 0xe6, 0xca, 0x8a, | |
1413 }; | |
1414 | |
1415 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5,
5, 6, 7, 7, 8,}; | |
1416 | |
1417 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
1418 toIBM943, sizeof(toIBM943), "ibm-943", | |
1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) | |
1420 log_err("u-> ibm-943 with substitute did not match.\n"); | |
1421 | |
1422 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
1423 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) | |
1425 log_err("u-> euc-jp with substitute did not match.\n"); | |
1426 | |
1427 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
1428 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1430 log_err("u-> euc-tw with substitute did not match.\n"); | |
1431 } | |
1432 #endif | |
1433 | |
1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITU
TE \n"); | |
1435 { | |
1436 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
1437 | |
1438 const uint8_t to_SCSU[]={ | |
1439 0x41, | |
1440 0x0e, 0xff,0xfd, | |
1441 0x42 | |
1442 | |
1443 | |
1444 }; | |
1445 int32_t from_SCSUOffs [] ={ | |
1446 0, | |
1447 1,1,1, | |
1448 2, | |
1449 | |
1450 }; | |
1451 const uint8_t to_SCSU_1[]={ | |
1452 0x41, | |
1453 | |
1454 }; | |
1455 int32_t from_SCSUOffs_1 [] ={ | |
1456 0, | |
1457 | |
1458 }; | |
1459 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), | |
1460 to_SCSU, sizeof(to_SCSU), "SCSU", | |
1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) | |
1462 log_err("u-> SCSU with substitute did not match.\n"); | |
1463 | |
1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputT
ext)/sizeof(SCSU_inputText[0]), | |
1465 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", | |
1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_
ILLEGAL_CHAR_FOUND )) | |
1467 log_err("u-> SCSU with substitute did not match.\n"); | |
1468 } | |
1469 | |
1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTIT
UTE\n"); | |
1471 { | |
1472 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801,
0xffff, 0x0061,}; | |
1473 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, | |
1474 0xf0, 0x90, 0x90, 0x81, | |
1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, | |
1476 0xef, 0xbf, 0xbf, 0x61, | |
1477 | |
1478 }; | |
1479 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4,
5, 5, 5, 6 }; | |
1480 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput
[0]), | |
1481 expectedUTF8, sizeof(expectedUTF8), "utf8", | |
1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { | |
1483 log_err("u-> utf8 with stop did not match.\n"); | |
1484 } | |
1485 } | |
1486 | |
1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTI
TUTE\n"); | |
1488 { | |
1489 static const UChar in[]={ 0x0041, 0xfeff }; | |
1490 | |
1491 static const uint8_t out[]={ | |
1492 #if U_IS_BIG_ENDIAN | |
1493 0xfe, 0xff, | |
1494 0x00, 0x41, | |
1495 0xfe, 0xff | |
1496 #else | |
1497 0xff, 0xfe, | |
1498 0x41, 0x00, | |
1499 0xff, 0xfe | |
1500 #endif | |
1501 }; | |
1502 static const int32_t offsets[]={ | |
1503 -1, -1, 0, 0, 1, 1 | |
1504 }; | |
1505 | |
1506 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
1507 out, sizeof(out), "UTF-16", | |
1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NUL
L, 0) | |
1509 ) { | |
1510 log_err("u->UTF-16 with substitute did not match.\n"); | |
1511 } | |
1512 } | |
1513 | |
1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTI
TUTE\n"); | |
1515 { | |
1516 static const UChar in[]={ 0x0041, 0xfeff }; | |
1517 | |
1518 static const uint8_t out[]={ | |
1519 #if U_IS_BIG_ENDIAN | |
1520 0x00, 0x00, 0xfe, 0xff, | |
1521 0x00, 0x00, 0x00, 0x41, | |
1522 0x00, 0x00, 0xfe, 0xff | |
1523 #else | |
1524 0xff, 0xfe, 0x00, 0x00, | |
1525 0x41, 0x00, 0x00, 0x00, | |
1526 0xff, 0xfe, 0x00, 0x00 | |
1527 #endif | |
1528 }; | |
1529 static const int32_t offsets[]={ | |
1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 | |
1531 }; | |
1532 | |
1533 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
1534 out, sizeof(out), "UTF-32", | |
1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NUL
L, 0) | |
1536 ) { | |
1537 log_err("u->UTF-32 with substitute did not match.\n"); | |
1538 } | |
1539 } | |
1540 | |
1541 /*to unicode*/ | |
1542 | |
1543 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1544 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), | |
1545 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subt
oUnicode[0]),"ibm-949", | |
1546 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) | |
1547 log_err("ibm-949->u with substitute did not match.\n"); | |
1548 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), | |
1549 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subt
oUnicode[0]),"ibm-943", | |
1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) | |
1551 log_err("ibm-943->u with substitute did not match.\n"); | |
1552 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), | |
1553 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subt
oUnicode[0]),"ibm-930", | |
1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) | |
1555 log_err("ibm-930->u with substitute did not match.\n"); | |
1556 | |
1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
1558 { | |
1559 | |
1560 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
1562 }; | |
1563 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 | |
1564 }; | |
1565 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; | |
1566 | |
1567 | |
1568 /* EUC_JP*/ | |
1569 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1570 0x8f, 0xda, 0xa1, /*unassigned*/ | |
1571 0x8e, 0xe0, 0x8a | |
1572 }; | |
1573 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a
}; | |
1574 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; | |
1575 | |
1576 /*EUC_TW*/ | |
1577 const uint8_t sampleTxt_euc_tw[]={ | |
1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
1580 0xe6, 0xca, 0x8a, | |
1581 }; | |
1582 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a,
}; | |
1583 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; | |
1584 | |
1585 | |
1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCI
DIC_STATEFUL), | |
1587 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof
(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
1588 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )
) | |
1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); | |
1590 | |
1591 | |
1592 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
1593 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"
IBM-eucJP", | |
1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) | |
1595 log_err("euc-jp->u with substitute did not match.\n"); | |
1596 | |
1597 | |
1598 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
1599 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"
euc-tw", | |
1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
1601 log_err("euc-tw->u with substitute did not match.\n"); | |
1602 | |
1603 | |
1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_e
uc_jp), | |
1605 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"
IBM-eucJP", | |
1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGA
L_CHAR_FOUND)) | |
1607 log_err("euc-jp->u with substitute did not match.\n"); | |
1608 } | |
1609 #endif | |
1610 | |
1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE
\n"); | |
1612 { | |
1613 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
1614 0xe0, 0x80, 0x61,}; | |
1615 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; | |
1616 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; | |
1617 | |
1618 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1619 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
1620 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1621 log_err("utf8->u with substitute did not match.\n");; | |
1622 } | |
1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \
n"); | |
1624 { | |
1625 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
1626 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfff
d}; | |
1627 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
1628 | |
1629 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
1630 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
1631 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
1632 log_err("scsu->u with stop did not match.\n");; | |
1633 } | |
1634 | |
1635 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1636 log_verbose("Testing ibm-930 subchar/subchar1\n"); | |
1637 { | |
1638 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65,
0x6d66, 0xdf }; | |
1639 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0x
fe, 0x46, 0x6b, 0x0f, 0x3f }; | |
1640 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2,
3, 3, 4, 4 }; | |
1641 | |
1642 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd,
0x6d66, 0x1a }; | |
1643 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0x
fc, 0x46, 0x6b, 0x0f, 0x57 }; | |
1644 static const int32_t offsets2[]={ 1, 3, 5,
7, 10 }; | |
1645 | |
1646 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "
ibm-930", | |
1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NU
LL, 0) | |
1648 ) { | |
1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); | |
1650 } | |
1651 | |
1652 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ib
m-930", | |
1653 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
1654 ) { | |
1655 log_err("ibm-930->u subchar/subchar1 did not match.\n"); | |
1656 } | |
1657 } | |
1658 | |
1659 log_verbose("Testing GB 18030 with substitute callbacks\n"); | |
1660 { | |
1661 static const UChar u2[]={ | |
1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00,
0x9fa6, 0xffff, 0xd800, 0xdc00, 0xff
fd, 0xdbff, 0xdfff }; | |
1663 static const uint8_t gb2[]={ | |
1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0x
bb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3
, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; | |
1665 static const int32_t offsets2[]={ | |
1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; | |
1667 | |
1668 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "
gb18030", | |
1669 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
1670 ) { | |
1671 log_err("gb18030->u with substitute did not match.\n"); | |
1672 } | |
1673 } | |
1674 #endif | |
1675 | |
1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); | |
1677 { | |
1678 static const uint8_t utf7[]={ | |
1679 /* a~ a+AB~ a+AB\x0c
a+AB- a+AB. a+. */ | |
1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42
, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b
, 0x2e | |
1681 }; | |
1682 static const UChar unicode[]={ | |
1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd,
0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xff
fd, 0x2e | |
1684 }; | |
1685 static const int32_t offsets[]={ | |
1686 0, 1, 2, 4, 6, 7, 9,
11, 12, 14, 17, 19, 21, 22, 23,
24 | |
1687 }; | |
1688 | |
1689 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH
(unicode), "UTF-7", | |
1690 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0
) | |
1691 ) { | |
1692 log_err("UTF-7->u with substitute did not match.\n"); | |
1693 } | |
1694 } | |
1695 | |
1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); | |
1697 { | |
1698 static const uint8_t | |
1699 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, | |
1700 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, | |
1701 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; | |
1702 | |
1703 static const UChar | |
1704 out1[]={ 0x4e00, 0xfeff }, | |
1705 out2[]={ 0x004e, 0xfffe }, | |
1706 out3[]={ 0xfefd, 0x4e00, 0xfeff }; | |
1707 | |
1708 static const int32_t | |
1709 offsets1[]={ 2, 4 }, | |
1710 offsets2[]={ 2, 4 }, | |
1711 offsets3[]={ 0, 2, 4 }; | |
1712 | |
1713 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1
), "UTF-16", | |
1714 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL,
0) | |
1715 ) { | |
1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); | |
1717 } | |
1718 | |
1719 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2
), "UTF-16", | |
1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
1721 ) { | |
1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); | |
1723 } | |
1724 | |
1725 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3
), "UTF-16", | |
1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL,
0) | |
1727 ) { | |
1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); | |
1729 } | |
1730 } | |
1731 | |
1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); | |
1733 { | |
1734 static const uint8_t | |
1735 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x
00, 0xfe, 0xff }, | |
1736 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0x
ff, 0x00, 0x00 }, | |
1737 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x
00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, | |
1738 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x
00, 0x4e, 0x00 }; | |
1739 | |
1740 static const UChar | |
1741 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, | |
1742 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, | |
1743 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0x
fffd }, | |
1744 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; | |
1745 | |
1746 static const int32_t | |
1747 offsets1[]={ 4, 4, 8 }, | |
1748 offsets2[]={ 4, 4, 8 }, | |
1749 offsets3[]={ 0, 4, 4, 8, 12 }, | |
1750 offsets4[]={ 0, 0, 4, 8 }; | |
1751 | |
1752 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1
), "UTF-32", | |
1753 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL,
0) | |
1754 ) { | |
1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); | |
1756 } | |
1757 | |
1758 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2
), "UTF-32", | |
1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
1760 ) { | |
1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); | |
1762 } | |
1763 | |
1764 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3
), "UTF-32", | |
1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL,
0) | |
1766 ) { | |
1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); | |
1768 } | |
1769 | |
1770 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4
), "UTF-32", | |
1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL,
0) | |
1772 ) { | |
1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not matc
h.\n"); | |
1774 } | |
1775 } | |
1776 } | |
1777 | |
1778 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) | |
1779 { | |
1780 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
1781 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
1782 | |
1783 const uint8_t expsubwvalIBM_949[]= { | |
1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2, | |
1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; | |
1786 | |
1787 const uint8_t expsubwvalIBM_943[]= { | |
1788 0x9f, 0xaf, 0x9f, 0xb1, | |
1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; | |
1790 | |
1791 const uint8_t expsubwvalIBM_930[] = { | |
1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5,
0x0e, 0x46, 0x6b, 0x0f }; | |
1793 | |
1794 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; | |
1795 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; | |
1796 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }
; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ | |
1797 | |
1798 gInBufferSize = inputsize; | |
1799 gOutBufferSize = outputsize; | |
1800 | |
1801 /*from Unicode*/ | |
1802 | |
1803 #if !UCONFIG_NO_LEGACY_CONVERSION | |
1804 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), | |
1805 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", | |
1806 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) | |
1807 log_err("u-> ibm-949 with subst with value did not match.\n"); | |
1808 | |
1809 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
1810 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", | |
1811 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) | |
1812 log_err("u-> ibm-943 with sub with value did not match.\n"); | |
1813 | |
1814 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
1815 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", | |
1816 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) | |
1817 log_err("u-> ibm-930 with subst with value did not match.\n"); | |
1818 | |
1819 | |
1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); | |
1821 { | |
1822 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
1823 static const uint8_t toIBM943[]= { 0x61, | |
1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1827 0x61 }; | |
1828 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3
, 3, 3, 3, 3, 3, 4}; | |
1829 | |
1830 | |
1831 /* EUC_JP*/ | |
1832 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2, }; | |
1833 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1837 0x61, 0x8e, 0xe0, | |
1838 }; | |
1839 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, | |
1840 3, 3, 3, 3, 3, 3, | |
1841 3, 3, 3, 3, 3, 3, | |
1842 5, 5, 5, 5, 5, 5, | |
1843 6, 7, 7, | |
1844 }; | |
1845 | |
1846 /*EUC_TW*/ | |
1847 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
1848 static const uint8_t to_euc_tw[]={ | |
1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
1853 0x61, 0xe6, 0xca, 0x8a, | |
1854 }; | |
1855 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, | |
1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, | |
1857 6, 7, 7, 8, | |
1858 }; | |
1859 /*ISO-2022-JP*/ | |
1860 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x0
0E9, 0x0042} ; | |
1861 static const uint8_t to_iso_2022_jp1[]={ | |
1862 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
| |
1864 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
1866 0x42, | |
1867 }; | |
1868 | |
1869 static const int32_t from_iso_2022_jpOffs1 [] ={ | |
1870 0,0,0,0,0, | |
1871 1,1,1,1,1,1,1,1,1, | |
1872 2,2,2,2,2, | |
1873 3,3,3,3,3,3,3,3,3, | |
1874 4, | |
1875 }; | |
1876 /* surrogate pair*/ | |
1877 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x
3001,0xD84D,0xDC56, 0x0042} ; | |
1878 static const uint8_t to_iso_2022_jp2[]={ | |
1879 0x1b, 0x24, 0x42, 0x21, 0x21, | |
1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44,
0x38, 0x34, 0x44, | |
1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1882 0x1b, 0x24, 0x42, 0x21, 0x22, | |
1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44,
0x38, 0x34, 0x44, | |
1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1885 0x42, | |
1886 }; | |
1887 static const int32_t from_iso_2022_jpOffs2 [] ={ | |
1888 0,0,0,0,0, | |
1889 1,1,1,1,1,1,1,1,1, | |
1890 1,1,1,1,1,1, | |
1891 3,3,3,3,3, | |
1892 4,4,4,4,4,4,4,4,4, | |
1893 4,4,4,4,4,4, | |
1894 6, | |
1895 }; | |
1896 | |
1897 /*ISO-2022-cn*/ | |
1898 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
1899 static const uint8_t to_iso_2022_cn[]={ | |
1900 0x41, | |
1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, | |
1902 0x42, | |
1903 }; | |
1904 static const int32_t from_iso_2022_cnOffs [] ={ | |
1905 0, | |
1906 1,1,1,1,1,1, | |
1907 2, | |
1908 }; | |
1909 | |
1910 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x
3001,0xD84D,0xDC56, 0x0042}; | |
1911 | |
1912 static const uint8_t to_iso_2022_cn4[]={ | |
1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x2
1, | |
1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x4
4, | |
1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1916 0x0e, 0x21, 0x22, | |
1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x4
4, | |
1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1919 0x42, | |
1920 }; | |
1921 static const int32_t from_iso_2022_cnOffs4 [] ={ | |
1922 0,0,0,0,0,0,0, | |
1923 1,1,1,1,1,1,1, | |
1924 1,1,1,1,1,1, | |
1925 3,3,3, | |
1926 4,4,4,4,4,4,4, | |
1927 4,4,4,4,4,4, | |
1928 6 | |
1929 | |
1930 }; | |
1931 | |
1932 /*ISO-2022-kr*/ | |
1933 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xD
C56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
1934 static const uint8_t to_iso_2022_kr2[]={ | |
1935 0x1b, 0x24, 0x29, 0x43, | |
1936 0x41, | |
1937 0x0e, 0x25, 0x50, | |
1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1940 0x0e, 0x25, 0x50, | |
1941 0x0f, 0x42, | |
1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
1944 0x43 | |
1945 }; | |
1946 static const int32_t from_iso_2022_krOffs2 [] ={ | |
1947 -1,-1,-1,-1, | |
1948 0, | |
1949 1,1,1, | |
1950 2,2,2,2,2,2,2, | |
1951 2,2,2,2,2,2, | |
1952 4,4,4, | |
1953 5,5, | |
1954 6,6,6,6,6,6, | |
1955 6,6,6,6,6,6, | |
1956 8, | |
1957 }; | |
1958 | |
1959 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; | |
1960 static const uint8_t to_iso_2022_kr[]={ | |
1961 0x1b, 0x24, 0x29, 0x43, | |
1962 0x41, | |
1963 0x0e, 0x25, 0x50, | |
1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*
/ | |
1965 0x0e, 0x25, 0x50, | |
1966 0x0f, 0x42, | |
1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
1968 0x43 | |
1969 }; | |
1970 | |
1971 | |
1972 static const int32_t from_iso_2022_krOffs [] ={ | |
1973 -1,-1,-1,-1, | |
1974 0, | |
1975 1,1,1, | |
1976 2,2,2,2,2,2,2, | |
1977 3,3,3, | |
1978 4,4, | |
1979 5,5,5,5,5,5, | |
1980 6, | |
1981 }; | |
1982 /* HZ encoding */ | |
1983 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; | |
1984 | |
1985 static const uint8_t to_hz[]={ | |
1986 0x7e, 0x7d, 0x41, | |
1987 0x7e, 0x7b, 0x26, 0x30, | |
1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*una
ssigned*/ | |
1989 0x7e, 0x7b, 0x26, 0x30, | |
1990 0x7e, 0x7d, 0x42, | |
1991 | |
1992 }; | |
1993 static const int32_t from_hzOffs [] ={ | |
1994 0,0,0, | |
1995 1,1,1,1, | |
1996 2,2,2,2,2,2,2,2, | |
1997 3,3,3,3, | |
1998 4,4,4 | |
1999 }; | |
2000 | |
2001 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unas
signed*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
2002 static const uint8_t to_hz2[]={ | |
2003 0x7e, 0x7d, 0x41, | |
2004 0x7e, 0x7b, 0x26, 0x30, | |
2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2007 0x7e, 0x7b, 0x26, 0x30, | |
2008 0x7e, 0x7d, 0x42, | |
2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2011 0x43 | |
2012 }; | |
2013 static const int32_t from_hzOffs2 [] ={ | |
2014 0,0,0, | |
2015 1,1,1,1, | |
2016 2,2,2,2,2,2,2,2, | |
2017 2,2,2,2,2,2, | |
2018 4,4,4,4, | |
2019 5,5,5, | |
2020 6,6,6,6,6,6, | |
2021 6,6,6,6,6,6, | |
2022 8, | |
2023 }; | |
2024 | |
2025 /*ISCII*/ | |
2026 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned
*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; | |
2027 static const uint8_t to_iscii[]={ | |
2028 0x41, | |
2029 0xef, 0x42, 0xa1, | |
2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2031 0xa2, | |
2032 0x42, | |
2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
2034 0x43 | |
2035 }; | |
2036 | |
2037 | |
2038 static const int32_t from_isciiOffs [] ={ | |
2039 0, | |
2040 1,1,1, | |
2041 2,2,2,2,2,2, | |
2042 3, | |
2043 4, | |
2044 5,5,5,5,5,5, | |
2045 6, | |
2046 }; | |
2047 | |
2048 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
2049 toIBM943, sizeof(toIBM943), "ibm-943", | |
2050 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) | |
2051 log_err("u-> ibm-943 with subst with value did not match.\n"); | |
2052 | |
2053 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
2054 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
2055 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) | |
2056 log_err("u-> euc-jp with subst with value did not match.\n"); | |
2057 | |
2058 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
2059 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
2060 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) | |
2061 log_err("u-> euc-tw with subst with value did not match.\n"); | |
2062 | |
2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_in
putText1)/sizeof(iso_2022_jp_inputText1[0]), | |
2064 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
2065 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2066 log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2067 | |
2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_in
putText1)/sizeof(iso_2022_jp_inputText1[0]), | |
2069 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
2070 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
2071 log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2072 | |
2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_in
putText2)/sizeof(iso_2022_jp_inputText2[0]), | |
2074 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
2075 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) | |
2076 log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
2077 /*ESCAPE OPTIONS*/ | |
2078 { | |
2079 /* surrogate pair*/ | |
2080 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; | |
2081 static const uint8_t to_iso_2022_jp3_v2[]={ | |
2082 0x1b, 0x24, 0x42, 0x21, 0x21, | |
2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34,
0x34, 0x37, 0x30, 0x3b, | |
2084 | |
2085 0x1b, 0x24, 0x42, 0x21, 0x22, | |
2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34,
0x34, 0x37, 0x30, 0x3b, | |
2087 | |
2088 0x42, | |
2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b
, | |
2090 }; | |
2091 | |
2092 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ | |
2093 0,0,0,0,0, | |
2094 1,1,1,1,1,1,1,1,1,1,1,1, | |
2095 | |
2096 3,3,3,3,3, | |
2097 4,4,4,4,4,4,4,4,4,4,4,4, | |
2098 | |
2099 6, | |
2100 7,7,7,7,7,7,7,7,7 | |
2101 }; | |
2102 | |
2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof
(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), | |
2104 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp
", | |
2105 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL,
0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not
match.\n"); | |
2107 } | |
2108 { | |
2109 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2110 static const uint8_t to_iso_2022_cn5_v2[]={ | |
2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x2
1, | |
2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x4
4, | |
2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
2114 0x0e, 0x21, 0x22, | |
2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x4
4, | |
2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
2117 0x42, | |
2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, | |
2119 }; | |
2120 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ | |
2121 0,0,0,0,0,0,0, | |
2122 1,1,1,1,1,1,1, | |
2123 1,1,1,1,1,1, | |
2124 3,3,3, | |
2125 4,4,4,4,4,4,4, | |
2126 4,4,4,4,4,4, | |
2127 6, | |
2128 7,7,7,7,7,7 | |
2129 }; | |
2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof
(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), | |
2131 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", | |
2132 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,U
CNV_ESCAPE_JAVA,U_ZERO_ERROR )) | |
2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not mat
ch.\n"); | |
2134 | |
2135 } | |
2136 { | |
2137 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2138 static const uint8_t to_iso_2022_cn6_v2[]={ | |
2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, | |
2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33,
0x34, 0x35, 0x36, 0x7d, | |
2141 0x0e, 0x21, 0x22, | |
2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33,
0x34, 0x35, 0x36, 0x7d, | |
2143 0x42, | |
2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30,
0x32, 0x7d | |
2145 }; | |
2146 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ | |
2147 0, 0, 0, 0, 0, 0, 0, | |
2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
2149 3, 3, 3, | |
2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
2151 6, | |
2152 7, 7, 7, 7, 7, 7, 7, 7, | |
2153 }; | |
2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof
(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), | |
2155 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", | |
2156 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,U
CNV_ESCAPE_UNICODE,U_ZERO_ERROR )) | |
2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not
match.\n"); | |
2158 | |
2159 } | |
2160 { | |
2161 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
2162 static const uint8_t to_iso_2022_cn7_v2[]={ | |
2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, | |
2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34,
0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2165 0x0e, 0x21, 0x22, | |
2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34,
0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30,
0x32, | |
2168 }; | |
2169 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ | |
2170 0, 0, 0, 0, 0, 0, 0, | |
2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, | |
2172 3, 3, 3, | |
2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, | |
2174 6, | |
2175 7, 7, 7, 7, 7, 7, | |
2176 }; | |
2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof
(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), | |
2178 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", | |
2179 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"
K" ,U_ZERO_ERROR )) | |
2180 log_err("u-> iso-2022-cn with sub & K did not match.\n"); | |
2181 | |
2182 } | |
2183 { | |
2184 static const UChar iso_2022_cn_inputText8[]={ | |
2185 0x3000, | |
2186 0xD84D, 0xDC56, | |
2187 0x3001, | |
2188 0xD84D, 0xDC56, | |
2189 0xDBFF, 0xDFFF, | |
2190 0x0042, | |
2191 0x0902}; | |
2192 static const uint8_t to_iso_2022_cn8_v2[]={ | |
2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, | |
2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35,
0x36, 0x20, | |
2195 0x0e, 0x21, 0x22, | |
2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35,
0x36, 0x20, | |
2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46,
0x46, 0x20, | |
2198 0x42, | |
2199 0x5c, 0x39, 0x30, 0x32, 0x20 | |
2200 }; | |
2201 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ | |
2202 0, 0, 0, 0, 0, 0, 0, | |
2203 1, 1, 1, 1, 1, 1, 1, 1, | |
2204 3, 3, 3, | |
2205 4, 4, 4, 4, 4, 4, 4, 4, | |
2206 6, 6, 6, 6, 6, 6, 6, 6, | |
2207 8, | |
2208 9, 9, 9, 9, 9 | |
2209 }; | |
2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof
(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), | |
2211 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", | |
2212 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,U
CNV_ESCAPE_CSS2,U_ZERO_ERROR )) | |
2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not mat
ch.\n"); | |
2214 | |
2215 } | |
2216 { | |
2217 static const uint8_t to_iso_2022_cn4_v3[]={ | |
2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21
, | |
2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32
, 0x33, 0x34, 0x35, 0x36, | |
2220 0x0e, 0x21, 0x22, | |
2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32
, 0x33, 0x34, 0x35, 0x36, | |
2222 0x42 | |
2223 }; | |
2224 | |
2225 | |
2226 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ | |
2227 0,0,0,0,0,0,0, | |
2228 1,1,1,1,1,1,1,1,1,1,1, | |
2229 | |
2230 3,3,3, | |
2231 4,4,4,4,4,4,4,4,4,4,4, | |
2232 | |
2233 6 | |
2234 | |
2235 }; | |
2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof
(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), | |
2237 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", | |
2238 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,U
CNV_ESCAPE_C,U_ZERO_ERROR )) | |
2239 { | |
2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match
.\n"); | |
2241 } | |
2242 } | |
2243 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), | |
2244 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
2245 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) | |
2246 log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2247 | |
2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_in
putText4)/sizeof(iso_2022_cn_inputText4[0]), | |
2249 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", | |
2250 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) | |
2251 log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
2252 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), | |
2253 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) | |
2255 log_err("u-> iso_2022_kr with subst with value did not match.\n"); | |
2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_in
putText2)/sizeof(iso_2022_kr_inputText2[0]), | |
2257 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", | |
2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) | |
2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); | |
2260 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), | |
2261 to_hz, sizeof(to_hz), "HZ", | |
2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) | |
2263 log_err("u-> hz with subst with value did not match.\n"); | |
2264 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(h
z_inputText2[0]), | |
2265 to_hz2, sizeof(to_hz2), "HZ", | |
2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) | |
2267 log_err("u-> hz with subst with value did not match.\n"); | |
2268 | |
2269 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), | |
2270 to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
2271 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) | |
2272 log_err("u-> iscii with subst with value did not match.\n"); | |
2273 } | |
2274 #endif | |
2275 | |
2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); | |
2277 /*to Unicode*/ | |
2278 { | |
2279 #if !UCONFIG_NO_LEGACY_CONVERSION | |
2280 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, | |
2281 0x81, 0xad, /*unassigned*/ | |
2282 0x89, 0xd3 }; | |
2283 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, | |
2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, | |
2285 0x7B87}; | |
2286 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3
, 3, 5}; | |
2287 | |
2288 /* EUC_JP*/ | |
2289 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, | |
2290 0x8f, 0xda, 0xa1, /*unassigned*/ | |
2291 0x8e, 0xe0, | |
2292 }; | |
2293 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, | |
2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x
31, | |
2295 0x00a2 }; | |
2296 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, | |
2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
2298 9, | |
2299 }; | |
2300 | |
2301 /*EUC_TW*/ | |
2302 static const uint8_t sampleTxt_euc_tw[]={ | |
2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
2305 0xe6, 0xca, 0x8a, | |
2306 }; | |
2307 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, | |
2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0
x42, 0x25, 0x58, 0x43, 0x43, | |
2309 0x8706, 0x8a, }; | |
2310 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, | |
2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, | |
2312 11, 13}; | |
2313 | |
2314 /*iso-2022-jp*/ | |
2315 static const uint8_t sampleTxt_iso_2022_jp[]={ | |
2316 0x1b, 0x28, 0x42, 0x41, | |
2317 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ | |
2318 0x1b, 0x28, 0x42, 0x42, | |
2319 | |
2320 }; | |
2321 /* A % X 3 A
% X 1 A B */ | |
2322 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,
0x25,0x58,0x31,0x41, 0x42 }; | |
2323 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7,
7, 7, 7, 7, 12 }; | |
2324 | |
2325 /*iso-2022-cn*/ | |
2326 static const uint8_t sampleTxt_iso_2022_cn[]={ | |
2327 0x0f, 0x41, 0x44, | |
2328 0x1B, 0x24, 0x29, 0x47, | |
2329 0x0E, 0x40, 0x6c, /*unassigned*/ | |
2330 0x0f, 0x42, | |
2331 | |
2332 }; | |
2333 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34
,0x30,0x25,0x58,0x36,0x43,0x42 }; | |
2334 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8,
8, 8, 8, 8, 8, 11 }; | |
2335 | |
2336 /*iso-2022-kr*/ | |
2337 static const uint8_t sampleTxt_iso_2022_kr[]={ | |
2338 0x1b, 0x24, 0x29, 0x43, | |
2339 0x41, | |
2340 0x0E, 0x7f, 0x1E, | |
2341 0x0e, 0x25, 0x50, | |
2342 0x0f, 0x51, | |
2343 0x42, 0x43, | |
2344 | |
2345 }; | |
2346 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46
,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; | |
2347 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6,
6, 6, 6, 6, 9, 12, 13 , 14 }; | |
2348 | |
2349 /*hz*/ | |
2350 static const uint8_t sampleTxt_hz[]={ | |
2351 0x41, | |
2352 0x7e, 0x7b, 0x26, 0x30, | |
2353 0x7f, 0x1E, /*unassigned*/ | |
2354 0x26, 0x30, | |
2355 0x7e, 0x7d, 0x42, | |
2356 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
2357 0x7e, 0x7d, 0x42, | |
2358 }; | |
2359 static const UChar hztoUnicode[]={ | |
2360 0x41, | |
2361 0x03a0, | |
2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2363 0x03A0, | |
2364 0x42, | |
2365 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
2366 0x42,}; | |
2367 | |
2368 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,
14,14,14,14,14,18, }; | |
2369 | |
2370 | |
2371 /*iscii*/ | |
2372 static const uint8_t sampleTxt_iscii[]={ | |
2373 0x41, | |
2374 0x30, | |
2375 0xEB, /*unassigned*/ | |
2376 0xa3, | |
2377 0x42, | |
2378 0xEC, /*unassigned*/ | |
2379 0x42, | |
2380 }; | |
2381 static const UChar isciitoUnicode[]={ | |
2382 0x41, | |
2383 0x30, | |
2384 0x25, 0x58, 0x45, 0x42, | |
2385 0x0903, | |
2386 0x42, | |
2387 0x25, 0x58, 0x45, 0x43, | |
2388 0x42,}; | |
2389 | |
2390 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; | |
2391 #endif | |
2392 | |
2393 /*UTF8*/ | |
2394 static const uint8_t sampleTxtUTF8[]={ | |
2395 0x20, 0x64, 0x50, | |
2396 0xC2, 0x7E, /* truncated char */ | |
2397 0x20, | |
2398 0xE0, 0xB5, 0x7E, /* truncated char */ | |
2399 0x40, | |
2400 }; | |
2401 static const UChar UTF8ToUnicode[]={ | |
2402 0x0020, 0x0064, 0x0050, | |
2403 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ | |
2404 0x0020, | |
2405 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x00
7E, | |
2406 0x0040 | |
2407 }; | |
2408 static const int32_t fromUTF8[] = { | |
2409 0, 1, 2, | |
2410 3, 3, 3, 3, 4, | |
2411 5, | |
2412 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2413 9 | |
2414 }; | |
2415 static const UChar UTF8ToUnicodeXML_DEC[]={ | |
2416 0x0020, 0x0064, 0x0050, | |
2417 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~
*/ | |
2418 0x0020, | |
2419 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x00
31, 0x0038, 0x0031, 0x003B, 0x007E, | |
2420 0x0040 | |
2421 }; | |
2422 static const int32_t fromUTF8XML_DEC[] = { | |
2423 0, 1, 2, | |
2424 3, 3, 3, 3, 3, 3, 4, | |
2425 5, | |
2426 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
2427 9 | |
2428 }; | |
2429 | |
2430 | |
2431 #if !UCONFIG_NO_LEGACY_CONVERSION | |
2432 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), | |
2433 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnic
ode[0]),"ibm-943", | |
2434 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) | |
2435 log_err("ibm-943->u with substitute with value did not match.\n"); | |
2436 | |
2437 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), | |
2438 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode
[0]),"IBM-eucJP", | |
2439 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) | |
2440 log_err("euc-jp->u with substitute with value did not match.\n"); | |
2441 | |
2442 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
2443 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", | |
2444 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) | |
2445 log_err("euc-tw->u with substitute with value did not match.\n"); | |
2446 | |
2447 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_202
2_jp), | |
2448 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", | |
2449 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) | |
2450 log_err("iso-2022-jp->u with substitute with value did not match.\n"
); | |
2451 | |
2452 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sample
Txt_iso_2022_jp), | |
2453 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", | |
2454 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_Z
ERO_ERROR)) | |
2455 log_err("iso-2022-jp->u with substitute with value did not match.\n"
); | |
2456 | |
2457 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ | |
2458 { | |
2459 static const UChar iso_2022_jptoUnicodeDec[]={ | |
2460 0x0041, | |
2461 /* & # 5
8 ; */ | |
2462 0x0026, 0x0023, 0x0035,
0x0038, 0x003b, | |
2463 0x0026, 0x0023, 0x0032,
0x0036, 0x003b, | |
2464 0x0042 }; | |
2465 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7
,7,7,7,7,12, }; | |
2466 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), | |
2467 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/si
zeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2468 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,
UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
2469 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_XML_DEC did not match.\n"); | |
2470 } | |
2471 { | |
2472 static const UChar iso_2022_jptoUnicodeHex[]={ | |
2473 0x0041, | |
2474 /* & # x 3
A ; */ | |
2475 0x0026, 0x0023, 0x0078, 0x0033
, 0x0041, 0x003b, | |
2476 0x0026, 0x0023, 0x0078, 0x0031
, 0x0041, 0x003b, | |
2477 0x0042 }; | |
2478 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,
7,7,7,7,7,7,7,12 }; | |
2479 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), | |
2480 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/si
zeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2481 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,
UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) | |
2482 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_XML_HEX did not match.\n"); | |
2483 } | |
2484 { | |
2485 static const UChar iso_2022_jptoUnicodeC[]={ | |
2486 0x0041, | |
2487 0x005C, 0x0078, 0x0033, 0x0041,
/* \x3A */ | |
2488 0x005C, 0x0078, 0x0031, 0x0041,
/* \x1A */ | |
2489 0x0042 }; | |
2490 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; | |
2491 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), | |
2492 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof
(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
2493 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UC
NV_ESCAPE_C,U_ZERO_ERROR )) | |
2494 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_C did not match.\n"); | |
2495 } | |
2496 } | |
2497 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_202
2_cn), | |
2498 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2
022_cntoUnicode[0]),"iso-2022-cn", | |
2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) | |
2500 log_err("iso-2022-cn->u with substitute with value did not match.\n"
); | |
2501 | |
2502 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_202
2_kr), | |
2503 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2
022_krtoUnicode[0]),"iso-2022-kr", | |
2504 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) | |
2505 log_err("iso-2022-kr->u with substitute with value did not match.\n"
); | |
2506 | |
2507 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
2508 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
2509 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) | |
2510 log_err("hz->u with substitute with value did not match.\n"); | |
2511 | |
2512 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
2513 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]
),"ISCII,version=0", | |
2514 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) | |
2515 log_err("ISCII ->u with substitute with value did not match.\n"); | |
2516 #endif | |
2517 | |
2518 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), | |
2519 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"U
TF-8", | |
2520 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) | |
2521 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not m
atch.\n"); | |
2522 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8)
, | |
2523 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8To
UnicodeXML_DEC[0]),"UTF-8", | |
2524 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE
_XML_DEC, U_ZERO_ERROR)) | |
2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not m
atch.\n"); | |
2526 } | |
2527 } | |
2528 | |
2529 #if !UCONFIG_NO_LEGACY_CONVERSION | |
2530 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) | |
2531 { | |
2532 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
2533 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0
xd3 }; | |
2534 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; | |
2535 | |
2536 | |
2537 static const uint8_t text943[] = { | |
2538 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; | |
2539 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,
0x5b57 }; | |
2540 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b
57 }; | |
2541 static const UChar toUnicode943stop[]= { 0x304b}; | |
2542 | |
2543 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; | |
2544 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; | |
2545 static const int32_t fromIBM943Offsstop[] = { 0}; | |
2546 | |
2547 gInBufferSize = inputsize; | |
2548 gOutBufferSize = outputsize; | |
2549 /*checking with a legal value*/ | |
2550 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0])
, | |
2551 templegal949, sizeof(templegal949), "ibm-949", | |
2552 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) | |
2553 log_err("u-> ibm-949 with skip did not match.\n"); | |
2554 | |
2555 /*checking illegal value for ibm-943 with substitute*/ | |
2556 if(!testConvertToUnicode(text943, sizeof(text943), | |
2557 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0])
,"ibm-943", | |
2558 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2559 log_err("ibm-943->u with subst did not match.\n"); | |
2560 /*checking illegal value for ibm-943 with skip */ | |
2561 if(!testConvertToUnicode(text943, sizeof(text943), | |
2562 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[
0]),"ibm-943", | |
2563 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) | |
2564 log_err("ibm-943->u with skip did not match.\n"); | |
2565 | |
2566 /*checking illegal value for ibm-943 with stop */ | |
2567 if(!testConvertToUnicode(text943, sizeof(text943), | |
2568 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[
0]),"ibm-943", | |
2569 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) | |
2570 log_err("ibm-943->u with stop did not match.\n"); | |
2571 | |
2572 } | |
2573 | |
2574 static void TestSingleByte(int32_t inputsize, int32_t outputsize) | |
2575 { | |
2576 static const uint8_t sampleText[] = { | |
2577 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, | |
2578 0xff, 0x32, 0x33}; | |
2579 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1
a, 0x1a, 0x0032, 0x0033 }; | |
2580 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; | |
2581 /*checking illegal value for ibm-943 with substitute*/ | |
2582 gInBufferSize = inputsize; | |
2583 gOutBufferSize = outputsize; | |
2584 | |
2585 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
2586 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0])
,"ibm-943", | |
2587 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
2588 log_err("ibm-943->u with subst did not match.\n"); | |
2589 } | |
2590 | |
2591 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) | |
2592 { | |
2593 /*EBCDIC_STATEFUL*/ | |
2594 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x
6d65, 0x0061 }; | |
2595 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1,
0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; | |
2596 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2,
3, 4, 4, 4, 5, 5 }; | |
2597 /* s SO doubl SI sng s SO
fe fe SI s */ | |
2598 | |
2599 /*EBCDIC_STATEFUL with subChar=3f*/ | |
2600 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0
x62, 0xb1, 0x3f, 0x62 }; | |
2601 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2
, 2, 3, 4, 5 }; | |
2602 static const char mySubChar[]={ 0x3f}; | |
2603 | |
2604 gInBufferSize = inputsize; | |
2605 gOutBufferSize = outputsize; | |
2606 | |
2607 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof
(ebcdic_inputTest[0]), | |
2608 toIBM930, sizeof(toIBM930), "ibm-930", | |
2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) | |
2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); | |
2611 | |
2612 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof
(ebcdic_inputTest[0]), | |
2613 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", | |
2614 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) | |
2615 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) di
d not match.\n"); | |
2616 } | |
2617 #endif | |
2618 | |
2619 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t
*expect, int expectLen, | |
2620 const char *codepage, UConverterFromUCallback callback , const i
nt32_t *expectOffsets, | |
2621 const char *mySubChar, int8_t len) | |
2622 { | |
2623 | |
2624 | |
2625 UErrorCode status = U_ZERO_ERROR; | |
2626 UConverter *conv = 0; | |
2627 char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
2628 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
2629 const UChar *src; | |
2630 char *end; | |
2631 char *targ; | |
2632 int32_t *offs; | |
2633 int i; | |
2634 int32_t realBufferSize; | |
2635 char *realBufferEnd; | |
2636 const UChar *realSourceEnd; | |
2637 const UChar *sourceLimit; | |
2638 UBool checkOffsets = TRUE; | |
2639 UBool doFlush; | |
2640 char junk[9999]; | |
2641 char offset_str[9999]; | |
2642 char *p; | |
2643 UConverterFromUCallback oldAction = NULL; | |
2644 const void* oldContext = NULL; | |
2645 | |
2646 | |
2647 for(i=0;i<NEW_MAX_BUFFER;i++) | |
2648 junkout[i] = (char)0xF0; | |
2649 for(i=0;i<NEW_MAX_BUFFER;i++) | |
2650 junokout[i] = 0xFF; | |
2651 setNuConvTestName(codepage, "FROM"); | |
2652 | |
2653 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer=
%d\n", codepage, gInBufferSize, | |
2654 gOutBufferSize); | |
2655 | |
2656 conv = ucnv_open(codepage, &status); | |
2657 if(U_FAILURE(status)) | |
2658 { | |
2659 log_data_err("Couldn't open converter %s\n",codepage); | |
2660 return TRUE; | |
2661 } | |
2662 | |
2663 log_verbose("Converter opened..\n"); | |
2664 | |
2665 /*----setting the callback routine----*/ | |
2666 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &statu
s); | |
2667 if (U_FAILURE(status)) | |
2668 { | |
2669 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
2670 } | |
2671 /*------------------------*/ | |
2672 /*setting the subChar*/ | |
2673 if(mySubChar != NULL){ | |
2674 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2675 if (U_FAILURE(status)) { | |
2676 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); | |
2677 } | |
2678 } | |
2679 /*------------*/ | |
2680 | |
2681 src = source; | |
2682 targ = junkout; | |
2683 offs = junokout; | |
2684 | |
2685 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
2686 realBufferEnd = junkout + realBufferSize; | |
2687 realSourceEnd = source + sourceLen; | |
2688 | |
2689 if ( gOutBufferSize != realBufferSize ) | |
2690 checkOffsets = FALSE; | |
2691 | |
2692 if( gInBufferSize != NEW_MAX_BUFFER ) | |
2693 checkOffsets = FALSE; | |
2694 | |
2695 do | |
2696 { | |
2697 end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
2698 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
2699 | |
2700 doFlush = (UBool)(sourceLimit == realSourceEnd); | |
2701 | |
2702 if(targ == realBufferEnd) | |
2703 { | |
2704 log_err("Error, overflowed the real buffer while about to call fromU
nicode! targ=%08lx %s", targ, gNuConvTestName); | |
2705 return FALSE; | |
2706 } | |
2707 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx
to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
2708 | |
2709 | |
2710 status = U_ZERO_ERROR; | |
2711 | |
2712 ucnv_fromUnicode (conv, | |
2713 (char **)&targ, | |
2714 (const char *)end, | |
2715 &src, | |
2716 sourceLimit, | |
2717 checkOffsets ? offs : NULL, | |
2718 doFlush, /* flush if we're at the end of the input data */ | |
2719 &status); | |
2720 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sour
ceLimit < realSourceEnd)) ); | |
2721 | |
2722 | |
2723 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2724 UChar errChars[50]; /* should be sufficient */ | |
2725 int8_t errLen = 50; | |
2726 UErrorCode err = U_ZERO_ERROR; | |
2727 const UChar* start= NULL; | |
2728 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); | |
2729 if(U_FAILURE(err)){ | |
2730 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName
(err)); | |
2731 } | |
2732 /* length of in invalid chars should be equal to returned length*/ | |
2733 start = src - errLen; | |
2734 if(u_strncmp(errChars,start,errLen)!=0){ | |
2735 log_err("ucnv_getInvalidUChars did not return the correct invalid ch
ars for encoding %s \n", ucnv_getName(conv,&err)); | |
2736 } | |
2737 } | |
2738 /* allow failure codes for the stop callback */ | |
2739 if(U_FAILURE(status) && | |
2740 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND
&& status != U_ILLEGAL_CHAR_FOUND))) | |
2741 { | |
2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
2743 return FALSE; | |
2744 } | |
2745 | |
2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
2747 sourceLen, targ-junkout); | |
2748 if(getTestOption(VERBOSITY_OPTION)) | |
2749 { | |
2750 | |
2751 junk[0] = 0; | |
2752 offset_str[0] = 0; | |
2753 for(p = junkout;p<targ;p++) | |
2754 { | |
2755 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
2756 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsig
ned int)junokout[p-junkout]); | |
2757 } | |
2758 | |
2759 log_verbose(junk); | |
2760 printSeq(expect, expectLen); | |
2761 if ( checkOffsets ) | |
2762 { | |
2763 log_verbose("\nOffsets:"); | |
2764 log_verbose(offset_str); | |
2765 } | |
2766 log_verbose("\n"); | |
2767 } | |
2768 ucnv_close(conv); | |
2769 | |
2770 | |
2771 if(expectLen != targ-junkout) | |
2772 { | |
2773 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, g
NuConvTestName); | |
2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkou
t, gNuConvTestName); | |
2775 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
2776 printSeqErr(expect, expectLen); | |
2777 return FALSE; | |
2778 } | |
2779 | |
2780 if (checkOffsets && (expectOffsets != 0) ) | |
2781 { | |
2782 log_verbose("comparing %d offsets..\n", targ-junkout); | |
2783 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
2784 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
2785 log_err("Got Output : "); | |
2786 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
2787 log_err("Got Offsets: "); | |
2788 for(p=junkout;p<targ;p++) | |
2789 log_err("%d,", junokout[p-junkout]); | |
2790 log_err("\n"); | |
2791 log_err("Expected Offsets: "); | |
2792 for(i=0; i<(targ-junkout); i++) | |
2793 log_err("%d,", expectOffsets[i]); | |
2794 log_err("\n"); | |
2795 return FALSE; | |
2796 } | |
2797 } | |
2798 | |
2799 if(!memcmp(junkout, expect, expectLen)) | |
2800 { | |
2801 log_verbose("String matches! %s\n", gNuConvTestName); | |
2802 return TRUE; | |
2803 } | |
2804 else | |
2805 { | |
2806 log_err("String does not match. %s\n", gNuConvTestName); | |
2807 log_err("source: "); | |
2808 printUSeqErr(source, sourceLen); | |
2809 log_err("Got: "); | |
2810 printSeqErr((const uint8_t *)junkout, expectLen); | |
2811 log_err("Expected: "); | |
2812 printSeqErr(expect, expectLen); | |
2813 return FALSE; | |
2814 } | |
2815 } | |
2816 | |
2817 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *e
xpect, int expectlen, | |
2818 const char *codepage, UConverterToUCallback callback, const int32
_t *expectOffsets, | |
2819 const char *mySubChar, int8_t len) | |
2820 { | |
2821 UErrorCode status = U_ZERO_ERROR; | |
2822 UConverter *conv = 0; | |
2823 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
2824 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
2825 const char *src; | |
2826 const char *realSourceEnd; | |
2827 const char *srcLimit; | |
2828 UChar *targ; | |
2829 UChar *end; | |
2830 int32_t *offs; | |
2831 int i; | |
2832 UBool checkOffsets = TRUE; | |
2833 char junk[9999]; | |
2834 char offset_str[9999]; | |
2835 UChar *p; | |
2836 UConverterToUCallback oldAction = NULL; | |
2837 const void* oldContext = NULL; | |
2838 | |
2839 int32_t realBufferSize; | |
2840 UChar *realBufferEnd; | |
2841 | |
2842 | |
2843 for(i=0;i<NEW_MAX_BUFFER;i++) | |
2844 junkout[i] = 0xFFFE; | |
2845 | |
2846 for(i=0;i<NEW_MAX_BUFFER;i++) | |
2847 junokout[i] = -1; | |
2848 | |
2849 setNuConvTestName(codepage, "TO"); | |
2850 | |
2851 log_verbose("\n========= %s\n", gNuConvTestName); | |
2852 | |
2853 conv = ucnv_open(codepage, &status); | |
2854 if(U_FAILURE(status)) | |
2855 { | |
2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
2857 return TRUE; | |
2858 } | |
2859 | |
2860 log_verbose("Converter opened..\n"); | |
2861 | |
2862 src = (const char *)source; | |
2863 targ = junkout; | |
2864 offs = junokout; | |
2865 | |
2866 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
2867 realBufferEnd = junkout + realBufferSize; | |
2868 realSourceEnd = src + sourcelen; | |
2869 /*----setting the callback routine----*/ | |
2870 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status)
; | |
2871 if (U_FAILURE(status)) | |
2872 { | |
2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
2874 } | |
2875 /*-------------------------------------*/ | |
2876 /*setting the subChar*/ | |
2877 if(mySubChar != NULL){ | |
2878 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
2879 if (U_FAILURE(status)) { | |
2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); | |
2881 } | |
2882 } | |
2883 /*------------*/ | |
2884 | |
2885 | |
2886 if ( gOutBufferSize != realBufferSize ) | |
2887 checkOffsets = FALSE; | |
2888 | |
2889 if( gInBufferSize != NEW_MAX_BUFFER ) | |
2890 checkOffsets = FALSE; | |
2891 | |
2892 do | |
2893 { | |
2894 end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
2895 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
2896 | |
2897 if(targ == realBufferEnd) | |
2898 { | |
2899 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
2900 return FALSE; | |
2901 } | |
2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
2903 | |
2904 | |
2905 | |
2906 status = U_ZERO_ERROR; | |
2907 | |
2908 ucnv_toUnicode (conv, | |
2909 &targ, | |
2910 end, | |
2911 (const char **)&src, | |
2912 (const char *)srcLimit, | |
2913 checkOffsets ? offs : NULL, | |
2914 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of the source data */ | |
2915 &status); | |
2916 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcL
imit < realSourceEnd)) ); /* while we just need another buffer */ | |
2917 | |
2918 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
2919 char errChars[50]; /* should be sufficient */ | |
2920 int8_t errLen = 50; | |
2921 UErrorCode err = U_ZERO_ERROR; | |
2922 const char* start= NULL; | |
2923 ucnv_getInvalidChars(conv,errChars, &errLen, &err); | |
2924 if(U_FAILURE(err)){ | |
2925 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(
err)); | |
2926 } | |
2927 /* length of in invalid chars should be equal to returned length*/ | |
2928 start = src - errLen; | |
2929 if(uprv_strncmp(errChars,start,errLen)!=0){ | |
2930 log_err("ucnv_getInvalidChars did not return the correct invalid cha
rs for encoding %s \n", ucnv_getName(conv,&err)); | |
2931 } | |
2932 } | |
2933 /* allow failure codes for the stop callback */ | |
2934 if(U_FAILURE(status) && | |
2935 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND &
& status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) | |
2936 { | |
2937 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
2938 return FALSE; | |
2939 } | |
2940 | |
2941 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
2942 sourcelen, targ-junkout); | |
2943 if(getTestOption(VERBOSITY_OPTION)) | |
2944 { | |
2945 | |
2946 junk[0] = 0; | |
2947 offset_str[0] = 0; | |
2948 | |
2949 for(p = junkout;p<targ;p++) | |
2950 { | |
2951 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
); | |
2952 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[p-junkout]); | |
2953 } | |
2954 | |
2955 log_verbose(junk); | |
2956 printUSeq(expect, expectlen); | |
2957 if ( checkOffsets ) | |
2958 { | |
2959 log_verbose("\nOffsets:"); | |
2960 log_verbose(offset_str); | |
2961 } | |
2962 log_verbose("\n"); | |
2963 } | |
2964 ucnv_close(conv); | |
2965 | |
2966 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
2967 | |
2968 if (checkOffsets && (expectOffsets != 0)) | |
2969 { | |
2970 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
2971 { | |
2972 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
2973 log_err("Got offsets: "); | |
2974 for(p=junkout;p<targ;p++) | |
2975 log_err(" %2d,", junokout[p-junkout]); | |
2976 log_err("\n"); | |
2977 log_err("Expected offsets: "); | |
2978 for(i=0; i<(targ-junkout); i++) | |
2979 log_err(" %2d,", expectOffsets[i]); | |
2980 log_err("\n"); | |
2981 log_err("Got output: "); | |
2982 for(i=0; i<(targ-junkout); i++) | |
2983 log_err("0x%04x,", junkout[i]); | |
2984 log_err("\n"); | |
2985 log_err("From source: "); | |
2986 for(i=0; i<(src-(const char *)source); i++) | |
2987 log_err(" 0x%02x,", (unsigned char)source[i]); | |
2988 log_err("\n"); | |
2989 } | |
2990 } | |
2991 | |
2992 if(!memcmp(junkout, expect, expectlen*2)) | |
2993 { | |
2994 log_verbose("Matches!\n"); | |
2995 return TRUE; | |
2996 } | |
2997 else | |
2998 { | |
2999 log_err("String does not match. %s\n", gNuConvTestName); | |
3000 log_verbose("String does not match. %s\n", gNuConvTestName); | |
3001 log_err("Got: "); | |
3002 printUSeqErr(junkout, expectlen); | |
3003 log_err("Expected: "); | |
3004 printUSeqErr(expect, expectlen); | |
3005 log_err("\n"); | |
3006 return FALSE; | |
3007 } | |
3008 } | |
3009 | |
3010 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, con
st uint8_t *expect, int expectLen, | |
3011 const char *codepage, UConverterFromUCallback callback , const i
nt32_t *expectOffsets, | |
3012 const char *mySubChar, int8_t len, const void* context, UErrorCo
de expectedError) | |
3013 { | |
3014 | |
3015 | |
3016 UErrorCode status = U_ZERO_ERROR; | |
3017 UConverter *conv = 0; | |
3018 char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
3019 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
3020 const UChar *src; | |
3021 char *end; | |
3022 char *targ; | |
3023 int32_t *offs; | |
3024 int i; | |
3025 int32_t realBufferSize; | |
3026 char *realBufferEnd; | |
3027 const UChar *realSourceEnd; | |
3028 const UChar *sourceLimit; | |
3029 UBool checkOffsets = TRUE; | |
3030 UBool doFlush; | |
3031 char junk[9999]; | |
3032 char offset_str[9999]; | |
3033 char *p; | |
3034 UConverterFromUCallback oldAction = NULL; | |
3035 const void* oldContext = NULL; | |
3036 | |
3037 | |
3038 for(i=0;i<NEW_MAX_BUFFER;i++) | |
3039 junkout[i] = (char)0xF0; | |
3040 for(i=0;i<NEW_MAX_BUFFER;i++) | |
3041 junokout[i] = 0xFF; | |
3042 setNuConvTestName(codepage, "FROM"); | |
3043 | |
3044 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer=
%d\n", codepage, gInBufferSize, | |
3045 gOutBufferSize); | |
3046 | |
3047 conv = ucnv_open(codepage, &status); | |
3048 if(U_FAILURE(status)) | |
3049 { | |
3050 log_data_err("Couldn't open converter %s\n",codepage); | |
3051 return TRUE; /* Because the err has already been logged. */ | |
3052 } | |
3053 | |
3054 log_verbose("Converter opened..\n"); | |
3055 | |
3056 /*----setting the callback routine----*/ | |
3057 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &st
atus); | |
3058 if (U_FAILURE(status)) | |
3059 { | |
3060 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
3061 } | |
3062 /*------------------------*/ | |
3063 /*setting the subChar*/ | |
3064 if(mySubChar != NULL){ | |
3065 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3066 if (U_FAILURE(status)) { | |
3067 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(s
tatus)); | |
3068 } | |
3069 } | |
3070 /*------------*/ | |
3071 | |
3072 src = source; | |
3073 targ = junkout; | |
3074 offs = junokout; | |
3075 | |
3076 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
3077 realBufferEnd = junkout + realBufferSize; | |
3078 realSourceEnd = source + sourceLen; | |
3079 | |
3080 if ( gOutBufferSize != realBufferSize ) | |
3081 checkOffsets = FALSE; | |
3082 | |
3083 if( gInBufferSize != NEW_MAX_BUFFER ) | |
3084 checkOffsets = FALSE; | |
3085 | |
3086 do | |
3087 { | |
3088 end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
3089 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
3090 | |
3091 doFlush = (UBool)(sourceLimit == realSourceEnd); | |
3092 | |
3093 if(targ == realBufferEnd) | |
3094 { | |
3095 log_err("Error, overflowed the real buffer while about to call fromU
nicode! targ=%08lx %s", targ, gNuConvTestName); | |
3096 return FALSE; | |
3097 } | |
3098 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx
to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
3099 | |
3100 | |
3101 status = U_ZERO_ERROR; | |
3102 | |
3103 ucnv_fromUnicode (conv, | |
3104 (char **)&targ, | |
3105 (const char *)end, | |
3106 &src, | |
3107 sourceLimit, | |
3108 checkOffsets ? offs : NULL, | |
3109 doFlush, /* flush if we're at the end of the input data */ | |
3110 &status); | |
3111 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sour
ceLimit < realSourceEnd)) ); | |
3112 | |
3113 /* allow failure codes for the stop callback */ | |
3114 if(U_FAILURE(status) && status != expectedError) | |
3115 { | |
3116 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
3117 return FALSE; | |
3118 } | |
3119 | |
3120 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
3121 sourceLen, targ-junkout); | |
3122 if(getTestOption(VERBOSITY_OPTION)) | |
3123 { | |
3124 | |
3125 junk[0] = 0; | |
3126 offset_str[0] = 0; | |
3127 for(p = junkout;p<targ;p++) | |
3128 { | |
3129 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
3130 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsig
ned int)junokout[p-junkout]); | |
3131 } | |
3132 | |
3133 log_verbose(junk); | |
3134 printSeq(expect, expectLen); | |
3135 if ( checkOffsets ) | |
3136 { | |
3137 log_verbose("\nOffsets:"); | |
3138 log_verbose(offset_str); | |
3139 } | |
3140 log_verbose("\n"); | |
3141 } | |
3142 ucnv_close(conv); | |
3143 | |
3144 | |
3145 if(expectLen != targ-junkout) | |
3146 { | |
3147 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, g
NuConvTestName); | |
3148 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkou
t, gNuConvTestName); | |
3149 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
3150 printSeqErr(expect, expectLen); | |
3151 return FALSE; | |
3152 } | |
3153 | |
3154 if (checkOffsets && (expectOffsets != 0) ) | |
3155 { | |
3156 log_verbose("comparing %d offsets..\n", targ-junkout); | |
3157 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
3158 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
3159 log_err("Got Output : "); | |
3160 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
3161 log_err("Got Offsets: "); | |
3162 for(p=junkout;p<targ;p++) | |
3163 log_err("%d,", junokout[p-junkout]); | |
3164 log_err("\n"); | |
3165 log_err("Expected Offsets: "); | |
3166 for(i=0; i<(targ-junkout); i++) | |
3167 log_err("%d,", expectOffsets[i]); | |
3168 log_err("\n"); | |
3169 return FALSE; | |
3170 } | |
3171 } | |
3172 | |
3173 if(!memcmp(junkout, expect, expectLen)) | |
3174 { | |
3175 log_verbose("String matches! %s\n", gNuConvTestName); | |
3176 return TRUE; | |
3177 } | |
3178 else | |
3179 { | |
3180 log_err("String does not match. %s\n", gNuConvTestName); | |
3181 log_err("source: "); | |
3182 printUSeqErr(source, sourceLen); | |
3183 log_err("Got: "); | |
3184 printSeqErr((const uint8_t *)junkout, expectLen); | |
3185 log_err("Expected: "); | |
3186 printSeqErr(expect, expectLen); | |
3187 return FALSE; | |
3188 } | |
3189 } | |
3190 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, con
st UChar *expect, int expectlen, | |
3191 const char *codepage, UConverterToUCallback callback, const int32
_t *expectOffsets, | |
3192 const char *mySubChar, int8_t len, const void* context, UErrorCod
e expectedError) | |
3193 { | |
3194 UErrorCode status = U_ZERO_ERROR; | |
3195 UConverter *conv = 0; | |
3196 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
3197 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
3198 const char *src; | |
3199 const char *realSourceEnd; | |
3200 const char *srcLimit; | |
3201 UChar *targ; | |
3202 UChar *end; | |
3203 int32_t *offs; | |
3204 int i; | |
3205 UBool checkOffsets = TRUE; | |
3206 char junk[9999]; | |
3207 char offset_str[9999]; | |
3208 UChar *p; | |
3209 UConverterToUCallback oldAction = NULL; | |
3210 const void* oldContext = NULL; | |
3211 | |
3212 int32_t realBufferSize; | |
3213 UChar *realBufferEnd; | |
3214 | |
3215 | |
3216 for(i=0;i<NEW_MAX_BUFFER;i++) | |
3217 junkout[i] = 0xFFFE; | |
3218 | |
3219 for(i=0;i<NEW_MAX_BUFFER;i++) | |
3220 junokout[i] = -1; | |
3221 | |
3222 setNuConvTestName(codepage, "TO"); | |
3223 | |
3224 log_verbose("\n========= %s\n", gNuConvTestName); | |
3225 | |
3226 conv = ucnv_open(codepage, &status); | |
3227 if(U_FAILURE(status)) | |
3228 { | |
3229 log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
3230 return TRUE; | |
3231 } | |
3232 | |
3233 log_verbose("Converter opened..\n"); | |
3234 | |
3235 src = (const char *)source; | |
3236 targ = junkout; | |
3237 offs = junokout; | |
3238 | |
3239 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
3240 realBufferEnd = junkout + realBufferSize; | |
3241 realSourceEnd = src + sourcelen; | |
3242 /*----setting the callback routine----*/ | |
3243 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &stat
us); | |
3244 if (U_FAILURE(status)) | |
3245 { | |
3246 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
3247 } | |
3248 /*-------------------------------------*/ | |
3249 /*setting the subChar*/ | |
3250 if(mySubChar != NULL){ | |
3251 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
3252 if (U_FAILURE(status)) { | |
3253 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); | |
3254 } | |
3255 } | |
3256 /*------------*/ | |
3257 | |
3258 | |
3259 if ( gOutBufferSize != realBufferSize ) | |
3260 checkOffsets = FALSE; | |
3261 | |
3262 if( gInBufferSize != NEW_MAX_BUFFER ) | |
3263 checkOffsets = FALSE; | |
3264 | |
3265 do | |
3266 { | |
3267 end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
3268 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
3269 | |
3270 if(targ == realBufferEnd) | |
3271 { | |
3272 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
3273 return FALSE; | |
3274 } | |
3275 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
3276 | |
3277 | |
3278 | |
3279 status = U_ZERO_ERROR; | |
3280 | |
3281 ucnv_toUnicode (conv, | |
3282 &targ, | |
3283 end, | |
3284 (const char **)&src, | |
3285 (const char *)srcLimit, | |
3286 checkOffsets ? offs : NULL, | |
3287 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of the source data */ | |
3288 &status); | |
3289 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcL
imit < realSourceEnd)) ); /* while we just need another buffer */ | |
3290 | |
3291 /* allow failure codes for the stop callback */ | |
3292 if(U_FAILURE(status) && status!=expectedError) | |
3293 { | |
3294 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
3295 return FALSE; | |
3296 } | |
3297 | |
3298 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
3299 sourcelen, targ-junkout); | |
3300 if(getTestOption(VERBOSITY_OPTION)) | |
3301 { | |
3302 | |
3303 junk[0] = 0; | |
3304 offset_str[0] = 0; | |
3305 | |
3306 for(p = junkout;p<targ;p++) | |
3307 { | |
3308 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
); | |
3309 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[p-junkout]); | |
3310 } | |
3311 | |
3312 log_verbose(junk); | |
3313 printUSeq(expect, expectlen); | |
3314 if ( checkOffsets ) | |
3315 { | |
3316 log_verbose("\nOffsets:"); | |
3317 log_verbose(offset_str); | |
3318 } | |
3319 log_verbose("\n"); | |
3320 } | |
3321 ucnv_close(conv); | |
3322 | |
3323 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
3324 | |
3325 if (checkOffsets && (expectOffsets != 0)) | |
3326 { | |
3327 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
3328 { | |
3329 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
3330 log_err("Got offsets: "); | |
3331 for(p=junkout;p<targ;p++) | |
3332 log_err(" %2d,", junokout[p-junkout]); | |
3333 log_err("\n"); | |
3334 log_err("Expected offsets: "); | |
3335 for(i=0; i<(targ-junkout); i++) | |
3336 log_err(" %2d,", expectOffsets[i]); | |
3337 log_err("\n"); | |
3338 log_err("Got output: "); | |
3339 for(i=0; i<(targ-junkout); i++) | |
3340 log_err("0x%04x,", junkout[i]); | |
3341 log_err("\n"); | |
3342 log_err("From source: "); | |
3343 for(i=0; i<(src-(const char *)source); i++) | |
3344 log_err(" 0x%02x,", (unsigned char)source[i]); | |
3345 log_err("\n"); | |
3346 } | |
3347 } | |
3348 | |
3349 if(!memcmp(junkout, expect, expectlen*2)) | |
3350 { | |
3351 log_verbose("Matches!\n"); | |
3352 return TRUE; | |
3353 } | |
3354 else | |
3355 { | |
3356 log_err("String does not match. %s\n", gNuConvTestName); | |
3357 log_verbose("String does not match. %s\n", gNuConvTestName); | |
3358 log_err("Got: "); | |
3359 printUSeqErr(junkout, expectlen); | |
3360 log_err("Expected: "); | |
3361 printUSeqErr(expect, expectlen); | |
3362 log_err("\n"); | |
3363 return FALSE; | |
3364 } | |
3365 } | |
3366 | |
3367 static void TestCallBackFailure(void) { | |
3368 UErrorCode status = U_USELESS_COLLATOR_ERROR; | |
3369 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); | |
3370 if (status != U_USELESS_COLLATOR_ERROR) { | |
3371 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad
UErrorCode\n"); | |
3372 } | |
3373 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); | |
3374 if (status != U_USELESS_COLLATOR_ERROR) { | |
3375 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad
UErrorCode\n"); | |
3376 } | |
3377 ucnv_cbFromUWriteSub(NULL, -1, &status); | |
3378 if (status != U_USELESS_COLLATOR_ERROR) { | |
3379 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UE
rrorCode\n"); | |
3380 } | |
3381 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); | |
3382 if (status != U_USELESS_COLLATOR_ERROR) { | |
3383 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad U
ErrorCode\n"); | |
3384 } | |
3385 } | |
OLD | NEW |