| OLD | NEW |
| (Empty) |
| 1 /******************************************************************** | |
| 2 * COPYRIGHT: | |
| 3 * Copyright (c) 1997-2013, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ********************************************************************/ | |
| 6 /* | |
| 7 ******************************************************************************** | |
| 8 * File NCCBTST.C | |
| 9 * | |
| 10 * Modification History: | |
| 11 * Name Description | |
| 12 * Madhu Katragadda 7/21/1999 Testing error callback routines | |
| 13 ******************************************************************************** | |
| 14 */ | |
| 15 #include <stdio.h> | |
| 16 #include <stdlib.h> | |
| 17 #include <string.h> | |
| 18 #include <ctype.h> | |
| 19 #include "cstring.h" | |
| 20 #include "unicode/uloc.h" | |
| 21 #include "unicode/ucnv.h" | |
| 22 #include "unicode/ucnv_err.h" | |
| 23 #include "cintltst.h" | |
| 24 #include "unicode/utypes.h" | |
| 25 #include "unicode/ustring.h" | |
| 26 #include "nccbtst.h" | |
| 27 #include "unicode/ucnv_cb.h" | |
| 28 #include "unicode/utf16.h" | |
| 29 | |
/* Size passed by the callback tests when a buffer should be "large". */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Every argument use is parenthesized so that operands containing
 * low-precedence operators (e.g. `a & b`, `c ? d : e`) are compared as a
 * whole. Arguments can still be evaluated more than once, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y) (((x)<(y)) ? (x) : (y))
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes of the test currently running; written by the Test* drivers. */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Human-readable label of the current test, filled by setNuConvTestName(). */
static char gNuConvTestName[1024];
| 38 | |
/*
 * Write the first len bytes of a to the verbose log as a brace-delimited
 * list of upper-case hex values. Nothing but the braces is printed when
 * len is zero or negative.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
| 47 | |
| 48 static void printUSeq(const UChar* a, int len) | |
| 49 { | |
| 50 int i=0; | |
| 51 log_verbose("{"); | |
| 52 while (i<len) | |
| 53 log_verbose(" 0x%04x, ", a[i++]); | |
| 54 log_verbose("}\n"); | |
| 55 } | |
| 56 | |
/*
 * Same listing as printSeq(), but written straight to stderr (lower-case
 * hex) so it is shown regardless of the verbose-log setting.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
| 65 | |
| 66 static void printUSeqErr(const UChar* a, int len) | |
| 67 { | |
| 68 int i=0; | |
| 69 fprintf(stderr, "{"); | |
| 70 while (i<len) | |
| 71 fprintf(stderr, "0x%04x, ", a[i++]); | |
| 72 fprintf(stderr,"}\n"); | |
| 73 } | |
| 74 | |
| 75 static void setNuConvTestName(const char *codepage, const char *direction) | |
| 76 { | |
| 77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufS
iz=%d]", | |
| 78 codepage, | |
| 79 direction, | |
| 80 (int)gInBufferSize, | |
| 81 (int)gOutBufferSize); | |
| 82 } | |
| 83 | |
| 84 | |
| 85 static void TestCallBackFailure(void); | |
| 86 | |
| 87 void addTestConvertErrorCallBack(TestNode** root); | |
| 88 | |
| 89 void addTestConvertErrorCallBack(TestNode** root) | |
| 90 { | |
| 91 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); | |
| 92 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); | |
| 93 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); | |
| 94 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCal
lBack"); | |
| 95 | |
| 96 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 97 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOther
CallBack"); | |
| 98 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBa
ck"); | |
| 99 #endif | |
| 100 | |
| 101 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); | |
| 102 } | |
| 103 | |
| 104 static void TestSkipCallBack() | |
| 105 { | |
| 106 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 107 TestSkip(1,NEW_MAX_BUFFER); | |
| 108 TestSkip(1,1); | |
| 109 TestSkip(NEW_MAX_BUFFER, 1); | |
| 110 } | |
| 111 | |
| 112 static void TestStopCallBack() | |
| 113 { | |
| 114 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 115 TestStop(1,NEW_MAX_BUFFER); | |
| 116 TestStop(1,1); | |
| 117 TestStop(NEW_MAX_BUFFER, 1); | |
| 118 } | |
| 119 | |
| 120 static void TestSubCallBack() | |
| 121 { | |
| 122 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 123 TestSub(1,NEW_MAX_BUFFER); | |
| 124 TestSub(1,1); | |
| 125 TestSub(NEW_MAX_BUFFER, 1); | |
| 126 | |
| 127 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 128 TestEBCDIC_STATEFUL_Sub(1, 1); | |
| 129 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); | |
| 130 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); | |
| 131 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 132 #endif | |
| 133 } | |
| 134 | |
| 135 static void TestSubWithValueCallBack() | |
| 136 { | |
| 137 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 138 TestSubWithValue(1,NEW_MAX_BUFFER); | |
| 139 TestSubWithValue(1,1); | |
| 140 TestSubWithValue(NEW_MAX_BUFFER, 1); | |
| 141 } | |
| 142 | |
| 143 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 144 static void TestLegalAndOtherCallBack() | |
| 145 { | |
| 146 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 147 TestLegalAndOthers(1,NEW_MAX_BUFFER); | |
| 148 TestLegalAndOthers(1,1); | |
| 149 TestLegalAndOthers(NEW_MAX_BUFFER, 1); | |
| 150 } | |
| 151 | |
| 152 static void TestSingleByteCallBack() | |
| 153 { | |
| 154 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); | |
| 155 TestSingleByte(1,NEW_MAX_BUFFER); | |
| 156 TestSingleByte(1,1); | |
| 157 TestSingleByte(NEW_MAX_BUFFER, 1); | |
| 158 } | |
| 159 #endif | |
| 160 | |
| 161 static void TestSkip(int32_t inputsize, int32_t outputsize) | |
| 162 { | |
| 163 static const uint8_t expskipIBM_949[]= { | |
| 164 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; | |
| 165 | |
| 166 static const uint8_t expskipIBM_943[] = { | |
| 167 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; | |
| 168 | |
| 169 static const uint8_t expskipIBM_930[] = { | |
| 170 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; | |
| 171 | |
| 172 gInBufferSize = inputsize; | |
| 173 gOutBufferSize = outputsize; | |
| 174 | |
| 175 /*From Unicode*/ | |
| 176 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); | |
| 177 | |
| 178 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 179 { | |
| 180 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0
xD700 }; | |
| 181 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
| 182 | |
| 183 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; | |
| 184 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; | |
| 185 | |
| 186 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleT
ext[0]), | |
| 187 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", | |
| 188 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) | |
| 189 log_err("u-> ibm-949 with skip did not match.\n"); | |
| 190 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampl
eText2[0]), | |
| 191 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", | |
| 192 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) | |
| 193 log_err("u-> ibm-943 with skip did not match.\n"); | |
| 194 } | |
| 195 | |
| 196 { | |
| 197 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d6
4, 0x63, 0xff5e, 0x6d66 }; | |
| 198 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d
, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; | |
| 199 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8,
8, 8 }; | |
| 200 | |
| 201 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to c
heck correct state transitions */ | |
| 202 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, | |
| 203 fromUBytes, sizeof(fromUBytes), | |
| 204 "ibm-930", | |
| 205 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, | |
| 206 NULL, 0) | |
| 207 ) { | |
| 208 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\
n"); | |
| 209 } | |
| 210 } | |
| 211 #endif | |
| 212 | |
| 213 { | |
| 214 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800,
0xdfff, 0x39 }; | |
| 215 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; | |
| 216 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; | |
| 217 | |
| 218 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0
xdfff, 0x39 }; | |
| 219 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; | |
| 220 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; | |
| 221 | |
| 222 /* US-ASCII */ | |
| 223 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_U
CHAR, | |
| 224 usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
| 225 "US-ASCII", | |
| 226 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffset
s, | |
| 227 NULL, 0) | |
| 228 ) { | |
| 229 log_err("u->US-ASCII with skip did not match.\n"); | |
| 230 } | |
| 231 | |
| 232 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 233 /* SBCS NLTC codepage 367 for US-ASCII */ | |
| 234 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_U
CHAR, | |
| 235 usasciiFromUBytes, sizeof(usasciiFromUBytes), | |
| 236 "ibm-367", | |
| 237 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffset
s, | |
| 238 NULL, 0) | |
| 239 ) { | |
| 240 log_err("u->ibm-367 with skip did not match.\n"); | |
| 241 } | |
| 242 #endif | |
| 243 | |
| 244 /* ISO-Latin-1 */ | |
| 245 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCH
AR, | |
| 246 latin1FromUBytes, sizeof(latin1FromUBytes), | |
| 247 "LATIN_1", | |
| 248 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets
, | |
| 249 NULL, 0) | |
| 250 ) { | |
| 251 log_err("u->LATIN_1 with skip did not match.\n"); | |
| 252 } | |
| 253 | |
| 254 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 255 /* windows-1252 */ | |
| 256 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCH
AR, | |
| 257 latin1FromUBytes, sizeof(latin1FromUBytes), | |
| 258 "windows-1252", | |
| 259 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets
, | |
| 260 NULL, 0) | |
| 261 ) { | |
| 262 log_err("u->windows-1252 with skip did not match.\n"); | |
| 263 } | |
| 264 } | |
| 265 | |
| 266 { | |
| 267 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
| 268 static const uint8_t toIBM943[]= { 0x61, 0x61 }; | |
| 269 static const int32_t offset[]= {0, 4}; | |
| 270 | |
| 271 /* EUC_JP*/ | |
| 272 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
| 273 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
| 274 0x61, 0x8e, 0xe0, | |
| 275 }; | |
| 276 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; | |
| 277 | |
| 278 /*EUC_TW*/ | |
| 279 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
| 280 static const uint8_t to_euc_tw[]={ | |
| 281 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
| 282 0x61, 0xe6, 0xca, 0x8a, | |
| 283 }; | |
| 284 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7,
8,}; | |
| 285 | |
| 286 /*ISO-2022-JP*/ | |
| 287 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/
,0x0042, }; | |
| 288 static const uint8_t to_iso_2022_jp[]={ | |
| 289 0x41, | |
| 290 0x42, | |
| 291 | |
| 292 }; | |
| 293 static const int32_t from_iso_2022_jpOffs [] ={0,2}; | |
| 294 | |
| 295 /*ISO-2022-JP*/ | |
| 296 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,
0xd800/*illegal*/,0x0042, }; | |
| 297 static const uint8_t to_iso_2022_jp2[]={ | |
| 298 0x41, | |
| 299 0x43, | |
| 300 | |
| 301 }; | |
| 302 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; | |
| 303 | |
| 304 /*ISO-2022-cn*/ | |
| 305 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*
/, 0x0042, }; | |
| 306 static const uint8_t to_iso_2022_cn[]={ | |
| 307 0x41, 0x42 | |
| 308 }; | |
| 309 static const int32_t from_iso_2022_cnOffs [] ={ | |
| 310 0, 2 | |
| 311 }; | |
| 312 | |
| 313 /*ISO-2022-CN*/ | |
| 314 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*
/,0x43,0xd800/*illegal*/,0x0042, }; | |
| 315 static const uint8_t to_iso_2022_cn1[]={ | |
| 316 0x41, 0x43 | |
| 317 | |
| 318 }; | |
| 319 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; | |
| 320 | |
| 321 /*ISO-2022-kr*/ | |
| 322 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042, }; | |
| 323 static const uint8_t to_iso_2022_kr[]={ | |
| 324 0x1b, 0x24, 0x29, 0x43, | |
| 325 0x41, | |
| 326 0x0e, 0x25, 0x50, | |
| 327 0x25, 0x50, | |
| 328 0x0f, 0x42, | |
| 329 }; | |
| 330 static const int32_t from_iso_2022_krOffs [] ={ | |
| 331 -1,-1,-1,-1, | |
| 332 0, | |
| 333 1,1,1, | |
| 334 3,3, | |
| 335 4,4 | |
| 336 }; | |
| 337 | |
| 338 /*ISO-2022-kr*/ | |
| 339 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*una
ssigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
| 340 static const uint8_t to_iso_2022_kr1[]={ | |
| 341 0x1b, 0x24, 0x29, 0x43, | |
| 342 0x41, | |
| 343 0x0e, 0x25, 0x50, | |
| 344 0x25, 0x50, | |
| 345 | |
| 346 }; | |
| 347 static const int32_t from_iso_2022_krOffs1 [] ={ | |
| 348 -1,-1,-1,-1, | |
| 349 0, | |
| 350 1,1,1, | |
| 351 3,3, | |
| 352 | |
| 353 }; | |
| 354 /* HZ encoding */ | |
| 355 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; | |
| 356 | |
| 357 static const uint8_t to_hz[]={ | |
| 358 0x7e, 0x7d, 0x41, | |
| 359 0x7e, 0x7b, 0x26, 0x30, | |
| 360 0x26, 0x30, | |
| 361 0x7e, 0x7d, 0x42, | |
| 362 | |
| 363 }; | |
| 364 static const int32_t from_hzOffs [] ={ | |
| 365 0,0,0, | |
| 366 1,1,1,1, | |
| 367 3,3, | |
| 368 4,4,4,4 | |
| 369 }; | |
| 370 | |
| 371 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/
,0x03A0,0xd801/*illegal*/, 0x0042, }; | |
| 372 | |
| 373 static const uint8_t to_hz1[]={ | |
| 374 0x7e, 0x7d, 0x41, | |
| 375 0x7e, 0x7b, 0x26, 0x30, | |
| 376 0x26, 0x30, | |
| 377 | |
| 378 | |
| 379 }; | |
| 380 static const int32_t from_hzOffs1 [] ={ | |
| 381 0,0,0, | |
| 382 1,1,1,1, | |
| 383 3,3, | |
| 384 | |
| 385 }; | |
| 386 | |
| 387 #endif | |
| 388 | |
| 389 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042,
}; | |
| 390 | |
| 391 static const uint8_t to_SCSU[]={ | |
| 392 0x41, | |
| 393 0x42 | |
| 394 | |
| 395 | |
| 396 }; | |
| 397 static const int32_t from_SCSUOffs [] ={ | |
| 398 0, | |
| 399 2, | |
| 400 | |
| 401 }; | |
| 402 | |
| 403 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 404 /* ISCII */ | |
| 405 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0
042, }; | |
| 406 static const uint8_t to_iscii[]={ | |
| 407 0x41, | |
| 408 0x42, | |
| 409 }; | |
| 410 static const int32_t from_isciiOffs [] ={ | |
| 411 0,2, | |
| 412 | |
| 413 }; | |
| 414 /*ISCII*/ | |
| 415 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43
,0xd800/*illegal*/,0x0042, }; | |
| 416 static const uint8_t to_iscii1[]={ | |
| 417 0x44, | |
| 418 0x43, | |
| 419 | |
| 420 }; | |
| 421 static const int32_t from_isciiOffs1 [] ={0,2}; | |
| 422 | |
| 423 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
| 424 toIBM943, sizeof(toIBM943), "ibm-943", | |
| 425 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) | |
| 426 log_err("u-> ibm-943 with skip did not match.\n"); | |
| 427 | |
| 428 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
| 429 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
| 430 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) | |
| 431 log_err("u-> euc-jp with skip did not match.\n"); | |
| 432 | |
| 433 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
| 434 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
| 435 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) | |
| 436 log_err("u-> euc-tw with skip did not match.\n"); | |
| 437 | |
| 438 /*iso_2022_jp*/ | |
| 439 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), | |
| 440 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
| 441 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) | |
| 442 log_err("u-> iso-2022-jp with skip did not match.\n"); | |
| 443 | |
| 444 /* with context */ | |
| 445 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso
_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), | |
| 446 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
| 447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
| 448 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); | |
| 449 | |
| 450 /*iso_2022_cn*/ | |
| 451 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), | |
| 452 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
| 453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) | |
| 454 log_err("u-> iso-2022-cn with skip did not match.\n"); | |
| 455 /*with context*/ | |
| 456 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso
_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), | |
| 457 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", | |
| 458 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
| 459 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); | |
| 460 | |
| 461 /*iso_2022_kr*/ | |
| 462 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), | |
| 463 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
| 464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) | |
| 465 log_err("u-> iso-2022-kr with skip did not match.\n"); | |
| 466 /*with context*/ | |
| 467 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso
_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), | |
| 468 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", | |
| 469 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
| 470 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); | |
| 471 | |
| 472 /*hz*/ | |
| 473 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), | |
| 474 to_hz, sizeof(to_hz), "HZ", | |
| 475 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) | |
| 476 log_err("u-> HZ with skip did not match.\n"); | |
| 477 /*with context*/ | |
| 478 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText
1)/sizeof(hz_inputText1[0]), | |
| 479 to_hz1, sizeof(to_hz1), "hz", | |
| 480 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_
ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
| 481 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.
\n"); | |
| 482 #endif | |
| 483 | |
| 484 /*SCSU*/ | |
| 485 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), | |
| 486 to_SCSU, sizeof(to_SCSU), "SCSU", | |
| 487 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) | |
| 488 log_err("u-> SCSU with skip did not match.\n"); | |
| 489 | |
| 490 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 491 /*ISCII*/ | |
| 492 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), | |
| 493 to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
| 494 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) | |
| 495 log_err("u-> iscii with skip did not match.\n"); | |
| 496 /*with context*/ | |
| 497 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inp
utText1)/sizeof(iscii_inputText1[0]), | |
| 498 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", | |
| 499 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_ST
OP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) | |
| 500 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not mat
ch.\n"); | |
| 501 #endif | |
| 502 } | |
| 503 | |
| 504 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n")
; | |
| 505 { | |
| 506 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU
1 text 1 */ | |
| 507 0xFB, 0xEE, 0x28, /* from source offset 0 */ | |
| 508 0x24, 0x1E, 0x52, | |
| 509 0xB2, | |
| 510 0x20, | |
| 511 0xB3, | |
| 512 0xB1, | |
| 513 0x0D, | |
| 514 0x0A, | |
| 515 | |
| 516 0x20, /* from 8 */ | |
| 517 0x00, | |
| 518 0xD0, 0x6C, | |
| 519 0xB6, | |
| 520 0xD8, 0xA5, | |
| 521 0x20, | |
| 522 0x68, | |
| 523 0x59, | |
| 524 | |
| 525 0xF9, 0x28, /* from 16 */ | |
| 526 0x6D, | |
| 527 0x20, | |
| 528 0x73, | |
| 529 0xE0, 0x2D, | |
| 530 0xDE, 0x43, | |
| 531 0xD0, 0x33, | |
| 532 0x20, | |
| 533 | |
| 534 0xFA, 0x83, /* from 24 */ | |
| 535 0x25, 0x01, | |
| 536 0xFB, 0x16, 0x87, | |
| 537 0x4B, 0x16, | |
| 538 0x20, | |
| 539 0xE6, 0xBD, | |
| 540 0xEB, 0x5B, | |
| 541 0x4B, 0xCC, | |
| 542 | |
| 543 0xF9, 0xA2, /* from 32 */ | |
| 544 0xFC, 0x10, 0x3E, | |
| 545 0xFE, 0x16, 0x3A, 0x8C, | |
| 546 0x20, | |
| 547 0xFC, 0x03, 0xAC, | |
| 548 | |
| 549 0x01, /* from 41 */ | |
| 550 0xDE, 0x83, | |
| 551 0x20, | |
| 552 0x09 | |
| 553 }; | |
| 554 static const UChar expected[]={ | |
| 555 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ | |
| 556 0x0063, 0x0061, 0x000D, 0x000A, | |
| 557 | |
| 558 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ | |
| 559 0x0930, 0x0020, 0x0918, 0x0909, | |
| 560 | |
| 561 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ | |
| 562 0x4000, 0x4E00, 0x7777, 0x0020, | |
| 563 | |
| 564 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ | |
| 565 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
| 566 | |
| 567 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ | |
| 568 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
| 569 | |
| 570 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ | |
| 571 0x0009 | |
| 572 }; | |
| 573 static const int32_t offsets[]={ | |
| 574 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, | |
| 575 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, | |
| 576 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, | |
| 577 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, | |
| 578 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, | |
| 579 41, 42, 42, 43, 44 | |
| 580 }; | |
| 581 | |
| 582 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-b
yte and offsets behavior */ | |
| 583 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
| 584 sampleText, sizeof(sampleText), | |
| 585 "BOCU-1", | |
| 586 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
| 587 ) { | |
| 588 log_err("u->BOCU-1 with skip did not match.\n"); | |
| 589 } | |
| 590 } | |
| 591 | |
| 592 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n")
; | |
| 593 { | |
| 594 const uint8_t sampleText[]={ | |
| 595 0x61, /* 'a' */ | |
| 596 0xc4, 0xb5, /* U+0135 */ | |
| 597 0xed, 0x80, 0xa0, /* Hangul U+d020 */ | |
| 598 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ | |
| 599 0xee, 0x80, 0x80, /* PUA U+e000 */ | |
| 600 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc
01 */ | |
| 601 0x62, /* 'b' */ | |
| 602 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d80
1 */ | |
| 603 0xd0, 0x80 /* U+0400 */ | |
| 604 }; | |
| 605 UChar expected[]={ | |
| 606 0x0061, | |
| 607 0x0135, | |
| 608 0xd020, | |
| 609 0xd801, 0xdc01, | |
| 610 0xe000, | |
| 611 0xdc01, | |
| 612 0x0062, | |
| 613 0xd801, | |
| 614 0x0400 | |
| 615 }; | |
| 616 int32_t offsets[]={ | |
| 617 0, | |
| 618 1, 1, | |
| 619 2, 2, 2, | |
| 620 3, 3, 3, 4, 4, 4, | |
| 621 5, 5, 5, | |
| 622 6, 6, 6, | |
| 623 7, | |
| 624 8, 8, 8, | |
| 625 9, 9 | |
| 626 }; | |
| 627 | |
| 628 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversi
on and offsets behavior */ | |
| 629 | |
| 630 /* without offsets */ | |
| 631 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
| 632 sampleText, sizeof(sampleText), | |
| 633 "CESU-8", | |
| 634 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) | |
| 635 ) { | |
| 636 log_err("u->CESU-8 with skip did not match.\n"); | |
| 637 } | |
| 638 | |
| 639 /* with offsets */ | |
| 640 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), | |
| 641 sampleText, sizeof(sampleText), | |
| 642 "CESU-8", | |
| 643 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) | |
| 644 ) { | |
| 645 log_err("u->CESU-8 with skip did not match.\n"); | |
| 646 } | |
| 647 } | |
| 648 | |
| 649 /*to Unicode*/ | |
| 650 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); | |
| 651 | |
| 652 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 653 { | |
| 654 | |
| 655 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD7
00 }; | |
| 656 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
| 657 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; | |
| 658 | |
| 659 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; | |
| 660 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; | |
| 661 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; | |
| 662 | |
| 663 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), | |
| 664 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_9
49skiptoUnicode),"ibm-949", | |
| 665 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) | |
| 666 log_err("ibm-949->u with skip did not match.\n"); | |
| 667 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), | |
| 668 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_9
43skiptoUnicode[0]),"ibm-943", | |
| 669 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) | |
| 670 log_err("ibm-943->u with skip did not match.\n"); | |
| 671 | |
| 672 | |
| 673 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), | |
| 674 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_9
30skiptoUnicode[0]),"ibm-930", | |
| 675 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) | |
| 676 log_err("ibm-930->u with skip did not match.\n"); | |
| 677 | |
| 678 | |
| 679 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_93
0), | |
| 680 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_9
30skiptoUnicode[0]),"ibm-930", | |
| 681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_C
HAR_FOUND )) | |
| 682 log_err("ibm-930->u with skip did not match.\n"); | |
| 683 } | |
| 684 #endif | |
| 685 | |
| 686 { | |
| 687 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; | |
| 688 static const UChar usasciiToU[] = { 0x61, 0x31 }; | |
| 689 static const int32_t usasciiToUOffsets[] = { 0, 2 }; | |
| 690 | |
| 691 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; | |
| 692 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; | |
| 693 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; | |
| 694 | |
| 695 /* US-ASCII */ | |
| 696 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
| 697 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
| 698 "US-ASCII", | |
| 699 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
| 700 NULL, 0) | |
| 701 ) { | |
| 702 log_err("US-ASCII->u with skip did not match.\n"); | |
| 703 } | |
| 704 | |
| 705 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 706 /* SBCS NLTC codepage 367 for US-ASCII */ | |
| 707 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), | |
| 708 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, | |
| 709 "ibm-367", | |
| 710 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, | |
| 711 NULL, 0) | |
| 712 ) { | |
| 713 log_err("ibm-367->u with skip did not match.\n"); | |
| 714 } | |
| 715 #endif | |
| 716 | |
| 717 /* ISO-Latin-1 */ | |
| 718 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
| 719 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
| 720 "LATIN_1", | |
| 721 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
| 722 NULL, 0) | |
| 723 ) { | |
| 724 log_err("LATIN_1->u with skip did not match.\n"); | |
| 725 } | |
| 726 | |
| 727 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 728 /* windows-1252 */ | |
| 729 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), | |
| 730 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, | |
| 731 "windows-1252", | |
| 732 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, | |
| 733 NULL, 0) | |
| 734 ) { | |
| 735 log_err("windows-1252->u with skip did not match.\n"); | |
| 736 } | |
| 737 #endif | |
| 738 } | |
| 739 | |
| 740 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 741 { | |
| 742 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
| 743 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
| 744 }; | |
| 745 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 | |
| 746 }; | |
| 747 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; | |
| 748 | |
| 749 | |
| 750 /* euc-jp*/ | |
| 751 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, | |
| 752 0x8f, 0xda, 0xa1, /*unassigned*/ | |
| 753 0x8e, 0xe0, | |
| 754 }; | |
| 755 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; | |
| 756 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; | |
| 757 | |
| 758 /*EUC_TW*/ | |
| 759 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2,
0xdc, 0xe5, | |
| 760 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
| 761 0xe6, 0xca, 0x8a, | |
| 762 }; | |
| 763 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0
x8a, }; | |
| 764 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; | |
| 765 /*iso-2022-jp*/ | |
| 766 static const uint8_t sampleTxt_iso_2022_jp[]={ | |
| 767 0x41, | |
| 768 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ | |
| 769 0x1b, 0x28, 0x42, 0x42, | |
| 770 | |
| 771 }; | |
| 772 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; | |
| 773 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; | |
| 774 | |
| 775 /*iso-2022-cn*/ | |
| 776 static const uint8_t sampleTxt_iso_2022_cn[]={ | |
| 777 0x0f, 0x41, 0x44, | |
| 778 0x1B, 0x24, 0x29, 0x47, | |
| 779 0x0E, 0x40, 0x6f, /*unassigned*/ | |
| 780 0x0f, 0x42, | |
| 781 | |
| 782 }; | |
| 783 | |
| 784 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; | |
| 785 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; | |
| 786 | |
| 787 /*iso-2022-kr*/ | |
| 788 static const uint8_t sampleTxt_iso_2022_kr[]={ | |
| 789 0x1b, 0x24, 0x29, 0x43, | |
| 790 0x41, | |
| 791 0x0E, 0x7f, 0x1E, | |
| 792 0x0e, 0x25, 0x50, | |
| 793 0x0f, 0x51, | |
| 794 0x42, 0x43, | |
| 795 | |
| 796 }; | |
| 797 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0
x43}; | |
| 798 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 ,
14 }; | |
| 799 | |
| 800 /*hz*/ | |
| 801 static const uint8_t sampleTxt_hz[]={ | |
| 802 0x41, | |
| 803 0x7e, 0x7b, 0x26, 0x30, | |
| 804 0x7f, 0x1E, /*unassigned*/ | |
| 805 0x26, 0x30, | |
| 806 0x7e, 0x7d, 0x42, | |
| 807 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
| 808 0x7e, 0x7d, 0x42, | |
| 809 }; | |
| 810 static const UChar hztoUnicode[]={ | |
| 811 0x41, | |
| 812 0x03a0, | |
| 813 0x03A0, | |
| 814 0x42, | |
| 815 0x42,}; | |
| 816 | |
| 817 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; | |
| 818 | |
| 819 /*ISCII*/ | |
| 820 static const uint8_t sampleTxt_iscii[]={ | |
| 821 0x41, | |
| 822 0xa1, | |
| 823 0xEB, /*unassigned*/ | |
| 824 0x26, | |
| 825 0x30, | |
| 826 0xa2, | |
| 827 0xEC, /*unassigned*/ | |
| 828 0x42, | |
| 829 }; | |
| 830 static const UChar isciitoUnicode[]={ | |
| 831 0x41, | |
| 832 0x0901, | |
| 833 0x26, | |
| 834 0x30, | |
| 835 0x0902, | |
| 836 0x42, | |
| 837 }; | |
| 838 | |
| 839 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; | |
| 840 | |
| 841 /*LMBCS*/ | |
| 842 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, | |
| 843 0x12, 0x92, 0xa0, /*unassigned*/ | |
| 844 0x12, 0x92, 0xA1, | |
| 845 }; | |
| 846 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; | |
| 847 static const int32_t fromLMBCS[] = {0, 6}; | |
| 848 | |
| 849 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCI
DIC_STATEFUL), | |
| 850 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
| 851 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
| 852 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
| 853 | |
| 854 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sa
mpleTxtEBCIDIC_STATEFUL), | |
| 855 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
| 856 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U
_ILLEGAL_CHAR_FOUND )) | |
| 857 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); | |
| 858 | |
| 859 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
| 860 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode
[0]),"IBM-eucJP", | |
| 861 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) | |
| 862 log_err("euc-jp->u with skip did not match.\n"); | |
| 863 | |
| 864 | |
| 865 | |
| 866 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
| 867 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", | |
| 868 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) | |
| 869 log_err("euc-tw->u with skip did not match.\n"); | |
| 870 | |
| 871 | |
| 872 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_202
2_jp), | |
| 873 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", | |
| 874 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) | |
| 875 log_err("iso-2022-jp->u with skip did not match.\n"); | |
| 876 | |
| 877 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_202
2_cn), | |
| 878 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2
022_cntoUnicode[0]),"iso-2022-cn", | |
| 879 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) | |
| 880 log_err("iso-2022-cn->u with skip did not match.\n"); | |
| 881 | |
| 882 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_202
2_kr), | |
| 883 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2
022_krtoUnicode[0]),"iso-2022-kr", | |
| 884 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) | |
| 885 log_err("iso-2022-kr->u with skip did not match.\n"); | |
| 886 | |
| 887 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
| 888 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
| 889 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) | |
| 890 log_err("HZ->u with skip did not match.\n"); | |
| 891 | |
| 892 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
| 893 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]
),"ISCII,version=0", | |
| 894 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) | |
| 895 log_err("iscii->u with skip did not match.\n"); | |
| 896 | |
| 897 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), | |
| 898 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0])
,"LMBCS-1", | |
| 899 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) | |
| 900 log_err("LMBCS->u with skip did not match.\n"); | |
| 901 | |
| 902 } | |
| 903 #endif | |
| 904 | |
| 905 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); | |
| 906 { | |
| 907 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
| 908 0xe0, 0x80, 0x61,}; | |
| 909 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; | |
| 910 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; | |
| 911 | |
| 912 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
| 913 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
| 914 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
| 915 log_err("utf8->u with skip did not match.\n");; | |
| 916 } | |
| 917 | |
| 918 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); | |
| 919 { | |
| 920 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
| 921 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfff
e}; | |
| 922 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
| 923 | |
| 924 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
| 925 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
| 926 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) | |
| 927 log_err("scsu->u with skip did not match.\n"); | |
| 928 } | |
| 929 | |
| 930 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
| 931 { | |
| 932 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBO
CU1 text 1 */ | |
| 933 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ | |
| 934 0x24, 0x1E, 0x52, /* 3 */ | |
| 935 0xB2, /* 6 */ | |
| 936 0x20, /* 7 */ | |
| 937 0x40, 0x07, /* 8 - wrong trail byte */ | |
| 938 0xB3, /* 10 */ | |
| 939 0xB1, /* 11 */ | |
| 940 0xD0, 0x20, /* 12 - wrong trail byte */ | |
| 941 0x0D, /* 14 */ | |
| 942 0x0A, /* 15 */ | |
| 943 0x20, /* 16 */ | |
| 944 0x00, /* 17 */ | |
| 945 0xD0, 0x6C, /* 18 */ | |
| 946 0xB6, /* 20 */ | |
| 947 0xD8, 0xA5, /* 21 */ | |
| 948 0x20, /* 23 */ | |
| 949 0x68, /* 24 */ | |
| 950 0x59, /* 25 */ | |
| 951 0xF9, 0x28, /* 26 */ | |
| 952 0x6D, /* 28 */ | |
| 953 0x20, /* 29 */ | |
| 954 0x73, /* 30 */ | |
| 955 0xE0, 0x2D, /* 31 */ | |
| 956 0xDE, 0x43, /* 33 */ | |
| 957 0xD0, 0x33, /* 35 */ | |
| 958 0x20, /* 37 */ | |
| 959 0xFA, 0x83, /* 38 */ | |
| 960 0x25, 0x01, /* 40 */ | |
| 961 0xFB, 0x16, 0x87, /* 42 */ | |
| 962 0x4B, 0x16, /* 45 */ | |
| 963 0x20, /* 47 */ | |
| 964 0xE6, 0xBD, /* 48 */ | |
| 965 0xEB, 0x5B, /* 50 */ | |
| 966 0x4B, 0xCC, /* 52 */ | |
| 967 0xF9, 0xA2, /* 54 */ | |
| 968 0xFC, 0x10, 0x3E, /* 56 */ | |
| 969 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ | |
| 970 0x20, /* 63 */ | |
| 971 0xFC, 0x03, 0xAC, /* 64 */ | |
| 972 0xFF, /* 67 - FF just resets the state without enc
oding anything */ | |
| 973 0x01, /* 68 */ | |
| 974 0xDE, 0x83, /* 69 */ | |
| 975 0x20, /* 71 */ | |
| 976 0x09 /* 72 */ | |
| 977 }; | |
| 978 UChar expected[]={ | |
| 979 0xFEFF, 0x0061, 0x0062, 0x0020, | |
| 980 0x0063, 0x0061, 0x000D, 0x000A, | |
| 981 0x0020, 0x0000, 0x00DF, 0x00E6, | |
| 982 0x0930, 0x0020, 0x0918, 0x0909, | |
| 983 0x3086, 0x304D, 0x0020, 0x3053, | |
| 984 0x4000, 0x4E00, 0x7777, 0x0020, | |
| 985 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, | |
| 986 0x0020, 0xD7A3, 0xDC00, 0xD800, | |
| 987 0xD800, 0xDC00, 0xD845, 0xDDDD, | |
| 988 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, | |
| 989 0xDFFF, 0x0001, 0x0E40, 0x0020, | |
| 990 0x0009 | |
| 991 }; | |
| 992 int32_t offsets[]={ | |
| 993 0, 3, 6, 7, /* skip 8, */ | |
| 994 10, 11, /* skip 12, */ | |
| 995 14, 15, 16, 17, 18, | |
| 996 20, 21, 23, 24, 25, 26, 28, 29, | |
| 997 30, 31, 33, 35, 37, 38, | |
| 998 40, 42, 45, 47, 48, | |
| 999 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, | |
| 1000 63, 64, /* trail */ 64, /* reset only 67, */ | |
| 1001 68, 69, | |
| 1002 71, 72 | |
| 1003 }; | |
| 1004 | |
| 1005 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
| 1006 expected, ARRAY_LENGTH(expected), "BOCU-1", | |
| 1007 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
| 1008 ) { | |
| 1009 log_err("BOCU-1->u with skip did not match.\n"); | |
| 1010 } | |
| 1011 } | |
| 1012 | |
| 1013 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); | |
| 1014 { | |
| 1015 const uint8_t sampleText[]={ | |
| 1016 0x61, /* 0 'a' */ | |
| 1017 0xc0, 0x80, /* 1 non-shortest form */ | |
| 1018 0xc4, 0xb5, /* 3 U+0135 */ | |
| 1019 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ | |
| 1020 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401
*/ | |
| 1021 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ | |
| 1022 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U
+dc01 */ | |
| 1023 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+
10000 */ | |
| 1024 0x62, /* 24 'b' */ | |
| 1025 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+
d801 */ | |
| 1026 0xed, 0xa0, /* 28 incomplete sequence */ | |
| 1027 0xd0, 0x80 /* 30 U+0400 */ | |
| 1028 }; | |
| 1029 UChar expected[]={ | |
| 1030 0x0061, | |
| 1031 /* skip */ | |
| 1032 0x0135, | |
| 1033 0xd020, | |
| 1034 0xd801, 0xdc01, | |
| 1035 0xe000, | |
| 1036 0xdc01, | |
| 1037 /* skip */ | |
| 1038 0x0062, | |
| 1039 0xd801, | |
| 1040 0x0400 | |
| 1041 }; | |
| 1042 int32_t offsets[]={ | |
| 1043 0, | |
| 1044 /* skip 1, */ | |
| 1045 3, | |
| 1046 5, | |
| 1047 8, 11, | |
| 1048 14, | |
| 1049 17, | |
| 1050 /* skip 20, 20, */ | |
| 1051 24, | |
| 1052 25, | |
| 1053 /* skip 28 */ | |
| 1054 30 | |
| 1055 }; | |
| 1056 | |
| 1057 /* without offsets */ | |
| 1058 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
| 1059 expected, ARRAY_LENGTH(expected), "CESU-8", | |
| 1060 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) | |
| 1061 ) { | |
| 1062 log_err("CESU-8->u with skip did not match.\n"); | |
| 1063 } | |
| 1064 | |
| 1065 /* with offsets */ | |
| 1066 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
| 1067 expected, ARRAY_LENGTH(expected), "CESU-8", | |
| 1068 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) | |
| 1069 ) { | |
| 1070 log_err("CESU-8->u with skip did not match.\n"); | |
| 1071 } | |
| 1072 } | |
| 1073 } | |
| 1074 | |
| 1075 static void TestStop(int32_t inputsize, int32_t outputsize) | |
| 1076 { | |
| 1077 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD70
0 }; | |
| 1078 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
| 1079 | |
| 1080 static const uint8_t expstopIBM_949[]= { | |
| 1081 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; | |
| 1082 | |
| 1083 static const uint8_t expstopIBM_943[] = { | |
| 1084 0x9f, 0xaf, 0x9f, 0xb1}; | |
| 1085 | |
| 1086 static const uint8_t expstopIBM_930[] = { | |
| 1087 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; | |
| 1088 | |
| 1089 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; | |
| 1090 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
| 1091 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; | |
| 1092 | |
| 1093 | |
| 1094 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; | |
| 1095 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; | |
| 1096 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; | |
| 1097 | |
| 1098 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; | |
| 1099 static const int32_t fromIBM943Offs [] = { 0, 2}; | |
| 1100 static const int32_t fromIBM930Offs [] = { 1, 3}; | |
| 1101 | |
| 1102 gInBufferSize = inputsize; | |
| 1103 gOutBufferSize = outputsize; | |
| 1104 | |
| 1105 /*From Unicode*/ | |
| 1106 | |
| 1107 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1108 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), | |
| 1109 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", | |
| 1110 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) | |
| 1111 log_err("u-> ibm-949 with stop did not match.\n"); | |
| 1112 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
| 1113 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", | |
| 1114 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) | |
| 1115 log_err("u-> ibm-943 with stop did not match.\n"); | |
| 1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
| 1117 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", | |
| 1118 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) | |
| 1119 log_err("u-> ibm-930 with stop did not match.\n"); | |
| 1120 | |
| 1121 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); | |
| 1122 { | |
| 1123 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
| 1124 static const uint8_t toIBM943[]= { 0x61,}; | |
| 1125 static const int32_t offset[]= {0,} ; | |
| 1126 | |
| 1127 /*EUC_JP*/ | |
| 1128 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
| 1129 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; | |
| 1130 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; | |
| 1131 | |
| 1132 /*EUC_TW*/ | |
| 1133 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
| 1134 static const uint8_t to_euc_tw[]={ | |
| 1135 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; | |
| 1136 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; | |
| 1137 | |
| 1138 /*ISO-2022-JP*/ | |
| 1139 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; | |
| 1140 static const uint8_t to_iso_2022_jp[]={ | |
| 1141 0x41, | |
| 1142 | |
| 1143 }; | |
| 1144 static const int32_t from_iso_2022_jpOffs [] ={0,}; | |
| 1145 | |
| 1146 /*ISO-2022-cn*/ | |
| 1147 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
| 1148 static const uint8_t to_iso_2022_cn[]={ | |
| 1149 0x41, | |
| 1150 | |
| 1151 }; | |
| 1152 static const int32_t from_iso_2022_cnOffs [] ={ | |
| 1153 0,0, | |
| 1154 2,2, | |
| 1155 }; | |
| 1156 | |
| 1157 /*ISO-2022-kr*/ | |
| 1158 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042, }; | |
| 1159 static const uint8_t to_iso_2022_kr[]={ | |
| 1160 0x1b, 0x24, 0x29, 0x43, | |
| 1161 0x41, | |
| 1162 0x0e, 0x25, 0x50, | |
| 1163 }; | |
| 1164 static const int32_t from_iso_2022_krOffs [] ={ | |
| 1165 -1,-1,-1,-1, | |
| 1166 0, | |
| 1167 1,1,1, | |
| 1168 }; | |
| 1169 | |
| 1170 /* HZ encoding */ | |
| 1171 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; | |
| 1172 | |
| 1173 static const uint8_t to_hz[]={ | |
| 1174 0x7e, 0x7d, 0x41, | |
| 1175 0x7e, 0x7b, 0x26, 0x30, | |
| 1176 | |
| 1177 }; | |
| 1178 static const int32_t from_hzOffs [] ={ | |
| 1179 0, 0,0, | |
| 1180 1,1,1,1, | |
| 1181 }; | |
| 1182 | |
| 1183 /*ISCII*/ | |
| 1184 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
| 1185 static const uint8_t to_iscii[]={ | |
| 1186 0x41, | |
| 1187 }; | |
| 1188 static const int32_t from_isciiOffs [] ={ | |
| 1189 0, | |
| 1190 }; | |
| 1191 | |
| 1192 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
| 1193 toIBM943, sizeof(toIBM943), "ibm-943", | |
| 1194 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) | |
| 1195 log_err("u-> ibm-943 with stop did not match.\n"); | |
| 1196 | |
| 1197 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
| 1198 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
| 1199 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) | |
| 1200 log_err("u-> euc-jp with stop did not match.\n"); | |
| 1201 | |
| 1202 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
| 1203 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
| 1204 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
| 1205 log_err("u-> euc-tw with stop did not match.\n"); | |
| 1206 | |
| 1207 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), | |
| 1208 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
| 1209 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
| 1210 log_err("u-> iso-2022-jp with stop did not match.\n"); | |
| 1211 | |
| 1212 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), | |
| 1213 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", | |
| 1214 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) | |
| 1215 log_err("u-> iso-2022-jp with stop did not match.\n"); | |
| 1216 | |
| 1217 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), | |
| 1218 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
| 1219 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) | |
| 1220 log_err("u-> iso-2022-cn with stop did not match.\n"); | |
| 1221 | |
| 1222 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), | |
| 1223 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
| 1224 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) | |
| 1225 log_err("u-> iso-2022-kr with stop did not match.\n"); | |
| 1226 | |
| 1227 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), | |
| 1228 to_hz, sizeof(to_hz), "HZ", | |
| 1229 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) | |
| 1230 log_err("u-> HZ with stop did not match.\n");\ | |
| 1231 | |
| 1232 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), | |
| 1233 to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
| 1234 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) | |
| 1235 log_err("u-> iscii with stop did not match.\n"); | |
| 1236 | |
| 1237 | |
| 1238 } | |
| 1239 #endif | |
| 1240 | |
| 1241 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"
); | |
| 1242 { | |
| 1243 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042,
}; | |
| 1244 | |
| 1245 static const uint8_t to_SCSU[]={ | |
| 1246 0x41, | |
| 1247 | |
| 1248 }; | |
| 1249 int32_t from_SCSUOffs [] ={ | |
| 1250 0, | |
| 1251 | |
| 1252 }; | |
| 1253 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), | |
| 1254 to_SCSU, sizeof(to_SCSU), "SCSU", | |
| 1255 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) | |
| 1256 log_err("u-> SCSU with skip did not match.\n"); | |
| 1257 | |
| 1258 } | |
| 1259 | |
| 1260 /*to Unicode*/ | |
| 1261 | |
| 1262 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1263 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), | |
| 1264 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949st
optoUnicode[0]),"ibm-949", | |
| 1265 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) | |
| 1266 log_err("ibm-949->u with stop did not match.\n"); | |
| 1267 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), | |
| 1268 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943st
optoUnicode[0]),"ibm-943", | |
| 1269 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) | |
| 1270 log_err("ibm-943->u with stop did not match.\n"); | |
| 1271 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), | |
| 1272 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930st
optoUnicode[0]),"ibm-930", | |
| 1273 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) | |
| 1274 log_err("ibm-930->u with stop did not match.\n"); | |
| 1275 | |
| 1276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); | |
| 1277 { | |
| 1278 | |
| 1279 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
| 1280 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
| 1281 }; | |
| 1282 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; | |
| 1283 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; | |
| 1284 | |
| 1285 | |
| 1286 /*EUC-JP*/ | |
| 1287 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, | |
| 1288 0x8f, 0xda, 0xa1, /*unassigned*/ | |
| 1289 0x8e, 0xe0, | |
| 1290 }; | |
| 1291 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; | |
| 1292 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; | |
| 1293 | |
| 1294 /*EUC_TW*/ | |
| 1295 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2,
0xdc, 0xe5, | |
| 1296 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
| 1297 0xe6, 0xca, 0x8a, | |
| 1298 }; | |
| 1299 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; | |
| 1300 int32_t from_euc_twOffs [] ={ 0, 1, 3}; | |
| 1301 | |
| 1302 | |
| 1303 | |
| 1304 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBC
IDIC_STATEFUL), | |
| 1305 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
| 1306 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) | |
| 1307 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); | |
| 1308 | |
| 1309 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
| 1310 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0])
,"IBM-eucJP", | |
| 1311 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) | |
| 1312 log_err("euc-jp->u with stop did not match.\n"); | |
| 1313 | |
| 1314 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
| 1315 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", | |
| 1316 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) | |
| 1317 log_err("euc-tw->u with stop did not match.\n"); | |
| 1318 } | |
| 1319 #endif | |
| 1320 | |
| 1321 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); | |
| 1322 { | |
| 1323 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
| 1324 0xe0, 0x80, 0x61,}; | |
| 1325 static const UChar expected1[] = { 0x0031, 0x4e8c,}; | |
| 1326 static const int32_t offsets1[] = { 0x0000, 0x0001}; | |
| 1327 | |
| 1328 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
| 1329 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
| 1330 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
| 1331 log_err("utf8->u with stop did not match.\n");; | |
| 1332 } | |
| 1333 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); | |
| 1334 { | |
| 1335 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c
,0x04}; | |
| 1336 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061
}; | |
| 1337 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; | |
| 1338 | |
| 1339 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
| 1340 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
| 1341 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) | |
| 1342 log_err("scsu->u with stop did not match.\n");; | |
| 1343 } | |
| 1344 | |
| 1345 } | |
| 1346 | |
| 1347 static void TestSub(int32_t inputsize, int32_t outputsize) | |
| 1348 { | |
| 1349 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD70
0 }; | |
| 1350 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
| 1351 | |
| 1352 static const uint8_t expsubIBM_949[] = | |
| 1353 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; | |
| 1354 | |
| 1355 static const uint8_t expsubIBM_943[] = { | |
| 1356 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; | |
| 1357 | |
| 1358 static const uint8_t expsubIBM_930[] = { | |
| 1359 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; | |
| 1360 | |
| 1361 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0
xD700 }; | |
| 1362 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
| 1363 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; | |
| 1364 | |
| 1365 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; | |
| 1366 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; | |
| 1367 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; | |
| 1368 | |
| 1369 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; | |
| 1370 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; | |
| 1371 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; | |
| 1372 | |
| 1373 gInBufferSize = inputsize; | |
| 1374 gOutBufferSize = outputsize; | |
| 1375 | |
| 1376 /*from unicode*/ | |
| 1377 | |
| 1378 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1379 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), | |
| 1380 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", | |
| 1381 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) | |
| 1382 log_err("u-> ibm-949 with subst did not match.\n"); | |
| 1383 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
| 1384 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", | |
| 1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) | |
| 1386 log_err("u-> ibm-943 with subst did not match.\n"); | |
| 1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
| 1388 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", | |
| 1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) | |
| 1390 log_err("u-> ibm-930 with subst did not match.\n"); | |
| 1391 | |
| 1392 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); | |
| 1393 { | |
| 1394 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
| 1395 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; | |
| 1396 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; | |
| 1397 | |
| 1398 | |
| 1399 /* EUC_JP*/ | |
| 1400 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; | |
| 1401 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
| 1402 0xf4, 0xfe, 0xf4, 0xfe, | |
| 1403 0x61, 0x8e, 0xe0, | |
| 1404 }; | |
| 1405 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5,
6, 7, 7}; | |
| 1406 | |
| 1407 /*EUC_TW*/ | |
| 1408 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
| 1409 static const uint8_t to_euc_tw[]={ | |
| 1410 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
| 1411 0xfd, 0xfe, 0xfd, 0xfe, | |
| 1412 0x61, 0xe6, 0xca, 0x8a, | |
| 1413 }; | |
| 1414 | |
| 1415 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5,
5, 6, 7, 7, 8,}; | |
| 1416 | |
| 1417 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
| 1418 toIBM943, sizeof(toIBM943), "ibm-943", | |
| 1419 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) | |
| 1420 log_err("u-> ibm-943 with substitute did not match.\n"); | |
| 1421 | |
| 1422 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
| 1423 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
| 1424 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) | |
| 1425 log_err("u-> euc-jp with substitute did not match.\n"); | |
| 1426 | |
| 1427 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
| 1428 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
| 1429 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
| 1430 log_err("u-> euc-tw with substitute did not match.\n"); | |
| 1431 } | |
| 1432 #endif | |
| 1433 | |
| 1434 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITU
TE \n"); | |
| 1435 { | |
| 1436 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; | |
| 1437 | |
| 1438 const uint8_t to_SCSU[]={ | |
| 1439 0x41, | |
| 1440 0x0e, 0xff,0xfd, | |
| 1441 0x42 | |
| 1442 | |
| 1443 | |
| 1444 }; | |
| 1445 int32_t from_SCSUOffs [] ={ | |
| 1446 0, | |
| 1447 1,1,1, | |
| 1448 2, | |
| 1449 | |
| 1450 }; | |
| 1451 const uint8_t to_SCSU_1[]={ | |
| 1452 0x41, | |
| 1453 | |
| 1454 }; | |
| 1455 int32_t from_SCSUOffs_1 [] ={ | |
| 1456 0, | |
| 1457 | |
| 1458 }; | |
| 1459 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), | |
| 1460 to_SCSU, sizeof(to_SCSU), "SCSU", | |
| 1461 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) | |
| 1462 log_err("u-> SCSU with substitute did not match.\n"); | |
| 1463 | |
| 1464 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputT
ext)/sizeof(SCSU_inputText[0]), | |
| 1465 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", | |
| 1466 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_
ILLEGAL_CHAR_FOUND )) | |
| 1467 log_err("u-> SCSU with substitute did not match.\n"); | |
| 1468 } | |
| 1469 | |
| 1470 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTIT
UTE\n"); | |
| 1471 { | |
| 1472 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801,
0xffff, 0x0061,}; | |
| 1473 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, | |
| 1474 0xf0, 0x90, 0x90, 0x81, | |
| 1475 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, | |
| 1476 0xef, 0xbf, 0xbf, 0x61, | |
| 1477 | |
| 1478 }; | |
| 1479 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4,
5, 5, 5, 6 }; | |
| 1480 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput
[0]), | |
| 1481 expectedUTF8, sizeof(expectedUTF8), "utf8", | |
| 1482 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { | |
| 1483 log_err("u-> utf8 with stop did not match.\n"); | |
| 1484 } | |
| 1485 } | |
| 1486 | |
| 1487 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTI
TUTE\n"); | |
| 1488 { | |
| 1489 static const UChar in[]={ 0x0041, 0xfeff }; | |
| 1490 | |
| 1491 static const uint8_t out[]={ | |
| 1492 #if U_IS_BIG_ENDIAN | |
| 1493 0xfe, 0xff, | |
| 1494 0x00, 0x41, | |
| 1495 0xfe, 0xff | |
| 1496 #else | |
| 1497 0xff, 0xfe, | |
| 1498 0x41, 0x00, | |
| 1499 0xff, 0xfe | |
| 1500 #endif | |
| 1501 }; | |
| 1502 static const int32_t offsets[]={ | |
| 1503 -1, -1, 0, 0, 1, 1 | |
| 1504 }; | |
| 1505 | |
| 1506 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
| 1507 out, sizeof(out), "UTF-16", | |
| 1508 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NUL
L, 0) | |
| 1509 ) { | |
| 1510 log_err("u->UTF-16 with substitute did not match.\n"); | |
| 1511 } | |
| 1512 } | |
| 1513 | |
| 1514 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTI
TUTE\n"); | |
| 1515 { | |
| 1516 static const UChar in[]={ 0x0041, 0xfeff }; | |
| 1517 | |
| 1518 static const uint8_t out[]={ | |
| 1519 #if U_IS_BIG_ENDIAN | |
| 1520 0x00, 0x00, 0xfe, 0xff, | |
| 1521 0x00, 0x00, 0x00, 0x41, | |
| 1522 0x00, 0x00, 0xfe, 0xff | |
| 1523 #else | |
| 1524 0xff, 0xfe, 0x00, 0x00, | |
| 1525 0x41, 0x00, 0x00, 0x00, | |
| 1526 0xff, 0xfe, 0x00, 0x00 | |
| 1527 #endif | |
| 1528 }; | |
| 1529 static const int32_t offsets[]={ | |
| 1530 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 | |
| 1531 }; | |
| 1532 | |
| 1533 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), | |
| 1534 out, sizeof(out), "UTF-32", | |
| 1535 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NUL
L, 0) | |
| 1536 ) { | |
| 1537 log_err("u->UTF-32 with substitute did not match.\n"); | |
| 1538 } | |
| 1539 } | |
| 1540 | |
| 1541 /*to unicode*/ | |
| 1542 | |
| 1543 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1544 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), | |
| 1545 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subt
oUnicode[0]),"ibm-949", | |
| 1546 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) | |
| 1547 log_err("ibm-949->u with substitute did not match.\n"); | |
| 1548 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), | |
| 1549 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subt
oUnicode[0]),"ibm-943", | |
| 1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) | |
| 1551 log_err("ibm-943->u with substitute did not match.\n"); | |
| 1552 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), | |
| 1553 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subt
oUnicode[0]),"ibm-930", | |
| 1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) | |
| 1555 log_err("ibm-930->u with substitute did not match.\n"); | |
| 1556 | |
| 1557 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); | |
| 1558 { | |
| 1559 | |
| 1560 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ | |
| 1561 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 | |
| 1562 }; | |
| 1563 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 | |
| 1564 }; | |
| 1565 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; | |
| 1566 | |
| 1567 | |
| 1568 /* EUC_JP*/ | |
| 1569 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
| 1570 0x8f, 0xda, 0xa1, /*unassigned*/ | |
| 1571 0x8e, 0xe0, 0x8a | |
| 1572 }; | |
| 1573 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a
}; | |
| 1574 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; | |
| 1575 | |
| 1576 /*EUC_TW*/ | |
| 1577 const uint8_t sampleTxt_euc_tw[]={ | |
| 1578 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
| 1579 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
| 1580 0xe6, 0xca, 0x8a, | |
| 1581 }; | |
| 1582 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a,
}; | |
| 1583 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; | |
| 1584 | |
| 1585 | |
| 1586 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCI
DIC_STATEFUL), | |
| 1587 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof
(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", | |
| 1588 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )
) | |
| 1589 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); | |
| 1590 | |
| 1591 | |
| 1592 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), | |
| 1593 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"
IBM-eucJP", | |
| 1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) | |
| 1595 log_err("euc-jp->u with substitute did not match.\n"); | |
| 1596 | |
| 1597 | |
| 1598 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
| 1599 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"
euc-tw", | |
| 1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) | |
| 1601 log_err("euc-tw->u with substitute did not match.\n"); | |
| 1602 | |
| 1603 | |
| 1604 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_e
uc_jp), | |
| 1605 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"
IBM-eucJP", | |
| 1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGA
L_CHAR_FOUND)) | |
| 1607 log_err("euc-jp->u with substitute did not match.\n"); | |
| 1608 } | |
| 1609 #endif | |
| 1610 | |
| 1611 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE
\n"); | |
| 1612 { | |
| 1613 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, | |
| 1614 0xe0, 0x80, 0x61,}; | |
| 1615 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; | |
| 1616 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; | |
| 1617 | |
| 1618 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
| 1619 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", | |
| 1620 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
| 1621 log_err("utf8->u with substitute did not match.\n");; | |
| 1622 } | |
| 1623 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \
n"); | |
| 1624 { | |
| 1625 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; | |
| 1626 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfff
d}; | |
| 1627 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; | |
| 1628 | |
| 1629 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), | |
| 1630 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", | |
| 1631 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) | |
| 1632 log_err("scsu->u with stop did not match.\n");; | |
| 1633 } | |
| 1634 | |
| 1635 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1636 log_verbose("Testing ibm-930 subchar/subchar1\n"); | |
| 1637 { | |
| 1638 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65,
0x6d66, 0xdf }; | |
| 1639 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0x
fe, 0x46, 0x6b, 0x0f, 0x3f }; | |
| 1640 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2,
3, 3, 4, 4 }; | |
| 1641 | |
| 1642 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd,
0x6d66, 0x1a }; | |
| 1643 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0x
fc, 0x46, 0x6b, 0x0f, 0x57 }; | |
| 1644 static const int32_t offsets2[]={ 1, 3, 5,
7, 10 }; | |
| 1645 | |
| 1646 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "
ibm-930", | |
| 1647 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NU
LL, 0) | |
| 1648 ) { | |
| 1649 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); | |
| 1650 } | |
| 1651 | |
| 1652 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ib
m-930", | |
| 1653 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
| 1654 ) { | |
| 1655 log_err("ibm-930->u subchar/subchar1 did not match.\n"); | |
| 1656 } | |
| 1657 } | |
| 1658 | |
| 1659 log_verbose("Testing GB 18030 with substitute callbacks\n"); | |
| 1660 { | |
| 1661 static const UChar u2[]={ | |
| 1662 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00,
0x9fa6, 0xffff, 0xd800, 0xdc00, 0xff
fd, 0xdbff, 0xdfff }; | |
| 1663 static const uint8_t gb2[]={ | |
| 1664 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0x
bb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3
, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; | |
| 1665 static const int32_t offsets2[]={ | |
| 1666 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; | |
| 1667 | |
| 1668 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "
gb18030", | |
| 1669 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
| 1670 ) { | |
| 1671 log_err("gb18030->u with substitute did not match.\n"); | |
| 1672 } | |
| 1673 } | |
| 1674 #endif | |
| 1675 | |
| 1676 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); | |
| 1677 { | |
| 1678 static const uint8_t utf7[]={ | |
| 1679 /* a~ a+AB~ a+AB\x0c
a+AB- a+AB. a+. */ | |
| 1680 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42
, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b
, 0x2e | |
| 1681 }; | |
| 1682 static const UChar unicode[]={ | |
| 1683 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd,
0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xff
fd, 0x2e | |
| 1684 }; | |
| 1685 static const int32_t offsets[]={ | |
| 1686 0, 1, 2, 4, 6, 7, 9,
11, 12, 14, 17, 19, 21, 22, 23,
24 | |
| 1687 }; | |
| 1688 | |
| 1689 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH
(unicode), "UTF-7", | |
| 1690 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0
) | |
| 1691 ) { | |
| 1692 log_err("UTF-7->u with substitute did not match.\n"); | |
| 1693 } | |
| 1694 } | |
| 1695 | |
| 1696 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); | |
| 1697 { | |
| 1698 static const uint8_t | |
| 1699 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, | |
| 1700 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, | |
| 1701 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; | |
| 1702 | |
| 1703 static const UChar | |
| 1704 out1[]={ 0x4e00, 0xfeff }, | |
| 1705 out2[]={ 0x004e, 0xfffe }, | |
| 1706 out3[]={ 0xfefd, 0x4e00, 0xfeff }; | |
| 1707 | |
| 1708 static const int32_t | |
| 1709 offsets1[]={ 2, 4 }, | |
| 1710 offsets2[]={ 2, 4 }, | |
| 1711 offsets3[]={ 0, 2, 4 }; | |
| 1712 | |
| 1713 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1
), "UTF-16", | |
| 1714 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL,
0) | |
| 1715 ) { | |
| 1716 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); | |
| 1717 } | |
| 1718 | |
| 1719 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2
), "UTF-16", | |
| 1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
| 1721 ) { | |
| 1722 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); | |
| 1723 } | |
| 1724 | |
| 1725 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3
), "UTF-16", | |
| 1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL,
0) | |
| 1727 ) { | |
| 1728 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); | |
| 1729 } | |
| 1730 } | |
| 1731 | |
| 1732 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); | |
| 1733 { | |
| 1734 static const uint8_t | |
| 1735 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x
00, 0xfe, 0xff }, | |
| 1736 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0x
ff, 0x00, 0x00 }, | |
| 1737 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x
00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, | |
| 1738 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x
00, 0x4e, 0x00 }; | |
| 1739 | |
| 1740 static const UChar | |
| 1741 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff }, | |
| 1742 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe }, | |
| 1743 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0x
fffd }, | |
| 1744 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 }; | |
| 1745 | |
| 1746 static const int32_t | |
| 1747 offsets1[]={ 4, 4, 8 }, | |
| 1748 offsets2[]={ 4, 4, 8 }, | |
| 1749 offsets3[]={ 0, 4, 4, 8, 12 }, | |
| 1750 offsets4[]={ 0, 0, 4, 8 }; | |
| 1751 | |
| 1752 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1
), "UTF-32", | |
| 1753 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL,
0) | |
| 1754 ) { | |
| 1755 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); | |
| 1756 } | |
| 1757 | |
| 1758 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2
), "UTF-32", | |
| 1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) | |
| 1760 ) { | |
| 1761 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); | |
| 1762 } | |
| 1763 | |
| 1764 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3
), "UTF-32", | |
| 1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL,
0) | |
| 1766 ) { | |
| 1767 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); | |
| 1768 } | |
| 1769 | |
| 1770 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4
), "UTF-32", | |
| 1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL,
0) | |
| 1772 ) { | |
| 1773 log_err("UTF-32 (no BOM, with error)->u with substitute did not matc
h.\n"); | |
| 1774 } | |
| 1775 } | |
| 1776 } | |
| 1777 | |
| 1778 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) | |
| 1779 { | |
| 1780 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; | |
| 1781 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; | |
| 1782 | |
| 1783 const uint8_t expsubwvalIBM_949[]= { | |
| 1784 0x00, 0xb0, 0xa1, 0xb0, 0xa2, | |
| 1785 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; | |
| 1786 | |
| 1787 const uint8_t expsubwvalIBM_943[]= { | |
| 1788 0x9f, 0xaf, 0x9f, 0xb1, | |
| 1789 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; | |
| 1790 | |
| 1791 const uint8_t expsubwvalIBM_930[] = { | |
| 1792 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5,
0x0e, 0x46, 0x6b, 0x0f }; | |
| 1793 | |
| 1794 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; | |
| 1795 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; | |
| 1796 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }
; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ | |
| 1797 | |
| 1798 gInBufferSize = inputsize; | |
| 1799 gOutBufferSize = outputsize; | |
| 1800 | |
| 1801 /*from Unicode*/ | |
| 1802 | |
| 1803 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 1804 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), | |
| 1805 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", | |
| 1806 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) | |
| 1807 log_err("u-> ibm-949 with subst with value did not match.\n"); | |
| 1808 | |
| 1809 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
| 1810 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", | |
| 1811 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) | |
| 1812 log_err("u-> ibm-943 with sub with value did not match.\n"); | |
| 1813 | |
| 1814 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), | |
| 1815 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", | |
| 1816 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) | |
| 1817 log_err("u-> ibm-930 with subst with value did not match.\n"); | |
| 1818 | |
| 1819 | |
| 1820 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); | |
| 1821 { | |
| 1822 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; | |
| 1823 static const uint8_t toIBM943[]= { 0x61, | |
| 1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
| 1825 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
| 1826 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
| 1827 0x61 }; | |
| 1828 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3
, 3, 3, 3, 3, 3, 4}; | |
| 1829 | |
| 1830 | |
| 1831 /* EUC_JP*/ | |
| 1832 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2, }; | |
| 1833 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, | |
| 1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
| 1835 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
| 1836 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
| 1837 0x61, 0x8e, 0xe0, | |
| 1838 }; | |
| 1839 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, | |
| 1840 3, 3, 3, 3, 3, 3, | |
| 1841 3, 3, 3, 3, 3, 3, | |
| 1842 5, 5, 5, 5, 5, 5, | |
| 1843 6, 7, 7, | |
| 1844 }; | |
| 1845 | |
| 1846 /*EUC_TW*/ | |
| 1847 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; | |
| 1848 static const uint8_t to_euc_tw[]={ | |
| 1849 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
| 1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
| 1851 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, | |
| 1852 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, | |
| 1853 0x61, 0xe6, 0xca, 0x8a, | |
| 1854 }; | |
| 1855 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, | |
| 1856 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, | |
| 1857 6, 7, 7, 8, | |
| 1858 }; | |
| 1859 /*ISO-2022-JP*/ | |
| 1860 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x0
0E9, 0x0042} ; | |
| 1861 static const uint8_t to_iso_2022_jp1[]={ | |
| 1862 0x1b, 0x24, 0x42, 0x21, 0x21, | |
| 1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
| |
| 1864 0x1b, 0x24, 0x42, 0x21, 0x22, | |
| 1865 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, | |
| 1866 0x42, | |
| 1867 }; | |
| 1868 | |
| 1869 static const int32_t from_iso_2022_jpOffs1 [] ={ | |
| 1870 0,0,0,0,0, | |
| 1871 1,1,1,1,1,1,1,1,1, | |
| 1872 2,2,2,2,2, | |
| 1873 3,3,3,3,3,3,3,3,3, | |
| 1874 4, | |
| 1875 }; | |
| 1876 /* surrogate pair*/ | |
| 1877 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x
3001,0xD84D,0xDC56, 0x0042} ; | |
| 1878 static const uint8_t to_iso_2022_jp2[]={ | |
| 1879 0x1b, 0x24, 0x42, 0x21, 0x21, | |
| 1880 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44,
0x38, 0x34, 0x44, | |
| 1881 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 1882 0x1b, 0x24, 0x42, 0x21, 0x22, | |
| 1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44,
0x38, 0x34, 0x44, | |
| 1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 1885 0x42, | |
| 1886 }; | |
| 1887 static const int32_t from_iso_2022_jpOffs2 [] ={ | |
| 1888 0,0,0,0,0, | |
| 1889 1,1,1,1,1,1,1,1,1, | |
| 1890 1,1,1,1,1,1, | |
| 1891 3,3,3,3,3, | |
| 1892 4,4,4,4,4,4,4,4,4, | |
| 1893 4,4,4,4,4,4, | |
| 1894 6, | |
| 1895 }; | |
| 1896 | |
| 1897 /*ISO-2022-cn*/ | |
| 1898 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; | |
| 1899 static const uint8_t to_iso_2022_cn[]={ | |
| 1900 0x41, | |
| 1901 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, | |
| 1902 0x42, | |
| 1903 }; | |
| 1904 static const int32_t from_iso_2022_cnOffs [] ={ | |
| 1905 0, | |
| 1906 1,1,1,1,1,1, | |
| 1907 2, | |
| 1908 }; | |
| 1909 | |
| 1910 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x
3001,0xD84D,0xDC56, 0x0042}; | |
| 1911 | |
| 1912 static const uint8_t to_iso_2022_cn4[]={ | |
| 1913 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x2
1, | |
| 1914 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x4
4, | |
| 1915 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 1916 0x0e, 0x21, 0x22, | |
| 1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x4
4, | |
| 1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 1919 0x42, | |
| 1920 }; | |
| 1921 static const int32_t from_iso_2022_cnOffs4 [] ={ | |
| 1922 0,0,0,0,0,0,0, | |
| 1923 1,1,1,1,1,1,1, | |
| 1924 1,1,1,1,1,1, | |
| 1925 3,3,3, | |
| 1926 4,4,4,4,4,4,4, | |
| 1927 4,4,4,4,4,4, | |
| 1928 6 | |
| 1929 | |
| 1930 }; | |
| 1931 | |
| 1932 /*ISO-2022-kr*/ | |
| 1933 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xD
C56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
| 1934 static const uint8_t to_iso_2022_kr2[]={ | |
| 1935 0x1b, 0x24, 0x29, 0x43, | |
| 1936 0x41, | |
| 1937 0x0e, 0x25, 0x50, | |
| 1938 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
| 1939 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 1940 0x0e, 0x25, 0x50, | |
| 1941 0x0f, 0x42, | |
| 1942 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
| 1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 1944 0x43 | |
| 1945 }; | |
| 1946 static const int32_t from_iso_2022_krOffs2 [] ={ | |
| 1947 -1,-1,-1,-1, | |
| 1948 0, | |
| 1949 1,1,1, | |
| 1950 2,2,2,2,2,2,2, | |
| 1951 2,2,2,2,2,2, | |
| 1952 4,4,4, | |
| 1953 5,5, | |
| 1954 6,6,6,6,6,6, | |
| 1955 6,6,6,6,6,6, | |
| 1956 8, | |
| 1957 }; | |
| 1958 | |
| 1959 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; | |
| 1960 static const uint8_t to_iso_2022_kr[]={ | |
| 1961 0x1b, 0x24, 0x29, 0x43, | |
| 1962 0x41, | |
| 1963 0x0e, 0x25, 0x50, | |
| 1964 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*
/ | |
| 1965 0x0e, 0x25, 0x50, | |
| 1966 0x0f, 0x42, | |
| 1967 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
| 1968 0x43 | |
| 1969 }; | |
| 1970 | |
| 1971 | |
| 1972 static const int32_t from_iso_2022_krOffs [] ={ | |
| 1973 -1,-1,-1,-1, | |
| 1974 0, | |
| 1975 1,1,1, | |
| 1976 2,2,2,2,2,2,2, | |
| 1977 3,3,3, | |
| 1978 4,4, | |
| 1979 5,5,5,5,5,5, | |
| 1980 6, | |
| 1981 }; | |
| 1982 /* HZ encoding */ | |
| 1983 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; | |
| 1984 | |
| 1985 static const uint8_t to_hz[]={ | |
| 1986 0x7e, 0x7d, 0x41, | |
| 1987 0x7e, 0x7b, 0x26, 0x30, | |
| 1988 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*una
ssigned*/ | |
| 1989 0x7e, 0x7b, 0x26, 0x30, | |
| 1990 0x7e, 0x7d, 0x42, | |
| 1991 | |
| 1992 }; | |
| 1993 static const int32_t from_hzOffs [] ={ | |
| 1994 0,0,0, | |
| 1995 1,1,1,1, | |
| 1996 2,2,2,2,2,2,2,2, | |
| 1997 3,3,3,3, | |
| 1998 4,4,4 | |
| 1999 }; | |
| 2000 | |
| 2001 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unas
signed*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; | |
| 2002 static const uint8_t to_hz2[]={ | |
| 2003 0x7e, 0x7d, 0x41, | |
| 2004 0x7e, 0x7b, 0x26, 0x30, | |
| 2005 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
| 2006 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 2007 0x7e, 0x7b, 0x26, 0x30, | |
| 2008 0x7e, 0x7d, 0x42, | |
| 2009 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, | |
| 2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 2011 0x43 | |
| 2012 }; | |
| 2013 static const int32_t from_hzOffs2 [] ={ | |
| 2014 0,0,0, | |
| 2015 1,1,1,1, | |
| 2016 2,2,2,2,2,2,2,2, | |
| 2017 2,2,2,2,2,2, | |
| 2018 4,4,4,4, | |
| 2019 5,5,5, | |
| 2020 6,6,6,6,6,6, | |
| 2021 6,6,6,6,6,6, | |
| 2022 8, | |
| 2023 }; | |
| 2024 | |
| 2025 /*ISCII*/ | |
| 2026 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned
*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; | |
| 2027 static const uint8_t to_iscii[]={ | |
| 2028 0x41, | |
| 2029 0xef, 0x42, 0xa1, | |
| 2030 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
| 2031 0xa2, | |
| 2032 0x42, | |
| 2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ | |
| 2034 0x43 | |
| 2035 }; | |
| 2036 | |
| 2037 | |
| 2038 static const int32_t from_isciiOffs [] ={ | |
| 2039 0, | |
| 2040 1,1,1, | |
| 2041 2,2,2,2,2,2, | |
| 2042 3, | |
| 2043 4, | |
| 2044 5,5,5,5,5,5, | |
| 2045 6, | |
| 2046 }; | |
| 2047 | |
| 2048 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), | |
| 2049 toIBM943, sizeof(toIBM943), "ibm-943", | |
| 2050 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) | |
| 2051 log_err("u-> ibm-943 with subst with value did not match.\n"); | |
| 2052 | |
| 2053 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), | |
| 2054 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP", | |
| 2055 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) | |
| 2056 log_err("u-> euc-jp with subst with value did not match.\n"); | |
| 2057 | |
| 2058 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), | |
| 2059 to_euc_tw, sizeof(to_euc_tw), "euc-tw", | |
| 2060 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) | |
| 2061 log_err("u-> euc-tw with subst with value did not match.\n"); | |
| 2062 | |
| 2063 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_in
putText1)/sizeof(iso_2022_jp_inputText1[0]), | |
| 2064 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
| 2065 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
| 2066 log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
| 2067 | |
| 2068 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_in
putText1)/sizeof(iso_2022_jp_inputText1[0]), | |
| 2069 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", | |
| 2070 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) | |
| 2071 log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
| 2072 | |
| 2073 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_in
putText2)/sizeof(iso_2022_jp_inputText2[0]), | |
| 2074 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", | |
| 2075 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) | |
| 2076 log_err("u-> iso_2022_jp with subst with value did not match.\n"); | |
| 2077 /*ESCAPE OPTIONS*/ | |
| 2078 { | |
| 2079 /* surrogate pair*/ | |
| 2080 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; | |
| 2081 static const uint8_t to_iso_2022_jp3_v2[]={ | |
| 2082 0x1b, 0x24, 0x42, 0x21, 0x21, | |
| 2083 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34,
0x34, 0x37, 0x30, 0x3b, | |
| 2084 | |
| 2085 0x1b, 0x24, 0x42, 0x21, 0x22, | |
| 2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34,
0x34, 0x37, 0x30, 0x3b, | |
| 2087 | |
| 2088 0x42, | |
| 2089 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b
, | |
| 2090 }; | |
| 2091 | |
| 2092 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ | |
| 2093 0,0,0,0,0, | |
| 2094 1,1,1,1,1,1,1,1,1,1,1,1, | |
| 2095 | |
| 2096 3,3,3,3,3, | |
| 2097 4,4,4,4,4,4,4,4,4,4,4,4, | |
| 2098 | |
| 2099 6, | |
| 2100 7,7,7,7,7,7,7,7,7 | |
| 2101 }; | |
| 2102 | |
| 2103 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof
(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), | |
| 2104 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp
", | |
| 2105 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL,
0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
| 2106 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not
match.\n"); | |
| 2107 } | |
| 2108 { | |
| 2109 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
| 2110 static const uint8_t to_iso_2022_cn5_v2[]={ | |
| 2111 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x2
1, | |
| 2112 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x4
4, | |
| 2113 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
| 2114 0x0e, 0x21, 0x22, | |
| 2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x4
4, | |
| 2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, | |
| 2117 0x42, | |
| 2118 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, | |
| 2119 }; | |
| 2120 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ | |
| 2121 0,0,0,0,0,0,0, | |
| 2122 1,1,1,1,1,1,1, | |
| 2123 1,1,1,1,1,1, | |
| 2124 3,3,3, | |
| 2125 4,4,4,4,4,4,4, | |
| 2126 4,4,4,4,4,4, | |
| 2127 6, | |
| 2128 7,7,7,7,7,7 | |
| 2129 }; | |
| 2130 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof
(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), | |
| 2131 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", | |
| 2132 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,U
CNV_ESCAPE_JAVA,U_ZERO_ERROR )) | |
| 2133 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not mat
ch.\n"); | |
| 2134 | |
| 2135 } | |
| 2136 { | |
| 2137 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
| 2138 static const uint8_t to_iso_2022_cn6_v2[]={ | |
| 2139 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, | |
| 2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33,
0x34, 0x35, 0x36, 0x7d, | |
| 2141 0x0e, 0x21, 0x22, | |
| 2142 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33,
0x34, 0x35, 0x36, 0x7d, | |
| 2143 0x42, | |
| 2144 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30,
0x32, 0x7d | |
| 2145 }; | |
| 2146 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ | |
| 2147 0, 0, 0, 0, 0, 0, 0, | |
| 2148 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
| 2149 3, 3, 3, | |
| 2150 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, | |
| 2151 6, | |
| 2152 7, 7, 7, 7, 7, 7, 7, 7, | |
| 2153 }; | |
| 2154 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof
(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), | |
| 2155 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", | |
| 2156 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,U
CNV_ESCAPE_UNICODE,U_ZERO_ERROR )) | |
| 2157 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not
match.\n"); | |
| 2158 | |
| 2159 } | |
| 2160 { | |
| 2161 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; | |
| 2162 static const uint8_t to_iso_2022_cn7_v2[]={ | |
| 2163 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, | |
| 2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34,
0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 2165 0x0e, 0x21, 0x22, | |
| 2166 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34,
0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, | |
| 2167 0x42, 0x25, 0x55, 0x30, 0x39, 0x30,
0x32, | |
| 2168 }; | |
| 2169 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ | |
| 2170 0, 0, 0, 0, 0, 0, 0, | |
| 2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, | |
| 2172 3, 3, 3, | |
| 2173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, | |
| 2174 6, | |
| 2175 7, 7, 7, 7, 7, 7, | |
| 2176 }; | |
| 2177 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof
(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), | |
| 2178 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", | |
| 2179 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"
K" ,U_ZERO_ERROR )) | |
| 2180 log_err("u-> iso-2022-cn with sub & K did not match.\n"); | |
| 2181 | |
| 2182 } | |
| 2183 { | |
| 2184 static const UChar iso_2022_cn_inputText8[]={ | |
| 2185 0x3000, | |
| 2186 0xD84D, 0xDC56, | |
| 2187 0x3001, | |
| 2188 0xD84D, 0xDC56, | |
| 2189 0xDBFF, 0xDFFF, | |
| 2190 0x0042, | |
| 2191 0x0902}; | |
| 2192 static const uint8_t to_iso_2022_cn8_v2[]={ | |
| 2193 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, | |
| 2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35,
0x36, 0x20, | |
| 2195 0x0e, 0x21, 0x22, | |
| 2196 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35,
0x36, 0x20, | |
| 2197 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46,
0x46, 0x20, | |
| 2198 0x42, | |
| 2199 0x5c, 0x39, 0x30, 0x32, 0x20 | |
| 2200 }; | |
| 2201 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ | |
| 2202 0, 0, 0, 0, 0, 0, 0, | |
| 2203 1, 1, 1, 1, 1, 1, 1, 1, | |
| 2204 3, 3, 3, | |
| 2205 4, 4, 4, 4, 4, 4, 4, 4, | |
| 2206 6, 6, 6, 6, 6, 6, 6, 6, | |
| 2207 8, | |
| 2208 9, 9, 9, 9, 9 | |
| 2209 }; | |
| 2210 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof
(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), | |
| 2211 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", | |
| 2212 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,U
CNV_ESCAPE_CSS2,U_ZERO_ERROR )) | |
| 2213 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not mat
ch.\n"); | |
| 2214 | |
| 2215 } | |
| 2216 { | |
| 2217 static const uint8_t to_iso_2022_cn4_v3[]={ | |
| 2218 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21
, | |
| 2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32
, 0x33, 0x34, 0x35, 0x36, | |
| 2220 0x0e, 0x21, 0x22, | |
| 2221 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32
, 0x33, 0x34, 0x35, 0x36, | |
| 2222 0x42 | |
| 2223 }; | |
| 2224 | |
| 2225 | |
| 2226 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ | |
| 2227 0,0,0,0,0,0,0, | |
| 2228 1,1,1,1,1,1,1,1,1,1,1, | |
| 2229 | |
| 2230 3,3,3, | |
| 2231 4,4,4,4,4,4,4,4,4,4,4, | |
| 2232 | |
| 2233 6 | |
| 2234 | |
| 2235 }; | |
| 2236 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof
(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), | |
| 2237 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", | |
| 2238 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,U
CNV_ESCAPE_C,U_ZERO_ERROR )) | |
| 2239 { | |
| 2240 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match
.\n"); | |
| 2241 } | |
| 2242 } | |
| 2243 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), | |
| 2244 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", | |
| 2245 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) | |
| 2246 log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
| 2247 | |
| 2248 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_in
putText4)/sizeof(iso_2022_cn_inputText4[0]), | |
| 2249 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", | |
| 2250 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) | |
| 2251 log_err("u-> iso_2022_cn with subst with value did not match.\n"); | |
| 2252 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), | |
| 2253 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", | |
| 2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) | |
| 2255 log_err("u-> iso_2022_kr with subst with value did not match.\n"); | |
| 2256 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_in
putText2)/sizeof(iso_2022_kr_inputText2[0]), | |
| 2257 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", | |
| 2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) | |
| 2259 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); | |
| 2260 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), | |
| 2261 to_hz, sizeof(to_hz), "HZ", | |
| 2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) | |
| 2263 log_err("u-> hz with subst with value did not match.\n"); | |
| 2264 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(h
z_inputText2[0]), | |
| 2265 to_hz2, sizeof(to_hz2), "HZ", | |
| 2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) | |
| 2267 log_err("u-> hz with subst with value did not match.\n"); | |
| 2268 | |
| 2269 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), | |
| 2270 to_iscii, sizeof(to_iscii), "ISCII,version=0", | |
| 2271 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) | |
| 2272 log_err("u-> iscii with subst with value did not match.\n"); | |
| 2273 } | |
| 2274 #endif | |
| 2275 | |
| 2276 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); | |
| 2277 /*to Unicode*/ | |
| 2278 { | |
| 2279 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 2280 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, | |
| 2281 0x81, 0xad, /*unassigned*/ | |
| 2282 0x89, 0xd3 }; | |
| 2283 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, | |
| 2284 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, | |
| 2285 0x7B87}; | |
| 2286 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3
, 3, 5}; | |
| 2287 | |
| 2288 /* EUC_JP*/ | |
| 2289 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, | |
| 2290 0x8f, 0xda, 0xa1, /*unassigned*/ | |
| 2291 0x8e, 0xe0, | |
| 2292 }; | |
| 2293 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, | |
| 2294 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x
31, | |
| 2295 0x00a2 }; | |
| 2296 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, | |
| 2297 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, | |
| 2298 9, | |
| 2299 }; | |
| 2300 | |
| 2301 /*EUC_TW*/ | |
| 2302 static const uint8_t sampleTxt_euc_tw[]={ | |
| 2303 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, | |
| 2304 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ | |
| 2305 0xe6, 0xca, 0x8a, | |
| 2306 }; | |
| 2307 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, | |
| 2308 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0
x42, 0x25, 0x58, 0x43, 0x43, | |
| 2309 0x8706, 0x8a, }; | |
| 2310 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, | |
| 2311 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, | |
| 2312 11, 13}; | |
| 2313 | |
| 2314 /*iso-2022-jp*/ | |
| 2315 static const uint8_t sampleTxt_iso_2022_jp[]={ | |
| 2316 0x1b, 0x28, 0x42, 0x41, | |
| 2317 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/ | |
| 2318 0x1b, 0x28, 0x42, 0x42, | |
| 2319 | |
| 2320 }; | |
| 2321 /* A % X 3 A
% X 1 A B */ | |
| 2322 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,
0x25,0x58,0x31,0x41, 0x42 }; | |
| 2323 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7,
7, 7, 7, 7, 12 }; | |
| 2324 | |
| 2325 /*iso-2022-cn*/ | |
| 2326 static const uint8_t sampleTxt_iso_2022_cn[]={ | |
| 2327 0x0f, 0x41, 0x44, | |
| 2328 0x1B, 0x24, 0x29, 0x47, | |
| 2329 0x0E, 0x40, 0x6c, /*unassigned*/ | |
| 2330 0x0f, 0x42, | |
| 2331 | |
| 2332 }; | |
| 2333 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34
,0x30,0x25,0x58,0x36,0x43,0x42 }; | |
| 2334 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8,
8, 8, 8, 8, 8, 11 }; | |
| 2335 | |
| 2336 /*iso-2022-kr*/ | |
| 2337 static const uint8_t sampleTxt_iso_2022_kr[]={ | |
| 2338 0x1b, 0x24, 0x29, 0x43, | |
| 2339 0x41, | |
| 2340 0x0E, 0x7f, 0x1E, | |
| 2341 0x0e, 0x25, 0x50, | |
| 2342 0x0f, 0x51, | |
| 2343 0x42, 0x43, | |
| 2344 | |
| 2345 }; | |
| 2346 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46
,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; | |
| 2347 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6,
6, 6, 6, 6, 9, 12, 13 , 14 }; | |
| 2348 | |
| 2349 /*hz*/ | |
| 2350 static const uint8_t sampleTxt_hz[]={ | |
| 2351 0x41, | |
| 2352 0x7e, 0x7b, 0x26, 0x30, | |
| 2353 0x7f, 0x1E, /*unassigned*/ | |
| 2354 0x26, 0x30, | |
| 2355 0x7e, 0x7d, 0x42, | |
| 2356 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ | |
| 2357 0x7e, 0x7d, 0x42, | |
| 2358 }; | |
| 2359 static const UChar hztoUnicode[]={ | |
| 2360 0x41, | |
| 2361 0x03a0, | |
| 2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
| 2363 0x03A0, | |
| 2364 0x42, | |
| 2365 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, | |
| 2366 0x42,}; | |
| 2367 | |
| 2368 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,
14,14,14,14,14,18, }; | |
| 2369 | |
| 2370 | |
| 2371 /*iscii*/ | |
| 2372 static const uint8_t sampleTxt_iscii[]={ | |
| 2373 0x41, | |
| 2374 0x30, | |
| 2375 0xEB, /*unassigned*/ | |
| 2376 0xa3, | |
| 2377 0x42, | |
| 2378 0xEC, /*unassigned*/ | |
| 2379 0x42, | |
| 2380 }; | |
| 2381 static const UChar isciitoUnicode[]={ | |
| 2382 0x41, | |
| 2383 0x30, | |
| 2384 0x25, 0x58, 0x45, 0x42, | |
| 2385 0x0903, | |
| 2386 0x42, | |
| 2387 0x25, 0x58, 0x45, 0x43, | |
| 2388 0x42,}; | |
| 2389 | |
| 2390 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; | |
| 2391 #endif | |
| 2392 | |
| 2393 /*UTF8*/ | |
| 2394 static const uint8_t sampleTxtUTF8[]={ | |
| 2395 0x20, 0x64, 0x50, | |
| 2396 0xC2, 0x7E, /* truncated char */ | |
| 2397 0x20, | |
| 2398 0xE0, 0xB5, 0x7E, /* truncated char */ | |
| 2399 0x40, | |
| 2400 }; | |
| 2401 static const UChar UTF8ToUnicode[]={ | |
| 2402 0x0020, 0x0064, 0x0050, | |
| 2403 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ | |
| 2404 0x0020, | |
| 2405 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x00
7E, | |
| 2406 0x0040 | |
| 2407 }; | |
| 2408 static const int32_t fromUTF8[] = { | |
| 2409 0, 1, 2, | |
| 2410 3, 3, 3, 3, 4, | |
| 2411 5, | |
| 2412 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
| 2413 9 | |
| 2414 }; | |
| 2415 static const UChar UTF8ToUnicodeXML_DEC[]={ | |
| 2416 0x0020, 0x0064, 0x0050, | |
| 2417 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~
*/ | |
| 2418 0x0020, | |
| 2419 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x00
31, 0x0038, 0x0031, 0x003B, 0x007E, | |
| 2420 0x0040 | |
| 2421 }; | |
| 2422 static const int32_t fromUTF8XML_DEC[] = { | |
| 2423 0, 1, 2, | |
| 2424 3, 3, 3, 3, 3, 3, 4, | |
| 2425 5, | |
| 2426 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, | |
| 2427 9 | |
| 2428 }; | |
| 2429 | |
| 2430 | |
| 2431 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 2432 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), | |
| 2433 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnic
ode[0]),"ibm-943", | |
| 2434 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) | |
| 2435 log_err("ibm-943->u with substitute with value did not match.\n"); | |
| 2436 | |
| 2437 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), | |
| 2438 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode
[0]),"IBM-eucJP", | |
| 2439 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) | |
| 2440 log_err("euc-jp->u with substitute with value did not match.\n"); | |
| 2441 | |
| 2442 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), | |
| 2443 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", | |
| 2444 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) | |
| 2445 log_err("euc-tw->u with substitute with value did not match.\n"); | |
| 2446 | |
| 2447 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_202
2_jp), | |
| 2448 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", | |
| 2449 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) | |
| 2450 log_err("iso-2022-jp->u with substitute with value did not match.\n"
); | |
| 2451 | |
| 2452 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sample
Txt_iso_2022_jp), | |
| 2453 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", | |
| 2454 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_Z
ERO_ERROR)) | |
| 2455 log_err("iso-2022-jp->u with substitute with value did not match.\n"
); | |
| 2456 | |
| 2457 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ | |
| 2458 { | |
| 2459 static const UChar iso_2022_jptoUnicodeDec[]={ | |
| 2460 0x0041, | |
| 2461 /* & # 5
8 ; */ | |
| 2462 0x0026, 0x0023, 0x0035,
0x0038, 0x003b, | |
| 2463 0x0026, 0x0023, 0x0032,
0x0036, 0x003b, | |
| 2464 0x0042 }; | |
| 2465 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7
,7,7,7,7,12, }; | |
| 2466 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), | |
| 2467 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/si
zeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
| 2468 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,
UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) | |
| 2469 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_XML_DEC did not match.\n"); | |
| 2470 } | |
| 2471 { | |
| 2472 static const UChar iso_2022_jptoUnicodeHex[]={ | |
| 2473 0x0041, | |
| 2474 /* & # x 3
A ; */ | |
| 2475 0x0026, 0x0023, 0x0078, 0x0033
, 0x0041, 0x003b, | |
| 2476 0x0026, 0x0023, 0x0078, 0x0031
, 0x0041, 0x003b, | |
| 2477 0x0042 }; | |
| 2478 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,
7,7,7,7,7,7,7,12 }; | |
| 2479 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), | |
| 2480 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/si
zeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
| 2481 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,
UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) | |
| 2482 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_XML_HEX did not match.\n"); | |
| 2483 } | |
| 2484 { | |
| 2485 static const UChar iso_2022_jptoUnicodeC[]={ | |
| 2486 0x0041, | |
| 2487 0x005C, 0x0078, 0x0033, 0x0041,
/* \x3A */ | |
| 2488 0x005C, 0x0078, 0x0031, 0x0041,
/* \x1A */ | |
| 2489 0x0042 }; | |
| 2490 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; | |
| 2491 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), | |
| 2492 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof
(iso_2022_jptoUnicode[0]),"iso-2022-jp", | |
| 2493 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UC
NV_ESCAPE_C,U_ZERO_ERROR )) | |
| 2494 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_C did not match.\n"); | |
| 2495 } | |
| 2496 } | |
| 2497 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_202
2_cn), | |
| 2498 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2
022_cntoUnicode[0]),"iso-2022-cn", | |
| 2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) | |
| 2500 log_err("iso-2022-cn->u with substitute with value did not match.\n"
); | |
| 2501 | |
| 2502 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_202
2_kr), | |
| 2503 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2
022_krtoUnicode[0]),"iso-2022-kr", | |
| 2504 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) | |
| 2505 log_err("iso-2022-kr->u with substitute with value did not match.\n"
); | |
| 2506 | |
| 2507 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), | |
| 2508 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", | |
| 2509 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) | |
| 2510 log_err("hz->u with substitute with value did not match.\n"); | |
| 2511 | |
| 2512 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), | |
| 2513 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]
),"ISCII,version=0", | |
| 2514 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) | |
| 2515 log_err("ISCII ->u with substitute with value did not match.\n"); | |
| 2516 #endif | |
| 2517 | |
| 2518 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), | |
| 2519 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"U
TF-8", | |
| 2520 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) | |
| 2521 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not m
atch.\n"); | |
| 2522 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8)
, | |
| 2523 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8To
UnicodeXML_DEC[0]),"UTF-8", | |
| 2524 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE
_XML_DEC, U_ZERO_ERROR)) | |
| 2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not m
atch.\n"); | |
| 2526 } | |
| 2527 } | |
| 2528 | |
| 2529 #if !UCONFIG_NO_LEGACY_CONVERSION | |
| 2530 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) | |
| 2531 { | |
| 2532 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; | |
| 2533 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0
xd3 }; | |
| 2534 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; | |
| 2535 | |
| 2536 | |
| 2537 static const uint8_t text943[] = { | |
| 2538 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; | |
| 2539 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,
0x5b57 }; | |
| 2540 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b
57 }; | |
| 2541 static const UChar toUnicode943stop[]= { 0x304b}; | |
| 2542 | |
| 2543 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; | |
| 2544 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; | |
| 2545 static const int32_t fromIBM943Offsstop[] = { 0}; | |
| 2546 | |
| 2547 gInBufferSize = inputsize; | |
| 2548 gOutBufferSize = outputsize; | |
| 2549 /*checking with a legal value*/ | |
| 2550 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0])
, | |
| 2551 templegal949, sizeof(templegal949), "ibm-949", | |
| 2552 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) | |
| 2553 log_err("u-> ibm-949 with skip did not match.\n"); | |
| 2554 | |
| 2555 /*checking illegal value for ibm-943 with substitute*/ | |
| 2556 if(!testConvertToUnicode(text943, sizeof(text943), | |
| 2557 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0])
,"ibm-943", | |
| 2558 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
| 2559 log_err("ibm-943->u with subst did not match.\n"); | |
| 2560 /*checking illegal value for ibm-943 with skip */ | |
| 2561 if(!testConvertToUnicode(text943, sizeof(text943), | |
| 2562 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[
0]),"ibm-943", | |
| 2563 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) | |
| 2564 log_err("ibm-943->u with skip did not match.\n"); | |
| 2565 | |
| 2566 /*checking illegal value for ibm-943 with stop */ | |
| 2567 if(!testConvertToUnicode(text943, sizeof(text943), | |
| 2568 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[
0]),"ibm-943", | |
| 2569 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) | |
| 2570 log_err("ibm-943->u with stop did not match.\n"); | |
| 2571 | |
| 2572 } | |
| 2573 | |
| 2574 static void TestSingleByte(int32_t inputsize, int32_t outputsize) | |
| 2575 { | |
| 2576 static const uint8_t sampleText[] = { | |
| 2577 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, | |
| 2578 0xff, 0x32, 0x33}; | |
| 2579 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1
a, 0x1a, 0x0032, 0x0033 }; | |
| 2580 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; | |
| 2581 /*checking illegal value for ibm-943 with substitute*/ | |
| 2582 gInBufferSize = inputsize; | |
| 2583 gOutBufferSize = outputsize; | |
| 2584 | |
| 2585 if(!testConvertToUnicode(sampleText, sizeof(sampleText), | |
| 2586 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0])
,"ibm-943", | |
| 2587 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) | |
| 2588 log_err("ibm-943->u with subst did not match.\n"); | |
| 2589 } | |
| 2590 | |
| 2591 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) | |
| 2592 { | |
| 2593 /*EBCDIC_STATEFUL*/ | |
| 2594 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x
6d65, 0x0061 }; | |
| 2595 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1,
0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; | |
| 2596 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2,
3, 4, 4, 4, 5, 5 }; | |
| 2597 /* s SO doubl SI sng s SO
fe fe SI s */ | |
| 2598 | |
| 2599 /*EBCDIC_STATEFUL with subChar=3f*/ | |
| 2600 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0
x62, 0xb1, 0x3f, 0x62 }; | |
| 2601 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2
, 2, 3, 4, 5 }; | |
| 2602 static const char mySubChar[]={ 0x3f}; | |
| 2603 | |
| 2604 gInBufferSize = inputsize; | |
| 2605 gOutBufferSize = outputsize; | |
| 2606 | |
| 2607 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof
(ebcdic_inputTest[0]), | |
| 2608 toIBM930, sizeof(toIBM930), "ibm-930", | |
| 2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) | |
| 2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); | |
| 2611 | |
| 2612 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof
(ebcdic_inputTest[0]), | |
| 2613 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", | |
| 2614 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) | |
| 2615 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) di
d not match.\n"); | |
| 2616 } | |
| 2617 #endif | |
| 2618 | |
| 2619 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t
*expect, int expectLen, | |
| 2620 const char *codepage, UConverterFromUCallback callback , const i
nt32_t *expectOffsets, | |
| 2621 const char *mySubChar, int8_t len) | |
| 2622 { | |
| 2623 | |
| 2624 | |
| 2625 UErrorCode status = U_ZERO_ERROR; | |
| 2626 UConverter *conv = 0; | |
| 2627 char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
| 2628 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
| 2629 const UChar *src; | |
| 2630 char *end; | |
| 2631 char *targ; | |
| 2632 int32_t *offs; | |
| 2633 int i; | |
| 2634 int32_t realBufferSize; | |
| 2635 char *realBufferEnd; | |
| 2636 const UChar *realSourceEnd; | |
| 2637 const UChar *sourceLimit; | |
| 2638 UBool checkOffsets = TRUE; | |
| 2639 UBool doFlush; | |
| 2640 char junk[9999]; | |
| 2641 char offset_str[9999]; | |
| 2642 char *p; | |
| 2643 UConverterFromUCallback oldAction = NULL; | |
| 2644 const void* oldContext = NULL; | |
| 2645 | |
| 2646 | |
| 2647 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 2648 junkout[i] = (char)0xF0; | |
| 2649 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 2650 junokout[i] = 0xFF; | |
| 2651 setNuConvTestName(codepage, "FROM"); | |
| 2652 | |
| 2653 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer=
%d\n", codepage, gInBufferSize, | |
| 2654 gOutBufferSize); | |
| 2655 | |
| 2656 conv = ucnv_open(codepage, &status); | |
| 2657 if(U_FAILURE(status)) | |
| 2658 { | |
| 2659 log_data_err("Couldn't open converter %s\n",codepage); | |
| 2660 return TRUE; | |
| 2661 } | |
| 2662 | |
| 2663 log_verbose("Converter opened..\n"); | |
| 2664 | |
| 2665 /*----setting the callback routine----*/ | |
| 2666 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &statu
s); | |
| 2667 if (U_FAILURE(status)) | |
| 2668 { | |
| 2669 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
| 2670 } | |
| 2671 /*------------------------*/ | |
| 2672 /*setting the subChar*/ | |
| 2673 if(mySubChar != NULL){ | |
| 2674 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
| 2675 if (U_FAILURE(status)) { | |
| 2676 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); | |
| 2677 } | |
| 2678 } | |
| 2679 /*------------*/ | |
| 2680 | |
| 2681 src = source; | |
| 2682 targ = junkout; | |
| 2683 offs = junokout; | |
| 2684 | |
| 2685 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
| 2686 realBufferEnd = junkout + realBufferSize; | |
| 2687 realSourceEnd = source + sourceLen; | |
| 2688 | |
| 2689 if ( gOutBufferSize != realBufferSize ) | |
| 2690 checkOffsets = FALSE; | |
| 2691 | |
| 2692 if( gInBufferSize != NEW_MAX_BUFFER ) | |
| 2693 checkOffsets = FALSE; | |
| 2694 | |
| 2695 do | |
| 2696 { | |
| 2697 end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
| 2698 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
| 2699 | |
| 2700 doFlush = (UBool)(sourceLimit == realSourceEnd); | |
| 2701 | |
| 2702 if(targ == realBufferEnd) | |
| 2703 { | |
| 2704 log_err("Error, overflowed the real buffer while about to call fromU
nicode! targ=%08lx %s", targ, gNuConvTestName); | |
| 2705 return FALSE; | |
| 2706 } | |
| 2707 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx
to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
| 2708 | |
| 2709 | |
| 2710 status = U_ZERO_ERROR; | |
| 2711 | |
| 2712 ucnv_fromUnicode (conv, | |
| 2713 (char **)&targ, | |
| 2714 (const char *)end, | |
| 2715 &src, | |
| 2716 sourceLimit, | |
| 2717 checkOffsets ? offs : NULL, | |
| 2718 doFlush, /* flush if we're at the end of the input data */ | |
| 2719 &status); | |
| 2720 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sour
ceLimit < realSourceEnd)) ); | |
| 2721 | |
| 2722 | |
| 2723 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
| 2724 UChar errChars[50]; /* should be sufficient */ | |
| 2725 int8_t errLen = 50; | |
| 2726 UErrorCode err = U_ZERO_ERROR; | |
| 2727 const UChar* start= NULL; | |
| 2728 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); | |
| 2729 if(U_FAILURE(err)){ | |
| 2730 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName
(err)); | |
| 2731 } | |
| 2732 /* length of in invalid chars should be equal to returned length*/ | |
| 2733 start = src - errLen; | |
| 2734 if(u_strncmp(errChars,start,errLen)!=0){ | |
| 2735 log_err("ucnv_getInvalidUChars did not return the correct invalid ch
ars for encoding %s \n", ucnv_getName(conv,&err)); | |
| 2736 } | |
| 2737 } | |
| 2738 /* allow failure codes for the stop callback */ | |
| 2739 if(U_FAILURE(status) && | |
| 2740 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND
&& status != U_ILLEGAL_CHAR_FOUND))) | |
| 2741 { | |
| 2742 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
| 2743 return FALSE; | |
| 2744 } | |
| 2745 | |
| 2746 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
| 2747 sourceLen, targ-junkout); | |
| 2748 if(getTestOption(VERBOSITY_OPTION)) | |
| 2749 { | |
| 2750 | |
| 2751 junk[0] = 0; | |
| 2752 offset_str[0] = 0; | |
| 2753 for(p = junkout;p<targ;p++) | |
| 2754 { | |
| 2755 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
| 2756 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsig
ned int)junokout[p-junkout]); | |
| 2757 } | |
| 2758 | |
| 2759 log_verbose(junk); | |
| 2760 printSeq(expect, expectLen); | |
| 2761 if ( checkOffsets ) | |
| 2762 { | |
| 2763 log_verbose("\nOffsets:"); | |
| 2764 log_verbose(offset_str); | |
| 2765 } | |
| 2766 log_verbose("\n"); | |
| 2767 } | |
| 2768 ucnv_close(conv); | |
| 2769 | |
| 2770 | |
| 2771 if(expectLen != targ-junkout) | |
| 2772 { | |
| 2773 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, g
NuConvTestName); | |
| 2774 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkou
t, gNuConvTestName); | |
| 2775 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
| 2776 printSeqErr(expect, expectLen); | |
| 2777 return FALSE; | |
| 2778 } | |
| 2779 | |
| 2780 if (checkOffsets && (expectOffsets != 0) ) | |
| 2781 { | |
| 2782 log_verbose("comparing %d offsets..\n", targ-junkout); | |
| 2783 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
| 2784 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
| 2785 log_err("Got Output : "); | |
| 2786 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
| 2787 log_err("Got Offsets: "); | |
| 2788 for(p=junkout;p<targ;p++) | |
| 2789 log_err("%d,", junokout[p-junkout]); | |
| 2790 log_err("\n"); | |
| 2791 log_err("Expected Offsets: "); | |
| 2792 for(i=0; i<(targ-junkout); i++) | |
| 2793 log_err("%d,", expectOffsets[i]); | |
| 2794 log_err("\n"); | |
| 2795 return FALSE; | |
| 2796 } | |
| 2797 } | |
| 2798 | |
| 2799 if(!memcmp(junkout, expect, expectLen)) | |
| 2800 { | |
| 2801 log_verbose("String matches! %s\n", gNuConvTestName); | |
| 2802 return TRUE; | |
| 2803 } | |
| 2804 else | |
| 2805 { | |
| 2806 log_err("String does not match. %s\n", gNuConvTestName); | |
| 2807 log_err("source: "); | |
| 2808 printUSeqErr(source, sourceLen); | |
| 2809 log_err("Got: "); | |
| 2810 printSeqErr((const uint8_t *)junkout, expectLen); | |
| 2811 log_err("Expected: "); | |
| 2812 printSeqErr(expect, expectLen); | |
| 2813 return FALSE; | |
| 2814 } | |
| 2815 } | |
| 2816 | |
| 2817 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *e
xpect, int expectlen, | |
| 2818 const char *codepage, UConverterToUCallback callback, const int32
_t *expectOffsets, | |
| 2819 const char *mySubChar, int8_t len) | |
| 2820 { | |
| 2821 UErrorCode status = U_ZERO_ERROR; | |
| 2822 UConverter *conv = 0; | |
| 2823 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
| 2824 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
| 2825 const char *src; | |
| 2826 const char *realSourceEnd; | |
| 2827 const char *srcLimit; | |
| 2828 UChar *targ; | |
| 2829 UChar *end; | |
| 2830 int32_t *offs; | |
| 2831 int i; | |
| 2832 UBool checkOffsets = TRUE; | |
| 2833 char junk[9999]; | |
| 2834 char offset_str[9999]; | |
| 2835 UChar *p; | |
| 2836 UConverterToUCallback oldAction = NULL; | |
| 2837 const void* oldContext = NULL; | |
| 2838 | |
| 2839 int32_t realBufferSize; | |
| 2840 UChar *realBufferEnd; | |
| 2841 | |
| 2842 | |
| 2843 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 2844 junkout[i] = 0xFFFE; | |
| 2845 | |
| 2846 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 2847 junokout[i] = -1; | |
| 2848 | |
| 2849 setNuConvTestName(codepage, "TO"); | |
| 2850 | |
| 2851 log_verbose("\n========= %s\n", gNuConvTestName); | |
| 2852 | |
| 2853 conv = ucnv_open(codepage, &status); | |
| 2854 if(U_FAILURE(status)) | |
| 2855 { | |
| 2856 log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
| 2857 return TRUE; | |
| 2858 } | |
| 2859 | |
| 2860 log_verbose("Converter opened..\n"); | |
| 2861 | |
| 2862 src = (const char *)source; | |
| 2863 targ = junkout; | |
| 2864 offs = junokout; | |
| 2865 | |
| 2866 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
| 2867 realBufferEnd = junkout + realBufferSize; | |
| 2868 realSourceEnd = src + sourcelen; | |
| 2869 /*----setting the callback routine----*/ | |
| 2870 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status)
; | |
| 2871 if (U_FAILURE(status)) | |
| 2872 { | |
| 2873 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
| 2874 } | |
| 2875 /*-------------------------------------*/ | |
| 2876 /*setting the subChar*/ | |
| 2877 if(mySubChar != NULL){ | |
| 2878 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
| 2879 if (U_FAILURE(status)) { | |
| 2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); | |
| 2881 } | |
| 2882 } | |
| 2883 /*------------*/ | |
| 2884 | |
| 2885 | |
| 2886 if ( gOutBufferSize != realBufferSize ) | |
| 2887 checkOffsets = FALSE; | |
| 2888 | |
| 2889 if( gInBufferSize != NEW_MAX_BUFFER ) | |
| 2890 checkOffsets = FALSE; | |
| 2891 | |
| 2892 do | |
| 2893 { | |
| 2894 end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
| 2895 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
| 2896 | |
| 2897 if(targ == realBufferEnd) | |
| 2898 { | |
| 2899 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
| 2900 return FALSE; | |
| 2901 } | |
| 2902 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
| 2903 | |
| 2904 | |
| 2905 | |
| 2906 status = U_ZERO_ERROR; | |
| 2907 | |
| 2908 ucnv_toUnicode (conv, | |
| 2909 &targ, | |
| 2910 end, | |
| 2911 (const char **)&src, | |
| 2912 (const char *)srcLimit, | |
| 2913 checkOffsets ? offs : NULL, | |
| 2914 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of the source data */ | |
| 2915 &status); | |
| 2916 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcL
imit < realSourceEnd)) ); /* while we just need another buffer */ | |
| 2917 | |
| 2918 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ | |
| 2919 char errChars[50]; /* should be sufficient */ | |
| 2920 int8_t errLen = 50; | |
| 2921 UErrorCode err = U_ZERO_ERROR; | |
| 2922 const char* start= NULL; | |
| 2923 ucnv_getInvalidChars(conv,errChars, &errLen, &err); | |
| 2924 if(U_FAILURE(err)){ | |
| 2925 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(
err)); | |
| 2926 } | |
| 2927 /* length of in invalid chars should be equal to returned length*/ | |
| 2928 start = src - errLen; | |
| 2929 if(uprv_strncmp(errChars,start,errLen)!=0){ | |
| 2930 log_err("ucnv_getInvalidChars did not return the correct invalid cha
rs for encoding %s \n", ucnv_getName(conv,&err)); | |
| 2931 } | |
| 2932 } | |
| 2933 /* allow failure codes for the stop callback */ | |
| 2934 if(U_FAILURE(status) && | |
| 2935 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND &
& status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) | |
| 2936 { | |
| 2937 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
| 2938 return FALSE; | |
| 2939 } | |
| 2940 | |
| 2941 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
| 2942 sourcelen, targ-junkout); | |
| 2943 if(getTestOption(VERBOSITY_OPTION)) | |
| 2944 { | |
| 2945 | |
| 2946 junk[0] = 0; | |
| 2947 offset_str[0] = 0; | |
| 2948 | |
| 2949 for(p = junkout;p<targ;p++) | |
| 2950 { | |
| 2951 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
); | |
| 2952 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[p-junkout]); | |
| 2953 } | |
| 2954 | |
| 2955 log_verbose(junk); | |
| 2956 printUSeq(expect, expectlen); | |
| 2957 if ( checkOffsets ) | |
| 2958 { | |
| 2959 log_verbose("\nOffsets:"); | |
| 2960 log_verbose(offset_str); | |
| 2961 } | |
| 2962 log_verbose("\n"); | |
| 2963 } | |
| 2964 ucnv_close(conv); | |
| 2965 | |
| 2966 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
| 2967 | |
| 2968 if (checkOffsets && (expectOffsets != 0)) | |
| 2969 { | |
| 2970 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
| 2971 { | |
| 2972 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
| 2973 log_err("Got offsets: "); | |
| 2974 for(p=junkout;p<targ;p++) | |
| 2975 log_err(" %2d,", junokout[p-junkout]); | |
| 2976 log_err("\n"); | |
| 2977 log_err("Expected offsets: "); | |
| 2978 for(i=0; i<(targ-junkout); i++) | |
| 2979 log_err(" %2d,", expectOffsets[i]); | |
| 2980 log_err("\n"); | |
| 2981 log_err("Got output: "); | |
| 2982 for(i=0; i<(targ-junkout); i++) | |
| 2983 log_err("0x%04x,", junkout[i]); | |
| 2984 log_err("\n"); | |
| 2985 log_err("From source: "); | |
| 2986 for(i=0; i<(src-(const char *)source); i++) | |
| 2987 log_err(" 0x%02x,", (unsigned char)source[i]); | |
| 2988 log_err("\n"); | |
| 2989 } | |
| 2990 } | |
| 2991 | |
| 2992 if(!memcmp(junkout, expect, expectlen*2)) | |
| 2993 { | |
| 2994 log_verbose("Matches!\n"); | |
| 2995 return TRUE; | |
| 2996 } | |
| 2997 else | |
| 2998 { | |
| 2999 log_err("String does not match. %s\n", gNuConvTestName); | |
| 3000 log_verbose("String does not match. %s\n", gNuConvTestName); | |
| 3001 log_err("Got: "); | |
| 3002 printUSeqErr(junkout, expectlen); | |
| 3003 log_err("Expected: "); | |
| 3004 printUSeqErr(expect, expectlen); | |
| 3005 log_err("\n"); | |
| 3006 return FALSE; | |
| 3007 } | |
| 3008 } | |
| 3009 | |
| 3010 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, con
st uint8_t *expect, int expectLen, | |
| 3011 const char *codepage, UConverterFromUCallback callback , const i
nt32_t *expectOffsets, | |
| 3012 const char *mySubChar, int8_t len, const void* context, UErrorCo
de expectedError) | |
| 3013 { | |
| 3014 | |
| 3015 | |
| 3016 UErrorCode status = U_ZERO_ERROR; | |
| 3017 UConverter *conv = 0; | |
| 3018 char junkout[NEW_MAX_BUFFER]; /* FIX */ | |
| 3019 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
| 3020 const UChar *src; | |
| 3021 char *end; | |
| 3022 char *targ; | |
| 3023 int32_t *offs; | |
| 3024 int i; | |
| 3025 int32_t realBufferSize; | |
| 3026 char *realBufferEnd; | |
| 3027 const UChar *realSourceEnd; | |
| 3028 const UChar *sourceLimit; | |
| 3029 UBool checkOffsets = TRUE; | |
| 3030 UBool doFlush; | |
| 3031 char junk[9999]; | |
| 3032 char offset_str[9999]; | |
| 3033 char *p; | |
| 3034 UConverterFromUCallback oldAction = NULL; | |
| 3035 const void* oldContext = NULL; | |
| 3036 | |
| 3037 | |
| 3038 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 3039 junkout[i] = (char)0xF0; | |
| 3040 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 3041 junokout[i] = 0xFF; | |
| 3042 setNuConvTestName(codepage, "FROM"); | |
| 3043 | |
| 3044 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer=
%d\n", codepage, gInBufferSize, | |
| 3045 gOutBufferSize); | |
| 3046 | |
| 3047 conv = ucnv_open(codepage, &status); | |
| 3048 if(U_FAILURE(status)) | |
| 3049 { | |
| 3050 log_data_err("Couldn't open converter %s\n",codepage); | |
| 3051 return TRUE; /* Because the err has already been logged. */ | |
| 3052 } | |
| 3053 | |
| 3054 log_verbose("Converter opened..\n"); | |
| 3055 | |
| 3056 /*----setting the callback routine----*/ | |
| 3057 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &st
atus); | |
| 3058 if (U_FAILURE(status)) | |
| 3059 { | |
| 3060 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
| 3061 } | |
| 3062 /*------------------------*/ | |
| 3063 /*setting the subChar*/ | |
| 3064 if(mySubChar != NULL){ | |
| 3065 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
| 3066 if (U_FAILURE(status)) { | |
| 3067 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(s
tatus)); | |
| 3068 } | |
| 3069 } | |
| 3070 /*------------*/ | |
| 3071 | |
| 3072 src = source; | |
| 3073 targ = junkout; | |
| 3074 offs = junokout; | |
| 3075 | |
| 3076 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
| 3077 realBufferEnd = junkout + realBufferSize; | |
| 3078 realSourceEnd = source + sourceLen; | |
| 3079 | |
| 3080 if ( gOutBufferSize != realBufferSize ) | |
| 3081 checkOffsets = FALSE; | |
| 3082 | |
| 3083 if( gInBufferSize != NEW_MAX_BUFFER ) | |
| 3084 checkOffsets = FALSE; | |
| 3085 | |
| 3086 do | |
| 3087 { | |
| 3088 end = nct_min(targ + gOutBufferSize, realBufferEnd); | |
| 3089 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); | |
| 3090 | |
| 3091 doFlush = (UBool)(sourceLimit == realSourceEnd); | |
| 3092 | |
| 3093 if(targ == realBufferEnd) | |
| 3094 { | |
| 3095 log_err("Error, overflowed the real buffer while about to call fromU
nicode! targ=%08lx %s", targ, gNuConvTestName); | |
| 3096 return FALSE; | |
| 3097 } | |
| 3098 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx
to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); | |
| 3099 | |
| 3100 | |
| 3101 status = U_ZERO_ERROR; | |
| 3102 | |
| 3103 ucnv_fromUnicode (conv, | |
| 3104 (char **)&targ, | |
| 3105 (const char *)end, | |
| 3106 &src, | |
| 3107 sourceLimit, | |
| 3108 checkOffsets ? offs : NULL, | |
| 3109 doFlush, /* flush if we're at the end of the input data */ | |
| 3110 &status); | |
| 3111 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sour
ceLimit < realSourceEnd)) ); | |
| 3112 | |
| 3113 /* allow failure codes for the stop callback */ | |
| 3114 if(U_FAILURE(status) && status != expectedError) | |
| 3115 { | |
| 3116 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
| 3117 return FALSE; | |
| 3118 } | |
| 3119 | |
| 3120 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", | |
| 3121 sourceLen, targ-junkout); | |
| 3122 if(getTestOption(VERBOSITY_OPTION)) | |
| 3123 { | |
| 3124 | |
| 3125 junk[0] = 0; | |
| 3126 offset_str[0] = 0; | |
| 3127 for(p = junkout;p<targ;p++) | |
| 3128 { | |
| 3129 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); | |
| 3130 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsig
ned int)junokout[p-junkout]); | |
| 3131 } | |
| 3132 | |
| 3133 log_verbose(junk); | |
| 3134 printSeq(expect, expectLen); | |
| 3135 if ( checkOffsets ) | |
| 3136 { | |
| 3137 log_verbose("\nOffsets:"); | |
| 3138 log_verbose(offset_str); | |
| 3139 } | |
| 3140 log_verbose("\n"); | |
| 3141 } | |
| 3142 ucnv_close(conv); | |
| 3143 | |
| 3144 | |
| 3145 if(expectLen != targ-junkout) | |
| 3146 { | |
| 3147 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, g
NuConvTestName); | |
| 3148 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkou
t, gNuConvTestName); | |
| 3149 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
| 3150 printSeqErr(expect, expectLen); | |
| 3151 return FALSE; | |
| 3152 } | |
| 3153 | |
| 3154 if (checkOffsets && (expectOffsets != 0) ) | |
| 3155 { | |
| 3156 log_verbose("comparing %d offsets..\n", targ-junkout); | |
| 3157 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ | |
| 3158 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
| 3159 log_err("Got Output : "); | |
| 3160 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); | |
| 3161 log_err("Got Offsets: "); | |
| 3162 for(p=junkout;p<targ;p++) | |
| 3163 log_err("%d,", junokout[p-junkout]); | |
| 3164 log_err("\n"); | |
| 3165 log_err("Expected Offsets: "); | |
| 3166 for(i=0; i<(targ-junkout); i++) | |
| 3167 log_err("%d,", expectOffsets[i]); | |
| 3168 log_err("\n"); | |
| 3169 return FALSE; | |
| 3170 } | |
| 3171 } | |
| 3172 | |
| 3173 if(!memcmp(junkout, expect, expectLen)) | |
| 3174 { | |
| 3175 log_verbose("String matches! %s\n", gNuConvTestName); | |
| 3176 return TRUE; | |
| 3177 } | |
| 3178 else | |
| 3179 { | |
| 3180 log_err("String does not match. %s\n", gNuConvTestName); | |
| 3181 log_err("source: "); | |
| 3182 printUSeqErr(source, sourceLen); | |
| 3183 log_err("Got: "); | |
| 3184 printSeqErr((const uint8_t *)junkout, expectLen); | |
| 3185 log_err("Expected: "); | |
| 3186 printSeqErr(expect, expectLen); | |
| 3187 return FALSE; | |
| 3188 } | |
| 3189 } | |
| 3190 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, con
st UChar *expect, int expectlen, | |
| 3191 const char *codepage, UConverterToUCallback callback, const int32
_t *expectOffsets, | |
| 3192 const char *mySubChar, int8_t len, const void* context, UErrorCod
e expectedError) | |
| 3193 { | |
| 3194 UErrorCode status = U_ZERO_ERROR; | |
| 3195 UConverter *conv = 0; | |
| 3196 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ | |
| 3197 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ | |
| 3198 const char *src; | |
| 3199 const char *realSourceEnd; | |
| 3200 const char *srcLimit; | |
| 3201 UChar *targ; | |
| 3202 UChar *end; | |
| 3203 int32_t *offs; | |
| 3204 int i; | |
| 3205 UBool checkOffsets = TRUE; | |
| 3206 char junk[9999]; | |
| 3207 char offset_str[9999]; | |
| 3208 UChar *p; | |
| 3209 UConverterToUCallback oldAction = NULL; | |
| 3210 const void* oldContext = NULL; | |
| 3211 | |
| 3212 int32_t realBufferSize; | |
| 3213 UChar *realBufferEnd; | |
| 3214 | |
| 3215 | |
| 3216 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 3217 junkout[i] = 0xFFFE; | |
| 3218 | |
| 3219 for(i=0;i<NEW_MAX_BUFFER;i++) | |
| 3220 junokout[i] = -1; | |
| 3221 | |
| 3222 setNuConvTestName(codepage, "TO"); | |
| 3223 | |
| 3224 log_verbose("\n========= %s\n", gNuConvTestName); | |
| 3225 | |
| 3226 conv = ucnv_open(codepage, &status); | |
| 3227 if(U_FAILURE(status)) | |
| 3228 { | |
| 3229 log_data_err("Couldn't open converter %s\n",gNuConvTestName); | |
| 3230 return TRUE; | |
| 3231 } | |
| 3232 | |
| 3233 log_verbose("Converter opened..\n"); | |
| 3234 | |
| 3235 src = (const char *)source; | |
| 3236 targ = junkout; | |
| 3237 offs = junokout; | |
| 3238 | |
| 3239 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); | |
| 3240 realBufferEnd = junkout + realBufferSize; | |
| 3241 realSourceEnd = src + sourcelen; | |
| 3242 /*----setting the callback routine----*/ | |
| 3243 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &stat
us); | |
| 3244 if (U_FAILURE(status)) | |
| 3245 { | |
| 3246 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); | |
| 3247 } | |
| 3248 /*-------------------------------------*/ | |
| 3249 /*setting the subChar*/ | |
| 3250 if(mySubChar != NULL){ | |
| 3251 ucnv_setSubstChars(conv, mySubChar, len, &status); | |
| 3252 if (U_FAILURE(status)) { | |
| 3253 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); | |
| 3254 } | |
| 3255 } | |
| 3256 /*------------*/ | |
| 3257 | |
| 3258 | |
| 3259 if ( gOutBufferSize != realBufferSize ) | |
| 3260 checkOffsets = FALSE; | |
| 3261 | |
| 3262 if( gInBufferSize != NEW_MAX_BUFFER ) | |
| 3263 checkOffsets = FALSE; | |
| 3264 | |
| 3265 do | |
| 3266 { | |
| 3267 end = nct_min( targ + gOutBufferSize, realBufferEnd); | |
| 3268 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); | |
| 3269 | |
| 3270 if(targ == realBufferEnd) | |
| 3271 { | |
| 3272 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); | |
| 3273 return FALSE; | |
| 3274 } | |
| 3275 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); | |
| 3276 | |
| 3277 | |
| 3278 | |
| 3279 status = U_ZERO_ERROR; | |
| 3280 | |
| 3281 ucnv_toUnicode (conv, | |
| 3282 &targ, | |
| 3283 end, | |
| 3284 (const char **)&src, | |
| 3285 (const char *)srcLimit, | |
| 3286 checkOffsets ? offs : NULL, | |
| 3287 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of the source data */ | |
| 3288 &status); | |
| 3289 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcL
imit < realSourceEnd)) ); /* while we just need another buffer */ | |
| 3290 | |
| 3291 /* allow failure codes for the stop callback */ | |
| 3292 if(U_FAILURE(status) && status!=expectedError) | |
| 3293 { | |
| 3294 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); | |
| 3295 return FALSE; | |
| 3296 } | |
| 3297 | |
| 3298 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", | |
| 3299 sourcelen, targ-junkout); | |
| 3300 if(getTestOption(VERBOSITY_OPTION)) | |
| 3301 { | |
| 3302 | |
| 3303 junk[0] = 0; | |
| 3304 offset_str[0] = 0; | |
| 3305 | |
| 3306 for(p = junkout;p<targ;p++) | |
| 3307 { | |
| 3308 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
); | |
| 3309 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[p-junkout]); | |
| 3310 } | |
| 3311 | |
| 3312 log_verbose(junk); | |
| 3313 printUSeq(expect, expectlen); | |
| 3314 if ( checkOffsets ) | |
| 3315 { | |
| 3316 log_verbose("\nOffsets:"); | |
| 3317 log_verbose(offset_str); | |
| 3318 } | |
| 3319 log_verbose("\n"); | |
| 3320 } | |
| 3321 ucnv_close(conv); | |
| 3322 | |
| 3323 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); | |
| 3324 | |
| 3325 if (checkOffsets && (expectOffsets != 0)) | |
| 3326 { | |
| 3327 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) | |
| 3328 { | |
| 3329 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); | |
| 3330 log_err("Got offsets: "); | |
| 3331 for(p=junkout;p<targ;p++) | |
| 3332 log_err(" %2d,", junokout[p-junkout]); | |
| 3333 log_err("\n"); | |
| 3334 log_err("Expected offsets: "); | |
| 3335 for(i=0; i<(targ-junkout); i++) | |
| 3336 log_err(" %2d,", expectOffsets[i]); | |
| 3337 log_err("\n"); | |
| 3338 log_err("Got output: "); | |
| 3339 for(i=0; i<(targ-junkout); i++) | |
| 3340 log_err("0x%04x,", junkout[i]); | |
| 3341 log_err("\n"); | |
| 3342 log_err("From source: "); | |
| 3343 for(i=0; i<(src-(const char *)source); i++) | |
| 3344 log_err(" 0x%02x,", (unsigned char)source[i]); | |
| 3345 log_err("\n"); | |
| 3346 } | |
| 3347 } | |
| 3348 | |
| 3349 if(!memcmp(junkout, expect, expectlen*2)) | |
| 3350 { | |
| 3351 log_verbose("Matches!\n"); | |
| 3352 return TRUE; | |
| 3353 } | |
| 3354 else | |
| 3355 { | |
| 3356 log_err("String does not match. %s\n", gNuConvTestName); | |
| 3357 log_verbose("String does not match. %s\n", gNuConvTestName); | |
| 3358 log_err("Got: "); | |
| 3359 printUSeqErr(junkout, expectlen); | |
| 3360 log_err("Expected: "); | |
| 3361 printUSeqErr(expect, expectlen); | |
| 3362 log_err("\n"); | |
| 3363 return FALSE; | |
| 3364 } | |
| 3365 } | |
| 3366 | |
| 3367 static void TestCallBackFailure(void) { | |
| 3368 UErrorCode status = U_USELESS_COLLATOR_ERROR; | |
| 3369 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); | |
| 3370 if (status != U_USELESS_COLLATOR_ERROR) { | |
| 3371 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad
UErrorCode\n"); | |
| 3372 } | |
| 3373 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); | |
| 3374 if (status != U_USELESS_COLLATOR_ERROR) { | |
| 3375 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad
UErrorCode\n"); | |
| 3376 } | |
| 3377 ucnv_cbFromUWriteSub(NULL, -1, &status); | |
| 3378 if (status != U_USELESS_COLLATOR_ERROR) { | |
| 3379 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UE
rrorCode\n"); | |
| 3380 } | |
| 3381 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); | |
| 3382 if (status != U_USELESS_COLLATOR_ERROR) { | |
| 3383 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad U
ErrorCode\n"); | |
| 3384 } | |
| 3385 } | |
| OLD | NEW |