OLD | NEW |
(Empty) | |
| 1 /******************************************************************** |
| 2 * COPYRIGHT: |
| 3 * Copyright (c) 1997-2010, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. |
| 5 ********************************************************************/ |
| 6 /* |
| 7 ******************************************************************************** |
| 8 * File NCCBTST.C |
| 9 * |
| 10 * Modification History: |
| 11 * Name Description |
| 12 * Madhu Katragadda 7/21/1999 Testing error callback routines |
| 13 ******************************************************************************** |
| 14 */ |
| 15 #include <stdio.h> |
| 16 #include <stdlib.h> |
| 17 #include <string.h> |
| 18 #include <ctype.h> |
| 19 #include "cstring.h" |
| 20 #include "unicode/uloc.h" |
| 21 #include "unicode/ucnv.h" |
| 22 #include "unicode/ucnv_err.h" |
| 23 #include "cintltst.h" |
| 24 #include "unicode/utypes.h" |
| 25 #include "unicode/ustring.h" |
| 26 #include "nccbtst.h" |
| 27 #include "unicode/ucnv_cb.h" |
/* Large buffer size handed to the Test* drivers (e.g. TestSkip) for the
 * "whole input/output in one chunk" cases. */
#define NEW_MAX_BUFFER 999

/* Smaller of x and y. Every use of an argument is parenthesized so that
 * operands with lower-precedence operators (e.g. `a|b`, `c ? d : e`) expand
 * correctly. Note: each argument may still be evaluated twice, so do not pass
 * expressions with side effects. */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes of the current test run; assigned by the Test* drivers
 * (e.g. TestSkip) and reported by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Human-readable description of the current test, filled in by
 * setNuConvTestName(). */
static char gNuConvTestName[1024];
| 36 |
/*
 * Logs a byte sequence through log_verbose() as "\n{0xNN, 0xNN, ...}\n",
 * using upper-case two-digit hex for every byte.
 *
 * a   - bytes to dump
 * len - number of bytes in a; nothing but the braces is logged when len <= 0
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
| 45 |
| 46 static void printUSeq(const UChar* a, int len) |
| 47 { |
| 48 int i=0; |
| 49 log_verbose("{"); |
| 50 while (i<len) |
| 51 log_verbose(" 0x%04x, ", a[i++]); |
| 52 log_verbose("}\n"); |
| 53 } |
| 54 |
/*
 * Writes a byte sequence to stderr as "{ 0xnn,  0xnn, ...}\n", using
 * lower-case two-digit hex for every byte.
 *
 * a   - bytes to dump
 * len - number of bytes in a; only the braces are written when len <= 0
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
| 63 |
| 64 static void printUSeqErr(const UChar* a, int len) |
| 65 { |
| 66 int i=0; |
| 67 fprintf(stderr, "{"); |
| 68 while (i<len) |
| 69 fprintf(stderr, "0x%04x, ", a[i++]); |
| 70 fprintf(stderr,"}\n"); |
| 71 } |
| 72 |
| 73 static void setNuConvTestName(const char *codepage, const char *direction) |
| 74 { |
| 75 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufS
iz=%d]", |
| 76 codepage, |
| 77 direction, |
| 78 (int)gInBufferSize, |
| 79 (int)gOutBufferSize); |
| 80 } |
| 81 |
| 82 |
| 83 static void TestCallBackFailure(void); |
| 84 |
| 85 void addTestConvertErrorCallBack(TestNode** root); |
| 86 |
| 87 void addTestConvertErrorCallBack(TestNode** root) |
| 88 { |
| 89 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack"); |
| 90 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack"); |
| 91 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack"); |
| 92 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCal
lBack"); |
| 93 |
| 94 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 95 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOther
CallBack"); |
| 96 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBa
ck"); |
| 97 #endif |
| 98 |
| 99 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure"); |
| 100 } |
| 101 |
| 102 static void TestSkipCallBack() |
| 103 { |
| 104 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 105 TestSkip(1,NEW_MAX_BUFFER); |
| 106 TestSkip(1,1); |
| 107 TestSkip(NEW_MAX_BUFFER, 1); |
| 108 } |
| 109 |
| 110 static void TestStopCallBack() |
| 111 { |
| 112 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 113 TestStop(1,NEW_MAX_BUFFER); |
| 114 TestStop(1,1); |
| 115 TestStop(NEW_MAX_BUFFER, 1); |
| 116 } |
| 117 |
| 118 static void TestSubCallBack() |
| 119 { |
| 120 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 121 TestSub(1,NEW_MAX_BUFFER); |
| 122 TestSub(1,1); |
| 123 TestSub(NEW_MAX_BUFFER, 1); |
| 124 |
| 125 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 126 TestEBCDIC_STATEFUL_Sub(1, 1); |
| 127 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER); |
| 128 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1); |
| 129 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 130 #endif |
| 131 } |
| 132 |
| 133 static void TestSubWithValueCallBack() |
| 134 { |
| 135 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 136 TestSubWithValue(1,NEW_MAX_BUFFER); |
| 137 TestSubWithValue(1,1); |
| 138 TestSubWithValue(NEW_MAX_BUFFER, 1); |
| 139 } |
| 140 |
| 141 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 142 static void TestLegalAndOtherCallBack() |
| 143 { |
| 144 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 145 TestLegalAndOthers(1,NEW_MAX_BUFFER); |
| 146 TestLegalAndOthers(1,1); |
| 147 TestLegalAndOthers(NEW_MAX_BUFFER, 1); |
| 148 } |
| 149 |
| 150 static void TestSingleByteCallBack() |
| 151 { |
| 152 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER); |
| 153 TestSingleByte(1,NEW_MAX_BUFFER); |
| 154 TestSingleByte(1,1); |
| 155 TestSingleByte(NEW_MAX_BUFFER, 1); |
| 156 } |
| 157 #endif |
| 158 |
| 159 static void TestSkip(int32_t inputsize, int32_t outputsize) |
| 160 { |
| 161 static const uint8_t expskipIBM_949[]= { |
| 162 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 }; |
| 163 |
| 164 static const uint8_t expskipIBM_943[] = { |
| 165 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 }; |
| 166 |
| 167 static const uint8_t expskipIBM_930[] = { |
| 168 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f }; |
| 169 |
| 170 gInBufferSize = inputsize; |
| 171 gOutBufferSize = outputsize; |
| 172 |
| 173 /*From Unicode*/ |
| 174 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n"); |
| 175 |
| 176 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 177 { |
| 178 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0
xD700 }; |
| 179 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; |
| 180 |
| 181 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 }; |
| 182 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 }; |
| 183 |
| 184 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleT
ext[0]), |
| 185 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949", |
| 186 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 )) |
| 187 log_err("u-> ibm-949 with skip did not match.\n"); |
| 188 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampl
eText2[0]), |
| 189 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943", |
| 190 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 )) |
| 191 log_err("u-> ibm-943 with skip did not match.\n"); |
| 192 } |
| 193 |
| 194 { |
| 195 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d6
4, 0x63, 0xff5e, 0x6d66 }; |
| 196 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d
, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f }; |
| 197 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8,
8, 8 }; |
| 198 |
| 199 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to c
heck correct state transitions */ |
| 200 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR, |
| 201 fromUBytes, sizeof(fromUBytes), |
| 202 "ibm-930", |
| 203 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets, |
| 204 NULL, 0) |
| 205 ) { |
| 206 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\
n"); |
| 207 } |
| 208 } |
| 209 #endif |
| 210 |
| 211 { |
| 212 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800,
0xdfff, 0x39 }; |
| 213 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 }; |
| 214 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 }; |
| 215 |
| 216 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0
xdfff, 0x39 }; |
| 217 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 }; |
| 218 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 }; |
| 219 |
| 220 /* US-ASCII */ |
| 221 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_U
CHAR, |
| 222 usasciiFromUBytes, sizeof(usasciiFromUBytes), |
| 223 "US-ASCII", |
| 224 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffset
s, |
| 225 NULL, 0) |
| 226 ) { |
| 227 log_err("u->US-ASCII with skip did not match.\n"); |
| 228 } |
| 229 |
| 230 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 231 /* SBCS NLTC codepage 367 for US-ASCII */ |
| 232 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_U
CHAR, |
| 233 usasciiFromUBytes, sizeof(usasciiFromUBytes), |
| 234 "ibm-367", |
| 235 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffset
s, |
| 236 NULL, 0) |
| 237 ) { |
| 238 log_err("u->ibm-367 with skip did not match.\n"); |
| 239 } |
| 240 #endif |
| 241 |
| 242 /* ISO-Latin-1 */ |
| 243 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCH
AR, |
| 244 latin1FromUBytes, sizeof(latin1FromUBytes), |
| 245 "LATIN_1", |
| 246 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets
, |
| 247 NULL, 0) |
| 248 ) { |
| 249 log_err("u->LATIN_1 with skip did not match.\n"); |
| 250 } |
| 251 |
| 252 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 253 /* windows-1252 */ |
| 254 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCH
AR, |
| 255 latin1FromUBytes, sizeof(latin1FromUBytes), |
| 256 "windows-1252", |
| 257 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets
, |
| 258 NULL, 0) |
| 259 ) { |
| 260 log_err("u->windows-1252 with skip did not match.\n"); |
| 261 } |
| 262 } |
| 263 |
| 264 { |
| 265 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; |
| 266 static const uint8_t toIBM943[]= { 0x61, 0x61 }; |
| 267 static const int32_t offset[]= {0, 4}; |
| 268 |
| 269 /* EUC_JP*/ |
| 270 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; |
| 271 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, |
| 272 0x61, 0x8e, 0xe0, |
| 273 }; |
| 274 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7}; |
| 275 |
| 276 /*EUC_TW*/ |
| 277 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; |
| 278 static const uint8_t to_euc_tw[]={ |
| 279 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, |
| 280 0x61, 0xe6, 0xca, 0x8a, |
| 281 }; |
| 282 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7,
8,}; |
| 283 |
| 284 /*ISO-2022-JP*/ |
| 285 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/
,0x0042, }; |
| 286 static const uint8_t to_iso_2022_jp[]={ |
| 287 0x41, |
| 288 0x42, |
| 289 |
| 290 }; |
| 291 static const int32_t from_iso_2022_jpOffs [] ={0,2}; |
| 292 |
| 293 /*ISO-2022-JP*/ |
| 294 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,
0xd800/*illegal*/,0x0042, }; |
| 295 static const uint8_t to_iso_2022_jp2[]={ |
| 296 0x41, |
| 297 0x43, |
| 298 |
| 299 }; |
| 300 static const int32_t from_iso_2022_jpOffs2 [] ={0,2}; |
| 301 |
| 302 /*ISO-2022-cn*/ |
| 303 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*
/, 0x0042, }; |
| 304 static const uint8_t to_iso_2022_cn[]={ |
| 305 0x41, 0x42 |
| 306 }; |
| 307 static const int32_t from_iso_2022_cnOffs [] ={ |
| 308 0, 2 |
| 309 }; |
| 310 |
| 311 /*ISO-2022-CN*/ |
| 312 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*
/,0x43,0xd800/*illegal*/,0x0042, }; |
| 313 static const uint8_t to_iso_2022_cn1[]={ |
| 314 0x41, 0x43 |
| 315 |
| 316 }; |
| 317 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 }; |
| 318 |
| 319 /*ISO-2022-kr*/ |
| 320 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042, }; |
| 321 static const uint8_t to_iso_2022_kr[]={ |
| 322 0x1b, 0x24, 0x29, 0x43, |
| 323 0x41, |
| 324 0x0e, 0x25, 0x50, |
| 325 0x25, 0x50, |
| 326 0x0f, 0x42, |
| 327 }; |
| 328 static const int32_t from_iso_2022_krOffs [] ={ |
| 329 -1,-1,-1,-1, |
| 330 0, |
| 331 1,1,1, |
| 332 3,3, |
| 333 4,4 |
| 334 }; |
| 335 |
| 336 /*ISO-2022-kr*/ |
| 337 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*una
ssigned*/,0x03A0,0xd801/*illegal*/, 0x0042, }; |
| 338 static const uint8_t to_iso_2022_kr1[]={ |
| 339 0x1b, 0x24, 0x29, 0x43, |
| 340 0x41, |
| 341 0x0e, 0x25, 0x50, |
| 342 0x25, 0x50, |
| 343 |
| 344 }; |
| 345 static const int32_t from_iso_2022_krOffs1 [] ={ |
| 346 -1,-1,-1,-1, |
| 347 0, |
| 348 1,1,1, |
| 349 3,3, |
| 350 |
| 351 }; |
| 352 /* HZ encoding */ |
| 353 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; |
| 354 |
| 355 static const uint8_t to_hz[]={ |
| 356 0x7e, 0x7d, 0x41, |
| 357 0x7e, 0x7b, 0x26, 0x30, |
| 358 0x26, 0x30, |
| 359 0x7e, 0x7d, 0x42, |
| 360 |
| 361 }; |
| 362 static const int32_t from_hzOffs [] ={ |
| 363 0,0,0, |
| 364 1,1,1,1, |
| 365 3,3, |
| 366 4,4,4,4 |
| 367 }; |
| 368 |
| 369 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/
,0x03A0,0xd801/*illegal*/, 0x0042, }; |
| 370 |
| 371 static const uint8_t to_hz1[]={ |
| 372 0x7e, 0x7d, 0x41, |
| 373 0x7e, 0x7b, 0x26, 0x30, |
| 374 0x26, 0x30, |
| 375 |
| 376 |
| 377 }; |
| 378 static const int32_t from_hzOffs1 [] ={ |
| 379 0,0,0, |
| 380 1,1,1,1, |
| 381 3,3, |
| 382 |
| 383 }; |
| 384 |
| 385 #endif |
| 386 |
| 387 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042,
}; |
| 388 |
| 389 static const uint8_t to_SCSU[]={ |
| 390 0x41, |
| 391 0x42 |
| 392 |
| 393 |
| 394 }; |
| 395 static const int32_t from_SCSUOffs [] ={ |
| 396 0, |
| 397 2, |
| 398 |
| 399 }; |
| 400 |
| 401 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 402 /* ISCII */ |
| 403 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0
042, }; |
| 404 static const uint8_t to_iscii[]={ |
| 405 0x41, |
| 406 0x42, |
| 407 }; |
| 408 static const int32_t from_isciiOffs [] ={ |
| 409 0,2, |
| 410 |
| 411 }; |
| 412 /*ISCII*/ |
| 413 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43
,0xd800/*illegal*/,0x0042, }; |
| 414 static const uint8_t to_iscii1[]={ |
| 415 0x44, |
| 416 0x43, |
| 417 |
| 418 }; |
| 419 static const int32_t from_isciiOffs1 [] ={0,2}; |
| 420 |
| 421 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), |
| 422 toIBM943, sizeof(toIBM943), "ibm-943", |
| 423 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 )) |
| 424 log_err("u-> ibm-943 with skip did not match.\n"); |
| 425 |
| 426 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), |
| 427 to_euc_jp, sizeof(to_euc_jp), "euc-jp", |
| 428 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 )) |
| 429 log_err("u-> euc-jp with skip did not match.\n"); |
| 430 |
| 431 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), |
| 432 to_euc_tw, sizeof(to_euc_tw), "euc-tw", |
| 433 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 )) |
| 434 log_err("u-> euc-tw with skip did not match.\n"); |
| 435 |
| 436 /*iso_2022_jp*/ |
| 437 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), |
| 438 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", |
| 439 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 )) |
| 440 log_err("u-> iso-2022-jp with skip did not match.\n"); |
| 441 |
| 442 /* with context */ |
| 443 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso
_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]), |
| 444 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", |
| 445 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) |
| 446 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); |
| 447 |
| 448 /*iso_2022_cn*/ |
| 449 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), |
| 450 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", |
| 451 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 )) |
| 452 log_err("u-> iso-2022-cn with skip did not match.\n"); |
| 453 /*with context*/ |
| 454 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso
_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]), |
| 455 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn", |
| 456 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) |
| 457 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); |
| 458 |
| 459 /*iso_2022_kr*/ |
| 460 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), |
| 461 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", |
| 462 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 )) |
| 463 log_err("u-> iso-2022-kr with skip did not match.\n"); |
| 464 /*with context*/ |
| 465 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso
_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]), |
| 466 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr", |
| 467 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_S
KIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) |
| 468 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did n
ot match.\n"); |
| 469 |
| 470 /*hz*/ |
| 471 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), |
| 472 to_hz, sizeof(to_hz), "HZ", |
| 473 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 )) |
| 474 log_err("u-> HZ with skip did not match.\n"); |
| 475 /*with context*/ |
| 476 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText
1)/sizeof(hz_inputText1[0]), |
| 477 to_hz1, sizeof(to_hz1), "hz", |
| 478 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_
ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) |
| 479 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.
\n"); |
| 480 #endif |
| 481 |
| 482 /*SCSU*/ |
| 483 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), |
| 484 to_SCSU, sizeof(to_SCSU), "SCSU", |
| 485 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 )) |
| 486 log_err("u-> SCSU with skip did not match.\n"); |
| 487 |
| 488 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 489 /*ISCII*/ |
| 490 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), |
| 491 to_iscii, sizeof(to_iscii), "ISCII,version=0", |
| 492 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 )) |
| 493 log_err("u-> iscii with skip did not match.\n"); |
| 494 /*with context*/ |
| 495 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inp
utText1)/sizeof(iscii_inputText1[0]), |
| 496 to_iscii1, sizeof(to_iscii1), "ISCII,version=0", |
| 497 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_ST
OP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND )) |
| 498 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not mat
ch.\n"); |
| 499 #endif |
| 500 } |
| 501 |
| 502 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n")
; |
| 503 { |
| 504 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU
1 text 1 */ |
| 505 0xFB, 0xEE, 0x28, /* from source offset 0 */ |
| 506 0x24, 0x1E, 0x52, |
| 507 0xB2, |
| 508 0x20, |
| 509 0xB3, |
| 510 0xB1, |
| 511 0x0D, |
| 512 0x0A, |
| 513 |
| 514 0x20, /* from 8 */ |
| 515 0x00, |
| 516 0xD0, 0x6C, |
| 517 0xB6, |
| 518 0xD8, 0xA5, |
| 519 0x20, |
| 520 0x68, |
| 521 0x59, |
| 522 |
| 523 0xF9, 0x28, /* from 16 */ |
| 524 0x6D, |
| 525 0x20, |
| 526 0x73, |
| 527 0xE0, 0x2D, |
| 528 0xDE, 0x43, |
| 529 0xD0, 0x33, |
| 530 0x20, |
| 531 |
| 532 0xFA, 0x83, /* from 24 */ |
| 533 0x25, 0x01, |
| 534 0xFB, 0x16, 0x87, |
| 535 0x4B, 0x16, |
| 536 0x20, |
| 537 0xE6, 0xBD, |
| 538 0xEB, 0x5B, |
| 539 0x4B, 0xCC, |
| 540 |
| 541 0xF9, 0xA2, /* from 32 */ |
| 542 0xFC, 0x10, 0x3E, |
| 543 0xFE, 0x16, 0x3A, 0x8C, |
| 544 0x20, |
| 545 0xFC, 0x03, 0xAC, |
| 546 |
| 547 0x01, /* from 41 */ |
| 548 0xDE, 0x83, |
| 549 0x20, |
| 550 0x09 |
| 551 }; |
| 552 static const UChar expected[]={ |
| 553 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */ |
| 554 0x0063, 0x0061, 0x000D, 0x000A, |
| 555 |
| 556 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */ |
| 557 0x0930, 0x0020, 0x0918, 0x0909, |
| 558 |
| 559 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */ |
| 560 0x4000, 0x4E00, 0x7777, 0x0020, |
| 561 |
| 562 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */ |
| 563 0x0020, 0xD7A3, 0xDC00, 0xD800, |
| 564 |
| 565 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */ |
| 566 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, |
| 567 |
| 568 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */ |
| 569 0x0009 |
| 570 }; |
| 571 static const int32_t offsets[]={ |
| 572 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7, |
| 573 8, 9, 10, 10, 11, 12, 12, 13, 14, 15, |
| 574 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, |
| 575 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31, |
| 576 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39, |
| 577 41, 42, 42, 43, 44 |
| 578 }; |
| 579 |
| 580 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-b
yte and offsets behavior */ |
| 581 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), |
| 582 sampleText, sizeof(sampleText), |
| 583 "BOCU-1", |
| 584 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) |
| 585 ) { |
| 586 log_err("u->BOCU-1 with skip did not match.\n"); |
| 587 } |
| 588 } |
| 589 |
| 590 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n")
; |
| 591 { |
| 592 const uint8_t sampleText[]={ |
| 593 0x61, /* 'a' */ |
| 594 0xc4, 0xb5, /* U+0135 */ |
| 595 0xed, 0x80, 0xa0, /* Hangul U+d020 */ |
| 596 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */ |
| 597 0xee, 0x80, 0x80, /* PUA U+e000 */ |
| 598 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc
01 */ |
| 599 0x62, /* 'b' */ |
| 600 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d80
1 */ |
| 601 0xd0, 0x80 /* U+0400 */ |
| 602 }; |
| 603 UChar expected[]={ |
| 604 0x0061, |
| 605 0x0135, |
| 606 0xd020, |
| 607 0xd801, 0xdc01, |
| 608 0xe000, |
| 609 0xdc01, |
| 610 0x0062, |
| 611 0xd801, |
| 612 0x0400 |
| 613 }; |
| 614 int32_t offsets[]={ |
| 615 0, |
| 616 1, 1, |
| 617 2, 2, 2, |
| 618 3, 3, 3, 4, 4, 4, |
| 619 5, 5, 5, |
| 620 6, 6, 6, |
| 621 7, |
| 622 8, 8, 8, |
| 623 9, 9 |
| 624 }; |
| 625 |
| 626 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversi
on and offsets behavior */ |
| 627 |
| 628 /* without offsets */ |
| 629 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), |
| 630 sampleText, sizeof(sampleText), |
| 631 "CESU-8", |
| 632 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0) |
| 633 ) { |
| 634 log_err("u->CESU-8 with skip did not match.\n"); |
| 635 } |
| 636 |
| 637 /* with offsets */ |
| 638 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected), |
| 639 sampleText, sizeof(sampleText), |
| 640 "CESU-8", |
| 641 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0) |
| 642 ) { |
| 643 log_err("u->CESU-8 with skip did not match.\n"); |
| 644 } |
| 645 } |
| 646 |
| 647 /*to Unicode*/ |
| 648 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n"); |
| 649 |
| 650 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 651 { |
| 652 |
| 653 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD7
00 }; |
| 654 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; |
| 655 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 }; |
| 656 |
| 657 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5}; |
| 658 static const int32_t fromIBM943Offs [] = { 0, 2, 4}; |
| 659 static const int32_t fromIBM930Offs [] = { 1, 3, 5}; |
| 660 |
| 661 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949), |
| 662 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_9
49skiptoUnicode),"ibm-949", |
| 663 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 )) |
| 664 log_err("ibm-949->u with skip did not match.\n"); |
| 665 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943), |
| 666 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_9
43skiptoUnicode[0]),"ibm-943", |
| 667 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 )) |
| 668 log_err("ibm-943->u with skip did not match.\n"); |
| 669 |
| 670 |
| 671 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930), |
| 672 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_9
30skiptoUnicode[0]),"ibm-930", |
| 673 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 )) |
| 674 log_err("ibm-930->u with skip did not match.\n"); |
| 675 |
| 676 |
| 677 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_93
0), |
| 678 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_9
30skiptoUnicode[0]),"ibm-930", |
| 679 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_C
HAR_FOUND )) |
| 680 log_err("ibm-930->u with skip did not match.\n"); |
| 681 } |
| 682 #endif |
| 683 |
| 684 { |
| 685 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 }; |
| 686 static const UChar usasciiToU[] = { 0x61, 0x31 }; |
| 687 static const int32_t usasciiToUOffsets[] = { 0, 2 }; |
| 688 |
| 689 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 }; |
| 690 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 }; |
| 691 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 }; |
| 692 |
| 693 /* US-ASCII */ |
| 694 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), |
| 695 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, |
| 696 "US-ASCII", |
| 697 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, |
| 698 NULL, 0) |
| 699 ) { |
| 700 log_err("US-ASCII->u with skip did not match.\n"); |
| 701 } |
| 702 |
| 703 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 704 /* SBCS NLTC codepage 367 for US-ASCII */ |
| 705 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes), |
| 706 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR, |
| 707 "ibm-367", |
| 708 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets, |
| 709 NULL, 0) |
| 710 ) { |
| 711 log_err("ibm-367->u with skip did not match.\n"); |
| 712 } |
| 713 #endif |
| 714 |
| 715 /* ISO-Latin-1 */ |
| 716 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), |
| 717 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, |
| 718 "LATIN_1", |
| 719 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, |
| 720 NULL, 0) |
| 721 ) { |
| 722 log_err("LATIN_1->u with skip did not match.\n"); |
| 723 } |
| 724 |
| 725 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 726 /* windows-1252 */ |
| 727 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes), |
| 728 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR, |
| 729 "windows-1252", |
| 730 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets, |
| 731 NULL, 0) |
| 732 ) { |
| 733 log_err("windows-1252->u with skip did not match.\n"); |
| 734 } |
| 735 #endif |
| 736 } |
| 737 |
| 738 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 739 { |
| 740 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ |
| 741 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 |
| 742 }; |
| 743 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4 |
| 744 }; |
| 745 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5}; |
| 746 |
| 747 |
| 748 /* euc-jp*/ |
| 749 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, |
| 750 0x8f, 0xda, 0xa1, /*unassigned*/ |
| 751 0x8e, 0xe0, |
| 752 }; |
| 753 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2}; |
| 754 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9}; |
| 755 |
| 756 /*EUC_TW*/ |
| 757 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2,
0xdc, 0xe5, |
| 758 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ |
| 759 0xe6, 0xca, 0x8a, |
| 760 }; |
| 761 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0
x8a, }; |
| 762 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13}; |
| 763 /*iso-2022-jp*/ |
| 764 static const uint8_t sampleTxt_iso_2022_jp[]={ |
| 765 0x41, |
| 766 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ |
| 767 0x1b, 0x28, 0x42, 0x42, |
| 768 |
| 769 }; |
| 770 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 }; |
| 771 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 }; |
| 772 |
| 773 /*iso-2022-cn*/ |
| 774 static const uint8_t sampleTxt_iso_2022_cn[]={ |
| 775 0x0f, 0x41, 0x44, |
| 776 0x1B, 0x24, 0x29, 0x47, |
| 777 0x0E, 0x40, 0x6f, /*unassigned*/ |
| 778 0x0f, 0x42, |
| 779 |
| 780 }; |
| 781 |
| 782 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 }; |
| 783 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 }; |
| 784 |
| 785 /*iso-2022-kr*/ |
| 786 static const uint8_t sampleTxt_iso_2022_kr[]={ |
| 787 0x1b, 0x24, 0x29, 0x43, |
| 788 0x41, |
| 789 0x0E, 0x7f, 0x1E, |
| 790 0x0e, 0x25, 0x50, |
| 791 0x0f, 0x51, |
| 792 0x42, 0x43, |
| 793 |
| 794 }; |
| 795 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0
x43}; |
| 796 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 ,
14 }; |
| 797 |
| 798 /*hz*/ |
| 799 static const uint8_t sampleTxt_hz[]={ |
| 800 0x41, |
| 801 0x7e, 0x7b, 0x26, 0x30, |
| 802 0x7f, 0x1E, /*unassigned*/ |
| 803 0x26, 0x30, |
| 804 0x7e, 0x7d, 0x42, |
| 805 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ |
| 806 0x7e, 0x7d, 0x42, |
| 807 }; |
| 808 static const UChar hztoUnicode[]={ |
| 809 0x41, |
| 810 0x03a0, |
| 811 0x03A0, |
| 812 0x42, |
| 813 0x42,}; |
| 814 |
| 815 static const int32_t from_hzOffs [] ={0,3,7,11,18, }; |
| 816 |
| 817 /*ISCII*/ |
| 818 static const uint8_t sampleTxt_iscii[]={ |
| 819 0x41, |
| 820 0xa1, |
| 821 0xEB, /*unassigned*/ |
| 822 0x26, |
| 823 0x30, |
| 824 0xa2, |
| 825 0xEC, /*unassigned*/ |
| 826 0x42, |
| 827 }; |
| 828 static const UChar isciitoUnicode[]={ |
| 829 0x41, |
| 830 0x0901, |
| 831 0x26, |
| 832 0x30, |
| 833 0x0902, |
| 834 0x42, |
| 835 }; |
| 836 |
| 837 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 }; |
| 838 |
| 839 /*LMBCS*/ |
| 840 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50, |
| 841 0x12, 0x92, 0xa0, /*unassigned*/ |
| 842 0x12, 0x92, 0xA1, |
| 843 }; |
| 844 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4}; |
| 845 static const int32_t fromLMBCS[] = {0, 6}; |
| 846 |
| 847 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCI
DIC_STATEFUL), |
| 848 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", |
| 849 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) |
| 850 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); |
| 851 |
| 852 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sa
mpleTxtEBCIDIC_STATEFUL), |
| 853 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", |
| 854 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U
_ILLEGAL_CHAR_FOUND )) |
| 855 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n"); |
| 856 |
| 857 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), |
| 858 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode
[0]),"euc-jp", |
| 859 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0)) |
| 860 log_err("euc-jp->u with skip did not match.\n"); |
| 861 |
| 862 |
| 863 |
| 864 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), |
| 865 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", |
| 866 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0)) |
| 867 log_err("euc-tw->u with skip did not match.\n"); |
| 868 |
| 869 |
| 870 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_202
2_jp), |
| 871 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", |
| 872 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0)) |
| 873 log_err("iso-2022-jp->u with skip did not match.\n"); |
| 874 |
| 875 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_202
2_cn), |
| 876 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2
022_cntoUnicode[0]),"iso-2022-cn", |
| 877 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0)) |
| 878 log_err("iso-2022-cn->u with skip did not match.\n"); |
| 879 |
| 880 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_202
2_kr), |
| 881 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2
022_krtoUnicode[0]),"iso-2022-kr", |
| 882 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0)) |
| 883 log_err("iso-2022-kr->u with skip did not match.\n"); |
| 884 |
| 885 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), |
| 886 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", |
| 887 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0)) |
| 888 log_err("HZ->u with skip did not match.\n"); |
| 889 |
| 890 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), |
| 891 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]
),"ISCII,version=0", |
| 892 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0)) |
| 893 log_err("iscii->u with skip did not match.\n"); |
| 894 |
| 895 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS), |
| 896 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0])
,"LMBCS-1", |
| 897 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0)) |
| 898 log_err("LMBCS->u with skip did not match.\n"); |
| 899 |
| 900 } |
| 901 #endif |
| 902 |
| 903 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n"); |
| 904 { |
| 905 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, |
| 906 0xe0, 0x80, 0x61,}; |
| 907 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061}; |
| 908 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006}; |
| 909 |
| 910 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), |
| 911 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", |
| 912 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) |
| 913 log_err("utf8->u with skip did not match.\n");; |
| 914 } |
| 915 |
| 916 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n"); |
| 917 { |
| 918 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; |
| 919 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfff
e}; |
| 920 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; |
| 921 |
| 922 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), |
| 923 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", |
| 924 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 )) |
| 925 log_err("scsu->u with skip did not match.\n"); |
| 926 } |
| 927 |
| 928 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n"); |
| 929 { |
| 930 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBO
CU1 text 1 */ |
| 931 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */ |
| 932 0x24, 0x1E, 0x52, /* 3 */ |
| 933 0xB2, /* 6 */ |
| 934 0x20, /* 7 */ |
| 935 0x40, 0x07, /* 8 - wrong trail byte */ |
| 936 0xB3, /* 10 */ |
| 937 0xB1, /* 11 */ |
| 938 0xD0, 0x20, /* 12 - wrong trail byte */ |
| 939 0x0D, /* 14 */ |
| 940 0x0A, /* 15 */ |
| 941 0x20, /* 16 */ |
| 942 0x00, /* 17 */ |
| 943 0xD0, 0x6C, /* 18 */ |
| 944 0xB6, /* 20 */ |
| 945 0xD8, 0xA5, /* 21 */ |
| 946 0x20, /* 23 */ |
| 947 0x68, /* 24 */ |
| 948 0x59, /* 25 */ |
| 949 0xF9, 0x28, /* 26 */ |
| 950 0x6D, /* 28 */ |
| 951 0x20, /* 29 */ |
| 952 0x73, /* 30 */ |
| 953 0xE0, 0x2D, /* 31 */ |
| 954 0xDE, 0x43, /* 33 */ |
| 955 0xD0, 0x33, /* 35 */ |
| 956 0x20, /* 37 */ |
| 957 0xFA, 0x83, /* 38 */ |
| 958 0x25, 0x01, /* 40 */ |
| 959 0xFB, 0x16, 0x87, /* 42 */ |
| 960 0x4B, 0x16, /* 45 */ |
| 961 0x20, /* 47 */ |
| 962 0xE6, 0xBD, /* 48 */ |
| 963 0xEB, 0x5B, /* 50 */ |
| 964 0x4B, 0xCC, /* 52 */ |
| 965 0xF9, 0xA2, /* 54 */ |
| 966 0xFC, 0x10, 0x3E, /* 56 */ |
| 967 0xFE, 0x16, 0x3A, 0x8C, /* 59 */ |
| 968 0x20, /* 63 */ |
| 969 0xFC, 0x03, 0xAC, /* 64 */ |
| 970 0xFF, /* 67 - FF just resets the state without enc
oding anything */ |
| 971 0x01, /* 68 */ |
| 972 0xDE, 0x83, /* 69 */ |
| 973 0x20, /* 71 */ |
| 974 0x09 /* 72 */ |
| 975 }; |
| 976 UChar expected[]={ |
| 977 0xFEFF, 0x0061, 0x0062, 0x0020, |
| 978 0x0063, 0x0061, 0x000D, 0x000A, |
| 979 0x0020, 0x0000, 0x00DF, 0x00E6, |
| 980 0x0930, 0x0020, 0x0918, 0x0909, |
| 981 0x3086, 0x304D, 0x0020, 0x3053, |
| 982 0x4000, 0x4E00, 0x7777, 0x0020, |
| 983 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, |
| 984 0x0020, 0xD7A3, 0xDC00, 0xD800, |
| 985 0xD800, 0xDC00, 0xD845, 0xDDDD, |
| 986 0xDBBB, 0xDDEE, 0x0020, 0xDBFF, |
| 987 0xDFFF, 0x0001, 0x0E40, 0x0020, |
| 988 0x0009 |
| 989 }; |
| 990 int32_t offsets[]={ |
| 991 0, 3, 6, 7, /* skip 8, */ |
| 992 10, 11, /* skip 12, */ |
| 993 14, 15, 16, 17, 18, |
| 994 20, 21, 23, 24, 25, 26, 28, 29, |
| 995 30, 31, 33, 35, 37, 38, |
| 996 40, 42, 45, 47, 48, |
| 997 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59, |
| 998 63, 64, /* trail */ 64, /* reset only 67, */ |
| 999 68, 69, |
| 1000 71, 72 |
| 1001 }; |
| 1002 |
| 1003 if(!testConvertToUnicode(sampleText, sizeof(sampleText), |
| 1004 expected, ARRAY_LENGTH(expected), "BOCU-1", |
| 1005 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) |
| 1006 ) { |
| 1007 log_err("BOCU-1->u with skip did not match.\n"); |
| 1008 } |
| 1009 } |
| 1010 |
| 1011 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n"); |
| 1012 { |
| 1013 const uint8_t sampleText[]={ |
| 1014 0x61, /* 0 'a' */ |
| 1015 0xc0, 0x80, /* 1 non-shortest form */ |
| 1016 0xc4, 0xb5, /* 3 U+0135 */ |
| 1017 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */ |
| 1018 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401
*/ |
| 1019 0xee, 0x80, 0x80, /* 14 PUA U+e000 */ |
| 1020 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U
+dc01 */ |
| 1021 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+
10000 */ |
| 1022 0x62, /* 24 'b' */ |
| 1023 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+
d801 */ |
| 1024 0xed, 0xa0, /* 28 incomplete sequence */ |
| 1025 0xd0, 0x80 /* 30 U+0400 */ |
| 1026 }; |
| 1027 UChar expected[]={ |
| 1028 0x0061, |
| 1029 /* skip */ |
| 1030 0x0135, |
| 1031 0xd020, |
| 1032 0xd801, 0xdc01, |
| 1033 0xe000, |
| 1034 0xdc01, |
| 1035 /* skip */ |
| 1036 0x0062, |
| 1037 0xd801, |
| 1038 0x0400 |
| 1039 }; |
| 1040 int32_t offsets[]={ |
| 1041 0, |
| 1042 /* skip 1, */ |
| 1043 3, |
| 1044 5, |
| 1045 8, 11, |
| 1046 14, |
| 1047 17, |
| 1048 /* skip 20, 20, */ |
| 1049 24, |
| 1050 25, |
| 1051 /* skip 28 */ |
| 1052 30 |
| 1053 }; |
| 1054 |
| 1055 /* without offsets */ |
| 1056 if(!testConvertToUnicode(sampleText, sizeof(sampleText), |
| 1057 expected, ARRAY_LENGTH(expected), "CESU-8", |
| 1058 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0) |
| 1059 ) { |
| 1060 log_err("CESU-8->u with skip did not match.\n"); |
| 1061 } |
| 1062 |
| 1063 /* with offsets */ |
| 1064 if(!testConvertToUnicode(sampleText, sizeof(sampleText), |
| 1065 expected, ARRAY_LENGTH(expected), "CESU-8", |
| 1066 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0) |
| 1067 ) { |
| 1068 log_err("CESU-8->u with skip did not match.\n"); |
| 1069 } |
| 1070 } |
| 1071 } |
| 1072 |
| 1073 static void TestStop(int32_t inputsize, int32_t outputsize) |
| 1074 { |
| 1075 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD70
0 }; |
| 1076 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; |
| 1077 |
| 1078 static const uint8_t expstopIBM_949[]= { |
| 1079 0x00, 0xb0, 0xa1, 0xb0, 0xa2}; |
| 1080 |
| 1081 static const uint8_t expstopIBM_943[] = { |
| 1082 0x9f, 0xaf, 0x9f, 0xb1}; |
| 1083 |
| 1084 static const uint8_t expstopIBM_930[] = { |
| 1085 0x0e, 0x5d, 0x5f, 0x5d, 0x63}; |
| 1086 |
| 1087 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01}; |
| 1088 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64}; |
| 1089 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64}; |
| 1090 |
| 1091 |
| 1092 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2}; |
| 1093 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1}; |
| 1094 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1}; |
| 1095 |
| 1096 static const int32_t fromIBM949Offs [] = { 0, 1, 3}; |
| 1097 static const int32_t fromIBM943Offs [] = { 0, 2}; |
| 1098 static const int32_t fromIBM930Offs [] = { 1, 3}; |
| 1099 |
| 1100 gInBufferSize = inputsize; |
| 1101 gOutBufferSize = outputsize; |
| 1102 |
| 1103 /*From Unicode*/ |
| 1104 |
| 1105 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1106 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), |
| 1107 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949", |
| 1108 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 )) |
| 1109 log_err("u-> ibm-949 with stop did not match.\n"); |
| 1110 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), |
| 1111 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943", |
| 1112 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0)) |
| 1113 log_err("u-> ibm-943 with stop did not match.\n"); |
| 1114 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), |
| 1115 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930", |
| 1116 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 )) |
| 1117 log_err("u-> ibm-930 with stop did not match.\n"); |
| 1118 |
| 1119 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n"); |
| 1120 { |
| 1121 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; |
| 1122 static const uint8_t toIBM943[]= { 0x61,}; |
| 1123 static const int32_t offset[]= {0,} ; |
| 1124 |
| 1125 /*EUC_JP*/ |
| 1126 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; |
| 1127 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,}; |
| 1128 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,}; |
| 1129 |
| 1130 /*EUC_TW*/ |
| 1131 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; |
| 1132 static const uint8_t to_euc_tw[]={ |
| 1133 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,}; |
| 1134 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,}; |
| 1135 |
| 1136 /*ISO-2022-JP*/ |
| 1137 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, }; |
| 1138 static const uint8_t to_iso_2022_jp[]={ |
| 1139 0x41, |
| 1140 |
| 1141 }; |
| 1142 static const int32_t from_iso_2022_jpOffs [] ={0,}; |
| 1143 |
| 1144 /*ISO-2022-cn*/ |
| 1145 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; |
| 1146 static const uint8_t to_iso_2022_cn[]={ |
| 1147 0x41, |
| 1148 |
| 1149 }; |
| 1150 static const int32_t from_iso_2022_cnOffs [] ={ |
| 1151 0,0, |
| 1152 2,2, |
| 1153 }; |
| 1154 |
| 1155 /*ISO-2022-kr*/ |
| 1156 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042, }; |
| 1157 static const uint8_t to_iso_2022_kr[]={ |
| 1158 0x1b, 0x24, 0x29, 0x43, |
| 1159 0x41, |
| 1160 0x0e, 0x25, 0x50, |
| 1161 }; |
| 1162 static const int32_t from_iso_2022_krOffs [] ={ |
| 1163 -1,-1,-1,-1, |
| 1164 0, |
| 1165 1,1,1, |
| 1166 }; |
| 1167 |
| 1168 /* HZ encoding */ |
| 1169 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; |
| 1170 |
| 1171 static const uint8_t to_hz[]={ |
| 1172 0x7e, 0x7d, 0x41, |
| 1173 0x7e, 0x7b, 0x26, 0x30, |
| 1174 |
| 1175 }; |
| 1176 static const int32_t from_hzOffs [] ={ |
| 1177 0, 0,0, |
| 1178 1,1,1,1, |
| 1179 }; |
| 1180 |
| 1181 /*ISCII*/ |
| 1182 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, }; |
| 1183 static const uint8_t to_iscii[]={ |
| 1184 0x41, |
| 1185 }; |
| 1186 static const int32_t from_isciiOffs [] ={ |
| 1187 0, |
| 1188 }; |
| 1189 |
| 1190 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), |
| 1191 toIBM943, sizeof(toIBM943), "ibm-943", |
| 1192 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 )) |
| 1193 log_err("u-> ibm-943 with stop did not match.\n"); |
| 1194 |
| 1195 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), |
| 1196 to_euc_jp, sizeof(to_euc_jp), "euc-jp", |
| 1197 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 )) |
| 1198 log_err("u-> euc-jp with stop did not match.\n"); |
| 1199 |
| 1200 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), |
| 1201 to_euc_tw, sizeof(to_euc_tw), "euc-tw", |
| 1202 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) |
| 1203 log_err("u-> euc-tw with stop did not match.\n"); |
| 1204 |
| 1205 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), |
| 1206 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", |
| 1207 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) |
| 1208 log_err("u-> iso-2022-jp with stop did not match.\n"); |
| 1209 |
| 1210 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inp
utText)/sizeof(iso_2022_jp_inputText[0]), |
| 1211 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp", |
| 1212 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 )) |
| 1213 log_err("u-> iso-2022-jp with stop did not match.\n"); |
| 1214 |
| 1215 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), |
| 1216 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", |
| 1217 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 )) |
| 1218 log_err("u-> iso-2022-cn with stop did not match.\n"); |
| 1219 |
| 1220 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), |
| 1221 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", |
| 1222 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 )) |
| 1223 log_err("u-> iso-2022-kr with stop did not match.\n"); |
| 1224 |
| 1225 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), |
| 1226 to_hz, sizeof(to_hz), "HZ", |
| 1227 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 )) |
| 1228 log_err("u-> HZ with stop did not match.\n");\ |
| 1229 |
| 1230 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), |
| 1231 to_iscii, sizeof(to_iscii), "ISCII,version=0", |
| 1232 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 )) |
| 1233 log_err("u-> iscii with stop did not match.\n"); |
| 1234 |
| 1235 |
| 1236 } |
| 1237 #endif |
| 1238 |
| 1239 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n"
); |
| 1240 { |
| 1241 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042,
}; |
| 1242 |
| 1243 static const uint8_t to_SCSU[]={ |
| 1244 0x41, |
| 1245 |
| 1246 }; |
| 1247 int32_t from_SCSUOffs [] ={ |
| 1248 0, |
| 1249 |
| 1250 }; |
| 1251 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), |
| 1252 to_SCSU, sizeof(to_SCSU), "SCSU", |
| 1253 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 )) |
| 1254 log_err("u-> SCSU with skip did not match.\n"); |
| 1255 |
| 1256 } |
| 1257 |
| 1258 /*to Unicode*/ |
| 1259 |
| 1260 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1261 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949), |
| 1262 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949st
optoUnicode[0]),"ibm-949", |
| 1263 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 )) |
| 1264 log_err("ibm-949->u with stop did not match.\n"); |
| 1265 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943), |
| 1266 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943st
optoUnicode[0]),"ibm-943", |
| 1267 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 )) |
| 1268 log_err("ibm-943->u with stop did not match.\n"); |
| 1269 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930), |
| 1270 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930st
optoUnicode[0]),"ibm-930", |
| 1271 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 )) |
| 1272 log_err("ibm-930->u with stop did not match.\n"); |
| 1273 |
| 1274 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n"); |
| 1275 { |
| 1276 |
| 1277 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ |
| 1278 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 |
| 1279 }; |
| 1280 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 }; |
| 1281 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1}; |
| 1282 |
| 1283 |
| 1284 /*EUC-JP*/ |
| 1285 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, |
| 1286 0x8f, 0xda, 0xa1, /*unassigned*/ |
| 1287 0x8e, 0xe0, |
| 1288 }; |
| 1289 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec}; |
| 1290 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3}; |
| 1291 |
| 1292 /*EUC_TW*/ |
| 1293 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2,
0xdc, 0xe5, |
| 1294 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ |
| 1295 0xe6, 0xca, 0x8a, |
| 1296 }; |
| 1297 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2}; |
| 1298 int32_t from_euc_twOffs [] ={ 0, 1, 3}; |
| 1299 |
| 1300 |
| 1301 |
| 1302 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBC
IDIC_STATEFUL), |
| 1303 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/size
of(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", |
| 1304 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 )) |
| 1305 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n"); |
| 1306 |
| 1307 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), |
| 1308 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0])
,"euc-jp", |
| 1309 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0)) |
| 1310 log_err("euc-jp->u with stop did not match.\n"); |
| 1311 |
| 1312 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), |
| 1313 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", |
| 1314 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 )) |
| 1315 log_err("euc-tw->u with stop did not match.\n"); |
| 1316 } |
| 1317 #endif |
| 1318 |
| 1319 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n"); |
| 1320 { |
| 1321 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, |
| 1322 0xe0, 0x80, 0x61,}; |
| 1323 static const UChar expected1[] = { 0x0031, 0x4e8c,}; |
| 1324 static const int32_t offsets1[] = { 0x0000, 0x0001}; |
| 1325 |
| 1326 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), |
| 1327 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", |
| 1328 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) |
| 1329 log_err("utf8->u with stop did not match.\n");; |
| 1330 } |
| 1331 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n"); |
| 1332 { |
| 1333 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c
,0x04}; |
| 1334 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061
}; |
| 1335 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003}; |
| 1336 |
| 1337 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), |
| 1338 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", |
| 1339 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 )) |
| 1340 log_err("scsu->u with stop did not match.\n");; |
| 1341 } |
| 1342 |
| 1343 } |
| 1344 |
| 1345 static void TestSub(int32_t inputsize, int32_t outputsize) |
| 1346 { |
| 1347 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD70
0 }; |
| 1348 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; |
| 1349 |
| 1350 static const uint8_t expsubIBM_949[] = |
| 1351 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 }; |
| 1352 |
| 1353 static const uint8_t expsubIBM_943[] = { |
| 1354 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 }; |
| 1355 |
| 1356 static const uint8_t expsubIBM_930[] = { |
| 1357 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f }; |
| 1358 |
| 1359 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0
xD700 }; |
| 1360 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; |
| 1361 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 }; |
| 1362 |
| 1363 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 }; |
| 1364 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 }; |
| 1365 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 }; |
| 1366 |
| 1367 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 }; |
| 1368 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 }; |
| 1369 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 }; |
| 1370 |
| 1371 gInBufferSize = inputsize; |
| 1372 gOutBufferSize = outputsize; |
| 1373 |
| 1374 /*from unicode*/ |
| 1375 |
| 1376 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1377 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), |
| 1378 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949", |
| 1379 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 )) |
| 1380 log_err("u-> ibm-949 with subst did not match.\n"); |
| 1381 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), |
| 1382 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943", |
| 1383 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0)) |
| 1384 log_err("u-> ibm-943 with subst did not match.\n"); |
| 1385 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), |
| 1386 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930", |
| 1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 )) |
| 1388 log_err("u-> ibm-930 with subst did not match.\n"); |
| 1389 |
| 1390 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n"); |
| 1391 { |
| 1392 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; |
| 1393 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 }; |
| 1394 static const int32_t offset[]= {0, 1, 1, 3, 3, 4}; |
| 1395 |
| 1396 |
| 1397 /* EUC_JP*/ |
| 1398 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2 }; |
| 1399 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, |
| 1400 0xf4, 0xfe, 0xf4, 0xfe, |
| 1401 0x61, 0x8e, 0xe0, |
| 1402 }; |
| 1403 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5,
6, 7, 7}; |
| 1404 |
| 1405 /*EUC_TW*/ |
| 1406 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; |
| 1407 static const uint8_t to_euc_tw[]={ |
| 1408 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, |
| 1409 0xfd, 0xfe, 0xfd, 0xfe, |
| 1410 0x61, 0xe6, 0xca, 0x8a, |
| 1411 }; |
| 1412 |
| 1413 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5,
5, 6, 7, 7, 8,}; |
| 1414 |
| 1415 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), |
| 1416 toIBM943, sizeof(toIBM943), "ibm-943", |
| 1417 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 )) |
| 1418 log_err("u-> ibm-943 with substitute did not match.\n"); |
| 1419 |
| 1420 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), |
| 1421 to_euc_jp, sizeof(to_euc_jp), "euc-jp", |
| 1422 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 )) |
| 1423 log_err("u-> euc-jp with substitute did not match.\n"); |
| 1424 |
| 1425 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), |
| 1426 to_euc_tw, sizeof(to_euc_tw), "euc-tw", |
| 1427 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) |
| 1428 log_err("u-> euc-tw with substitute did not match.\n"); |
| 1429 } |
| 1430 #endif |
| 1431 |
| 1432 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITU
TE \n"); |
| 1433 { |
| 1434 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, }; |
| 1435 |
| 1436 const uint8_t to_SCSU[]={ |
| 1437 0x41, |
| 1438 0x0e, 0xff,0xfd, |
| 1439 0x42 |
| 1440 |
| 1441 |
| 1442 }; |
| 1443 int32_t from_SCSUOffs [] ={ |
| 1444 0, |
| 1445 1,1,1, |
| 1446 2, |
| 1447 |
| 1448 }; |
| 1449 const uint8_t to_SCSU_1[]={ |
| 1450 0x41, |
| 1451 |
| 1452 }; |
| 1453 int32_t from_SCSUOffs_1 [] ={ |
| 1454 0, |
| 1455 |
| 1456 }; |
| 1457 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof
(SCSU_inputText[0]), |
| 1458 to_SCSU, sizeof(to_SCSU), "SCSU", |
| 1459 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 )) |
| 1460 log_err("u-> SCSU with substitute did not match.\n"); |
| 1461 |
| 1462 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputT
ext)/sizeof(SCSU_inputText[0]), |
| 1463 to_SCSU_1, sizeof(to_SCSU_1), "SCSU", |
| 1464 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_
ILLEGAL_CHAR_FOUND )) |
| 1465 log_err("u-> SCSU with substitute did not match.\n"); |
| 1466 } |
| 1467 |
| 1468 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTIT
UTE\n"); |
| 1469 { |
| 1470 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801,
0xffff, 0x0061,}; |
| 1471 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac, |
| 1472 0xf0, 0x90, 0x90, 0x81, |
| 1473 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, |
| 1474 0xef, 0xbf, 0xbf, 0x61, |
| 1475 |
| 1476 }; |
| 1477 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4,
5, 5, 5, 6 }; |
| 1478 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput
[0]), |
| 1479 expectedUTF8, sizeof(expectedUTF8), "utf8", |
| 1480 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) { |
| 1481 log_err("u-> utf8 with stop did not match.\n"); |
| 1482 } |
| 1483 } |
| 1484 |
| 1485 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTI
TUTE\n"); |
| 1486 { |
| 1487 static const UChar in[]={ 0x0041, 0xfeff }; |
| 1488 |
| 1489 static const uint8_t out[]={ |
| 1490 #if U_IS_BIG_ENDIAN |
| 1491 0xfe, 0xff, |
| 1492 0x00, 0x41, |
| 1493 0xfe, 0xff |
| 1494 #else |
| 1495 0xff, 0xfe, |
| 1496 0x41, 0x00, |
| 1497 0xff, 0xfe |
| 1498 #endif |
| 1499 }; |
| 1500 static const int32_t offsets[]={ |
| 1501 -1, -1, 0, 0, 1, 1 |
| 1502 }; |
| 1503 |
| 1504 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), |
| 1505 out, sizeof(out), "UTF-16", |
| 1506 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NUL
L, 0) |
| 1507 ) { |
| 1508 log_err("u->UTF-16 with substitute did not match.\n"); |
| 1509 } |
| 1510 } |
| 1511 |
| 1512 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTI
TUTE\n"); |
| 1513 { |
| 1514 static const UChar in[]={ 0x0041, 0xfeff }; |
| 1515 |
| 1516 static const uint8_t out[]={ |
| 1517 #if U_IS_BIG_ENDIAN |
| 1518 0x00, 0x00, 0xfe, 0xff, |
| 1519 0x00, 0x00, 0x00, 0x41, |
| 1520 0x00, 0x00, 0xfe, 0xff |
| 1521 #else |
| 1522 0xff, 0xfe, 0x00, 0x00, |
| 1523 0x41, 0x00, 0x00, 0x00, |
| 1524 0xff, 0xfe, 0x00, 0x00 |
| 1525 #endif |
| 1526 }; |
| 1527 static const int32_t offsets[]={ |
| 1528 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1 |
| 1529 }; |
| 1530 |
| 1531 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in), |
| 1532 out, sizeof(out), "UTF-32", |
| 1533 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NUL
L, 0) |
| 1534 ) { |
| 1535 log_err("u->UTF-32 with substitute did not match.\n"); |
| 1536 } |
| 1537 } |
| 1538 |
| 1539 /*to unicode*/ |
| 1540 |
| 1541 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1542 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949), |
| 1543 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subt
oUnicode[0]),"ibm-949", |
| 1544 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 )) |
| 1545 log_err("ibm-949->u with substitute did not match.\n"); |
| 1546 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943), |
| 1547 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subt
oUnicode[0]),"ibm-943", |
| 1548 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 )) |
| 1549 log_err("ibm-943->u with substitute did not match.\n"); |
| 1550 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930), |
| 1551 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subt
oUnicode[0]),"ibm-930", |
| 1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 )) |
| 1553 log_err("ibm-930->u with substitute did not match.\n"); |
| 1554 |
| 1555 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); |
| 1556 { |
| 1557 |
| 1558 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={ |
| 1559 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44 |
| 1560 }; |
| 1561 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4 |
| 1562 }; |
| 1563 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5}; |
| 1564 |
| 1565 |
| 1566 /* EUC_JP*/ |
| 1567 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, |
| 1568 0x8f, 0xda, 0xa1, /*unassigned*/ |
| 1569 0x8e, 0xe0, 0x8a |
| 1570 }; |
| 1571 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a
}; |
| 1572 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 }; |
| 1573 |
| 1574 /*EUC_TW*/ |
| 1575 const uint8_t sampleTxt_euc_tw[]={ |
| 1576 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, |
| 1577 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ |
| 1578 0xe6, 0xca, 0x8a, |
| 1579 }; |
| 1580 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a,
}; |
| 1581 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13}; |
| 1582 |
| 1583 |
| 1584 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCI
DIC_STATEFUL), |
| 1585 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof
(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930", |
| 1586 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 )
) |
| 1587 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n"); |
| 1588 |
| 1589 |
| 1590 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp), |
| 1591 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"
euc-jp", |
| 1592 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 )) |
| 1593 log_err("euc-jp->u with substitute did not match.\n"); |
| 1594 |
| 1595 |
| 1596 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), |
| 1597 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"
euc-tw", |
| 1598 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 )) |
| 1599 log_err("euc-tw->u with substitute did not match.\n"); |
| 1600 |
| 1601 |
| 1602 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_e
uc_jp), |
| 1603 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"
euc-jp", |
| 1604 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGA
L_CHAR_FOUND)) |
| 1605 log_err("euc-jp->u with substitute did not match.\n"); |
| 1606 } |
| 1607 #endif |
| 1608 |
| 1609 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE
\n"); |
| 1610 { |
| 1611 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, |
| 1612 0xe0, 0x80, 0x61,}; |
| 1613 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; |
| 1614 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; |
| 1615 |
| 1616 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), |
| 1617 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", |
| 1618 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) |
| 1619 log_err("utf8->u with substitute did not match.\n");; |
| 1620 } |
| 1621 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \
n"); |
| 1622 { |
| 1623 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,}; |
| 1624 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfff
d}; |
| 1625 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5}; |
| 1626 |
| 1627 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1), |
| 1628 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU", |
| 1629 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 )) |
| 1630 log_err("scsu->u with stop did not match.\n");; |
| 1631 } |
| 1632 |
| 1633 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1634 log_verbose("Testing ibm-930 subchar/subchar1\n"); |
| 1635 { |
| 1636 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65,
0x6d66, 0xdf }; |
| 1637 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0x
fe, 0x46, 0x6b, 0x0f, 0x3f }; |
| 1638 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2,
3, 3, 4, 4 }; |
| 1639 |
| 1640 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd,
0x6d66, 0x1a }; |
| 1641 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0x
fc, 0x46, 0x6b, 0x0f, 0x57 }; |
| 1642 static const int32_t offsets2[]={ 1, 3, 5,
7, 10 }; |
| 1643 |
| 1644 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "
ibm-930", |
| 1645 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NU
LL, 0) |
| 1646 ) { |
| 1647 log_err("u->ibm-930 subchar/subchar1 did not match.\n"); |
| 1648 } |
| 1649 |
| 1650 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ib
m-930", |
| 1651 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) |
| 1652 ) { |
| 1653 log_err("ibm-930->u subchar/subchar1 did not match.\n"); |
| 1654 } |
| 1655 } |
| 1656 |
| 1657 log_verbose("Testing GB 18030 with substitute callbacks\n"); |
| 1658 { |
| 1659 static const UChar u2[]={ |
| 1660 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00,
0x9fa6, 0xffff, 0xd800, 0xdc00, 0xff
fd, 0xdbff, 0xdfff }; |
| 1661 static const uint8_t gb2[]={ |
| 1662 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0x
bb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3
, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 }; |
| 1663 static const int32_t offsets2[]={ |
| 1664 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 }; |
| 1665 |
| 1666 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "
gb18030", |
| 1667 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) |
| 1668 ) { |
| 1669 log_err("gb18030->u with substitute did not match.\n"); |
| 1670 } |
| 1671 } |
| 1672 #endif |
| 1673 |
| 1674 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n"); |
| 1675 { |
| 1676 static const uint8_t utf7[]={ |
| 1677 /* a~ a+AB~ a+AB\x0c
a+AB- a+AB. a+. */ |
| 1678 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42,
0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x
2e |
| 1679 }; |
| 1680 static const UChar unicode[]={ |
| 1681 0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd,
0x61, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd |
| 1682 }; |
| 1683 static const int32_t offsets[]={ |
| 1684 0, 1, 2, 4, 7, 9,
12, 14, 17, 19, 22, 23 |
| 1685 }; |
| 1686 |
| 1687 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH
(unicode), "UTF-7", |
| 1688 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0
) |
| 1689 ) { |
| 1690 log_err("UTF-7->u with substitute did not match.\n"); |
| 1691 } |
| 1692 } |
| 1693 |
| 1694 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n"); |
| 1695 { |
| 1696 static const uint8_t |
| 1697 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff }, |
| 1698 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff }, |
| 1699 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff }; |
| 1700 |
| 1701 static const UChar |
| 1702 out1[]={ 0x4e00, 0xfeff }, |
| 1703 out2[]={ 0x004e, 0xfffe }, |
| 1704 out3[]={ 0xfefd, 0x4e00, 0xfeff }; |
| 1705 |
| 1706 static const int32_t |
| 1707 offsets1[]={ 2, 4 }, |
| 1708 offsets2[]={ 2, 4 }, |
| 1709 offsets3[]={ 0, 2, 4 }; |
| 1710 |
| 1711 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1
), "UTF-16", |
| 1712 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL,
0) |
| 1713 ) { |
| 1714 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n"); |
| 1715 } |
| 1716 |
| 1717 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2
), "UTF-16", |
| 1718 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) |
| 1719 ) { |
| 1720 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n"); |
| 1721 } |
| 1722 |
| 1723 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3
), "UTF-16", |
| 1724 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL,
0) |
| 1725 ) { |
| 1726 log_err("UTF-16 (no BOM)->u with substitute did not match.\n"); |
| 1727 } |
| 1728 } |
| 1729 |
| 1730 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n"); |
| 1731 { |
| 1732 static const uint8_t |
| 1733 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x
00, 0xfe, 0xff }, |
| 1734 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0x
ff, 0x00, 0x00 }, |
| 1735 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x
00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 }, |
| 1736 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x
00, 0x4e, 0x00 }; |
| 1737 |
| 1738 static const UChar |
| 1739 out1[]={ UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfeff }, |
| 1740 out2[]={ UTF16_LEAD(0x0f1000), UTF16_TRAIL(0x0f1000), 0xfffe }, |
| 1741 out3[]={ 0xfefe, UTF16_LEAD(0x100f00), UTF16_TRAIL(0x100f00), 0xfffd
, 0xfffd }, |
| 1742 out4[]={ UTF16_LEAD(0x10203), UTF16_TRAIL(0x10203), 0xfffd, 0x4e00 }
; |
| 1743 |
| 1744 static const int32_t |
| 1745 offsets1[]={ 4, 4, 8 }, |
| 1746 offsets2[]={ 4, 4, 8 }, |
| 1747 offsets3[]={ 0, 4, 4, 8, 12 }, |
| 1748 offsets4[]={ 0, 0, 4, 8 }; |
| 1749 |
| 1750 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1
), "UTF-32", |
| 1751 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL,
0) |
| 1752 ) { |
| 1753 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n"); |
| 1754 } |
| 1755 |
| 1756 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2
), "UTF-32", |
| 1757 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL,
0) |
| 1758 ) { |
| 1759 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n"); |
| 1760 } |
| 1761 |
| 1762 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3
), "UTF-32", |
| 1763 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL,
0) |
| 1764 ) { |
| 1765 log_err("UTF-32 (no BOM)->u with substitute did not match.\n"); |
| 1766 } |
| 1767 |
| 1768 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4
), "UTF-32", |
| 1769 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL,
0) |
| 1770 ) { |
| 1771 log_err("UTF-32 (no BOM, with error)->u with substitute did not matc
h.\n"); |
| 1772 } |
| 1773 } |
| 1774 } |
| 1775 |
| 1776 static void TestSubWithValue(int32_t inputsize, int32_t outputsize) |
| 1777 { |
| 1778 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 }; |
| 1779 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 }; |
| 1780 |
| 1781 const uint8_t expsubwvalIBM_949[]= { |
| 1782 0x00, 0xb0, 0xa1, 0xb0, 0xa2, |
| 1783 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 }; |
| 1784 |
| 1785 const uint8_t expsubwvalIBM_943[]= { |
| 1786 0x9f, 0xaf, 0x9f, 0xb1, |
| 1787 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 }; |
| 1788 |
| 1789 const uint8_t expsubwvalIBM_930[] = { |
| 1790 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5,
0x0e, 0x46, 0x6b, 0x0f }; |
| 1791 |
| 1792 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 }; |
| 1793 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 }; |
| 1794 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }
; /* last item: 3,3,3,3 because there's SO+DBCS+SI */ |
| 1795 |
| 1796 gInBufferSize = inputsize; |
| 1797 gOutBufferSize = outputsize; |
| 1798 |
| 1799 /*from Unicode*/ |
| 1800 |
| 1801 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 1802 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[
0]), |
| 1803 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949", |
| 1804 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 )) |
| 1805 log_err("u-> ibm-949 with subst with value did not match.\n"); |
| 1806 |
| 1807 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), |
| 1808 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943", |
| 1809 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 )) |
| 1810 log_err("u-> ibm-943 with sub with value did not match.\n"); |
| 1811 |
| 1812 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleTex
t2[0]), |
| 1813 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930", |
| 1814 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 )) |
| 1815 log_err("u-> ibm-930 with subst with value did not match.\n"); |
| 1816 |
| 1817 |
| 1818 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); |
| 1819 { |
| 1820 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x006
1 }; |
| 1821 static const uint8_t toIBM943[]= { 0x61, |
| 1822 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, |
| 1823 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, |
| 1824 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, |
| 1825 0x61 }; |
| 1826 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3
, 3, 3, 3, 3, 3, 4}; |
| 1827 |
| 1828 |
| 1829 /* EUC_JP*/ |
| 1830 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801,
0xdc01, 0xd801, 0x0061, 0x00a2, }; |
| 1831 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae, |
| 1832 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, |
| 1833 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, |
| 1834 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, |
| 1835 0x61, 0x8e, 0xe0, |
| 1836 }; |
| 1837 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, |
| 1838 3, 3, 3, 3, 3, 3, |
| 1839 3, 3, 3, 3, 3, 3, |
| 1840 5, 5, 5, 5, 5, 5, |
| 1841 6, 7, 7, |
| 1842 }; |
| 1843 |
| 1844 /*EUC_TW*/ |
| 1845 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801,
0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, }; |
| 1846 static const uint8_t to_euc_tw[]={ |
| 1847 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, |
| 1848 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, |
| 1849 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, |
| 1850 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, |
| 1851 0x61, 0xe6, 0xca, 0x8a, |
| 1852 }; |
| 1853 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, |
| 1854 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, |
| 1855 6, 7, 7, 8, |
| 1856 }; |
| 1857 /*ISO-2022-JP*/ |
| 1858 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x0
0E9, 0x0042} ; |
| 1859 static const uint8_t to_iso_2022_jp1[]={ |
| 1860 0x1b, 0x24, 0x42, 0x21, 0x21, |
| 1861 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
|
| 1862 0x1b, 0x24, 0x42, 0x21, 0x22, |
| 1863 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39, |
| 1864 0x42, |
| 1865 }; |
| 1866 |
| 1867 static const int32_t from_iso_2022_jpOffs1 [] ={ |
| 1868 0,0,0,0,0, |
| 1869 1,1,1,1,1,1,1,1,1, |
| 1870 2,2,2,2,2, |
| 1871 3,3,3,3,3,3,3,3,3, |
| 1872 4, |
| 1873 }; |
| 1874 /* surrogate pair*/ |
| 1875 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x
3001,0xD84D,0xDC56, 0x0042} ; |
| 1876 static const uint8_t to_iso_2022_jp2[]={ |
| 1877 0x1b, 0x24, 0x42, 0x21, 0x21, |
| 1878 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44,
0x38, 0x34, 0x44, |
| 1879 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 1880 0x1b, 0x24, 0x42, 0x21, 0x22, |
| 1881 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44,
0x38, 0x34, 0x44, |
| 1882 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 1883 0x42, |
| 1884 }; |
| 1885 static const int32_t from_iso_2022_jpOffs2 [] ={ |
| 1886 0,0,0,0,0, |
| 1887 1,1,1,1,1,1,1,1,1, |
| 1888 1,1,1,1,1,1, |
| 1889 3,3,3,3,3, |
| 1890 4,4,4,4,4,4,4,4,4, |
| 1891 4,4,4,4,4,4, |
| 1892 6, |
| 1893 }; |
| 1894 |
| 1895 /*ISO-2022-cn*/ |
| 1896 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, }; |
| 1897 static const uint8_t to_iso_2022_cn[]={ |
| 1898 0x41, |
| 1899 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, |
| 1900 0x42, |
| 1901 }; |
| 1902 static const int32_t from_iso_2022_cnOffs [] ={ |
| 1903 0, |
| 1904 1,1,1,1,1,1, |
| 1905 2, |
| 1906 }; |
| 1907 |
| 1908 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x
3001,0xD84D,0xDC56, 0x0042}; |
| 1909 |
| 1910 static const uint8_t to_iso_2022_cn4[]={ |
| 1911 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x2
1, |
| 1912 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x4
4, |
| 1913 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 1914 0x0e, 0x21, 0x22, |
| 1915 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x4
4, |
| 1916 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 1917 0x42, |
| 1918 }; |
| 1919 static const int32_t from_iso_2022_cnOffs4 [] ={ |
| 1920 0,0,0,0,0,0,0, |
| 1921 1,1,1,1,1,1,1, |
| 1922 1,1,1,1,1,1, |
| 1923 3,3,3, |
| 1924 4,4,4,4,4,4,4, |
| 1925 4,4,4,4,4,4, |
| 1926 6 |
| 1927 |
| 1928 }; |
| 1929 |
| 1930 /*ISO-2022-kr*/ |
| 1931 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xD
C56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; |
| 1932 static const uint8_t to_iso_2022_kr2[]={ |
| 1933 0x1b, 0x24, 0x29, 0x43, |
| 1934 0x41, |
| 1935 0x0e, 0x25, 0x50, |
| 1936 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, |
| 1937 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 1938 0x0e, 0x25, 0x50, |
| 1939 0x0f, 0x42, |
| 1940 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, |
| 1941 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 1942 0x43 |
| 1943 }; |
| 1944 static const int32_t from_iso_2022_krOffs2 [] ={ |
| 1945 -1,-1,-1,-1, |
| 1946 0, |
| 1947 1,1,1, |
| 1948 2,2,2,2,2,2,2, |
| 1949 2,2,2,2,2,2, |
| 1950 4,4,4, |
| 1951 5,5, |
| 1952 6,6,6,6,6,6, |
| 1953 6,6,6,6,6,6, |
| 1954 8, |
| 1955 }; |
| 1956 |
| 1957 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unas
signed*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 }; |
| 1958 static const uint8_t to_iso_2022_kr[]={ |
| 1959 0x1b, 0x24, 0x29, 0x43, |
| 1960 0x41, |
| 1961 0x0e, 0x25, 0x50, |
| 1962 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*
/ |
| 1963 0x0e, 0x25, 0x50, |
| 1964 0x0f, 0x42, |
| 1965 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ |
| 1966 0x43 |
| 1967 }; |
| 1968 |
| 1969 |
| 1970 static const int32_t from_iso_2022_krOffs [] ={ |
| 1971 -1,-1,-1,-1, |
| 1972 0, |
| 1973 1,1,1, |
| 1974 2,2,2,2,2,2,2, |
| 1975 3,3,3, |
| 1976 4,4, |
| 1977 5,5,5,5,5,5, |
| 1978 6, |
| 1979 }; |
| 1980 /* HZ encoding */ |
| 1981 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,
0x03A0, 0x0042, }; |
| 1982 |
| 1983 static const uint8_t to_hz[]={ |
| 1984 0x7e, 0x7d, 0x41, |
| 1985 0x7e, 0x7b, 0x26, 0x30, |
| 1986 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*una
ssigned*/ |
| 1987 0x7e, 0x7b, 0x26, 0x30, |
| 1988 0x7e, 0x7d, 0x42, |
| 1989 |
| 1990 }; |
| 1991 static const int32_t from_hzOffs [] ={ |
| 1992 0,0,0, |
| 1993 1,1,1,1, |
| 1994 2,2,2,2,2,2,2,2, |
| 1995 3,3,3,3, |
| 1996 4,4,4 |
| 1997 }; |
| 1998 |
| 1999 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unas
signed*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 }; |
| 2000 static const uint8_t to_hz2[]={ |
| 2001 0x7e, 0x7d, 0x41, |
| 2002 0x7e, 0x7b, 0x26, 0x30, |
| 2003 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, |
| 2004 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 2005 0x7e, 0x7b, 0x26, 0x30, |
| 2006 0x7e, 0x7d, 0x42, |
| 2007 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, |
| 2008 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 2009 0x43 |
| 2010 }; |
| 2011 static const int32_t from_hzOffs2 [] ={ |
| 2012 0,0,0, |
| 2013 1,1,1,1, |
| 2014 2,2,2,2,2,2,2,2, |
| 2015 2,2,2,2,2,2, |
| 2016 4,4,4,4, |
| 2017 5,5,5, |
| 2018 6,6,6,6,6,6, |
| 2019 6,6,6,6,6,6, |
| 2020 8, |
| 2021 }; |
| 2022 |
| 2023 /*ISCII*/ |
| 2024 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned
*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 }; |
| 2025 static const uint8_t to_iscii[]={ |
| 2026 0x41, |
| 2027 0xef, 0x42, 0xa1, |
| 2028 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ |
| 2029 0xa2, |
| 2030 0x42, |
| 2031 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/ |
| 2032 0x43 |
| 2033 }; |
| 2034 |
| 2035 |
| 2036 static const int32_t from_isciiOffs [] ={ |
| 2037 0, |
| 2038 1,1,1, |
| 2039 2,2,2,2,2,2, |
| 2040 3, |
| 2041 4, |
| 2042 5,5,5,5,5,5, |
| 2043 6, |
| 2044 }; |
| 2045 |
| 2046 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest
[0]), |
| 2047 toIBM943, sizeof(toIBM943), "ibm-943", |
| 2048 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 )) |
| 2049 log_err("u-> ibm-943 with subst with value did not match.\n"); |
| 2050 |
| 2051 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/si
zeof(euc_jp_inputText[0]), |
| 2052 to_euc_jp, sizeof(to_euc_jp), "euc-jp", |
| 2053 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 )) |
| 2054 log_err("u-> euc-jp with subst with value did not match.\n"); |
| 2055 |
| 2056 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/si
zeof(euc_tw_inputText[0]), |
| 2057 to_euc_tw, sizeof(to_euc_tw), "euc-tw", |
| 2058 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 )) |
| 2059 log_err("u-> euc-tw with subst with value did not match.\n"); |
| 2060 |
| 2061 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_in
putText1)/sizeof(iso_2022_jp_inputText1[0]), |
| 2062 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", |
| 2063 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) |
| 2064 log_err("u-> iso_2022_jp with subst with value did not match.\n"); |
| 2065 |
| 2066 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_in
putText1)/sizeof(iso_2022_jp_inputText1[0]), |
| 2067 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp", |
| 2068 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 )) |
| 2069 log_err("u-> iso_2022_jp with subst with value did not match.\n"); |
| 2070 |
| 2071 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_in
putText2)/sizeof(iso_2022_jp_inputText2[0]), |
| 2072 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp", |
| 2073 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 )) |
| 2074 log_err("u-> iso_2022_jp with subst with value did not match.\n"); |
| 2075 /*ESCAPE OPTIONS*/ |
| 2076 { |
| 2077 /* surrogate pair*/ |
| 2078 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ; |
| 2079 static const uint8_t to_iso_2022_jp3_v2[]={ |
| 2080 0x1b, 0x24, 0x42, 0x21, 0x21, |
| 2081 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34,
0x34, 0x37, 0x30, 0x3b, |
| 2082 |
| 2083 0x1b, 0x24, 0x42, 0x21, 0x22, |
| 2084 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34,
0x34, 0x37, 0x30, 0x3b, |
| 2085 |
| 2086 0x42, |
| 2087 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b
, |
| 2088 }; |
| 2089 |
| 2090 static const int32_t from_iso_2022_jpOffs3_v2 [] ={ |
| 2091 0,0,0,0,0, |
| 2092 1,1,1,1,1,1,1,1,1,1,1,1, |
| 2093 |
| 2094 3,3,3,3,3, |
| 2095 4,4,4,4,4,4,4,4,4,4,4,4, |
| 2096 |
| 2097 6, |
| 2098 7,7,7,7,7,7,7,7,7 |
| 2099 }; |
| 2100 |
| 2101 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof
(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]), |
| 2102 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp
", |
| 2103 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL,
0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) |
| 2104 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not
match.\n"); |
| 2105 } |
| 2106 { |
| 2107 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; |
| 2108 static const uint8_t to_iso_2022_cn5_v2[]={ |
| 2109 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x2
1, |
| 2110 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x4
4, |
| 2111 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, |
| 2112 0x0e, 0x21, 0x22, |
| 2113 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x4
4, |
| 2114 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36, |
| 2115 0x42, |
| 2116 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32, |
| 2117 }; |
| 2118 static const int32_t from_iso_2022_cnOffs5_v2 [] ={ |
| 2119 0,0,0,0,0,0,0, |
| 2120 1,1,1,1,1,1,1, |
| 2121 1,1,1,1,1,1, |
| 2122 3,3,3, |
| 2123 4,4,4,4,4,4,4, |
| 2124 4,4,4,4,4,4, |
| 2125 6, |
| 2126 7,7,7,7,7,7 |
| 2127 }; |
| 2128 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof
(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]), |
| 2129 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn", |
| 2130 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,U
CNV_ESCAPE_JAVA,U_ZERO_ERROR )) |
| 2131 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not mat
ch.\n"); |
| 2132 |
| 2133 } |
| 2134 { |
| 2135 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; |
| 2136 static const uint8_t to_iso_2022_cn6_v2[]={ |
| 2137 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, |
| 2138 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33,
0x34, 0x35, 0x36, 0x7d, |
| 2139 0x0e, 0x21, 0x22, |
| 2140 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33,
0x34, 0x35, 0x36, 0x7d, |
| 2141 0x42, |
| 2142 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30,
0x32, 0x7d |
| 2143 }; |
| 2144 static const int32_t from_iso_2022_cnOffs6_v2 [] ={ |
| 2145 0, 0, 0, 0, 0, 0, 0, |
| 2146 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 2147 3, 3, 3, |
| 2148 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 2149 6, |
| 2150 7, 7, 7, 7, 7, 7, 7, 7, |
| 2151 }; |
| 2152 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof
(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]), |
| 2153 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn", |
| 2154 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,U
CNV_ESCAPE_UNICODE,U_ZERO_ERROR )) |
| 2155 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not
match.\n"); |
| 2156 |
| 2157 } |
| 2158 { |
| 2159 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56
, 0x3001,0xD84D,0xDC56, 0x0042,0x0902}; |
| 2160 static const uint8_t to_iso_2022_cn7_v2[]={ |
| 2161 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, |
| 2162 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34,
0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 2163 0x0e, 0x21, 0x22, |
| 2164 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34,
0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36, |
| 2165 0x42, 0x25, 0x55, 0x30, 0x39, 0x30,
0x32, |
| 2166 }; |
| 2167 static const int32_t from_iso_2022_cnOffs7_v2 [] ={ |
| 2168 0, 0, 0, 0, 0, 0, 0, |
| 2169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, |
| 2170 3, 3, 3, |
| 2171 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, |
| 2172 6, |
| 2173 7, 7, 7, 7, 7, 7, |
| 2174 }; |
| 2175 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof
(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]), |
| 2176 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn", |
| 2177 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"
K" ,U_ZERO_ERROR )) |
| 2178 log_err("u-> iso-2022-cn with sub & K did not match.\n"); |
| 2179 |
| 2180 } |
| 2181 { |
| 2182 static const UChar iso_2022_cn_inputText8[]={ |
| 2183 0x3000, |
| 2184 0xD84D, 0xDC56, |
| 2185 0x3001, |
| 2186 0xD84D, 0xDC56, |
| 2187 0xDBFF, 0xDFFF, |
| 2188 0x0042, |
| 2189 0x0902}; |
| 2190 static const uint8_t to_iso_2022_cn8_v2[]={ |
| 2191 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21,
0x21, |
| 2192 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35,
0x36, 0x20, |
| 2193 0x0e, 0x21, 0x22, |
| 2194 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35,
0x36, 0x20, |
| 2195 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46,
0x46, 0x20, |
| 2196 0x42, |
| 2197 0x5c, 0x39, 0x30, 0x32, 0x20 |
| 2198 }; |
| 2199 static const int32_t from_iso_2022_cnOffs8_v2 [] ={ |
| 2200 0, 0, 0, 0, 0, 0, 0, |
| 2201 1, 1, 1, 1, 1, 1, 1, 1, |
| 2202 3, 3, 3, |
| 2203 4, 4, 4, 4, 4, 4, 4, 4, |
| 2204 6, 6, 6, 6, 6, 6, 6, 6, |
| 2205 8, |
| 2206 9, 9, 9, 9, 9 |
| 2207 }; |
| 2208 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof
(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]), |
| 2209 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn", |
| 2210 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,U
CNV_ESCAPE_CSS2,U_ZERO_ERROR )) |
| 2211 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not mat
ch.\n"); |
| 2212 |
| 2213 } |
| 2214 { |
| 2215 static const uint8_t to_iso_2022_cn4_v3[]={ |
| 2216 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21
, |
| 2217 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32
, 0x33, 0x34, 0x35, 0x36, |
| 2218 0x0e, 0x21, 0x22, |
| 2219 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32
, 0x33, 0x34, 0x35, 0x36, |
| 2220 0x42 |
| 2221 }; |
| 2222 |
| 2223 |
| 2224 static const int32_t from_iso_2022_cnOffs4_v3 [] ={ |
| 2225 0,0,0,0,0,0,0, |
| 2226 1,1,1,1,1,1,1,1,1,1,1, |
| 2227 |
| 2228 3,3,3, |
| 2229 4,4,4,4,4,4,4,4,4,4,4, |
| 2230 |
| 2231 6 |
| 2232 |
| 2233 }; |
| 2234 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof
(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]), |
| 2235 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn", |
| 2236 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,U
CNV_ESCAPE_C,U_ZERO_ERROR )) |
| 2237 { |
| 2238 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match
.\n"); |
| 2239 } |
| 2240 } |
| 2241 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inp
utText)/sizeof(iso_2022_cn_inputText[0]), |
| 2242 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn", |
| 2243 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 )) |
| 2244 log_err("u-> iso_2022_cn with subst with value did not match.\n"); |
| 2245 |
| 2246 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_in
putText4)/sizeof(iso_2022_cn_inputText4[0]), |
| 2247 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn", |
| 2248 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 )) |
| 2249 log_err("u-> iso_2022_cn with subst with value did not match.\n"); |
| 2250 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inp
utText)/sizeof(iso_2022_kr_inputText[0]), |
| 2251 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr", |
| 2252 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 )) |
| 2253 log_err("u-> iso_2022_kr with subst with value did not match.\n"); |
| 2254 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_in
putText2)/sizeof(iso_2022_kr_inputText2[0]), |
| 2255 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr", |
| 2256 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 )) |
| 2257 log_err("u-> iso_2022_kr2 with subst with value did not match.\n"); |
| 2258 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_
inputText[0]), |
| 2259 to_hz, sizeof(to_hz), "HZ", |
| 2260 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 )) |
| 2261 log_err("u-> hz with subst with value did not match.\n"); |
| 2262 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(h
z_inputText2[0]), |
| 2263 to_hz2, sizeof(to_hz2), "HZ", |
| 2264 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 )) |
| 2265 log_err("u-> hz with subst with value did not match.\n"); |
| 2266 |
| 2267 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/size
of(iscii_inputText[0]), |
| 2268 to_iscii, sizeof(to_iscii), "ISCII,version=0", |
| 2269 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 )) |
| 2270 log_err("u-> iscii with subst with value did not match.\n"); |
| 2271 } |
| 2272 #endif |
| 2273 |
| 2274 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); |
| 2275 /*to Unicode*/ |
| 2276 { |
| 2277 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 2278 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, |
| 2279 0x81, 0xad, /*unassigned*/ |
| 2280 0x89, 0xd3 }; |
| 2281 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, |
| 2282 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, |
| 2283 0x7B87}; |
| 2284 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3
, 3, 5}; |
| 2285 |
| 2286 /* EUC_JP*/ |
| 2287 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4,
0xae, |
| 2288 0x8f, 0xda, 0xa1, /*unassigned*/ |
| 2289 0x8e, 0xe0, |
| 2290 }; |
| 2291 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec, |
| 2292 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x
31, |
| 2293 0x00a2 }; |
| 2294 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3, |
| 2295 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 2296 9, |
| 2297 }; |
| 2298 |
| 2299 /*EUC_TW*/ |
| 2300 static const uint8_t sampleTxt_euc_tw[]={ |
| 2301 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5, |
| 2302 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/ |
| 2303 0xe6, 0xca, 0x8a, |
| 2304 }; |
| 2305 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, |
| 2306 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0
x42, 0x25, 0x58, 0x43, 0x43, |
| 2307 0x8706, 0x8a, }; |
| 2308 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, |
| 2309 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 2310 11, 13}; |
| 2311 |
| 2312 /*iso-2022-jp*/ |
| 2313 static const uint8_t sampleTxt_iso_2022_jp[]={ |
| 2314 0x1b, 0x28, 0x42, 0x41, |
| 2315 0x1b, 0x24, 0x42, 0x2A, 0x44, /*unassigned*/ |
| 2316 0x1b, 0x28, 0x42, 0x42, |
| 2317 |
| 2318 }; |
| 2319 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x32,0x41,
0x25,0x58,0x34,0x34, 0x42 }; |
| 2320 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7,
7, 7, 7, 7, 12 }; |
| 2321 |
| 2322 /*iso-2022-cn*/ |
| 2323 static const uint8_t sampleTxt_iso_2022_cn[]={ |
| 2324 0x0f, 0x41, 0x44, |
| 2325 0x1B, 0x24, 0x29, 0x47, |
| 2326 0x0E, 0x40, 0x6c, /*unassigned*/ |
| 2327 0x0f, 0x42, |
| 2328 |
| 2329 }; |
| 2330 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34
,0x30,0x25,0x58,0x36,0x43,0x42 }; |
| 2331 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8,
8, 8, 8, 8, 8, 11 }; |
| 2332 |
| 2333 /*iso-2022-kr*/ |
| 2334 static const uint8_t sampleTxt_iso_2022_kr[]={ |
| 2335 0x1b, 0x24, 0x29, 0x43, |
| 2336 0x41, |
| 2337 0x0E, 0x7f, 0x1E, |
| 2338 0x0e, 0x25, 0x50, |
| 2339 0x0f, 0x51, |
| 2340 0x42, 0x43, |
| 2341 |
| 2342 }; |
| 2343 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46
,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43}; |
| 2344 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6,
6, 6, 6, 6, 9, 12, 13 , 14 }; |
| 2345 |
| 2346 /*hz*/ |
| 2347 static const uint8_t sampleTxt_hz[]={ |
| 2348 0x41, |
| 2349 0x7e, 0x7b, 0x26, 0x30, |
| 2350 0x7f, 0x1E, /*unassigned*/ |
| 2351 0x26, 0x30, |
| 2352 0x7e, 0x7d, 0x42, |
| 2353 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/ |
| 2354 0x7e, 0x7d, 0x42, |
| 2355 }; |
| 2356 static const UChar hztoUnicode[]={ |
| 2357 0x41, |
| 2358 0x03a0, |
| 2359 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, |
| 2360 0x03A0, |
| 2361 0x42, |
| 2362 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45, |
| 2363 0x42,}; |
| 2364 |
| 2365 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,
14,14,14,14,14,18, }; |
| 2366 |
| 2367 |
| 2368 /*iscii*/ |
| 2369 static const uint8_t sampleTxt_iscii[]={ |
| 2370 0x41, |
| 2371 0x30, |
| 2372 0xEB, /*unassigned*/ |
| 2373 0xa3, |
| 2374 0x42, |
| 2375 0xEC, /*unassigned*/ |
| 2376 0x42, |
| 2377 }; |
| 2378 static const UChar isciitoUnicode[]={ |
| 2379 0x41, |
| 2380 0x30, |
| 2381 0x25, 0x58, 0x45, 0x42, |
| 2382 0x0903, |
| 2383 0x42, |
| 2384 0x25, 0x58, 0x45, 0x43, |
| 2385 0x42,}; |
| 2386 |
| 2387 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 }; |
| 2388 #endif |
| 2389 |
| 2390 /*UTF8*/ |
| 2391 static const uint8_t sampleTxtUTF8[]={ |
| 2392 0x20, 0x64, 0x50, |
| 2393 0xC2, 0x7E, /* truncated char */ |
| 2394 0x20, |
| 2395 0xE0, 0xB5, 0x7E, /* truncated char */ |
| 2396 0x40, |
| 2397 }; |
| 2398 static const UChar UTF8ToUnicode[]={ |
| 2399 0x0020, 0x0064, 0x0050, |
| 2400 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */ |
| 2401 0x0020, |
| 2402 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x00
7E, |
| 2403 0x0040 |
| 2404 }; |
| 2405 static const int32_t fromUTF8[] = { |
| 2406 0, 1, 2, |
| 2407 3, 3, 3, 3, 4, |
| 2408 5, |
| 2409 6, 6, 6, 6, 6, 6, 6, 6, 8, |
| 2410 9 |
| 2411 }; |
| 2412 static const UChar UTF8ToUnicodeXML_DEC[]={ |
| 2413 0x0020, 0x0064, 0x0050, |
| 2414 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~
*/ |
| 2415 0x0020, |
| 2416 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x00
31, 0x0038, 0x0031, 0x003B, 0x007E, |
| 2417 0x0040 |
| 2418 }; |
| 2419 static const int32_t fromUTF8XML_DEC[] = { |
| 2420 0, 1, 2, |
| 2421 3, 3, 3, 3, 3, 3, 4, |
| 2422 5, |
| 2423 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, |
| 2424 9 |
| 2425 }; |
| 2426 |
| 2427 |
| 2428 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 2429 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU), |
| 2430 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnic
ode[0]),"ibm-943", |
| 2431 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 )) |
| 2432 log_err("ibm-943->u with substitute with value did not match.\n"); |
| 2433 |
| 2434 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP), |
| 2435 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode
[0]),"euc-jp", |
| 2436 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0)) |
| 2437 log_err("euc-jp->u with substitute with value did not match.\n"); |
| 2438 |
| 2439 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw), |
| 2440 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode
[0]),"euc-tw", |
| 2441 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0)) |
| 2442 log_err("euc-tw->u with substitute with value did not match.\n"); |
| 2443 |
| 2444 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_202
2_jp), |
| 2445 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", |
| 2446 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0)) |
| 2447 log_err("iso-2022-jp->u with substitute with value did not match.\n"
); |
| 2448 |
| 2449 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sample
Txt_iso_2022_jp), |
| 2450 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2
022_jptoUnicode[0]),"iso-2022-jp", |
| 2451 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_Z
ERO_ERROR)) |
| 2452 log_err("iso-2022-jp->u with substitute with value did not match.\n"
); |
| 2453 |
| 2454 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */ |
| 2455 { |
| 2456 static const UChar iso_2022_jptoUnicodeDec[]={ |
| 2457 0x0041, |
| 2458 0x0026, 0x0023, 0x0034,
0x0032, 0x003b, |
| 2459 0x0026, 0x0023, 0x0036,
0x0038, 0x003b, |
| 2460 0x0042 }; |
| 2461 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7
,7,7,7,7,12, }; |
| 2462 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), |
| 2463 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/si
zeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", |
| 2464 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,
UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR )) |
| 2465 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_XML_DEC did not match.\n"); |
| 2466 } |
| 2467 { |
| 2468 static const UChar iso_2022_jptoUnicodeHex[]={ |
| 2469 0x0041, |
| 2470 0x0026, 0x0023, 0x0078, 0x0032
, 0x0041, 0x003b, |
| 2471 0x0026, 0x0023, 0x0078, 0x0034
, 0x0034, 0x003b, |
| 2472 0x0042 }; |
| 2473 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,
7,7,7,7,7,7,7,12 }; |
| 2474 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), |
| 2475 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/si
zeof(iso_2022_jptoUnicode[0]),"iso-2022-jp", |
| 2476 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,
UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR )) |
| 2477 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_XML_HEX did not match.\n"); |
| 2478 } |
| 2479 { |
| 2480 static const UChar iso_2022_jptoUnicodeC[]={ |
| 2481 0x0041, |
| 2482 0x005C, 0x0078, 0x0032, 0x0041, |
| 2483 0x005C, 0x0078, 0x0034, 0x0034, |
| 2484 0x0042 }; |
| 2485 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 }; |
| 2486 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeo
f(sampleTxt_iso_2022_jp), |
| 2487 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof
(iso_2022_jptoUnicode[0]),"iso-2022-jp", |
| 2488 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UC
NV_ESCAPE_C,U_ZERO_ERROR )) |
| 2489 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCA
PE_C did not match.\n"); |
| 2490 } |
| 2491 } |
| 2492 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_202
2_cn), |
| 2493 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2
022_cntoUnicode[0]),"iso-2022-cn", |
| 2494 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0)) |
| 2495 log_err("iso-2022-cn->u with substitute with value did not match.\n"
); |
| 2496 |
| 2497 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_202
2_kr), |
| 2498 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2
022_krtoUnicode[0]),"iso-2022-kr", |
| 2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0)) |
| 2500 log_err("iso-2022-kr->u with substitute with value did not match.\n"
); |
| 2501 |
| 2502 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz), |
| 2503 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ", |
| 2504 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0)) |
| 2505 log_err("hz->u with substitute with value did not match.\n"); |
| 2506 |
| 2507 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii), |
| 2508 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]
),"ISCII,version=0", |
| 2509 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0)) |
| 2510 log_err("ISCII ->u with substitute with value did not match.\n"); |
| 2511 #endif |
| 2512 |
| 2513 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8), |
| 2514 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"U
TF-8", |
| 2515 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0)) |
| 2516 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not m
atch.\n"); |
| 2517 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8)
, |
| 2518 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8To
UnicodeXML_DEC[0]),"UTF-8", |
| 2519 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE
_XML_DEC, U_ZERO_ERROR)) |
| 2520 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not m
atch.\n"); |
| 2521 } |
| 2522 } |
| 2523 |
| 2524 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 2525 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) |
| 2526 { |
| 2527 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 }; |
| 2528 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0
xd3 }; |
| 2529 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3}; |
| 2530 |
| 2531 |
| 2532 static const uint8_t text943[] = { |
| 2533 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; |
| 2534 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,
0x5b57 }; |
| 2535 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b
57 }; |
| 2536 static const UChar toUnicode943stop[]= { 0x304b}; |
| 2537 |
| 2538 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; |
| 2539 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; |
| 2540 static const int32_t fromIBM943Offsstop[] = { 0}; |
| 2541 |
| 2542 gInBufferSize = inputsize; |
| 2543 gOutBufferSize = outputsize; |
| 2544 /*checking with a legal value*/ |
| 2545 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0])
, |
| 2546 templegal949, sizeof(templegal949), "ibm-949", |
| 2547 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 )) |
| 2548 log_err("u-> ibm-949 with skip did not match.\n"); |
| 2549 |
| 2550 /*checking illegal value for ibm-943 with substitute*/ |
| 2551 if(!testConvertToUnicode(text943, sizeof(text943), |
| 2552 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0])
,"ibm-943", |
| 2553 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) |
| 2554 log_err("ibm-943->u with subst did not match.\n"); |
| 2555 /*checking illegal value for ibm-943 with skip */ |
| 2556 if(!testConvertToUnicode(text943, sizeof(text943), |
| 2557 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[
0]),"ibm-943", |
| 2558 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 )) |
| 2559 log_err("ibm-943->u with skip did not match.\n"); |
| 2560 |
| 2561 /*checking illegal value for ibm-943 with stop */ |
| 2562 if(!testConvertToUnicode(text943, sizeof(text943), |
| 2563 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[
0]),"ibm-943", |
| 2564 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 )) |
| 2565 log_err("ibm-943->u with stop did not match.\n"); |
| 2566 |
| 2567 } |
| 2568 |
| 2569 static void TestSingleByte(int32_t inputsize, int32_t outputsize) |
| 2570 { |
| 2571 static const uint8_t sampleText[] = { |
| 2572 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, |
| 2573 0xff, 0x32, 0x33}; |
| 2574 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1
a, 0x1a, 0x0032, 0x0033 }; |
| 2575 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; |
| 2576 /*checking illegal value for ibm-943 with substitute*/ |
| 2577 gInBufferSize = inputsize; |
| 2578 gOutBufferSize = outputsize; |
| 2579 |
| 2580 if(!testConvertToUnicode(sampleText, sizeof(sampleText), |
| 2581 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0])
,"ibm-943", |
| 2582 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 )) |
| 2583 log_err("ibm-943->u with subst did not match.\n"); |
| 2584 } |
| 2585 |
| 2586 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize) |
| 2587 { |
| 2588 /*EBCDIC_STATEFUL*/ |
| 2589 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x
6d65, 0x0061 }; |
| 2590 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1,
0x0e, 0xfe, 0xfe, 0x0f, 0x62 }; |
| 2591 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2,
3, 4, 4, 4, 5, 5 }; |
| 2592 /* s SO doubl SI sng s SO
fe fe SI s */ |
| 2593 |
| 2594 /*EBCDIC_STATEFUL with subChar=3f*/ |
| 2595 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0
x62, 0xb1, 0x3f, 0x62 }; |
| 2596 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2
, 2, 3, 4, 5 }; |
| 2597 static const char mySubChar[]={ 0x3f}; |
| 2598 |
| 2599 gInBufferSize = inputsize; |
| 2600 gOutBufferSize = outputsize; |
| 2601 |
| 2602 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof
(ebcdic_inputTest[0]), |
| 2603 toIBM930, sizeof(toIBM930), "ibm-930", |
| 2604 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 )) |
| 2605 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n"); |
| 2606 |
| 2607 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof
(ebcdic_inputTest[0]), |
| 2608 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930", |
| 2609 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 )) |
| 2610 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) di
d not match.\n"); |
| 2611 } |
| 2612 #endif |
| 2613 |
| 2614 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t
*expect, int expectLen, |
| 2615 const char *codepage, UConverterFromUCallback callback , const i
nt32_t *expectOffsets, |
| 2616 const char *mySubChar, int8_t len) |
| 2617 { |
| 2618 |
| 2619 |
| 2620 UErrorCode status = U_ZERO_ERROR; |
| 2621 UConverter *conv = 0; |
| 2622 char junkout[NEW_MAX_BUFFER]; /* FIX */ |
| 2623 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| 2624 const UChar *src; |
| 2625 char *end; |
| 2626 char *targ; |
| 2627 int32_t *offs; |
| 2628 int i; |
| 2629 int32_t realBufferSize; |
| 2630 char *realBufferEnd; |
| 2631 const UChar *realSourceEnd; |
| 2632 const UChar *sourceLimit; |
| 2633 UBool checkOffsets = TRUE; |
| 2634 UBool doFlush; |
| 2635 char junk[9999]; |
| 2636 char offset_str[9999]; |
| 2637 char *p; |
| 2638 UConverterFromUCallback oldAction = NULL; |
| 2639 const void* oldContext = NULL; |
| 2640 |
| 2641 |
| 2642 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 2643 junkout[i] = (char)0xF0; |
| 2644 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 2645 junokout[i] = 0xFF; |
| 2646 setNuConvTestName(codepage, "FROM"); |
| 2647 |
| 2648 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer=
%d\n", codepage, gInBufferSize, |
| 2649 gOutBufferSize); |
| 2650 |
| 2651 conv = ucnv_open(codepage, &status); |
| 2652 if(U_FAILURE(status)) |
| 2653 { |
| 2654 log_data_err("Couldn't open converter %s\n",codepage); |
| 2655 return TRUE; |
| 2656 } |
| 2657 |
| 2658 log_verbose("Converter opened..\n"); |
| 2659 |
| 2660 /*----setting the callback routine----*/ |
| 2661 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &statu
s); |
| 2662 if (U_FAILURE(status)) |
| 2663 { |
| 2664 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); |
| 2665 } |
| 2666 /*------------------------*/ |
| 2667 /*setting the subChar*/ |
| 2668 if(mySubChar != NULL){ |
| 2669 ucnv_setSubstChars(conv, mySubChar, len, &status); |
| 2670 if (U_FAILURE(status)) { |
| 2671 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); |
| 2672 } |
| 2673 } |
| 2674 /*------------*/ |
| 2675 |
| 2676 src = source; |
| 2677 targ = junkout; |
| 2678 offs = junokout; |
| 2679 |
| 2680 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| 2681 realBufferEnd = junkout + realBufferSize; |
| 2682 realSourceEnd = source + sourceLen; |
| 2683 |
| 2684 if ( gOutBufferSize != realBufferSize ) |
| 2685 checkOffsets = FALSE; |
| 2686 |
| 2687 if( gInBufferSize != NEW_MAX_BUFFER ) |
| 2688 checkOffsets = FALSE; |
| 2689 |
| 2690 do |
| 2691 { |
| 2692 end = nct_min(targ + gOutBufferSize, realBufferEnd); |
| 2693 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); |
| 2694 |
| 2695 doFlush = (UBool)(sourceLimit == realSourceEnd); |
| 2696 |
| 2697 if(targ == realBufferEnd) |
| 2698 { |
| 2699 log_err("Error, overflowed the real buffer while about to call fromU
nicode! targ=%08lx %s", targ, gNuConvTestName); |
| 2700 return FALSE; |
| 2701 } |
| 2702 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx
to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); |
| 2703 |
| 2704 |
| 2705 status = U_ZERO_ERROR; |
| 2706 |
| 2707 ucnv_fromUnicode (conv, |
| 2708 (char **)&targ, |
| 2709 (const char *)end, |
| 2710 &src, |
| 2711 sourceLimit, |
| 2712 checkOffsets ? offs : NULL, |
| 2713 doFlush, /* flush if we're at the end of the input data */ |
| 2714 &status); |
| 2715 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sour
ceLimit < realSourceEnd)) ); |
| 2716 |
| 2717 |
| 2718 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ |
| 2719 UChar errChars[50]; /* should be sufficient */ |
| 2720 int8_t errLen = 50; |
| 2721 UErrorCode err = U_ZERO_ERROR; |
| 2722 const UChar* limit= NULL; |
| 2723 const UChar* start= NULL; |
| 2724 ucnv_getInvalidUChars(conv,errChars, &errLen, &err); |
| 2725 if(U_FAILURE(err)){ |
| 2726 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName
(err)); |
| 2727 } |
| 2728 /* src points to limit of invalid chars */ |
| 2729 limit = src; |
| 2730 /* length of in invalid chars should be equal to returned length*/ |
| 2731 start = src - errLen; |
| 2732 if(u_strncmp(errChars,start,errLen)!=0){ |
| 2733 log_err("ucnv_getInvalidUChars did not return the correct invalid ch
ars for encoding %s \n", ucnv_getName(conv,&err)); |
| 2734 } |
| 2735 } |
| 2736 /* allow failure codes for the stop callback */ |
| 2737 if(U_FAILURE(status) && |
| 2738 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND
&& status != U_ILLEGAL_CHAR_FOUND))) |
| 2739 { |
| 2740 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); |
| 2741 return FALSE; |
| 2742 } |
| 2743 |
| 2744 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", |
| 2745 sourceLen, targ-junkout); |
| 2746 if(getTestOption(VERBOSITY_OPTION)) |
| 2747 { |
| 2748 |
| 2749 junk[0] = 0; |
| 2750 offset_str[0] = 0; |
| 2751 for(p = junkout;p<targ;p++) |
| 2752 { |
| 2753 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); |
| 2754 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsig
ned int)junokout[p-junkout]); |
| 2755 } |
| 2756 |
| 2757 log_verbose(junk); |
| 2758 printSeq(expect, expectLen); |
| 2759 if ( checkOffsets ) |
| 2760 { |
| 2761 log_verbose("\nOffsets:"); |
| 2762 log_verbose(offset_str); |
| 2763 } |
| 2764 log_verbose("\n"); |
| 2765 } |
| 2766 ucnv_close(conv); |
| 2767 |
| 2768 |
| 2769 if(expectLen != targ-junkout) |
| 2770 { |
| 2771 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, g
NuConvTestName); |
| 2772 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkou
t, gNuConvTestName); |
| 2773 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
| 2774 printSeqErr(expect, expectLen); |
| 2775 return FALSE; |
| 2776 } |
| 2777 |
| 2778 if (checkOffsets && (expectOffsets != 0) ) |
| 2779 { |
| 2780 log_verbose("comparing %d offsets..\n", targ-junkout); |
| 2781 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ |
| 2782 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); |
| 2783 log_err("Got Output : "); |
| 2784 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
| 2785 log_err("Got Offsets: "); |
| 2786 for(p=junkout;p<targ;p++) |
| 2787 log_err("%d,", junokout[p-junkout]); |
| 2788 log_err("\n"); |
| 2789 log_err("Expected Offsets: "); |
| 2790 for(i=0; i<(targ-junkout); i++) |
| 2791 log_err("%d,", expectOffsets[i]); |
| 2792 log_err("\n"); |
| 2793 return FALSE; |
| 2794 } |
| 2795 } |
| 2796 |
| 2797 if(!memcmp(junkout, expect, expectLen)) |
| 2798 { |
| 2799 log_verbose("String matches! %s\n", gNuConvTestName); |
| 2800 return TRUE; |
| 2801 } |
| 2802 else |
| 2803 { |
| 2804 log_err("String does not match. %s\n", gNuConvTestName); |
| 2805 log_err("source: "); |
| 2806 printUSeqErr(source, sourceLen); |
| 2807 log_err("Got: "); |
| 2808 printSeqErr((const uint8_t *)junkout, expectLen); |
| 2809 log_err("Expected: "); |
| 2810 printSeqErr(expect, expectLen); |
| 2811 return FALSE; |
| 2812 } |
| 2813 } |
| 2814 |
| 2815 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *e
xpect, int expectlen, |
| 2816 const char *codepage, UConverterToUCallback callback, const int32
_t *expectOffsets, |
| 2817 const char *mySubChar, int8_t len) |
| 2818 { |
| 2819 UErrorCode status = U_ZERO_ERROR; |
| 2820 UConverter *conv = 0; |
| 2821 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ |
| 2822 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| 2823 const char *src; |
| 2824 const char *realSourceEnd; |
| 2825 const char *srcLimit; |
| 2826 UChar *targ; |
| 2827 UChar *end; |
| 2828 int32_t *offs; |
| 2829 int i; |
| 2830 UBool checkOffsets = TRUE; |
| 2831 char junk[9999]; |
| 2832 char offset_str[9999]; |
| 2833 UChar *p; |
| 2834 UConverterToUCallback oldAction = NULL; |
| 2835 const void* oldContext = NULL; |
| 2836 |
| 2837 int32_t realBufferSize; |
| 2838 UChar *realBufferEnd; |
| 2839 |
| 2840 |
| 2841 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 2842 junkout[i] = 0xFFFE; |
| 2843 |
| 2844 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 2845 junokout[i] = -1; |
| 2846 |
| 2847 setNuConvTestName(codepage, "TO"); |
| 2848 |
| 2849 log_verbose("\n========= %s\n", gNuConvTestName); |
| 2850 |
| 2851 conv = ucnv_open(codepage, &status); |
| 2852 if(U_FAILURE(status)) |
| 2853 { |
| 2854 log_data_err("Couldn't open converter %s\n",gNuConvTestName); |
| 2855 return TRUE; |
| 2856 } |
| 2857 |
| 2858 log_verbose("Converter opened..\n"); |
| 2859 |
| 2860 src = (const char *)source; |
| 2861 targ = junkout; |
| 2862 offs = junokout; |
| 2863 |
| 2864 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| 2865 realBufferEnd = junkout + realBufferSize; |
| 2866 realSourceEnd = src + sourcelen; |
| 2867 /*----setting the callback routine----*/ |
| 2868 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status)
; |
| 2869 if (U_FAILURE(status)) |
| 2870 { |
| 2871 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); |
| 2872 } |
| 2873 /*-------------------------------------*/ |
| 2874 /*setting the subChar*/ |
| 2875 if(mySubChar != NULL){ |
| 2876 ucnv_setSubstChars(conv, mySubChar, len, &status); |
| 2877 if (U_FAILURE(status)) { |
| 2878 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); |
| 2879 } |
| 2880 } |
| 2881 /*------------*/ |
| 2882 |
| 2883 |
| 2884 if ( gOutBufferSize != realBufferSize ) |
| 2885 checkOffsets = FALSE; |
| 2886 |
| 2887 if( gInBufferSize != NEW_MAX_BUFFER ) |
| 2888 checkOffsets = FALSE; |
| 2889 |
| 2890 do |
| 2891 { |
| 2892 end = nct_min( targ + gOutBufferSize, realBufferEnd); |
| 2893 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); |
| 2894 |
| 2895 if(targ == realBufferEnd) |
| 2896 { |
| 2897 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); |
| 2898 return FALSE; |
| 2899 } |
| 2900 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); |
| 2901 |
| 2902 |
| 2903 |
| 2904 status = U_ZERO_ERROR; |
| 2905 |
| 2906 ucnv_toUnicode (conv, |
| 2907 &targ, |
| 2908 end, |
| 2909 (const char **)&src, |
| 2910 (const char *)srcLimit, |
| 2911 checkOffsets ? offs : NULL, |
| 2912 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of the source data */ |
| 2913 &status); |
| 2914 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcL
imit < realSourceEnd)) ); /* while we just need another buffer */ |
| 2915 |
| 2916 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){ |
| 2917 char errChars[50]; /* should be sufficient */ |
| 2918 int8_t errLen = 50; |
| 2919 UErrorCode err = U_ZERO_ERROR; |
| 2920 const char* limit= NULL; |
| 2921 const char* start= NULL; |
| 2922 ucnv_getInvalidChars(conv,errChars, &errLen, &err); |
| 2923 if(U_FAILURE(err)){ |
| 2924 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(
err)); |
| 2925 } |
| 2926 /* src points to limit of invalid chars */ |
| 2927 limit = src; |
| 2928 /* length of in invalid chars should be equal to returned length*/ |
| 2929 start = src - errLen; |
| 2930 if(uprv_strncmp(errChars,start,errLen)!=0){ |
| 2931 log_err("ucnv_getInvalidChars did not return the correct invalid cha
rs for encoding %s \n", ucnv_getName(conv,&err)); |
| 2932 } |
| 2933 } |
| 2934 /* allow failure codes for the stop callback */ |
| 2935 if(U_FAILURE(status) && |
| 2936 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND &
& status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND))) |
| 2937 { |
| 2938 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); |
| 2939 return FALSE; |
| 2940 } |
| 2941 |
| 2942 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", |
| 2943 sourcelen, targ-junkout); |
| 2944 if(getTestOption(VERBOSITY_OPTION)) |
| 2945 { |
| 2946 |
| 2947 junk[0] = 0; |
| 2948 offset_str[0] = 0; |
| 2949 |
| 2950 for(p = junkout;p<targ;p++) |
| 2951 { |
| 2952 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
); |
| 2953 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[p-junkout]); |
| 2954 } |
| 2955 |
| 2956 log_verbose(junk); |
| 2957 printUSeq(expect, expectlen); |
| 2958 if ( checkOffsets ) |
| 2959 { |
| 2960 log_verbose("\nOffsets:"); |
| 2961 log_verbose(offset_str); |
| 2962 } |
| 2963 log_verbose("\n"); |
| 2964 } |
| 2965 ucnv_close(conv); |
| 2966 |
| 2967 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); |
| 2968 |
| 2969 if (checkOffsets && (expectOffsets != 0)) |
| 2970 { |
| 2971 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) |
| 2972 { |
| 2973 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); |
| 2974 log_err("Got offsets: "); |
| 2975 for(p=junkout;p<targ;p++) |
| 2976 log_err(" %2d,", junokout[p-junkout]); |
| 2977 log_err("\n"); |
| 2978 log_err("Expected offsets: "); |
| 2979 for(i=0; i<(targ-junkout); i++) |
| 2980 log_err(" %2d,", expectOffsets[i]); |
| 2981 log_err("\n"); |
| 2982 log_err("Got output: "); |
| 2983 for(i=0; i<(targ-junkout); i++) |
| 2984 log_err("0x%04x,", junkout[i]); |
| 2985 log_err("\n"); |
| 2986 log_err("From source: "); |
| 2987 for(i=0; i<(src-(const char *)source); i++) |
| 2988 log_err(" 0x%02x,", (unsigned char)source[i]); |
| 2989 log_err("\n"); |
| 2990 } |
| 2991 } |
| 2992 |
| 2993 if(!memcmp(junkout, expect, expectlen*2)) |
| 2994 { |
| 2995 log_verbose("Matches!\n"); |
| 2996 return TRUE; |
| 2997 } |
| 2998 else |
| 2999 { |
| 3000 log_err("String does not match. %s\n", gNuConvTestName); |
| 3001 log_verbose("String does not match. %s\n", gNuConvTestName); |
| 3002 log_err("Got: "); |
| 3003 printUSeqErr(junkout, expectlen); |
| 3004 log_err("Expected: "); |
| 3005 printUSeqErr(expect, expectlen); |
| 3006 log_err("\n"); |
| 3007 return FALSE; |
| 3008 } |
| 3009 } |
| 3010 |
| 3011 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, con
st uint8_t *expect, int expectLen, |
| 3012 const char *codepage, UConverterFromUCallback callback , const i
nt32_t *expectOffsets, |
| 3013 const char *mySubChar, int8_t len, const void* context, UErrorCo
de expectedError) |
| 3014 { |
| 3015 |
| 3016 |
| 3017 UErrorCode status = U_ZERO_ERROR; |
| 3018 UConverter *conv = 0; |
| 3019 char junkout[NEW_MAX_BUFFER]; /* FIX */ |
| 3020 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| 3021 const UChar *src; |
| 3022 char *end; |
| 3023 char *targ; |
| 3024 int32_t *offs; |
| 3025 int i; |
| 3026 int32_t realBufferSize; |
| 3027 char *realBufferEnd; |
| 3028 const UChar *realSourceEnd; |
| 3029 const UChar *sourceLimit; |
| 3030 UBool checkOffsets = TRUE; |
| 3031 UBool doFlush; |
| 3032 char junk[9999]; |
| 3033 char offset_str[9999]; |
| 3034 char *p; |
| 3035 UConverterFromUCallback oldAction = NULL; |
| 3036 const void* oldContext = NULL; |
| 3037 |
| 3038 |
| 3039 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 3040 junkout[i] = (char)0xF0; |
| 3041 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 3042 junokout[i] = 0xFF; |
| 3043 setNuConvTestName(codepage, "FROM"); |
| 3044 |
| 3045 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer=
%d\n", codepage, gInBufferSize, |
| 3046 gOutBufferSize); |
| 3047 |
| 3048 conv = ucnv_open(codepage, &status); |
| 3049 if(U_FAILURE(status)) |
| 3050 { |
| 3051 log_data_err("Couldn't open converter %s\n",codepage); |
| 3052 return TRUE; /* Because the err has already been logged. */ |
| 3053 } |
| 3054 |
| 3055 log_verbose("Converter opened..\n"); |
| 3056 |
| 3057 /*----setting the callback routine----*/ |
| 3058 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &st
atus); |
| 3059 if (U_FAILURE(status)) |
| 3060 { |
| 3061 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); |
| 3062 } |
| 3063 /*------------------------*/ |
| 3064 /*setting the subChar*/ |
| 3065 if(mySubChar != NULL){ |
| 3066 ucnv_setSubstChars(conv, mySubChar, len, &status); |
| 3067 if (U_FAILURE(status)) { |
| 3068 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(s
tatus)); |
| 3069 } |
| 3070 } |
| 3071 /*------------*/ |
| 3072 |
| 3073 src = source; |
| 3074 targ = junkout; |
| 3075 offs = junokout; |
| 3076 |
| 3077 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| 3078 realBufferEnd = junkout + realBufferSize; |
| 3079 realSourceEnd = source + sourceLen; |
| 3080 |
| 3081 if ( gOutBufferSize != realBufferSize ) |
| 3082 checkOffsets = FALSE; |
| 3083 |
| 3084 if( gInBufferSize != NEW_MAX_BUFFER ) |
| 3085 checkOffsets = FALSE; |
| 3086 |
| 3087 do |
| 3088 { |
| 3089 end = nct_min(targ + gOutBufferSize, realBufferEnd); |
| 3090 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); |
| 3091 |
| 3092 doFlush = (UBool)(sourceLimit == realSourceEnd); |
| 3093 |
| 3094 if(targ == realBufferEnd) |
| 3095 { |
| 3096 log_err("Error, overflowed the real buffer while about to call fromU
nicode! targ=%08lx %s", targ, gNuConvTestName); |
| 3097 return FALSE; |
| 3098 } |
| 3099 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx
to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); |
| 3100 |
| 3101 |
| 3102 status = U_ZERO_ERROR; |
| 3103 |
| 3104 ucnv_fromUnicode (conv, |
| 3105 (char **)&targ, |
| 3106 (const char *)end, |
| 3107 &src, |
| 3108 sourceLimit, |
| 3109 checkOffsets ? offs : NULL, |
| 3110 doFlush, /* flush if we're at the end of the input data */ |
| 3111 &status); |
| 3112 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sour
ceLimit < realSourceEnd)) ); |
| 3113 |
| 3114 /* allow failure codes for the stop callback */ |
| 3115 if(U_FAILURE(status) && status != expectedError) |
| 3116 { |
| 3117 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); |
| 3118 return FALSE; |
| 3119 } |
| 3120 |
| 3121 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", |
| 3122 sourceLen, targ-junkout); |
| 3123 if(getTestOption(VERBOSITY_OPTION)) |
| 3124 { |
| 3125 |
| 3126 junk[0] = 0; |
| 3127 offset_str[0] = 0; |
| 3128 for(p = junkout;p<targ;p++) |
| 3129 { |
| 3130 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); |
| 3131 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsig
ned int)junokout[p-junkout]); |
| 3132 } |
| 3133 |
| 3134 log_verbose(junk); |
| 3135 printSeq(expect, expectLen); |
| 3136 if ( checkOffsets ) |
| 3137 { |
| 3138 log_verbose("\nOffsets:"); |
| 3139 log_verbose(offset_str); |
| 3140 } |
| 3141 log_verbose("\n"); |
| 3142 } |
| 3143 ucnv_close(conv); |
| 3144 |
| 3145 |
| 3146 if(expectLen != targ-junkout) |
| 3147 { |
| 3148 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, g
NuConvTestName); |
| 3149 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkou
t, gNuConvTestName); |
| 3150 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
| 3151 printSeqErr(expect, expectLen); |
| 3152 return FALSE; |
| 3153 } |
| 3154 |
| 3155 if (checkOffsets && (expectOffsets != 0) ) |
| 3156 { |
| 3157 log_verbose("comparing %d offsets..\n", targ-junkout); |
| 3158 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ |
| 3159 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); |
| 3160 log_err("Got Output : "); |
| 3161 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout)); |
| 3162 log_err("Got Offsets: "); |
| 3163 for(p=junkout;p<targ;p++) |
| 3164 log_err("%d,", junokout[p-junkout]); |
| 3165 log_err("\n"); |
| 3166 log_err("Expected Offsets: "); |
| 3167 for(i=0; i<(targ-junkout); i++) |
| 3168 log_err("%d,", expectOffsets[i]); |
| 3169 log_err("\n"); |
| 3170 return FALSE; |
| 3171 } |
| 3172 } |
| 3173 |
| 3174 if(!memcmp(junkout, expect, expectLen)) |
| 3175 { |
| 3176 log_verbose("String matches! %s\n", gNuConvTestName); |
| 3177 return TRUE; |
| 3178 } |
| 3179 else |
| 3180 { |
| 3181 log_err("String does not match. %s\n", gNuConvTestName); |
| 3182 log_err("source: "); |
| 3183 printUSeqErr(source, sourceLen); |
| 3184 log_err("Got: "); |
| 3185 printSeqErr((const uint8_t *)junkout, expectLen); |
| 3186 log_err("Expected: "); |
| 3187 printSeqErr(expect, expectLen); |
| 3188 return FALSE; |
| 3189 } |
| 3190 } |
| 3191 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, con
st UChar *expect, int expectlen, |
| 3192 const char *codepage, UConverterToUCallback callback, const int32
_t *expectOffsets, |
| 3193 const char *mySubChar, int8_t len, const void* context, UErrorCod
e expectedError) |
| 3194 { |
| 3195 UErrorCode status = U_ZERO_ERROR; |
| 3196 UConverter *conv = 0; |
| 3197 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ |
| 3198 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ |
| 3199 const char *src; |
| 3200 const char *realSourceEnd; |
| 3201 const char *srcLimit; |
| 3202 UChar *targ; |
| 3203 UChar *end; |
| 3204 int32_t *offs; |
| 3205 int i; |
| 3206 UBool checkOffsets = TRUE; |
| 3207 char junk[9999]; |
| 3208 char offset_str[9999]; |
| 3209 UChar *p; |
| 3210 UConverterToUCallback oldAction = NULL; |
| 3211 const void* oldContext = NULL; |
| 3212 |
| 3213 int32_t realBufferSize; |
| 3214 UChar *realBufferEnd; |
| 3215 |
| 3216 |
| 3217 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 3218 junkout[i] = 0xFFFE; |
| 3219 |
| 3220 for(i=0;i<NEW_MAX_BUFFER;i++) |
| 3221 junokout[i] = -1; |
| 3222 |
| 3223 setNuConvTestName(codepage, "TO"); |
| 3224 |
| 3225 log_verbose("\n========= %s\n", gNuConvTestName); |
| 3226 |
| 3227 conv = ucnv_open(codepage, &status); |
| 3228 if(U_FAILURE(status)) |
| 3229 { |
| 3230 log_data_err("Couldn't open converter %s\n",gNuConvTestName); |
| 3231 return TRUE; |
| 3232 } |
| 3233 |
| 3234 log_verbose("Converter opened..\n"); |
| 3235 |
| 3236 src = (const char *)source; |
| 3237 targ = junkout; |
| 3238 offs = junokout; |
| 3239 |
| 3240 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); |
| 3241 realBufferEnd = junkout + realBufferSize; |
| 3242 realSourceEnd = src + sourcelen; |
| 3243 /*----setting the callback routine----*/ |
| 3244 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &stat
us); |
| 3245 if (U_FAILURE(status)) |
| 3246 { |
| 3247 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(st
atus)); |
| 3248 } |
| 3249 /*-------------------------------------*/ |
| 3250 /*setting the subChar*/ |
| 3251 if(mySubChar != NULL){ |
| 3252 ucnv_setSubstChars(conv, mySubChar, len, &status); |
| 3253 if (U_FAILURE(status)) { |
| 3254 log_err("FAILURE in setting the callback Function! %s\n", myErrorNam
e(status)); |
| 3255 } |
| 3256 } |
| 3257 /*------------*/ |
| 3258 |
| 3259 |
| 3260 if ( gOutBufferSize != realBufferSize ) |
| 3261 checkOffsets = FALSE; |
| 3262 |
| 3263 if( gInBufferSize != NEW_MAX_BUFFER ) |
| 3264 checkOffsets = FALSE; |
| 3265 |
| 3266 do |
| 3267 { |
| 3268 end = nct_min( targ + gOutBufferSize, realBufferEnd); |
| 3269 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); |
| 3270 |
| 3271 if(targ == realBufferEnd) |
| 3272 { |
| 3273 log_err("Error, the end would overflow the real output buffer while
about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); |
| 3274 return FALSE; |
| 3275 } |
| 3276 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); |
| 3277 |
| 3278 |
| 3279 |
| 3280 status = U_ZERO_ERROR; |
| 3281 |
| 3282 ucnv_toUnicode (conv, |
| 3283 &targ, |
| 3284 end, |
| 3285 (const char **)&src, |
| 3286 (const char *)srcLimit, |
| 3287 checkOffsets ? offs : NULL, |
| 3288 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end
of the source data */ |
| 3289 &status); |
| 3290 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcL
imit < realSourceEnd)) ); /* while we just need another buffer */ |
| 3291 |
| 3292 /* allow failure codes for the stop callback */ |
| 3293 if(U_FAILURE(status) && status!=expectedError) |
| 3294 { |
| 3295 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status),
gNuConvTestName); |
| 3296 return FALSE; |
| 3297 } |
| 3298 |
| 3299 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", |
| 3300 sourcelen, targ-junkout); |
| 3301 if(getTestOption(VERBOSITY_OPTION)) |
| 3302 { |
| 3303 |
| 3304 junk[0] = 0; |
| 3305 offset_str[0] = 0; |
| 3306 |
| 3307 for(p = junkout;p<targ;p++) |
| 3308 { |
| 3309 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p
); |
| 3310 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (uns
igned int)junokout[p-junkout]); |
| 3311 } |
| 3312 |
| 3313 log_verbose(junk); |
| 3314 printUSeq(expect, expectlen); |
| 3315 if ( checkOffsets ) |
| 3316 { |
| 3317 log_verbose("\nOffsets:"); |
| 3318 log_verbose(offset_str); |
| 3319 } |
| 3320 log_verbose("\n"); |
| 3321 } |
| 3322 ucnv_close(conv); |
| 3323 |
| 3324 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); |
| 3325 |
| 3326 if (checkOffsets && (expectOffsets != 0)) |
| 3327 { |
| 3328 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) |
| 3329 { |
| 3330 log_err("did not get the expected offsets while %s \n", gNuConvTestN
ame); |
| 3331 log_err("Got offsets: "); |
| 3332 for(p=junkout;p<targ;p++) |
| 3333 log_err(" %2d,", junokout[p-junkout]); |
| 3334 log_err("\n"); |
| 3335 log_err("Expected offsets: "); |
| 3336 for(i=0; i<(targ-junkout); i++) |
| 3337 log_err(" %2d,", expectOffsets[i]); |
| 3338 log_err("\n"); |
| 3339 log_err("Got output: "); |
| 3340 for(i=0; i<(targ-junkout); i++) |
| 3341 log_err("0x%04x,", junkout[i]); |
| 3342 log_err("\n"); |
| 3343 log_err("From source: "); |
| 3344 for(i=0; i<(src-(const char *)source); i++) |
| 3345 log_err(" 0x%02x,", (unsigned char)source[i]); |
| 3346 log_err("\n"); |
| 3347 } |
| 3348 } |
| 3349 |
| 3350 if(!memcmp(junkout, expect, expectlen*2)) |
| 3351 { |
| 3352 log_verbose("Matches!\n"); |
| 3353 return TRUE; |
| 3354 } |
| 3355 else |
| 3356 { |
| 3357 log_err("String does not match. %s\n", gNuConvTestName); |
| 3358 log_verbose("String does not match. %s\n", gNuConvTestName); |
| 3359 log_err("Got: "); |
| 3360 printUSeqErr(junkout, expectlen); |
| 3361 log_err("Expected: "); |
| 3362 printUSeqErr(expect, expectlen); |
| 3363 log_err("\n"); |
| 3364 return FALSE; |
| 3365 } |
| 3366 } |
| 3367 |
| 3368 static void TestCallBackFailure(void) { |
| 3369 UErrorCode status = U_USELESS_COLLATOR_ERROR; |
| 3370 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status); |
| 3371 if (status != U_USELESS_COLLATOR_ERROR) { |
| 3372 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad
UErrorCode\n"); |
| 3373 } |
| 3374 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status); |
| 3375 if (status != U_USELESS_COLLATOR_ERROR) { |
| 3376 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad
UErrorCode\n"); |
| 3377 } |
| 3378 ucnv_cbFromUWriteSub(NULL, -1, &status); |
| 3379 if (status != U_USELESS_COLLATOR_ERROR) { |
| 3380 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UE
rrorCode\n"); |
| 3381 } |
| 3382 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status); |
| 3383 if (status != U_USELESS_COLLATOR_ERROR) { |
| 3384 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad U
ErrorCode\n"); |
| 3385 } |
| 3386 } |
OLD | NEW |