OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2002-2009, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** |
| 6 * file name: ucnv_u32.c |
| 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) |
| 9 * indentation:4 |
| 10 * |
| 11 * created on: 2002jul01 |
| 12 * created by: Markus W. Scherer |
| 13 * |
| 14 * UTF-32 converter implementation. Used to be in ucnv_utf.c. |
| 15 */ |
| 16 |
| 17 #include "unicode/utypes.h" |
| 18 |
| 19 #if !UCONFIG_NO_CONVERSION |
| 20 |
| 21 #include "unicode/ucnv.h" |
| 22 #include "ucnv_bld.h" |
| 23 #include "ucnv_cnv.h" |
| 24 #include "cmemory.h" |
| 25 |
/* Largest code point that fits in a single UTF-16 code unit. */
#define MAXIMUM_UCS2            0x0000FFFF
/* Largest code point accepted by the UTF-32 converters in this file. */
#define MAXIMUM_UTF             0x0010FFFF

/* Constants for combining a surrogate pair into one code point. */
#define HALF_SHIFT              10
#define HALF_BASE               0x0010000
#define HALF_MASK               0x3FF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_LOW_START     0xDC00

/* -SURROGATE_LOW_START + HALF_BASE, i.e. 9216 */
#define SURROGATE_LOW_BASE      (HALF_BASE - SURROGATE_LOW_START)

/* Value of fromUnicodeStatus meaning "a BOM still has to be written". */
enum {
    UCNV_NEED_TO_WRITE_BOM = 1
};
| 40 |
| 41 /* UTF-32BE ----------------------------------------------------------------- */ |
| 42 |
| 43 static void |
| 44 T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, |
| 45 UErrorCode * err) |
| 46 { |
| 47 const unsigned char *mySource = (unsigned char *) args->source; |
| 48 UChar *myTarget = args->target; |
| 49 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| 50 const UChar *targetLimit = args->targetLimit; |
| 51 unsigned char *toUBytes = args->converter->toUBytes; |
| 52 uint32_t ch, i; |
| 53 |
| 54 /* Restore state of current sequence */ |
| 55 if (args->converter->toUnicodeStatus && myTarget < targetLimit) { |
| 56 i = args->converter->toULength; /* restore # of bytes consumed */ |
| 57 args->converter->toULength = 0; |
| 58 |
| 59 ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calcul
ated ch from a previous call*/ |
| 60 args->converter->toUnicodeStatus = 0; |
| 61 goto morebytes; |
| 62 } |
| 63 |
| 64 while (mySource < sourceLimit && myTarget < targetLimit) { |
| 65 i = 0; |
| 66 ch = 0; |
| 67 morebytes: |
| 68 while (i < sizeof(uint32_t)) { |
| 69 if (mySource < sourceLimit) { |
| 70 ch = (ch << 8) | (uint8_t)(*mySource); |
| 71 toUBytes[i++] = (char) *(mySource++); |
| 72 } |
| 73 else { |
| 74 /* stores a partially calculated target*/ |
| 75 /* + 1 to make 0 a valid character */ |
| 76 args->converter->toUnicodeStatus = ch + 1; |
| 77 args->converter->toULength = (int8_t) i; |
| 78 goto donefornow; |
| 79 } |
| 80 } |
| 81 |
| 82 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
| 83 /* Normal valid byte when the loop has not prematurely terminated (i
< inBytes) */ |
| 84 if (ch <= MAXIMUM_UCS2) |
| 85 { |
| 86 /* fits in 16 bits */ |
| 87 *(myTarget++) = (UChar) ch; |
| 88 } |
| 89 else { |
| 90 /* write out the surrogates */ |
| 91 *(myTarget++) = U16_LEAD(ch); |
| 92 ch = U16_TRAIL(ch); |
| 93 if (myTarget < targetLimit) { |
| 94 *(myTarget++) = (UChar)ch; |
| 95 } |
| 96 else { |
| 97 /* Put in overflow buffer (not handled here) */ |
| 98 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
| 99 args->converter->UCharErrorBufferLength = 1; |
| 100 *err = U_BUFFER_OVERFLOW_ERROR; |
| 101 break; |
| 102 } |
| 103 } |
| 104 } |
| 105 else { |
| 106 args->converter->toULength = (int8_t)i; |
| 107 *err = U_ILLEGAL_CHAR_FOUND; |
| 108 break; |
| 109 } |
| 110 } |
| 111 |
| 112 donefornow: |
| 113 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
| 114 /* End of target buffer */ |
| 115 *err = U_BUFFER_OVERFLOW_ERROR; |
| 116 } |
| 117 |
| 118 args->target = myTarget; |
| 119 args->source = (const char *) mySource; |
| 120 } |
| 121 |
| 122 static void |
| 123 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, |
| 124 UErrorCode * err) |
| 125 { |
| 126 const unsigned char *mySource = (unsigned char *) args->source; |
| 127 UChar *myTarget = args->target; |
| 128 int32_t *myOffsets = args->offsets; |
| 129 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| 130 const UChar *targetLimit = args->targetLimit; |
| 131 unsigned char *toUBytes = args->converter->toUBytes; |
| 132 uint32_t ch, i; |
| 133 int32_t offsetNum = 0; |
| 134 |
| 135 /* Restore state of current sequence */ |
| 136 if (args->converter->toUnicodeStatus && myTarget < targetLimit) { |
| 137 i = args->converter->toULength; /* restore # of bytes consumed */ |
| 138 args->converter->toULength = 0; |
| 139 |
| 140 ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calcul
ated ch from a previous call*/ |
| 141 args->converter->toUnicodeStatus = 0; |
| 142 goto morebytes; |
| 143 } |
| 144 |
| 145 while (mySource < sourceLimit && myTarget < targetLimit) { |
| 146 i = 0; |
| 147 ch = 0; |
| 148 morebytes: |
| 149 while (i < sizeof(uint32_t)) { |
| 150 if (mySource < sourceLimit) { |
| 151 ch = (ch << 8) | (uint8_t)(*mySource); |
| 152 toUBytes[i++] = (char) *(mySource++); |
| 153 } |
| 154 else { |
| 155 /* stores a partially calculated target*/ |
| 156 /* + 1 to make 0 a valid character */ |
| 157 args->converter->toUnicodeStatus = ch + 1; |
| 158 args->converter->toULength = (int8_t) i; |
| 159 goto donefornow; |
| 160 } |
| 161 } |
| 162 |
| 163 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
| 164 /* Normal valid byte when the loop has not prematurely terminated (i
< inBytes) */ |
| 165 if (ch <= MAXIMUM_UCS2) { |
| 166 /* fits in 16 bits */ |
| 167 *(myTarget++) = (UChar) ch; |
| 168 *(myOffsets++) = offsetNum; |
| 169 } |
| 170 else { |
| 171 /* write out the surrogates */ |
| 172 *(myTarget++) = U16_LEAD(ch); |
| 173 *myOffsets++ = offsetNum; |
| 174 ch = U16_TRAIL(ch); |
| 175 if (myTarget < targetLimit) |
| 176 { |
| 177 *(myTarget++) = (UChar)ch; |
| 178 *(myOffsets++) = offsetNum; |
| 179 } |
| 180 else { |
| 181 /* Put in overflow buffer (not handled here) */ |
| 182 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
| 183 args->converter->UCharErrorBufferLength = 1; |
| 184 *err = U_BUFFER_OVERFLOW_ERROR; |
| 185 break; |
| 186 } |
| 187 } |
| 188 } |
| 189 else { |
| 190 args->converter->toULength = (int8_t)i; |
| 191 *err = U_ILLEGAL_CHAR_FOUND; |
| 192 break; |
| 193 } |
| 194 offsetNum += i; |
| 195 } |
| 196 |
| 197 donefornow: |
| 198 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
| 199 { |
| 200 /* End of target buffer */ |
| 201 *err = U_BUFFER_OVERFLOW_ERROR; |
| 202 } |
| 203 |
| 204 args->target = myTarget; |
| 205 args->source = (const char *) mySource; |
| 206 args->offsets = myOffsets; |
| 207 } |
| 208 |
| 209 static void |
| 210 T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, |
| 211 UErrorCode * err) |
| 212 { |
| 213 const UChar *mySource = args->source; |
| 214 unsigned char *myTarget; |
| 215 const UChar *sourceLimit = args->sourceLimit; |
| 216 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
| 217 UChar32 ch, ch2; |
| 218 unsigned int indexToWrite; |
| 219 unsigned char temp[sizeof(uint32_t)]; |
| 220 |
| 221 if(mySource >= sourceLimit) { |
| 222 /* no input, nothing to do */ |
| 223 return; |
| 224 } |
| 225 |
| 226 /* write the BOM if necessary */ |
| 227 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
| 228 static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; |
| 229 ucnv_fromUWriteBytes(args->converter, |
| 230 bom, 4, |
| 231 &args->target, args->targetLimit, |
| 232 &args->offsets, -1, |
| 233 err); |
| 234 args->converter->fromUnicodeStatus=0; |
| 235 } |
| 236 |
| 237 myTarget = (unsigned char *) args->target; |
| 238 temp[0] = 0; |
| 239 |
| 240 if (args->converter->fromUChar32) { |
| 241 ch = args->converter->fromUChar32; |
| 242 args->converter->fromUChar32 = 0; |
| 243 goto lowsurogate; |
| 244 } |
| 245 |
| 246 while (mySource < sourceLimit && myTarget < targetLimit) { |
| 247 ch = *(mySource++); |
| 248 |
| 249 if (UTF_IS_SURROGATE(ch)) { |
| 250 if (U_IS_LEAD(ch)) { |
| 251 lowsurogate: |
| 252 if (mySource < sourceLimit) { |
| 253 ch2 = *mySource; |
| 254 if (U_IS_TRAIL(ch2)) { |
| 255 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 +
SURROGATE_LOW_BASE; |
| 256 mySource++; |
| 257 } |
| 258 else { |
| 259 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 260 /* callback(illegal) */ |
| 261 args->converter->fromUChar32 = ch; |
| 262 *err = U_ILLEGAL_CHAR_FOUND; |
| 263 break; |
| 264 } |
| 265 } |
| 266 else { |
| 267 /* ran out of source */ |
| 268 args->converter->fromUChar32 = ch; |
| 269 if (args->flush) { |
| 270 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 271 /* callback(illegal) */ |
| 272 *err = U_ILLEGAL_CHAR_FOUND; |
| 273 } |
| 274 break; |
| 275 } |
| 276 } |
| 277 else { |
| 278 /* this is an unmatched trail code unit (2nd surrogate) */ |
| 279 /* callback(illegal) */ |
| 280 args->converter->fromUChar32 = ch; |
| 281 *err = U_ILLEGAL_CHAR_FOUND; |
| 282 break; |
| 283 } |
| 284 } |
| 285 |
| 286 /* We cannot get any larger than 10FFFF because we are coming from UTF-1
6 */ |
| 287 temp[1] = (uint8_t) (ch >> 16 & 0x1F); |
| 288 temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & F
F) */ |
| 289 temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & F
F) */ |
| 290 |
| 291 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrit
e++) { |
| 292 if (myTarget < targetLimit) { |
| 293 *(myTarget++) = temp[indexToWrite]; |
| 294 } |
| 295 else { |
| 296 args->converter->charErrorBuffer[args->converter->charErrorBuffe
rLength++] = temp[indexToWrite]; |
| 297 *err = U_BUFFER_OVERFLOW_ERROR; |
| 298 } |
| 299 } |
| 300 } |
| 301 |
| 302 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
| 303 *err = U_BUFFER_OVERFLOW_ERROR; |
| 304 } |
| 305 |
| 306 args->target = (char *) myTarget; |
| 307 args->source = mySource; |
| 308 } |
| 309 |
| 310 static void |
| 311 T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, |
| 312 UErrorCode * err) |
| 313 { |
| 314 const UChar *mySource = args->source; |
| 315 unsigned char *myTarget; |
| 316 int32_t *myOffsets; |
| 317 const UChar *sourceLimit = args->sourceLimit; |
| 318 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
| 319 UChar32 ch, ch2; |
| 320 int32_t offsetNum = 0; |
| 321 unsigned int indexToWrite; |
| 322 unsigned char temp[sizeof(uint32_t)]; |
| 323 |
| 324 if(mySource >= sourceLimit) { |
| 325 /* no input, nothing to do */ |
| 326 return; |
| 327 } |
| 328 |
| 329 /* write the BOM if necessary */ |
| 330 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
| 331 static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; |
| 332 ucnv_fromUWriteBytes(args->converter, |
| 333 bom, 4, |
| 334 &args->target, args->targetLimit, |
| 335 &args->offsets, -1, |
| 336 err); |
| 337 args->converter->fromUnicodeStatus=0; |
| 338 } |
| 339 |
| 340 myTarget = (unsigned char *) args->target; |
| 341 myOffsets = args->offsets; |
| 342 temp[0] = 0; |
| 343 |
| 344 if (args->converter->fromUChar32) { |
| 345 ch = args->converter->fromUChar32; |
| 346 args->converter->fromUChar32 = 0; |
| 347 goto lowsurogate; |
| 348 } |
| 349 |
| 350 while (mySource < sourceLimit && myTarget < targetLimit) { |
| 351 ch = *(mySource++); |
| 352 |
| 353 if (UTF_IS_SURROGATE(ch)) { |
| 354 if (U_IS_LEAD(ch)) { |
| 355 lowsurogate: |
| 356 if (mySource < sourceLimit) { |
| 357 ch2 = *mySource; |
| 358 if (U_IS_TRAIL(ch2)) { |
| 359 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 +
SURROGATE_LOW_BASE; |
| 360 mySource++; |
| 361 } |
| 362 else { |
| 363 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 364 /* callback(illegal) */ |
| 365 args->converter->fromUChar32 = ch; |
| 366 *err = U_ILLEGAL_CHAR_FOUND; |
| 367 break; |
| 368 } |
| 369 } |
| 370 else { |
| 371 /* ran out of source */ |
| 372 args->converter->fromUChar32 = ch; |
| 373 if (args->flush) { |
| 374 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 375 /* callback(illegal) */ |
| 376 *err = U_ILLEGAL_CHAR_FOUND; |
| 377 } |
| 378 break; |
| 379 } |
| 380 } |
| 381 else { |
| 382 /* this is an unmatched trail code unit (2nd surrogate) */ |
| 383 /* callback(illegal) */ |
| 384 args->converter->fromUChar32 = ch; |
| 385 *err = U_ILLEGAL_CHAR_FOUND; |
| 386 break; |
| 387 } |
| 388 } |
| 389 |
| 390 /* We cannot get any larger than 10FFFF because we are coming from UTF-1
6 */ |
| 391 temp[1] = (uint8_t) (ch >> 16 & 0x1F); |
| 392 temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & F
F) */ |
| 393 temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & F
F) */ |
| 394 |
| 395 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrit
e++) { |
| 396 if (myTarget < targetLimit) { |
| 397 *(myTarget++) = temp[indexToWrite]; |
| 398 *(myOffsets++) = offsetNum; |
| 399 } |
| 400 else { |
| 401 args->converter->charErrorBuffer[args->converter->charErrorBuffe
rLength++] = temp[indexToWrite]; |
| 402 *err = U_BUFFER_OVERFLOW_ERROR; |
| 403 } |
| 404 } |
| 405 offsetNum = offsetNum + 1 + (temp[1] != 0); |
| 406 } |
| 407 |
| 408 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { |
| 409 *err = U_BUFFER_OVERFLOW_ERROR; |
| 410 } |
| 411 |
| 412 args->target = (char *) myTarget; |
| 413 args->source = mySource; |
| 414 args->offsets = myOffsets; |
| 415 } |
| 416 |
| 417 static UChar32 |
| 418 T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, |
| 419 UErrorCode* err) |
| 420 { |
| 421 const uint8_t *mySource; |
| 422 UChar32 myUChar; |
| 423 int32_t length; |
| 424 |
| 425 mySource = (const uint8_t *)args->source; |
| 426 if (mySource >= (const uint8_t *)args->sourceLimit) |
| 427 { |
| 428 /* no input */ |
| 429 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| 430 return 0xffff; |
| 431 } |
| 432 |
| 433 length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); |
| 434 if (length < 4) |
| 435 { |
| 436 /* got a partial character */ |
| 437 uprv_memcpy(args->converter->toUBytes, mySource, length); |
| 438 args->converter->toULength = (int8_t)length; |
| 439 args->source = (const char *)(mySource + length); |
| 440 *err = U_TRUNCATED_CHAR_FOUND; |
| 441 return 0xffff; |
| 442 } |
| 443 |
| 444 /* Don't even try to do a direct cast because the value may be on an odd add
ress. */ |
| 445 myUChar = ((UChar32)mySource[0] << 24) |
| 446 | ((UChar32)mySource[1] << 16) |
| 447 | ((UChar32)mySource[2] << 8) |
| 448 | ((UChar32)mySource[3]); |
| 449 |
| 450 args->source = (const char *)(mySource + 4); |
| 451 if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { |
| 452 return myUChar; |
| 453 } |
| 454 |
| 455 uprv_memcpy(args->converter->toUBytes, mySource, 4); |
| 456 args->converter->toULength = 4; |
| 457 |
| 458 *err = U_ILLEGAL_CHAR_FOUND; |
| 459 return 0xffff; |
| 460 } |
| 461 |
| 462 static const UConverterImpl _UTF32BEImpl = { |
| 463 UCNV_UTF32_BigEndian, |
| 464 |
| 465 NULL, |
| 466 NULL, |
| 467 |
| 468 NULL, |
| 469 NULL, |
| 470 NULL, |
| 471 |
| 472 T_UConverter_toUnicode_UTF32_BE, |
| 473 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, |
| 474 T_UConverter_fromUnicode_UTF32_BE, |
| 475 T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, |
| 476 T_UConverter_getNextUChar_UTF32_BE, |
| 477 |
| 478 NULL, |
| 479 NULL, |
| 480 NULL, |
| 481 NULL, |
| 482 ucnv_getNonSurrogateUnicodeSet |
| 483 }; |
| 484 |
| 485 /* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32
*/ |
| 486 static const UConverterStaticData _UTF32BEStaticData = { |
| 487 sizeof(UConverterStaticData), |
| 488 "UTF-32BE", |
| 489 1232, |
| 490 UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, |
| 491 { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, |
| 492 0, |
| 493 0, |
| 494 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
| 495 }; |
| 496 |
| 497 const UConverterSharedData _UTF32BEData = { |
| 498 sizeof(UConverterSharedData), ~((uint32_t) 0), |
| 499 NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl, |
| 500 0 |
| 501 }; |
| 502 |
| 503 /* UTF-32LE ---------------------------------------------------------- */ |
| 504 |
| 505 static void |
| 506 T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, |
| 507 UErrorCode * err) |
| 508 { |
| 509 const unsigned char *mySource = (unsigned char *) args->source; |
| 510 UChar *myTarget = args->target; |
| 511 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| 512 const UChar *targetLimit = args->targetLimit; |
| 513 unsigned char *toUBytes = args->converter->toUBytes; |
| 514 uint32_t ch, i; |
| 515 |
| 516 /* Restore state of current sequence */ |
| 517 if (args->converter->toUnicodeStatus && myTarget < targetLimit) |
| 518 { |
| 519 i = args->converter->toULength; /* restore # of bytes consumed */ |
| 520 args->converter->toULength = 0; |
| 521 |
| 522 /* Stores the previously calculated ch from a previous call*/ |
| 523 ch = args->converter->toUnicodeStatus - 1; |
| 524 args->converter->toUnicodeStatus = 0; |
| 525 goto morebytes; |
| 526 } |
| 527 |
| 528 while (mySource < sourceLimit && myTarget < targetLimit) |
| 529 { |
| 530 i = 0; |
| 531 ch = 0; |
| 532 morebytes: |
| 533 while (i < sizeof(uint32_t)) |
| 534 { |
| 535 if (mySource < sourceLimit) |
| 536 { |
| 537 ch |= ((uint8_t)(*mySource)) << (i * 8); |
| 538 toUBytes[i++] = (char) *(mySource++); |
| 539 } |
| 540 else |
| 541 { |
| 542 /* stores a partially calculated target*/ |
| 543 /* + 1 to make 0 a valid character */ |
| 544 args->converter->toUnicodeStatus = ch + 1; |
| 545 args->converter->toULength = (int8_t) i; |
| 546 goto donefornow; |
| 547 } |
| 548 } |
| 549 |
| 550 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { |
| 551 /* Normal valid byte when the loop has not prematurely terminated (i
< inBytes) */ |
| 552 if (ch <= MAXIMUM_UCS2) { |
| 553 /* fits in 16 bits */ |
| 554 *(myTarget++) = (UChar) ch; |
| 555 } |
| 556 else { |
| 557 /* write out the surrogates */ |
| 558 *(myTarget++) = U16_LEAD(ch); |
| 559 ch = U16_TRAIL(ch); |
| 560 if (myTarget < targetLimit) { |
| 561 *(myTarget++) = (UChar)ch; |
| 562 } |
| 563 else { |
| 564 /* Put in overflow buffer (not handled here) */ |
| 565 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
| 566 args->converter->UCharErrorBufferLength = 1; |
| 567 *err = U_BUFFER_OVERFLOW_ERROR; |
| 568 break; |
| 569 } |
| 570 } |
| 571 } |
| 572 else { |
| 573 args->converter->toULength = (int8_t)i; |
| 574 *err = U_ILLEGAL_CHAR_FOUND; |
| 575 break; |
| 576 } |
| 577 } |
| 578 |
| 579 donefornow: |
| 580 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
| 581 { |
| 582 /* End of target buffer */ |
| 583 *err = U_BUFFER_OVERFLOW_ERROR; |
| 584 } |
| 585 |
| 586 args->target = myTarget; |
| 587 args->source = (const char *) mySource; |
| 588 } |
| 589 |
| 590 static void |
| 591 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, |
| 592 UErrorCode * err) |
| 593 { |
| 594 const unsigned char *mySource = (unsigned char *) args->source; |
| 595 UChar *myTarget = args->target; |
| 596 int32_t *myOffsets = args->offsets; |
| 597 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| 598 const UChar *targetLimit = args->targetLimit; |
| 599 unsigned char *toUBytes = args->converter->toUBytes; |
| 600 uint32_t ch, i; |
| 601 int32_t offsetNum = 0; |
| 602 |
| 603 /* Restore state of current sequence */ |
| 604 if (args->converter->toUnicodeStatus && myTarget < targetLimit) |
| 605 { |
| 606 i = args->converter->toULength; /* restore # of bytes consumed */ |
| 607 args->converter->toULength = 0; |
| 608 |
| 609 /* Stores the previously calculated ch from a previous call*/ |
| 610 ch = args->converter->toUnicodeStatus - 1; |
| 611 args->converter->toUnicodeStatus = 0; |
| 612 goto morebytes; |
| 613 } |
| 614 |
| 615 while (mySource < sourceLimit && myTarget < targetLimit) |
| 616 { |
| 617 i = 0; |
| 618 ch = 0; |
| 619 morebytes: |
| 620 while (i < sizeof(uint32_t)) |
| 621 { |
| 622 if (mySource < sourceLimit) |
| 623 { |
| 624 ch |= ((uint8_t)(*mySource)) << (i * 8); |
| 625 toUBytes[i++] = (char) *(mySource++); |
| 626 } |
| 627 else |
| 628 { |
| 629 /* stores a partially calculated target*/ |
| 630 /* + 1 to make 0 a valid character */ |
| 631 args->converter->toUnicodeStatus = ch + 1; |
| 632 args->converter->toULength = (int8_t) i; |
| 633 goto donefornow; |
| 634 } |
| 635 } |
| 636 |
| 637 if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) |
| 638 { |
| 639 /* Normal valid byte when the loop has not prematurely terminated (i
< inBytes) */ |
| 640 if (ch <= MAXIMUM_UCS2) |
| 641 { |
| 642 /* fits in 16 bits */ |
| 643 *(myTarget++) = (UChar) ch; |
| 644 *(myOffsets++) = offsetNum; |
| 645 } |
| 646 else { |
| 647 /* write out the surrogates */ |
| 648 *(myTarget++) = U16_LEAD(ch); |
| 649 *(myOffsets++) = offsetNum; |
| 650 ch = U16_TRAIL(ch); |
| 651 if (myTarget < targetLimit) |
| 652 { |
| 653 *(myTarget++) = (UChar)ch; |
| 654 *(myOffsets++) = offsetNum; |
| 655 } |
| 656 else |
| 657 { |
| 658 /* Put in overflow buffer (not handled here) */ |
| 659 args->converter->UCharErrorBuffer[0] = (UChar) ch; |
| 660 args->converter->UCharErrorBufferLength = 1; |
| 661 *err = U_BUFFER_OVERFLOW_ERROR; |
| 662 break; |
| 663 } |
| 664 } |
| 665 } |
| 666 else |
| 667 { |
| 668 args->converter->toULength = (int8_t)i; |
| 669 *err = U_ILLEGAL_CHAR_FOUND; |
| 670 break; |
| 671 } |
| 672 offsetNum += i; |
| 673 } |
| 674 |
| 675 donefornow: |
| 676 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
| 677 { |
| 678 /* End of target buffer */ |
| 679 *err = U_BUFFER_OVERFLOW_ERROR; |
| 680 } |
| 681 |
| 682 args->target = myTarget; |
| 683 args->source = (const char *) mySource; |
| 684 args->offsets = myOffsets; |
| 685 } |
| 686 |
| 687 static void |
| 688 T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, |
| 689 UErrorCode * err) |
| 690 { |
| 691 const UChar *mySource = args->source; |
| 692 unsigned char *myTarget; |
| 693 const UChar *sourceLimit = args->sourceLimit; |
| 694 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
| 695 UChar32 ch, ch2; |
| 696 unsigned int indexToWrite; |
| 697 unsigned char temp[sizeof(uint32_t)]; |
| 698 |
| 699 if(mySource >= sourceLimit) { |
| 700 /* no input, nothing to do */ |
| 701 return; |
| 702 } |
| 703 |
| 704 /* write the BOM if necessary */ |
| 705 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
| 706 static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; |
| 707 ucnv_fromUWriteBytes(args->converter, |
| 708 bom, 4, |
| 709 &args->target, args->targetLimit, |
| 710 &args->offsets, -1, |
| 711 err); |
| 712 args->converter->fromUnicodeStatus=0; |
| 713 } |
| 714 |
| 715 myTarget = (unsigned char *) args->target; |
| 716 temp[3] = 0; |
| 717 |
| 718 if (args->converter->fromUChar32) |
| 719 { |
| 720 ch = args->converter->fromUChar32; |
| 721 args->converter->fromUChar32 = 0; |
| 722 goto lowsurogate; |
| 723 } |
| 724 |
| 725 while (mySource < sourceLimit && myTarget < targetLimit) |
| 726 { |
| 727 ch = *(mySource++); |
| 728 |
| 729 if (UTF_IS_SURROGATE(ch)) { |
| 730 if (U_IS_LEAD(ch)) |
| 731 { |
| 732 lowsurogate: |
| 733 if (mySource < sourceLimit) |
| 734 { |
| 735 ch2 = *mySource; |
| 736 if (U_IS_TRAIL(ch2)) { |
| 737 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 +
SURROGATE_LOW_BASE; |
| 738 mySource++; |
| 739 } |
| 740 else { |
| 741 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 742 /* callback(illegal) */ |
| 743 args->converter->fromUChar32 = ch; |
| 744 *err = U_ILLEGAL_CHAR_FOUND; |
| 745 break; |
| 746 } |
| 747 } |
| 748 else { |
| 749 /* ran out of source */ |
| 750 args->converter->fromUChar32 = ch; |
| 751 if (args->flush) { |
| 752 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 753 /* callback(illegal) */ |
| 754 *err = U_ILLEGAL_CHAR_FOUND; |
| 755 } |
| 756 break; |
| 757 } |
| 758 } |
| 759 else { |
| 760 /* this is an unmatched trail code unit (2nd surrogate) */ |
| 761 /* callback(illegal) */ |
| 762 args->converter->fromUChar32 = ch; |
| 763 *err = U_ILLEGAL_CHAR_FOUND; |
| 764 break; |
| 765 } |
| 766 } |
| 767 |
| 768 /* We cannot get any larger than 10FFFF because we are coming from UTF-1
6 */ |
| 769 temp[2] = (uint8_t) (ch >> 16 & 0x1F); |
| 770 temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & F
F) */ |
| 771 temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & F
F) */ |
| 772 |
| 773 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrit
e++) |
| 774 { |
| 775 if (myTarget < targetLimit) |
| 776 { |
| 777 *(myTarget++) = temp[indexToWrite]; |
| 778 } |
| 779 else |
| 780 { |
| 781 args->converter->charErrorBuffer[args->converter->charErrorBuffe
rLength++] = temp[indexToWrite]; |
| 782 *err = U_BUFFER_OVERFLOW_ERROR; |
| 783 } |
| 784 } |
| 785 } |
| 786 |
| 787 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
| 788 { |
| 789 *err = U_BUFFER_OVERFLOW_ERROR; |
| 790 } |
| 791 |
| 792 args->target = (char *) myTarget; |
| 793 args->source = mySource; |
| 794 } |
| 795 |
| 796 static void |
| 797 T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, |
| 798 UErrorCode * err) |
| 799 { |
| 800 const UChar *mySource = args->source; |
| 801 unsigned char *myTarget; |
| 802 int32_t *myOffsets; |
| 803 const UChar *sourceLimit = args->sourceLimit; |
| 804 const unsigned char *targetLimit = (unsigned char *) args->targetLimit; |
| 805 UChar32 ch, ch2; |
| 806 unsigned int indexToWrite; |
| 807 unsigned char temp[sizeof(uint32_t)]; |
| 808 int32_t offsetNum = 0; |
| 809 |
| 810 if(mySource >= sourceLimit) { |
| 811 /* no input, nothing to do */ |
| 812 return; |
| 813 } |
| 814 |
| 815 /* write the BOM if necessary */ |
| 816 if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { |
| 817 static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; |
| 818 ucnv_fromUWriteBytes(args->converter, |
| 819 bom, 4, |
| 820 &args->target, args->targetLimit, |
| 821 &args->offsets, -1, |
| 822 err); |
| 823 args->converter->fromUnicodeStatus=0; |
| 824 } |
| 825 |
| 826 myTarget = (unsigned char *) args->target; |
| 827 myOffsets = args->offsets; |
| 828 temp[3] = 0; |
| 829 |
| 830 if (args->converter->fromUChar32) |
| 831 { |
| 832 ch = args->converter->fromUChar32; |
| 833 args->converter->fromUChar32 = 0; |
| 834 goto lowsurogate; |
| 835 } |
| 836 |
| 837 while (mySource < sourceLimit && myTarget < targetLimit) |
| 838 { |
| 839 ch = *(mySource++); |
| 840 |
| 841 if (UTF_IS_SURROGATE(ch)) { |
| 842 if (U_IS_LEAD(ch)) |
| 843 { |
| 844 lowsurogate: |
| 845 if (mySource < sourceLimit) |
| 846 { |
| 847 ch2 = *mySource; |
| 848 if (U_IS_TRAIL(ch2)) |
| 849 { |
| 850 ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 +
SURROGATE_LOW_BASE; |
| 851 mySource++; |
| 852 } |
| 853 else { |
| 854 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 855 /* callback(illegal) */ |
| 856 args->converter->fromUChar32 = ch; |
| 857 *err = U_ILLEGAL_CHAR_FOUND; |
| 858 break; |
| 859 } |
| 860 } |
| 861 else { |
| 862 /* ran out of source */ |
| 863 args->converter->fromUChar32 = ch; |
| 864 if (args->flush) { |
| 865 /* this is an unmatched trail code unit (2nd surrogate)
*/ |
| 866 /* callback(illegal) */ |
| 867 *err = U_ILLEGAL_CHAR_FOUND; |
| 868 } |
| 869 break; |
| 870 } |
| 871 } |
| 872 else { |
| 873 /* this is an unmatched trail code unit (2nd surrogate) */ |
| 874 /* callback(illegal) */ |
| 875 args->converter->fromUChar32 = ch; |
| 876 *err = U_ILLEGAL_CHAR_FOUND; |
| 877 break; |
| 878 } |
| 879 } |
| 880 |
| 881 /* We cannot get any larger than 10FFFF because we are coming from UTF-1
6 */ |
| 882 temp[2] = (uint8_t) (ch >> 16 & 0x1F); |
| 883 temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & F
F) */ |
| 884 temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & F
F) */ |
| 885 |
| 886 for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrit
e++) |
| 887 { |
| 888 if (myTarget < targetLimit) |
| 889 { |
| 890 *(myTarget++) = temp[indexToWrite]; |
| 891 *(myOffsets++) = offsetNum; |
| 892 } |
| 893 else |
| 894 { |
| 895 args->converter->charErrorBuffer[args->converter->charErrorBuffe
rLength++] = temp[indexToWrite]; |
| 896 *err = U_BUFFER_OVERFLOW_ERROR; |
| 897 } |
| 898 } |
| 899 offsetNum = offsetNum + 1 + (temp[2] != 0); |
| 900 } |
| 901 |
| 902 if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) |
| 903 { |
| 904 *err = U_BUFFER_OVERFLOW_ERROR; |
| 905 } |
| 906 |
| 907 args->target = (char *) myTarget; |
| 908 args->source = mySource; |
| 909 args->offsets = myOffsets; |
| 910 } |
| 911 |
| 912 static UChar32 |
| 913 T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, |
| 914 UErrorCode* err) |
| 915 { |
| 916 const uint8_t *mySource; |
| 917 UChar32 myUChar; |
| 918 int32_t length; |
| 919 |
| 920 mySource = (const uint8_t *)args->source; |
| 921 if (mySource >= (const uint8_t *)args->sourceLimit) |
| 922 { |
| 923 /* no input */ |
| 924 *err = U_INDEX_OUTOFBOUNDS_ERROR; |
| 925 return 0xffff; |
| 926 } |
| 927 |
| 928 length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); |
| 929 if (length < 4) |
| 930 { |
| 931 /* got a partial character */ |
| 932 uprv_memcpy(args->converter->toUBytes, mySource, length); |
| 933 args->converter->toULength = (int8_t)length; |
| 934 args->source = (const char *)(mySource + length); |
| 935 *err = U_TRUNCATED_CHAR_FOUND; |
| 936 return 0xffff; |
| 937 } |
| 938 |
| 939 /* Don't even try to do a direct cast because the value may be on an odd add
ress. */ |
| 940 myUChar = ((UChar32)mySource[3] << 24) |
| 941 | ((UChar32)mySource[2] << 16) |
| 942 | ((UChar32)mySource[1] << 8) |
| 943 | ((UChar32)mySource[0]); |
| 944 |
| 945 args->source = (const char *)(mySource + 4); |
| 946 if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { |
| 947 return myUChar; |
| 948 } |
| 949 |
| 950 uprv_memcpy(args->converter->toUBytes, mySource, 4); |
| 951 args->converter->toULength = 4; |
| 952 |
| 953 *err = U_ILLEGAL_CHAR_FOUND; |
| 954 return 0xffff; |
| 955 } |
| 956 |
| 957 static const UConverterImpl _UTF32LEImpl = { |
| 958 UCNV_UTF32_LittleEndian, |
| 959 |
| 960 NULL, |
| 961 NULL, |
| 962 |
| 963 NULL, |
| 964 NULL, |
| 965 NULL, |
| 966 |
| 967 T_UConverter_toUnicode_UTF32_LE, |
| 968 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, |
| 969 T_UConverter_fromUnicode_UTF32_LE, |
| 970 T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, |
| 971 T_UConverter_getNextUChar_UTF32_LE, |
| 972 |
| 973 NULL, |
| 974 NULL, |
| 975 NULL, |
| 976 NULL, |
| 977 ucnv_getNonSurrogateUnicodeSet |
| 978 }; |
| 979 |
| 980 /* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32
*/ |
| 981 static const UConverterStaticData _UTF32LEStaticData = { |
| 982 sizeof(UConverterStaticData), |
| 983 "UTF-32LE", |
| 984 1234, |
| 985 UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, |
| 986 { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, |
| 987 0, |
| 988 0, |
| 989 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
| 990 }; |
| 991 |
| 992 |
| 993 const UConverterSharedData _UTF32LEData = { |
| 994 sizeof(UConverterSharedData), ~((uint32_t) 0), |
| 995 NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl, |
| 996 0 |
| 997 }; |
| 998 |
| 999 /* UTF-32 (Detect BOM) ------------------------------------------------------ */ |
| 1000 |
| 1001 /* |
| 1002 * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE |
| 1003 * accordingly. |
| 1004 * |
| 1005 * State values: |
| 1006 * 0 initial state |
| 1007 * 1 saw 00 |
| 1008 * 2 saw 00 00 |
| 1009 * 3 saw 00 00 FE |
| 1010 * 4 - |
| 1011 * 5 saw FF |
| 1012 * 6 saw FF FE |
| 1013 * 7 saw FF FE 00 |
| 1014 * 8 UTF-32BE mode |
| 1015 * 9 UTF-32LE mode |
| 1016 * |
| 1017 * During detection: state&3==number of matching bytes so far. |
| 1018 * |
| 1019 * On output, emit U+FEFF as the first code point. |
| 1020 */ |
| 1021 |
| 1022 static void |
| 1023 _UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { |
| 1024 if(choice<=UCNV_RESET_TO_UNICODE) { |
| 1025 /* reset toUnicode: state=0 */ |
| 1026 cnv->mode=0; |
| 1027 } |
| 1028 if(choice!=UCNV_RESET_TO_UNICODE) { |
| 1029 /* reset fromUnicode: prepare to output the UTF-32PE BOM */ |
| 1030 cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; |
| 1031 } |
| 1032 } |
| 1033 |
| 1034 static void |
| 1035 _UTF32Open(UConverter *cnv, |
| 1036 UConverterLoadArgs *pArgs, |
| 1037 UErrorCode *pErrorCode) { |
| 1038 _UTF32Reset(cnv, UCNV_RESET_BOTH); |
| 1039 } |
| 1040 |
/*
 * Both BOM byte sequences back to back, so that a detection state can index
 * the table directly and (state&4) selects the start of the matching BOM.
 */
static const char utf32BOM[8]={
    0, 0, (char)0xfe, (char)0xff,   /* [0..3] UTF-32BE BOM: 00 00 FE FF */
    (char)0xff, (char)0xfe, 0, 0    /* [4..7] UTF-32LE BOM: FF FE 00 00 */
};
| 1042 |
| 1043 static void |
| 1044 _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, |
| 1045 UErrorCode *pErrorCode) { |
| 1046 UConverter *cnv=pArgs->converter; |
| 1047 const char *source=pArgs->source; |
| 1048 const char *sourceLimit=pArgs->sourceLimit; |
| 1049 int32_t *offsets=pArgs->offsets; |
| 1050 |
| 1051 int32_t state, offsetDelta; |
| 1052 char b; |
| 1053 |
| 1054 state=cnv->mode; |
| 1055 |
| 1056 /* |
| 1057 * If we detect a BOM in this buffer, then we must add the BOM size to the |
| 1058 * offsets because the actual converter function will not see and count the
BOM. |
| 1059 * offsetDelta will have the number of the BOM bytes that are in the current
buffer. |
| 1060 */ |
| 1061 offsetDelta=0; |
| 1062 |
| 1063 while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { |
| 1064 switch(state) { |
| 1065 case 0: |
| 1066 b=*source; |
| 1067 if(b==0) { |
| 1068 state=1; /* could be 00 00 FE FF */ |
| 1069 } else if(b==(char)0xff) { |
| 1070 state=5; /* could be FF FE 00 00 */ |
| 1071 } else { |
| 1072 state=8; /* default to UTF-32BE */ |
| 1073 continue; |
| 1074 } |
| 1075 ++source; |
| 1076 break; |
| 1077 case 1: |
| 1078 case 2: |
| 1079 case 3: |
| 1080 case 5: |
| 1081 case 6: |
| 1082 case 7: |
| 1083 if(*source==utf32BOM[state]) { |
| 1084 ++state; |
| 1085 ++source; |
| 1086 if(state==4) { |
| 1087 state=8; /* detect UTF-32BE */ |
| 1088 offsetDelta=(int32_t)(source-pArgs->source); |
| 1089 } else if(state==8) { |
| 1090 state=9; /* detect UTF-32LE */ |
| 1091 offsetDelta=(int32_t)(source-pArgs->source); |
| 1092 } |
| 1093 } else { |
| 1094 /* switch to UTF-32BE and pass the previous bytes */ |
| 1095 int32_t count=(int32_t)(source-pArgs->source); /* number of byte
s from this buffer */ |
| 1096 |
| 1097 /* reset the source */ |
| 1098 source=pArgs->source; |
| 1099 |
| 1100 if(count==(state&3)) { |
| 1101 /* simple: all in the same buffer, just reset source */ |
| 1102 } else { |
| 1103 UBool oldFlush=pArgs->flush; |
| 1104 |
| 1105 /* some of the bytes are from a previous buffer, replay thos
e first */ |
| 1106 pArgs->source=utf32BOM+(state&4); /* select the correct BOM
*/ |
| 1107 pArgs->sourceLimit=pArgs->source+((state&3)-count); /* repla
y previous bytes */ |
| 1108 pArgs->flush=FALSE; /* this sourceLimit is not the real sour
ce stream limit */ |
| 1109 |
| 1110 /* no offsets: bytes from previous buffer, and not enough fo
r output */ |
| 1111 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
| 1112 |
| 1113 /* restore real pointers; pArgs->source will be set in case
8/9 */ |
| 1114 pArgs->sourceLimit=sourceLimit; |
| 1115 pArgs->flush=oldFlush; |
| 1116 } |
| 1117 state=8; |
| 1118 continue; |
| 1119 } |
| 1120 break; |
| 1121 case 8: |
| 1122 /* call UTF-32BE */ |
| 1123 pArgs->source=source; |
| 1124 if(offsets==NULL) { |
| 1125 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
| 1126 } else { |
| 1127 T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); |
| 1128 } |
| 1129 source=pArgs->source; |
| 1130 break; |
| 1131 case 9: |
| 1132 /* call UTF-32LE */ |
| 1133 pArgs->source=source; |
| 1134 if(offsets==NULL) { |
| 1135 T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); |
| 1136 } else { |
| 1137 T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); |
| 1138 } |
| 1139 source=pArgs->source; |
| 1140 break; |
| 1141 default: |
| 1142 break; /* does not occur */ |
| 1143 } |
| 1144 } |
| 1145 |
| 1146 /* add BOM size to offsets - see comment at offsetDelta declaration */ |
| 1147 if(offsets!=NULL && offsetDelta!=0) { |
| 1148 int32_t *offsetsLimit=pArgs->offsets; |
| 1149 while(offsets<offsetsLimit) { |
| 1150 *offsets++ += offsetDelta; |
| 1151 } |
| 1152 } |
| 1153 |
| 1154 pArgs->source=source; |
| 1155 |
| 1156 if(source==sourceLimit && pArgs->flush) { |
| 1157 /* handle truncated input */ |
| 1158 switch(state) { |
| 1159 case 0: |
| 1160 break; /* no input at all, nothing to do */ |
| 1161 case 8: |
| 1162 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
| 1163 break; |
| 1164 case 9: |
| 1165 T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); |
| 1166 break; |
| 1167 default: |
| 1168 /* handle 0<state<8: call UTF-32BE with too-short input */ |
| 1169 pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ |
| 1170 pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ |
| 1171 |
| 1172 /* no offsets: not enough for output */ |
| 1173 T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); |
| 1174 pArgs->source=source; |
| 1175 pArgs->sourceLimit=sourceLimit; |
| 1176 state=8; |
| 1177 break; |
| 1178 } |
| 1179 } |
| 1180 |
| 1181 cnv->mode=state; |
| 1182 } |
| 1183 |
| 1184 static UChar32 |
| 1185 _UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, |
| 1186 UErrorCode *pErrorCode) { |
| 1187 switch(pArgs->converter->mode) { |
| 1188 case 8: |
| 1189 return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); |
| 1190 case 9: |
| 1191 return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); |
| 1192 default: |
| 1193 return UCNV_GET_NEXT_UCHAR_USE_TO_U; |
| 1194 } |
| 1195 } |
| 1196 |
| 1197 static const UConverterImpl _UTF32Impl = { |
| 1198 UCNV_UTF32, |
| 1199 |
| 1200 NULL, |
| 1201 NULL, |
| 1202 |
| 1203 _UTF32Open, |
| 1204 NULL, |
| 1205 _UTF32Reset, |
| 1206 |
| 1207 _UTF32ToUnicodeWithOffsets, |
| 1208 _UTF32ToUnicodeWithOffsets, |
| 1209 #if U_IS_BIG_ENDIAN |
| 1210 T_UConverter_fromUnicode_UTF32_BE, |
| 1211 T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, |
| 1212 #else |
| 1213 T_UConverter_fromUnicode_UTF32_LE, |
| 1214 T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, |
| 1215 #endif |
| 1216 _UTF32GetNextUChar, |
| 1217 |
| 1218 NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ |
| 1219 NULL, |
| 1220 NULL, |
| 1221 NULL, |
| 1222 ucnv_getNonSurrogateUnicodeSet |
| 1223 }; |
| 1224 |
/* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */
| 1226 static const UConverterStaticData _UTF32StaticData = { |
| 1227 sizeof(UConverterStaticData), |
| 1228 "UTF-32", |
| 1229 1236, |
| 1230 UCNV_IBM, UCNV_UTF32, 4, 4, |
| 1231 #if U_IS_BIG_ENDIAN |
| 1232 { 0, 0, 0xff, 0xfd }, 4, |
| 1233 #else |
| 1234 { 0xfd, 0xff, 0, 0 }, 4, |
| 1235 #endif |
| 1236 FALSE, FALSE, |
| 1237 0, |
| 1238 0, |
| 1239 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ |
| 1240 }; |
| 1241 |
| 1242 const UConverterSharedData _UTF32Data = { |
| 1243 sizeof(UConverterSharedData), ~((uint32_t) 0), |
| 1244 NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl, |
| 1245 0 |
| 1246 }; |
| 1247 |
| 1248 #endif |
OLD | NEW |