| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2002-2012, International Business Machines | 3 * Copyright (C) 2002-2012, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 * file name: ucnv_u8.c | 6 * file name: ucnv_u8.c |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 80 | 80 |
| 81 /* | 81 /* |
| 82 * Starting with Unicode 3.0.1: | 82 * Starting with Unicode 3.0.1: |
| 83 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; | 83 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; |
| 84 * byte sequences with more than 4 bytes are illegal in UTF-8, | 84 * byte sequences with more than 4 bytes are illegal in UTF-8, |
| 85 * which is tested with impossible values for them | 85 * which is tested with impossible values for them |
| 86 */ | 86 */ |
| 87 static const uint32_t | 87 static const uint32_t |
| 88 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; | 88 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; |
| 89 | 89 |
| 90 static UBool hasCESU8Data(const UConverter *cnv) |
| 91 { |
| 92 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 93 return FALSE; |
| 94 #else |
| 95 return (UBool)(cnv->sharedData == &_CESU8Data); |
| 96 #endif |
| 97 } |
| 98 |
| 90 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, | 99 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, |
| 91 UErrorCode * err) | 100 UErrorCode * err) |
| 92 { | 101 { |
| 93 UConverter *cnv = args->converter; | 102 UConverter *cnv = args->converter; |
| 94 const unsigned char *mySource = (unsigned char *) args->source; | 103 const unsigned char *mySource = (unsigned char *) args->source; |
| 95 UChar *myTarget = args->target; | 104 UChar *myTarget = args->target; |
| 96 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | 105 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| 97 const UChar *targetLimit = args->targetLimit; | 106 const UChar *targetLimit = args->targetLimit; |
| 98 unsigned char *toUBytes = cnv->toUBytes; | 107 unsigned char *toUBytes = cnv->toUBytes; |
| 99 UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | 108 UBool isCESU8 = hasCESU8Data(cnv); |
| 100 uint32_t ch, ch2 = 0; | 109 uint32_t ch, ch2 = 0; |
| 101 int32_t i, inBytes; | 110 int32_t i, inBytes; |
| 102 | 111 |
| 103 /* Restore size of current sequence */ | 112 /* Restore size of current sequence */ |
| 104 if (cnv->toUnicodeStatus && myTarget < targetLimit) | 113 if (cnv->toUnicodeStatus && myTarget < targetLimit) |
| 105 { | 114 { |
| 106 inBytes = cnv->mode; /* restore # of bytes to consume */ | 115 inBytes = cnv->mode; /* restore # of bytes to consume */ |
| 107 i = cnv->toULength; /* restore # of bytes consumed */ | 116 i = cnv->toULength; /* restore # of bytes consumed */ |
| 108 cnv->toULength = 0; | 117 cnv->toULength = 0; |
| 109 | 118 |
| 110 ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ | 119 ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ |
| 111 cnv->toUnicodeStatus = 0; | 120 cnv->toUnicodeStatus = 0; |
| 112 goto morebytes; | 121 goto morebytes; |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 219 UErrorCode * err) | 228 UErrorCode * err) |
| 220 { | 229 { |
| 221 UConverter *cnv = args->converter; | 230 UConverter *cnv = args->converter; |
| 222 const unsigned char *mySource = (unsigned char *) args->source; | 231 const unsigned char *mySource = (unsigned char *) args->source; |
| 223 UChar *myTarget = args->target; | 232 UChar *myTarget = args->target; |
| 224 int32_t *myOffsets = args->offsets; | 233 int32_t *myOffsets = args->offsets; |
| 225 int32_t offsetNum = 0; | 234 int32_t offsetNum = 0; |
| 226 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | 235 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| 227 const UChar *targetLimit = args->targetLimit; | 236 const UChar *targetLimit = args->targetLimit; |
| 228 unsigned char *toUBytes = cnv->toUBytes; | 237 unsigned char *toUBytes = cnv->toUBytes; |
| 229 UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | 238 UBool isCESU8 = hasCESU8Data(cnv); |
| 230 uint32_t ch, ch2 = 0; | 239 uint32_t ch, ch2 = 0; |
| 231 int32_t i, inBytes; | 240 int32_t i, inBytes; |
| 232 | 241 |
| 233 /* Restore size of current sequence */ | 242 /* Restore size of current sequence */ |
| 234 if (cnv->toUnicodeStatus && myTarget < targetLimit) | 243 if (cnv->toUnicodeStatus && myTarget < targetLimit) |
| 235 { | 244 { |
| 236 inBytes = cnv->mode; /* restore # of bytes to consume */ | 245 inBytes = cnv->mode; /* restore # of bytes to consume */ |
| 237 i = cnv->toULength; /* restore # of bytes consumed */ | 246 i = cnv->toULength; /* restore # of bytes consumed */ |
| 238 cnv->toULength = 0; | 247 cnv->toULength = 0; |
| 239 | 248 |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 350 { | 359 { |
| 351 UConverter *cnv = args->converter; | 360 UConverter *cnv = args->converter; |
| 352 const UChar *mySource = args->source; | 361 const UChar *mySource = args->source; |
| 353 const UChar *sourceLimit = args->sourceLimit; | 362 const UChar *sourceLimit = args->sourceLimit; |
| 354 uint8_t *myTarget = (uint8_t *) args->target; | 363 uint8_t *myTarget = (uint8_t *) args->target; |
| 355 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; | 364 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; |
| 356 uint8_t *tempPtr; | 365 uint8_t *tempPtr; |
| 357 UChar32 ch; | 366 UChar32 ch; |
| 358 uint8_t tempBuf[4]; | 367 uint8_t tempBuf[4]; |
| 359 int32_t indexToWrite; | 368 int32_t indexToWrite; |
| 360 UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 369 UBool isNotCESU8 = !hasCESU8Data(cnv); |
| 361 | 370 |
| 362 if (cnv->fromUChar32 && myTarget < targetLimit) | 371 if (cnv->fromUChar32 && myTarget < targetLimit) |
| 363 { | 372 { |
| 364 ch = cnv->fromUChar32; | 373 ch = cnv->fromUChar32; |
| 365 cnv->fromUChar32 = 0; | 374 cnv->fromUChar32 = 0; |
| 366 goto lowsurrogate; | 375 goto lowsurrogate; |
| 367 } | 376 } |
| 368 | 377 |
| 369 while (mySource < sourceLimit && myTarget < targetLimit) | 378 while (mySource < sourceLimit && myTarget < targetLimit) |
| 370 { | 379 { |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 466 const UChar *mySource = args->source; | 475 const UChar *mySource = args->source; |
| 467 int32_t *myOffsets = args->offsets; | 476 int32_t *myOffsets = args->offsets; |
| 468 const UChar *sourceLimit = args->sourceLimit; | 477 const UChar *sourceLimit = args->sourceLimit; |
| 469 uint8_t *myTarget = (uint8_t *) args->target; | 478 uint8_t *myTarget = (uint8_t *) args->target; |
| 470 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; | 479 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; |
| 471 uint8_t *tempPtr; | 480 uint8_t *tempPtr; |
| 472 UChar32 ch; | 481 UChar32 ch; |
| 473 int32_t offsetNum, nextSourceIndex; | 482 int32_t offsetNum, nextSourceIndex; |
| 474 int32_t indexToWrite; | 483 int32_t indexToWrite; |
| 475 uint8_t tempBuf[4]; | 484 uint8_t tempBuf[4]; |
| 476 UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 485 UBool isNotCESU8 = !hasCESU8Data(cnv); |
| 477 | 486 |
| 478 if (cnv->fromUChar32 && myTarget < targetLimit) | 487 if (cnv->fromUChar32 && myTarget < targetLimit) |
| 479 { | 488 { |
| 480 ch = cnv->fromUChar32; | 489 ch = cnv->fromUChar32; |
| 481 cnv->fromUChar32 = 0; | 490 cnv->fromUChar32 = 0; |
| 482 offsetNum = -1; | 491 offsetNum = -1; |
| 483 nextSourceIndex = 0; | 492 nextSourceIndex = 0; |
| 484 goto lowsurrogate; | 493 goto lowsurrogate; |
| 485 } else { | 494 } else { |
| 486 offsetNum = 0; | 495 offsetNum = 0; |
| (...skipping 590 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1077 }; | 1086 }; |
| 1078 | 1087 |
| 1079 | 1088 |
| 1080 const UConverterSharedData _CESU8Data={ | 1089 const UConverterSharedData _CESU8Data={ |
| 1081 sizeof(UConverterSharedData), ~((uint32_t) 0), | 1090 sizeof(UConverterSharedData), ~((uint32_t) 0), |
| 1082 NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, | 1091 NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, |
| 1083 0 | 1092 0 |
| 1084 }; | 1093 }; |
| 1085 | 1094 |
| 1086 #endif | 1095 #endif |
| OLD | NEW |