OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2002-2012, International Business Machines | 3 * Copyright (C) 2002-2012, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * file name: ucnv_u8.c | 6 * file name: ucnv_u8.c |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
9 * indentation:4 | 9 * indentation:4 |
10 * | 10 * |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
80 | 80 |
81 /* | 81 /* |
82 * Starting with Unicode 3.0.1: | 82 * Starting with Unicode 3.0.1: |
83 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; | 83 * UTF-8 byte sequences of length N _must_ encode code points of or above utf8_minChar32[N]; |
84 * byte sequences with more than 4 bytes are illegal in UTF-8, | 84 * byte sequences with more than 4 bytes are illegal in UTF-8, |
85 * which is tested with impossible values for them | 85 * which is tested with impossible values for them |
86 */ | 86 */ |
87 static const uint32_t | 87 static const uint32_t |
88 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; | 88 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; |
89 | 89 |
| 90 static UBool hasCESU8Data(const UConverter *cnv) /* Reports whether cnv's sharedData is the CESU-8 shared data object (&_CESU8Data); always FALSE when UCONFIG_NO_NON_HTML5_CONVERSION is non-zero. */ |
| 91 { |
| 92 #if UCONFIG_NO_NON_HTML5_CONVERSION |
| 93 return FALSE; /* NOTE(review): presumably _CESU8Data is not built in this configuration, so it must not be referenced here - confirm */ |
| 94 #else |
| 95 return (UBool)(cnv->sharedData == &_CESU8Data); /* pointer identity test against the single shared instance, not a content comparison */ |
| 96 #endif |
| 97 } |
| 98 |
90 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, | 99 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, |
91 UErrorCode * err) | 100 UErrorCode * err) |
92 { | 101 { |
93 UConverter *cnv = args->converter; | 102 UConverter *cnv = args->converter; |
94 const unsigned char *mySource = (unsigned char *) args->source; | 103 const unsigned char *mySource = (unsigned char *) args->source; |
95 UChar *myTarget = args->target; | 104 UChar *myTarget = args->target; |
96 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | 105 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
97 const UChar *targetLimit = args->targetLimit; | 106 const UChar *targetLimit = args->targetLimit; |
98 unsigned char *toUBytes = cnv->toUBytes; | 107 unsigned char *toUBytes = cnv->toUBytes; |
99 UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | 108 UBool isCESU8 = hasCESU8Data(cnv); |
100 uint32_t ch, ch2 = 0; | 109 uint32_t ch, ch2 = 0; |
101 int32_t i, inBytes; | 110 int32_t i, inBytes; |
102 | 111 |
103 /* Restore size of current sequence */ | 112 /* Restore size of current sequence */ |
104 if (cnv->toUnicodeStatus && myTarget < targetLimit) | 113 if (cnv->toUnicodeStatus && myTarget < targetLimit) |
105 { | 114 { |
106 inBytes = cnv->mode; /* restore # of bytes to consume */ | 115 inBytes = cnv->mode; /* restore # of bytes to consume */ |
107 i = cnv->toULength; /* restore # of bytes consumed */ | 116 i = cnv->toULength; /* restore # of bytes consumed */ |
108 cnv->toULength = 0; | 117 cnv->toULength = 0; |
109 | 118 |
110 ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ | 119 ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ |
111 cnv->toUnicodeStatus = 0; | 120 cnv->toUnicodeStatus = 0; |
112 goto morebytes; | 121 goto morebytes; |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 UErrorCode * err) | 228 UErrorCode * err) |
220 { | 229 { |
221 UConverter *cnv = args->converter; | 230 UConverter *cnv = args->converter; |
222 const unsigned char *mySource = (unsigned char *) args->source; | 231 const unsigned char *mySource = (unsigned char *) args->source; |
223 UChar *myTarget = args->target; | 232 UChar *myTarget = args->target; |
224 int32_t *myOffsets = args->offsets; | 233 int32_t *myOffsets = args->offsets; |
225 int32_t offsetNum = 0; | 234 int32_t offsetNum = 0; |
226 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | 235 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
227 const UChar *targetLimit = args->targetLimit; | 236 const UChar *targetLimit = args->targetLimit; |
228 unsigned char *toUBytes = cnv->toUBytes; | 237 unsigned char *toUBytes = cnv->toUBytes; |
229 UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | 238 UBool isCESU8 = hasCESU8Data(cnv); |
230 uint32_t ch, ch2 = 0; | 239 uint32_t ch, ch2 = 0; |
231 int32_t i, inBytes; | 240 int32_t i, inBytes; |
232 | 241 |
233 /* Restore size of current sequence */ | 242 /* Restore size of current sequence */ |
234 if (cnv->toUnicodeStatus && myTarget < targetLimit) | 243 if (cnv->toUnicodeStatus && myTarget < targetLimit) |
235 { | 244 { |
236 inBytes = cnv->mode; /* restore # of bytes to consume */ | 245 inBytes = cnv->mode; /* restore # of bytes to consume */ |
237 i = cnv->toULength; /* restore # of bytes consumed */ | 246 i = cnv->toULength; /* restore # of bytes consumed */ |
238 cnv->toULength = 0; | 247 cnv->toULength = 0; |
239 | 248 |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
350 { | 359 { |
351 UConverter *cnv = args->converter; | 360 UConverter *cnv = args->converter; |
352 const UChar *mySource = args->source; | 361 const UChar *mySource = args->source; |
353 const UChar *sourceLimit = args->sourceLimit; | 362 const UChar *sourceLimit = args->sourceLimit; |
354 uint8_t *myTarget = (uint8_t *) args->target; | 363 uint8_t *myTarget = (uint8_t *) args->target; |
355 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; | 364 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; |
356 uint8_t *tempPtr; | 365 uint8_t *tempPtr; |
357 UChar32 ch; | 366 UChar32 ch; |
358 uint8_t tempBuf[4]; | 367 uint8_t tempBuf[4]; |
359 int32_t indexToWrite; | 368 int32_t indexToWrite; |
360 UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 369 UBool isNotCESU8 = !hasCESU8Data(cnv); |
361 | 370 |
362 if (cnv->fromUChar32 && myTarget < targetLimit) | 371 if (cnv->fromUChar32 && myTarget < targetLimit) |
363 { | 372 { |
364 ch = cnv->fromUChar32; | 373 ch = cnv->fromUChar32; |
365 cnv->fromUChar32 = 0; | 374 cnv->fromUChar32 = 0; |
366 goto lowsurrogate; | 375 goto lowsurrogate; |
367 } | 376 } |
368 | 377 |
369 while (mySource < sourceLimit && myTarget < targetLimit) | 378 while (mySource < sourceLimit && myTarget < targetLimit) |
370 { | 379 { |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
466 const UChar *mySource = args->source; | 475 const UChar *mySource = args->source; |
467 int32_t *myOffsets = args->offsets; | 476 int32_t *myOffsets = args->offsets; |
468 const UChar *sourceLimit = args->sourceLimit; | 477 const UChar *sourceLimit = args->sourceLimit; |
469 uint8_t *myTarget = (uint8_t *) args->target; | 478 uint8_t *myTarget = (uint8_t *) args->target; |
470 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; | 479 const uint8_t *targetLimit = (uint8_t *) args->targetLimit; |
471 uint8_t *tempPtr; | 480 uint8_t *tempPtr; |
472 UChar32 ch; | 481 UChar32 ch; |
473 int32_t offsetNum, nextSourceIndex; | 482 int32_t offsetNum, nextSourceIndex; |
474 int32_t indexToWrite; | 483 int32_t indexToWrite; |
475 uint8_t tempBuf[4]; | 484 uint8_t tempBuf[4]; |
476 UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 485 UBool isNotCESU8 = !hasCESU8Data(cnv); |
477 | 486 |
478 if (cnv->fromUChar32 && myTarget < targetLimit) | 487 if (cnv->fromUChar32 && myTarget < targetLimit) |
479 { | 488 { |
480 ch = cnv->fromUChar32; | 489 ch = cnv->fromUChar32; |
481 cnv->fromUChar32 = 0; | 490 cnv->fromUChar32 = 0; |
482 offsetNum = -1; | 491 offsetNum = -1; |
483 nextSourceIndex = 0; | 492 nextSourceIndex = 0; |
484 goto lowsurrogate; | 493 goto lowsurrogate; |
485 } else { | 494 } else { |
486 offsetNum = 0; | 495 offsetNum = 0; |
(...skipping 590 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1077 }; | 1086 }; |
1078 | 1087 |
1079 | 1088 |
1080 const UConverterSharedData _CESU8Data={ | 1089 const UConverterSharedData _CESU8Data={ |
1081 sizeof(UConverterSharedData), ~((uint32_t) 0), | 1090 sizeof(UConverterSharedData), ~((uint32_t) 0), |
1082 NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, | 1091 NULL, NULL, &_CESU8StaticData, FALSE, &_CESU8Impl, |
1083 0 | 1092 0 |
1084 }; | 1093 }; |
1085 | 1094 |
1086 #endif | 1095 #endif |
OLD | NEW |