Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(362)

Side by Side Diff: third_party/icu38/source/common/ucnv2022.c

Issue 52030: Apply ICU patches for ICU tickets 6175 (ISO-2022 and ... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/
Patch Set: '' Created 11 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/icu38/source/common/ucnv.c ('k') | third_party/icu38/source/common/ucnv_bld.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ********************************************************************** 2 **********************************************************************
3 * Copyright (C) 2000-2007, International Business Machines 3 * Copyright (C) 2000-2007, International Business Machines
4 * Corporation and others. All Rights Reserved. 4 * Corporation and others. All Rights Reserved.
5 ********************************************************************** 5 **********************************************************************
6 * file name: ucnv2022.c 6 * file name: ucnv2022.c
7 * encoding: US-ASCII 7 * encoding: US-ASCII
8 * tab size: 8 (not used) 8 * tab size: 8 (not used)
9 * indentation:4 9 * indentation:4
10 * 10 *
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
194 typedef struct{ 194 typedef struct{
195 UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; 195 UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];
196 UConverter *currentConverter; 196 UConverter *currentConverter;
197 Cnv2022Type currentType; 197 Cnv2022Type currentType;
198 ISO2022State toU2022State, fromU2022State; 198 ISO2022State toU2022State, fromU2022State;
199 uint32_t key; 199 uint32_t key;
200 uint32_t version; 200 uint32_t version;
201 #ifdef U_ENABLE_GENERIC_ISO_2022 201 #ifdef U_ENABLE_GENERIC_ISO_2022
202 UBool isFirstBuffer; 202 UBool isFirstBuffer;
203 #endif 203 #endif
204 UBool isEmptySegment;
204 char name[30]; 205 char name[30];
205 char locale[3]; 206 char locale[3];
206 }UConverterDataISO2022; 207 }UConverterDataISO2022;
207 208
208 /* Protos */ 209 /* Protos */
209 /* ISO-2022 ----------------------------------------------------------------- */ 210 /* ISO-2022 ----------------------------------------------------------------- */
210 211
211 /*Forward declaration */ 212 /*Forward declaration */
212 U_CFUNC void 213 U_CFUNC void
213 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, 214 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,
(...skipping 388 matching lines...) Expand 10 before | Expand all | Expand 10 after
602 } 603 }
603 } 604 }
604 } 605 }
605 606
606 static void 607 static void
607 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { 608 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {
608 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter- >extraInfo); 609 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter- >extraInfo);
609 if(choice<=UCNV_RESET_TO_UNICODE) { 610 if(choice<=UCNV_RESET_TO_UNICODE) {
610 uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); 611 uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));
611 myConverterData->key = 0; 612 myConverterData->key = 0;
613 myConverterData->isEmptySegment = FALSE;
612 } 614 }
613 if(choice!=UCNV_RESET_TO_UNICODE) { 615 if(choice!=UCNV_RESET_TO_UNICODE) {
614 uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); 616 uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));
615 } 617 }
616 #ifdef U_ENABLE_GENERIC_ISO_2022 618 #ifdef U_ENABLE_GENERIC_ISO_2022
617 if(myConverterData->locale[0] == 0){ 619 if(myConverterData->locale[0] == 0){
618 if(choice<=UCNV_RESET_TO_UNICODE) { 620 if(choice<=UCNV_RESET_TO_UNICODE) {
619 myConverterData->isFirstBuffer = TRUE; 621 myConverterData->isFirstBuffer = TRUE;
620 myConverterData->key = 0; 622 myConverterData->key = 0;
621 if (converter->mode == UCNV_SO){ 623 if (converter->mode == UCNV_SO){
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
807 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 809 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
808 } else /* value == VALID_TERMINAL_2022 */ { 810 } else /* value == VALID_TERMINAL_2022 */ {
809 switch(var){ 811 switch(var){
810 #ifdef U_ENABLE_GENERIC_ISO_2022 812 #ifdef U_ENABLE_GENERIC_ISO_2022
811 case ISO_2022: 813 case ISO_2022:
812 { 814 {
813 const char *chosenConverterName = escSeqStateTable_Result_2022[offse t]; 815 const char *chosenConverterName = escSeqStateTable_Result_2022[offse t];
814 if(chosenConverterName == NULL) { 816 if(chosenConverterName == NULL) {
815 /* SS2 or SS3 */ 817 /* SS2 or SS3 */
816 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 818 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
819 _this->toUCallbackReason = UCNV_UNASSIGNED;
817 return; 820 return;
818 } 821 }
819 822
820 _this->mode = UCNV_SI; 823 _this->mode = UCNV_SI;
821 ucnv_close(myData2022->currentConverter); 824 ucnv_close(myData2022->currentConverter);
822 myData2022->currentConverter = myUConverter = ucnv_open(chosenConver terName, err); 825 myData2022->currentConverter = myUConverter = ucnv_open(chosenConver terName, err);
823 if(U_SUCCESS(*err)) { 826 if(U_SUCCESS(*err)) {
824 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; 827 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;
825 _this->mode = UCNV_SO; 828 _this->mode = UCNV_SO;
826 } 829 }
(...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after
957 } else { 960 } else {
958 /* Back out bytes from the previous buffer: Need to replay them. */ 961 /* Back out bytes from the previous buffer: Need to replay them. */
959 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance ); 962 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance );
960 /* same as -(initialToULength-1) */ 963 /* same as -(initialToULength-1) */
961 /* preToULength is negative! */ 964 /* preToULength is negative! */
962 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th); 965 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng th);
963 *source-=bytesFromThisBuffer; 966 *source-=bytesFromThisBuffer;
964 } 967 }
965 _this->toULength=1; 968 _this->toULength=1;
966 } 969 }
970 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) {
971 _this->toUCallbackReason = UCNV_UNASSIGNED;
967 } 972 }
968 } 973 }
969 974
970 /*Checks the characters of the buffer against valid 2022 escape sequences 975 /*Checks the characters of the buffer against valid 2022 escape sequences
971 *if the match we return a pointer to the initial start of the sequence otherwise 976 *if the match we return a pointer to the initial start of the sequence otherwise
972 *we return sourceLimit 977 *we return sourceLimit
973 */ 978 */
974 /*for 2022 looks ahead in the stream 979 /*for 2022 looks ahead in the stream
975 *to determine the longest possible convertible 980 *to determine the longest possible convertible
976 *data stream 981 *data stream
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
1119 if(value>=0xf00) { 1124 if(value>=0xf00) {
1120 return 1; /* roundtrip */ 1125 return 1; /* roundtrip */
1121 } else if(useFallback ? value>=0x800 : value>=0xc00) { 1126 } else if(useFallback ? value>=0x800 : value>=0xc00) {
1122 return -1; /* fallback taken */ 1127 return -1; /* fallback taken */
1123 } else { 1128 } else {
1124 return 0; /* no mapping */ 1129 return 0; /* no mapping */
1125 } 1130 }
1126 } 1131 }
1127 1132
1128 /* 1133 /*
1129 * * Check that the result is a 2-byte value with each byte in the range A1..FE 1134 * Check that the result is a 2-byte value with each byte in the range A1..FE
1130 * * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte 1135 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte
1131 * * to move it to the ISO 2022 range 21..7E. 1136 * to move it to the ISO 2022 range 21..7E.
1132 * * Return 0 if out of range. 1137 * Return 0 if out of range.
1133 * */ 1138 */
1134 static U_INLINE uint32_t 1139 static U_INLINE uint32_t
1135 _2022FromGR94DBCS(uint32_t value) { 1140 _2022FromGR94DBCS(uint32_t value) {
1136 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && 1141 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&
1137 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) 1142 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1)
1138 ) { 1143 ) {
1139 return value - 0x8080; /* shift down to 21..7e byte range */ 1144 return value - 0x8080; /* shift down to 21..7e byte range */
1140 } else { 1145 } else {
1141 return 0; /* not valid for ISO 2022 */ 1146 return 0; /* not valid for ISO 2022 */
1142 } 1147 }
1143 } 1148 }
1144 1149
1145 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */ 1150 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */
1146 /* 1151 /*
1147 * Check that the result is a 2-byte value with each byte in the range A1..FE 1152 * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the
1148 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte 1153 * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point
1149 * to move it to the ISO 2022 range 21..7E. 1154 * unchanged.
1150 * Return 0 if out of range.
1151 */ 1155 */
1152 static U_INLINE uint32_t 1156 static U_INLINE uint32_t
1153 _2022FromGR94DBCS(uint32_t value) { 1157 _2022ToGR94DBCS(uint32_t value) {
1154 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && 1158 uint32_t returnValue = value + 0x8080;
1155 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) 1159 if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) &&
1156 ) { 1160 (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) {
1157 return value - 0x8080; /* shift down to 21..7e byte range */ 1161 return returnValue;
1158 } else { 1162 } else {
1159 return 0; /* not valid for ISO 2022 */ 1163 return value;
1160 } 1164 }
1161 } 1165 }
1162 #endif 1166 #endif
1163 1167
1164 #ifdef U_ENABLE_GENERIC_ISO_2022 1168 #ifdef U_ENABLE_GENERIC_ISO_2022
1165 1169
1166 /******************************************************************************* *** 1170 /******************************************************************************* ***
1167 * ISO-2022 Converter 1171 * ISO-2022 Converter
1168 * 1172 *
1169 * 1173 *
(...skipping 859 matching lines...) Expand 10 before | Expand all | Expand 10 after
2029 2033
2030 mySourceChar= (unsigned char) *mySource++; 2034 mySourceChar= (unsigned char) *mySource++;
2031 2035
2032 switch(mySourceChar) { 2036 switch(mySourceChar) {
2033 case UCNV_SI: 2037 case UCNV_SI:
2034 if(myData->version==3) { 2038 if(myData->version==3) {
2035 pToU2022State->g=0; 2039 pToU2022State->g=0;
2036 continue; 2040 continue;
2037 } else { 2041 } else {
2038 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ 2042 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
2043 myData->isEmptySegment = FALSE; /* reset this, we have a different error */
2039 break; 2044 break;
2040 } 2045 }
2041 2046
2042 case UCNV_SO: 2047 case UCNV_SO:
2043 if(myData->version==3) { 2048 if(myData->version==3) {
2044 /* JIS7: switch to G1 half-width Katakana */ 2049 /* JIS7: switch to G1 half-width Katakana */
2045 pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; 2050 pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;
2046 pToU2022State->g=1; 2051 pToU2022State->g=1;
2047 continue; 2052 continue;
2048 } else { 2053 } else {
2049 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ 2054 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */
2055 myData->isEmptySegment = FALSE; /* reset this, we have a different error */
2050 break; 2056 break;
2051 } 2057 }
2052 2058
2053 case ESC_2022: 2059 case ESC_2022:
2054 mySource--; 2060 mySource--;
2055 escape: 2061 escape:
2056 changeState_2022(args->converter,&(mySource), 2062 {
2057 mySourceLimit, ISO_2022_JP,err); 2063 const char * mySourceBefore = mySource;
2064 int8_t toULengthBefore = args->converter->toULength;
2065
2066 changeState_2022(args->converter,&(mySource),
2067 mySourceLimit, ISO_2022_JP,err);
2068
2069 /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
2070 if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) & & myData->isEmptySegment) {
2071 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
2072 args->converter->toUCallbackReason = UCNV_IRREGULAR;
2073 args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
2074 }
2075 }
2058 2076
2059 /* invalid or illegal escape sequence */ 2077 /* invalid or illegal escape sequence */
2060 if(U_FAILURE(*err)){ 2078 if(U_FAILURE(*err)){
2061 args->target = myTarget; 2079 args->target = myTarget;
2062 args->source = mySource; 2080 args->source = mySource;
2081 myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
2063 return; 2082 return;
2064 } 2083 }
2084 /* If we successfully completed an escape sequence, we begin a new segment, empty so far */
2085 if(myData->key==0) {
2086 myData->isEmptySegment = TRUE;
2087 }
2065 continue; 2088 continue;
2066 2089
2067 /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ 2090 /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */
2068 2091
2069 case CR: 2092 case CR:
2070 /*falls through*/ 2093 /*falls through*/
2071 case LF: 2094 case LF:
2072 /* automatically reset to single-byte mode */ 2095 /* automatically reset to single-byte mode */
2073 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU20 22State->cs[0] != JISX201) { 2096 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU20 22State->cs[0] != JISX201) {
2074 pToU2022State->cs[0] = (int8_t)ASCII; 2097 pToU2022State->cs[0] = (int8_t)ASCII;
2075 } 2098 }
2076 pToU2022State->cs[2] = 0; 2099 pToU2022State->cs[2] = 0;
2077 pToU2022State->g = 0; 2100 pToU2022State->g = 0;
2078 /* falls through */ 2101 /* falls through */
2079 default: 2102 default:
2080 /* convert one or two bytes */ 2103 /* convert one or two bytes */
2104 myData->isEmptySegment = FALSE;
2081 cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; 2105 cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
2082 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->v ersion==4 && 2106 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->v ersion==4 &&
2083 !IS_JP_DBCS(cs) 2107 !IS_JP_DBCS(cs)
2084 ) { 2108 ) {
2085 /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ 2109 /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */
2086 targetUniChar = mySourceChar + (HWKANA_START - 0xa1); 2110 targetUniChar = mySourceChar + (HWKANA_START - 0xa1);
2087 2111
2088 /* return from a single-shift state to the previous one */ 2112 /* return from a single-shift state to the previous one */
2089 if(pToU2022State->g >= 2) { 2113 if(pToU2022State->g >= 2) {
2090 pToU2022State->g=pToU2022State->prevG; 2114 pToU2022State->g=pToU2022State->prevG;
(...skipping 508 matching lines...) Expand 10 before | Expand all | Expand 10 after
2599 } 2623 }
2600 2624
2601 while(mySource< mySourceLimit){ 2625 while(mySource< mySourceLimit){
2602 2626
2603 if(myTarget < args->targetLimit){ 2627 if(myTarget < args->targetLimit){
2604 2628
2605 mySourceChar= (unsigned char) *mySource++; 2629 mySourceChar= (unsigned char) *mySource++;
2606 2630
2607 if(mySourceChar==UCNV_SI){ 2631 if(mySourceChar==UCNV_SI){
2608 myData->toU2022State.g = 0; 2632 myData->toU2022State.g = 0;
2633 if (myData->isEmptySegment) {
2634 myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
2635 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
2636 args->converter->toUCallbackReason = UCNV_IRREGULAR;
2637 args->converter->toUBytes[0] = (uint8_t)mySourceChar;
2638 args->converter->toULength = 1;
2639 args->target = myTarget;
2640 args->source = mySource;
2641 return;
2642 }
2609 /*consume the source */ 2643 /*consume the source */
2610 continue; 2644 continue;
2611 }else if(mySourceChar==UCNV_SO){ 2645 }else if(mySourceChar==UCNV_SO){
2612 myData->toU2022State.g = 1; 2646 myData->toU2022State.g = 1;
2647 myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
2613 /*consume the source */ 2648 /*consume the source */
2614 continue; 2649 continue;
2615 }else if(mySourceChar==ESC_2022){ 2650 }else if(mySourceChar==ESC_2022){
2616 mySource--; 2651 mySource--;
2617 escape: 2652 escape:
2653 myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */
2618 changeState_2022(args->converter,&(mySource), 2654 changeState_2022(args->converter,&(mySource),
2619 mySourceLimit, ISO_2022_KR, err); 2655 mySourceLimit, ISO_2022_KR, err);
2620 if(U_FAILURE(*err)){ 2656 if(U_FAILURE(*err)){
2621 args->target = myTarget; 2657 args->target = myTarget;
2622 args->source = mySource; 2658 args->source = mySource;
2623 return; 2659 return;
2624 } 2660 }
2625 continue; 2661 continue;
2626 } 2662 }
2627 2663
2664 myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */
2628 if(myData->toU2022State.g == 1) { 2665 if(myData->toU2022State.g == 1) {
2629 if(mySource < mySourceLimit) { 2666 if(mySource < mySourceLimit) {
2630 int leadIsOk, trailIsOk; 2667 int leadIsOk, trailIsOk;
2631 uint8_t trailByte; 2668 uint8_t trailByte;
2632 getTrailByte: 2669 getTrailByte:
2633 targetUniChar = missingCharMarker; 2670 targetUniChar = missingCharMarker;
2634 trailByte = (uint8_t)*mySource; 2671 trailByte = (uint8_t)*mySource;
2635 /* 2672 /*
2636 * Ticket 5691: consistent illegal sequences: 2673 * Ticket 5691: consistent illegal sequences:
2637 * - We include at least the first byte in the illegal sequence. 2674 * - We include at least the first byte in the illegal sequence.
(...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after
3170 3207
3171 targetUniChar =missingCharMarker; 3208 targetUniChar =missingCharMarker;
3172 3209
3173 if(myTarget < args->targetLimit){ 3210 if(myTarget < args->targetLimit){
3174 3211
3175 mySourceChar= (unsigned char) *mySource++; 3212 mySourceChar= (unsigned char) *mySource++;
3176 3213
3177 switch(mySourceChar){ 3214 switch(mySourceChar){
3178 case UCNV_SI: 3215 case UCNV_SI:
3179 pToU2022State->g=0; 3216 pToU2022State->g=0;
3217 if (myData->isEmptySegment) {
3218 myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
3219 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
3220 args->converter->toUCallbackReason = UCNV_IRREGULAR;
3221 args->converter->toUBytes[0] = mySourceChar;
3222 args->converter->toULength = 1;
3223 args->target = myTarget;
3224 args->source = mySource;
3225 return;
3226 }
3180 continue; 3227 continue;
3181 3228
3182 case UCNV_SO: 3229 case UCNV_SO:
3183 if(pToU2022State->cs[1] != 0) { 3230 if(pToU2022State->cs[1] != 0) {
3184 pToU2022State->g=1; 3231 pToU2022State->g=1;
3232 myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */
3185 continue; 3233 continue;
3186 } else { 3234 } else {
3187 /* illegal to have SO before a matching designator */ 3235 /* illegal to have SO before a matching designator */
3236 myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */
3188 break; 3237 break;
3189 } 3238 }
3190 3239
3191 case ESC_2022: 3240 case ESC_2022:
3192 mySource--; 3241 mySource--;
3193 escape: 3242 escape:
3194 changeState_2022(args->converter,&(mySource), 3243 {
3195 mySourceLimit, ISO_2022_CN,err); 3244 const char * mySourceBefore = mySource;
3245 int8_t toULengthBefore = args->converter->toULength;
3246
3247 changeState_2022(args->converter,&(mySource),
3248 mySourceLimit, ISO_2022_CN,err);
3249
3250 /* After SO there must be at least one character before a designator (designator error handled separately) */
3251 if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegme nt) {
3252 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
3253 args->converter->toUCallbackReason = UCNV_IRREGULAR;
3254 args->converter->toULength = toULengthBefore + (mySource - mySourceBefore);
3255 }
3256 }
3196 3257
3197 /* invalid or illegal escape sequence */ 3258 /* invalid or illegal escape sequence */
3198 if(U_FAILURE(*err)){ 3259 if(U_FAILURE(*err)){
3199 args->target = myTarget; 3260 args->target = myTarget;
3200 args->source = mySource; 3261 args->source = mySource;
3262 myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */
3201 return; 3263 return;
3202 } 3264 }
3203 continue; 3265 continue;
3204 3266
3205 /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ 3267 /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */
3206 3268
3207 case CR: 3269 case CR:
3208 /*falls through*/ 3270 /*falls through*/
3209 case LF: 3271 case LF:
3210 uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); 3272 uprv_memset(pToU2022State, 0, sizeof(ISO2022State));
3211 /* falls through */ 3273 /* falls through */
3212 default: 3274 default:
3213 /* convert one or two bytes */ 3275 /* convert one or two bytes */
3276 myData->isEmptySegment = FALSE;
3214 if(pToU2022State->g != 0) { 3277 if(pToU2022State->g != 0) {
3215 if(mySource < mySourceLimit) { 3278 if(mySource < mySourceLimit) {
3216 UConverterSharedData *cnv; 3279 UConverterSharedData *cnv;
3217 StateEnum tempState; 3280 StateEnum tempState;
3218 int32_t tempBufLen; 3281 int32_t tempBufLen;
3219 int leadIsOk, trailIsOk; 3282 int leadIsOk, trailIsOk;
3220 uint8_t trailByte; 3283 uint8_t trailByte;
3221 getTrailByte: 3284 getTrailByte:
3222 trailByte = (uint8_t)*mySource; 3285 trailByte = (uint8_t)*mySource;
3223 /* 3286 /*
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
3515 /* include JIS X 0201 which is hardcoded */ 3578 /* include JIS X 0201 which is hardcoded */
3516 sa->add(sa->set, 0xa5); 3579 sa->add(sa->set, 0xa5);
3517 sa->add(sa->set, 0x203e); 3580 sa->add(sa->set, 0x203e);
3518 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 3581 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
3519 /* include Latin-1 for some variants of JP */ 3582 /* include Latin-1 for some variants of JP */
3520 sa->addRange(sa->set, 0, 0xff); 3583 sa->addRange(sa->set, 0, 0xff);
3521 } else { 3584 } else {
3522 /* include ASCII for JP */ 3585 /* include ASCII for JP */
3523 sa->addRange(sa->set, 0, 0x7f); 3586 sa->addRange(sa->set, 0, 0x7f);
3524 } 3587 }
3525 if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) { 3588 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A ND_FALLBACK_SET) {
3526 /* 3589 /*
3527 * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks, 3590 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
3528 * we need to include half-width Katakana for all JP variants because 3591 * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8)
3529 * JIS X 0208 has hardcoded fallbacks for them. 3592 * use half-width Katakana.
3593 * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode)
3594 * half-width Katakana via the ESC ( I sequence.
3595 * However, we only emit (fromUnicode) half-width Katakana according to the
3596 * definition of each variant.
3597 *
3598 * When including fallbacks,
3599 * we need to include half-width Katakana Unicode code points for all JP variants because
3600 * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana).
3530 */ 3601 */
3531 /* include half-width Katakana for JP */ 3602 /* include half-width Katakana for JP */
3532 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 3603 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
3533 } 3604 }
3534 break; 3605 break;
3535 case 'c': 3606 case 'c':
3536 case 'z': 3607 case 'z':
3537 /* include ASCII for CN */ 3608 /* include ASCII for CN */
3538 sa->addRange(sa->set, 0, 0x7f); 3609 sa->addRange(sa->set, 0, 0x7f);
3539 break; 3610 break;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
3573 * CN version 1 (-EXT) does map them all. 3644 * CN version 1 (-EXT) does map them all.
3574 * The two versions create different Unicode sets. 3645 * The two versions create different Unicode sets.
3575 */ 3646 */
3576 filter=UCNV_SET_FILTER_2022_CN; 3647 filter=UCNV_SET_FILTER_2022_CN;
3577 } else if(cnvData->locale[0]=='j' && i==JISX208) { 3648 } else if(cnvData->locale[0]=='j' && i==JISX208) {
3578 /* 3649 /*
3579 * Only add code points that map to Shift-JIS codes 3650 * Only add code points that map to Shift-JIS codes
3580 * corresponding to JIS X 0208. 3651 * corresponding to JIS X 0208.
3581 */ 3652 */
3582 filter=UCNV_SET_FILTER_SJIS; 3653 filter=UCNV_SET_FILTER_SJIS;
3654 } else if(i==KSC5601) {
3655 /*
3656 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
3657 * are broader than GR94.
3658 */
3659 filter=UCNV_SET_FILTER_GR94DBCS;
3583 } else { 3660 } else {
3584 filter=UCNV_SET_FILTER_NONE; 3661 filter=UCNV_SET_FILTER_NONE;
3585 } 3662 }
3586 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode); 3663 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i ], sa, which, filter, pErrorCode);
3587 } 3664 }
3588 } 3665 }
3589 3666
3590 /* 3667 /*
3591 * ISO 2022 converters must not convert SO/SI/ESC despite what 3668 * ISO 2022 converters must not convert SO/SI/ESC despite what
3592 * sub-converters do by themselves. 3669 * sub-converters do by themselves.
3593 * Remove these characters from the set. 3670 * Remove these characters from the set.
3594 */ 3671 */
3595 sa->remove(sa->set, 0x0e); 3672 sa->remove(sa->set, 0x0e);
3596 sa->remove(sa->set, 0x0f); 3673 sa->remove(sa->set, 0x0f);
3597 sa->remove(sa->set, 0x1b); 3674 sa->remove(sa->set, 0x1b);
3675
3676 /* ISO 2022 converters do not convert C1 controls either */
3677 sa->removeRange(sa->set, 0x80, 0x9f);
3598 } 3678 }
3599 3679
3600 static const UConverterImpl _ISO2022Impl={ 3680 static const UConverterImpl _ISO2022Impl={
3601 UCNV_ISO_2022, 3681 UCNV_ISO_2022,
3602 3682
3603 NULL, 3683 NULL,
3604 NULL, 3684 NULL,
3605 3685
3606 _ISO2022Open, 3686 _ISO2022Open,
3607 _ISO2022Close, 3687 _ISO2022Close,
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after
3800 NULL, 3880 NULL,
3801 &_ISO2022CNStaticData, 3881 &_ISO2022CNStaticData,
3802 FALSE, 3882 FALSE,
3803 &_ISO2022CNImpl, 3883 &_ISO2022CNImpl,
3804 0 3884 0
3805 }; 3885 };
3806 3886
3807 3887
3808 3888
3809 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 3889 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
OLDNEW
« no previous file with comments | « third_party/icu38/source/common/ucnv.c ('k') | third_party/icu38/source/common/ucnv_bld.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698