| OLD | NEW |
| 1 /* | 1 /* |
| 2 ********************************************************************** | 2 ********************************************************************** |
| 3 * Copyright (C) 2000-2007, International Business Machines | 3 * Copyright (C) 2000-2007, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ********************************************************************** | 5 ********************************************************************** |
| 6 * file name: ucnv2022.c | 6 * file name: ucnv2022.c |
| 7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
| 8 * tab size: 8 (not used) | 8 * tab size: 8 (not used) |
| 9 * indentation:4 | 9 * indentation:4 |
| 10 * | 10 * |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 194 typedef struct{ | 194 typedef struct{ |
| 195 UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; | 195 UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; |
| 196 UConverter *currentConverter; | 196 UConverter *currentConverter; |
| 197 Cnv2022Type currentType; | 197 Cnv2022Type currentType; |
| 198 ISO2022State toU2022State, fromU2022State; | 198 ISO2022State toU2022State, fromU2022State; |
| 199 uint32_t key; | 199 uint32_t key; |
| 200 uint32_t version; | 200 uint32_t version; |
| 201 #ifdef U_ENABLE_GENERIC_ISO_2022 | 201 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 202 UBool isFirstBuffer; | 202 UBool isFirstBuffer; |
| 203 #endif | 203 #endif |
| 204 UBool isEmptySegment; |
| 204 char name[30]; | 205 char name[30]; |
| 205 char locale[3]; | 206 char locale[3]; |
| 206 }UConverterDataISO2022; | 207 }UConverterDataISO2022; |
| 207 | 208 |
| 208 /* Protos */ | 209 /* Protos */ |
| 209 /* ISO-2022 ----------------------------------------------------------------- */ | 210 /* ISO-2022 ----------------------------------------------------------------- */ |
| 210 | 211 |
| 211 /*Forward declaration */ | 212 /*Forward declaration */ |
| 212 U_CFUNC void | 213 U_CFUNC void |
| 213 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, | 214 ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, |
| (...skipping 388 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 602 } | 603 } |
| 603 } | 604 } |
| 604 } | 605 } |
| 605 | 606 |
| 606 static void | 607 static void |
| 607 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { | 608 _ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { |
| 608 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter-
>extraInfo); | 609 UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter-
>extraInfo); |
| 609 if(choice<=UCNV_RESET_TO_UNICODE) { | 610 if(choice<=UCNV_RESET_TO_UNICODE) { |
| 610 uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); | 611 uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); |
| 611 myConverterData->key = 0; | 612 myConverterData->key = 0; |
| 613 myConverterData->isEmptySegment = FALSE; |
| 612 } | 614 } |
| 613 if(choice!=UCNV_RESET_TO_UNICODE) { | 615 if(choice!=UCNV_RESET_TO_UNICODE) { |
| 614 uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); | 616 uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); |
| 615 } | 617 } |
| 616 #ifdef U_ENABLE_GENERIC_ISO_2022 | 618 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 617 if(myConverterData->locale[0] == 0){ | 619 if(myConverterData->locale[0] == 0){ |
| 618 if(choice<=UCNV_RESET_TO_UNICODE) { | 620 if(choice<=UCNV_RESET_TO_UNICODE) { |
| 619 myConverterData->isFirstBuffer = TRUE; | 621 myConverterData->isFirstBuffer = TRUE; |
| 620 myConverterData->key = 0; | 622 myConverterData->key = 0; |
| 621 if (converter->mode == UCNV_SO){ | 623 if (converter->mode == UCNV_SO){ |
| (...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 807 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 809 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 808 } else /* value == VALID_TERMINAL_2022 */ { | 810 } else /* value == VALID_TERMINAL_2022 */ { |
| 809 switch(var){ | 811 switch(var){ |
| 810 #ifdef U_ENABLE_GENERIC_ISO_2022 | 812 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 811 case ISO_2022: | 813 case ISO_2022: |
| 812 { | 814 { |
| 813 const char *chosenConverterName = escSeqStateTable_Result_2022[offse
t]; | 815 const char *chosenConverterName = escSeqStateTable_Result_2022[offse
t]; |
| 814 if(chosenConverterName == NULL) { | 816 if(chosenConverterName == NULL) { |
| 815 /* SS2 or SS3 */ | 817 /* SS2 or SS3 */ |
| 816 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 818 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
| 819 _this->toUCallbackReason = UCNV_UNASSIGNED; |
| 817 return; | 820 return; |
| 818 } | 821 } |
| 819 | 822 |
| 820 _this->mode = UCNV_SI; | 823 _this->mode = UCNV_SI; |
| 821 ucnv_close(myData2022->currentConverter); | 824 ucnv_close(myData2022->currentConverter); |
| 822 myData2022->currentConverter = myUConverter = ucnv_open(chosenConver
terName, err); | 825 myData2022->currentConverter = myUConverter = ucnv_open(chosenConver
terName, err); |
| 823 if(U_SUCCESS(*err)) { | 826 if(U_SUCCESS(*err)) { |
| 824 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; | 827 myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; |
| 825 _this->mode = UCNV_SO; | 828 _this->mode = UCNV_SO; |
| 826 } | 829 } |
| (...skipping 130 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 957 } else { | 960 } else { |
| 958 /* Back out bytes from the previous buffer: Need to replay them.
*/ | 961 /* Back out bytes from the previous buffer: Need to replay them.
*/ |
| 959 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance
); | 962 _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance
); |
| 960 /* same as -(initialToULength-1) */ | 963 /* same as -(initialToULength-1) */ |
| 961 /* preToULength is negative! */ | 964 /* preToULength is negative! */ |
| 962 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng
th); | 965 uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULeng
th); |
| 963 *source-=bytesFromThisBuffer; | 966 *source-=bytesFromThisBuffer; |
| 964 } | 967 } |
| 965 _this->toULength=1; | 968 _this->toULength=1; |
| 966 } | 969 } |
| 970 } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { |
| 971 _this->toUCallbackReason = UCNV_UNASSIGNED; |
| 967 } | 972 } |
| 968 } | 973 } |
| 969 | 974 |
| 970 /*Checks the characters of the buffer against valid 2022 escape sequences | 975 /*Checks the characters of the buffer against valid 2022 escape sequences |
| 971 *if they match we return a pointer to the initial start of the sequence otherwise | 976 *if they match we return a pointer to the initial start of the sequence otherwise |
| 972 *we return sourceLimit | 977 *we return sourceLimit |
| 973 */ | 978 */ |
| 974 /*for 2022 looks ahead in the stream | 979 /*for 2022 looks ahead in the stream |
| 975 *to determine the longest possible convertible | 980 *to determine the longest possible convertible |
| 976 *data stream | 981 *data stream |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1119 if(value>=0xf00) { | 1124 if(value>=0xf00) { |
| 1120 return 1; /* roundtrip */ | 1125 return 1; /* roundtrip */ |
| 1121 } else if(useFallback ? value>=0x800 : value>=0xc00) { | 1126 } else if(useFallback ? value>=0x800 : value>=0xc00) { |
| 1122 return -1; /* fallback taken */ | 1127 return -1; /* fallback taken */ |
| 1123 } else { | 1128 } else { |
| 1124 return 0; /* no mapping */ | 1129 return 0; /* no mapping */ |
| 1125 } | 1130 } |
| 1126 } | 1131 } |
| 1127 | 1132 |
| 1128 /* | 1133 /* |
| 1129 * * Check that the result is a 2-byte value with each byte in the range A1..FE | 1134 * Check that the result is a 2-byte value with each byte in the range A1..FE |
| 1130 * * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte | 1135 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte |
| 1131 * * to move it to the ISO 2022 range 21..7E. | 1136 * to move it to the ISO 2022 range 21..7E. |
| 1132 * * Return 0 if out of range. | 1137 * Return 0 if out of range. |
| 1133 * */ | 1138 */ |
| 1134 static U_INLINE uint32_t | 1139 static U_INLINE uint32_t |
| 1135 _2022FromGR94DBCS(uint32_t value) { | 1140 _2022FromGR94DBCS(uint32_t value) { |
| 1136 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && | 1141 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && |
| 1137 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) | 1142 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) |
| 1138 ) { | 1143 ) { |
| 1139 return value - 0x8080; /* shift down to 21..7e byte range */ | 1144 return value - 0x8080; /* shift down to 21..7e byte range */ |
| 1140 } else { | 1145 } else { |
| 1141 return 0; /* not valid for ISO 2022 */ | 1146 return 0; /* not valid for ISO 2022 */ |
| 1142 } | 1147 } |
| 1143 } | 1148 } |
| 1144 | 1149 |
| 1145 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after
that. */ | 1150 #if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after
that. */ |
| 1146 /* | 1151 /* |
| 1147 * Check that the result is a 2-byte value with each byte in the range A1..FE | 1152 * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code poin
t, it returns the |
| 1148 * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte | 1153 * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns
the 2022 code point |
| 1149 * to move it to the ISO 2022 range 21..7E. | 1154 * unchanged. |
| 1150 * Return 0 if out of range. | |
| 1151 */ | 1155 */ |
| 1152 static U_INLINE uint32_t | 1156 static U_INLINE uint32_t |
| 1153 _2022FromGR94DBCS(uint32_t value) { | 1157 _2022ToGR94DBCS(uint32_t value) { |
| 1154 if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && | 1158 uint32_t returnValue = value + 0x8080; |
| 1155 (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) | 1159 if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) && |
| 1156 ) { | 1160 (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) { |
| 1157 return value - 0x8080; /* shift down to 21..7e byte range */ | 1161 return returnValue; |
| 1158 } else { | 1162 } else { |
| 1159 return 0; /* not valid for ISO 2022 */ | 1163 return value; |
| 1160 } | 1164 } |
| 1161 } | 1165 } |
| 1162 #endif | 1166 #endif |
| 1163 | 1167 |
| 1164 #ifdef U_ENABLE_GENERIC_ISO_2022 | 1168 #ifdef U_ENABLE_GENERIC_ISO_2022 |
| 1165 | 1169 |
| 1166 /*******************************************************************************
*** | 1170 /*******************************************************************************
*** |
| 1167 * ISO-2022 Converter | 1171 * ISO-2022 Converter |
| 1168 * | 1172 * |
| 1169 * | 1173 * |
| (...skipping 859 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2029 | 2033 |
| 2030 mySourceChar= (unsigned char) *mySource++; | 2034 mySourceChar= (unsigned char) *mySource++; |
| 2031 | 2035 |
| 2032 switch(mySourceChar) { | 2036 switch(mySourceChar) { |
| 2033 case UCNV_SI: | 2037 case UCNV_SI: |
| 2034 if(myData->version==3) { | 2038 if(myData->version==3) { |
| 2035 pToU2022State->g=0; | 2039 pToU2022State->g=0; |
| 2036 continue; | 2040 continue; |
| 2037 } else { | 2041 } else { |
| 2038 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ | 2042 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ |
| 2043 myData->isEmptySegment = FALSE; /* reset this, we have a
different error */ |
| 2039 break; | 2044 break; |
| 2040 } | 2045 } |
| 2041 | 2046 |
| 2042 case UCNV_SO: | 2047 case UCNV_SO: |
| 2043 if(myData->version==3) { | 2048 if(myData->version==3) { |
| 2044 /* JIS7: switch to G1 half-width Katakana */ | 2049 /* JIS7: switch to G1 half-width Katakana */ |
| 2045 pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; | 2050 pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; |
| 2046 pToU2022State->g=1; | 2051 pToU2022State->g=1; |
| 2047 continue; | 2052 continue; |
| 2048 } else { | 2053 } else { |
| 2049 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ | 2054 /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ |
| 2055 myData->isEmptySegment = FALSE; /* reset this, we have a
different error */ |
| 2050 break; | 2056 break; |
| 2051 } | 2057 } |
| 2052 | 2058 |
| 2053 case ESC_2022: | 2059 case ESC_2022: |
| 2054 mySource--; | 2060 mySource--; |
| 2055 escape: | 2061 escape: |
| 2056 changeState_2022(args->converter,&(mySource), | 2062 { |
| 2057 mySourceLimit, ISO_2022_JP,err); | 2063 const char * mySourceBefore = mySource; |
| 2064 int8_t toULengthBefore = args->converter->toULength; |
| 2065 |
| 2066 changeState_2022(args->converter,&(mySource), |
| 2067 mySourceLimit, ISO_2022_JP,err); |
| 2068 |
| 2069 /* If in ISO-2022-JP only and we successfully completed an es
cape sequence, but previous segment was empty, create an error */ |
| 2070 if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) &
& myData->isEmptySegment) { |
| 2071 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 2072 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
| 2073 args->converter->toULength = toULengthBefore + (mySource
- mySourceBefore); |
| 2074 } |
| 2075 } |
| 2058 | 2076 |
| 2059 /* invalid or illegal escape sequence */ | 2077 /* invalid or illegal escape sequence */ |
| 2060 if(U_FAILURE(*err)){ | 2078 if(U_FAILURE(*err)){ |
| 2061 args->target = myTarget; | 2079 args->target = myTarget; |
| 2062 args->source = mySource; | 2080 args->source = mySource; |
| 2081 myData->isEmptySegment = FALSE; /* Reset to avoid future
spurious errors */ |
| 2063 return; | 2082 return; |
| 2064 } | 2083 } |
| 2084 /* If we successfully completed an escape sequence, we begin a n
ew segment, empty so far */ |
| 2085 if(myData->key==0) { |
| 2086 myData->isEmptySegment = TRUE; |
| 2087 } |
| 2065 continue; | 2088 continue; |
| 2066 | 2089 |
| 2067 /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ | 2090 /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ |
| 2068 | 2091 |
| 2069 case CR: | 2092 case CR: |
| 2070 /*falls through*/ | 2093 /*falls through*/ |
| 2071 case LF: | 2094 case LF: |
| 2072 /* automatically reset to single-byte mode */ | 2095 /* automatically reset to single-byte mode */ |
| 2073 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU20
22State->cs[0] != JISX201) { | 2096 if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU20
22State->cs[0] != JISX201) { |
| 2074 pToU2022State->cs[0] = (int8_t)ASCII; | 2097 pToU2022State->cs[0] = (int8_t)ASCII; |
| 2075 } | 2098 } |
| 2076 pToU2022State->cs[2] = 0; | 2099 pToU2022State->cs[2] = 0; |
| 2077 pToU2022State->g = 0; | 2100 pToU2022State->g = 0; |
| 2078 /* falls through */ | 2101 /* falls through */ |
| 2079 default: | 2102 default: |
| 2080 /* convert one or two bytes */ | 2103 /* convert one or two bytes */ |
| 2104 myData->isEmptySegment = FALSE; |
| 2081 cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; | 2105 cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; |
| 2082 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->v
ersion==4 && | 2106 if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->v
ersion==4 && |
| 2083 !IS_JP_DBCS(cs) | 2107 !IS_JP_DBCS(cs) |
| 2084 ) { | 2108 ) { |
| 2085 /* 8-bit halfwidth katakana in any single-byte mode for JIS8
*/ | 2109 /* 8-bit halfwidth katakana in any single-byte mode for JIS8
*/ |
| 2086 targetUniChar = mySourceChar + (HWKANA_START - 0xa1); | 2110 targetUniChar = mySourceChar + (HWKANA_START - 0xa1); |
| 2087 | 2111 |
| 2088 /* return from a single-shift state to the previous one */ | 2112 /* return from a single-shift state to the previous one */ |
| 2089 if(pToU2022State->g >= 2) { | 2113 if(pToU2022State->g >= 2) { |
| 2090 pToU2022State->g=pToU2022State->prevG; | 2114 pToU2022State->g=pToU2022State->prevG; |
| (...skipping 508 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2599 } | 2623 } |
| 2600 | 2624 |
| 2601 while(mySource< mySourceLimit){ | 2625 while(mySource< mySourceLimit){ |
| 2602 | 2626 |
| 2603 if(myTarget < args->targetLimit){ | 2627 if(myTarget < args->targetLimit){ |
| 2604 | 2628 |
| 2605 mySourceChar= (unsigned char) *mySource++; | 2629 mySourceChar= (unsigned char) *mySource++; |
| 2606 | 2630 |
| 2607 if(mySourceChar==UCNV_SI){ | 2631 if(mySourceChar==UCNV_SI){ |
| 2608 myData->toU2022State.g = 0; | 2632 myData->toU2022State.g = 0; |
| 2633 if (myData->isEmptySegment) { |
| 2634 myData->isEmptySegment = FALSE; /* we are handling it, r
eset to avoid future spurious errors */ |
| 2635 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 2636 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
| 2637 args->converter->toUBytes[0] = (uint8_t)mySourceChar; |
| 2638 args->converter->toULength = 1; |
| 2639 args->target = myTarget; |
| 2640 args->source = mySource; |
| 2641 return; |
| 2642 } |
| 2609 /*consume the source */ | 2643 /*consume the source */ |
| 2610 continue; | 2644 continue; |
| 2611 }else if(mySourceChar==UCNV_SO){ | 2645 }else if(mySourceChar==UCNV_SO){ |
| 2612 myData->toU2022State.g = 1; | 2646 myData->toU2022State.g = 1; |
| 2647 myData->isEmptySegment = TRUE; /* Begin a new segment, empty so
far */ |
| 2613 /*consume the source */ | 2648 /*consume the source */ |
| 2614 continue; | 2649 continue; |
| 2615 }else if(mySourceChar==ESC_2022){ | 2650 }else if(mySourceChar==ESC_2022){ |
| 2616 mySource--; | 2651 mySource--; |
| 2617 escape: | 2652 escape: |
| 2653 myData->isEmptySegment = FALSE; /* Any invalid ESC sequences wil
l be detected separately, so just reset this */ |
| 2618 changeState_2022(args->converter,&(mySource), | 2654 changeState_2022(args->converter,&(mySource), |
| 2619 mySourceLimit, ISO_2022_KR, err); | 2655 mySourceLimit, ISO_2022_KR, err); |
| 2620 if(U_FAILURE(*err)){ | 2656 if(U_FAILURE(*err)){ |
| 2621 args->target = myTarget; | 2657 args->target = myTarget; |
| 2622 args->source = mySource; | 2658 args->source = mySource; |
| 2623 return; | 2659 return; |
| 2624 } | 2660 } |
| 2625 continue; | 2661 continue; |
| 2626 } | 2662 } |
| 2627 | 2663 |
| 2664 myData->isEmptySegment = FALSE; /* Any invalid char errors will
be detected separately, so just reset this */ |
| 2628 if(myData->toU2022State.g == 1) { | 2665 if(myData->toU2022State.g == 1) { |
| 2629 if(mySource < mySourceLimit) { | 2666 if(mySource < mySourceLimit) { |
| 2630 int leadIsOk, trailIsOk; | 2667 int leadIsOk, trailIsOk; |
| 2631 uint8_t trailByte; | 2668 uint8_t trailByte; |
| 2632 getTrailByte: | 2669 getTrailByte: |
| 2633 targetUniChar = missingCharMarker; | 2670 targetUniChar = missingCharMarker; |
| 2634 trailByte = (uint8_t)*mySource; | 2671 trailByte = (uint8_t)*mySource; |
| 2635 /* | 2672 /* |
| 2636 * Ticket 5691: consistent illegal sequences: | 2673 * Ticket 5691: consistent illegal sequences: |
| 2637 * - We include at least the first byte in the illegal seque
nce. | 2674 * - We include at least the first byte in the illegal seque
nce. |
| (...skipping 532 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3170 | 3207 |
| 3171 targetUniChar =missingCharMarker; | 3208 targetUniChar =missingCharMarker; |
| 3172 | 3209 |
| 3173 if(myTarget < args->targetLimit){ | 3210 if(myTarget < args->targetLimit){ |
| 3174 | 3211 |
| 3175 mySourceChar= (unsigned char) *mySource++; | 3212 mySourceChar= (unsigned char) *mySource++; |
| 3176 | 3213 |
| 3177 switch(mySourceChar){ | 3214 switch(mySourceChar){ |
| 3178 case UCNV_SI: | 3215 case UCNV_SI: |
| 3179 pToU2022State->g=0; | 3216 pToU2022State->g=0; |
| 3217 if (myData->isEmptySegment) { |
| 3218 myData->isEmptySegment = FALSE; /* we are handling it, r
eset to avoid future spurious errors */ |
| 3219 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 3220 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
| 3221 args->converter->toUBytes[0] = mySourceChar; |
| 3222 args->converter->toULength = 1; |
| 3223 args->target = myTarget; |
| 3224 args->source = mySource; |
| 3225 return; |
| 3226 } |
| 3180 continue; | 3227 continue; |
| 3181 | 3228 |
| 3182 case UCNV_SO: | 3229 case UCNV_SO: |
| 3183 if(pToU2022State->cs[1] != 0) { | 3230 if(pToU2022State->cs[1] != 0) { |
| 3184 pToU2022State->g=1; | 3231 pToU2022State->g=1; |
| 3232 myData->isEmptySegment = TRUE; /* Begin a new segment,
empty so far */ |
| 3185 continue; | 3233 continue; |
| 3186 } else { | 3234 } else { |
| 3187 /* illegal to have SO before a matching designator */ | 3235 /* illegal to have SO before a matching designator */ |
| 3236 myData->isEmptySegment = FALSE; /* Handling a different
error, reset this to avoid future spurious errs */ |
| 3188 break; | 3237 break; |
| 3189 } | 3238 } |
| 3190 | 3239 |
| 3191 case ESC_2022: | 3240 case ESC_2022: |
| 3192 mySource--; | 3241 mySource--; |
| 3193 escape: | 3242 escape: |
| 3194 changeState_2022(args->converter,&(mySource), | 3243 { |
| 3195 mySourceLimit, ISO_2022_CN,err); | 3244 const char * mySourceBefore = mySource; |
| 3245 int8_t toULengthBefore = args->converter->toULength; |
| 3246 |
| 3247 changeState_2022(args->converter,&(mySource), |
| 3248 mySourceLimit, ISO_2022_CN,err); |
| 3249 |
| 3250 /* After SO there must be at least one character before a de
signator (designator error handled separately) */ |
| 3251 if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegme
nt) { |
| 3252 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 3253 args->converter->toUCallbackReason = UCNV_IRREGULAR; |
| 3254 args->converter->toULength = toULengthBefore + (mySource
- mySourceBefore); |
| 3255 } |
| 3256 } |
| 3196 | 3257 |
| 3197 /* invalid or illegal escape sequence */ | 3258 /* invalid or illegal escape sequence */ |
| 3198 if(U_FAILURE(*err)){ | 3259 if(U_FAILURE(*err)){ |
| 3199 args->target = myTarget; | 3260 args->target = myTarget; |
| 3200 args->source = mySource; | 3261 args->source = mySource; |
| 3262 myData->isEmptySegment = FALSE; /* Reset to avoid future
spurious errors */ |
| 3201 return; | 3263 return; |
| 3202 } | 3264 } |
| 3203 continue; | 3265 continue; |
| 3204 | 3266 |
| 3205 /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ | 3267 /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ |
| 3206 | 3268 |
| 3207 case CR: | 3269 case CR: |
| 3208 /*falls through*/ | 3270 /*falls through*/ |
| 3209 case LF: | 3271 case LF: |
| 3210 uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); | 3272 uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); |
| 3211 /* falls through */ | 3273 /* falls through */ |
| 3212 default: | 3274 default: |
| 3213 /* convert one or two bytes */ | 3275 /* convert one or two bytes */ |
| 3276 myData->isEmptySegment = FALSE; |
| 3214 if(pToU2022State->g != 0) { | 3277 if(pToU2022State->g != 0) { |
| 3215 if(mySource < mySourceLimit) { | 3278 if(mySource < mySourceLimit) { |
| 3216 UConverterSharedData *cnv; | 3279 UConverterSharedData *cnv; |
| 3217 StateEnum tempState; | 3280 StateEnum tempState; |
| 3218 int32_t tempBufLen; | 3281 int32_t tempBufLen; |
| 3219 int leadIsOk, trailIsOk; | 3282 int leadIsOk, trailIsOk; |
| 3220 uint8_t trailByte; | 3283 uint8_t trailByte; |
| 3221 getTrailByte: | 3284 getTrailByte: |
| 3222 trailByte = (uint8_t)*mySource; | 3285 trailByte = (uint8_t)*mySource; |
| 3223 /* | 3286 /* |
| (...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3515 /* include JIS X 0201 which is hardcoded */ | 3578 /* include JIS X 0201 which is hardcoded */ |
| 3516 sa->add(sa->set, 0xa5); | 3579 sa->add(sa->set, 0xa5); |
| 3517 sa->add(sa->set, 0x203e); | 3580 sa->add(sa->set, 0x203e); |
| 3518 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | 3581 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
| 3519 /* include Latin-1 for some variants of JP */ | 3582 /* include Latin-1 for some variants of JP */ |
| 3520 sa->addRange(sa->set, 0, 0xff); | 3583 sa->addRange(sa->set, 0, 0xff); |
| 3521 } else { | 3584 } else { |
| 3522 /* include ASCII for JP */ | 3585 /* include ASCII for JP */ |
| 3523 sa->addRange(sa->set, 0, 0x7f); | 3586 sa->addRange(sa->set, 0, 0x7f); |
| 3524 } | 3587 } |
| 3525 if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) { | 3588 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_A
ND_FALLBACK_SET) { |
| 3526 /* | 3589 /* |
| 3527 * TODO(markus): If and when ucnv_getUnicodeSet() supports fallbacks
, | 3590 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=
0 |
| 3528 * we need to include half-width Katakana for all JP variants becaus
e | 3591 * because the bit is on for all JP versions although only versions
3 & 4 (JIS7 & JIS8) |
| 3529 * JIS X 0208 has hardcoded fallbacks for them. | 3592 * use half-width Katakana. |
| 3593 * This is because all ISO-2022-JP variants are lenient in that they
accept (in toUnicode) |
| 3594 * half-width Katakana via the ESC ( I sequence. |
| 3595 * However, we only emit (fromUnicode) half-width Katakana according
to the |
| 3596 * definition of each variant. |
| 3597 * |
| 3598 * When including fallbacks, |
| 3599 * we need to include half-width Katakana Unicode code points for al
l JP variants because |
| 3600 * JIS X 0208 has hardcoded fallbacks for them (which map to full-wi
dth Katakana). |
| 3530 */ | 3601 */ |
| 3531 /* include half-width Katakana for JP */ | 3602 /* include half-width Katakana for JP */ |
| 3532 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 3603 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
| 3533 } | 3604 } |
| 3534 break; | 3605 break; |
| 3535 case 'c': | 3606 case 'c': |
| 3536 case 'z': | 3607 case 'z': |
| 3537 /* include ASCII for CN */ | 3608 /* include ASCII for CN */ |
| 3538 sa->addRange(sa->set, 0, 0x7f); | 3609 sa->addRange(sa->set, 0, 0x7f); |
| 3539 break; | 3610 break; |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3573 * CN version 1 (-EXT) does map them all. | 3644 * CN version 1 (-EXT) does map them all. |
| 3574 * The two versions create different Unicode sets. | 3645 * The two versions create different Unicode sets. |
| 3575 */ | 3646 */ |
| 3576 filter=UCNV_SET_FILTER_2022_CN; | 3647 filter=UCNV_SET_FILTER_2022_CN; |
| 3577 } else if(cnvData->locale[0]=='j' && i==JISX208) { | 3648 } else if(cnvData->locale[0]=='j' && i==JISX208) { |
| 3578 /* | 3649 /* |
| 3579 * Only add code points that map to Shift-JIS codes | 3650 * Only add code points that map to Shift-JIS codes |
| 3580 * corresponding to JIS X 0208. | 3651 * corresponding to JIS X 0208. |
| 3581 */ | 3652 */ |
| 3582 filter=UCNV_SET_FILTER_SJIS; | 3653 filter=UCNV_SET_FILTER_SJIS; |
| 3654 } else if(i==KSC5601) { |
| 3655 /* |
| 3656 * Some of the KSC 5601 tables (convrtrs.txt has this alias on
multiple tables) |
| 3657 * are broader than GR94. |
| 3658 */ |
| 3659 filter=UCNV_SET_FILTER_GR94DBCS; |
| 3583 } else { | 3660 } else { |
| 3584 filter=UCNV_SET_FILTER_NONE; | 3661 filter=UCNV_SET_FILTER_NONE; |
| 3585 } | 3662 } |
| 3586 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); | 3663 ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i
], sa, which, filter, pErrorCode); |
| 3587 } | 3664 } |
| 3588 } | 3665 } |
| 3589 | 3666 |
| 3590 /* | 3667 /* |
| 3591 * ISO 2022 converters must not convert SO/SI/ESC despite what | 3668 * ISO 2022 converters must not convert SO/SI/ESC despite what |
| 3592 * sub-converters do by themselves. | 3669 * sub-converters do by themselves. |
| 3593 * Remove these characters from the set. | 3670 * Remove these characters from the set. |
| 3594 */ | 3671 */ |
| 3595 sa->remove(sa->set, 0x0e); | 3672 sa->remove(sa->set, 0x0e); |
| 3596 sa->remove(sa->set, 0x0f); | 3673 sa->remove(sa->set, 0x0f); |
| 3597 sa->remove(sa->set, 0x1b); | 3674 sa->remove(sa->set, 0x1b); |
| 3675 |
| 3676 /* ISO 2022 converters do not convert C1 controls either */ |
| 3677 sa->removeRange(sa->set, 0x80, 0x9f); |
| 3598 } | 3678 } |
| 3599 | 3679 |
| 3600 static const UConverterImpl _ISO2022Impl={ | 3680 static const UConverterImpl _ISO2022Impl={ |
| 3601 UCNV_ISO_2022, | 3681 UCNV_ISO_2022, |
| 3602 | 3682 |
| 3603 NULL, | 3683 NULL, |
| 3604 NULL, | 3684 NULL, |
| 3605 | 3685 |
| 3606 _ISO2022Open, | 3686 _ISO2022Open, |
| 3607 _ISO2022Close, | 3687 _ISO2022Close, |
| (...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3800 NULL, | 3880 NULL, |
| 3801 &_ISO2022CNStaticData, | 3881 &_ISO2022CNStaticData, |
| 3802 FALSE, | 3882 FALSE, |
| 3803 &_ISO2022CNImpl, | 3883 &_ISO2022CNImpl, |
| 3804 0 | 3884 0 |
| 3805 }; | 3885 }; |
| 3806 | 3886 |
| 3807 | 3887 |
| 3808 | 3888 |
| 3809 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 3889 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| OLD | NEW |