Index: source/common/ucnv2022.cpp |
diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp |
index ea165eca5c783b4c64e6f773bc7489e3cac988d2..0c12dd0d7ba3dc2c144ac19e31b1bd4dc90c0c89 100644 |
--- a/source/common/ucnv2022.cpp |
+++ b/source/common/ucnv2022.cpp |
@@ -1,6 +1,6 @@ |
/* |
********************************************************************** |
-* Copyright (C) 2000-2014, International Business Machines |
+* Copyright (C) 2000-2015, International Business Machines |
* Corporation and others. All Rights Reserved. |
********************************************************************** |
* file name: ucnv2022.cpp |
@@ -75,8 +75,10 @@ |
*/ |
#endif |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
static const char SHIFT_IN_STR[] = "\x0F"; |
// static const char SHIFT_OUT_STR[] = "\x0E"; |
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
#define CR 0x0D |
#define LF 0x0A |
@@ -152,7 +154,7 @@ typedef enum { |
} StateEnum; |
/* is the StateEnum charset value for a DBCS charset? */ |
-#if UCONFIG_NO_NON_HTML5_CONVERSION |
+#if UCONFIG_ONLY_HTML_CONVERSION |
#define IS_JP_DBCS(cs) (JISX208==(cs)) |
#else |
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
@@ -169,18 +171,14 @@ typedef enum { |
* all versions, not just JIS7 and JIS8. |
* - ICU does not distinguish between different versions of JIS X 0208. |
*/ |
-#if UCONFIG_NO_NON_HTML5_CONVERSION |
+#if UCONFIG_ONLY_HTML_CONVERSION |
enum { MAX_JA_VERSION=0 }; |
#else |
enum { MAX_JA_VERSION=4 }; |
#endif |
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
-/* |
- * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
- * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
- */ |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
@@ -377,10 +375,8 @@ typedef enum{ |
#ifdef U_ENABLE_GENERIC_ISO_2022 |
ISO_2022=0, |
#endif |
-#if UCONFIG_NO_NON_HTML5_CONVERSION |
- ISO_2022_JP=1 |
-#else |
ISO_2022_JP=1, |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
ISO_2022_KR=2, |
ISO_2022_CN=3 |
#endif |
@@ -414,8 +410,11 @@ namespace { |
/*const UConverterSharedData _ISO2022Data;*/ |
extern const UConverterSharedData _ISO2022JPData; |
+ |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
extern const UConverterSharedData _ISO2022KRData; |
extern const UConverterSharedData _ISO2022CNData; |
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
} // namespace |
@@ -494,21 +493,20 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && |
(myLocale[2]=='_' || myLocale[2]=='\0')) |
{ |
- size_t len=0; |
/* open the required converters and cache them */ |
if(version>MAX_JA_VERSION) { |
- /* prevent indexing beyond jpCharsetMasks[] */ |
- myConverterData->version = version = 0; |
+ // ICU 55 fails to open a converter for an unsupported version. |
+ // Previously, it fell back to version 0, but that would yield |
+ // unexpected behavior. |
+ *errorCode = U_MISSING_RESOURCE_ERROR; |
+ return; |
} |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
myConverterData->myConverterArray[ISO8859_7] = |
ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); |
} |
-#endif |
myConverterData->myConverterArray[JISX208] = |
ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(jpCharsetMasks[version]&CSM(JISX212)) { |
myConverterData->myConverterArray[JISX212] = |
ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); |
@@ -521,21 +519,27 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
myConverterData->myConverterArray[KSC5601] = |
ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); |
} |
-#endif |
/* set the function pointers to appropriate funtions */ |
cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
uprv_strcpy(myConverterData->locale,"ja"); |
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version="); |
- len = uprv_strlen(myConverterData->name); |
+ size_t len = uprv_strlen(myConverterData->name); |
myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); |
myConverterData->name[len+1]='\0'; |
} |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
(myLocale[2]=='_' || myLocale[2]=='\0')) |
{ |
+ if(version>1) { |
+ // ICU 55 fails to open a converter for an unsupported version. |
+ // Previously, it fell back to version 0, but that would yield |
+ // unexpected behavior. |
+ *errorCode = U_MISSING_RESOURCE_ERROR; |
+ return; |
+ } |
const char *cnvName; |
if(version==1) { |
cnvName="icu-internal-25546"; |
@@ -575,6 +579,13 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&& |
(myLocale[2]=='_' || myLocale[2]=='\0')) |
{ |
+ if(version>2) { |
+ // ICU 55 fails to open a converter for an unsupported version. |
+ // Previously, it fell back to version 0, but that would yield |
+ // unexpected behavior. |
+ *errorCode = U_MISSING_RESOURCE_ERROR; |
+ return; |
+ } |
/* open the required converters and cache them */ |
myConverterData->myConverterArray[GB2312_1] = |
@@ -602,7 +613,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); |
} |
} |
-#endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
+#endif // !UCONFIG_ONLY_HTML_CONVERSION |
else{ |
#ifdef U_ENABLE_GENERIC_ISO_2022 |
myConverterData->isFirstBuffer = TRUE; |
@@ -617,7 +628,9 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
/* initialize the state variables */ |
uprv_strcpy(myConverterData->name,"ISO_2022"); |
#else |
- *errorCode = U_UNSUPPORTED_ERROR; |
+ *errorCode = U_MISSING_RESOURCE_ERROR; |
+ // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard |
+ // data loading error code. |
return; |
#endif |
} |
@@ -737,7 +750,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { |
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
}; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
/*************** to unicode *******************/ |
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
/* 0 1 2 3 4 5 6 7 8 9 */ |
@@ -903,7 +916,6 @@ DONE: |
} |
break; |
/* case SS3_STATE: not used in ISO-2022-JP-x */ |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_1: |
case ISO8859_7: |
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { |
@@ -913,7 +925,6 @@ DONE: |
myData2022->toU2022State.cs[2]=(int8_t)tempState; |
} |
break; |
-#endif |
default: |
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { |
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
@@ -925,7 +936,7 @@ DONE: |
} |
} |
break; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
case ISO_2022_CN: |
{ |
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
@@ -987,7 +998,7 @@ DONE: |
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
} |
break; |
-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
+#endif // !UCONFIG_ONLY_HTML_CONVERSION |
default: |
*err = U_ILLEGAL_ESCAPE_SEQUENCE; |
@@ -1030,6 +1041,7 @@ DONE: |
} |
} |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
/*Checks the characters of the buffer against valid 2022 escape sequences |
*if the match we return a pointer to the initial start of the sequence otherwise |
*we return sourceLimit |
@@ -1084,7 +1096,7 @@ getEndOfBuffer_2022(const char** source, |
return mySource; |
#endif |
} |
- |
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c |
* any future change in _MBCSFromUChar32() function should be reflected here. |
@@ -1408,16 +1420,12 @@ toUnicodeCallback(UConverter *cnv, |
static const StateEnum jpCharsetPref[]={ |
ASCII, |
JISX201, |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
ISO8859_1, |
- ISO8859_7, |
-#endif |
JISX208, |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+ ISO8859_7, |
JISX212, |
GB2312, |
KSC5601, |
-#endif |
HWKANA_7BIT |
}; |
@@ -1787,7 +1795,6 @@ getTrail: |
g = 0; |
} |
break; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_1: |
if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
targetValue = (uint32_t)sourceChar - 0x80; |
@@ -1796,7 +1803,6 @@ getTrail: |
g = 2; |
} |
break; |
-#endif |
case HWKANA_7BIT: |
if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { |
if(converterData->version==3) { |
@@ -1858,7 +1864,6 @@ getTrail: |
useFallback = FALSE; |
} |
break; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_7: |
/* G0 SBCS forced to 7-bit output */ |
len2 = MBCS_SINGLE_FROM_UCHAR32( |
@@ -1873,7 +1878,6 @@ getTrail: |
useFallback = FALSE; |
} |
break; |
-#endif |
default: |
/* G0 DBCS */ |
len2 = MBCS_FROM_UCHAR32_ISO2022( |
@@ -1881,7 +1885,6 @@ getTrail: |
sourceChar, &value, |
useFallback, MBCS_OUTPUT_2); |
if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(cs0 == KSC5601) { |
/* |
* Check for valid bytes for the encoding scheme. |
@@ -1893,7 +1896,6 @@ getTrail: |
break; |
} |
} |
-#endif |
targetValue = value; |
len = len2; |
cs = cs0; |
@@ -2187,7 +2189,6 @@ escape: |
targetUniChar = mySourceChar; |
} |
break; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_1: |
if(mySourceChar <= 0x7f) { |
targetUniChar = mySourceChar + 0x80; |
@@ -2206,7 +2207,6 @@ escape: |
/* return from a single-shift state to the previous one */ |
pToU2022State->g=pToU2022State->prevG; |
break; |
-#endif |
case JISX201: |
if(mySourceChar <= 0x7f) { |
targetUniChar = jisx201ToU(mySourceChar); |
@@ -2246,11 +2246,9 @@ getTrailByte: |
} else { |
/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ |
mySourceChar = tmpSourceChar; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if (cs == KSC5601) { |
tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ |
} |
-#endif |
tempBuf[0] = (char)(tmpSourceChar >> 8); |
tempBuf[1] = (char)(tmpSourceChar); |
} |
@@ -2312,7 +2310,7 @@ endloop: |
} |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
/*************************************************************** |
* Rules for ISO-2022-KR encoding |
* i) The KSC5601 designator sequence should appear only once in a file, |
@@ -3456,7 +3454,7 @@ endloop: |
args->target = myTarget; |
args->source = mySource; |
} |
-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
static void |
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { |
@@ -3658,7 +3656,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
/* include JIS X 0201 which is hardcoded */ |
sa->add(sa->set, 0xa5); |
sa->add(sa->set, 0x203e); |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
/* include Latin-1 for some variants of JP */ |
sa->addRange(sa->set, 0, 0xff); |
@@ -3666,10 +3663,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
/* include ASCII for JP */ |
sa->addRange(sa->set, 0, 0x7f); |
} |
-#else |
- /* include ASCII for JP */ |
- sa->addRange(sa->set, 0, 0x7f); |
-#endif |
if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { |
/* |
* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 |
@@ -3688,7 +3681,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
} |
break; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
case 'c': |
case 'z': |
/* include ASCII for CN */ |
@@ -3727,7 +3720,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
* corresponding to JIS X 0208. |
*/ |
filter=UCNV_SET_FILTER_SJIS; |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
} else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
cnvData->version==0 && i==CNS_11643) { |
/* |
@@ -3866,7 +3859,7 @@ const UConverterSharedData _ISO2022JPData= |
} // namespace |
-#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+#if !UCONFIG_ONLY_HTML_CONVERSION |
/************* KR ***************/ |
static const UConverterImpl _ISO2022KRImpl={ |
UCNV_ISO_2022, |
@@ -3967,6 +3960,6 @@ const UConverterSharedData _ISO2022CNData= |
UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl); |
} // namespace |
-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |