Index: source/common/ucnv2022.cpp |
diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp |
index 9556dd2c096fc22e0e8977796730a7c47efd1e45..7b8df9bdd1833a4e50fe2491899d546b7c18c0b0 100644 |
--- a/source/common/ucnv2022.cpp |
+++ b/source/common/ucnv2022.cpp |
@@ -152,7 +152,11 @@ typedef enum { |
} StateEnum; |
/* is the StateEnum charset value for a DBCS charset? */ |
+#if UCONFIG_NO_NON_HTML5_CONVERSION |
+#define IS_JP_DBCS(cs) (JISX208==(cs)) |
+#else |
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
+#endif |
#define CSM(cs) ((uint16_t)1<<(cs)) |
@@ -165,13 +169,23 @@ typedef enum { |
* all versions, not just JIS7 and JIS8. |
* - ICU does not distinguish between different versions of JIS X 0208. |
*/ |
+#if UCONFIG_NO_NON_HTML5_CONVERSION |
+enum { MAX_JA_VERSION=0 }; |
+#else |
enum { MAX_JA_VERSION=4 }; |
+#endif |
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
+/* |
+ * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
+ * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
+ */ |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
+#endif |
}; |
typedef enum { |
@@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = { |
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 |
}; |
- |
/* Type def for refactoring changeState_2022 code*/ |
typedef enum{ |
#ifdef U_ENABLE_GENERIC_ISO_2022 |
ISO_2022=0, |
#endif |
+#if UCONFIG_NO_NON_HTML5_CONVERSION |
+ ISO_2022_JP=1 |
+#else |
ISO_2022_JP=1, |
ISO_2022_KR=2, |
ISO_2022_CN=3 |
+#endif |
} Variant2022; |
/*********** ISO 2022 Converter Protos ***********/ |
@@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
/* prevent indexing beyond jpCharsetMasks[] */ |
myConverterData->version = version = 0; |
} |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
myConverterData->myConverterArray[ISO8859_7] = |
ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); |
} |
+#endif |
myConverterData->myConverterArray[JISX208] = |
ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(jpCharsetMasks[version]&CSM(JISX212)) { |
myConverterData->myConverterArray[JISX212] = |
ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); |
@@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
myConverterData->myConverterArray[KSC5601] = |
ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); |
} |
+#endif |
/* set the function pointers to appropriate funtions */ |
cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
@@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); |
myConverterData->name[len+1]='\0'; |
} |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
(myLocale[2]=='_' || myLocale[2]=='\0')) |
{ |
@@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ |
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); |
} |
} |
+#endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
else{ |
#ifdef U_ENABLE_GENERIC_ISO_2022 |
myConverterData->isFirstBuffer = TRUE; |
@@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { |
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
}; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
/*************** to unicode *******************/ |
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
/* 0 1 2 3 4 5 6 7 8 9 */ |
@@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
}; |
+#endif |
static UCNV_TableStates_2022 |
@@ -878,6 +903,7 @@ DONE: |
} |
break; |
/* case SS3_STATE: not used in ISO-2022-JP-x */ |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_1: |
case ISO8859_7: |
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { |
@@ -887,6 +913,7 @@ DONE: |
myData2022->toU2022State.cs[2]=(int8_t)tempState; |
} |
break; |
+#endif |
default: |
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { |
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
@@ -898,6 +925,7 @@ DONE: |
} |
} |
break; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO_2022_CN: |
{ |
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
@@ -959,6 +987,7 @@ DONE: |
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
} |
break; |
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
default: |
*err = U_ILLEGAL_ESCAPE_SEQUENCE; |
@@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv, |
static const StateEnum jpCharsetPref[]={ |
ASCII, |
JISX201, |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
ISO8859_1, |
ISO8859_7, |
+#endif |
JISX208, |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
JISX212, |
GB2312, |
KSC5601, |
+#endif |
HWKANA_7BIT |
}; |
@@ -1754,6 +1787,7 @@ getTrail: |
g = 0; |
} |
break; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_1: |
if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
targetValue = (uint32_t)sourceChar - 0x80; |
@@ -1762,6 +1796,7 @@ getTrail: |
g = 2; |
} |
break; |
+#endif |
case HWKANA_7BIT: |
if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { |
if(converterData->version==3) { |
@@ -1823,6 +1858,7 @@ getTrail: |
useFallback = FALSE; |
} |
break; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_7: |
/* G0 SBCS forced to 7-bit output */ |
len2 = MBCS_SINGLE_FROM_UCHAR32( |
@@ -1837,6 +1873,7 @@ getTrail: |
useFallback = FALSE; |
} |
break; |
+#endif |
default: |
/* G0 DBCS */ |
len2 = MBCS_FROM_UCHAR32_ISO2022( |
@@ -1844,6 +1881,7 @@ getTrail: |
sourceChar, &value, |
useFallback, MBCS_OUTPUT_2); |
if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(cs0 == KSC5601) { |
/* |
* Check for valid bytes for the encoding scheme. |
@@ -1855,6 +1893,7 @@ getTrail: |
break; |
} |
} |
+#endif |
targetValue = value; |
len = len2; |
cs = cs0; |
@@ -2148,6 +2187,7 @@ escape: |
targetUniChar = mySourceChar; |
} |
break; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case ISO8859_1: |
if(mySourceChar <= 0x7f) { |
targetUniChar = mySourceChar + 0x80; |
@@ -2166,6 +2206,7 @@ escape: |
/* return from a single-shift state to the previous one */ |
pToU2022State->g=pToU2022State->prevG; |
break; |
+#endif |
case JISX201: |
if(mySourceChar <= 0x7f) { |
targetUniChar = jisx201ToU(mySourceChar); |
@@ -2205,9 +2246,11 @@ getTrailByte: |
} else { |
/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ |
mySourceChar = tmpSourceChar; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if (cs == KSC5601) { |
tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ |
} |
+#endif |
tempBuf[0] = (char)(tmpSourceChar >> 8); |
tempBuf[1] = (char)(tmpSourceChar); |
} |
@@ -2269,6 +2312,7 @@ endloop: |
} |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
/*************************************************************** |
* Rules for ISO-2022-KR encoding |
* i) The KSC5601 designator sequence should appear only once in a file, |
@@ -3412,6 +3456,7 @@ endloop: |
args->target = myTarget; |
args->source = mySource; |
} |
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
static void |
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { |
@@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
/* include JIS X 0201 which is hardcoded */ |
sa->add(sa->set, 0xa5); |
sa->add(sa->set, 0x203e); |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
/* include Latin-1 for some variants of JP */ |
sa->addRange(sa->set, 0, 0xff); |
@@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
/* include ASCII for JP */ |
sa->addRange(sa->set, 0, 0x7f); |
} |
+#else |
+ /* include ASCII for JP */ |
+ sa->addRange(sa->set, 0, 0x7f); |
+#endif |
if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { |
/* |
* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 |
@@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
} |
break; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
case 'c': |
case 'z': |
/* include ASCII for CN */ |
@@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
cnvData->currentConverter, sa, which, pErrorCode); |
/* the loop over myConverterArray[] will simply not find another converter */ |
break; |
+#endif |
default: |
break; |
} |
@@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
UConverterSetFilter filter; |
if(cnvData->myConverterArray[i]!=NULL) { |
- if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
- cnvData->version==0 && i==CNS_11643 |
- ) { |
+ if(cnvData->locale[0]=='j' && i==JISX208) { |
+ /* |
+ * Only add code points that map to Shift-JIS codes |
+ * corresponding to JIS X 0208. |
+ */ |
+ filter=UCNV_SET_FILTER_SJIS; |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
+ } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
+ cnvData->version==0 && i==CNS_11643) { |
/* |
* Version-specific for CN: |
* CN version 0 does not map CNS planes 3..7 although |
@@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
* The two versions create different Unicode sets. |
*/ |
filter=UCNV_SET_FILTER_2022_CN; |
- } else if(cnvData->locale[0]=='j' && i==JISX208) { |
- /* |
- * Only add code points that map to Shift-JIS codes |
- * corresponding to JIS X 0208. |
- */ |
- filter=UCNV_SET_FILTER_SJIS; |
} else if(i==KSC5601) { |
/* |
* Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) |
* are broader than GR94. |
*/ |
filter=UCNV_SET_FILTER_GR94DBCS; |
+#endif |
} else { |
filter=UCNV_SET_FILTER_NONE; |
} |
@@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={ |
} // namespace |
+#if !UCONFIG_NO_NON_HTML5_CONVERSION |
/************* KR ***************/ |
static const UConverterImpl _ISO2022KRImpl={ |
UCNV_ISO_2022, |
@@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={ |
}; |
} // namespace |
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |