| Index: source/common/ucnv2022.cpp
|
| diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
|
| index ea165eca5c783b4c64e6f773bc7489e3cac988d2..582865af6a26708563dd586e13b9cda163685057 100644
|
| --- a/source/common/ucnv2022.cpp
|
| +++ b/source/common/ucnv2022.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| **********************************************************************
|
| -* Copyright (C) 2000-2014, International Business Machines
|
| +* Copyright (C) 2000-2015, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| **********************************************************************
|
| * file name: ucnv2022.cpp
|
| @@ -75,8 +75,10 @@
|
| */
|
| #endif
|
|
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| static const char SHIFT_IN_STR[] = "\x0F";
|
| // static const char SHIFT_OUT_STR[] = "\x0E";
|
| +#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
|
|
| #define CR 0x0D
|
| #define LF 0x0A
|
| @@ -152,7 +154,7 @@ typedef enum {
|
| } StateEnum;
|
|
|
| /* is the StateEnum charset value for a DBCS charset? */
|
| -#if UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if UCONFIG_ONLY_HTML_CONVERSION
|
| #define IS_JP_DBCS(cs) (JISX208==(cs))
|
| #else
|
| #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
|
| @@ -169,18 +171,14 @@ typedef enum {
|
| * all versions, not just JIS7 and JIS8.
|
| * - ICU does not distinguish between different versions of JIS X 0208.
|
| */
|
| -#if UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if UCONFIG_ONLY_HTML_CONVERSION
|
| enum { MAX_JA_VERSION=0 };
|
| #else
|
| enum { MAX_JA_VERSION=4 };
|
| #endif
|
| static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
|
| -/*
|
| - * TODO(jshin): The encoding spec has JISX212, but we don't support it.
|
| - * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
|
| - */
|
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
|
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
|
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
|
| @@ -377,10 +375,8 @@ typedef enum{
|
| #ifdef U_ENABLE_GENERIC_ISO_2022
|
| ISO_2022=0,
|
| #endif
|
| -#if UCONFIG_NO_NON_HTML5_CONVERSION
|
| - ISO_2022_JP=1
|
| -#else
|
| ISO_2022_JP=1,
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| ISO_2022_KR=2,
|
| ISO_2022_CN=3
|
| #endif
|
| @@ -414,8 +410,11 @@ namespace {
|
|
|
| /*const UConverterSharedData _ISO2022Data;*/
|
| extern const UConverterSharedData _ISO2022JPData;
|
| +
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| extern const UConverterSharedData _ISO2022KRData;
|
| extern const UConverterSharedData _ISO2022CNData;
|
| +#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
|
|
| } // namespace
|
|
|
| @@ -500,15 +499,12 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| /* prevent indexing beyond jpCharsetMasks[] */
|
| myConverterData->version = version = 0;
|
| }
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
|
| myConverterData->myConverterArray[ISO8859_7] =
|
| ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
|
| }
|
| -#endif
|
| myConverterData->myConverterArray[JISX208] =
|
| ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| if(jpCharsetMasks[version]&CSM(JISX212)) {
|
| myConverterData->myConverterArray[JISX212] =
|
| ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
|
| @@ -521,7 +517,6 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| myConverterData->myConverterArray[KSC5601] =
|
| ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
|
| }
|
| -#endif
|
|
|
| /* set the function pointers to appropriate funtions */
|
| cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
|
| @@ -532,7 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
|
| myConverterData->name[len+1]='\0';
|
| }
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
|
| (myLocale[2]=='_' || myLocale[2]=='\0'))
|
| {
|
| @@ -602,7 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
|
| }
|
| }
|
| -#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
| else{
|
| #ifdef U_ENABLE_GENERIC_ISO_2022
|
| myConverterData->isFirstBuffer = TRUE;
|
| @@ -737,7 +732,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
|
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
| };
|
|
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| /*************** to unicode *******************/
|
| static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
|
| /* 0 1 2 3 4 5 6 7 8 9 */
|
| @@ -903,7 +898,6 @@ DONE:
|
| }
|
| break;
|
| /* case SS3_STATE: not used in ISO-2022-JP-x */
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| case ISO8859_1:
|
| case ISO8859_7:
|
| if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
|
| @@ -913,7 +907,6 @@ DONE:
|
| myData2022->toU2022State.cs[2]=(int8_t)tempState;
|
| }
|
| break;
|
| -#endif
|
| default:
|
| if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
|
| *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
|
| @@ -925,7 +918,7 @@ DONE:
|
| }
|
| }
|
| break;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| case ISO_2022_CN:
|
| {
|
| StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
|
| @@ -987,7 +980,7 @@ DONE:
|
| *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
|
| }
|
| break;
|
| -#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
|
| +#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
|
|
| default:
|
| *err = U_ILLEGAL_ESCAPE_SEQUENCE;
|
| @@ -1030,6 +1023,7 @@ DONE:
|
| }
|
| }
|
|
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| /*Checks the characters of the buffer against valid 2022 escape sequences
|
| *if the match we return a pointer to the initial start of the sequence otherwise
|
| *we return sourceLimit
|
| @@ -1084,7 +1078,7 @@ getEndOfBuffer_2022(const char** source,
|
| return mySource;
|
| #endif
|
| }
|
| -
|
| +#endif // !UCONFIG_ONLY_HTML_CONVERSION
|
|
|
| /* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
|
| * any future change in _MBCSFromUChar32() function should be reflected here.
|
| @@ -1408,16 +1402,12 @@ toUnicodeCallback(UConverter *cnv,
|
| static const StateEnum jpCharsetPref[]={
|
| ASCII,
|
| JISX201,
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| ISO8859_1,
|
| ISO8859_7,
|
| -#endif
|
| JISX208,
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| JISX212,
|
| GB2312,
|
| KSC5601,
|
| -#endif
|
| HWKANA_7BIT
|
| };
|
|
|
| @@ -1787,7 +1777,6 @@ getTrail:
|
| g = 0;
|
| }
|
| break;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| case ISO8859_1:
|
| if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
|
| targetValue = (uint32_t)sourceChar - 0x80;
|
| @@ -1796,7 +1785,6 @@ getTrail:
|
| g = 2;
|
| }
|
| break;
|
| -#endif
|
| case HWKANA_7BIT:
|
| if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
|
| if(converterData->version==3) {
|
| @@ -1858,7 +1846,6 @@ getTrail:
|
| useFallback = FALSE;
|
| }
|
| break;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| case ISO8859_7:
|
| /* G0 SBCS forced to 7-bit output */
|
| len2 = MBCS_SINGLE_FROM_UCHAR32(
|
| @@ -1873,7 +1860,6 @@ getTrail:
|
| useFallback = FALSE;
|
| }
|
| break;
|
| -#endif
|
| default:
|
| /* G0 DBCS */
|
| len2 = MBCS_FROM_UCHAR32_ISO2022(
|
| @@ -1881,7 +1867,6 @@ getTrail:
|
| sourceChar, &value,
|
| useFallback, MBCS_OUTPUT_2);
|
| if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| if(cs0 == KSC5601) {
|
| /*
|
| * Check for valid bytes for the encoding scheme.
|
| @@ -1893,7 +1878,6 @@ getTrail:
|
| break;
|
| }
|
| }
|
| -#endif
|
| targetValue = value;
|
| len = len2;
|
| cs = cs0;
|
| @@ -2187,7 +2171,6 @@ escape:
|
| targetUniChar = mySourceChar;
|
| }
|
| break;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| case ISO8859_1:
|
| if(mySourceChar <= 0x7f) {
|
| targetUniChar = mySourceChar + 0x80;
|
| @@ -2206,7 +2189,6 @@ escape:
|
| /* return from a single-shift state to the previous one */
|
| pToU2022State->g=pToU2022State->prevG;
|
| break;
|
| -#endif
|
| case JISX201:
|
| if(mySourceChar <= 0x7f) {
|
| targetUniChar = jisx201ToU(mySourceChar);
|
| @@ -2246,11 +2228,9 @@ getTrailByte:
|
| } else {
|
| /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
|
| mySourceChar = tmpSourceChar;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| if (cs == KSC5601) {
|
| tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
|
| }
|
| -#endif
|
| tempBuf[0] = (char)(tmpSourceChar >> 8);
|
| tempBuf[1] = (char)(tmpSourceChar);
|
| }
|
| @@ -2312,7 +2292,7 @@ endloop:
|
| }
|
|
|
|
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| /***************************************************************
|
| * Rules for ISO-2022-KR encoding
|
| * i) The KSC5601 designator sequence should appear only once in a file,
|
| @@ -3456,7 +3436,7 @@ endloop:
|
| args->target = myTarget;
|
| args->source = mySource;
|
| }
|
| -#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
|
| +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
|
|
|
| static void
|
| _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
|
| @@ -3658,7 +3638,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| /* include JIS X 0201 which is hardcoded */
|
| sa->add(sa->set, 0xa5);
|
| sa->add(sa->set, 0x203e);
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
|
| /* include Latin-1 for some variants of JP */
|
| sa->addRange(sa->set, 0, 0xff);
|
| @@ -3666,10 +3645,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| /* include ASCII for JP */
|
| sa->addRange(sa->set, 0, 0x7f);
|
| }
|
| -#else
|
| - /* include ASCII for JP */
|
| - sa->addRange(sa->set, 0, 0x7f);
|
| -#endif
|
| if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
|
| /*
|
| * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
|
| @@ -3688,7 +3663,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| sa->addRange(sa->set, HWKANA_START, HWKANA_END);
|
| }
|
| break;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| case 'c':
|
| case 'z':
|
| /* include ASCII for CN */
|
| @@ -3727,7 +3702,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| * corresponding to JIS X 0208.
|
| */
|
| filter=UCNV_SET_FILTER_SJIS;
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
|
| cnvData->version==0 && i==CNS_11643) {
|
| /*
|
| @@ -3866,7 +3841,7 @@ const UConverterSharedData _ISO2022JPData=
|
|
|
| } // namespace
|
|
|
| -#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +#if !UCONFIG_ONLY_HTML_CONVERSION
|
| /************* KR ***************/
|
| static const UConverterImpl _ISO2022KRImpl={
|
| UCNV_ISO_2022,
|
| @@ -3967,6 +3942,6 @@ const UConverterSharedData _ISO2022CNData=
|
| UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
|
|
|
| } // namespace
|
| -#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
|
| +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
|
|
|
| #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
|
|
|