source/common/ucnv2022.cpp - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/common/ucnv2022.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/common/ucnv2022.cpp

diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp

index ea165eca5c783b4c64e6f773bc7489e3cac988d2..0c12dd0d7ba3dc2c144ac19e31b1bd4dc90c0c89 100644

--- a/source/common/ucnv2022.cpp

+++ b/source/common/ucnv2022.cpp

@@ -1,6 +1,6 @@

**********************************************************************

* file name: ucnv2022.cpp

@@ -75,8 +75,10 @@

#endif

+#if !UCONFIG_ONLY_HTML_CONVERSION

static const char SHIFT_IN_STR[] = "\x0F";

// static const char SHIFT_OUT_STR[] = "\x0E";

+#endif

#define CR 0x0D

#define LF 0x0A

@@ -152,7 +154,7 @@ typedef enum {

} StateEnum;

/* is the StateEnum charset value for a DBCS charset? */

-#if UCONFIG_NO_NON_HTML5_CONVERSION

+#if UCONFIG_ONLY_HTML_CONVERSION

#define IS_JP_DBCS(cs) (JISX208==(cs))

#else

#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)

@@ -169,18 +171,14 @@ typedef enum {

* all versions, not just JIS7 and JIS8.

* - ICU does not distinguish between different versions of JIS X 0208.

-#if UCONFIG_NO_NON_HTML5_CONVERSION

+#if UCONFIG_ONLY_HTML_CONVERSION

enum { MAX_JA_VERSION=0 };

#else

enum { MAX_JA_VERSION=4 };

#endif

static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={

-/*

- * TODO(jshin): The encoding spec has JISX212, but we don't support it.

- * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885

- */

CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),

@@ -377,10 +375,8 @@ typedef enum{

#ifdef U_ENABLE_GENERIC_ISO_2022

ISO_2022=0,

#endif

-#if UCONFIG_NO_NON_HTML5_CONVERSION

- ISO_2022_JP=1

-#else

ISO_2022_JP=1,

+#if !UCONFIG_ONLY_HTML_CONVERSION

ISO_2022_KR=2,

ISO_2022_CN=3

#endif

@@ -414,8 +410,11 @@ namespace {

/*const UConverterSharedData _ISO2022Data;*/

extern const UConverterSharedData _ISO2022JPData;

+#if !UCONFIG_ONLY_HTML_CONVERSION

extern const UConverterSharedData _ISO2022KRData;

extern const UConverterSharedData _ISO2022CNData;

+#endif

} // namespace

@@ -494,21 +493,20 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&

(myLocale[2]=='_' || myLocale[2]=='\0'))

{

- size_t len=0;

/* open the required converters and cache them */

if(version>MAX_JA_VERSION) {

- /* prevent indexing beyond jpCharsetMasks[] */

- myConverterData->version = version = 0;

+ // Since ICU 55, opening a converter for an unsupported version fails.

+ // Previously, it fell back to version 0, but that would yield

+ // unexpected behavior.

+ *errorCode = U_MISSING_RESOURCE_ERROR;

+ return;

}

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

if(jpCharsetMasks[version]&CSM(ISO8859_7)) {

myConverterData->myConverterArray[ISO8859_7] =

ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);

}

-#endif

myConverterData->myConverterArray[JISX208] =

ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

if(jpCharsetMasks[version]&CSM(JISX212)) {

myConverterData->myConverterArray[JISX212] =

ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);

@@ -521,21 +519,27 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

myConverterData->myConverterArray[KSC5601] =

ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);

}

-#endif

/* set the function pointers to appropriate functions */

cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);

uprv_strcpy(myConverterData->locale,"ja");

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");

- len = uprv_strlen(myConverterData->name);

+ size_t len = uprv_strlen(myConverterData->name);

myConverterData->name[len]=(char)(myConverterData->version+(int)'0');

myConverterData->name[len+1]='\0';

}

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&

(myLocale[2]=='_' || myLocale[2]=='\0'))

{

+ if(version>1) {

+ // Since ICU 55, opening a converter for an unsupported version fails.

+ // Previously, it fell back to version 0, but that would yield

+ // unexpected behavior.

+ *errorCode = U_MISSING_RESOURCE_ERROR;

+ return;

+ }

const char *cnvName;

if(version==1) {

cnvName="icu-internal-25546";

@@ -575,6 +579,13 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&

(myLocale[2]=='_' || myLocale[2]=='\0'))

{

+ if(version>2) {

+ // Since ICU 55, opening a converter for an unsupported version fails.

+ // Previously, it fell back to version 0, but that would yield

+ // unexpected behavior.

+ *errorCode = U_MISSING_RESOURCE_ERROR;

+ return;

+ }

/* open the required converters and cache them */

myConverterData->myConverterArray[GB2312_1] =

@@ -602,7 +613,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");

}

-#endif // !UCONFIG_NO_NON_HTML5_CONVERSION

+#endif // !UCONFIG_ONLY_HTML_CONVERSION

else{

#ifdef U_ENABLE_GENERIC_ISO_2022

myConverterData->isFirstBuffer = TRUE;

@@ -617,7 +628,9 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

/* initialize the state variables */

uprv_strcpy(myConverterData->name,"ISO_2022");

#else

- *errorCode = U_UNSUPPORTED_ERROR;

+ *errorCode = U_MISSING_RESOURCE_ERROR;

+ // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard

+ // data loading error code.

return;

#endif

}

@@ -737,7 +750,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

};

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

/*************** to unicode *******************/

static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

/* 0 1 2 3 4 5 6 7 8 9 */

@@ -903,7 +916,6 @@ DONE:

}

break;

/* case SS3_STATE: not used in ISO-2022-JP-x */

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

case ISO8859_1:

case ISO8859_7:

if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

@@ -913,7 +925,6 @@ DONE:

myData2022->toU2022State.cs[2]=(int8_t)tempState;

}

break;

-#endif

default:

if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

@@ -925,7 +936,7 @@ DONE:

}

break;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

case ISO_2022_CN:

{

StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];

@@ -987,7 +998,7 @@ DONE:

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

}

break;

-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

+#endif // !UCONFIG_ONLY_HTML_CONVERSION

default:

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

@@ -1030,6 +1041,7 @@ DONE:

}

+#if !UCONFIG_ONLY_HTML_CONVERSION

/*Checks the characters of the buffer against valid 2022 escape sequences

*if they match we return a pointer to the initial start of the sequence; otherwise

*we return sourceLimit

@@ -1084,7 +1096,7 @@ getEndOfBuffer_2022(const char** source,

return mySource;

#endif

}

+#endif

/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c

* any future change in _MBCSFromUChar32() function should be reflected here.

@@ -1408,16 +1420,12 @@ toUnicodeCallback(UConverter *cnv,

static const StateEnum jpCharsetPref[]={

ASCII,

JISX201,

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

ISO8859_1,

- ISO8859_7,

-#endif

JISX208,

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+ ISO8859_7,

JISX212,

GB2312,

KSC5601,

-#endif

HWKANA_7BIT

};

@@ -1787,7 +1795,6 @@ getTrail:

g = 0;

}

break;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

case ISO8859_1:

if(GR96_START <= sourceChar && sourceChar <= GR96_END) {

targetValue = (uint32_t)sourceChar - 0x80;

@@ -1796,7 +1803,6 @@ getTrail:

g = 2;

}

break;

-#endif

case HWKANA_7BIT:

if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {

if(converterData->version==3) {

@@ -1858,7 +1864,6 @@ getTrail:

useFallback = FALSE;

}

break;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

case ISO8859_7:

/* G0 SBCS forced to 7-bit output */

len2 = MBCS_SINGLE_FROM_UCHAR32(

@@ -1873,7 +1878,6 @@ getTrail:

useFallback = FALSE;

}

break;

-#endif

default:

/* G0 DBCS */

len2 = MBCS_FROM_UCHAR32_ISO2022(

@@ -1881,7 +1885,6 @@ getTrail:

sourceChar, &value,

useFallback, MBCS_OUTPUT_2);

if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

if(cs0 == KSC5601) {

* Check for valid bytes for the encoding scheme.

@@ -1893,7 +1896,6 @@ getTrail:

break;

}

-#endif

targetValue = value;

len = len2;

cs = cs0;

@@ -2187,7 +2189,6 @@ escape:

targetUniChar = mySourceChar;

}

break;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

case ISO8859_1:

if(mySourceChar <= 0x7f) {

targetUniChar = mySourceChar + 0x80;

@@ -2206,7 +2207,6 @@ escape:

/* return from a single-shift state to the previous one */

pToU2022State->g=pToU2022State->prevG;

break;

-#endif

case JISX201:

if(mySourceChar <= 0x7f) {

targetUniChar = jisx201ToU(mySourceChar);

@@ -2246,11 +2246,9 @@ getTrailByte:

} else {

/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */

mySourceChar = tmpSourceChar;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

if (cs == KSC5601) {

tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */

}

-#endif

tempBuf[0] = (char)(tmpSourceChar >> 8);

tempBuf[1] = (char)(tmpSourceChar);

}

@@ -2312,7 +2310,7 @@ endloop:

}

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

/***************************************************************

* Rules for ISO-2022-KR encoding

* i) The KSC5601 designator sequence should appear only once in a file,

@@ -3456,7 +3454,7 @@ endloop:

args->target = myTarget;

args->source = mySource;

}

-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */

static void

_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {

@@ -3658,7 +3656,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

/* include JIS X 0201 which is hardcoded */

sa->add(sa->set, 0xa5);

sa->add(sa->set, 0x203e);

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {

/* include Latin-1 for some variants of JP */

sa->addRange(sa->set, 0, 0xff);

@@ -3666,10 +3663,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

/* include ASCII for JP */

sa->addRange(sa->set, 0, 0x7f);

}

-#else

- /* include ASCII for JP */

- sa->addRange(sa->set, 0, 0x7f);

-#endif

if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0

@@ -3688,7 +3681,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

sa->addRange(sa->set, HWKANA_START, HWKANA_END);

}

break;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

case 'c':

case 'z':

/* include ASCII for CN */

@@ -3727,7 +3720,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

* corresponding to JIS X 0208.

filter=UCNV_SET_FILTER_SJIS;

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

} else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

cnvData->version==0 && i==CNS_11643) {

@@ -3866,7 +3859,7 @@ const UConverterSharedData _ISO2022JPData=

} // namespace

-#if !UCONFIG_NO_NON_HTML5_CONVERSION

+#if !UCONFIG_ONLY_HTML_CONVERSION

/************* KR ***************/

static const UConverterImpl _ISO2022KRImpl={

UCNV_ISO_2022,

@@ -3967,6 +3960,6 @@ const UConverterSharedData _ISO2022CNData=

UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);

} // namespace

-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */

#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »