Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(473)

Unified Diff: source/common/ucnv2022.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/common/ucnv2022.cpp
diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
index ea165eca5c783b4c64e6f773bc7489e3cac988d2..0c12dd0d7ba3dc2c144ac19e31b1bd4dc90c0c89 100644
--- a/source/common/ucnv2022.cpp
+++ b/source/common/ucnv2022.cpp
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2000-2014, International Business Machines
+* Copyright (C) 2000-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv2022.cpp
@@ -75,8 +75,10 @@
*/
#endif
+#if !UCONFIG_ONLY_HTML_CONVERSION
static const char SHIFT_IN_STR[] = "\x0F";
// static const char SHIFT_OUT_STR[] = "\x0E";
+#endif
#define CR 0x0D
#define LF 0x0A
@@ -152,7 +154,7 @@ typedef enum {
} StateEnum;
/* is the StateEnum charset value for a DBCS charset? */
-#if UCONFIG_NO_NON_HTML5_CONVERSION
+#if UCONFIG_ONLY_HTML_CONVERSION
#define IS_JP_DBCS(cs) (JISX208==(cs))
#else
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
@@ -169,18 +171,14 @@ typedef enum {
* all versions, not just JIS7 and JIS8.
* - ICU does not distinguish between different versions of JIS X 0208.
*/
-#if UCONFIG_NO_NON_HTML5_CONVERSION
+#if UCONFIG_ONLY_HTML_CONVERSION
enum { MAX_JA_VERSION=0 };
#else
enum { MAX_JA_VERSION=4 };
#endif
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
-/*
- * TODO(jshin): The encoding spec has JISX212, but we don't support it.
- * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
- */
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
@@ -377,10 +375,8 @@ typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
ISO_2022=0,
#endif
-#if UCONFIG_NO_NON_HTML5_CONVERSION
- ISO_2022_JP=1
-#else
ISO_2022_JP=1,
+#if !UCONFIG_ONLY_HTML_CONVERSION
ISO_2022_KR=2,
ISO_2022_CN=3
#endif
@@ -414,8 +410,11 @@ namespace {
/*const UConverterSharedData _ISO2022Data;*/
extern const UConverterSharedData _ISO2022JPData;
+
+#if !UCONFIG_ONLY_HTML_CONVERSION
extern const UConverterSharedData _ISO2022KRData;
extern const UConverterSharedData _ISO2022CNData;
+#endif
} // namespace
@@ -494,21 +493,20 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
- size_t len=0;
/* open the required converters and cache them */
if(version>MAX_JA_VERSION) {
- /* prevent indexing beyond jpCharsetMasks[] */
- myConverterData->version = version = 0;
+ // ICU 55 fails to open a converter for an unsupported version.
+ // Previously, it fell back to version 0, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
}
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
myConverterData->myConverterArray[ISO8859_7] =
ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
}
-#endif
myConverterData->myConverterArray[JISX208] =
ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
if(jpCharsetMasks[version]&CSM(JISX212)) {
myConverterData->myConverterArray[JISX212] =
ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
@@ -521,21 +519,27 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
myConverterData->myConverterArray[KSC5601] =
ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
}
-#endif
/* set the function pointers to appropriate functions */
cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
uprv_strcpy(myConverterData->locale,"ja");
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");
- len = uprv_strlen(myConverterData->name);
+ size_t len = uprv_strlen(myConverterData->name);
myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
myConverterData->name[len+1]='\0';
}
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
+ if(version>1) {
+ // ICU 55 fails to open a converter for an unsupported version.
+ // Previously, it fell back to version 0, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
const char *cnvName;
if(version==1) {
cnvName="icu-internal-25546";
@@ -575,6 +579,13 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
+ if(version>2) {
+ // ICU 55 fails to open a converter for an unsupported version.
+ // Previously, it fell back to version 0, but that would yield
+ // unexpected behavior.
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ return;
+ }
/* open the required converters and cache them */
myConverterData->myConverterArray[GB2312_1] =
@@ -602,7 +613,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
}
}
-#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
else{
#ifdef U_ENABLE_GENERIC_ISO_2022
myConverterData->isFirstBuffer = TRUE;
@@ -617,7 +628,9 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
/* initialize the state variables */
uprv_strcpy(myConverterData->name,"ISO_2022");
#else
- *errorCode = U_UNSUPPORTED_ERROR;
+ *errorCode = U_MISSING_RESOURCE_ERROR;
+ // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard
+ // data loading error code.
return;
#endif
}
@@ -737,7 +750,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
/*************** to unicode *******************/
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
/* 0 1 2 3 4 5 6 7 8 9 */
@@ -903,7 +916,6 @@ DONE:
}
break;
/* case SS3_STATE: not used in ISO-2022-JP-x */
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_1:
case ISO8859_7:
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
@@ -913,7 +925,6 @@ DONE:
myData2022->toU2022State.cs[2]=(int8_t)tempState;
}
break;
-#endif
default:
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
@@ -925,7 +936,7 @@ DONE:
}
}
break;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
case ISO_2022_CN:
{
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
@@ -987,7 +998,7 @@ DONE:
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
}
break;
-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
+#endif // !UCONFIG_ONLY_HTML_CONVERSION
default:
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
@@ -1030,6 +1041,7 @@ DONE:
}
}
+#if !UCONFIG_ONLY_HTML_CONVERSION
/*Checks the characters of the buffer against valid 2022 escape sequences
*if they match we return a pointer to the initial start of the sequence otherwise
*we return sourceLimit
@@ -1084,7 +1096,7 @@ getEndOfBuffer_2022(const char** source,
return mySource;
#endif
}
-
+#endif
/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c
* any future change in _MBCSFromUChar32() function should be reflected here.
@@ -1408,16 +1420,12 @@ toUnicodeCallback(UConverter *cnv,
static const StateEnum jpCharsetPref[]={
ASCII,
JISX201,
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
ISO8859_1,
- ISO8859_7,
-#endif
JISX208,
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+ ISO8859_7,
JISX212,
GB2312,
KSC5601,
-#endif
HWKANA_7BIT
};
@@ -1787,7 +1795,6 @@ getTrail:
g = 0;
}
break;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_1:
if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
targetValue = (uint32_t)sourceChar - 0x80;
@@ -1796,7 +1803,6 @@ getTrail:
g = 2;
}
break;
-#endif
case HWKANA_7BIT:
if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
if(converterData->version==3) {
@@ -1858,7 +1864,6 @@ getTrail:
useFallback = FALSE;
}
break;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_7:
/* G0 SBCS forced to 7-bit output */
len2 = MBCS_SINGLE_FROM_UCHAR32(
@@ -1873,7 +1878,6 @@ getTrail:
useFallback = FALSE;
}
break;
-#endif
default:
/* G0 DBCS */
len2 = MBCS_FROM_UCHAR32_ISO2022(
@@ -1881,7 +1885,6 @@ getTrail:
sourceChar, &value,
useFallback, MBCS_OUTPUT_2);
if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
if(cs0 == KSC5601) {
/*
* Check for valid bytes for the encoding scheme.
@@ -1893,7 +1896,6 @@ getTrail:
break;
}
}
-#endif
targetValue = value;
len = len2;
cs = cs0;
@@ -2187,7 +2189,6 @@ escape:
targetUniChar = mySourceChar;
}
break;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_1:
if(mySourceChar <= 0x7f) {
targetUniChar = mySourceChar + 0x80;
@@ -2206,7 +2207,6 @@ escape:
/* return from a single-shift state to the previous one */
pToU2022State->g=pToU2022State->prevG;
break;
-#endif
case JISX201:
if(mySourceChar <= 0x7f) {
targetUniChar = jisx201ToU(mySourceChar);
@@ -2246,11 +2246,9 @@ getTrailByte:
} else {
/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
mySourceChar = tmpSourceChar;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
if (cs == KSC5601) {
tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
}
-#endif
tempBuf[0] = (char)(tmpSourceChar >> 8);
tempBuf[1] = (char)(tmpSourceChar);
}
@@ -2312,7 +2310,7 @@ endloop:
}
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
/***************************************************************
* Rules for ISO-2022-KR encoding
* i) The KSC5601 designator sequence should appear only once in a file,
@@ -3456,7 +3454,7 @@ endloop:
args->target = myTarget;
args->source = mySource;
}
-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
static void
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
@@ -3658,7 +3656,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
/* include JIS X 0201 which is hardcoded */
sa->add(sa->set, 0xa5);
sa->add(sa->set, 0x203e);
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
/* include Latin-1 for some variants of JP */
sa->addRange(sa->set, 0, 0xff);
@@ -3666,10 +3663,6 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
/* include ASCII for JP */
sa->addRange(sa->set, 0, 0x7f);
}
-#else
- /* include ASCII for JP */
- sa->addRange(sa->set, 0, 0x7f);
-#endif
if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
/*
* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
@@ -3688,7 +3681,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
sa->addRange(sa->set, HWKANA_START, HWKANA_END);
}
break;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
case 'c':
case 'z':
/* include ASCII for CN */
@@ -3727,7 +3720,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
* corresponding to JIS X 0208.
*/
filter=UCNV_SET_FILTER_SJIS;
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
} else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
cnvData->version==0 && i==CNS_11643) {
/*
@@ -3866,7 +3859,7 @@ const UConverterSharedData _ISO2022JPData=
} // namespace
-#if !UCONFIG_NO_NON_HTML5_CONVERSION
+#if !UCONFIG_ONLY_HTML_CONVERSION
/************* KR ***************/
static const UConverterImpl _ISO2022KRImpl={
UCNV_ISO_2022,
@@ -3967,6 +3960,6 @@ const UConverterSharedData _ISO2022CNData=
UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl);
} // namespace
-#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
+#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
« no previous file with comments | « source/common/ucmndata.h ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698