Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Unified Diff: source/common/ucnv2022.cpp

Issue 587833004: Turn on UCONFIG_NO_NON_HTML5_CONVERTER to save 100kB (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/icu52/
Patch Set: more tests added to desc Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « scripts/ibm866_gen.sh ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/common/ucnv2022.cpp
===================================================================
--- source/common/ucnv2022.cpp (revision 291619)
+++ source/common/ucnv2022.cpp (working copy)
@@ -154,7 +154,11 @@
} StateEnum;
/* is the StateEnum charset value for a DBCS charset? */
+#if UCONFIG_NO_NON_HTML5_CONVERSION
+#define IS_JP_DBCS(cs) (JISX208==(cs))
+#else
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
+#endif
#define CSM(cs) ((uint16_t)1<<(cs))
@@ -173,6 +177,10 @@
enum { MAX_JA_VERSION=4 };
#endif
static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
+/*
+ * TODO(jshin): The encoding spec has JISX212, but we don't support it.
+ * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
+ */
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
#if !UCONFIG_NO_NON_HTML5_CONVERSION
CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
@@ -366,23 +374,15 @@
,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
};
-
-/* Enable ISO-2022-{KR,CN,CN-Ext} for now.
- * TODO(jshin): Disable it when we know what to do about 'replacement'
- * encodings. See http://crbug.com/277037 and
- * https://codereview.chromium.org/145973021/
- */
-#ifndef U_ENABLE_ISO_2022_KR_CN
-#define U_ENABLE_ISO_2022_KR_CN 1
-#endif
-
/* Typedef for refactoring changeState_2022 code */
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
ISO_2022=0,
#endif
+#if UCONFIG_NO_NON_HTML5_CONVERSION
+ ISO_2022_JP=1
+#else
ISO_2022_JP=1,
-#ifdef U_ENABLE_ISO_2022_KR_CN
ISO_2022_KR=2,
ISO_2022_CN=3
#endif
@@ -517,7 +517,7 @@
}
if(jpCharsetMasks[version]&CSM(GB2312)) {
myConverterData->myConverterArray[GB2312] =
- ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
+ ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */
}
if(jpCharsetMasks[version]&CSM(KSC5601)) {
myConverterData->myConverterArray[KSC5601] =
@@ -534,7 +534,7 @@
myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
myConverterData->name[len+1]='\0';
}
-#ifdef U_ENABLE_ISO_2022_KR_CN
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
(myLocale[2]=='_' || myLocale[2]=='\0'))
{
@@ -580,13 +580,13 @@
/* open the required converters and cache them */
myConverterData->myConverterArray[GB2312_1] =
- ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
+ ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
if(version==1) {
myConverterData->myConverterArray[ISO_IR_165] =
- ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
+ ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
}
myConverterData->myConverterArray[CNS_11643] =
- ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
+ ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
/* set the function pointers to appropriate functions */
@@ -604,7 +604,7 @@
(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
}
}
-#endif // U_ENABLE_ISO_2022_KR_CN
+#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
else{
#ifdef U_ENABLE_GENERIC_ISO_2022
myConverterData->isFirstBuffer = TRUE;
@@ -739,6 +739,7 @@
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
/*************** to unicode *******************/
static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
/* 0 1 2 3 4 5 6 7 8 9 */
@@ -751,6 +752,7 @@
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
};
+#endif
static UCNV_TableStates_2022
@@ -903,6 +905,7 @@
}
break;
/* case SS3_STATE: not used in ISO-2022-JP-x */
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_1:
case ISO8859_7:
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
@@ -912,6 +915,7 @@
myData2022->toU2022State.cs[2]=(int8_t)tempState;
}
break;
+#endif
default:
if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
@@ -923,6 +927,7 @@
}
}
break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO_2022_CN:
{
StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
@@ -984,6 +989,7 @@
*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
}
break;
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
default:
*err = U_ILLEGAL_ESCAPE_SEQUENCE;
@@ -1404,12 +1410,16 @@
static const StateEnum jpCharsetPref[]={
ASCII,
JISX201,
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
ISO8859_1,
ISO8859_7,
+#endif
JISX208,
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
JISX212,
GB2312,
KSC5601,
+#endif
HWKANA_7BIT
};
@@ -1779,6 +1789,7 @@
g = 0;
}
break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_1:
if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
targetValue = (uint32_t)sourceChar - 0x80;
@@ -1787,6 +1798,7 @@
g = 2;
}
break;
+#endif
case HWKANA_7BIT:
if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
if(converterData->version==3) {
@@ -1848,6 +1860,7 @@
useFallback = FALSE;
}
break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_7:
/* G0 SBCS forced to 7-bit output */
len2 = MBCS_SINGLE_FROM_UCHAR32(
@@ -1862,6 +1875,7 @@
useFallback = FALSE;
}
break;
+#endif
default:
/* G0 DBCS */
len2 = MBCS_FROM_UCHAR32_ISO2022(
@@ -1869,6 +1883,7 @@
sourceChar, &value,
useFallback, MBCS_OUTPUT_2);
if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
if(cs0 == KSC5601) {
/*
* Check for valid bytes for the encoding scheme.
@@ -1880,6 +1895,7 @@
break;
}
}
+#endif
targetValue = value;
len = len2;
cs = cs0;
@@ -2173,6 +2189,7 @@
targetUniChar = mySourceChar;
}
break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
case ISO8859_1:
if(mySourceChar <= 0x7f) {
targetUniChar = mySourceChar + 0x80;
@@ -2191,6 +2208,7 @@
/* return from a single-shift state to the previous one */
pToU2022State->g=pToU2022State->prevG;
break;
+#endif
case JISX201:
if(mySourceChar <= 0x7f) {
targetUniChar = jisx201ToU(mySourceChar);
@@ -2230,9 +2248,11 @@
} else {
/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
mySourceChar = tmpSourceChar;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
if (cs == KSC5601) {
tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
}
+#endif
tempBuf[0] = (char)(tmpSourceChar >> 8);
tempBuf[1] = (char)(tmpSourceChar);
}
@@ -2294,6 +2314,7 @@
}
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
/***************************************************************
* Rules for ISO-2022-KR encoding
* i) The KSC5601 designator sequence should appear only once in a file,
@@ -3437,6 +3458,7 @@
args->target = myTarget;
args->source = mySource;
}
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
static void
_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
@@ -3638,6 +3660,7 @@
/* include JIS X 0201 which is hardcoded */
sa->add(sa->set, 0xa5);
sa->add(sa->set, 0x203e);
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
/* include Latin-1 for some variants of JP */
sa->addRange(sa->set, 0, 0xff);
@@ -3645,6 +3668,10 @@
/* include ASCII for JP */
sa->addRange(sa->set, 0, 0x7f);
}
+#else
+ /* include ASCII for JP */
+ sa->addRange(sa->set, 0, 0x7f);
+#endif
if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
/*
* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
@@ -3663,6 +3690,7 @@
sa->addRange(sa->set, HWKANA_START, HWKANA_END);
}
break;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
case 'c':
case 'z':
/* include ASCII for CN */
@@ -3674,6 +3702,7 @@
cnvData->currentConverter, sa, which, pErrorCode);
/* the loop over myConverterArray[] will simply not find another converter */
break;
+#endif
default:
break;
}
@@ -3694,10 +3723,16 @@
for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
UConverterSetFilter filter;
if(cnvData->myConverterArray[i]!=NULL) {
- if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
- cnvData->version==0 && i==CNS_11643
- ) {
+ if(cnvData->locale[0]=='j' && i==JISX208) {
/*
+ * Only add code points that map to Shift-JIS codes
+ * corresponding to JIS X 0208.
+ */
+ filter=UCNV_SET_FILTER_SJIS;
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
+ } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
+ cnvData->version==0 && i==CNS_11643) {
+ /*
* Version-specific for CN:
* CN version 0 does not map CNS planes 3..7 although
* they are all available in the CNS conversion table;
@@ -3705,18 +3740,13 @@
* The two versions create different Unicode sets.
*/
filter=UCNV_SET_FILTER_2022_CN;
- } else if(cnvData->locale[0]=='j' && i==JISX208) {
- /*
- * Only add code points that map to Shift-JIS codes
- * corresponding to JIS X 0208.
- */
- filter=UCNV_SET_FILTER_SJIS;
} else if(i==KSC5601) {
/*
* Some of the KSC 5601 tables (convrtrs.txt has this alias on multiple tables)
* are broader than GR94.
*/
filter=UCNV_SET_FILTER_GR94DBCS;
+#endif
} else {
filter=UCNV_SET_FILTER_NONE;
}
@@ -3854,6 +3884,7 @@
} // namespace
+#if !UCONFIG_NO_NON_HTML5_CONVERSION
/************* KR ***************/
static const UConverterImpl _ISO2022KRImpl={
UCNV_ISO_2022,
@@ -3970,5 +4001,6 @@
};
} // namespace
+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
« no previous file with comments | « scripts/ibm866_gen.sh ('k') | source/common/ucnv_bld.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698