patches/uconv.patch - Issue 839713003: ICU update to 54 step 3

Unified Diff: patches/uconv.patch

Issue 839713003: ICU update to 54 step 3 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: fix big5 mapping Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: patches/uconv.patch

diff --git a/patches/uconv.patch b/patches/uconv.patch

index 55e3327b62ae4b231b1892daa451c2f0d0554521..9f997a6a3ab198cd50072cd4097dee2a7b8cba40 100644

--- a/patches/uconv.patch

+++ b/patches/uconv.patch

@@ -1,8 +1,8 @@

-Index: source/common/ucnv2022.cpp

-===================================================================

---- source/common/ucnv2022.cpp (revision 259715)

-+++ source/common/ucnv2022.cpp (working copy)

-@@ -154,7 +154,11 @@

+diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp

+index 9556dd2..7b8df9b 100644

+--- a/source/common/ucnv2022.cpp

++++ b/source/common/ucnv2022.cpp

+@@ -152,7 +152,11 @@ typedef enum {

} StateEnum;

/* is the StateEnum charset value for a DBCS charset? */

@@ -14,7 +14,7 @@ Index: source/common/ucnv2022.cpp

#define CSM(cs) ((uint16_t)1<<(cs))

-@@ -167,13 +171,23 @@

+@@ -165,13 +169,23 @@ typedef enum {

* all versions, not just JIS7 and JIS8.

* - ICU does not distinguish between different versions of JIS X 0208.

@@ -38,7 +38,7 @@ Index: source/common/ucnv2022.cpp

};

typedef enum {

-@@ -360,15 +374,18 @@

+@@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {

,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

};

@@ -58,7 +58,7 @@ Index: source/common/ucnv2022.cpp

} Variant2022;

/*********** ISO 2022 Converter Protos ***********/

-@@ -485,12 +502,15 @@

+@@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

/* prevent indexing beyond jpCharsetMasks[] */

myConverterData->version = version = 0;

}

@@ -74,7 +74,7 @@ Index: source/common/ucnv2022.cpp

if(jpCharsetMasks[version]&CSM(JISX212)) {

myConverterData->myConverterArray[JISX212] =

ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);

-@@ -503,6 +523,7 @@

+@@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

myConverterData->myConverterArray[KSC5601] =

ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);

}

@@ -82,7 +82,7 @@ Index: source/common/ucnv2022.cpp

/* set the function pointers to appropriate funtions */

cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);

-@@ -513,6 +534,7 @@

+@@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

myConverterData->name[len]=(char)(myConverterData->version+(int)'0');

myConverterData->name[len+1]='\0';

}

@@ -90,7 +90,7 @@ Index: source/common/ucnv2022.cpp

else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&

(myLocale[2]=='_' || myLocale[2]=='\0'))

{

-@@ -582,6 +604,7 @@

+@@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");

}

@@ -98,7 +98,7 @@ Index: source/common/ucnv2022.cpp

else{

#ifdef U_ENABLE_GENERIC_ISO_2022

myConverterData->isFirstBuffer = TRUE;

-@@ -716,6 +739,7 @@

+@@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

};

@@ -106,7 +106,7 @@ Index: source/common/ucnv2022.cpp

/*************** to unicode *******************/

static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

/* 0 1 2 3 4 5 6 7 8 9 */

-@@ -728,6 +752,7 @@

+@@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

};

@@ -114,7 +114,7 @@ Index: source/common/ucnv2022.cpp

static UCNV_TableStates_2022

-@@ -880,6 +905,7 @@

+@@ -878,6 +903,7 @@ DONE:

}

break;

/* case SS3_STATE: not used in ISO-2022-JP-x */

@@ -122,7 +122,7 @@ Index: source/common/ucnv2022.cpp

case ISO8859_1:

case ISO8859_7:

if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

-@@ -889,6 +915,7 @@

+@@ -887,6 +913,7 @@ DONE:

myData2022->toU2022State.cs[2]=(int8_t)tempState;

}

break;

@@ -130,7 +130,7 @@ Index: source/common/ucnv2022.cpp

default:

if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

-@@ -900,6 +927,7 @@

+@@ -898,6 +925,7 @@ DONE:

}

break;

@@ -138,7 +138,7 @@ Index: source/common/ucnv2022.cpp

case ISO_2022_CN:

{

StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];

-@@ -961,6 +989,7 @@

+@@ -959,6 +987,7 @@ DONE:

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

}

break;

@@ -146,7 +146,7 @@ Index: source/common/ucnv2022.cpp

default:

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

-@@ -1381,12 +1410,16 @@

+@@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv,

static const StateEnum jpCharsetPref[]={

ASCII,

JISX201,

@@ -163,7 +163,7 @@ Index: source/common/ucnv2022.cpp

HWKANA_7BIT

};

-@@ -1756,6 +1789,7 @@

+@@ -1754,6 +1787,7 @@ getTrail:

g = 0;

}

break;

@@ -171,7 +171,7 @@ Index: source/common/ucnv2022.cpp

case ISO8859_1:

if(GR96_START <= sourceChar && sourceChar <= GR96_END) {

targetValue = (uint32_t)sourceChar - 0x80;

-@@ -1764,6 +1798,7 @@

+@@ -1762,6 +1796,7 @@ getTrail:

g = 2;

}

break;

@@ -179,7 +179,7 @@ Index: source/common/ucnv2022.cpp

case HWKANA_7BIT:

if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {

if(converterData->version==3) {

-@@ -1825,6 +1860,7 @@

+@@ -1823,6 +1858,7 @@ getTrail:

useFallback = FALSE;

}

break;

@@ -187,7 +187,7 @@ Index: source/common/ucnv2022.cpp

case ISO8859_7:

/* G0 SBCS forced to 7-bit output */

len2 = MBCS_SINGLE_FROM_UCHAR32(

-@@ -1839,6 +1875,7 @@

+@@ -1837,6 +1873,7 @@ getTrail:

useFallback = FALSE;

}

break;

@@ -195,7 +195,7 @@ Index: source/common/ucnv2022.cpp

default:

/* G0 DBCS */

len2 = MBCS_FROM_UCHAR32_ISO2022(

-@@ -1846,6 +1883,7 @@

+@@ -1844,6 +1881,7 @@ getTrail:

sourceChar, &value,

useFallback, MBCS_OUTPUT_2);

if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

@@ -203,7 +203,7 @@ Index: source/common/ucnv2022.cpp

if(cs0 == KSC5601) {

* Check for valid bytes for the encoding scheme.

-@@ -1857,6 +1895,7 @@

+@@ -1855,6 +1893,7 @@ getTrail:

break;

}

@@ -211,7 +211,7 @@ Index: source/common/ucnv2022.cpp

targetValue = value;

len = len2;

cs = cs0;

-@@ -2150,6 +2189,7 @@

+@@ -2148,6 +2187,7 @@ escape:

targetUniChar = mySourceChar;

}

break;

@@ -219,7 +219,7 @@ Index: source/common/ucnv2022.cpp

case ISO8859_1:

if(mySourceChar <= 0x7f) {

targetUniChar = mySourceChar + 0x80;

-@@ -2168,6 +2208,7 @@

+@@ -2166,6 +2206,7 @@ escape:

/* return from a single-shift state to the previous one */

pToU2022State->g=pToU2022State->prevG;

break;

@@ -227,7 +227,7 @@ Index: source/common/ucnv2022.cpp

case JISX201:

if(mySourceChar <= 0x7f) {

targetUniChar = jisx201ToU(mySourceChar);

-@@ -2207,9 +2248,11 @@

+@@ -2205,9 +2246,11 @@ getTrailByte:

} else {

/* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */

mySourceChar = tmpSourceChar;

@@ -239,7 +239,7 @@ Index: source/common/ucnv2022.cpp

tempBuf[0] = (char)(tmpSourceChar >> 8);

tempBuf[1] = (char)(tmpSourceChar);

}

-@@ -2271,6 +2314,7 @@

+@@ -2269,6 +2312,7 @@ endloop:

}

@@ -247,7 +247,7 @@ Index: source/common/ucnv2022.cpp

/***************************************************************

* Rules for ISO-2022-KR encoding

* i) The KSC5601 designator sequence should appear only once in a file,

-@@ -3414,6 +3458,7 @@

+@@ -3412,6 +3456,7 @@ endloop:

args->target = myTarget;

args->source = mySource;

}

@@ -255,7 +255,7 @@ Index: source/common/ucnv2022.cpp

static void

_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {

-@@ -3615,6 +3660,7 @@

+@@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

/* include JIS X 0201 which is hardcoded */

sa->add(sa->set, 0xa5);

sa->add(sa->set, 0x203e);

@@ -263,7 +263,7 @@ Index: source/common/ucnv2022.cpp

if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {

/* include Latin-1 for some variants of JP */

sa->addRange(sa->set, 0, 0xff);

-@@ -3622,6 +3668,10 @@

+@@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

/* include ASCII for JP */

sa->addRange(sa->set, 0, 0x7f);

}

@@ -274,7 +274,7 @@ Index: source/common/ucnv2022.cpp

if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {

* Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0

-@@ -3640,6 +3690,7 @@

+@@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

sa->addRange(sa->set, HWKANA_START, HWKANA_END);

}

break;

@@ -282,7 +282,7 @@ Index: source/common/ucnv2022.cpp

case 'c':

case 'z':

/* include ASCII for CN */

-@@ -3651,6 +3702,7 @@

+@@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

cnvData->currentConverter, sa, which, pErrorCode);

/* the loop over myConverterArray[] will simply not find another converter */

break;

@@ -290,7 +290,7 @@ Index: source/common/ucnv2022.cpp

default:

break;

}

-@@ -3671,10 +3723,16 @@

+@@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

UConverterSetFilter filter;

if(cnvData->myConverterArray[i]!=NULL) {

@@ -298,7 +298,7 @@ Index: source/common/ucnv2022.cpp

- cnvData->version==0 && i==CNS_11643

- ) {

+ if(cnvData->locale[0]=='j' && i==JISX208) {

- /*

++ /*

+ * Only add code points that map to Shift-JIS codes

+ * corresponding to JIS X 0208.

+ */

@@ -306,11 +306,10 @@ Index: source/common/ucnv2022.cpp

+#if !UCONFIG_NO_NON_HTML5_CONVERSION

+ } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

+ cnvData->version==0 && i==CNS_11643) {

-+ /*

+ /*

* Version-specific for CN:

* CN version 0 does not map CNS planes 3..7 although

- * they are all available in the CNS conversion table;

-@@ -3682,18 +3740,13 @@

+@@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,

* The two versions create different Unicode sets.

filter=UCNV_SET_FILTER_2022_CN;

@@ -330,7 +329,7 @@ Index: source/common/ucnv2022.cpp

} else {

filter=UCNV_SET_FILTER_NONE;

}

-@@ -3831,6 +3884,7 @@

+@@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={

} // namespace

@@ -338,89 +337,18 @@ Index: source/common/ucnv2022.cpp

/************* KR ***************/

static const UConverterImpl _ISO2022KRImpl={

UCNV_ISO_2022,

-@@ -3947,5 +4001,6 @@

+@@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={

};

} // namespace

+#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */

#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

-Index: source/common/ucnvbocu.cpp

-===================================================================

---- source/common/ucnvbocu.cpp (revision 259715)

-+++ source/common/ucnvbocu.cpp (working copy)

-@@ -19,7 +19,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "unicode/ucnv.h"

- #include "unicode/ucnv_cb.h"

-Index: source/common/ucnvisci.c

-===================================================================

---- source/common/ucnvisci.c (revision 259715)

-+++ source/common/ucnvisci.c (working copy)

-@@ -17,7 +17,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "unicode/ucnv.h"

- #include "unicode/ucnv_cb.h"

-Index: source/common/ucnvscsu.c

-===================================================================

---- source/common/ucnvscsu.c (revision 259715)

-+++ source/common/ucnvscsu.c (working copy)

-@@ -21,7 +21,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "unicode/ucnv.h"

- #include "unicode/ucnv_cb.h"

-Index: source/common/ucnv_u7.c

-===================================================================

---- source/common/ucnv_u7.c (revision 259715)

-+++ source/common/ucnv_u7.c (working copy)

-@@ -16,7 +16,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "unicode/ucnv.h"

- #include "ucnv_bld.h"

-Index: source/common/unicode/uconfig.h

-===================================================================

---- source/common/unicode/uconfig.h (revision 259715)

-+++ source/common/unicode/uconfig.h (working copy)

-@@ -265,6 +265,14 @@

- #endif

- /**

-+ * This switch turns off all the converters NOT listed in

-+ * the encoding standard : http://encoding.spec.whatwg.org

-+ */

-+#ifndef UCONFIG_NO_NON_HTML5_CONVERSION

-+#define UCONFIG_NO_NON_HTML5_CONVERSION 0

-+#endif

-+/**

- * \def UCONFIG_NO_LEGACY_CONVERSION

- * This switch turns off all converters except for

- * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)

-Index: source/common/ucnv_bld.cpp

-===================================================================

---- source/common/ucnv_bld.cpp (revision 259715)

-+++ source/common/ucnv_bld.cpp (working copy)

-@@ -69,28 +69,41 @@

+diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp

+index 4940310..047f18a 100644

+--- a/source/common/ucnv_bld.cpp

++++ b/source/common/ucnv_bld.cpp

+@@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={

#if UCONFIG_NO_LEGACY_CONVERSION

NULL,

@@ -465,7 +393,7 @@ Index: source/common/ucnv_bld.cpp

NULL,

#else

&_CompoundTextData

-@@ -105,18 +118,24 @@

+@@ -105,18 +118,24 @@ static struct {

const char *name;

const UConverterType type;

} const cnvNameType[] = {

@@ -479,12 +407,12 @@ Index: source/common/ucnv_bld.cpp

#endif

+#if !UCONFIG_NO_NON_HTML5_CONVERSION

{ "imapmailboxname", UCNV_IMAP_MAILBOX },

+-#if !UCONFIG_NO_LEGACY_CONVERSION

+#endif

+#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

-+ { "iscii", UCNV_ISCII },

+ { "iscii", UCNV_ISCII },

+#endif

- #if !UCONFIG_NO_LEGACY_CONVERSION

-- { "iscii", UCNV_ISCII },

++#if !UCONFIG_NO_LEGACY_CONVERSION

{ "iso2022", UCNV_ISO_2022 },

#endif

{ "iso88591", UCNV_LATIN_1 },

@@ -493,7 +421,7 @@ Index: source/common/ucnv_bld.cpp

{ "lmbcs1", UCNV_LMBCS_1 },

{ "lmbcs11",UCNV_LMBCS_11 },

{ "lmbcs16",UCNV_LMBCS_16 },

-@@ -130,7 +149,9 @@

+@@ -130,7 +149,9 @@ static struct {

{ "lmbcs6", UCNV_LMBCS_6 },

{ "lmbcs8", UCNV_LMBCS_8 },

#endif

@@ -503,7 +431,7 @@ Index: source/common/ucnv_bld.cpp

{ "usascii", UCNV_US_ASCII },

{ "utf16", UCNV_UTF16 },

{ "utf16be", UCNV_UTF16_BigEndian },

-@@ -152,9 +173,13 @@

+@@ -152,9 +173,13 @@ static struct {

{ "utf32oppositeendian", UCNV_UTF32_BigEndian },

{ "utf32platformendian", UCNV_UTF32_LittleEndian },

#endif

@@ -517,11 +445,71 @@ Index: source/common/ucnv_bld.cpp

};

-Index: source/common/ucnv_u8.c

-===================================================================

---- source/common/ucnv_u8.c (revision 259715)

-+++ source/common/ucnv_u8.c (working copy)

-@@ -87,6 +87,15 @@

+diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h

+index 402e2c9..5fad446 100644

+--- a/source/common/ucnv_cnv.h

++++ b/source/common/ucnv_cnv.h

+@@ -256,11 +256,15 @@ struct UConverterImpl {

+ extern const UConverterSharedData

+ _MBCSData, _Latin1Data,

+ _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,

+- _ISO2022Data,

++ _ISO2022Data,

++#if !UCONFIG_NO_NON_HTML5_CONVERSION

+ _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,

+ _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,

+ _HZData,_ISCIIData, _SCSUData, _ASCIIData,

+ _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;

++#else

++ _ASCIIData, _UTF16Data, _UTF32Data;

++#endif

+ U_CDECL_END

+diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c

+index ec0e9c2..e723fa6 100644

+--- a/source/common/ucnv_ct.c

++++ b/source/common/ucnv_ct.c

+@@ -14,7 +14,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "unicode/ucnv.h"

+ #include "unicode/uset.h"

+diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c

+index 1d921dd..a4fccee 100644

+--- a/source/common/ucnv_lmb.c

++++ b/source/common/ucnv_lmb.c

+@@ -25,7 +25,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "unicode/ucnv_err.h"

+ #include "unicode/ucnv.h"

+diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c

+index 42943f4..6466b87 100644

+--- a/source/common/ucnv_u7.c

++++ b/source/common/ucnv_u7.c

+@@ -16,7 +16,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "unicode/ucnv.h"

+ #include "ucnv_bld.h"

+diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c

+index 8ee9fe5..24205f5 100644

+--- a/source/common/ucnv_u8.c

++++ b/source/common/ucnv_u8.c

+@@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {

static const uint32_t

utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };

@@ -537,7 +525,7 @@ Index: source/common/ucnv_u8.c

static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,

UErrorCode * err)

{

-@@ -96,10 +105,10 @@

+@@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,

const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;

const UChar *targetLimit = args->targetLimit;

unsigned char *toUBytes = cnv->toUBytes;

@@ -550,7 +538,7 @@ Index: source/common/ucnv_u8.c

/* Restore size of current sequence */

if (cnv->toUnicodeStatus && myTarget < targetLimit)

{

-@@ -226,7 +235,7 @@

+@@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,

const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;

const UChar *targetLimit = args->targetLimit;

unsigned char *toUBytes = cnv->toUBytes;

@@ -559,7 +547,7 @@ Index: source/common/ucnv_u8.c

uint32_t ch, ch2 = 0;

int32_t i, inBytes;

-@@ -357,7 +366,7 @@

+@@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,

UChar32 ch;

uint8_t tempBuf[4];

int32_t indexToWrite;

@@ -568,7 +556,7 @@ Index: source/common/ucnv_u8.c

if (cnv->fromUChar32 && myTarget < targetLimit)

{

-@@ -473,7 +482,7 @@

+@@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar

int32_t offsetNum, nextSourceIndex;

int32_t indexToWrite;

uint8_t tempBuf[4];

@@ -577,10 +565,87 @@ Index: source/common/ucnv_u8.c

if (cnv->fromUChar32 && myTarget < targetLimit)

{

-Index: source/common/unicode/urename.h

-===================================================================

---- source/common/unicode/urename.h (revision 259715)

-+++ source/common/unicode/urename.h (working copy)

+diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp

+index b97d666..281d6d9 100644

+--- a/source/common/ucnvbocu.cpp

++++ b/source/common/ucnvbocu.cpp

+@@ -19,7 +19,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "unicode/ucnv.h"

+ #include "unicode/ucnv_cb.h"

+diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c

+index 3760c39..51825e2 100644

+--- a/source/common/ucnvhz.c

++++ b/source/common/ucnvhz.c

+@@ -16,7 +16,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "cmemory.h"

+ #include "unicode/ucnv.h"

+@@ -635,4 +635,4 @@ const UConverterSharedData _HZData={

+ 0

+ };

+-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

++#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */

+diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c

+index fe61d40..16fd0a3 100644

+--- a/source/common/ucnvisci.c

++++ b/source/common/ucnvisci.c

+@@ -17,7 +17,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "unicode/ucnv.h"

+ #include "unicode/ucnv_cb.h"

+diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c

+index c6e96e1..a6f8c9e 100644

+--- a/source/common/ucnvscsu.c

++++ b/source/common/ucnvscsu.c

+@@ -21,7 +21,7 @@

+ #include "unicode/utypes.h"

+-#if !UCONFIG_NO_CONVERSION

++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

+ #include "unicode/ucnv.h"

+ #include "unicode/ucnv_cb.h"

+diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h

+index ed073b6..8df56e6 100644

+--- a/source/common/unicode/uconfig.h

++++ b/source/common/unicode/uconfig.h

+@@ -270,6 +270,14 @@

+ #endif

+ /**

++ * This switch turns off all the converters NOT listed in

++ * the encoding standard : http://encoding.spec.whatwg.org

++ */

++#ifndef UCONFIG_NO_NON_HTML5_CONVERSION

++#define UCONFIG_NO_NON_HTML5_CONVERSION 0

++#endif

++/**

+ * \def UCONFIG_NO_LEGACY_CONVERSION

+ * This switch turns off all converters except for

+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)

+diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h

+index a817262..89becca 100644

+--- a/source/common/unicode/urename.h

++++ b/source/common/unicode/urename.h

@@ -73,12 +73,14 @@

#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)

#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)

@@ -615,133 +680,31 @@ Index: source/common/unicode/urename.h

#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)

#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)

#define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)

-Index: source/common/ucnv_cnv.h

-===================================================================

---- source/common/ucnv_cnv.h (revision 259715)

-+++ source/common/ucnv_cnv.h (working copy)

-@@ -256,11 +256,15 @@

- extern const UConverterSharedData

- _MBCSData, _Latin1Data,

- _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,

-- _ISO2022Data,

-+ _ISO2022Data,

-+#if !UCONFIG_NO_NON_HTML5_CONVERSION

- _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,

- _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,

- _HZData,_ISCIIData, _SCSUData, _ASCIIData,

- _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;

-+#else

-+ _ASCIIData, _UTF16Data, _UTF32Data;

-+#endif

- U_CDECL_END

-Index: source/common/ucnv_lmb.c

-===================================================================

---- source/common/ucnv_lmb.c (revision 291619)

-+++ source/common/ucnv_lmb.c (working copy)

-@@ -25,7 +25,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "unicode/ucnv_err.h"

- #include "unicode/ucnv.h"

-Index: source/common/ucnvhz.c

-===================================================================

---- source/common/ucnvhz.c (revision 291619)

-+++ source/common/ucnvhz.c (working copy)

-@@ -16,7 +16,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "cmemory.h"

- #include "unicode/ucnv.h"

-@@ -637,4 +637,4 @@

- 0

- };

--#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

-+#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */

-Index: source/common/ucnv_ct.c

-===================================================================

---- source/common/ucnv_ct.c (revision 291619)

-+++ source/common/ucnv_ct.c (working copy)

-@@ -14,7 +14,7 @@

- #include "unicode/utypes.h"

--#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

-+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION

- #include "unicode/ucnv.h"

- #include "unicode/uset.h"

-Index: source/i18n/csrsbcs.h

-===================================================================

---- source/i18n/csrsbcs.h (revision 291619)

-+++ source/i18n/csrsbcs.h (working copy)

-@@ -50,6 +50,7 @@

- };

-+#if !UCONFIG_NO_NON_HTML5_CONVERSION

- class NGramParser_IBM420 : public NGramParser

- {

- private:

-@@ -61,6 +62,7 @@

- public:

- NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);

- };

-+#endif

- class CharsetRecog_sbcs : public CharsetRecognizer

-@@ -229,6 +231,7 @@

- virtual UBool match(InputText *det, CharsetMatch *results) const;

- };

-+#if !UCONFIG_NO_NON_HTML5_CONVERSION

- class CharsetRecog_IBM424_he : public CharsetRecog_sbcs

- {

- public:

-@@ -280,6 +283,7 @@

- virtual UBool match(InputText *det, CharsetMatch *results) const;

- };

-+#endif

- U_NAMESPACE_END

-Index: source/i18n/csr2022.h

-===================================================================

---- source/i18n/csr2022.h (revision 291619)

-+++ source/i18n/csr2022.h (working copy)

-@@ -65,6 +65,7 @@

- UBool match(InputText *textIn, CharsetMatch *results) const;

- };

+diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp

+index 3efbd49..ba5b18c 100644

+--- a/source/i18n/csdetect.cpp

++++ b/source/i18n/csdetect.cpp

+@@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {

+ new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),

+ new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),

+#if !UCONFIG_NO_NON_HTML5_CONVERSION

- class CharsetRecog_2022KR :public CharsetRecog_2022 {

- public:

- virtual ~CharsetRecog_2022KR();

-@@ -84,6 +85,7 @@

+ new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),

+ new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),

- UBool match(InputText *textIn, CharsetMatch *results) const;

- };

+@@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {

+ new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),

+ new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),

+ new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)

+#endif

+ };

+ int32_t rCount = ARRAY_SIZE(tempArray);

- U_NAMESPACE_END

-Index: source/i18n/csr2022.cpp

-===================================================================

---- source/i18n/csr2022.cpp (revision 291619)

-+++ source/i18n/csr2022.cpp (working copy)

-@@ -119,6 +119,7 @@

+diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp

+index 3db0bc9..be3eafa 100644

+--- a/source/i18n/csr2022.cpp

++++ b/source/i18n/csr2022.cpp

+@@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {

{0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7

};

@@ -749,7 +712,7 @@ Index: source/i18n/csr2022.cpp

static const uint8_t escapeSequences_2022KR[][5] = {

{0x1b, 0x24, 0x29, 0x43, 0x00}

};

-@@ -136,6 +137,7 @@

+@@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {

{0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2

{0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3

};

@@ -757,7 +720,7 @@ Index: source/i18n/csr2022.cpp

CharsetRecog_2022JP::~CharsetRecog_2022JP() {}

-@@ -152,6 +154,7 @@

+@@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const

return (confidence > 0);

}

@@ -765,7 +728,7 @@ Index: source/i18n/csr2022.cpp

CharsetRecog_2022KR::~CharsetRecog_2022KR() {}

const char *CharsetRecog_2022KR::getName() const {

-@@ -181,6 +184,7 @@

+@@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const

results->set(textIn, this, confidence);

return (confidence > 0);

}

@@ -773,31 +736,31 @@ Index: source/i18n/csr2022.cpp

CharsetRecog_2022::~CharsetRecog_2022() {

// nothing to do

-Index: source/i18n/csdetect.cpp

-===================================================================

---- source/i18n/csdetect.cpp (revision 291619)

-+++ source/i18n/csdetect.cpp (working copy)

-@@ -110,6 +110,7 @@

- new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),

+diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h

+index 2ac2b87..dad22c7 100644

+--- a/source/i18n/csr2022.h

++++ b/source/i18n/csr2022.h

+@@ -65,6 +65,7 @@ public:

+ UBool match(InputText *textIn, CharsetMatch *results) const;

+ };

- new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),

+#if !UCONFIG_NO_NON_HTML5_CONVERSION

- new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),

- new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),

+ class CharsetRecog_2022KR :public CharsetRecog_2022 {

+ public:

+ virtual ~CharsetRecog_2022KR();

+@@ -84,6 +85,7 @@ public:

-@@ -117,6 +118,7 @@

- new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),

- new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),

- new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)

+ UBool match(InputText *textIn, CharsetMatch *results) const;

+ };

+#endif

- };

- int32_t rCount = ARRAY_SIZE(tempArray);

-Index: source/i18n/csrsbcs.cpp

-===================================================================

---- source/i18n/csrsbcs.cpp (revision 291619)

-+++ source/i18n/csrsbcs.cpp (working copy)

-@@ -137,6 +137,7 @@

+ U_NAMESPACE_END

+diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp

+index d03367c..7b70dc1 100644

+--- a/source/i18n/csrsbcs.cpp

++++ b/source/i18n/csrsbcs.cpp

+@@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)

return (int32_t) (rawPercent * 300.0);

}

@@ -805,7 +768,7 @@ Index: source/i18n/csrsbcs.cpp

static const uint8_t unshapeMap_IBM420[] = {

/* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */

/* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,

-@@ -232,6 +233,7 @@

+@@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)

}

@@ -813,7 +776,7 @@ Index: source/i18n/csrsbcs.cpp

CharsetRecog_sbcs::CharsetRecog_sbcs()

{

-@@ -624,6 +626,7 @@

+@@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {

0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,

};

@@ -821,7 +784,7 @@ Index: source/i18n/csrsbcs.cpp

static const int32_t ngrams_IBM424_he_rtl[] = {

0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,

0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,

-@@ -691,6 +694,7 @@

+@@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {

/* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,

/* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,

};

@@ -829,7 +792,7 @@ Index: source/i18n/csrsbcs.cpp

//ISO-8859-1,2,5,6,7,8,9 Ngrams

-@@ -1155,6 +1159,7 @@

+@@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const

return (confidence > 0);

}

@@ -837,7 +800,7 @@ Index: source/i18n/csrsbcs.cpp

CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()

{

// nothing to do

-@@ -1253,6 +1258,7 @@

+@@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results

results->set(textIn, this, confidence);

return (confidence > 0);

}

@@ -845,3 +808,39 @@ Index: source/i18n/csrsbcs.cpp

U_NAMESPACE_END

#endif

+diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h

+index 2579c02..7789f9b 100644

+--- a/source/i18n/csrsbcs.h

++++ b/source/i18n/csrsbcs.h

+@@ -50,6 +50,7 @@ public:

+ };

++#if !UCONFIG_NO_NON_HTML5_CONVERSION

+ class NGramParser_IBM420 : public NGramParser

+ {

+ private:

+@@ -61,6 +62,7 @@ private:

+ public:

+ NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);

+ };

++#endif

+ class CharsetRecog_sbcs : public CharsetRecognizer

+@@ -229,6 +231,7 @@ public:

+ virtual UBool match(InputText *det, CharsetMatch *results) const;

+ };

++#if !UCONFIG_NO_NON_HTML5_CONVERSION

+ class CharsetRecog_IBM424_he : public CharsetRecog_sbcs

+ {

+ public:

+@@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {

+ virtual UBool match(InputText *det, CharsetMatch *results) const;

+ };

++#endif

+ U_NAMESPACE_END

« no previous file with comments | « README.chromium ('k') | scripts/big5_gen.sh » ('j') | no next file with comments »