| Index: patches/uconv.patch
|
| diff --git a/patches/uconv.patch b/patches/uconv.patch
|
| index 55e3327b62ae4b231b1892daa451c2f0d0554521..9f997a6a3ab198cd50072cd4097dee2a7b8cba40 100644
|
| --- a/patches/uconv.patch
|
| +++ b/patches/uconv.patch
|
| @@ -1,8 +1,8 @@
|
| -Index: source/common/ucnv2022.cpp
|
| -===================================================================
|
| ---- source/common/ucnv2022.cpp (revision 259715)
|
| -+++ source/common/ucnv2022.cpp (working copy)
|
| -@@ -154,7 +154,11 @@
|
| +diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
|
| +index 9556dd2..7b8df9b 100644
|
| +--- a/source/common/ucnv2022.cpp
|
| ++++ b/source/common/ucnv2022.cpp
|
| +@@ -152,7 +152,11 @@ typedef enum {
|
| } StateEnum;
|
|
|
| /* is the StateEnum charset value for a DBCS charset? */
|
| @@ -14,7 +14,7 @@ Index: source/common/ucnv2022.cpp
|
|
|
| #define CSM(cs) ((uint16_t)1<<(cs))
|
|
|
| -@@ -167,13 +171,23 @@
|
| +@@ -165,13 +169,23 @@ typedef enum {
|
| * all versions, not just JIS7 and JIS8.
|
| * - ICU does not distinguish between different versions of JIS X 0208.
|
| */
|
| @@ -38,7 +38,7 @@ Index: source/common/ucnv2022.cpp
|
| };
|
|
|
| typedef enum {
|
| -@@ -360,15 +374,18 @@
|
| +@@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
|
| ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
|
| };
|
|
|
| @@ -58,7 +58,7 @@ Index: source/common/ucnv2022.cpp
|
| } Variant2022;
|
|
|
| /*********** ISO 2022 Converter Protos ***********/
|
| -@@ -485,12 +502,15 @@
|
| +@@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| /* prevent indexing beyond jpCharsetMasks[] */
|
| myConverterData->version = version = 0;
|
| }
|
| @@ -74,7 +74,7 @@ Index: source/common/ucnv2022.cpp
|
| if(jpCharsetMasks[version]&CSM(JISX212)) {
|
| myConverterData->myConverterArray[JISX212] =
|
| ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
|
| -@@ -503,6 +523,7 @@
|
| +@@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| myConverterData->myConverterArray[KSC5601] =
|
| ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
|
| }
|
| @@ -82,7 +82,7 @@ Index: source/common/ucnv2022.cpp
|
|
|
| /* set the function pointers to appropriate funtions */
|
| cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
|
| -@@ -513,6 +534,7 @@
|
| +@@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
|
| myConverterData->name[len+1]='\0';
|
| }
|
| @@ -90,7 +90,7 @@ Index: source/common/ucnv2022.cpp
|
| else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
|
| (myLocale[2]=='_' || myLocale[2]=='\0'))
|
| {
|
| -@@ -582,6 +604,7 @@
|
| +@@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
|
| (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
|
| }
|
| }
|
| @@ -98,7 +98,7 @@ Index: source/common/ucnv2022.cpp
|
| else{
|
| #ifdef U_ENABLE_GENERIC_ISO_2022
|
| myConverterData->isFirstBuffer = TRUE;
|
| -@@ -716,6 +739,7 @@
|
| +@@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
|
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
| };
|
|
|
| @@ -106,7 +106,7 @@ Index: source/common/ucnv2022.cpp
|
| /*************** to unicode *******************/
|
| static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
|
| /* 0 1 2 3 4 5 6 7 8 9 */
|
| -@@ -728,6 +752,7 @@
|
| +@@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
|
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
|
| };
|
| @@ -114,7 +114,7 @@ Index: source/common/ucnv2022.cpp
|
|
|
|
|
| static UCNV_TableStates_2022
|
| -@@ -880,6 +905,7 @@
|
| +@@ -878,6 +903,7 @@ DONE:
|
| }
|
| break;
|
| /* case SS3_STATE: not used in ISO-2022-JP-x */
|
| @@ -122,7 +122,7 @@ Index: source/common/ucnv2022.cpp
|
| case ISO8859_1:
|
| case ISO8859_7:
|
| if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
|
| -@@ -889,6 +915,7 @@
|
| +@@ -887,6 +913,7 @@ DONE:
|
| myData2022->toU2022State.cs[2]=(int8_t)tempState;
|
| }
|
| break;
|
| @@ -130,7 +130,7 @@ Index: source/common/ucnv2022.cpp
|
| default:
|
| if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
|
| *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
|
| -@@ -900,6 +927,7 @@
|
| +@@ -898,6 +925,7 @@ DONE:
|
| }
|
| }
|
| break;
|
| @@ -138,7 +138,7 @@ Index: source/common/ucnv2022.cpp
|
| case ISO_2022_CN:
|
| {
|
| StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
|
| -@@ -961,6 +989,7 @@
|
| +@@ -959,6 +987,7 @@ DONE:
|
| *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
|
| }
|
| break;
|
| @@ -146,7 +146,7 @@ Index: source/common/ucnv2022.cpp
|
|
|
| default:
|
| *err = U_ILLEGAL_ESCAPE_SEQUENCE;
|
| -@@ -1381,12 +1410,16 @@
|
| +@@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv,
|
| static const StateEnum jpCharsetPref[]={
|
| ASCII,
|
| JISX201,
|
| @@ -163,7 +163,7 @@ Index: source/common/ucnv2022.cpp
|
| HWKANA_7BIT
|
| };
|
|
|
| -@@ -1756,6 +1789,7 @@
|
| +@@ -1754,6 +1787,7 @@ getTrail:
|
| g = 0;
|
| }
|
| break;
|
| @@ -171,7 +171,7 @@ Index: source/common/ucnv2022.cpp
|
| case ISO8859_1:
|
| if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
|
| targetValue = (uint32_t)sourceChar - 0x80;
|
| -@@ -1764,6 +1798,7 @@
|
| +@@ -1762,6 +1796,7 @@ getTrail:
|
| g = 2;
|
| }
|
| break;
|
| @@ -179,7 +179,7 @@ Index: source/common/ucnv2022.cpp
|
| case HWKANA_7BIT:
|
| if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
|
| if(converterData->version==3) {
|
| -@@ -1825,6 +1860,7 @@
|
| +@@ -1823,6 +1858,7 @@ getTrail:
|
| useFallback = FALSE;
|
| }
|
| break;
|
| @@ -187,7 +187,7 @@ Index: source/common/ucnv2022.cpp
|
| case ISO8859_7:
|
| /* G0 SBCS forced to 7-bit output */
|
| len2 = MBCS_SINGLE_FROM_UCHAR32(
|
| -@@ -1839,6 +1875,7 @@
|
| +@@ -1837,6 +1873,7 @@ getTrail:
|
| useFallback = FALSE;
|
| }
|
| break;
|
| @@ -195,7 +195,7 @@ Index: source/common/ucnv2022.cpp
|
| default:
|
| /* G0 DBCS */
|
| len2 = MBCS_FROM_UCHAR32_ISO2022(
|
| -@@ -1846,6 +1883,7 @@
|
| +@@ -1844,6 +1881,7 @@ getTrail:
|
| sourceChar, &value,
|
| useFallback, MBCS_OUTPUT_2);
|
| if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
|
| @@ -203,7 +203,7 @@ Index: source/common/ucnv2022.cpp
|
| if(cs0 == KSC5601) {
|
| /*
|
| * Check for valid bytes for the encoding scheme.
|
| -@@ -1857,6 +1895,7 @@
|
| +@@ -1855,6 +1893,7 @@ getTrail:
|
| break;
|
| }
|
| }
|
| @@ -211,7 +211,7 @@ Index: source/common/ucnv2022.cpp
|
| targetValue = value;
|
| len = len2;
|
| cs = cs0;
|
| -@@ -2150,6 +2189,7 @@
|
| +@@ -2148,6 +2187,7 @@ escape:
|
| targetUniChar = mySourceChar;
|
| }
|
| break;
|
| @@ -219,7 +219,7 @@ Index: source/common/ucnv2022.cpp
|
| case ISO8859_1:
|
| if(mySourceChar <= 0x7f) {
|
| targetUniChar = mySourceChar + 0x80;
|
| -@@ -2168,6 +2208,7 @@
|
| +@@ -2166,6 +2206,7 @@ escape:
|
| /* return from a single-shift state to the previous one */
|
| pToU2022State->g=pToU2022State->prevG;
|
| break;
|
| @@ -227,7 +227,7 @@ Index: source/common/ucnv2022.cpp
|
| case JISX201:
|
| if(mySourceChar <= 0x7f) {
|
| targetUniChar = jisx201ToU(mySourceChar);
|
| -@@ -2207,9 +2248,11 @@
|
| +@@ -2205,9 +2246,11 @@ getTrailByte:
|
| } else {
|
| /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
|
| mySourceChar = tmpSourceChar;
|
| @@ -239,7 +239,7 @@ Index: source/common/ucnv2022.cpp
|
| tempBuf[0] = (char)(tmpSourceChar >> 8);
|
| tempBuf[1] = (char)(tmpSourceChar);
|
| }
|
| -@@ -2271,6 +2314,7 @@
|
| +@@ -2269,6 +2312,7 @@ endloop:
|
| }
|
|
|
|
|
| @@ -247,7 +247,7 @@ Index: source/common/ucnv2022.cpp
|
| /***************************************************************
|
| * Rules for ISO-2022-KR encoding
|
| * i) The KSC5601 designator sequence should appear only once in a file,
|
| -@@ -3414,6 +3458,7 @@
|
| +@@ -3412,6 +3456,7 @@ endloop:
|
| args->target = myTarget;
|
| args->source = mySource;
|
| }
|
| @@ -255,7 +255,7 @@ Index: source/common/ucnv2022.cpp
|
|
|
| static void
|
| _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
|
| -@@ -3615,6 +3660,7 @@
|
| +@@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| /* include JIS X 0201 which is hardcoded */
|
| sa->add(sa->set, 0xa5);
|
| sa->add(sa->set, 0x203e);
|
| @@ -263,7 +263,7 @@ Index: source/common/ucnv2022.cpp
|
| if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
|
| /* include Latin-1 for some variants of JP */
|
| sa->addRange(sa->set, 0, 0xff);
|
| -@@ -3622,6 +3668,10 @@
|
| +@@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| /* include ASCII for JP */
|
| sa->addRange(sa->set, 0, 0x7f);
|
| }
|
| @@ -274,7 +274,7 @@ Index: source/common/ucnv2022.cpp
|
| if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
|
| /*
|
| * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
|
| -@@ -3640,6 +3690,7 @@
|
| +@@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| sa->addRange(sa->set, HWKANA_START, HWKANA_END);
|
| }
|
| break;
|
| @@ -282,7 +282,7 @@ Index: source/common/ucnv2022.cpp
|
| case 'c':
|
| case 'z':
|
| /* include ASCII for CN */
|
| -@@ -3651,6 +3702,7 @@
|
| +@@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| cnvData->currentConverter, sa, which, pErrorCode);
|
| /* the loop over myConverterArray[] will simply not find another converter */
|
| break;
|
| @@ -290,7 +290,7 @@ Index: source/common/ucnv2022.cpp
|
| default:
|
| break;
|
| }
|
| -@@ -3671,10 +3723,16 @@
|
| +@@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
|
| UConverterSetFilter filter;
|
| if(cnvData->myConverterArray[i]!=NULL) {
|
| @@ -298,7 +298,7 @@ Index: source/common/ucnv2022.cpp
|
| - cnvData->version==0 && i==CNS_11643
|
| - ) {
|
| + if(cnvData->locale[0]=='j' && i==JISX208) {
|
| - /*
|
| ++ /*
|
| + * Only add code points that map to Shift-JIS codes
|
| + * corresponding to JIS X 0208.
|
| + */
|
| @@ -306,11 +306,10 @@ Index: source/common/ucnv2022.cpp
|
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
|
| + cnvData->version==0 && i==CNS_11643) {
|
| -+ /*
|
| + /*
|
| * Version-specific for CN:
|
| * CN version 0 does not map CNS planes 3..7 although
|
| - * they are all available in the CNS conversion table;
|
| -@@ -3682,18 +3740,13 @@
|
| +@@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
|
| * The two versions create different Unicode sets.
|
| */
|
| filter=UCNV_SET_FILTER_2022_CN;
|
| @@ -330,7 +329,7 @@ Index: source/common/ucnv2022.cpp
|
| } else {
|
| filter=UCNV_SET_FILTER_NONE;
|
| }
|
| -@@ -3831,6 +3884,7 @@
|
| +@@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={
|
|
|
| } // namespace
|
|
|
| @@ -338,89 +337,18 @@ Index: source/common/ucnv2022.cpp
|
| /************* KR ***************/
|
| static const UConverterImpl _ISO2022KRImpl={
|
| UCNV_ISO_2022,
|
| -@@ -3947,5 +4001,6 @@
|
| +@@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={
|
| };
|
|
|
| } // namespace
|
| +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
|
|
|
| #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
|
| -Index: source/common/ucnvbocu.cpp
|
| -===================================================================
|
| ---- source/common/ucnvbocu.cpp (revision 259715)
|
| -+++ source/common/ucnvbocu.cpp (working copy)
|
| -@@ -19,7 +19,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "unicode/ucnv.h"
|
| - #include "unicode/ucnv_cb.h"
|
| -Index: source/common/ucnvisci.c
|
| -===================================================================
|
| ---- source/common/ucnvisci.c (revision 259715)
|
| -+++ source/common/ucnvisci.c (working copy)
|
| -@@ -17,7 +17,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "unicode/ucnv.h"
|
| - #include "unicode/ucnv_cb.h"
|
| -Index: source/common/ucnvscsu.c
|
| -===================================================================
|
| ---- source/common/ucnvscsu.c (revision 259715)
|
| -+++ source/common/ucnvscsu.c (working copy)
|
| -@@ -21,7 +21,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "unicode/ucnv.h"
|
| - #include "unicode/ucnv_cb.h"
|
| -Index: source/common/ucnv_u7.c
|
| -===================================================================
|
| ---- source/common/ucnv_u7.c (revision 259715)
|
| -+++ source/common/ucnv_u7.c (working copy)
|
| -@@ -16,7 +16,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "unicode/ucnv.h"
|
| - #include "ucnv_bld.h"
|
| -Index: source/common/unicode/uconfig.h
|
| -===================================================================
|
| ---- source/common/unicode/uconfig.h (revision 259715)
|
| -+++ source/common/unicode/uconfig.h (working copy)
|
| -@@ -265,6 +265,14 @@
|
| - #endif
|
| -
|
| - /**
|
| -+ * This switch turns off all the converters NOT listed in
|
| -+ * the encoding standard : http://encoding.spec.whatwg.org
|
| -+ */
|
| -+#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
|
| -+#define UCONFIG_NO_NON_HTML5_CONVERSION 0
|
| -+#endif
|
| -+
|
| -+/**
|
| - * \def UCONFIG_NO_LEGACY_CONVERSION
|
| - * This switch turns off all converters except for
|
| - * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
|
| -Index: source/common/ucnv_bld.cpp
|
| -===================================================================
|
| ---- source/common/ucnv_bld.cpp (revision 259715)
|
| -+++ source/common/ucnv_bld.cpp (working copy)
|
| -@@ -69,28 +69,41 @@
|
| +diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp
|
| +index 4940310..047f18a 100644
|
| +--- a/source/common/ucnv_bld.cpp
|
| ++++ b/source/common/ucnv_bld.cpp
|
| +@@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
|
|
|
| #if UCONFIG_NO_LEGACY_CONVERSION
|
| NULL,
|
| @@ -465,7 +393,7 @@ Index: source/common/ucnv_bld.cpp
|
| NULL,
|
| #else
|
| &_CompoundTextData
|
| -@@ -105,18 +118,24 @@
|
| +@@ -105,18 +118,24 @@ static struct {
|
| const char *name;
|
| const UConverterType type;
|
| } const cnvNameType[] = {
|
| @@ -479,12 +407,12 @@ Index: source/common/ucnv_bld.cpp
|
| #endif
|
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| { "imapmailboxname", UCNV_IMAP_MAILBOX },
|
| +-#if !UCONFIG_NO_LEGACY_CONVERSION
|
| +#endif
|
| +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -+ { "iscii", UCNV_ISCII },
|
| + { "iscii", UCNV_ISCII },
|
| +#endif
|
| - #if !UCONFIG_NO_LEGACY_CONVERSION
|
| -- { "iscii", UCNV_ISCII },
|
| ++#if !UCONFIG_NO_LEGACY_CONVERSION
|
| { "iso2022", UCNV_ISO_2022 },
|
| #endif
|
| { "iso88591", UCNV_LATIN_1 },
|
| @@ -493,7 +421,7 @@ Index: source/common/ucnv_bld.cpp
|
| { "lmbcs1", UCNV_LMBCS_1 },
|
| { "lmbcs11",UCNV_LMBCS_11 },
|
| { "lmbcs16",UCNV_LMBCS_16 },
|
| -@@ -130,7 +149,9 @@
|
| +@@ -130,7 +149,9 @@ static struct {
|
| { "lmbcs6", UCNV_LMBCS_6 },
|
| { "lmbcs8", UCNV_LMBCS_8 },
|
| #endif
|
| @@ -503,7 +431,7 @@ Index: source/common/ucnv_bld.cpp
|
| { "usascii", UCNV_US_ASCII },
|
| { "utf16", UCNV_UTF16 },
|
| { "utf16be", UCNV_UTF16_BigEndian },
|
| -@@ -152,9 +173,13 @@
|
| +@@ -152,9 +173,13 @@ static struct {
|
| { "utf32oppositeendian", UCNV_UTF32_BigEndian },
|
| { "utf32platformendian", UCNV_UTF32_LittleEndian },
|
| #endif
|
| @@ -517,11 +445,71 @@ Index: source/common/ucnv_bld.cpp
|
| };
|
|
|
|
|
| -Index: source/common/ucnv_u8.c
|
| -===================================================================
|
| ---- source/common/ucnv_u8.c (revision 259715)
|
| -+++ source/common/ucnv_u8.c (working copy)
|
| -@@ -87,6 +87,15 @@
|
| +diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h
|
| +index 402e2c9..5fad446 100644
|
| +--- a/source/common/ucnv_cnv.h
|
| ++++ b/source/common/ucnv_cnv.h
|
| +@@ -256,11 +256,15 @@ struct UConverterImpl {
|
| + extern const UConverterSharedData
|
| + _MBCSData, _Latin1Data,
|
| + _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
|
| +- _ISO2022Data,
|
| ++ _ISO2022Data,
|
| ++#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| + _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
|
| + _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
|
| + _HZData,_ISCIIData, _SCSUData, _ASCIIData,
|
| + _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
|
| ++#else
|
| ++ _ASCIIData, _UTF16Data, _UTF32Data;
|
| ++#endif
|
| +
|
| + U_CDECL_END
|
| +
|
| +diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c
|
| +index ec0e9c2..e723fa6 100644
|
| +--- a/source/common/ucnv_ct.c
|
| ++++ b/source/common/ucnv_ct.c
|
| +@@ -14,7 +14,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "unicode/ucnv.h"
|
| + #include "unicode/uset.h"
|
| +diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c
|
| +index 1d921dd..a4fccee 100644
|
| +--- a/source/common/ucnv_lmb.c
|
| ++++ b/source/common/ucnv_lmb.c
|
| +@@ -25,7 +25,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "unicode/ucnv_err.h"
|
| + #include "unicode/ucnv.h"
|
| +diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c
|
| +index 42943f4..6466b87 100644
|
| +--- a/source/common/ucnv_u7.c
|
| ++++ b/source/common/ucnv_u7.c
|
| +@@ -16,7 +16,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "unicode/ucnv.h"
|
| + #include "ucnv_bld.h"
|
| +diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c
|
| +index 8ee9fe5..24205f5 100644
|
| +--- a/source/common/ucnv_u8.c
|
| ++++ b/source/common/ucnv_u8.c
|
| +@@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
|
| static const uint32_t
|
| utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
|
|
|
| @@ -537,7 +525,7 @@ Index: source/common/ucnv_u8.c
|
| static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
| UErrorCode * err)
|
| {
|
| -@@ -96,10 +105,10 @@
|
| +@@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
|
| const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
|
| const UChar *targetLimit = args->targetLimit;
|
| unsigned char *toUBytes = cnv->toUBytes;
|
| @@ -550,7 +538,7 @@ Index: source/common/ucnv_u8.c
|
| /* Restore size of current sequence */
|
| if (cnv->toUnicodeStatus && myTarget < targetLimit)
|
| {
|
| -@@ -226,7 +235,7 @@
|
| +@@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
|
| const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
|
| const UChar *targetLimit = args->targetLimit;
|
| unsigned char *toUBytes = cnv->toUBytes;
|
| @@ -559,7 +547,7 @@ Index: source/common/ucnv_u8.c
|
| uint32_t ch, ch2 = 0;
|
| int32_t i, inBytes;
|
|
|
| -@@ -357,7 +366,7 @@
|
| +@@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
|
| UChar32 ch;
|
| uint8_t tempBuf[4];
|
| int32_t indexToWrite;
|
| @@ -568,7 +556,7 @@ Index: source/common/ucnv_u8.c
|
|
|
| if (cnv->fromUChar32 && myTarget < targetLimit)
|
| {
|
| -@@ -473,7 +482,7 @@
|
| +@@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
|
| int32_t offsetNum, nextSourceIndex;
|
| int32_t indexToWrite;
|
| uint8_t tempBuf[4];
|
| @@ -577,10 +565,87 @@ Index: source/common/ucnv_u8.c
|
|
|
| if (cnv->fromUChar32 && myTarget < targetLimit)
|
| {
|
| -Index: source/common/unicode/urename.h
|
| -===================================================================
|
| ---- source/common/unicode/urename.h (revision 259715)
|
| -+++ source/common/unicode/urename.h (working copy)
|
| +diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp
|
| +index b97d666..281d6d9 100644
|
| +--- a/source/common/ucnvbocu.cpp
|
| ++++ b/source/common/ucnvbocu.cpp
|
| +@@ -19,7 +19,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "unicode/ucnv.h"
|
| + #include "unicode/ucnv_cb.h"
|
| +diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c
|
| +index 3760c39..51825e2 100644
|
| +--- a/source/common/ucnvhz.c
|
| ++++ b/source/common/ucnvhz.c
|
| +@@ -16,7 +16,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "cmemory.h"
|
| + #include "unicode/ucnv.h"
|
| +@@ -635,4 +635,4 @@ const UConverterSharedData _HZData={
|
| + 0
|
| + };
|
| +
|
| +-#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
|
| ++#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */
|
| +diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c
|
| +index fe61d40..16fd0a3 100644
|
| +--- a/source/common/ucnvisci.c
|
| ++++ b/source/common/ucnvisci.c
|
| +@@ -17,7 +17,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "unicode/ucnv.h"
|
| + #include "unicode/ucnv_cb.h"
|
| +diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c
|
| +index c6e96e1..a6f8c9e 100644
|
| +--- a/source/common/ucnvscsu.c
|
| ++++ b/source/common/ucnvscsu.c
|
| +@@ -21,7 +21,7 @@
|
| +
|
| + #include "unicode/utypes.h"
|
| +
|
| +-#if !UCONFIG_NO_CONVERSION
|
| ++#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| +
|
| + #include "unicode/ucnv.h"
|
| + #include "unicode/ucnv_cb.h"
|
| +diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h
|
| +index ed073b6..8df56e6 100644
|
| +--- a/source/common/unicode/uconfig.h
|
| ++++ b/source/common/unicode/uconfig.h
|
| +@@ -270,6 +270,14 @@
|
| + #endif
|
| +
|
| + /**
|
| ++ * This switch turns off all the converters NOT listed in
|
| ++ * the encoding standard : http://encoding.spec.whatwg.org
|
| ++ */
|
| ++#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
|
| ++#define UCONFIG_NO_NON_HTML5_CONVERSION 0
|
| ++#endif
|
| ++
|
| ++/**
|
| + * \def UCONFIG_NO_LEGACY_CONVERSION
|
| + * This switch turns off all converters except for
|
| + * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
|
| +diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h
|
| +index a817262..89becca 100644
|
| +--- a/source/common/unicode/urename.h
|
| ++++ b/source/common/unicode/urename.h
|
| @@ -73,12 +73,14 @@
|
| #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
|
| #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
|
| @@ -615,133 +680,31 @@ Index: source/common/unicode/urename.h
|
| #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
|
| #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
|
| #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
|
| -Index: source/common/ucnv_cnv.h
|
| -===================================================================
|
| ---- source/common/ucnv_cnv.h (revision 259715)
|
| -+++ source/common/ucnv_cnv.h (working copy)
|
| -@@ -256,11 +256,15 @@
|
| - extern const UConverterSharedData
|
| - _MBCSData, _Latin1Data,
|
| - _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
|
| -- _ISO2022Data,
|
| -+ _ISO2022Data,
|
| -+#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| - _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
|
| - _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
|
| - _HZData,_ISCIIData, _SCSUData, _ASCIIData,
|
| - _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
|
| -+#else
|
| -+ _ASCIIData, _UTF16Data, _UTF32Data;
|
| -+#endif
|
| -
|
| - U_CDECL_END
|
| -
|
| -Index: source/common/ucnv_lmb.c
|
| -===================================================================
|
| ---- source/common/ucnv_lmb.c (revision 291619)
|
| -+++ source/common/ucnv_lmb.c (working copy)
|
| -@@ -25,7 +25,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "unicode/ucnv_err.h"
|
| - #include "unicode/ucnv.h"
|
| -Index: source/common/ucnvhz.c
|
| -===================================================================
|
| ---- source/common/ucnvhz.c (revision 291619)
|
| -+++ source/common/ucnvhz.c (working copy)
|
| -@@ -16,7 +16,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "cmemory.h"
|
| - #include "unicode/ucnv.h"
|
| -@@ -637,4 +637,4 @@
|
| - 0
|
| - };
|
| -
|
| --#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
|
| -+#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */
|
| -Index: source/common/ucnv_ct.c
|
| -===================================================================
|
| ---- source/common/ucnv_ct.c (revision 291619)
|
| -+++ source/common/ucnv_ct.c (working copy)
|
| -@@ -14,7 +14,7 @@
|
| -
|
| - #include "unicode/utypes.h"
|
| -
|
| --#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
|
| -+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
|
| -
|
| - #include "unicode/ucnv.h"
|
| - #include "unicode/uset.h"
|
| -Index: source/i18n/csrsbcs.h
|
| -===================================================================
|
| ---- source/i18n/csrsbcs.h (revision 291619)
|
| -+++ source/i18n/csrsbcs.h (working copy)
|
| -@@ -50,6 +50,7 @@
|
| -
|
| - };
|
| -
|
| -+#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| - class NGramParser_IBM420 : public NGramParser
|
| - {
|
| - private:
|
| -@@ -61,6 +62,7 @@
|
| - public:
|
| - NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
|
| - };
|
| -+#endif
|
| -
|
| -
|
| - class CharsetRecog_sbcs : public CharsetRecognizer
|
| -@@ -229,6 +231,7 @@
|
| - virtual UBool match(InputText *det, CharsetMatch *results) const;
|
| - };
|
| -
|
| -+#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| - class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
|
| - {
|
| - public:
|
| -@@ -280,6 +283,7 @@
|
| -
|
| - virtual UBool match(InputText *det, CharsetMatch *results) const;
|
| - };
|
| -+#endif
|
| -
|
| - U_NAMESPACE_END
|
| -
|
| -Index: source/i18n/csr2022.h
|
| -===================================================================
|
| ---- source/i18n/csr2022.h (revision 291619)
|
| -+++ source/i18n/csr2022.h (working copy)
|
| -@@ -65,6 +65,7 @@
|
| - UBool match(InputText *textIn, CharsetMatch *results) const;
|
| - };
|
| +diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp
|
| +index 3efbd49..ba5b18c 100644
|
| +--- a/source/i18n/csdetect.cpp
|
| ++++ b/source/i18n/csdetect.cpp
|
| +@@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
|
| + new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
|
|
|
| + new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
|
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| - class CharsetRecog_2022KR :public CharsetRecog_2022 {
|
| - public:
|
| - virtual ~CharsetRecog_2022KR();
|
| -@@ -84,6 +85,7 @@
|
| + new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
|
| + new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
|
|
|
| - UBool match(InputText *textIn, CharsetMatch *results) const;
|
| - };
|
| +@@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
|
| + new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
|
| + new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
|
| + new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
|
| +#endif
|
| + };
|
| + int32_t rCount = ARRAY_SIZE(tempArray);
|
|
|
| - U_NAMESPACE_END
|
| -
|
| -Index: source/i18n/csr2022.cpp
|
| -===================================================================
|
| ---- source/i18n/csr2022.cpp (revision 291619)
|
| -+++ source/i18n/csr2022.cpp (working copy)
|
| -@@ -119,6 +119,7 @@
|
| +diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp
|
| +index 3db0bc9..be3eafa 100644
|
| +--- a/source/i18n/csr2022.cpp
|
| ++++ b/source/i18n/csr2022.cpp
|
| +@@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {
|
| {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
|
| };
|
|
|
| @@ -749,7 +712,7 @@ Index: source/i18n/csr2022.cpp
|
| static const uint8_t escapeSequences_2022KR[][5] = {
|
| {0x1b, 0x24, 0x29, 0x43, 0x00}
|
| };
|
| -@@ -136,6 +137,7 @@
|
| +@@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {
|
| {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
|
| {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
|
| };
|
| @@ -757,7 +720,7 @@ Index: source/i18n/csr2022.cpp
|
|
|
| CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
|
|
|
| -@@ -152,6 +154,7 @@
|
| +@@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
|
| return (confidence > 0);
|
| }
|
|
|
| @@ -765,7 +728,7 @@ Index: source/i18n/csr2022.cpp
|
| CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
|
|
|
| const char *CharsetRecog_2022KR::getName() const {
|
| -@@ -181,6 +184,7 @@
|
| +@@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
|
| results->set(textIn, this, confidence);
|
| return (confidence > 0);
|
| }
|
| @@ -773,31 +736,31 @@ Index: source/i18n/csr2022.cpp
|
|
|
| CharsetRecog_2022::~CharsetRecog_2022() {
|
| // nothing to do
|
| -Index: source/i18n/csdetect.cpp
|
| -===================================================================
|
| ---- source/i18n/csdetect.cpp (revision 291619)
|
| -+++ source/i18n/csdetect.cpp (working copy)
|
| -@@ -110,6 +110,7 @@
|
| - new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
|
| +diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h
|
| +index 2ac2b87..dad22c7 100644
|
| +--- a/source/i18n/csr2022.h
|
| ++++ b/source/i18n/csr2022.h
|
| +@@ -65,6 +65,7 @@ public:
|
| + UBool match(InputText *textIn, CharsetMatch *results) const;
|
| + };
|
|
|
| - new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
|
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| - new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
|
| - new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
|
| + class CharsetRecog_2022KR :public CharsetRecog_2022 {
|
| + public:
|
| + virtual ~CharsetRecog_2022KR();
|
| +@@ -84,6 +85,7 @@ public:
|
|
|
| -@@ -117,6 +118,7 @@
|
| - new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
|
| - new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
|
| - new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
|
| + UBool match(InputText *textIn, CharsetMatch *results) const;
|
| + };
|
| +#endif
|
| - };
|
| - int32_t rCount = ARRAY_SIZE(tempArray);
|
|
|
| -Index: source/i18n/csrsbcs.cpp
|
| -===================================================================
|
| ---- source/i18n/csrsbcs.cpp (revision 291619)
|
| -+++ source/i18n/csrsbcs.cpp (working copy)
|
| -@@ -137,6 +137,7 @@
|
| + U_NAMESPACE_END
|
| +
|
| +diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp
|
| +index d03367c..7b70dc1 100644
|
| +--- a/source/i18n/csrsbcs.cpp
|
| ++++ b/source/i18n/csrsbcs.cpp
|
| +@@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)
|
| return (int32_t) (rawPercent * 300.0);
|
| }
|
|
|
| @@ -805,7 +768,7 @@ Index: source/i18n/csrsbcs.cpp
|
| static const uint8_t unshapeMap_IBM420[] = {
|
| /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
|
| /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
|
| -@@ -232,6 +233,7 @@
|
| +@@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)
|
| }
|
| }
|
| }
|
| @@ -813,7 +776,7 @@ Index: source/i18n/csrsbcs.cpp
|
|
|
| CharsetRecog_sbcs::CharsetRecog_sbcs()
|
| {
|
| -@@ -624,6 +626,7 @@
|
| +@@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {
|
| 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
|
| };
|
|
|
| @@ -821,7 +784,7 @@ Index: source/i18n/csrsbcs.cpp
|
| static const int32_t ngrams_IBM424_he_rtl[] = {
|
| 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
|
| 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
|
| -@@ -691,6 +694,7 @@
|
| +@@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {
|
| /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
|
| /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
|
| };
|
| @@ -829,7 +792,7 @@ Index: source/i18n/csrsbcs.cpp
|
|
|
| //ISO-8859-1,2,5,6,7,8,9 Ngrams
|
|
|
| -@@ -1155,6 +1159,7 @@
|
| +@@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
|
| return (confidence > 0);
|
| }
|
|
|
| @@ -837,7 +800,7 @@ Index: source/i18n/csrsbcs.cpp
|
| CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
|
| {
|
| // nothing to do
|
| -@@ -1253,6 +1258,7 @@
|
| +@@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results
|
| results->set(textIn, this, confidence);
|
| return (confidence > 0);
|
| }
|
| @@ -845,3 +808,39 @@ Index: source/i18n/csrsbcs.cpp
|
|
|
| U_NAMESPACE_END
|
| #endif
|
| +diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h
|
| +index 2579c02..7789f9b 100644
|
| +--- a/source/i18n/csrsbcs.h
|
| ++++ b/source/i18n/csrsbcs.h
|
| +@@ -50,6 +50,7 @@ public:
|
| +
|
| + };
|
| +
|
| ++#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| + class NGramParser_IBM420 : public NGramParser
|
| + {
|
| + private:
|
| +@@ -61,6 +62,7 @@ private:
|
| + public:
|
| + NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
|
| + };
|
| ++#endif
|
| +
|
| +
|
| + class CharsetRecog_sbcs : public CharsetRecognizer
|
| +@@ -229,6 +231,7 @@ public:
|
| + virtual UBool match(InputText *det, CharsetMatch *results) const;
|
| + };
|
| +
|
| ++#if !UCONFIG_NO_NON_HTML5_CONVERSION
|
| + class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
|
| + {
|
| + public:
|
| +@@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
|
| +
|
| + virtual UBool match(InputText *det, CharsetMatch *results) const;
|
| + };
|
| ++#endif
|
| +
|
| + U_NAMESPACE_END
|
| +
|
|
|