| OLD | NEW |
| (Empty) |
| 1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp | |
| 2 index 9556dd2..7b8df9b 100644 | |
| 3 --- a/source/common/ucnv2022.cpp | |
| 4 +++ b/source/common/ucnv2022.cpp | |
| 5 @@ -152,7 +152,11 @@ typedef enum { | |
| 6 } StateEnum; | |
| 7 | |
| 8 /* is the StateEnum charset value for a DBCS charset? */ | |
| 9 +#if UCONFIG_NO_NON_HTML5_CONVERSION /* only JISX208 counts as DBCS here */ | |
| 10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) | |
| 11 +#else /* full build: DBCS charsets are the range JISX208..KSC5601 */ | |
| 12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | |
| 13 +#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 14 | |
| 15 #define CSM(cs) ((uint16_t)1<<(cs)) | |
| 16 | |
| 17 @@ -165,13 +169,23 @@ typedef enum { | |
| 18 * all versions, not just JIS7 and JIS8. | |
| 19 * - ICU does not distinguish between different versions of JIS X 0208. | |
| 20 */ | |
| 21 +#if UCONFIG_NO_NON_HTML5_CONVERSION /* keep only ISO-2022-JP version 0 */ | |
| 22 +enum { MAX_JA_VERSION=0 }; | |
| 23 +#else | |
| 24 enum { MAX_JA_VERSION=4 }; | |
| 25 +#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | |
| 27 +/* | |
| 28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it. | |
| 29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 | |
| 30 + */ | |
| 31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | |
| 32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION /* masks for versions 1..4 */ | |
| 33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | |
| 34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | |
| 35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | |
| 36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | |
| 37 +#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 38 }; | |
| 39 | |
| 40 typedef enum { | |
| 41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES
_2022] = { | |
| 42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202
2 ,VALID_TERMINAL_2022 | |
| 43 }; | |
| 44 | |
| 45 - | |
| 46 /* Type def for refactoring changeState_2022 code*/ | |
| 47 typedef enum{ | |
| 48 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
| 49 ISO_2022=0, | |
| 50 #endif | |
| 51 +#if UCONFIG_NO_NON_HTML5_CONVERSION /* JP is the only variant kept */ | |
| 52 + ISO_2022_JP=1 | |
| 53 +#else | |
| 54 ISO_2022_JP=1, | |
| 55 ISO_2022_KR=2, | |
| 56 ISO_2022_CN=3 | |
| 57 +#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 58 } Variant2022; | |
| 59 | |
| 60 /*********** ISO 2022 Converter Protos ***********/ | |
| 61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, U
ErrorCode *errorCode){ | |
| 62 /* prevent indexing beyond jpCharsetMasks[] */ | |
| 63 myConverterData->version = version = 0; | |
| 64 } | |
| 65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | |
| 67 myConverterData->myConverterArray[ISO8859_7] = | |
| 68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs,
errorCode); | |
| 69 } | |
| 70 +#endif | |
| 71 myConverterData->myConverterArray[JISX208] = | |
| 72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro
rCode); | |
| 73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 74 if(jpCharsetMasks[version]&CSM(JISX212)) { | |
| 75 myConverterData->myConverterArray[JISX212] = | |
| 76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e
rrorCode); | |
| 77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ | |
| 78 myConverterData->myConverterArray[KSC5601] = | |
| 79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e
rrorCode); | |
| 80 } | |
| 81 +#endif | |
| 82 | |
| 83 /* set the function pointers to appropriate funtions */ | |
| 84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | |
| 85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ | |
| 86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); | |
| 87 myConverterData->name[len+1]='\0'; | |
| 88 } | |
| 89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | |
| 91 (myLocale[2]=='_' || myLocale[2]=='\0')) | |
| 92 { | |
| 93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ | |
| 94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); | |
| 95 } | |
| 96 } | |
| 97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 98 else{ | |
| 99 #ifdef U_ENABLE_GENERIC_ISO_2022 | |
| 100 myConverterData->isFirstBuffer = TRUE; | |
| 101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { | |
| 102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | |
| 103 }; | |
| 104 | |
| 105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 106 /*************** to unicode *******************/ | |
| 107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | |
| 108 /* 0 1 2 3 4
5 6 7 8 9 *
/ | |
| 109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | |
| 110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE | |
| 111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | |
| 112 }; | |
| 113 +#endif | |
| 114 | |
| 115 | |
| 116 static UCNV_TableStates_2022 | |
| 117 @@ -878,6 +903,7 @@ DONE: | |
| 118 } | |
| 119 break; | |
| 120 /* case SS3_STATE: not used in ISO-2022-JP-x */ | |
| 121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 122 case ISO8859_1: | |
| 123 case ISO8859_7: | |
| 124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { | |
| 125 @@ -887,6 +913,7 @@ DONE: | |
| 126 myData2022->toU2022State.cs[2]=(int8_t)tempState; | |
| 127 } | |
| 128 break; | |
| 129 +#endif | |
| 130 default: | |
| 131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { | |
| 132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | |
| 133 @@ -898,6 +925,7 @@ DONE: | |
| 134 } | |
| 135 } | |
| 136 break; | |
| 137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 138 case ISO_2022_CN: | |
| 139 { | |
| 140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | |
| 141 @@ -959,6 +987,7 @@ DONE: | |
| 142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | |
| 143 } | |
| 144 break; | |
| 145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 146 | |
| 147 default: | |
| 148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | |
| 149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv, | |
| 150 static const StateEnum jpCharsetPref[]={ | |
| 151 ASCII, | |
| 152 JISX201, | |
| 153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION /* charsets outside the version-0 mask */ | |
| 154 ISO8859_1, | |
| 155 ISO8859_7, | |
| 156 +#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 157 JISX208, | |
| 158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION /* charsets outside the version-0 mask */ | |
| 159 JISX212, | |
| 160 GB2312, | |
| 161 KSC5601, | |
| 162 +#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 163 HWKANA_7BIT | |
| 164 }; | |
| 165 | |
| 166 @@ -1754,6 +1787,7 @@ getTrail: | |
| 167 g = 0; | |
| 168 } | |
| 169 break; | |
| 170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 171 case ISO8859_1: | |
| 172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | |
| 173 targetValue = (uint32_t)sourceChar - 0x80; | |
| 174 @@ -1762,6 +1796,7 @@ getTrail: | |
| 175 g = 2; | |
| 176 } | |
| 177 break; | |
| 178 +#endif | |
| 179 case HWKANA_7BIT: | |
| 180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H
WKANA_START)) { | |
| 181 if(converterData->version==3) { | |
| 182 @@ -1823,6 +1858,7 @@ getTrail: | |
| 183 useFallback = FALSE; | |
| 184 } | |
| 185 break; | |
| 186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 187 case ISO8859_7: | |
| 188 /* G0 SBCS forced to 7-bit output */ | |
| 189 len2 = MBCS_SINGLE_FROM_UCHAR32( | |
| 190 @@ -1837,6 +1873,7 @@ getTrail: | |
| 191 useFallback = FALSE; | |
| 192 } | |
| 193 break; | |
| 194 +#endif | |
| 195 default: | |
| 196 /* G0 DBCS */ | |
| 197 len2 = MBCS_FROM_UCHAR32_ISO2022( | |
| 198 @@ -1844,6 +1881,7 @@ getTrail: | |
| 199 sourceChar, &value, | |
| 200 useFallback, MBCS_OUTPUT_2); | |
| 201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | |
| 202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 203 if(cs0 == KSC5601) { | |
| 204 /* | |
| 205 * Check for valid bytes for the encoding scheme. | |
| 206 @@ -1855,6 +1893,7 @@ getTrail: | |
| 207 break; | |
| 208 } | |
| 209 } | |
| 210 +#endif | |
| 211 targetValue = value; | |
| 212 len = len2; | |
| 213 cs = cs0; | |
| 214 @@ -2148,6 +2187,7 @@ escape: | |
| 215 targetUniChar = mySourceChar; | |
| 216 } | |
| 217 break; | |
| 218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 219 case ISO8859_1: | |
| 220 if(mySourceChar <= 0x7f) { | |
| 221 targetUniChar = mySourceChar + 0x80; | |
| 222 @@ -2166,6 +2206,7 @@ escape: | |
| 223 /* return from a single-shift state to the previous one */ | |
| 224 pToU2022State->g=pToU2022State->prevG; | |
| 225 break; | |
| 226 +#endif | |
| 227 case JISX201: | |
| 228 if(mySourceChar <= 0x7f) { | |
| 229 targetUniChar = jisx201ToU(mySourceChar); | |
| 230 @@ -2205,9 +2246,11 @@ getTrailByte: | |
| 231 } else { | |
| 232 /* Copy before we modify tmpSourceChar so toUni
codeCallback() sees the correct bytes. */ | |
| 233 mySourceChar = tmpSourceChar; | |
| 234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 235 if (cs == KSC5601) { | |
| 236 tmpSourceChar += 0x8080; /* = _2022ToGR94D
BCS(tmpSourceChar) */ | |
| 237 } | |
| 238 +#endif | |
| 239 tempBuf[0] = (char)(tmpSourceChar >> 8); | |
| 240 tempBuf[1] = (char)(tmpSourceChar); | |
| 241 } | |
| 242 @@ -2269,6 +2312,7 @@ endloop: | |
| 243 } | |
| 244 | |
| 245 | |
| 246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 247 /*************************************************************** | |
| 248 * Rules for ISO-2022-KR encoding | |
| 249 * i) The KSC5601 designator sequence should appear only once in a file, | |
| 250 @@ -3412,6 +3456,7 @@ endloop: | |
| 251 args->target = myTarget; | |
| 252 args->source = mySource; | |
| 253 } | |
| 254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 255 | |
| 256 static void | |
| 257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError
Code *err) { | |
| 258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
| 259 /* include JIS X 0201 which is hardcoded */ | |
| 260 sa->add(sa->set, 0xa5); | |
| 261 sa->add(sa->set, 0x203e); | |
| 262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | |
| 264 /* include Latin-1 for some variants of JP */ | |
| 265 sa->addRange(sa->set, 0, 0xff); | |
| 266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
| 267 /* include ASCII for JP */ | |
| 268 sa->addRange(sa->set, 0, 0x7f); | |
| 269 } | |
| 270 +#else | |
| 271 + /* include ASCII for JP */ | |
| 272 + sa->addRange(sa->set, 0, 0x7f); | |
| 273 +#endif | |
| 274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_
AND_FALLBACK_SET) { | |
| 275 /* | |
| 276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!
=0 | |
| 277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
| 278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | |
| 279 } | |
| 280 break; | |
| 281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 282 case 'c': | |
| 283 case 'z': | |
| 284 /* include ASCII for CN */ | |
| 285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
| 286 cnvData->currentConverter, sa, which, pErrorCode); | |
| 287 /* the loop over myConverterArray[] will simply not find another conver
ter */ | |
| 288 break; | |
| 289 +#endif | |
| 290 default: | |
| 291 break; | |
| 292 } | |
| 293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
| 294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | |
| 295 UConverterSetFilter filter; | |
| 296 if(cnvData->myConverterArray[i]!=NULL) { | |
| 297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | |
| 298 - cnvData->version==0 && i==CNS_11643 | |
| 299 - ) { | |
| 300 + if(cnvData->locale[0]=='j' && i==JISX208) { | |
| 301 + /* | |
| 302 + * Only add code points that map to Shift-JIS codes | |
| 303 + * corresponding to JIS X 0208. | |
| 304 + */ | |
| 305 + filter=UCNV_SET_FILTER_SJIS; | |
| 306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | |
| 308 + cnvData->version==0 && i==CNS_11643) { | |
| 309 /* | |
| 310 * Version-specific for CN: | |
| 311 * CN version 0 does not map CNS planes 3..7 although | |
| 312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
| 313 * The two versions create different Unicode sets. | |
| 314 */ | |
| 315 filter=UCNV_SET_FILTER_2022_CN; | |
| 316 - } else if(cnvData->locale[0]=='j' && i==JISX208) { | |
| 317 - /* | |
| 318 - * Only add code points that map to Shift-JIS codes | |
| 319 - * corresponding to JIS X 0208. | |
| 320 - */ | |
| 321 - filter=UCNV_SET_FILTER_SJIS; | |
| 322 } else if(i==KSC5601) { | |
| 323 /* | |
| 324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o
n multiple tables) | |
| 325 * are broader than GR94. | |
| 326 */ | |
| 327 filter=UCNV_SET_FILTER_GR94DBCS; | |
| 328 +#endif | |
| 329 } else { | |
| 330 filter=UCNV_SET_FILTER_NONE; | |
| 331 } | |
| 332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={ | |
| 333 | |
| 334 } // namespace | |
| 335 | |
| 336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 337 /************* KR ***************/ | |
| 338 static const UConverterImpl _ISO2022KRImpl={ | |
| 339 UCNV_ISO_2022, | |
| 340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={ | |
| 341 }; | |
| 342 | |
| 343 } // namespace | |
| 344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 345 | |
| 346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
| 347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp | |
| 348 index 4940310..047f18a 100644 | |
| 349 --- a/source/common/ucnv_bld.cpp | |
| 350 +++ b/source/common/ucnv_bld.cpp | |
| 351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ | |
| 352 | |
| 353 #if UCONFIG_NO_LEGACY_CONVERSION | |
| 354 NULL, | |
| 355 +#else | |
| 356 + &_ISO2022Data, | |
| 357 +#endif | |
| 358 + | |
| 359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | |
| 360 NULL, NULL, NULL, NULL, NULL, NULL, | |
| 361 NULL, NULL, NULL, NULL, NULL, NULL, | |
| 362 NULL, | |
| 363 #else | |
| 364 - &_ISO2022Data, | |
| 365 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBC
SData6, | |
| 366 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBC
SData19, | |
| 367 &_HZData, | |
| 368 #endif | |
| 369 | |
| 370 +#if UCONFIG_NO_NON_HTML5_CONVERSION | |
| 371 + NULL, | |
| 372 +#else | |
| 373 &_SCSUData, | |
| 374 +#endif | |
| 375 | |
| 376 -#if UCONFIG_NO_LEGACY_CONVERSION | |
| 377 + | |
| 378 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | |
| 379 NULL, | |
| 380 #else | |
| 381 &_ISCIIData, | |
| 382 #endif | |
| 383 | |
| 384 &_ASCIIData, | |
| 385 +#if UCONFIG_NO_NON_HTML5_CONVERSION | |
| 386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, | |
| 387 +#else | |
| 388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, | |
| 389 +#endif | |
| 390 | |
| 391 -#if UCONFIG_NO_LEGACY_CONVERSION | |
| 392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | |
| 393 NULL, | |
| 394 #else | |
| 395 &_CompoundTextData | |
| 396 @@ -105,18 +118,24 @@ static struct { | |
| 397 const char *name; | |
| 398 const UConverterType type; | |
| 399 } const cnvNameType[] = { | |
| 400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 401 { "bocu1", UCNV_BOCU1 }, | |
| 402 { "cesu8", UCNV_CESU8 }, | |
| 403 -#if !UCONFIG_NO_LEGACY_CONVERSION | |
| 404 +#endif | |
| 405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 406 { "hz",UCNV_HZ }, | |
| 407 #endif | |
| 408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 409 { "imapmailboxname", UCNV_IMAP_MAILBOX }, | |
| 410 -#if !UCONFIG_NO_LEGACY_CONVERSION | |
| 411 +#endif | |
| 412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 413 { "iscii", UCNV_ISCII }, | |
| 414 +#endif | |
| 415 +#if !UCONFIG_NO_LEGACY_CONVERSION | |
| 416 { "iso2022", UCNV_ISO_2022 }, | |
| 417 #endif | |
| 418 { "iso88591", UCNV_LATIN_1 }, | |
| 419 -#if !UCONFIG_NO_LEGACY_CONVERSION | |
| 420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 421 { "lmbcs1", UCNV_LMBCS_1 }, | |
| 422 { "lmbcs11",UCNV_LMBCS_11 }, | |
| 423 { "lmbcs16",UCNV_LMBCS_16 }, | |
| 424 @@ -130,7 +149,9 @@ static struct { | |
| 425 { "lmbcs6", UCNV_LMBCS_6 }, | |
| 426 { "lmbcs8", UCNV_LMBCS_8 }, | |
| 427 #endif | |
| 428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 429 { "scsu", UCNV_SCSU }, | |
| 430 +#endif | |
| 431 { "usascii", UCNV_US_ASCII }, | |
| 432 { "utf16", UCNV_UTF16 }, | |
| 433 { "utf16be", UCNV_UTF16_BigEndian }, | |
| 434 @@ -152,9 +173,13 @@ static struct { | |
| 435 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, | |
| 436 { "utf32platformendian", UCNV_UTF32_LittleEndian }, | |
| 437 #endif | |
| 438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 439 { "utf7", UCNV_UTF7 }, | |
| 440 +#endif | |
| 441 { "utf8", UCNV_UTF8 }, | |
| 442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 443 { "x11compoundtext", UCNV_COMPOUND_TEXT} | |
| 444 +#endif | |
| 445 }; | |
| 446 | |
| 447 | |
| 448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h | |
| 449 index 402e2c9..5fad446 100644 | |
| 450 --- a/source/common/ucnv_cnv.h | |
| 451 +++ b/source/common/ucnv_cnv.h | |
| 452 @@ -256,11 +256,15 @@ struct UConverterImpl { | |
| 453 extern const UConverterSharedData | |
| 454 _MBCSData, _Latin1Data, | |
| 455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, | |
| 456 - _ISO2022Data, | |
| 457 + _ISO2022Data, | |
| 458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6
, | |
| 460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1
9, | |
| 461 _HZData,_ISCIIData, _SCSUData, _ASCIIData, | |
| 462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp
oundTextData; | |
| 463 +#else | |
| 464 + _ASCIIData, _UTF16Data, _UTF32Data; | |
| 465 +#endif | |
| 466 | |
| 467 U_CDECL_END | |
| 468 | |
| 469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c | |
| 470 index ec0e9c2..e723fa6 100644 | |
| 471 --- a/source/common/ucnv_ct.c | |
| 472 +++ b/source/common/ucnv_ct.c | |
| 473 @@ -14,7 +14,7 @@ | |
| 474 | |
| 475 #include "unicode/utypes.h" | |
| 476 | |
| 477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
| 478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
| 479 | |
| 480 #include "unicode/ucnv.h" | |
| 481 #include "unicode/uset.h" | |
| 482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c | |
| 483 index 1d921dd..a4fccee 100644 | |
| 484 --- a/source/common/ucnv_lmb.c | |
| 485 +++ b/source/common/ucnv_lmb.c | |
| 486 @@ -25,7 +25,7 @@ | |
| 487 | |
| 488 #include "unicode/utypes.h" | |
| 489 | |
| 490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
| 491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
| 492 | |
| 493 #include "unicode/ucnv_err.h" | |
| 494 #include "unicode/ucnv.h" | |
| 495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c | |
| 496 index 42943f4..6466b87 100644 | |
| 497 --- a/source/common/ucnv_u7.c | |
| 498 +++ b/source/common/ucnv_u7.c | |
| 499 @@ -16,7 +16,7 @@ | |
| 500 | |
| 501 #include "unicode/utypes.h" | |
| 502 | |
| 503 -#if !UCONFIG_NO_CONVERSION | |
| 504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 505 | |
| 506 #include "unicode/ucnv.h" | |
| 507 #include "ucnv_bld.h" | |
| 508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c | |
| 509 index 8ee9fe5..24205f5 100644 | |
| 510 --- a/source/common/ucnv_u8.c | |
| 511 +++ b/source/common/ucnv_u8.c | |
| 512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = { | |
| 513 static const uint32_t | |
| 514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; | |
| 515 | |
| 516 +static UBool hasCESU8Data(const UConverter *cnv) | |
| 517 +{ /* TRUE iff cnv->sharedData is the CESU-8 data (&_CESU8Data) */ | |
| 518 +#if UCONFIG_NO_NON_HTML5_CONVERSION | |
| 519 + return FALSE; /* _CESU8Data is not declared here; cnv is unused */ | |
| 520 +#else | |
| 521 + return (UBool)(cnv->sharedData == &_CESU8Data); | |
| 522 +#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 523 +} | |
| 524 + | |
| 525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, | |
| 526 UErrorCode * err) | |
| 527 { | |
| 528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs *
args, | |
| 529 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | |
| 530 const UChar *targetLimit = args->targetLimit; | |
| 531 unsigned char *toUBytes = cnv->toUBytes; | |
| 532 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | |
| 533 + UBool isCESU8 = hasCESU8Data(cnv); | |
| 534 uint32_t ch, ch2 = 0; | |
| 535 int32_t i, inBytes; | |
| 536 - | |
| 537 + | |
| 538 /* Restore size of current sequence */ | |
| 539 if (cnv->toUnicodeStatus && myTarget < targetLimit) | |
| 540 { | |
| 541 @@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToU
nicodeArgs * args, | |
| 542 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | |
| 543 const UChar *targetLimit = args->targetLimit; | |
| 544 unsigned char *toUBytes = cnv->toUBytes; | |
| 545 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | |
| 546 + UBool isCESU8 = hasCESU8Data(cnv); | |
| 547 uint32_t ch, ch2 = 0; | |
| 548 int32_t i, inBytes; | |
| 549 | |
| 550 @@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArg
s * args, | |
| 551 UChar32 ch; | |
| 552 uint8_t tempBuf[4]; | |
| 553 int32_t indexToWrite; | |
| 554 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | |
| 555 + UBool isNotCESU8 = !hasCESU8Data(cnv); | |
| 556 | |
| 557 if (cnv->fromUChar32 && myTarget < targetLimit) | |
| 558 { | |
| 559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter
FromUnicodeArgs * ar | |
| 560 int32_t offsetNum, nextSourceIndex; | |
| 561 int32_t indexToWrite; | |
| 562 uint8_t tempBuf[4]; | |
| 563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | |
| 564 + UBool isNotCESU8 = !hasCESU8Data(cnv); | |
| 565 | |
| 566 if (cnv->fromUChar32 && myTarget < targetLimit) | |
| 567 { | |
| 568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp | |
| 569 index b97d666..281d6d9 100644 | |
| 570 --- a/source/common/ucnvbocu.cpp | |
| 571 +++ b/source/common/ucnvbocu.cpp | |
| 572 @@ -19,7 +19,7 @@ | |
| 573 | |
| 574 #include "unicode/utypes.h" | |
| 575 | |
| 576 -#if !UCONFIG_NO_CONVERSION | |
| 577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 578 | |
| 579 #include "unicode/ucnv.h" | |
| 580 #include "unicode/ucnv_cb.h" | |
| 581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c | |
| 582 index 3760c39..51825e2 100644 | |
| 583 --- a/source/common/ucnvhz.c | |
| 584 +++ b/source/common/ucnvhz.c | |
| 585 @@ -16,7 +16,7 @@ | |
| 586 | |
| 587 #include "unicode/utypes.h" | |
| 588 | |
| 589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
| 590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
| 591 | |
| 592 #include "cmemory.h" | |
| 593 #include "unicode/ucnv.h" | |
| 594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={ | |
| 595 0 | |
| 596 }; | |
| 597 | |
| 598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
| 599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF
IG_NO_NON_HTML5_CONVERSION */ | |
| 600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c | |
| 601 index fe61d40..16fd0a3 100644 | |
| 602 --- a/source/common/ucnvisci.c | |
| 603 +++ b/source/common/ucnvisci.c | |
| 604 @@ -17,7 +17,7 @@ | |
| 605 | |
| 606 #include "unicode/utypes.h" | |
| 607 | |
| 608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
| 609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
| 610 | |
| 611 #include "unicode/ucnv.h" | |
| 612 #include "unicode/ucnv_cb.h" | |
| 613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c | |
| 614 index c6e96e1..a6f8c9e 100644 | |
| 615 --- a/source/common/ucnvscsu.c | |
| 616 +++ b/source/common/ucnvscsu.c | |
| 617 @@ -21,7 +21,7 @@ | |
| 618 | |
| 619 #include "unicode/utypes.h" | |
| 620 | |
| 621 -#if !UCONFIG_NO_CONVERSION | |
| 622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 623 | |
| 624 #include "unicode/ucnv.h" | |
| 625 #include "unicode/ucnv_cb.h" | |
| 626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h | |
| 627 index ed073b6..8df56e6 100644 | |
| 628 --- a/source/common/unicode/uconfig.h | |
| 629 +++ b/source/common/unicode/uconfig.h | |
| 630 @@ -270,6 +270,14 @@ | |
| 631 #endif | |
| 632 | |
| 633 /** | |
| 634 + * \def UCONFIG_NO_NON_HTML5_CONVERSION | |
| 635 + * Disables converters NOT in the Encoding Standard, encoding.spec.whatwg.org | |
| 636 + */ | |
| 637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION | |
| 638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 | |
| 639 +#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ | |
| 640 + | |
| 641 +/** | |
| 642 * \def UCONFIG_NO_LEGACY_CONVERSION | |
| 643 * This switch turns off all converters except for | |
| 644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) | |
| 645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h | |
| 646 index a817262..89becca 100644 | |
| 647 --- a/source/common/unicode/urename.h | |
| 648 +++ b/source/common/unicode/urename.h | |
| 649 @@ -73,12 +73,14 @@ | |
| 650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) | |
| 651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) | |
| 652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) | |
| 653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) | |
| 655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) | |
| 656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) | |
| 657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) | |
| 658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) | |
| 659 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) | |
| 660 +#endif | |
| 661 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) | |
| 662 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) | |
| 663 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) | |
| 664 @@ -94,14 +96,18 @@ | |
| 665 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) | |
| 666 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) | |
| 667 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) | |
| 668 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 669 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) | |
| 670 +#endif | |
| 671 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) | |
| 672 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) | |
| 673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) | |
| 674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) | |
| 675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) | |
| 676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) | |
| 677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) | |
| 679 +#endif | |
| 680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) | |
| 681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) | |
| 682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) | |
| 683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp | |
| 684 index 3efbd49..ba5b18c 100644 | |
| 685 --- a/source/i18n/csdetect.cpp | |
| 686 +++ b/source/i18n/csdetect.cpp | |
| 687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { | |
| 688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), | |
| 689 | |
| 690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), | |
| 691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), | |
| 693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), | |
| 694 | |
| 695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { | |
| 696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), | |
| 697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), | |
| 698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) | |
| 699 +#endif | |
| 700 }; | |
| 701 int32_t rCount = ARRAY_SIZE(tempArray); | |
| 702 | |
| 703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp | |
| 704 index 3db0bc9..be3eafa 100644 | |
| 705 --- a/source/i18n/csr2022.cpp | |
| 706 +++ b/source/i18n/csr2022.cpp | |
| 707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = { | |
| 708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | |
| 709 }; | |
| 710 | |
| 711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 712 static const uint8_t escapeSequences_2022KR[][5] = { | |
| 713 {0x1b, 0x24, 0x29, 0x43, 0x00} | |
| 714 }; | |
| 715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = { | |
| 716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 | |
| 717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 | |
| 718 }; | |
| 719 +#endif | |
| 720 | |
| 721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} | |
| 722 | |
| 723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetM
atch *results) const | |
| 724 return (confidence > 0); | |
| 725 } | |
| 726 | |
| 727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | |
| 729 | |
| 730 const char *CharsetRecog_2022KR::getName() const { | |
| 731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetM
atch *results) const | |
| 732 results->set(textIn, this, confidence); | |
| 733 return (confidence > 0); | |
| 734 } | |
| 735 +#endif | |
| 736 | |
| 737 CharsetRecog_2022::~CharsetRecog_2022() { | |
| 738 // nothing to do | |
| 739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h | |
| 740 index 2ac2b87..dad22c7 100644 | |
| 741 --- a/source/i18n/csr2022.h | |
| 742 +++ b/source/i18n/csr2022.h | |
| 743 @@ -65,6 +65,7 @@ public: | |
| 744 UBool match(InputText *textIn, CharsetMatch *results) const; | |
| 745 }; | |
| 746 | |
| 747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 748 class CharsetRecog_2022KR :public CharsetRecog_2022 { | |
| 749 public: | |
| 750 virtual ~CharsetRecog_2022KR(); | |
| 751 @@ -84,6 +85,7 @@ public: | |
| 752 | |
| 753 UBool match(InputText *textIn, CharsetMatch *results) const; | |
| 754 }; | |
| 755 +#endif | |
| 756 | |
| 757 U_NAMESPACE_END | |
| 758 | |
| 759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp | |
| 760 index d03367c..7b70dc1 100644 | |
| 761 --- a/source/i18n/csrsbcs.cpp | |
| 762 +++ b/source/i18n/csrsbcs.cpp | |
| 763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det) | |
| 764 return (int32_t) (rawPercent * 300.0); | |
| 765 } | |
| 766 | |
| 767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 768 static const uint8_t unshapeMap_IBM420[] = { | |
| 769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A
-B -C -D -E -F */ | |
| 770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
x40, 0x40, 0x40, 0x40, 0x40, | |
| 771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det) | |
| 772 } | |
| 773 } | |
| 774 } | |
| 775 +#endif | |
| 776 | |
| 777 CharsetRecog_sbcs::CharsetRecog_sbcs() | |
| 778 { | |
| 779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = { | |
| 780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | |
| 781 }; | |
| 782 | |
| 783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 784 static const int32_t ngrams_IBM424_he_rtl[] = { | |
| 785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404
546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405
641, | |
| 786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454
056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514
045, | |
| 787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= { | |
| 788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0
xEB, 0x40, 0xED, 0xEE, 0xEF, | |
| 789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
xFB, 0xFC, 0xFD, 0xFE, 0x40, | |
| 790 }; | |
| 791 +#endif | |
| 792 | |
| 793 //ISO-8859-1,2,5,6,7,8,9 Ngrams | |
| 794 | |
| 795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, Charse
tMatch *results) const | |
| 796 return (confidence > 0); | |
| 797 } | |
| 798 | |
| 799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() | |
| 801 { | |
| 802 // nothing to do | |
| 803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn,
CharsetMatch *results | |
| 804 results->set(textIn, this, confidence); | |
| 805 return (confidence > 0); | |
| 806 } | |
| 807 +#endif | |
| 808 | |
| 809 U_NAMESPACE_END | |
| 810 #endif | |
| 811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h | |
| 812 index 2579c02..7789f9b 100644 | |
| 813 --- a/source/i18n/csrsbcs.h | |
| 814 +++ b/source/i18n/csrsbcs.h | |
| 815 @@ -50,6 +50,7 @@ public: | |
| 816 | |
| 817 }; | |
| 818 | |
| 819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 820 class NGramParser_IBM420 : public NGramParser | |
| 821 { | |
| 822 private: | |
| 823 @@ -61,6 +62,7 @@ private: | |
| 824 public: | |
| 825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); | |
| 826 }; | |
| 827 +#endif | |
| 828 | |
| 829 | |
| 830 class CharsetRecog_sbcs : public CharsetRecognizer | |
| 831 @@ -229,6 +231,7 @@ public: | |
| 832 virtual UBool match(InputText *det, CharsetMatch *results) const; | |
| 833 }; | |
| 834 | |
| 835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
| 836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs | |
| 837 { | |
| 838 public: | |
| 839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM42
0_ar { | |
| 840 | |
| 841 virtual UBool match(InputText *det, CharsetMatch *results) const; | |
| 842 }; | |
| 843 +#endif | |
| 844 | |
| 845 U_NAMESPACE_END | |
| 846 | |
| OLD | NEW |