OLD | NEW |
1 Index: source/common/ucnv2022.cpp | 1 Index: source/common/ucnv2022.cpp |
2 =================================================================== | 2 =================================================================== |
3 --- source/common/ucnv2022.cpp (revision 259715) | 3 --- source/common/ucnv2022.cpp (revision 259715) |
4 +++ source/common/ucnv2022.cpp (working copy) | 4 +++ source/common/ucnv2022.cpp (working copy) |
5 @@ -167,13 +167,19 @@ | 5 @@ -154,7 +154,11 @@ |
| 6 } StateEnum; |
| 7 |
| 8 /* is the StateEnum charset value for a DBCS charset? */ |
| 9 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| 10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) |
| 11 +#else |
| 12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
| 13 +#endif |
| 14 |
| 15 #define CSM(cs) ((uint16_t)1<<(cs)) |
| 16 |
| 17 @@ -167,13 +171,23 @@ |
6 * all versions, not just JIS7 and JIS8. | 18 * all versions, not just JIS7 and JIS8. |
7 * - ICU does not distinguish between different versions of JIS X 0208. | 19 * - ICU does not distinguish between different versions of JIS X 0208. |
8 */ | 20 */ |
9 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 21 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
10 +enum { MAX_JA_VERSION=0 }; | 22 +enum { MAX_JA_VERSION=0 }; |
11 +#else | 23 +#else |
12 enum { MAX_JA_VERSION=4 }; | 24 enum { MAX_JA_VERSION=4 }; |
13 +#endif | 25 +#endif |
14 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
| 27 +/* |
| 28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
| 29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
| 30 + */ |
15 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
16 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
17 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
18 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
19 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
20 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
21 +#endif | 37 +#endif |
22 }; | 38 }; |
23 | 39 |
24 typedef enum { | 40 typedef enum { |
25 @@ -361,14 +367,25 @@ | 41 @@ -360,15 +374,18 @@ |
| 42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202
2 ,VALID_TERMINAL_2022 |
26 }; | 43 }; |
27 | 44 |
28 | 45 - |
29 +/* Enable ISO-2022-{KR,CN,CN-Ext} for now. | |
30 + * TODO(jshin): Disable it when we know what to do about 'replacement' | |
31 + * encodings. See http://crbug.com/277037 and | |
32 + * https://codereview.chromium.org/145973021/ | |
33 + */ | |
34 +#ifndef U_ENABLE_ISO_2022_KR_CN | |
35 +#define U_ENABLE_ISO_2022_KR_CN 1 | |
36 +#endif | |
37 + | |
38 /* Type def for refactoring changeState_2022 code*/ | 46 /* Type def for refactoring changeState_2022 code*/ |
39 typedef enum{ | 47 typedef enum{ |
40 #ifdef U_ENABLE_GENERIC_ISO_2022 | 48 #ifdef U_ENABLE_GENERIC_ISO_2022 |
41 ISO_2022=0, | 49 ISO_2022=0, |
42 #endif | 50 #endif |
| 51 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| 52 + ISO_2022_JP=1 |
| 53 +#else |
43 ISO_2022_JP=1, | 54 ISO_2022_JP=1, |
44 +#ifdef U_ENABLE_ISO_2022_KR_CN | |
45 ISO_2022_KR=2, | 55 ISO_2022_KR=2, |
46 ISO_2022_CN=3 | 56 ISO_2022_CN=3 |
47 +#endif | 57 +#endif |
48 } Variant2022; | 58 } Variant2022; |
49 | 59 |
50 /*********** ISO 2022 Converter Protos ***********/ | 60 /*********** ISO 2022 Converter Protos ***********/ |
51 @@ -485,24 +502,28 @@ | 61 @@ -485,12 +502,15 @@ |
52 /* prevent indexing beyond jpCharsetMasks[] */ | 62 /* prevent indexing beyond jpCharsetMasks[] */ |
53 myConverterData->version = version = 0; | 63 myConverterData->version = version = 0; |
54 } | 64 } |
55 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
56 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
57 myConverterData->myConverterArray[ISO8859_7] = | 67 myConverterData->myConverterArray[ISO8859_7] = |
58 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs,
errorCode); | 68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs,
errorCode); |
59 } | 69 } |
60 +#endif | 70 +#endif |
61 myConverterData->myConverterArray[JISX208] = | 71 myConverterData->myConverterArray[JISX208] = |
62 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro
rCode); | 72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro
rCode); |
63 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
64 if(jpCharsetMasks[version]&CSM(JISX212)) { | 74 if(jpCharsetMasks[version]&CSM(JISX212)) { |
65 myConverterData->myConverterArray[JISX212] = | 75 myConverterData->myConverterArray[JISX212] = |
66 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e
rrorCode); | 76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e
rrorCode); |
67 } | 77 @@ -503,6 +523,7 @@ |
68 if(jpCharsetMasks[version]&CSM(GB2312)) { | |
69 myConverterData->myConverterArray[GB2312] = | |
70 - ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, e
rrorCode); /* gb_2312_80-1 */ | |
71 + ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackA
rgs, errorCode); /* gb_2312_80-1 */ | |
72 } | |
73 if(jpCharsetMasks[version]&CSM(KSC5601)) { | |
74 myConverterData->myConverterArray[KSC5601] = | 78 myConverterData->myConverterArray[KSC5601] = |
75 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e
rrorCode); | 79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e
rrorCode); |
76 } | 80 } |
77 +#endif | 81 +#endif |
78 | 82 |
79 /* set the function pointers to appropriate funtions */ | 83 /* set the function pointers to appropriate funtions */ |
80 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
81 @@ -513,6 +534,7 @@ | 85 @@ -513,6 +534,7 @@ |
82 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); | 86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); |
83 myConverterData->name[len+1]='\0'; | 87 myConverterData->name[len+1]='\0'; |
84 } | 88 } |
85 +#ifdef U_ENABLE_ISO_2022_KR_CN | 89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
86 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
87 (myLocale[2]=='_' || myLocale[2]=='\0')) | 91 (myLocale[2]=='_' || myLocale[2]=='\0')) |
88 { | 92 { |
89 @@ -558,13 +580,13 @@ | |
90 | |
91 /* open the required converters and cache them */ | |
92 myConverterData->myConverterArray[GB2312_1] = | |
93 - ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, error
Code); | |
94 + ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs,
errorCode); | |
95 if(version==1) { | |
96 myConverterData->myConverterArray[ISO_IR_165] = | |
97 - ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs,
errorCode); | |
98 + ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stack
Args, errorCode); | |
99 } | |
100 myConverterData->myConverterArray[CNS_11643] = | |
101 - ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs,
errorCode); | |
102 + ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs,
errorCode); | |
103 | |
104 | |
105 /* set the function pointers to appropriate funtions */ | |
106 @@ -582,6 +604,7 @@ | 93 @@ -582,6 +604,7 @@ |
107 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); | 94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); |
108 } | 95 } |
109 } | 96 } |
110 +#endif // U_ENABLE_ISO_2022_KR_CN | 97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
111 else{ | 98 else{ |
112 #ifdef U_ENABLE_GENERIC_ISO_2022 | 99 #ifdef U_ENABLE_GENERIC_ISO_2022 |
113 myConverterData->isFirstBuffer = TRUE; | 100 myConverterData->isFirstBuffer = TRUE; |
| 101 @@ -716,6 +739,7 @@ |
| 102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
| 103 }; |
| 104 |
| 105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 106 /*************** to unicode *******************/ |
| 107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
| 108 /* 0 1 2 3 4
5 6 7 8 9 *
/ |
| 109 @@ -728,6 +752,7 @@ |
| 110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE |
| 111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
| 112 }; |
| 113 +#endif |
| 114 |
| 115 |
| 116 static UCNV_TableStates_2022 |
| 117 @@ -880,6 +905,7 @@ |
| 118 } |
| 119 break; |
| 120 /* case SS3_STATE: not used in ISO-2022-JP-x */ |
| 121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 122 case ISO8859_1: |
| 123 case ISO8859_7: |
| 124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { |
| 125 @@ -889,6 +915,7 @@ |
| 126 myData2022->toU2022State.cs[2]=(int8_t)tempState; |
| 127 } |
| 128 break; |
| 129 +#endif |
| 130 default: |
| 131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { |
| 132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
| 133 @@ -900,6 +927,7 @@ |
| 134 } |
| 135 } |
| 136 break; |
| 137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 138 case ISO_2022_CN: |
| 139 { |
| 140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
| 141 @@ -961,6 +989,7 @@ |
| 142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
| 143 } |
| 144 break; |
| 145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| 146 |
| 147 default: |
| 148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| 149 @@ -1381,12 +1410,16 @@ |
| 150 static const StateEnum jpCharsetPref[]={ |
| 151 ASCII, |
| 152 JISX201, |
| 153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 154 ISO8859_1, |
| 155 ISO8859_7, |
| 156 +#endif |
| 157 JISX208, |
| 158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 159 JISX212, |
| 160 GB2312, |
| 161 KSC5601, |
| 162 +#endif |
| 163 HWKANA_7BIT |
| 164 }; |
| 165 |
| 166 @@ -1756,6 +1789,7 @@ |
| 167 g = 0; |
| 168 } |
| 169 break; |
| 170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 171 case ISO8859_1: |
| 172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
| 173 targetValue = (uint32_t)sourceChar - 0x80; |
| 174 @@ -1764,6 +1798,7 @@ |
| 175 g = 2; |
| 176 } |
| 177 break; |
| 178 +#endif |
| 179 case HWKANA_7BIT: |
| 180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H
WKANA_START)) { |
| 181 if(converterData->version==3) { |
| 182 @@ -1825,6 +1860,7 @@ |
| 183 useFallback = FALSE; |
| 184 } |
| 185 break; |
| 186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 187 case ISO8859_7: |
| 188 /* G0 SBCS forced to 7-bit output */ |
| 189 len2 = MBCS_SINGLE_FROM_UCHAR32( |
| 190 @@ -1839,6 +1875,7 @@ |
| 191 useFallback = FALSE; |
| 192 } |
| 193 break; |
| 194 +#endif |
| 195 default: |
| 196 /* G0 DBCS */ |
| 197 len2 = MBCS_FROM_UCHAR32_ISO2022( |
| 198 @@ -1846,6 +1883,7 @@ |
| 199 sourceChar, &value, |
| 200 useFallback, MBCS_OUTPUT_2); |
| 201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ |
| 202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 203 if(cs0 == KSC5601) { |
| 204 /* |
| 205 * Check for valid bytes for the encoding scheme. |
| 206 @@ -1857,6 +1895,7 @@ |
| 207 break; |
| 208 } |
| 209 } |
| 210 +#endif |
| 211 targetValue = value; |
| 212 len = len2; |
| 213 cs = cs0; |
| 214 @@ -2150,6 +2189,7 @@ |
| 215 targetUniChar = mySourceChar; |
| 216 } |
| 217 break; |
| 218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 219 case ISO8859_1: |
| 220 if(mySourceChar <= 0x7f) { |
| 221 targetUniChar = mySourceChar + 0x80; |
| 222 @@ -2168,6 +2208,7 @@ |
| 223 /* return from a single-shift state to the previous one */ |
| 224 pToU2022State->g=pToU2022State->prevG; |
| 225 break; |
| 226 +#endif |
| 227 case JISX201: |
| 228 if(mySourceChar <= 0x7f) { |
| 229 targetUniChar = jisx201ToU(mySourceChar); |
| 230 @@ -2207,9 +2248,11 @@ |
| 231 } else { |
| 232 /* Copy before we modify tmpSourceChar so toUni
codeCallback() sees the correct bytes. */ |
| 233 mySourceChar = tmpSourceChar; |
| 234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 235 if (cs == KSC5601) { |
| 236 tmpSourceChar += 0x8080; /* = _2022ToGR94D
BCS(tmpSourceChar) */ |
| 237 } |
| 238 +#endif |
| 239 tempBuf[0] = (char)(tmpSourceChar >> 8); |
| 240 tempBuf[1] = (char)(tmpSourceChar); |
| 241 } |
| 242 @@ -2271,6 +2314,7 @@ |
| 243 } |
| 244 |
| 245 |
| 246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 247 /*************************************************************** |
| 248 * Rules for ISO-2022-KR encoding |
| 249 * i) The KSC5601 designator sequence should appear only once in a file, |
| 250 @@ -3414,6 +3458,7 @@ |
| 251 args->target = myTarget; |
| 252 args->source = mySource; |
| 253 } |
| 254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| 255 |
| 256 static void |
| 257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError
Code *err) { |
| 258 @@ -3615,6 +3660,7 @@ |
| 259 /* include JIS X 0201 which is hardcoded */ |
| 260 sa->add(sa->set, 0xa5); |
| 261 sa->add(sa->set, 0x203e); |
| 262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
| 264 /* include Latin-1 for some variants of JP */ |
| 265 sa->addRange(sa->set, 0, 0xff); |
| 266 @@ -3622,6 +3668,10 @@ |
| 267 /* include ASCII for JP */ |
| 268 sa->addRange(sa->set, 0, 0x7f); |
| 269 } |
| 270 +#else |
| 271 + /* include ASCII for JP */ |
| 272 + sa->addRange(sa->set, 0, 0x7f); |
| 273 +#endif |
| 274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_
AND_FALLBACK_SET) { |
| 275 /* |
| 276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!
=0 |
| 277 @@ -3640,6 +3690,7 @@ |
| 278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
| 279 } |
| 280 break; |
| 281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 282 case 'c': |
| 283 case 'z': |
| 284 /* include ASCII for CN */ |
| 285 @@ -3651,6 +3702,7 @@ |
| 286 cnvData->currentConverter, sa, which, pErrorCode); |
| 287 /* the loop over myConverterArray[] will simply not find another conver
ter */ |
| 288 break; |
| 289 +#endif |
| 290 default: |
| 291 break; |
| 292 } |
| 293 @@ -3671,10 +3723,16 @@ |
| 294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
| 295 UConverterSetFilter filter; |
| 296 if(cnvData->myConverterArray[i]!=NULL) { |
| 297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
| 298 - cnvData->version==0 && i==CNS_11643 |
| 299 - ) { |
| 300 + if(cnvData->locale[0]=='j' && i==JISX208) { |
| 301 /* |
| 302 + * Only add code points that map to Shift-JIS codes |
| 303 + * corresponding to JIS X 0208. |
| 304 + */ |
| 305 + filter=UCNV_SET_FILTER_SJIS; |
| 306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
| 308 + cnvData->version==0 && i==CNS_11643) { |
| 309 + /* |
| 310 * Version-specific for CN: |
| 311 * CN version 0 does not map CNS planes 3..7 although |
| 312 * they are all available in the CNS conversion table; |
| 313 @@ -3682,18 +3740,13 @@ |
| 314 * The two versions create different Unicode sets. |
| 315 */ |
| 316 filter=UCNV_SET_FILTER_2022_CN; |
| 317 - } else if(cnvData->locale[0]=='j' && i==JISX208) { |
| 318 - /* |
| 319 - * Only add code points that map to Shift-JIS codes |
| 320 - * corresponding to JIS X 0208. |
| 321 - */ |
| 322 - filter=UCNV_SET_FILTER_SJIS; |
| 323 } else if(i==KSC5601) { |
| 324 /* |
| 325 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o
n multiple tables) |
| 326 * are broader than GR94. |
| 327 */ |
| 328 filter=UCNV_SET_FILTER_GR94DBCS; |
| 329 +#endif |
| 330 } else { |
| 331 filter=UCNV_SET_FILTER_NONE; |
| 332 } |
| 333 @@ -3831,6 +3884,7 @@ |
| 334 |
| 335 } // namespace |
| 336 |
| 337 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 338 /************* KR ***************/ |
| 339 static const UConverterImpl _ISO2022KRImpl={ |
| 340 UCNV_ISO_2022, |
| 341 @@ -3947,5 +4001,6 @@ |
| 342 }; |
| 343 |
| 344 } // namespace |
| 345 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| 346 |
| 347 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
114 Index: source/common/ucnvbocu.cpp | 348 Index: source/common/ucnvbocu.cpp |
115 =================================================================== | 349 =================================================================== |
116 --- source/common/ucnvbocu.cpp (revision 259715) | 350 --- source/common/ucnvbocu.cpp (revision 259715) |
117 +++ source/common/ucnvbocu.cpp (working copy) | 351 +++ source/common/ucnvbocu.cpp (working copy) |
118 @@ -19,7 +19,7 @@ | 352 @@ -19,7 +19,7 @@ |
119 | 353 |
120 #include "unicode/utypes.h" | 354 #include "unicode/utypes.h" |
121 | 355 |
122 -#if !UCONFIG_NO_CONVERSION | 356 -#if !UCONFIG_NO_CONVERSION |
123 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 357 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
179 +#endif | 413 +#endif |
180 + | 414 + |
181 +/** | 415 +/** |
182 * \def UCONFIG_NO_LEGACY_CONVERSION | 416 * \def UCONFIG_NO_LEGACY_CONVERSION |
183 * This switch turns off all converters except for | 417 * This switch turns off all converters except for |
184 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) | 418 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) |
185 Index: source/common/ucnv_bld.cpp | 419 Index: source/common/ucnv_bld.cpp |
186 =================================================================== | 420 =================================================================== |
187 --- source/common/ucnv_bld.cpp (revision 259715) | 421 --- source/common/ucnv_bld.cpp (revision 259715) |
188 +++ source/common/ucnv_bld.cpp (working copy) | 422 +++ source/common/ucnv_bld.cpp (working copy) |
189 @@ -79,16 +79,25 @@ | 423 @@ -69,28 +69,41 @@ |
| 424 |
| 425 #if UCONFIG_NO_LEGACY_CONVERSION |
| 426 NULL, |
| 427 +#else |
| 428 + &_ISO2022Data, |
| 429 +#endif |
| 430 + |
| 431 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
| 432 NULL, NULL, NULL, NULL, NULL, NULL, |
| 433 NULL, NULL, NULL, NULL, NULL, NULL, |
| 434 NULL, |
| 435 #else |
| 436 - &_ISO2022Data, |
| 437 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBC
SData6, |
| 438 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBC
SData19, |
190 &_HZData, | 439 &_HZData, |
191 #endif | 440 #endif |
192 | 441 |
193 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 442 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
194 + NULL, | 443 + NULL, |
195 +#else | 444 +#else |
196 &_SCSUData, | 445 &_SCSUData, |
197 +#endif | 446 +#endif |
198 | 447 |
199 -#if UCONFIG_NO_LEGACY_CONVERSION | 448 -#if UCONFIG_NO_LEGACY_CONVERSION |
200 + | 449 + |
201 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | 450 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
202 NULL, | 451 NULL, |
203 #else | 452 #else |
204 &_ISCIIData, | 453 &_ISCIIData, |
205 #endif | 454 #endif |
206 | 455 |
207 &_ASCIIData, | 456 &_ASCIIData, |
208 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 457 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
209 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, | 458 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, |
210 +#else | 459 +#else |
211 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, | 460 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
212 +#endif | 461 +#endif |
213 | 462 |
214 #if UCONFIG_NO_LEGACY_CONVERSION | 463 -#if UCONFIG_NO_LEGACY_CONVERSION |
| 464 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
215 NULL, | 465 NULL, |
| 466 #else |
| 467 &_CompoundTextData |
| 468 @@ -105,18 +118,24 @@ |
| 469 const char *name; |
| 470 const UConverterType type; |
| 471 } const cnvNameType[] = { |
| 472 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 473 { "bocu1", UCNV_BOCU1 }, |
| 474 { "cesu8", UCNV_CESU8 }, |
| 475 -#if !UCONFIG_NO_LEGACY_CONVERSION |
| 476 +#endif |
| 477 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| 478 { "hz",UCNV_HZ }, |
| 479 #endif |
| 480 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 481 { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
| 482 +#endif |
| 483 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| 484 + { "iscii", UCNV_ISCII }, |
| 485 +#endif |
| 486 #if !UCONFIG_NO_LEGACY_CONVERSION |
| 487 - { "iscii", UCNV_ISCII }, |
| 488 { "iso2022", UCNV_ISO_2022 }, |
| 489 #endif |
| 490 { "iso88591", UCNV_LATIN_1 }, |
| 491 -#if !UCONFIG_NO_LEGACY_CONVERSION |
| 492 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| 493 { "lmbcs1", UCNV_LMBCS_1 }, |
| 494 { "lmbcs11",UCNV_LMBCS_11 }, |
| 495 { "lmbcs16",UCNV_LMBCS_16 }, |
| 496 @@ -130,7 +149,9 @@ |
| 497 { "lmbcs6", UCNV_LMBCS_6 }, |
| 498 { "lmbcs8", UCNV_LMBCS_8 }, |
| 499 #endif |
| 500 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 501 { "scsu", UCNV_SCSU }, |
| 502 +#endif |
| 503 { "usascii", UCNV_US_ASCII }, |
| 504 { "utf16", UCNV_UTF16 }, |
| 505 { "utf16be", UCNV_UTF16_BigEndian }, |
| 506 @@ -152,9 +173,13 @@ |
| 507 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, |
| 508 { "utf32platformendian", UCNV_UTF32_LittleEndian }, |
| 509 #endif |
| 510 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 511 { "utf7", UCNV_UTF7 }, |
| 512 +#endif |
| 513 { "utf8", UCNV_UTF8 }, |
| 514 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 515 { "x11compoundtext", UCNV_COMPOUND_TEXT} |
| 516 +#endif |
| 517 }; |
| 518 |
| 519 |
216 Index: source/common/ucnv_u8.c | 520 Index: source/common/ucnv_u8.c |
217 =================================================================== | 521 =================================================================== |
218 --- source/common/ucnv_u8.c (revision 259715) | 522 --- source/common/ucnv_u8.c (revision 259715) |
219 +++ source/common/ucnv_u8.c (working copy) | 523 +++ source/common/ucnv_u8.c (working copy) |
220 @@ -87,6 +87,15 @@ | 524 @@ -87,6 +87,15 @@ |
221 static const uint32_t | 525 static const uint32_t |
222 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; | 526 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; |
223 | 527 |
224 +static UBool hasCESU8Data(const UConverter *cnv) | 528 +static UBool hasCESU8Data(const UConverter *cnv) |
225 +{ | 529 +{ |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
270 uint8_t tempBuf[4]; | 574 uint8_t tempBuf[4]; |
271 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 575 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); |
272 + UBool isNotCESU8 = !hasCESU8Data(cnv); | 576 + UBool isNotCESU8 = !hasCESU8Data(cnv); |
273 | 577 |
274 if (cnv->fromUChar32 && myTarget < targetLimit) | 578 if (cnv->fromUChar32 && myTarget < targetLimit) |
275 { | 579 { |
276 Index: source/common/unicode/urename.h | 580 Index: source/common/unicode/urename.h |
277 =================================================================== | 581 =================================================================== |
278 --- source/common/unicode/urename.h (revision 259715) | 582 --- source/common/unicode/urename.h (revision 259715) |
279 +++ source/common/unicode/urename.h (working copy) | 583 +++ source/common/unicode/urename.h (working copy) |
280 @@ -73,12 +73,16 @@ | 584 @@ -73,12 +73,14 @@ |
281 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) | 585 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) |
282 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) | 586 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) |
283 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) | 587 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) |
284 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 588 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
285 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) | 589 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) |
286 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) | 590 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) |
287 +#endif | |
288 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) | 591 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) |
289 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) | 592 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) |
290 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
291 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) | 593 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) |
292 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) | 594 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) |
293 +#endif | 595 +#endif |
294 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) | 596 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) |
295 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) | 597 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) |
296 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) | 598 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) |
297 @@ -94,14 +98,18 @@ | 599 @@ -94,14 +96,18 @@ |
298 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) | 600 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) |
299 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) | 601 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) |
300 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) | 602 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) |
301 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 603 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
302 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) | 604 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) |
303 +#endif | 605 +#endif |
304 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) | 606 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) |
305 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) | 607 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) |
306 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) | 608 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) |
307 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) | 609 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) |
308 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) | 610 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) |
309 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) | 611 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) |
310 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 612 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
311 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) | 613 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) |
312 +#endif | 614 +#endif |
313 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) | 615 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) |
314 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) | 616 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) |
315 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) | 617 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) |
316 Index: source/common/ucnv_cnv.h | 618 Index: source/common/ucnv_cnv.h |
317 =================================================================== | 619 =================================================================== |
318 --- source/common/ucnv_cnv.h (revision 259715) | 620 --- source/common/ucnv_cnv.h (revision 259715) |
319 +++ source/common/ucnv_cnv.h (working copy) | 621 +++ source/common/ucnv_cnv.h (working copy) |
320 @@ -259,8 +259,13 @@ | 622 @@ -256,11 +256,15 @@ |
321 _ISO2022Data, | 623 extern const UConverterSharedData |
| 624 _MBCSData, _Latin1Data, |
| 625 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, |
| 626 - _ISO2022Data, |
| 627 + _ISO2022Data, |
| 628 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
322 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6
, | 629 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6
, |
323 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1
9, | 630 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1
9, |
324 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
325 _HZData,_ISCIIData, _SCSUData, _ASCIIData, | 631 _HZData,_ISCIIData, _SCSUData, _ASCIIData, |
326 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp
oundTextData; | 632 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp
oundTextData; |
327 +#else | 633 +#else |
328 + _HZData, _ASCIIData, | 634 + _ASCIIData, _UTF16Data, _UTF32Data; |
329 + _UTF16Data, _UTF32Data, _CompoundTextData; | |
330 +#endif | 635 +#endif |
331 | 636 |
332 U_CDECL_END | 637 U_CDECL_END |
333 | 638 |
| 639 Index: source/common/ucnv_lmb.c |
| 640 =================================================================== |
| 641 --- source/common/ucnv_lmb.c (revision 291619) |
| 642 +++ source/common/ucnv_lmb.c (working copy) |
| 643 @@ -25,7 +25,7 @@ |
| 644 |
| 645 #include "unicode/utypes.h" |
| 646 |
| 647 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 648 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 649 |
| 650 #include "unicode/ucnv_err.h" |
| 651 #include "unicode/ucnv.h" |
| 652 Index: source/common/ucnvhz.c |
| 653 =================================================================== |
| 654 --- source/common/ucnvhz.c (revision 291619) |
| 655 +++ source/common/ucnvhz.c (working copy) |
| 656 @@ -16,7 +16,7 @@ |
| 657 |
| 658 #include "unicode/utypes.h" |
| 659 |
| 660 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 661 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 662 |
| 663 #include "cmemory.h" |
| 664 #include "unicode/ucnv.h" |
| 665 @@ -637,4 +637,4 @@ |
| 666 0 |
| 667 }; |
| 668 |
| 669 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| 670 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF
IG_NO_NON_HTML5_CONVERSION */ |
| 671 Index: source/common/ucnv_ct.c |
| 672 =================================================================== |
| 673 --- source/common/ucnv_ct.c (revision 291619) |
| 674 +++ source/common/ucnv_ct.c (working copy) |
| 675 @@ -14,7 +14,7 @@ |
| 676 |
| 677 #include "unicode/utypes.h" |
| 678 |
| 679 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 680 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 681 |
| 682 #include "unicode/ucnv.h" |
| 683 #include "unicode/uset.h" |
| 684 Index: source/i18n/csrsbcs.h |
| 685 =================================================================== |
| 686 --- source/i18n/csrsbcs.h (revision 291619) |
| 687 +++ source/i18n/csrsbcs.h (working copy) |
| 688 @@ -50,6 +50,7 @@ |
| 689 |
| 690 }; |
| 691 |
| 692 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 693 class NGramParser_IBM420 : public NGramParser |
| 694 { |
| 695 private: |
| 696 @@ -61,6 +62,7 @@ |
| 697 public: |
| 698 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); |
| 699 }; |
| 700 +#endif |
| 701 |
| 702 |
| 703 class CharsetRecog_sbcs : public CharsetRecognizer |
| 704 @@ -229,6 +231,7 @@ |
| 705 virtual UBool match(InputText *det, CharsetMatch *results) const; |
| 706 }; |
| 707 |
| 708 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 709 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs |
| 710 { |
| 711 public: |
| 712 @@ -280,6 +283,7 @@ |
| 713 |
| 714 virtual UBool match(InputText *det, CharsetMatch *results) const; |
| 715 }; |
| 716 +#endif |
| 717 |
| 718 U_NAMESPACE_END |
| 719 |
| 720 Index: source/i18n/csr2022.h |
| 721 =================================================================== |
| 722 --- source/i18n/csr2022.h (revision 291619) |
| 723 +++ source/i18n/csr2022.h (working copy) |
| 724 @@ -65,6 +65,7 @@ |
| 725 UBool match(InputText *textIn, CharsetMatch *results) const; |
| 726 }; |
| 727 |
| 728 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 729 class CharsetRecog_2022KR :public CharsetRecog_2022 { |
| 730 public: |
| 731 virtual ~CharsetRecog_2022KR(); |
| 732 @@ -84,6 +85,7 @@ |
| 733 |
| 734 UBool match(InputText *textIn, CharsetMatch *results) const; |
| 735 }; |
| 736 +#endif |
| 737 |
| 738 U_NAMESPACE_END |
| 739 |
| 740 Index: source/i18n/csr2022.cpp |
| 741 =================================================================== |
| 742 --- source/i18n/csr2022.cpp (revision 291619) |
| 743 +++ source/i18n/csr2022.cpp (working copy) |
| 744 @@ -119,6 +119,7 @@ |
| 745 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
| 746 }; |
| 747 |
| 748 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 749 static const uint8_t escapeSequences_2022KR[][5] = { |
| 750 {0x1b, 0x24, 0x29, 0x43, 0x00} |
| 751 }; |
| 752 @@ -136,6 +137,7 @@ |
| 753 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
| 754 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
| 755 }; |
| 756 +#endif |
| 757 |
| 758 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
| 759 |
| 760 @@ -152,6 +154,7 @@ |
| 761 return (confidence > 0); |
| 762 } |
| 763 |
| 764 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 765 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
| 766 |
| 767 const char *CharsetRecog_2022KR::getName() const { |
| 768 @@ -181,6 +184,7 @@ |
| 769 results->set(textIn, this, confidence); |
| 770 return (confidence > 0); |
| 771 } |
| 772 +#endif |
| 773 |
| 774 CharsetRecog_2022::~CharsetRecog_2022() { |
| 775 // nothing to do |
| 776 Index: source/i18n/csdetect.cpp |
| 777 =================================================================== |
| 778 --- source/i18n/csdetect.cpp (revision 291619) |
| 779 +++ source/i18n/csdetect.cpp (working copy) |
| 780 @@ -110,6 +110,7 @@ |
| 781 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), |
| 782 |
| 783 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), |
| 784 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 785 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), |
| 786 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), |
| 787 |
| 788 @@ -117,6 +118,7 @@ |
| 789 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), |
| 790 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), |
| 791 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) |
| 792 +#endif |
| 793 }; |
| 794 int32_t rCount = ARRAY_SIZE(tempArray); |
| 795 |
| 796 Index: source/i18n/csrsbcs.cpp |
| 797 =================================================================== |
| 798 --- source/i18n/csrsbcs.cpp (revision 291619) |
| 799 +++ source/i18n/csrsbcs.cpp (working copy) |
| 800 @@ -137,6 +137,7 @@ |
| 801 return (int32_t) (rawPercent * 300.0); |
| 802 } |
| 803 |
| 804 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 805 static const uint8_t unshapeMap_IBM420[] = { |
| 806 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ |
| 807 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, |
| 808 @@ -232,6 +233,7 @@ |
| 809 } |
| 810 } |
| 811 } |
| 812 +#endif |
| 813 |
| 814 CharsetRecog_sbcs::CharsetRecog_sbcs() |
| 815 { |
| 816 @@ -624,6 +626,7 @@ |
| 817 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
| 818 }; |
| 819 |
| 820 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 821 static const int32_t ngrams_IBM424_he_rtl[] = { |
| 822 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, |
| 823 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, |
| 824 @@ -691,6 +694,7 @@ |
| 825 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, |
| 826 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, |
| 827 }; |
| 828 +#endif |
| 829 |
| 830 //ISO-8859-1,2,5,6,7,8,9 Ngrams |
| 831 |
| 832 @@ -1155,6 +1159,7 @@ |
| 833 return (confidence > 0); |
| 834 } |
| 835 |
| 836 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 837 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() |
| 838 { |
| 839 // nothing to do |
| 840 @@ -1253,6 +1258,7 @@ |
| 841 results->set(textIn, this, confidence); |
| 842 return (confidence > 0); |
| 843 } |
| 844 +#endif |
| 845 |
| 846 U_NAMESPACE_END |
| 847 #endif |
OLD | NEW |