OLD | NEW |
| 1 Cherry-picked from SVN r37045. |
| 2 |
| 3 ticket:11296: based on patch from Jungshik, approved option name UCONFIG_ONLY_HT
ML_CONVERSION, turn off UTF-32, simplify changes, fix warnings |
| 4 |
1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp | 5 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp |
2 index 9556dd2..7b8df9b 100644 | 6 index 7bfc271..582865a 100644 |
3 --- a/source/common/ucnv2022.cpp | 7 --- a/source/common/ucnv2022.cpp |
4 +++ b/source/common/ucnv2022.cpp | 8 +++ b/source/common/ucnv2022.cpp |
5 @@ -152,7 +152,11 @@ typedef enum { | 9 @@ -1,6 +1,6 @@ |
| 10 /* |
| 11 ********************************************************************** |
| 12 -* Copyright (C) 2000-2014, International Business Machines |
| 13 +* Copyright (C) 2000-2015, International Business Machines |
| 14 * Corporation and others. All Rights Reserved. |
| 15 ********************************************************************** |
| 16 * file name: ucnv2022.cpp |
| 17 @@ -75,8 +75,10 @@ |
| 18 */ |
| 19 #endif |
| 20 |
| 21 +#if !UCONFIG_ONLY_HTML_CONVERSION |
| 22 static const char SHIFT_IN_STR[] = "\x0F"; |
| 23 // static const char SHIFT_OUT_STR[] = "\x0E"; |
| 24 +#endif |
| 25 |
| 26 #define CR 0x0D |
| 27 #define LF 0x0A |
| 28 @@ -152,7 +154,11 @@ typedef enum { |
6 } StateEnum; | 29 } StateEnum; |
7 | 30 |
8 /* is the StateEnum charset value for a DBCS charset? */ | 31 /* is the StateEnum charset value for a DBCS charset? */ |
9 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 32 +#if UCONFIG_ONLY_HTML_CONVERSION |
10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) | 33 +#define IS_JP_DBCS(cs) (JISX208==(cs)) |
11 +#else | 34 +#else |
12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | 35 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
13 +#endif | 36 +#endif |
14 | 37 |
15 #define CSM(cs) ((uint16_t)1<<(cs)) | 38 #define CSM(cs) ((uint16_t)1<<(cs)) |
16 | 39 |
17 @@ -165,13 +169,23 @@ typedef enum { | 40 @@ -165,13 +171,19 @@ typedef enum { |
18 * all versions, not just JIS7 and JIS8. | 41 * all versions, not just JIS7 and JIS8. |
19 * - ICU does not distinguish between different versions of JIS X 0208. | 42 * - ICU does not distinguish between different versions of JIS X 0208. |
20 */ | 43 */ |
21 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 44 +#if UCONFIG_ONLY_HTML_CONVERSION |
22 +enum { MAX_JA_VERSION=0 }; | 45 +enum { MAX_JA_VERSION=0 }; |
23 +#else | 46 +#else |
24 enum { MAX_JA_VERSION=4 }; | 47 enum { MAX_JA_VERSION=4 }; |
25 +#endif | 48 +#endif |
26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 49 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
27 +/* | |
28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it. | |
29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 | |
30 + */ | |
31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 50 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 51 +#if !UCONFIG_ONLY_HTML_CONVERSION |
33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 52 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 53 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 54 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 55 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
37 +#endif | 56 +#endif |
38 }; | 57 }; |
39 | 58 |
40 typedef enum { | 59 typedef enum { |
41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES
_2022] = { | 60 @@ -358,15 +370,16 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES
_2022] = { |
42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202
2 ,VALID_TERMINAL_2022 | 61 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202
2 ,VALID_TERMINAL_2022 |
43 }; | 62 }; |
44 | 63 |
45 - | 64 - |
46 /* Type def for refactoring changeState_2022 code*/ | 65 /* Type def for refactoring changeState_2022 code*/ |
47 typedef enum{ | 66 typedef enum{ |
48 #ifdef U_ENABLE_GENERIC_ISO_2022 | 67 #ifdef U_ENABLE_GENERIC_ISO_2022 |
49 ISO_2022=0, | 68 ISO_2022=0, |
50 #endif | 69 #endif |
51 +#if UCONFIG_NO_NON_HTML5_CONVERSION | |
52 + ISO_2022_JP=1 | |
53 +#else | |
54 ISO_2022_JP=1, | 70 ISO_2022_JP=1, |
| 71 +#if !UCONFIG_ONLY_HTML_CONVERSION |
55 ISO_2022_KR=2, | 72 ISO_2022_KR=2, |
56 ISO_2022_CN=3 | 73 ISO_2022_CN=3 |
57 +#endif | 74 +#endif |
58 } Variant2022; | 75 } Variant2022; |
59 | 76 |
60 /*********** ISO 2022 Converter Protos ***********/ | 77 /*********** ISO 2022 Converter Protos ***********/ |
61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, U
ErrorCode *errorCode){ | 78 @@ -397,8 +410,11 @@ namespace { |
62 /* prevent indexing beyond jpCharsetMasks[] */ | 79 |
63 myConverterData->version = version = 0; | 80 /*const UConverterSharedData _ISO2022Data;*/ |
64 } | 81 extern const UConverterSharedData _ISO2022JPData; |
65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 82 + |
66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 83 +#if !UCONFIG_ONLY_HTML_CONVERSION |
67 myConverterData->myConverterArray[ISO8859_7] = | 84 extern const UConverterSharedData _ISO2022KRData; |
68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs,
errorCode); | 85 extern const UConverterSharedData _ISO2022CNData; |
69 } | |
70 +#endif | |
71 myConverterData->myConverterArray[JISX208] = | |
72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro
rCode); | |
73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
74 if(jpCharsetMasks[version]&CSM(JISX212)) { | |
75 myConverterData->myConverterArray[JISX212] = | |
76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e
rrorCode); | |
77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ | |
78 myConverterData->myConverterArray[KSC5601] = | |
79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e
rrorCode); | |
80 } | |
81 +#endif | 86 +#endif |
82 | 87 |
83 /* set the function pointers to appropriate funtions */ | 88 } // namespace |
84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 89 |
85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ | 90 @@ -511,6 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ |
86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); | 91 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); |
87 myConverterData->name[len+1]='\0'; | 92 myConverterData->name[len+1]='\0'; |
88 } | 93 } |
89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 94 +#if !UCONFIG_ONLY_HTML_CONVERSION |
90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 95 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
91 (myLocale[2]=='_' || myLocale[2]=='\0')) | 96 (myLocale[2]=='_' || myLocale[2]=='\0')) |
92 { | 97 { |
93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ | 98 @@ -580,6 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ |
94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); | 99 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); |
95 } | 100 } |
96 } | 101 } |
97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION | 102 +#endif // !UCONFIG_ONLY_HTML_CONVERSION |
98 else{ | 103 else{ |
99 #ifdef U_ENABLE_GENERIC_ISO_2022 | 104 #ifdef U_ENABLE_GENERIC_ISO_2022 |
100 myConverterData->isFirstBuffer = TRUE; | 105 myConverterData->isFirstBuffer = TRUE; |
101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { | 106 @@ -714,6 +732,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { |
102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 107 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
103 }; | 108 }; |
104 | 109 |
105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 110 +#if !UCONFIG_ONLY_HTML_CONVERSION |
106 /*************** to unicode *******************/ | 111 /*************** to unicode *******************/ |
107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 112 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
108 /* 0 1 2 3 4
5 6 7 8 9 *
/ | 113 /* 0 1 2 3 4
5 6 7 8 9 *
/ |
109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 114 @@ -726,6 +745,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE | 115 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE |
111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 116 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
112 }; | 117 }; |
113 +#endif | 118 +#endif |
114 | 119 |
115 | 120 |
116 static UCNV_TableStates_2022 | 121 static UCNV_TableStates_2022 |
117 @@ -878,6 +903,7 @@ DONE: | 122 @@ -898,6 +918,7 @@ DONE: |
118 } | |
119 break; | |
120 /* case SS3_STATE: not used in ISO-2022-JP-x */ | |
121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
122 case ISO8859_1: | |
123 case ISO8859_7: | |
124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { | |
125 @@ -887,6 +913,7 @@ DONE: | |
126 myData2022->toU2022State.cs[2]=(int8_t)tempState; | |
127 } | |
128 break; | |
129 +#endif | |
130 default: | |
131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { | |
132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | |
133 @@ -898,6 +925,7 @@ DONE: | |
134 } | 123 } |
135 } | 124 } |
136 break; | 125 break; |
137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 126 +#if !UCONFIG_ONLY_HTML_CONVERSION |
138 case ISO_2022_CN: | 127 case ISO_2022_CN: |
139 { | 128 { |
140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | 129 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
141 @@ -959,6 +987,7 @@ DONE: | 130 @@ -959,6 +980,7 @@ DONE: |
142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 131 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
143 } | 132 } |
144 break; | 133 break; |
145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 134 +#endif // !UCONFIG_ONLY_HTML_CONVERSION |
146 | 135 |
147 default: | 136 default: |
148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 137 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv, | 138 @@ -1001,6 +1023,7 @@ DONE: |
150 static const StateEnum jpCharsetPref[]={ | 139 } |
151 ASCII, | 140 } |
152 JISX201, | 141 |
153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 142 +#if !UCONFIG_ONLY_HTML_CONVERSION |
154 ISO8859_1, | 143 /*Checks the characters of the buffer against valid 2022 escape sequences |
155 ISO8859_7, | 144 *if the match we return a pointer to the initial start of the sequence otherwis
e |
| 145 *we return sourceLimit |
| 146 @@ -1055,7 +1078,7 @@ getEndOfBuffer_2022(const char** source, |
| 147 return mySource; |
| 148 #endif |
| 149 } |
| 150 - |
156 +#endif | 151 +#endif |
157 JISX208, | |
158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
159 JISX212, | |
160 GB2312, | |
161 KSC5601, | |
162 +#endif | |
163 HWKANA_7BIT | |
164 }; | |
165 | 152 |
166 @@ -1754,6 +1787,7 @@ getTrail: | 153 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvm
bcs.c |
167 g = 0; | 154 * any future change in _MBCSFromUChar32() function should be reflected here. |
168 } | 155 @@ -2269,6 +2292,7 @@ endloop: |
169 break; | |
170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
171 case ISO8859_1: | |
172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | |
173 targetValue = (uint32_t)sourceChar - 0x80; | |
174 @@ -1762,6 +1796,7 @@ getTrail: | |
175 g = 2; | |
176 } | |
177 break; | |
178 +#endif | |
179 case HWKANA_7BIT: | |
180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H
WKANA_START)) { | |
181 if(converterData->version==3) { | |
182 @@ -1823,6 +1858,7 @@ getTrail: | |
183 useFallback = FALSE; | |
184 } | |
185 break; | |
186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
187 case ISO8859_7: | |
188 /* G0 SBCS forced to 7-bit output */ | |
189 len2 = MBCS_SINGLE_FROM_UCHAR32( | |
190 @@ -1837,6 +1873,7 @@ getTrail: | |
191 useFallback = FALSE; | |
192 } | |
193 break; | |
194 +#endif | |
195 default: | |
196 /* G0 DBCS */ | |
197 len2 = MBCS_FROM_UCHAR32_ISO2022( | |
198 @@ -1844,6 +1881,7 @@ getTrail: | |
199 sourceChar, &value, | |
200 useFallback, MBCS_OUTPUT_2); | |
201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | |
202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
203 if(cs0 == KSC5601) { | |
204 /* | |
205 * Check for valid bytes for the encoding scheme. | |
206 @@ -1855,6 +1893,7 @@ getTrail: | |
207 break; | |
208 } | |
209 } | |
210 +#endif | |
211 targetValue = value; | |
212 len = len2; | |
213 cs = cs0; | |
214 @@ -2148,6 +2187,7 @@ escape: | |
215 targetUniChar = mySourceChar; | |
216 } | |
217 break; | |
218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
219 case ISO8859_1: | |
220 if(mySourceChar <= 0x7f) { | |
221 targetUniChar = mySourceChar + 0x80; | |
222 @@ -2166,6 +2206,7 @@ escape: | |
223 /* return from a single-shift state to the previous one */ | |
224 pToU2022State->g=pToU2022State->prevG; | |
225 break; | |
226 +#endif | |
227 case JISX201: | |
228 if(mySourceChar <= 0x7f) { | |
229 targetUniChar = jisx201ToU(mySourceChar); | |
230 @@ -2205,9 +2246,11 @@ getTrailByte: | |
231 } else { | |
232 /* Copy before we modify tmpSourceChar so toUni
codeCallback() sees the correct bytes. */ | |
233 mySourceChar = tmpSourceChar; | |
234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
235 if (cs == KSC5601) { | |
236 tmpSourceChar += 0x8080; /* = _2022ToGR94D
BCS(tmpSourceChar) */ | |
237 } | |
238 +#endif | |
239 tempBuf[0] = (char)(tmpSourceChar >> 8); | |
240 tempBuf[1] = (char)(tmpSourceChar); | |
241 } | |
242 @@ -2269,6 +2312,7 @@ endloop: | |
243 } | 156 } |
244 | 157 |
245 | 158 |
246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 159 +#if !UCONFIG_ONLY_HTML_CONVERSION |
247 /*************************************************************** | 160 /*************************************************************** |
248 * Rules for ISO-2022-KR encoding | 161 * Rules for ISO-2022-KR encoding |
249 * i) The KSC5601 designator sequence should appear only once in a file, | 162 * i) The KSC5601 designator sequence should appear only once in a file, |
250 @@ -3412,6 +3456,7 @@ endloop: | 163 @@ -3412,6 +3436,7 @@ endloop: |
251 args->target = myTarget; | 164 args->target = myTarget; |
252 args->source = mySource; | 165 args->source = mySource; |
253 } | 166 } |
254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 167 +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
255 | 168 |
256 static void | 169 static void |
257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError
Code *err) { | 170 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError
Code *err) { |
258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | 171 @@ -3638,6 +3663,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
259 /* include JIS X 0201 which is hardcoded */ | |
260 sa->add(sa->set, 0xa5); | |
261 sa->add(sa->set, 0x203e); | |
262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | |
264 /* include Latin-1 for some variants of JP */ | |
265 sa->addRange(sa->set, 0, 0xff); | |
266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
267 /* include ASCII for JP */ | |
268 sa->addRange(sa->set, 0, 0x7f); | |
269 } | |
270 +#else | |
271 + /* include ASCII for JP */ | |
272 + sa->addRange(sa->set, 0, 0x7f); | |
273 +#endif | |
274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_
AND_FALLBACK_SET) { | |
275 /* | |
276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!
=0 | |
277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | |
278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 172 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
279 } | 173 } |
280 break; | 174 break; |
281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 175 +#if !UCONFIG_ONLY_HTML_CONVERSION |
282 case 'c': | 176 case 'c': |
283 case 'z': | 177 case 'z': |
284 /* include ASCII for CN */ | 178 /* include ASCII for CN */ |
285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | 179 @@ -3649,6 +3675,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
286 cnvData->currentConverter, sa, which, pErrorCode); | 180 cnvData->currentConverter, sa, which, pErrorCode); |
287 /* the loop over myConverterArray[] will simply not find another conver
ter */ | 181 /* the loop over myConverterArray[] will simply not find another conver
ter */ |
288 break; | 182 break; |
289 +#endif | 183 +#endif |
290 default: | 184 default: |
291 break; | 185 break; |
292 } | 186 } |
293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | 187 @@ -3669,9 +3696,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | 188 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
295 UConverterSetFilter filter; | 189 UConverterSetFilter filter; |
296 if(cnvData->myConverterArray[i]!=NULL) { | 190 if(cnvData->myConverterArray[i]!=NULL) { |
297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 191 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
298 - cnvData->version==0 && i==CNS_11643 | 192 - cnvData->version==0 && i==CNS_11643 |
299 - ) { | 193 - ) { |
300 + if(cnvData->locale[0]=='j' && i==JISX208) { | 194 + if(cnvData->locale[0]=='j' && i==JISX208) { |
301 + /* | 195 + /* |
302 + * Only add code points that map to Shift-JIS codes | 196 + * Only add code points that map to Shift-JIS codes |
303 + * corresponding to JIS X 0208. | 197 + * corresponding to JIS X 0208. |
304 + */ | 198 + */ |
305 + filter=UCNV_SET_FILTER_SJIS; | 199 + filter=UCNV_SET_FILTER_SJIS; |
306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 200 +#if !UCONFIG_ONLY_HTML_CONVERSION |
307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 201 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
308 + cnvData->version==0 && i==CNS_11643) { | 202 + cnvData->version==0 && i==CNS_11643) { |
309 /* | 203 /* |
310 * Version-specific for CN: | 204 * Version-specific for CN: |
311 * CN version 0 does not map CNS planes 3..7 although | 205 * CN version 0 does not map CNS planes 3..7 although |
312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, | 206 @@ -3680,18 +3713,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
313 * The two versions create different Unicode sets. | 207 * The two versions create different Unicode sets. |
314 */ | 208 */ |
315 filter=UCNV_SET_FILTER_2022_CN; | 209 filter=UCNV_SET_FILTER_2022_CN; |
316 - } else if(cnvData->locale[0]=='j' && i==JISX208) { | 210 - } else if(cnvData->locale[0]=='j' && i==JISX208) { |
317 - /* | 211 - /* |
318 - * Only add code points that map to Shift-JIS codes | 212 - * Only add code points that map to Shift-JIS codes |
319 - * corresponding to JIS X 0208. | 213 - * corresponding to JIS X 0208. |
320 - */ | 214 - */ |
321 - filter=UCNV_SET_FILTER_SJIS; | 215 - filter=UCNV_SET_FILTER_SJIS; |
322 } else if(i==KSC5601) { | 216 } else if(i==KSC5601) { |
323 /* | 217 /* |
324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o
n multiple tables) | 218 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o
n multiple tables) |
325 * are broader than GR94. | 219 * are broader than GR94. |
326 */ | 220 */ |
327 filter=UCNV_SET_FILTER_GR94DBCS; | 221 filter=UCNV_SET_FILTER_GR94DBCS; |
328 +#endif | 222 +#endif |
329 } else { | 223 } else { |
330 filter=UCNV_SET_FILTER_NONE; | 224 filter=UCNV_SET_FILTER_NONE; |
331 } | 225 } |
332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={ | 226 @@ -3813,6 +3841,7 @@ const UConverterSharedData _ISO2022JPData= |
333 | 227 |
334 } // namespace | 228 } // namespace |
335 | 229 |
336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 230 +#if !UCONFIG_ONLY_HTML_CONVERSION |
337 /************* KR ***************/ | 231 /************* KR ***************/ |
338 static const UConverterImpl _ISO2022KRImpl={ | 232 static const UConverterImpl _ISO2022KRImpl={ |
339 UCNV_ISO_2022, | 233 UCNV_ISO_2022, |
340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={ | 234 @@ -3913,5 +3942,6 @@ const UConverterSharedData _ISO2022CNData= |
341 }; | 235 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022
CNImpl); |
342 | 236 |
343 } // namespace | 237 } // namespace |
344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 238 +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ |
345 | 239 |
346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 240 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp | 241 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp |
348 index 4940310..047f18a 100644 | 242 index ee1cf9e..a70021b 100644 |
349 --- a/source/common/ucnv_bld.cpp | 243 --- a/source/common/ucnv_bld.cpp |
350 +++ b/source/common/ucnv_bld.cpp | 244 +++ b/source/common/ucnv_bld.cpp |
351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ | 245 @@ -1,11 +1,11 @@ |
| 246 /* |
| 247 ******************************************************************** |
| 248 * COPYRIGHT: |
| 249 - * Copyright (c) 1996-2013, International Business Machines Corporation and |
| 250 + * Copyright (c) 1996-2015, International Business Machines Corporation and |
| 251 * others. All Rights Reserved. |
| 252 ******************************************************************** |
| 253 * |
| 254 - * uconv_bld.cpp: |
| 255 + * ucnv_bld.cpp: |
| 256 * |
| 257 * Defines functions that are used in the creation/initialization/deletion |
| 258 * of converters and related structures. |
| 259 @@ -64,33 +64,51 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ |
| 260 #endif |
| 261 |
| 262 &_Latin1Data, |
| 263 - &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData, |
| 264 + &_UTF8Data, &_UTF16BEData, &_UTF16LEData, |
| 265 +#if UCONFIG_ONLY_HTML_CONVERSION |
| 266 + NULL, NULL, |
| 267 +#else |
| 268 + &_UTF32BEData, &_UTF32LEData, |
| 269 +#endif |
| 270 NULL, |
352 | 271 |
353 #if UCONFIG_NO_LEGACY_CONVERSION | 272 #if UCONFIG_NO_LEGACY_CONVERSION |
354 NULL, | 273 NULL, |
355 +#else | 274 +#else |
356 + &_ISO2022Data, | 275 + &_ISO2022Data, |
357 +#endif | 276 +#endif |
358 + | 277 + |
359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | 278 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
360 NULL, NULL, NULL, NULL, NULL, NULL, | 279 NULL, NULL, NULL, NULL, NULL, NULL, |
361 NULL, NULL, NULL, NULL, NULL, NULL, | 280 NULL, NULL, NULL, NULL, NULL, NULL, |
362 NULL, | 281 NULL, |
363 #else | 282 #else |
364 - &_ISO2022Data, | 283 - &_ISO2022Data, |
365 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBC
SData6, | 284 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBC
SData6, |
366 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBC
SData19, | 285 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBC
SData19, |
367 &_HZData, | 286 &_HZData, |
368 #endif | 287 #endif |
369 | 288 |
370 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 289 +#if UCONFIG_ONLY_HTML_CONVERSION |
371 + NULL, | 290 + NULL, |
372 +#else | 291 +#else |
373 &_SCSUData, | 292 &_SCSUData, |
374 +#endif | 293 +#endif |
375 | 294 |
376 -#if UCONFIG_NO_LEGACY_CONVERSION | 295 -#if UCONFIG_NO_LEGACY_CONVERSION |
377 + | 296 + |
378 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | 297 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
379 NULL, | 298 NULL, |
380 #else | 299 #else |
381 &_ISCIIData, | 300 &_ISCIIData, |
382 #endif | 301 #endif |
383 | 302 |
384 &_ASCIIData, | 303 &_ASCIIData, |
385 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 304 +#if UCONFIG_ONLY_HTML_CONVERSION |
386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, | 305 + NULL, NULL, &_UTF16Data, NULL, NULL, NULL, |
387 +#else | 306 +#else |
388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, | 307 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
389 +#endif | 308 +#endif |
390 | 309 |
391 -#if UCONFIG_NO_LEGACY_CONVERSION | 310 -#if UCONFIG_NO_LEGACY_CONVERSION |
392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | 311 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION |
393 NULL, | 312 NULL, |
394 #else | 313 #else |
395 &_CompoundTextData | 314 &_CompoundTextData |
396 @@ -105,18 +118,24 @@ static struct { | 315 @@ -105,18 +123,24 @@ static struct { |
397 const char *name; | 316 const char *name; |
398 const UConverterType type; | 317 const UConverterType type; |
399 } const cnvNameType[] = { | 318 } const cnvNameType[] = { |
400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 319 +#if !UCONFIG_ONLY_HTML_CONVERSION |
401 { "bocu1", UCNV_BOCU1 }, | 320 { "bocu1", UCNV_BOCU1 }, |
402 { "cesu8", UCNV_CESU8 }, | 321 { "cesu8", UCNV_CESU8 }, |
403 -#if !UCONFIG_NO_LEGACY_CONVERSION | 322 -#if !UCONFIG_NO_LEGACY_CONVERSION |
404 +#endif | 323 +#endif |
405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 324 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
406 { "hz",UCNV_HZ }, | 325 { "hz",UCNV_HZ }, |
407 #endif | 326 #endif |
408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 327 +#if !UCONFIG_ONLY_HTML_CONVERSION |
409 { "imapmailboxname", UCNV_IMAP_MAILBOX }, | 328 { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
410 -#if !UCONFIG_NO_LEGACY_CONVERSION | 329 -#if !UCONFIG_NO_LEGACY_CONVERSION |
411 +#endif | 330 +#endif |
412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 331 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
413 { "iscii", UCNV_ISCII }, | 332 { "iscii", UCNV_ISCII }, |
414 +#endif | 333 +#endif |
415 +#if !UCONFIG_NO_LEGACY_CONVERSION | 334 +#if !UCONFIG_NO_LEGACY_CONVERSION |
416 { "iso2022", UCNV_ISO_2022 }, | 335 { "iso2022", UCNV_ISO_2022 }, |
417 #endif | 336 #endif |
418 { "iso88591", UCNV_LATIN_1 }, | 337 { "iso88591", UCNV_LATIN_1 }, |
419 -#if !UCONFIG_NO_LEGACY_CONVERSION | 338 -#if !UCONFIG_NO_LEGACY_CONVERSION |
420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 339 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
421 { "lmbcs1", UCNV_LMBCS_1 }, | 340 { "lmbcs1", UCNV_LMBCS_1 }, |
422 { "lmbcs11",UCNV_LMBCS_11 }, | 341 { "lmbcs11",UCNV_LMBCS_11 }, |
423 { "lmbcs16",UCNV_LMBCS_16 }, | 342 { "lmbcs16",UCNV_LMBCS_16 }, |
424 @@ -130,7 +149,9 @@ static struct { | 343 @@ -130,7 +154,9 @@ static struct { |
425 { "lmbcs6", UCNV_LMBCS_6 }, | 344 { "lmbcs6", UCNV_LMBCS_6 }, |
426 { "lmbcs8", UCNV_LMBCS_8 }, | 345 { "lmbcs8", UCNV_LMBCS_8 }, |
427 #endif | 346 #endif |
428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 347 +#if !UCONFIG_ONLY_HTML_CONVERSION |
429 { "scsu", UCNV_SCSU }, | 348 { "scsu", UCNV_SCSU }, |
430 +#endif | 349 +#endif |
431 { "usascii", UCNV_US_ASCII }, | 350 { "usascii", UCNV_US_ASCII }, |
432 { "utf16", UCNV_UTF16 }, | 351 { "utf16", UCNV_UTF16 }, |
433 { "utf16be", UCNV_UTF16_BigEndian }, | 352 { "utf16be", UCNV_UTF16_BigEndian }, |
434 @@ -152,9 +173,13 @@ static struct { | 353 @@ -142,6 +168,7 @@ static struct { |
| 354 { "utf16oppositeendian", UCNV_UTF16_BigEndian}, |
| 355 { "utf16platformendian", UCNV_UTF16_LittleEndian }, |
| 356 #endif |
| 357 +#if !UCONFIG_ONLY_HTML_CONVERSION |
| 358 { "utf32", UCNV_UTF32 }, |
| 359 { "utf32be", UCNV_UTF32_BigEndian }, |
| 360 { "utf32le", UCNV_UTF32_LittleEndian }, |
| 361 @@ -152,9 +179,14 @@ static struct { |
435 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, | 362 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, |
436 { "utf32platformendian", UCNV_UTF32_LittleEndian }, | 363 { "utf32platformendian", UCNV_UTF32_LittleEndian }, |
437 #endif | 364 #endif |
438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 365 +#endif |
| 366 +#if !UCONFIG_ONLY_HTML_CONVERSION |
439 { "utf7", UCNV_UTF7 }, | 367 { "utf7", UCNV_UTF7 }, |
440 +#endif | 368 +#endif |
441 { "utf8", UCNV_UTF8 }, | 369 { "utf8", UCNV_UTF8 }, |
442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 370 +#if !UCONFIG_ONLY_HTML_CONVERSION |
443 { "x11compoundtext", UCNV_COMPOUND_TEXT} | 371 { "x11compoundtext", UCNV_COMPOUND_TEXT} |
444 +#endif | 372 +#endif |
445 }; | 373 }; |
446 | 374 |
447 | 375 |
448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h | |
449 index 402e2c9..5fad446 100644 | |
450 --- a/source/common/ucnv_cnv.h | |
451 +++ b/source/common/ucnv_cnv.h | |
452 @@ -256,11 +256,15 @@ struct UConverterImpl { | |
453 extern const UConverterSharedData | |
454 _MBCSData, _Latin1Data, | |
455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, | |
456 - _ISO2022Data, | |
457 + _ISO2022Data, | |
458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6
, | |
460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1
9, | |
461 _HZData,_ISCIIData, _SCSUData, _ASCIIData, | |
462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp
oundTextData; | |
463 +#else | |
464 + _ASCIIData, _UTF16Data, _UTF32Data; | |
465 +#endif | |
466 | |
467 U_CDECL_END | |
468 | |
469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c | 376 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c |
470 index ec0e9c2..e723fa6 100644 | 377 index ecba6c6..91d66e1 100644 |
471 --- a/source/common/ucnv_ct.c | 378 --- a/source/common/ucnv_ct.c |
472 +++ b/source/common/ucnv_ct.c | 379 +++ b/source/common/ucnv_ct.c |
| 380 @@ -1,6 +1,6 @@ |
| 381 /* |
| 382 ********************************************************************** |
| 383 -* Copyright (C) 2010-2014, International Business Machines |
| 384 +* Copyright (C) 2010-2015, International Business Machines |
| 385 * Corporation and others. All Rights Reserved. |
| 386 ********************************************************************** |
| 387 * file name: ucnv_ct.c |
473 @@ -14,7 +14,7 @@ | 388 @@ -14,7 +14,7 @@ |
474 | 389 |
475 #include "unicode/utypes.h" | 390 #include "unicode/utypes.h" |
476 | 391 |
477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | 392 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | 393 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HT
ML_CONVERSION |
479 | 394 |
480 #include "unicode/ucnv.h" | 395 #include "unicode/ucnv.h" |
481 #include "unicode/uset.h" | 396 #include "unicode/uset.h" |
482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c | 397 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c |
483 index 1d921dd..a4fccee 100644 | 398 index 81ac9f9..01d0aa1 100644 |
484 --- a/source/common/ucnv_lmb.c | 399 --- a/source/common/ucnv_lmb.c |
485 +++ b/source/common/ucnv_lmb.c | 400 +++ b/source/common/ucnv_lmb.c |
| 401 @@ -1,6 +1,6 @@ |
| 402 /* |
| 403 ********************************************************************** |
| 404 -* Copyright (C) 2000-2014, International Business Machines |
| 405 +* Copyright (C) 2000-2015, International Business Machines |
| 406 * Corporation and others. All Rights Reserved. |
| 407 ********************************************************************** |
| 408 * file name: ucnv_lmb.cpp |
486 @@ -25,7 +25,7 @@ | 409 @@ -25,7 +25,7 @@ |
487 | 410 |
488 #include "unicode/utypes.h" | 411 #include "unicode/utypes.h" |
489 | 412 |
490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | 413 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | 414 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HT
ML_CONVERSION |
492 | 415 |
493 #include "unicode/ucnv_err.h" | 416 #include "unicode/ucnv_err.h" |
494 #include "unicode/ucnv.h" | 417 #include "unicode/ucnv.h" |
495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c | 418 diff --git a/source/common/ucnv_u32.c b/source/common/ucnv_u32.c |
496 index 42943f4..6466b87 100644 | 419 index 49d6746..b6804ca 100644 |
497 --- a/source/common/ucnv_u7.c | 420 --- a/source/common/ucnv_u32.c |
498 +++ b/source/common/ucnv_u7.c | 421 +++ b/source/common/ucnv_u32.c |
| 422 @@ -1,6 +1,6 @@ |
| 423 /* |
| 424 ********************************************************************** |
| 425 -* Copyright (C) 2002-2011, International Business Machines |
| 426 +* Copyright (C) 2002-2015, International Business Machines |
| 427 * Corporation and others. All Rights Reserved. |
| 428 ********************************************************************** |
| 429 * file name: ucnv_u32.c |
499 @@ -16,7 +16,7 @@ | 430 @@ -16,7 +16,7 @@ |
500 | 431 |
501 #include "unicode/utypes.h" | 432 #include "unicode/utypes.h" |
502 | 433 |
503 -#if !UCONFIG_NO_CONVERSION | 434 -#if !UCONFIG_NO_CONVERSION |
504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 435 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
| 436 |
| 437 #include "unicode/ucnv.h" |
| 438 #include "unicode/utf.h" |
| 439 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c |
| 440 index fe39028..d35bae2 100644 |
| 441 --- a/source/common/ucnv_u7.c |
| 442 +++ b/source/common/ucnv_u7.c |
| 443 @@ -1,6 +1,6 @@ |
| 444 /* |
| 445 ********************************************************************** |
| 446 -* Copyright (C) 2002-2011, International Business Machines |
| 447 +* Copyright (C) 2002-2015, International Business Machines |
| 448 * Corporation and others. All Rights Reserved. |
| 449 ********************************************************************** |
| 450 * file name: ucnv_u7.c |
| 451 @@ -16,7 +16,7 @@ |
| 452 |
| 453 #include "unicode/utypes.h" |
| 454 |
| 455 -#if !UCONFIG_NO_CONVERSION |
| 456 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
505 | 457 |
506 #include "unicode/ucnv.h" | 458 #include "unicode/ucnv.h" |
507 #include "ucnv_bld.h" | 459 #include "ucnv_bld.h" |
508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c | 460 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c |
509 index 8ee9fe5..24205f5 100644 | 461 index 2d045d4..b785873 100644 |
510 --- a/source/common/ucnv_u8.c | 462 --- a/source/common/ucnv_u8.c |
511 +++ b/source/common/ucnv_u8.c | 463 +++ b/source/common/ucnv_u8.c |
| 464 @@ -1,6 +1,6 @@ |
| 465 /* |
| 466 ********************************************************************** |
| 467 -* Copyright (C) 2002-2012, International Business Machines |
| 468 +* Copyright (C) 2002-2015, International Business Machines |
| 469 * Corporation and others. All Rights Reserved. |
| 470 ********************************************************************** |
| 471 * file name: ucnv_u8.c |
512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = { | 472 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = { |
513 static const uint32_t | 473 static const uint32_t |
514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; | 474 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; |
515 | 475 |
516 +static UBool hasCESU8Data(const UConverter *cnv) | 476 +static UBool hasCESU8Data(const UConverter *cnv) |
517 +{ | 477 +{ |
518 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 478 +#if UCONFIG_ONLY_HTML_CONVERSION |
519 + return FALSE; | 479 + return FALSE; |
520 +#else | 480 +#else |
521 + return (UBool)(cnv->sharedData == &_CESU8Data); | 481 + return (UBool)(cnv->sharedData == &_CESU8Data); |
522 +#endif | 482 +#endif |
523 +} | 483 +} |
524 + | 484 + |
525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, | 485 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, |
526 UErrorCode * err) | 486 UErrorCode * err) |
527 { | 487 { |
528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs *
args, | 488 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs *
args, |
(...skipping 30 matching lines...) Expand all Loading... |
559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter
FromUnicodeArgs * ar | 519 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter
FromUnicodeArgs * ar |
560 int32_t offsetNum, nextSourceIndex; | 520 int32_t offsetNum, nextSourceIndex; |
561 int32_t indexToWrite; | 521 int32_t indexToWrite; |
562 uint8_t tempBuf[4]; | 522 uint8_t tempBuf[4]; |
563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 523 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); |
564 + UBool isNotCESU8 = !hasCESU8Data(cnv); | 524 + UBool isNotCESU8 = !hasCESU8Data(cnv); |
565 | 525 |
566 if (cnv->fromUChar32 && myTarget < targetLimit) | 526 if (cnv->fromUChar32 && myTarget < targetLimit) |
567 { | 527 { |
568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp | 528 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp |
569 index b97d666..281d6d9 100644 | 529 index 5497f7d..b37d12c 100644 |
570 --- a/source/common/ucnvbocu.cpp | 530 --- a/source/common/ucnvbocu.cpp |
571 +++ b/source/common/ucnvbocu.cpp | 531 +++ b/source/common/ucnvbocu.cpp |
| 532 @@ -1,7 +1,7 @@ |
| 533 /* |
| 534 ****************************************************************************** |
| 535 * |
| 536 -* Copyright (C) 2002-2011, International Business Machines |
| 537 +* Copyright (C) 2002-2015, International Business Machines |
| 538 * Corporation and others. All Rights Reserved. |
| 539 * |
| 540 ****************************************************************************** |
572 @@ -19,7 +19,7 @@ | 541 @@ -19,7 +19,7 @@ |
573 | 542 |
574 #include "unicode/utypes.h" | 543 #include "unicode/utypes.h" |
575 | 544 |
576 -#if !UCONFIG_NO_CONVERSION | 545 -#if !UCONFIG_NO_CONVERSION |
577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 546 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
578 | 547 |
579 #include "unicode/ucnv.h" | 548 #include "unicode/ucnv.h" |
580 #include "unicode/ucnv_cb.h" | 549 #include "unicode/ucnv_cb.h" |
581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c | 550 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c |
582 index 3760c39..51825e2 100644 | 551 index 4a89c47..4ca2e78 100644 |
583 --- a/source/common/ucnvhz.c | 552 --- a/source/common/ucnvhz.c |
584 +++ b/source/common/ucnvhz.c | 553 +++ b/source/common/ucnvhz.c |
| 554 @@ -1,6 +1,6 @@ |
| 555 /* |
| 556 ********************************************************************** |
| 557 -* Copyright (C) 2000-2014, International Business Machines |
| 558 +* Copyright (C) 2000-2015, International Business Machines |
| 559 * Corporation and others. All Rights Reserved. |
| 560 ********************************************************************** |
| 561 * file name: ucnvhz.c |
585 @@ -16,7 +16,7 @@ | 562 @@ -16,7 +16,7 @@ |
586 | 563 |
587 #include "unicode/utypes.h" | 564 #include "unicode/utypes.h" |
588 | 565 |
589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | 566 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | 567 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HT
ML_CONVERSION |
591 | 568 |
592 #include "cmemory.h" | 569 #include "cmemory.h" |
593 #include "unicode/ucnv.h" | 570 #include "unicode/ucnv.h" |
594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={ | 571 @@ -626,4 +626,4 @@ static const UConverterStaticData _HZStaticData={ |
595 0 | 572 const UConverterSharedData _HZData= |
596 }; | 573 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl); |
597 | 574 |
598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 575 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF
IG_NO_NON_HTML5_CONVERSION */ | 576 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF
IG_ONLY_HTML_CONVERSION */ |
600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c | 577 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c |
601 index fe61d40..16fd0a3 100644 | 578 index 8f0e312..d34d088 100644 |
602 --- a/source/common/ucnvisci.c | 579 --- a/source/common/ucnvisci.c |
603 +++ b/source/common/ucnvisci.c | 580 +++ b/source/common/ucnvisci.c |
| 581 @@ -1,6 +1,6 @@ |
| 582 /* |
| 583 ********************************************************************** |
| 584 -* Copyright (C) 2000-2012, International Business Machines |
| 585 +* Copyright (C) 2000-2015, International Business Machines |
| 586 * Corporation and others. All Rights Reserved. |
| 587 ********************************************************************** |
| 588 * file name: ucnvisci.c |
604 @@ -17,7 +17,7 @@ | 589 @@ -17,7 +17,7 @@ |
605 | 590 |
606 #include "unicode/utypes.h" | 591 #include "unicode/utypes.h" |
607 | 592 |
608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | 593 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | 594 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HT
ML_CONVERSION |
610 | 595 |
611 #include "unicode/ucnv.h" | 596 #include "unicode/ucnv.h" |
612 #include "unicode/ucnv_cb.h" | 597 #include "unicode/ucnv_cb.h" |
613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c | 598 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c |
614 index c6e96e1..a6f8c9e 100644 | 599 index 3ced9e4..1aacd81 100644 |
615 --- a/source/common/ucnvscsu.c | 600 --- a/source/common/ucnvscsu.c |
616 +++ b/source/common/ucnvscsu.c | 601 +++ b/source/common/ucnvscsu.c |
| 602 @@ -1,7 +1,7 @@ |
| 603 /* |
| 604 ****************************************************************************** |
| 605 * |
| 606 -* Copyright (C) 2000-2011, International Business Machines |
| 607 +* Copyright (C) 2000-2015, International Business Machines |
| 608 * Corporation and others. All Rights Reserved. |
| 609 * |
| 610 ****************************************************************************** |
617 @@ -21,7 +21,7 @@ | 611 @@ -21,7 +21,7 @@ |
618 | 612 |
619 #include "unicode/utypes.h" | 613 #include "unicode/utypes.h" |
620 | 614 |
621 -#if !UCONFIG_NO_CONVERSION | 615 -#if !UCONFIG_NO_CONVERSION |
622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 616 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION |
623 | 617 |
624 #include "unicode/ucnv.h" | 618 #include "unicode/ucnv.h" |
625 #include "unicode/ucnv_cb.h" | 619 #include "unicode/ucnv_cb.h" |
626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h | 620 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h |
627 index ed073b6..8df56e6 100644 | 621 index ed073b6..f6223bb 100644 |
628 --- a/source/common/unicode/uconfig.h | 622 --- a/source/common/unicode/uconfig.h |
629 +++ b/source/common/unicode/uconfig.h | 623 +++ b/source/common/unicode/uconfig.h |
630 @@ -270,6 +270,14 @@ | 624 @@ -1,6 +1,6 @@ |
| 625 /* |
| 626 ********************************************************************** |
| 627 -* Copyright (C) 2002-2014, International Business Machines |
| 628 +* Copyright (C) 2002-2015, International Business Machines |
| 629 * Corporation and others. All Rights Reserved. |
| 630 ********************************************************************** |
| 631 * file name: uconfig.h |
| 632 @@ -200,7 +200,7 @@ |
| 633 * It does not turn off legacy conversion because that is necessary |
| 634 * for ICU to work on EBCDIC platforms (for the default converter). |
| 635 * If you want "only collation" and do not build for EBCDIC, |
| 636 - * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well. |
| 637 + * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to
1 as well. |
| 638 * |
| 639 * @stable ICU 2.4 |
| 640 */ |
| 641 @@ -270,6 +270,21 @@ |
631 #endif | 642 #endif |
632 | 643 |
633 /** | 644 /** |
634 + * This switch turns off all the converters NOT listed in | 645 + * \def UCONFIG_ONLY_HTML_CONVERSION |
635 + * the encoding standard : http://encoding.spec.whatwg.org | 646 + * This switch turns off all of the converters NOT listed in |
| 647 + * the HTML encoding standard: |
| 648 + * http://www.w3.org/TR/encoding/#names-and-labels |
| 649 + * |
| 650 + * This is not possible on EBCDIC platforms |
| 651 + * because they need ibm-37 or ibm-1047 default converters. |
| 652 + * |
| 653 + * @draft ICU 55 |
636 + */ | 654 + */ |
637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION | 655 +#ifndef UCONFIG_ONLY_HTML_CONVERSION |
638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 | 656 +# define UCONFIG_ONLY_HTML_CONVERSION 0 |
639 +#endif | 657 +#endif |
640 + | 658 + |
641 +/** | 659 +/** |
642 * \def UCONFIG_NO_LEGACY_CONVERSION | 660 * \def UCONFIG_NO_LEGACY_CONVERSION |
643 * This switch turns off all converters except for | 661 * This switch turns off all converters except for |
644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) | 662 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) |
645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h | |
646 index a817262..89becca 100644 | |
647 --- a/source/common/unicode/urename.h | |
648 +++ b/source/common/unicode/urename.h | |
649 @@ -73,12 +73,14 @@ | |
650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) | |
651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) | |
652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) | |
653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) | |
655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) | |
656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) | |
657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) | |
658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) | |
659 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) | |
660 +#endif | |
661 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) | |
662 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) | |
663 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) | |
664 @@ -94,14 +96,18 @@ | |
665 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) | |
666 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) | |
667 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) | |
668 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
669 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) | |
670 +#endif | |
671 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) | |
672 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) | |
673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) | |
674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) | |
675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) | |
676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) | |
677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) | |
679 +#endif | |
680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) | |
681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) | |
682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) | |
683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp | 663 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp |
684 index 3efbd49..ba5b18c 100644 | 664 index 3efbd49..66d8f3a 100644 |
685 --- a/source/i18n/csdetect.cpp | 665 --- a/source/i18n/csdetect.cpp |
686 +++ b/source/i18n/csdetect.cpp | 666 +++ b/source/i18n/csdetect.cpp |
| 667 @@ -1,6 +1,6 @@ |
| 668 /* |
| 669 ********************************************************************** |
| 670 - * Copyright (C) 2005-2013, International Business Machines |
| 671 + * Copyright (C) 2005-2015, International Business Machines |
| 672 * Corporation and others. All Rights Reserved. |
| 673 ********************************************************************** |
| 674 */ |
687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { | 675 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { |
688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), | 676 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), |
689 | 677 |
690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), | 678 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), |
691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 679 +#if !UCONFIG_ONLY_HTML_CONVERSION |
692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), | 680 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), |
693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), | 681 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), |
694 | 682 |
695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { | 683 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { |
696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), | 684 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), |
697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), | 685 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), |
698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) | 686 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) |
699 +#endif | 687 +#endif |
700 }; | 688 }; |
701 int32_t rCount = ARRAY_SIZE(tempArray); | 689 int32_t rCount = ARRAY_SIZE(tempArray); |
702 | 690 |
703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp | 691 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp |
704 index 3db0bc9..be3eafa 100644 | 692 index 3db0bc9..236a526 100644 |
705 --- a/source/i18n/csr2022.cpp | 693 --- a/source/i18n/csr2022.cpp |
706 +++ b/source/i18n/csr2022.cpp | 694 +++ b/source/i18n/csr2022.cpp |
| 695 @@ -1,6 +1,6 @@ |
| 696 /* |
| 697 ********************************************************************** |
| 698 - * Copyright (C) 2005-2012, International Business Machines |
| 699 + * Copyright (C) 2005-2015, International Business Machines |
| 700 * Corporation and others. All Rights Reserved. |
| 701 ********************************************************************** |
| 702 */ |
707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = { | 703 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = { |
708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | 704 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
709 }; | 705 }; |
710 | 706 |
711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 707 +#if !UCONFIG_ONLY_HTML_CONVERSION |
712 static const uint8_t escapeSequences_2022KR[][5] = { | 708 static const uint8_t escapeSequences_2022KR[][5] = { |
713 {0x1b, 0x24, 0x29, 0x43, 0x00} | 709 {0x1b, 0x24, 0x29, 0x43, 0x00} |
714 }; | 710 }; |
715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = { | 711 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = { |
716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 | 712 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 | 713 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
718 }; | 714 }; |
719 +#endif | 715 +#endif |
720 | 716 |
721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} | 717 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
722 | 718 |
723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetM
atch *results) const | 719 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetM
atch *results) const |
724 return (confidence > 0); | 720 return (confidence > 0); |
725 } | 721 } |
726 | 722 |
727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 723 +#if !UCONFIG_ONLY_HTML_CONVERSION |
728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | 724 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
729 | 725 |
730 const char *CharsetRecog_2022KR::getName() const { | 726 const char *CharsetRecog_2022KR::getName() const { |
731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetM
atch *results) const | 727 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetM
atch *results) const |
732 results->set(textIn, this, confidence); | 728 results->set(textIn, this, confidence); |
733 return (confidence > 0); | 729 return (confidence > 0); |
734 } | 730 } |
735 +#endif | 731 +#endif |
736 | 732 |
737 CharsetRecog_2022::~CharsetRecog_2022() { | 733 CharsetRecog_2022::~CharsetRecog_2022() { |
738 // nothing to do | 734 // nothing to do |
739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h | 735 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h |
740 index 2ac2b87..dad22c7 100644 | 736 index 2ac2b87..f63e337 100644 |
741 --- a/source/i18n/csr2022.h | 737 --- a/source/i18n/csr2022.h |
742 +++ b/source/i18n/csr2022.h | 738 +++ b/source/i18n/csr2022.h |
| 739 @@ -1,6 +1,6 @@ |
| 740 /* |
| 741 ********************************************************************** |
| 742 - * Copyright (C) 2005-2012, International Business Machines |
| 743 + * Copyright (C) 2005-2015, International Business Machines |
| 744 * Corporation and others. All Rights Reserved. |
| 745 ********************************************************************** |
| 746 */ |
743 @@ -65,6 +65,7 @@ public: | 747 @@ -65,6 +65,7 @@ public: |
744 UBool match(InputText *textIn, CharsetMatch *results) const; | 748 UBool match(InputText *textIn, CharsetMatch *results) const; |
745 }; | 749 }; |
746 | 750 |
747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 751 +#if !UCONFIG_ONLY_HTML_CONVERSION |
748 class CharsetRecog_2022KR :public CharsetRecog_2022 { | 752 class CharsetRecog_2022KR :public CharsetRecog_2022 { |
749 public: | 753 public: |
750 virtual ~CharsetRecog_2022KR(); | 754 virtual ~CharsetRecog_2022KR(); |
751 @@ -84,6 +85,7 @@ public: | 755 @@ -84,6 +85,7 @@ public: |
752 | 756 |
753 UBool match(InputText *textIn, CharsetMatch *results) const; | 757 UBool match(InputText *textIn, CharsetMatch *results) const; |
754 }; | 758 }; |
755 +#endif | 759 +#endif |
756 | 760 |
757 U_NAMESPACE_END | 761 U_NAMESPACE_END |
758 | 762 |
759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp | 763 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp |
760 index d03367c..7b70dc1 100644 | 764 index d03367c..72fb959 100644 |
761 --- a/source/i18n/csrsbcs.cpp | 765 --- a/source/i18n/csrsbcs.cpp |
762 +++ b/source/i18n/csrsbcs.cpp | 766 +++ b/source/i18n/csrsbcs.cpp |
| 767 @@ -1,6 +1,6 @@ |
| 768 /* |
| 769 ********************************************************************** |
| 770 - * Copyright (C) 2005-2013, International Business Machines |
| 771 + * Copyright (C) 2005-2015, International Business Machines |
| 772 * Corporation and others. All Rights Reserved. |
| 773 ********************************************************************** |
| 774 */ |
763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det) | 775 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det) |
764 return (int32_t) (rawPercent * 300.0); | 776 return (int32_t) (rawPercent * 300.0); |
765 } | 777 } |
766 | 778 |
767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 779 +#if !UCONFIG_ONLY_HTML_CONVERSION |
768 static const uint8_t unshapeMap_IBM420[] = { | 780 static const uint8_t unshapeMap_IBM420[] = { |
769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A
-B -C -D -E -F */ | 781 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A
-B -C -D -E -F */ |
770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
x40, 0x40, 0x40, 0x40, 0x40, | 782 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
x40, 0x40, 0x40, 0x40, 0x40, |
771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det) | 783 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det) |
772 } | 784 } |
773 } | 785 } |
774 } | 786 } |
775 +#endif | 787 +#endif |
776 | 788 |
777 CharsetRecog_sbcs::CharsetRecog_sbcs() | 789 CharsetRecog_sbcs::CharsetRecog_sbcs() |
778 { | 790 { |
779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = { | 791 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = { |
780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | 792 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
781 }; | 793 }; |
782 | 794 |
783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 795 +#if !UCONFIG_ONLY_HTML_CONVERSION |
784 static const int32_t ngrams_IBM424_he_rtl[] = { | 796 static const int32_t ngrams_IBM424_he_rtl[] = { |
785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404
546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405
641, | 797 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404
546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405
641, |
786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454
056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514
045, | 798 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454
056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514
045, |
787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= { | 799 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= { |
788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0
xEB, 0x40, 0xED, 0xEE, 0xEF, | 800 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0
xEB, 0x40, 0xED, 0xEE, 0xEF, |
789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
xFB, 0xFC, 0xFD, 0xFE, 0x40, | 801 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
xFB, 0xFC, 0xFD, 0xFE, 0x40, |
790 }; | 802 }; |
791 +#endif | 803 +#endif |
792 | 804 |
793 //ISO-8859-1,2,5,6,7,8,9 Ngrams | 805 //ISO-8859-1,2,5,6,7,8,9 Ngrams |
794 | 806 |
795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, Charse
tMatch *results) const | 807 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, Charse
tMatch *results) const |
796 return (confidence > 0); | 808 return (confidence > 0); |
797 } | 809 } |
798 | 810 |
799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 811 +#if !UCONFIG_ONLY_HTML_CONVERSION |
800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() | 812 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() |
801 { | 813 { |
802 // nothing to do | 814 // nothing to do |
803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn,
CharsetMatch *results | 815 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn,
CharsetMatch *results |
804 results->set(textIn, this, confidence); | 816 results->set(textIn, this, confidence); |
805 return (confidence > 0); | 817 return (confidence > 0); |
806 } | 818 } |
807 +#endif | 819 +#endif |
808 | 820 |
809 U_NAMESPACE_END | 821 U_NAMESPACE_END |
810 #endif | 822 #endif |
811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h | 823 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h |
812 index 2579c02..7789f9b 100644 | 824 index 2579c02..bd2a264 100644 |
813 --- a/source/i18n/csrsbcs.h | 825 --- a/source/i18n/csrsbcs.h |
814 +++ b/source/i18n/csrsbcs.h | 826 +++ b/source/i18n/csrsbcs.h |
| 827 @@ -1,6 +1,6 @@ |
| 828 /* |
| 829 ********************************************************************** |
| 830 - * Copyright (C) 2005-2013, International Business Machines |
| 831 + * Copyright (C) 2005-2015, International Business Machines |
| 832 * Corporation and others. All Rights Reserved. |
| 833 ********************************************************************** |
| 834 */ |
815 @@ -50,6 +50,7 @@ public: | 835 @@ -50,6 +50,7 @@ public: |
816 | 836 |
817 }; | 837 }; |
818 | 838 |
819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 839 +#if !UCONFIG_ONLY_HTML_CONVERSION |
820 class NGramParser_IBM420 : public NGramParser | 840 class NGramParser_IBM420 : public NGramParser |
821 { | 841 { |
822 private: | 842 private: |
823 @@ -61,6 +62,7 @@ private: | 843 @@ -61,6 +62,7 @@ private: |
824 public: | 844 public: |
825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); | 845 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); |
826 }; | 846 }; |
827 +#endif | 847 +#endif |
828 | 848 |
829 | 849 |
830 class CharsetRecog_sbcs : public CharsetRecognizer | 850 class CharsetRecog_sbcs : public CharsetRecognizer |
831 @@ -229,6 +231,7 @@ public: | 851 @@ -229,6 +231,7 @@ public: |
832 virtual UBool match(InputText *det, CharsetMatch *results) const; | 852 virtual UBool match(InputText *det, CharsetMatch *results) const; |
833 }; | 853 }; |
834 | 854 |
835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 855 +#if !UCONFIG_ONLY_HTML_CONVERSION |
836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs | 856 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs |
837 { | 857 { |
838 public: | 858 public: |
839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM42
0_ar { | 859 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM42
0_ar { |
840 | 860 |
841 virtual UBool match(InputText *det, CharsetMatch *results) const; | 861 virtual UBool match(InputText *det, CharsetMatch *results) const; |
842 }; | 862 }; |
843 +#endif | 863 +#endif |
844 | 864 |
845 U_NAMESPACE_END | 865 U_NAMESPACE_END |
846 | 866 |
OLD | NEW |