Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(123)

Side by Side Diff: patches/uconv.patch

Issue 1222643002: Update uconv.patch to exactly match upstream SVN r37045. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « icu.gyp ('k') | source/common/ucnv2022.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 Cherry-picked from SVN r37045.
2
3 ticket:11296: based on patch from Jungshik, approved option name UCONFIG_ONLY_HTML_CONVERSION, turn off UTF-32, simplify changes, fix warnings
4
1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp 5 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
2 index 9556dd2..7b8df9b 100644 6 index 7bfc271..582865a 100644
3 --- a/source/common/ucnv2022.cpp 7 --- a/source/common/ucnv2022.cpp
4 +++ b/source/common/ucnv2022.cpp 8 +++ b/source/common/ucnv2022.cpp
5 @@ -152,7 +152,11 @@ typedef enum { 9 @@ -1,6 +1,6 @@
10 /*
11 **********************************************************************
12 -* Copyright (C) 2000-2014, International Business Machines
13 +* Copyright (C) 2000-2015, International Business Machines
14 * Corporation and others. All Rights Reserved.
15 **********************************************************************
16 * file name: ucnv2022.cpp
17 @@ -75,8 +75,10 @@
18 */
19 #endif
20
21 +#if !UCONFIG_ONLY_HTML_CONVERSION
22 static const char SHIFT_IN_STR[] = "\x0F";
23 // static const char SHIFT_OUT_STR[] = "\x0E";
24 +#endif
25
26 #define CR 0x0D
27 #define LF 0x0A
28 @@ -152,7 +154,11 @@ typedef enum {
6 } StateEnum; 29 } StateEnum;
7 30
8 /* is the StateEnum charset value for a DBCS charset? */ 31 /* is the StateEnum charset value for a DBCS charset? */
9 +#if UCONFIG_NO_NON_HTML5_CONVERSION 32 +#if UCONFIG_ONLY_HTML_CONVERSION
10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) 33 +#define IS_JP_DBCS(cs) (JISX208==(cs))
11 +#else 34 +#else
12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 35 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
13 +#endif 36 +#endif
14 37
15 #define CSM(cs) ((uint16_t)1<<(cs)) 38 #define CSM(cs) ((uint16_t)1<<(cs))
16 39
17 @@ -165,13 +169,23 @@ typedef enum { 40 @@ -165,13 +171,19 @@ typedef enum {
18 * all versions, not just JIS7 and JIS8. 41 * all versions, not just JIS7 and JIS8.
19 * - ICU does not distinguish between different versions of JIS X 0208. 42 * - ICU does not distinguish between different versions of JIS X 0208.
20 */ 43 */
21 +#if UCONFIG_NO_NON_HTML5_CONVERSION 44 +#if UCONFIG_ONLY_HTML_CONVERSION
22 +enum { MAX_JA_VERSION=0 }; 45 +enum { MAX_JA_VERSION=0 };
23 +#else 46 +#else
24 enum { MAX_JA_VERSION=4 }; 47 enum { MAX_JA_VERSION=4 };
25 +#endif 48 +#endif
26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 49 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
27 +/*
28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it.
29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
30 + */
31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 50 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 51 +#if !UCONFIG_ONLY_HTML_CONVERSION
33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 52 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23 12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 53 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23 12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23 12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 54 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23 12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23 12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 55 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23 12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
37 +#endif 56 +#endif
38 }; 57 };
39 58
40 typedef enum { 59 typedef enum {
41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES _2022] = { 60 @@ -358,15 +370,16 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES _2022] = {
42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202 2 ,VALID_TERMINAL_2022 61 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202 2 ,VALID_TERMINAL_2022
43 }; 62 };
44 63
45 - 64 -
46 /* Type def for refactoring changeState_2022 code*/ 65 /* Type def for refactoring changeState_2022 code*/
47 typedef enum{ 66 typedef enum{
48 #ifdef U_ENABLE_GENERIC_ISO_2022 67 #ifdef U_ENABLE_GENERIC_ISO_2022
49 ISO_2022=0, 68 ISO_2022=0,
50 #endif 69 #endif
51 +#if UCONFIG_NO_NON_HTML5_CONVERSION
52 + ISO_2022_JP=1
53 +#else
54 ISO_2022_JP=1, 70 ISO_2022_JP=1,
71 +#if !UCONFIG_ONLY_HTML_CONVERSION
55 ISO_2022_KR=2, 72 ISO_2022_KR=2,
56 ISO_2022_CN=3 73 ISO_2022_CN=3
57 +#endif 74 +#endif
58 } Variant2022; 75 } Variant2022;
59 76
60 /*********** ISO 2022 Converter Protos ***********/ 77 /*********** ISO 2022 Converter Protos ***********/
61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, U ErrorCode *errorCode){ 78 @@ -397,8 +410,11 @@ namespace {
62 /* prevent indexing beyond jpCharsetMasks[] */ 79
63 myConverterData->version = version = 0; 80 /*const UConverterSharedData _ISO2022Data;*/
64 } 81 extern const UConverterSharedData _ISO2022JPData;
65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 82 +
66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 83 +#if !UCONFIG_ONLY_HTML_CONVERSION
67 myConverterData->myConverterArray[ISO8859_7] = 84 extern const UConverterSharedData _ISO2022KRData;
68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); 85 extern const UConverterSharedData _ISO2022CNData;
69 }
70 +#endif
71 myConverterData->myConverterArray[JISX208] =
72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro rCode);
73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
74 if(jpCharsetMasks[version]&CSM(JISX212)) {
75 myConverterData->myConverterArray[JISX212] =
76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e rrorCode);
77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){
78 myConverterData->myConverterArray[KSC5601] =
79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e rrorCode);
80 }
81 +#endif 86 +#endif
82 87
83 /* set the function pointers to appropriate funtions */ 88 } // namespace
84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 89
85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){ 90 @@ -511,6 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){
86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0' ); 91 myConverterData->name[len]=(char)(myConverterData->version+(int)'0' );
87 myConverterData->name[len+1]='\0'; 92 myConverterData->name[len+1]='\0';
88 } 93 }
89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 94 +#if !UCONFIG_ONLY_HTML_CONVERSION
90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 95 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
91 (myLocale[2]=='_' || myLocale[2]=='\0')) 96 (myLocale[2]=='_' || myLocale[2]=='\0'))
92 { 97 {
93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){ 98 @@ -580,6 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){
94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver sion=2"); 99 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver sion=2");
95 } 100 }
96 } 101 }
97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION 102 +#endif // !UCONFIG_ONLY_HTML_CONVERSION
98 else{ 103 else{
99 #ifdef U_ENABLE_GENERIC_ISO_2022 104 #ifdef U_ENABLE_GENERIC_ISO_2022
100 myConverterData->isFirstBuffer = TRUE; 105 myConverterData->isFirstBuffer = TRUE;
101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { 106 @@ -714,6 +732,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 107 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
103 }; 108 };
104 109
105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 110 +#if !UCONFIG_ONLY_HTML_CONVERSION
106 /*************** to unicode *******************/ 111 /*************** to unicode *******************/
107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 112 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
108 /* 0 1 2 3 4 5 6 7 8 9 * / 113 /* 0 1 2 3 4 5 6 7 8 9 * /
109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 114 @@ -726,6 +745,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE 115 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE
111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 116 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
112 }; 117 };
113 +#endif 118 +#endif
114 119
115 120
116 static UCNV_TableStates_2022 121 static UCNV_TableStates_2022
117 @@ -878,6 +903,7 @@ DONE: 122 @@ -898,6 +918,7 @@ DONE:
118 }
119 break;
120 /* case SS3_STATE: not used in ISO-2022-JP-x */
121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
122 case ISO8859_1:
123 case ISO8859_7:
124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) = = 0) {
125 @@ -887,6 +913,7 @@ DONE:
126 myData2022->toU2022State.cs[2]=(int8_t)tempState;
127 }
128 break;
129 +#endif
130 default:
131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) = = 0) {
132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
133 @@ -898,6 +925,7 @@ DONE:
134 } 123 }
135 } 124 }
136 break; 125 break;
137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 126 +#if !UCONFIG_ONLY_HTML_CONVERSION
138 case ISO_2022_CN: 127 case ISO_2022_CN:
139 { 128 {
140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 129 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
141 @@ -959,6 +987,7 @@ DONE: 130 @@ -959,6 +980,7 @@ DONE:
142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 131 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
143 } 132 }
144 break; 133 break;
145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 134 +#endif // !UCONFIG_ONLY_HTML_CONVERSION
146 135
147 default: 136 default:
148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 137 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv, 138 @@ -1001,6 +1023,7 @@ DONE:
150 static const StateEnum jpCharsetPref[]={ 139 }
151 ASCII, 140 }
152 JISX201, 141
153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 142 +#if !UCONFIG_ONLY_HTML_CONVERSION
154 ISO8859_1, 143 /*Checks the characters of the buffer against valid 2022 escape sequences
155 ISO8859_7, 144 *if the match we return a pointer to the initial start of the sequence otherwis e
145 *we return sourceLimit
146 @@ -1055,7 +1078,7 @@ getEndOfBuffer_2022(const char** source,
147 return mySource;
148 #endif
149 }
150 -
156 +#endif 151 +#endif
157 JISX208,
158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
159 JISX212,
160 GB2312,
161 KSC5601,
162 +#endif
163 HWKANA_7BIT
164 };
165 152
166 @@ -1754,6 +1787,7 @@ getTrail: 153 /* This inline function replicates code in _MBCSFromUChar32() function in ucnvm bcs.c
167 g = 0; 154 * any future change in _MBCSFromUChar32() function should be reflected here.
168 } 155 @@ -2269,6 +2292,7 @@ endloop:
169 break;
170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
171 case ISO8859_1:
172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
173 targetValue = (uint32_t)sourceChar - 0x80;
174 @@ -1762,6 +1796,7 @@ getTrail:
175 g = 2;
176 }
177 break;
178 +#endif
179 case HWKANA_7BIT:
180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H WKANA_START)) {
181 if(converterData->version==3) {
182 @@ -1823,6 +1858,7 @@ getTrail:
183 useFallback = FALSE;
184 }
185 break;
186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
187 case ISO8859_7:
188 /* G0 SBCS forced to 7-bit output */
189 len2 = MBCS_SINGLE_FROM_UCHAR32(
190 @@ -1837,6 +1873,7 @@ getTrail:
191 useFallback = FALSE;
192 }
193 break;
194 +#endif
195 default:
196 /* G0 DBCS */
197 len2 = MBCS_FROM_UCHAR32_ISO2022(
198 @@ -1844,6 +1881,7 @@ getTrail:
199 sourceChar, &value,
200 useFallback, MBCS_OUTPUT_2);
201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
203 if(cs0 == KSC5601) {
204 /*
205 * Check for valid bytes for the encoding scheme.
206 @@ -1855,6 +1893,7 @@ getTrail:
207 break;
208 }
209 }
210 +#endif
211 targetValue = value;
212 len = len2;
213 cs = cs0;
214 @@ -2148,6 +2187,7 @@ escape:
215 targetUniChar = mySourceChar;
216 }
217 break;
218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
219 case ISO8859_1:
220 if(mySourceChar <= 0x7f) {
221 targetUniChar = mySourceChar + 0x80;
222 @@ -2166,6 +2206,7 @@ escape:
223 /* return from a single-shift state to the previous one */
224 pToU2022State->g=pToU2022State->prevG;
225 break;
226 +#endif
227 case JISX201:
228 if(mySourceChar <= 0x7f) {
229 targetUniChar = jisx201ToU(mySourceChar);
230 @@ -2205,9 +2246,11 @@ getTrailByte:
231 } else {
232 /* Copy before we modify tmpSourceChar so toUni codeCallback() sees the correct bytes. */
233 mySourceChar = tmpSourceChar;
234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
235 if (cs == KSC5601) {
236 tmpSourceChar += 0x8080; /* = _2022ToGR94D BCS(tmpSourceChar) */
237 }
238 +#endif
239 tempBuf[0] = (char)(tmpSourceChar >> 8);
240 tempBuf[1] = (char)(tmpSourceChar);
241 }
242 @@ -2269,6 +2312,7 @@ endloop:
243 } 156 }
244 157
245 158
246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 159 +#if !UCONFIG_ONLY_HTML_CONVERSION
247 /*************************************************************** 160 /***************************************************************
248 * Rules for ISO-2022-KR encoding 161 * Rules for ISO-2022-KR encoding
249 * i) The KSC5601 designator sequence should appear only once in a file, 162 * i) The KSC5601 designator sequence should appear only once in a file,
250 @@ -3412,6 +3456,7 @@ endloop: 163 @@ -3412,6 +3436,7 @@ endloop:
251 args->target = myTarget; 164 args->target = myTarget;
252 args->source = mySource; 165 args->source = mySource;
253 } 166 }
254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 167 +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
255 168
256 static void 169 static void
257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError Code *err) { 170 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError Code *err) {
258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, 171 @@ -3638,6 +3663,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
259 /* include JIS X 0201 which is hardcoded */
260 sa->add(sa->set, 0xa5);
261 sa->add(sa->set, 0x203e);
262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
264 /* include Latin-1 for some variants of JP */
265 sa->addRange(sa->set, 0, 0xff);
266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
267 /* include ASCII for JP */
268 sa->addRange(sa->set, 0, 0x7f);
269 }
270 +#else
271 + /* include ASCII for JP */
272 + sa->addRange(sa->set, 0, 0x7f);
273 +#endif
274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_ AND_FALLBACK_SET) {
275 /*
276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))! =0
277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 172 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
279 } 173 }
280 break; 174 break;
281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 175 +#if !UCONFIG_ONLY_HTML_CONVERSION
282 case 'c': 176 case 'c':
283 case 'z': 177 case 'z':
284 /* include ASCII for CN */ 178 /* include ASCII for CN */
285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, 179 @@ -3649,6 +3675,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
286 cnvData->currentConverter, sa, which, pErrorCode); 180 cnvData->currentConverter, sa, which, pErrorCode);
287 /* the loop over myConverterArray[] will simply not find another conver ter */ 181 /* the loop over myConverterArray[] will simply not find another conver ter */
288 break; 182 break;
289 +#endif 183 +#endif
290 default: 184 default:
291 break; 185 break;
292 } 186 }
293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, 187 @@ -3669,9 +3696,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 188 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
295 UConverterSetFilter filter; 189 UConverterSetFilter filter;
296 if(cnvData->myConverterArray[i]!=NULL) { 190 if(cnvData->myConverterArray[i]!=NULL) {
297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 191 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
298 - cnvData->version==0 && i==CNS_11643 192 - cnvData->version==0 && i==CNS_11643
299 - ) { 193 - ) {
300 + if(cnvData->locale[0]=='j' && i==JISX208) { 194 + if(cnvData->locale[0]=='j' && i==JISX208) {
301 + /* 195 + /*
302 + * Only add code points that map to Shift-JIS codes 196 + * Only add code points that map to Shift-JIS codes
303 + * corresponding to JIS X 0208. 197 + * corresponding to JIS X 0208.
304 + */ 198 + */
305 + filter=UCNV_SET_FILTER_SJIS; 199 + filter=UCNV_SET_FILTER_SJIS;
306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 200 +#if !UCONFIG_ONLY_HTML_CONVERSION
307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 201 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
308 + cnvData->version==0 && i==CNS_11643) { 202 + cnvData->version==0 && i==CNS_11643) {
309 /* 203 /*
310 * Version-specific for CN: 204 * Version-specific for CN:
311 * CN version 0 does not map CNS planes 3..7 although 205 * CN version 0 does not map CNS planes 3..7 although
312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, 206 @@ -3680,18 +3713,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
313 * The two versions create different Unicode sets. 207 * The two versions create different Unicode sets.
314 */ 208 */
315 filter=UCNV_SET_FILTER_2022_CN; 209 filter=UCNV_SET_FILTER_2022_CN;
316 - } else if(cnvData->locale[0]=='j' && i==JISX208) { 210 - } else if(cnvData->locale[0]=='j' && i==JISX208) {
317 - /* 211 - /*
318 - * Only add code points that map to Shift-JIS codes 212 - * Only add code points that map to Shift-JIS codes
319 - * corresponding to JIS X 0208. 213 - * corresponding to JIS X 0208.
320 - */ 214 - */
321 - filter=UCNV_SET_FILTER_SJIS; 215 - filter=UCNV_SET_FILTER_SJIS;
322 } else if(i==KSC5601) { 216 } else if(i==KSC5601) {
323 /* 217 /*
324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o n multiple tables) 218 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o n multiple tables)
325 * are broader than GR94. 219 * are broader than GR94.
326 */ 220 */
327 filter=UCNV_SET_FILTER_GR94DBCS; 221 filter=UCNV_SET_FILTER_GR94DBCS;
328 +#endif 222 +#endif
329 } else { 223 } else {
330 filter=UCNV_SET_FILTER_NONE; 224 filter=UCNV_SET_FILTER_NONE;
331 } 225 }
332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={ 226 @@ -3813,6 +3841,7 @@ const UConverterSharedData _ISO2022JPData=
333 227
334 } // namespace 228 } // namespace
335 229
336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 230 +#if !UCONFIG_ONLY_HTML_CONVERSION
337 /************* KR ***************/ 231 /************* KR ***************/
338 static const UConverterImpl _ISO2022KRImpl={ 232 static const UConverterImpl _ISO2022KRImpl={
339 UCNV_ISO_2022, 233 UCNV_ISO_2022,
340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={ 234 @@ -3913,5 +3942,6 @@ const UConverterSharedData _ISO2022CNData=
341 }; 235 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022 CNImpl);
342 236
343 } // namespace 237 } // namespace
344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 238 +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */
345 239
346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 240 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp 241 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp
348 index 4940310..047f18a 100644 242 index ee1cf9e..a70021b 100644
349 --- a/source/common/ucnv_bld.cpp 243 --- a/source/common/ucnv_bld.cpp
350 +++ b/source/common/ucnv_bld.cpp 244 +++ b/source/common/ucnv_bld.cpp
351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ 245 @@ -1,11 +1,11 @@
246 /*
247 ********************************************************************
248 * COPYRIGHT:
249 - * Copyright (c) 1996-2013, International Business Machines Corporation and
250 + * Copyright (c) 1996-2015, International Business Machines Corporation and
251 * others. All Rights Reserved.
252 ********************************************************************
253 *
254 - * uconv_bld.cpp:
255 + * ucnv_bld.cpp:
256 *
257 * Defines functions that are used in the creation/initialization/deletion
258 * of converters and related structures.
259 @@ -64,33 +64,51 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
260 #endif
261
262 &_Latin1Data,
263 - &_UTF8Data, &_UTF16BEData, &_UTF16LEData, &_UTF32BEData, &_UTF32LEData,
264 + &_UTF8Data, &_UTF16BEData, &_UTF16LEData,
265 +#if UCONFIG_ONLY_HTML_CONVERSION
266 + NULL, NULL,
267 +#else
268 + &_UTF32BEData, &_UTF32LEData,
269 +#endif
270 NULL,
352 271
353 #if UCONFIG_NO_LEGACY_CONVERSION 272 #if UCONFIG_NO_LEGACY_CONVERSION
354 NULL, 273 NULL,
355 +#else 274 +#else
356 + &_ISO2022Data, 275 + &_ISO2022Data,
357 +#endif 276 +#endif
358 + 277 +
359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION 278 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
360 NULL, NULL, NULL, NULL, NULL, NULL, 279 NULL, NULL, NULL, NULL, NULL, NULL,
361 NULL, NULL, NULL, NULL, NULL, NULL, 280 NULL, NULL, NULL, NULL, NULL, NULL,
362 NULL, 281 NULL,
363 #else 282 #else
364 - &_ISO2022Data, 283 - &_ISO2022Data,
365 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBC SData6, 284 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBC SData6,
366 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBC SData19, 285 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBC SData19,
367 &_HZData, 286 &_HZData,
368 #endif 287 #endif
369 288
370 +#if UCONFIG_NO_NON_HTML5_CONVERSION 289 +#if UCONFIG_ONLY_HTML_CONVERSION
371 + NULL, 290 + NULL,
372 +#else 291 +#else
373 &_SCSUData, 292 &_SCSUData,
374 +#endif 293 +#endif
375 294
376 -#if UCONFIG_NO_LEGACY_CONVERSION 295 -#if UCONFIG_NO_LEGACY_CONVERSION
377 + 296 +
378 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION 297 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
379 NULL, 298 NULL,
380 #else 299 #else
381 &_ISCIIData, 300 &_ISCIIData,
382 #endif 301 #endif
383 302
384 &_ASCIIData, 303 &_ASCIIData,
385 +#if UCONFIG_NO_NON_HTML5_CONVERSION 304 +#if UCONFIG_ONLY_HTML_CONVERSION
386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, 305 + NULL, NULL, &_UTF16Data, NULL, NULL, NULL,
387 +#else 306 +#else
388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 307 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
389 +#endif 308 +#endif
390 309
391 -#if UCONFIG_NO_LEGACY_CONVERSION 310 -#if UCONFIG_NO_LEGACY_CONVERSION
392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION 311 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION
393 NULL, 312 NULL,
394 #else 313 #else
395 &_CompoundTextData 314 &_CompoundTextData
396 @@ -105,18 +118,24 @@ static struct { 315 @@ -105,18 +123,24 @@ static struct {
397 const char *name; 316 const char *name;
398 const UConverterType type; 317 const UConverterType type;
399 } const cnvNameType[] = { 318 } const cnvNameType[] = {
400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 319 +#if !UCONFIG_ONLY_HTML_CONVERSION
401 { "bocu1", UCNV_BOCU1 }, 320 { "bocu1", UCNV_BOCU1 },
402 { "cesu8", UCNV_CESU8 }, 321 { "cesu8", UCNV_CESU8 },
403 -#if !UCONFIG_NO_LEGACY_CONVERSION 322 -#if !UCONFIG_NO_LEGACY_CONVERSION
404 +#endif 323 +#endif
405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 324 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
406 { "hz",UCNV_HZ }, 325 { "hz",UCNV_HZ },
407 #endif 326 #endif
408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 327 +#if !UCONFIG_ONLY_HTML_CONVERSION
409 { "imapmailboxname", UCNV_IMAP_MAILBOX }, 328 { "imapmailboxname", UCNV_IMAP_MAILBOX },
410 -#if !UCONFIG_NO_LEGACY_CONVERSION 329 -#if !UCONFIG_NO_LEGACY_CONVERSION
411 +#endif 330 +#endif
412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 331 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
413 { "iscii", UCNV_ISCII }, 332 { "iscii", UCNV_ISCII },
414 +#endif 333 +#endif
415 +#if !UCONFIG_NO_LEGACY_CONVERSION 334 +#if !UCONFIG_NO_LEGACY_CONVERSION
416 { "iso2022", UCNV_ISO_2022 }, 335 { "iso2022", UCNV_ISO_2022 },
417 #endif 336 #endif
418 { "iso88591", UCNV_LATIN_1 }, 337 { "iso88591", UCNV_LATIN_1 },
419 -#if !UCONFIG_NO_LEGACY_CONVERSION 338 -#if !UCONFIG_NO_LEGACY_CONVERSION
420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 339 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
421 { "lmbcs1", UCNV_LMBCS_1 }, 340 { "lmbcs1", UCNV_LMBCS_1 },
422 { "lmbcs11",UCNV_LMBCS_11 }, 341 { "lmbcs11",UCNV_LMBCS_11 },
423 { "lmbcs16",UCNV_LMBCS_16 }, 342 { "lmbcs16",UCNV_LMBCS_16 },
424 @@ -130,7 +149,9 @@ static struct { 343 @@ -130,7 +154,9 @@ static struct {
425 { "lmbcs6", UCNV_LMBCS_6 }, 344 { "lmbcs6", UCNV_LMBCS_6 },
426 { "lmbcs8", UCNV_LMBCS_8 }, 345 { "lmbcs8", UCNV_LMBCS_8 },
427 #endif 346 #endif
428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 347 +#if !UCONFIG_ONLY_HTML_CONVERSION
429 { "scsu", UCNV_SCSU }, 348 { "scsu", UCNV_SCSU },
430 +#endif 349 +#endif
431 { "usascii", UCNV_US_ASCII }, 350 { "usascii", UCNV_US_ASCII },
432 { "utf16", UCNV_UTF16 }, 351 { "utf16", UCNV_UTF16 },
433 { "utf16be", UCNV_UTF16_BigEndian }, 352 { "utf16be", UCNV_UTF16_BigEndian },
434 @@ -152,9 +173,13 @@ static struct { 353 @@ -142,6 +168,7 @@ static struct {
354 { "utf16oppositeendian", UCNV_UTF16_BigEndian},
355 { "utf16platformendian", UCNV_UTF16_LittleEndian },
356 #endif
357 +#if !UCONFIG_ONLY_HTML_CONVERSION
358 { "utf32", UCNV_UTF32 },
359 { "utf32be", UCNV_UTF32_BigEndian },
360 { "utf32le", UCNV_UTF32_LittleEndian },
361 @@ -152,9 +179,14 @@ static struct {
435 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, 362 { "utf32oppositeendian", UCNV_UTF32_BigEndian },
436 { "utf32platformendian", UCNV_UTF32_LittleEndian }, 363 { "utf32platformendian", UCNV_UTF32_LittleEndian },
437 #endif 364 #endif
438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 365 +#endif
366 +#if !UCONFIG_ONLY_HTML_CONVERSION
439 { "utf7", UCNV_UTF7 }, 367 { "utf7", UCNV_UTF7 },
440 +#endif 368 +#endif
441 { "utf8", UCNV_UTF8 }, 369 { "utf8", UCNV_UTF8 },
442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 370 +#if !UCONFIG_ONLY_HTML_CONVERSION
443 { "x11compoundtext", UCNV_COMPOUND_TEXT} 371 { "x11compoundtext", UCNV_COMPOUND_TEXT}
444 +#endif 372 +#endif
445 }; 373 };
446 374
447 375
448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h
449 index 402e2c9..5fad446 100644
450 --- a/source/common/ucnv_cnv.h
451 +++ b/source/common/ucnv_cnv.h
452 @@ -256,11 +256,15 @@ struct UConverterImpl {
453 extern const UConverterSharedData
454 _MBCSData, _Latin1Data,
455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
456 - _ISO2022Data,
457 + _ISO2022Data,
458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6 ,
460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1 9,
461 _HZData,_ISCIIData, _SCSUData, _ASCIIData,
462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp oundTextData;
463 +#else
464 + _ASCIIData, _UTF16Data, _UTF32Data;
465 +#endif
466
467 U_CDECL_END
468
469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c 376 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c
470 index ec0e9c2..e723fa6 100644 377 index ecba6c6..91d66e1 100644
471 --- a/source/common/ucnv_ct.c 378 --- a/source/common/ucnv_ct.c
472 +++ b/source/common/ucnv_ct.c 379 +++ b/source/common/ucnv_ct.c
380 @@ -1,6 +1,6 @@
381 /*
382 **********************************************************************
383 -* Copyright (C) 2010-2014, International Business Machines
384 +* Copyright (C) 2010-2015, International Business Machines
385 * Corporation and others. All Rights Reserved.
386 **********************************************************************
387 * file name: ucnv_ct.c
473 @@ -14,7 +14,7 @@ 388 @@ -14,7 +14,7 @@
474 389
475 #include "unicode/utypes.h" 390 #include "unicode/utypes.h"
476 391
477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 392 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION 393 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HT ML_CONVERSION
479 394
480 #include "unicode/ucnv.h" 395 #include "unicode/ucnv.h"
481 #include "unicode/uset.h" 396 #include "unicode/uset.h"
482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c 397 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c
483 index 1d921dd..a4fccee 100644 398 index 81ac9f9..01d0aa1 100644
484 --- a/source/common/ucnv_lmb.c 399 --- a/source/common/ucnv_lmb.c
485 +++ b/source/common/ucnv_lmb.c 400 +++ b/source/common/ucnv_lmb.c
401 @@ -1,6 +1,6 @@
402 /*
403 **********************************************************************
404 -* Copyright (C) 2000-2014, International Business Machines
405 +* Copyright (C) 2000-2015, International Business Machines
406 * Corporation and others. All Rights Reserved.
407 **********************************************************************
408 * file name: ucnv_lmb.cpp
486 @@ -25,7 +25,7 @@ 409 @@ -25,7 +25,7 @@
487 410
488 #include "unicode/utypes.h" 411 #include "unicode/utypes.h"
489 412
490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 413 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION 414 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HT ML_CONVERSION
492 415
493 #include "unicode/ucnv_err.h" 416 #include "unicode/ucnv_err.h"
494 #include "unicode/ucnv.h" 417 #include "unicode/ucnv.h"
495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c 418 diff --git a/source/common/ucnv_u32.c b/source/common/ucnv_u32.c
496 index 42943f4..6466b87 100644 419 index 49d6746..b6804ca 100644
497 --- a/source/common/ucnv_u7.c 420 --- a/source/common/ucnv_u32.c
498 +++ b/source/common/ucnv_u7.c 421 +++ b/source/common/ucnv_u32.c
422 @@ -1,6 +1,6 @@
423 /*
424 **********************************************************************
425 -* Copyright (C) 2002-2011, International Business Machines
426 +* Copyright (C) 2002-2015, International Business Machines
427 * Corporation and others. All Rights Reserved.
428 **********************************************************************
429 * file name: ucnv_u32.c
499 @@ -16,7 +16,7 @@ 430 @@ -16,7 +16,7 @@
500 431
501 #include "unicode/utypes.h" 432 #include "unicode/utypes.h"
502 433
503 -#if !UCONFIG_NO_CONVERSION 434 -#if !UCONFIG_NO_CONVERSION
504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 435 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
436
437 #include "unicode/ucnv.h"
438 #include "unicode/utf.h"
439 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c
440 index fe39028..d35bae2 100644
441 --- a/source/common/ucnv_u7.c
442 +++ b/source/common/ucnv_u7.c
443 @@ -1,6 +1,6 @@
444 /*
445 **********************************************************************
446 -* Copyright (C) 2002-2011, International Business Machines
447 +* Copyright (C) 2002-2015, International Business Machines
448 * Corporation and others. All Rights Reserved.
449 **********************************************************************
450 * file name: ucnv_u7.c
451 @@ -16,7 +16,7 @@
452
453 #include "unicode/utypes.h"
454
455 -#if !UCONFIG_NO_CONVERSION
456 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
505 457
506 #include "unicode/ucnv.h" 458 #include "unicode/ucnv.h"
507 #include "ucnv_bld.h" 459 #include "ucnv_bld.h"
508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c 460 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c
509 index 8ee9fe5..24205f5 100644 461 index 2d045d4..b785873 100644
510 --- a/source/common/ucnv_u8.c 462 --- a/source/common/ucnv_u8.c
511 +++ b/source/common/ucnv_u8.c 463 +++ b/source/common/ucnv_u8.c
464 @@ -1,6 +1,6 @@
465 /*
466 **********************************************************************
467 -* Copyright (C) 2002-2012, International Business Machines
468 +* Copyright (C) 2002-2015, International Business Machines
469 * Corporation and others. All Rights Reserved.
470 **********************************************************************
471 * file name: ucnv_u8.c
512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = { 472 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
513 static const uint32_t 473 static const uint32_t
514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; 474 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
515 475
516 +static UBool hasCESU8Data(const UConverter *cnv) 476 +static UBool hasCESU8Data(const UConverter *cnv)
517 +{ 477 +{
518 +#if UCONFIG_NO_NON_HTML5_CONVERSION 478 +#if UCONFIG_ONLY_HTML_CONVERSION
519 + return FALSE; 479 + return FALSE;
520 +#else 480 +#else
521 + return (UBool)(cnv->sharedData == &_CESU8Data); 481 + return (UBool)(cnv->sharedData == &_CESU8Data);
522 +#endif 482 +#endif
523 +} 483 +}
524 + 484 +
525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 485 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
526 UErrorCode * err) 486 UErrorCode * err)
527 { 487 {
528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 488 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
(...skipping 30 matching lines...) Expand all
559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar 519 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
560 int32_t offsetNum, nextSourceIndex; 520 int32_t offsetNum, nextSourceIndex;
561 int32_t indexToWrite; 521 int32_t indexToWrite;
562 uint8_t tempBuf[4]; 522 uint8_t tempBuf[4];
563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 523 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
564 + UBool isNotCESU8 = !hasCESU8Data(cnv); 524 + UBool isNotCESU8 = !hasCESU8Data(cnv);
565 525
566 if (cnv->fromUChar32 && myTarget < targetLimit) 526 if (cnv->fromUChar32 && myTarget < targetLimit)
567 { 527 {
568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp 528 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp
569 index b97d666..281d6d9 100644 529 index 5497f7d..b37d12c 100644
570 --- a/source/common/ucnvbocu.cpp 530 --- a/source/common/ucnvbocu.cpp
571 +++ b/source/common/ucnvbocu.cpp 531 +++ b/source/common/ucnvbocu.cpp
532 @@ -1,7 +1,7 @@
533 /*
534 ******************************************************************************
535 *
536 -* Copyright (C) 2002-2011, International Business Machines
537 +* Copyright (C) 2002-2015, International Business Machines
538 * Corporation and others. All Rights Reserved.
539 *
540 ******************************************************************************
572 @@ -19,7 +19,7 @@ 541 @@ -19,7 +19,7 @@
573 542
574 #include "unicode/utypes.h" 543 #include "unicode/utypes.h"
575 544
576 -#if !UCONFIG_NO_CONVERSION 545 -#if !UCONFIG_NO_CONVERSION
577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 546 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
578 547
579 #include "unicode/ucnv.h" 548 #include "unicode/ucnv.h"
580 #include "unicode/ucnv_cb.h" 549 #include "unicode/ucnv_cb.h"
581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c 550 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c
582 index 3760c39..51825e2 100644 551 index 4a89c47..4ca2e78 100644
583 --- a/source/common/ucnvhz.c 552 --- a/source/common/ucnvhz.c
584 +++ b/source/common/ucnvhz.c 553 +++ b/source/common/ucnvhz.c
554 @@ -1,6 +1,6 @@
555 /*
556 **********************************************************************
557 -* Copyright (C) 2000-2014, International Business Machines
558 +* Copyright (C) 2000-2015, International Business Machines
559 * Corporation and others. All Rights Reserved.
560 **********************************************************************
561 * file name: ucnvhz.c
585 @@ -16,7 +16,7 @@ 562 @@ -16,7 +16,7 @@
586 563
587 #include "unicode/utypes.h" 564 #include "unicode/utypes.h"
588 565
589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 566 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 567 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
591 568
592 #include "cmemory.h" 569 #include "cmemory.h"
593 #include "unicode/ucnv.h" 570 #include "unicode/ucnv.h"
594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={ 571 @@ -626,4 +626,4 @@ static const UConverterStaticData _HZStaticData={
595 0 572 const UConverterSharedData _HZData=
596 }; 573 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl);
597 574
598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 575 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */ 576 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */
600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c 577 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c
601 index fe61d40..16fd0a3 100644 578 index 8f0e312..d34d088 100644
602 --- a/source/common/ucnvisci.c 579 --- a/source/common/ucnvisci.c
603 +++ b/source/common/ucnvisci.c 580 +++ b/source/common/ucnvisci.c
581 @@ -1,6 +1,6 @@
582 /*
583 **********************************************************************
584 -* Copyright (C) 2000-2012, International Business Machines
585 +* Copyright (C) 2000-2015, International Business Machines
586 * Corporation and others. All Rights Reserved.
587 **********************************************************************
588 * file name: ucnvisci.c
604 @@ -17,7 +17,7 @@ 589 @@ -17,7 +17,7 @@
605 590
606 #include "unicode/utypes.h" 591 #include "unicode/utypes.h"
607 592
608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 593 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 594 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
610 595
611 #include "unicode/ucnv.h" 596 #include "unicode/ucnv.h"
612 #include "unicode/ucnv_cb.h" 597 #include "unicode/ucnv_cb.h"
613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c 598 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c
614 index c6e96e1..a6f8c9e 100644 599 index 3ced9e4..1aacd81 100644
615 --- a/source/common/ucnvscsu.c 600 --- a/source/common/ucnvscsu.c
616 +++ b/source/common/ucnvscsu.c 601 +++ b/source/common/ucnvscsu.c
602 @@ -1,7 +1,7 @@
603 /*
604 ******************************************************************************
605 *
606 -* Copyright (C) 2000-2011, International Business Machines
607 +* Copyright (C) 2000-2015, International Business Machines
608 * Corporation and others. All Rights Reserved.
609 *
610 ******************************************************************************
617 @@ -21,7 +21,7 @@ 611 @@ -21,7 +21,7 @@
618 612
619 #include "unicode/utypes.h" 613 #include "unicode/utypes.h"
620 614
621 -#if !UCONFIG_NO_CONVERSION 615 -#if !UCONFIG_NO_CONVERSION
622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 616 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
623 617
624 #include "unicode/ucnv.h" 618 #include "unicode/ucnv.h"
625 #include "unicode/ucnv_cb.h" 619 #include "unicode/ucnv_cb.h"
626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h 620 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h
627 index ed073b6..8df56e6 100644 621 index ed073b6..f6223bb 100644
628 --- a/source/common/unicode/uconfig.h 622 --- a/source/common/unicode/uconfig.h
629 +++ b/source/common/unicode/uconfig.h 623 +++ b/source/common/unicode/uconfig.h
630 @@ -270,6 +270,14 @@ 624 @@ -1,6 +1,6 @@
625 /*
626 **********************************************************************
627 -* Copyright (C) 2002-2014, International Business Machines
628 +* Copyright (C) 2002-2015, International Business Machines
629 * Corporation and others. All Rights Reserved.
630 **********************************************************************
631 * file name: uconfig.h
632 @@ -200,7 +200,7 @@
633 * It does not turn off legacy conversion because that is necessary
634 * for ICU to work on EBCDIC platforms (for the default converter).
635 * If you want "only collation" and do not build for EBCDIC,
636 - * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well.
637 + * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
638 *
639 * @stable ICU 2.4
640 */
641 @@ -270,6 +270,21 @@
631 #endif 642 #endif
632 643
633 /** 644 /**
634 + * This switch turns off all the converters NOT listed in 645 + * \def UCONFIG_ONLY_HTML_CONVERSION
635 + * the encoding standard : http://encoding.spec.whatwg.org 646 + * This switch turns off all of the converters NOT listed in
647 + * the HTML encoding standard:
648 + * http://www.w3.org/TR/encoding/#names-and-labels
649 + *
650 + * This is not possible on EBCDIC platforms
651 + * because they need ibm-37 or ibm-1047 default converters.
652 + *
653 + * @draft ICU 55
636 + */ 654 + */
637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION 655 +#ifndef UCONFIG_ONLY_HTML_CONVERSION
638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 656 +# define UCONFIG_ONLY_HTML_CONVERSION 0
639 +#endif 657 +#endif
640 + 658 +
641 +/** 659 +/**
642 * \def UCONFIG_NO_LEGACY_CONVERSION 660 * \def UCONFIG_NO_LEGACY_CONVERSION
643 * This switch turns off all converters except for 661 * This switch turns off all converters except for
644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) 662 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h
646 index a817262..89becca 100644
647 --- a/source/common/unicode/urename.h
648 +++ b/source/common/unicode/urename.h
649 @@ -73,12 +73,14 @@
650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
659 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
660 +#endif
661 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
662 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
663 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
664 @@ -94,14 +96,18 @@
665 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
666 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
667 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
668 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
669 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
670 +#endif
671 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
672 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
679 +#endif
680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp 663 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp
684 index 3efbd49..ba5b18c 100644 664 index 3efbd49..66d8f3a 100644
685 --- a/source/i18n/csdetect.cpp 665 --- a/source/i18n/csdetect.cpp
686 +++ b/source/i18n/csdetect.cpp 666 +++ b/source/i18n/csdetect.cpp
667 @@ -1,6 +1,6 @@
668 /*
669 **********************************************************************
670 - * Copyright (C) 2005-2013, International Business Machines
671 + * Copyright (C) 2005-2015, International Business Machines
672 * Corporation and others. All Rights Reserved.
673 **********************************************************************
674 */
687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { 675 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), 676 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
689 677
690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), 678 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 679 +#if !UCONFIG_ONLY_HTML_CONVERSION
692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), 680 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), 681 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
694 682
695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { 683 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), 684 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), 685 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) 686 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
699 +#endif 687 +#endif
700 }; 688 };
701 int32_t rCount = ARRAY_SIZE(tempArray); 689 int32_t rCount = ARRAY_SIZE(tempArray);
702 690
703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp 691 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp
704 index 3db0bc9..be3eafa 100644 692 index 3db0bc9..236a526 100644
705 --- a/source/i18n/csr2022.cpp 693 --- a/source/i18n/csr2022.cpp
706 +++ b/source/i18n/csr2022.cpp 694 +++ b/source/i18n/csr2022.cpp
695 @@ -1,6 +1,6 @@
696 /*
697 **********************************************************************
698 - * Copyright (C) 2005-2012, International Business Machines
699 + * Copyright (C) 2005-2015, International Business Machines
700 * Corporation and others. All Rights Reserved.
701 **********************************************************************
702 */
707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = { 703 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {
708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 704 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
709 }; 705 };
710 706
711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 707 +#if !UCONFIG_ONLY_HTML_CONVERSION
712 static const uint8_t escapeSequences_2022KR[][5] = { 708 static const uint8_t escapeSequences_2022KR[][5] = {
713 {0x1b, 0x24, 0x29, 0x43, 0x00} 709 {0x1b, 0x24, 0x29, 0x43, 0x00}
714 }; 710 };
715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = { 711 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {
716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 712 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 713 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
718 }; 714 };
719 +#endif 715 +#endif
720 716
721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} 717 CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
722 718
723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const 719 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
724 return (confidence > 0); 720 return (confidence > 0);
725 } 721 }
726 722
727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 723 +#if !UCONFIG_ONLY_HTML_CONVERSION
728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} 724 CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
729 725
730 const char *CharsetRecog_2022KR::getName() const { 726 const char *CharsetRecog_2022KR::getName() const {
731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const 727 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
732 results->set(textIn, this, confidence); 728 results->set(textIn, this, confidence);
733 return (confidence > 0); 729 return (confidence > 0);
734 } 730 }
735 +#endif 731 +#endif
736 732
737 CharsetRecog_2022::~CharsetRecog_2022() { 733 CharsetRecog_2022::~CharsetRecog_2022() {
738 // nothing to do 734 // nothing to do
739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h 735 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h
740 index 2ac2b87..dad22c7 100644 736 index 2ac2b87..f63e337 100644
741 --- a/source/i18n/csr2022.h 737 --- a/source/i18n/csr2022.h
742 +++ b/source/i18n/csr2022.h 738 +++ b/source/i18n/csr2022.h
739 @@ -1,6 +1,6 @@
740 /*
741 **********************************************************************
742 - * Copyright (C) 2005-2012, International Business Machines
743 + * Copyright (C) 2005-2015, International Business Machines
744 * Corporation and others. All Rights Reserved.
745 **********************************************************************
746 */
743 @@ -65,6 +65,7 @@ public: 747 @@ -65,6 +65,7 @@ public:
744 UBool match(InputText *textIn, CharsetMatch *results) const; 748 UBool match(InputText *textIn, CharsetMatch *results) const;
745 }; 749 };
746 750
747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 751 +#if !UCONFIG_ONLY_HTML_CONVERSION
748 class CharsetRecog_2022KR :public CharsetRecog_2022 { 752 class CharsetRecog_2022KR :public CharsetRecog_2022 {
749 public: 753 public:
750 virtual ~CharsetRecog_2022KR(); 754 virtual ~CharsetRecog_2022KR();
751 @@ -84,6 +85,7 @@ public: 755 @@ -84,6 +85,7 @@ public:
752 756
753 UBool match(InputText *textIn, CharsetMatch *results) const; 757 UBool match(InputText *textIn, CharsetMatch *results) const;
754 }; 758 };
755 +#endif 759 +#endif
756 760
757 U_NAMESPACE_END 761 U_NAMESPACE_END
758 762
759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp 763 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp
760 index d03367c..7b70dc1 100644 764 index d03367c..72fb959 100644
761 --- a/source/i18n/csrsbcs.cpp 765 --- a/source/i18n/csrsbcs.cpp
762 +++ b/source/i18n/csrsbcs.cpp 766 +++ b/source/i18n/csrsbcs.cpp
767 @@ -1,6 +1,6 @@
768 /*
769 **********************************************************************
770 - * Copyright (C) 2005-2013, International Business Machines
771 + * Copyright (C) 2005-2015, International Business Machines
772 * Corporation and others. All Rights Reserved.
773 **********************************************************************
774 */
763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det) 775 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)
764 return (int32_t) (rawPercent * 300.0); 776 return (int32_t) (rawPercent * 300.0);
765 } 777 }
766 778
767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 779 +#if !UCONFIG_ONLY_HTML_CONVERSION
768 static const uint8_t unshapeMap_IBM420[] = { 780 static const uint8_t unshapeMap_IBM420[] = {
769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 781 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 782 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det) 783 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)
772 } 784 }
773 } 785 }
774 } 786 }
775 +#endif 787 +#endif
776 788
777 CharsetRecog_sbcs::CharsetRecog_sbcs() 789 CharsetRecog_sbcs::CharsetRecog_sbcs()
778 { 790 {
779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = { 791 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {
780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 792 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
781 }; 793 };
782 794
783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 795 +#if !UCONFIG_ONLY_HTML_CONVERSION
784 static const int32_t ngrams_IBM424_he_rtl[] = { 796 static const int32_t ngrams_IBM424_he_rtl[] = {
785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, 797 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, 798 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= { 799 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {
788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, 800 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, 801 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
790 }; 802 };
791 +#endif 803 +#endif
792 804
793 //ISO-8859-1,2,5,6,7,8,9 Ngrams 805 //ISO-8859-1,2,5,6,7,8,9 Ngrams
794 806
795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const 807 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
796 return (confidence > 0); 808 return (confidence > 0);
797 } 809 }
798 810
799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 811 +#if !UCONFIG_ONLY_HTML_CONVERSION
800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() 812 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
801 { 813 {
802 // nothing to do 814 // nothing to do
803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results 815 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results
804 results->set(textIn, this, confidence); 816 results->set(textIn, this, confidence);
805 return (confidence > 0); 817 return (confidence > 0);
806 } 818 }
807 +#endif 819 +#endif
808 820
809 U_NAMESPACE_END 821 U_NAMESPACE_END
810 #endif 822 #endif
811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h 823 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h
812 index 2579c02..7789f9b 100644 824 index 2579c02..bd2a264 100644
813 --- a/source/i18n/csrsbcs.h 825 --- a/source/i18n/csrsbcs.h
814 +++ b/source/i18n/csrsbcs.h 826 +++ b/source/i18n/csrsbcs.h
827 @@ -1,6 +1,6 @@
828 /*
829 **********************************************************************
830 - * Copyright (C) 2005-2013, International Business Machines
831 + * Copyright (C) 2005-2015, International Business Machines
832 * Corporation and others. All Rights Reserved.
833 **********************************************************************
834 */
815 @@ -50,6 +50,7 @@ public: 835 @@ -50,6 +50,7 @@ public:
816 836
817 }; 837 };
818 838
819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 839 +#if !UCONFIG_ONLY_HTML_CONVERSION
820 class NGramParser_IBM420 : public NGramParser 840 class NGramParser_IBM420 : public NGramParser
821 { 841 {
822 private: 842 private:
823 @@ -61,6 +62,7 @@ private: 843 @@ -61,6 +62,7 @@ private:
824 public: 844 public:
825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); 845 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
826 }; 846 };
827 +#endif 847 +#endif
828 848
829 849
830 class CharsetRecog_sbcs : public CharsetRecognizer 850 class CharsetRecog_sbcs : public CharsetRecognizer
831 @@ -229,6 +231,7 @@ public: 851 @@ -229,6 +231,7 @@ public:
832 virtual UBool match(InputText *det, CharsetMatch *results) const; 852 virtual UBool match(InputText *det, CharsetMatch *results) const;
833 }; 853 };
834 854
835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 855 +#if !UCONFIG_ONLY_HTML_CONVERSION
836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs 856 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
837 { 857 {
838 public: 858 public:
839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { 859 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
840 860
841 virtual UBool match(InputText *det, CharsetMatch *results) const; 861 virtual UBool match(InputText *det, CharsetMatch *results) const;
842 }; 862 };
843 +#endif 863 +#endif
844 864
845 U_NAMESPACE_END 865 U_NAMESPACE_END
846 866
OLDNEW
« no previous file with comments | « icu.gyp ('k') | source/common/ucnv2022.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698