Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(248)

Side by Side Diff: patches/uconv.patch

Issue 1637683002: ICU 56 step 5: Apply the remaining local patches (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@56post56
Patch Set: drop uconv.patch Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « patches/linebrk.patch ('k') | patches/vscomp.patch » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
2 index 9556dd2..7b8df9b 100644
3 --- a/source/common/ucnv2022.cpp
4 +++ b/source/common/ucnv2022.cpp
5 @@ -152,7 +152,11 @@ typedef enum {
6 } StateEnum;
7
8 /* is the StateEnum charset value for a DBCS charset? */
9 +#if UCONFIG_NO_NON_HTML5_CONVERSION
10 +#define IS_JP_DBCS(cs) (JISX208==(cs))
11 +#else
12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
13 +#endif
14
15 #define CSM(cs) ((uint16_t)1<<(cs))
16
17 @@ -165,13 +169,23 @@ typedef enum {
18 * all versions, not just JIS7 and JIS8.
19 * - ICU does not distinguish between different versions of JIS X 0208.
20 */
21 +#if UCONFIG_NO_NON_HTML5_CONVERSION
22 +enum { MAX_JA_VERSION=0 };
23 +#else
24 enum { MAX_JA_VERSION=4 };
25 +#endif
26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
27 +/*
28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it.
29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
30 + */
31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
37 +#endif
38 };
39
40 typedef enum {
41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = {
42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022
43 };
44
45 -
46 /* Type def for refactoring changeState_2022 code*/
47 typedef enum{
48 #ifdef U_ENABLE_GENERIC_ISO_2022
49 ISO_2022=0,
50 #endif
51 +#if UCONFIG_NO_NON_HTML5_CONVERSION
52 + ISO_2022_JP=1
53 +#else
54 ISO_2022_JP=1,
55 ISO_2022_KR=2,
56 ISO_2022_CN=3
57 +#endif
58 } Variant2022;
59
60 /*********** ISO 2022 Converter Protos ***********/
61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
62 /* prevent indexing beyond jpCharsetMasks[] */
63 myConverterData->version = version = 0;
64 }
65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
67 myConverterData->myConverterArray[ISO8859_7] =
68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
69 }
70 +#endif
71 myConverterData->myConverterArray[JISX208] =
72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
74 if(jpCharsetMasks[version]&CSM(JISX212)) {
75 myConverterData->myConverterArray[JISX212] =
76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
78 myConverterData->myConverterArray[KSC5601] =
79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
80 }
81 +#endif
82
83 /* set the function pointers to appropriate funtions */
84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
87 myConverterData->name[len+1]='\0';
88 }
89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
91 (myLocale[2]=='_' || myLocale[2]=='\0'))
92 {
93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
95 }
96 }
97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
98 else{
99 #ifdef U_ENABLE_GENERIC_ISO_2022
100 myConverterData->isFirstBuffer = TRUE;
101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
103 };
104
105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
106 /*************** to unicode *******************/
107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
108 /* 0 1 2 3 4 5 6 7 8 9 */
109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
112 };
113 +#endif
114
115
116 static UCNV_TableStates_2022
117 @@ -878,6 +903,7 @@ DONE:
118 }
119 break;
120 /* case SS3_STATE: not used in ISO-2022-JP-x */
121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
122 case ISO8859_1:
123 case ISO8859_7:
124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
125 @@ -887,6 +913,7 @@ DONE:
126 myData2022->toU2022State.cs[2]=(int8_t)tempState;
127 }
128 break;
129 +#endif
130 default:
131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {
132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
133 @@ -898,6 +925,7 @@ DONE:
134 }
135 }
136 break;
137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
138 case ISO_2022_CN:
139 {
140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
141 @@ -959,6 +987,7 @@ DONE:
142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
143 }
144 break;
145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
146
147 default:
148 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv,
150 static const StateEnum jpCharsetPref[]={
151 ASCII,
152 JISX201,
153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
154 ISO8859_1,
155 ISO8859_7,
156 +#endif
157 JISX208,
158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
159 JISX212,
160 GB2312,
161 KSC5601,
162 +#endif
163 HWKANA_7BIT
164 };
165
166 @@ -1754,6 +1787,7 @@ getTrail:
167 g = 0;
168 }
169 break;
170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
171 case ISO8859_1:
172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
173 targetValue = (uint32_t)sourceChar - 0x80;
174 @@ -1762,6 +1796,7 @@ getTrail:
175 g = 2;
176 }
177 break;
178 +#endif
179 case HWKANA_7BIT:
180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) {
181 if(converterData->version==3) {
182 @@ -1823,6 +1858,7 @@ getTrail:
183 useFallback = FALSE;
184 }
185 break;
186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
187 case ISO8859_7:
188 /* G0 SBCS forced to 7-bit output */
189 len2 = MBCS_SINGLE_FROM_UCHAR32(
190 @@ -1837,6 +1873,7 @@ getTrail:
191 useFallback = FALSE;
192 }
193 break;
194 +#endif
195 default:
196 /* G0 DBCS */
197 len2 = MBCS_FROM_UCHAR32_ISO2022(
198 @@ -1844,6 +1881,7 @@ getTrail:
199 sourceChar, &value,
200 useFallback, MBCS_OUTPUT_2);
201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
203 if(cs0 == KSC5601) {
204 /*
205 * Check for valid bytes for the encoding scheme.
206 @@ -1855,6 +1893,7 @@ getTrail:
207 break;
208 }
209 }
210 +#endif
211 targetValue = value;
212 len = len2;
213 cs = cs0;
214 @@ -2148,6 +2187,7 @@ escape:
215 targetUniChar = mySourceChar;
216 }
217 break;
218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
219 case ISO8859_1:
220 if(mySourceChar <= 0x7f) {
221 targetUniChar = mySourceChar + 0x80;
222 @@ -2166,6 +2206,7 @@ escape:
223 /* return from a single-shift state to the previous one */
224 pToU2022State->g=pToU2022State->prevG;
225 break;
226 +#endif
227 case JISX201:
228 if(mySourceChar <= 0x7f) {
229 targetUniChar = jisx201ToU(mySourceChar);
230 @@ -2205,9 +2246,11 @@ getTrailByte:
231 } else {
232 /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */
233 mySourceChar = tmpSourceChar;
234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
235 if (cs == KSC5601) {
236 tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */
237 }
238 +#endif
239 tempBuf[0] = (char)(tmpSourceChar >> 8);
240 tempBuf[1] = (char)(tmpSourceChar);
241 }
242 @@ -2269,6 +2312,7 @@ endloop:
243 }
244
245
246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
247 /***************************************************************
248 * Rules for ISO-2022-KR encoding
249 * i) The KSC5601 designator sequence should appear only once in a file,
250 @@ -3412,6 +3456,7 @@ endloop:
251 args->target = myTarget;
252 args->source = mySource;
253 }
254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
255
256 static void
257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
259 /* include JIS X 0201 which is hardcoded */
260 sa->add(sa->set, 0xa5);
261 sa->add(sa->set, 0x203e);
262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
264 /* include Latin-1 for some variants of JP */
265 sa->addRange(sa->set, 0, 0xff);
266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
267 /* include ASCII for JP */
268 sa->addRange(sa->set, 0, 0x7f);
269 }
270 +#else
271 + /* include ASCII for JP */
272 + sa->addRange(sa->set, 0, 0x7f);
273 +#endif
274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) {
275 /*
276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0
277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
278 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
279 }
280 break;
281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
282 case 'c':
283 case 'z':
284 /* include ASCII for CN */
285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
286 cnvData->currentConverter, sa, which, pErrorCode);
287 /* the loop over myConverterArray[] will simply not find another converter */
288 break;
289 +#endif
290 default:
291 break;
292 }
293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
295 UConverterSetFilter filter;
296 if(cnvData->myConverterArray[i]!=NULL) {
297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
298 - cnvData->version==0 && i==CNS_11643
299 - ) {
300 + if(cnvData->locale[0]=='j' && i==JISX208) {
301 + /*
302 + * Only add code points that map to Shift-JIS codes
303 + * corresponding to JIS X 0208.
304 + */
305 + filter=UCNV_SET_FILTER_SJIS;
306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
308 + cnvData->version==0 && i==CNS_11643) {
309 /*
310 * Version-specific for CN:
311 * CN version 0 does not map CNS planes 3..7 although
312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
313 * The two versions create different Unicode sets.
314 */
315 filter=UCNV_SET_FILTER_2022_CN;
316 - } else if(cnvData->locale[0]=='j' && i==JISX208) {
317 - /*
318 - * Only add code points that map to Shift-JIS codes
319 - * corresponding to JIS X 0208.
320 - */
321 - filter=UCNV_SET_FILTER_SJIS;
322 } else if(i==KSC5601) {
323 /*
324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables)
325 * are broader than GR94.
326 */
327 filter=UCNV_SET_FILTER_GR94DBCS;
328 +#endif
329 } else {
330 filter=UCNV_SET_FILTER_NONE;
331 }
332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={
333
334 } // namespace
335
336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
337 /************* KR ***************/
338 static const UConverterImpl _ISO2022KRImpl={
339 UCNV_ISO_2022,
340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={
341 };
342
343 } // namespace
344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
345
346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp
348 index 4940310..047f18a 100644
349 --- a/source/common/ucnv_bld.cpp
350 +++ b/source/common/ucnv_bld.cpp
351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
352
353 #if UCONFIG_NO_LEGACY_CONVERSION
354 NULL,
355 +#else
356 + &_ISO2022Data,
357 +#endif
358 +
359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
360 NULL, NULL, NULL, NULL, NULL, NULL,
361 NULL, NULL, NULL, NULL, NULL, NULL,
362 NULL,
363 #else
364 - &_ISO2022Data,
365 &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6,
366 &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19,
367 &_HZData,
368 #endif
369
370 +#if UCONFIG_NO_NON_HTML5_CONVERSION
371 + NULL,
372 +#else
373 &_SCSUData,
374 +#endif
375
376 -#if UCONFIG_NO_LEGACY_CONVERSION
377 +
378 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
379 NULL,
380 #else
381 &_ISCIIData,
382 #endif
383
384 &_ASCIIData,
385 +#if UCONFIG_NO_NON_HTML5_CONVERSION
386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL,
387 +#else
388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
389 +#endif
390
391 -#if UCONFIG_NO_LEGACY_CONVERSION
392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
393 NULL,
394 #else
395 &_CompoundTextData
396 @@ -105,18 +118,24 @@ static struct {
397 const char *name;
398 const UConverterType type;
399 } const cnvNameType[] = {
400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
401 { "bocu1", UCNV_BOCU1 },
402 { "cesu8", UCNV_CESU8 },
403 -#if !UCONFIG_NO_LEGACY_CONVERSION
404 +#endif
405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
406 { "hz",UCNV_HZ },
407 #endif
408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
409 { "imapmailboxname", UCNV_IMAP_MAILBOX },
410 -#if !UCONFIG_NO_LEGACY_CONVERSION
411 +#endif
412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
413 { "iscii", UCNV_ISCII },
414 +#endif
415 +#if !UCONFIG_NO_LEGACY_CONVERSION
416 { "iso2022", UCNV_ISO_2022 },
417 #endif
418 { "iso88591", UCNV_LATIN_1 },
419 -#if !UCONFIG_NO_LEGACY_CONVERSION
420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
421 { "lmbcs1", UCNV_LMBCS_1 },
422 { "lmbcs11",UCNV_LMBCS_11 },
423 { "lmbcs16",UCNV_LMBCS_16 },
424 @@ -130,7 +149,9 @@ static struct {
425 { "lmbcs6", UCNV_LMBCS_6 },
426 { "lmbcs8", UCNV_LMBCS_8 },
427 #endif
428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
429 { "scsu", UCNV_SCSU },
430 +#endif
431 { "usascii", UCNV_US_ASCII },
432 { "utf16", UCNV_UTF16 },
433 { "utf16be", UCNV_UTF16_BigEndian },
434 @@ -152,9 +173,13 @@ static struct {
435 { "utf32oppositeendian", UCNV_UTF32_BigEndian },
436 { "utf32platformendian", UCNV_UTF32_LittleEndian },
437 #endif
438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
439 { "utf7", UCNV_UTF7 },
440 +#endif
441 { "utf8", UCNV_UTF8 },
442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
443 { "x11compoundtext", UCNV_COMPOUND_TEXT}
444 +#endif
445 };
446
447
448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h
449 index 402e2c9..5fad446 100644
450 --- a/source/common/ucnv_cnv.h
451 +++ b/source/common/ucnv_cnv.h
452 @@ -256,11 +256,15 @@ struct UConverterImpl {
453 extern const UConverterSharedData
454 _MBCSData, _Latin1Data,
455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
456 - _ISO2022Data,
457 + _ISO2022Data,
458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
461 _HZData,_ISCIIData, _SCSUData, _ASCIIData,
462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
463 +#else
464 + _ASCIIData, _UTF16Data, _UTF32Data;
465 +#endif
466
467 U_CDECL_END
468
469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c
470 index ec0e9c2..e723fa6 100644
471 --- a/source/common/ucnv_ct.c
472 +++ b/source/common/ucnv_ct.c
473 @@ -14,7 +14,7 @@
474
475 #include "unicode/utypes.h"
476
477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
479
480 #include "unicode/ucnv.h"
481 #include "unicode/uset.h"
482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c
483 index 1d921dd..a4fccee 100644
484 --- a/source/common/ucnv_lmb.c
485 +++ b/source/common/ucnv_lmb.c
486 @@ -25,7 +25,7 @@
487
488 #include "unicode/utypes.h"
489
490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
492
493 #include "unicode/ucnv_err.h"
494 #include "unicode/ucnv.h"
495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c
496 index 42943f4..6466b87 100644
497 --- a/source/common/ucnv_u7.c
498 +++ b/source/common/ucnv_u7.c
499 @@ -16,7 +16,7 @@
500
501 #include "unicode/utypes.h"
502
503 -#if !UCONFIG_NO_CONVERSION
504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
505
506 #include "unicode/ucnv.h"
507 #include "ucnv_bld.h"
508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c
509 index 8ee9fe5..24205f5 100644
510 --- a/source/common/ucnv_u8.c
511 +++ b/source/common/ucnv_u8.c
512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
513 static const uint32_t
514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
515
516 +static UBool hasCESU8Data(const UConverter *cnv)
517 +{
518 +#if UCONFIG_NO_NON_HTML5_CONVERSION
519 + return FALSE;
520 +#else
521 + return (UBool)(cnv->sharedData == &_CESU8Data);
522 +#endif
523 +}
524 +
525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
526 UErrorCode * err)
527 {
528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
529 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
530 const UChar *targetLimit = args->targetLimit;
531 unsigned char *toUBytes = cnv->toUBytes;
532 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
533 + UBool isCESU8 = hasCESU8Data(cnv);
534 uint32_t ch, ch2 = 0;
535 int32_t i, inBytes;
536 -
537 +
538 /* Restore size of current sequence */
539 if (cnv->toUnicodeStatus && myTarget < targetLimit)
540 {
541 @@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args,
542 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
543 const UChar *targetLimit = args->targetLimit;
544 unsigned char *toUBytes = cnv->toUBytes;
545 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
546 + UBool isCESU8 = hasCESU8Data(cnv);
547 uint32_t ch, ch2 = 0;
548 int32_t i, inBytes;
549
550 @@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args,
551 UChar32 ch;
552 uint8_t tempBuf[4];
553 int32_t indexToWrite;
554 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
555 + UBool isNotCESU8 = !hasCESU8Data(cnv);
556
557 if (cnv->fromUChar32 && myTarget < targetLimit)
558 {
559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * ar
560 int32_t offsetNum, nextSourceIndex;
561 int32_t indexToWrite;
562 uint8_t tempBuf[4];
563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
564 + UBool isNotCESU8 = !hasCESU8Data(cnv);
565
566 if (cnv->fromUChar32 && myTarget < targetLimit)
567 {
568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp
569 index b97d666..281d6d9 100644
570 --- a/source/common/ucnvbocu.cpp
571 +++ b/source/common/ucnvbocu.cpp
572 @@ -19,7 +19,7 @@
573
574 #include "unicode/utypes.h"
575
576 -#if !UCONFIG_NO_CONVERSION
577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
578
579 #include "unicode/ucnv.h"
580 #include "unicode/ucnv_cb.h"
581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c
582 index 3760c39..51825e2 100644
583 --- a/source/common/ucnvhz.c
584 +++ b/source/common/ucnvhz.c
585 @@ -16,7 +16,7 @@
586
587 #include "unicode/utypes.h"
588
589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
591
592 #include "cmemory.h"
593 #include "unicode/ucnv.h"
594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={
595 0
596 };
597
598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */
600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c
601 index fe61d40..16fd0a3 100644
602 --- a/source/common/ucnvisci.c
603 +++ b/source/common/ucnvisci.c
604 @@ -17,7 +17,7 @@
605
606 #include "unicode/utypes.h"
607
608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
610
611 #include "unicode/ucnv.h"
612 #include "unicode/ucnv_cb.h"
613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c
614 index c6e96e1..a6f8c9e 100644
615 --- a/source/common/ucnvscsu.c
616 +++ b/source/common/ucnvscsu.c
617 @@ -21,7 +21,7 @@
618
619 #include "unicode/utypes.h"
620
621 -#if !UCONFIG_NO_CONVERSION
622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
623
624 #include "unicode/ucnv.h"
625 #include "unicode/ucnv_cb.h"
626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h
627 index ed073b6..8df56e6 100644
628 --- a/source/common/unicode/uconfig.h
629 +++ b/source/common/unicode/uconfig.h
630 @@ -270,6 +270,14 @@
631 #endif
632
633 /**
634 + * This switch turns off all the converters NOT listed in
635 + * the encoding standard : http://encoding.spec.whatwg.org
636 + */
637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0
639 +#endif
640 +
641 +/**
642 * \def UCONFIG_NO_LEGACY_CONVERSION
643 * This switch turns off all converters except for
644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h
646 index a817262..89becca 100644
647 --- a/source/common/unicode/urename.h
648 +++ b/source/common/unicode/urename.h
649 @@ -73,12 +73,14 @@
650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
659 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
660 +#endif
661 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
662 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
663 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
664 @@ -94,14 +96,18 @@
665 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
666 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
667 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
668 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
669 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
670 +#endif
671 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
672 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
679 +#endif
680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp
684 index 3efbd49..ba5b18c 100644
685 --- a/source/i18n/csdetect.cpp
686 +++ b/source/i18n/csdetect.cpp
687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
689
690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
694
695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
699 +#endif
700 };
701 int32_t rCount = ARRAY_SIZE(tempArray);
702
703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp
704 index 3db0bc9..be3eafa 100644
705 --- a/source/i18n/csr2022.cpp
706 +++ b/source/i18n/csr2022.cpp
707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {
708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
709 };
710
711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
712 static const uint8_t escapeSequences_2022KR[][5] = {
713 {0x1b, 0x24, 0x29, 0x43, 0x00}
714 };
715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {
716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
718 };
719 +#endif
720
721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
722
723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetMatch *results) const
724 return (confidence > 0);
725 }
726
727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
729
730 const char *CharsetRecog_2022KR::getName() const {
731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetMatch *results) const
732 results->set(textIn, this, confidence);
733 return (confidence > 0);
734 }
735 +#endif
736
737 CharsetRecog_2022::~CharsetRecog_2022() {
738 // nothing to do
739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h
740 index 2ac2b87..dad22c7 100644
741 --- a/source/i18n/csr2022.h
742 +++ b/source/i18n/csr2022.h
743 @@ -65,6 +65,7 @@ public:
744 UBool match(InputText *textIn, CharsetMatch *results) const;
745 };
746
747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
748 class CharsetRecog_2022KR :public CharsetRecog_2022 {
749 public:
750 virtual ~CharsetRecog_2022KR();
751 @@ -84,6 +85,7 @@ public:
752
753 UBool match(InputText *textIn, CharsetMatch *results) const;
754 };
755 +#endif
756
757 U_NAMESPACE_END
758
759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp
760 index d03367c..7b70dc1 100644
761 --- a/source/i18n/csrsbcs.cpp
762 +++ b/source/i18n/csrsbcs.cpp
763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)
764 return (int32_t) (rawPercent * 300.0);
765 }
766
767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
768 static const uint8_t unshapeMap_IBM420[] = {
769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)
772 }
773 }
774 }
775 +#endif
776
777 CharsetRecog_sbcs::CharsetRecog_sbcs()
778 {
779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {
780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
781 };
782
783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
784 static const int32_t ngrams_IBM424_he_rtl[] = {
785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {
788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
790 };
791 +#endif
792
793 //ISO-8859-1,2,5,6,7,8,9 Ngrams
794
795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
796 return (confidence > 0);
797 }
798
799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
801 {
802 // nothing to do
803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results
804 results->set(textIn, this, confidence);
805 return (confidence > 0);
806 }
807 +#endif
808
809 U_NAMESPACE_END
810 #endif
811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h
812 index 2579c02..7789f9b 100644
813 --- a/source/i18n/csrsbcs.h
814 +++ b/source/i18n/csrsbcs.h
815 @@ -50,6 +50,7 @@ public:
816
817 };
818
819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
820 class NGramParser_IBM420 : public NGramParser
821 {
822 private:
823 @@ -61,6 +62,7 @@ private:
824 public:
825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
826 };
827 +#endif
828
829
830 class CharsetRecog_sbcs : public CharsetRecognizer
831 @@ -229,6 +231,7 @@ public:
832 virtual UBool match(InputText *det, CharsetMatch *results) const;
833 };
834
835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
837 {
838 public:
839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
840
841 virtual UBool match(InputText *det, CharsetMatch *results) const;
842 };
843 +#endif
844
845 U_NAMESPACE_END
846
OLDNEW
« no previous file with comments | « patches/linebrk.patch ('k') | patches/vscomp.patch » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698