Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(38)

Side by Side Diff: patches/uconv.patch

Issue 839713003: ICU update to 54 step 3 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: fix big5 mapping Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « README.chromium ('k') | scripts/big5_gen.sh » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 Index: source/common/ucnv2022.cpp 1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp
2 =================================================================== 2 index 9556dd2..7b8df9b 100644
3 --- source/common/ucnv2022.cpp» (revision 259715) 3 --- a/source/common/ucnv2022.cpp
4 +++ source/common/ucnv2022.cpp» (working copy) 4 +++ b/source/common/ucnv2022.cpp
5 @@ -154,7 +154,11 @@ 5 @@ -152,7 +152,11 @@ typedef enum {
6 } StateEnum; 6 } StateEnum;
7 7
8 /* is the StateEnum charset value for a DBCS charset? */ 8 /* is the StateEnum charset value for a DBCS charset? */
9 +#if UCONFIG_NO_NON_HTML5_CONVERSION 9 +#if UCONFIG_NO_NON_HTML5_CONVERSION
10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) 10 +#define IS_JP_DBCS(cs) (JISX208==(cs))
11 +#else 11 +#else
12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) 12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)
13 +#endif 13 +#endif
14 14
15 #define CSM(cs) ((uint16_t)1<<(cs)) 15 #define CSM(cs) ((uint16_t)1<<(cs))
16 16
17 @@ -167,13 +171,23 @@ 17 @@ -165,13 +169,23 @@ typedef enum {
18 * all versions, not just JIS7 and JIS8. 18 * all versions, not just JIS7 and JIS8.
19 * - ICU does not distinguish between different versions of JIS X 0208. 19 * - ICU does not distinguish between different versions of JIS X 0208.
20 */ 20 */
21 +#if UCONFIG_NO_NON_HTML5_CONVERSION 21 +#if UCONFIG_NO_NON_HTML5_CONVERSION
22 +enum { MAX_JA_VERSION=0 }; 22 +enum { MAX_JA_VERSION=0 };
23 +#else 23 +#else
24 enum { MAX_JA_VERSION=4 }; 24 enum { MAX_JA_VERSION=4 };
25 +#endif 25 +#endif
26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
27 +/* 27 +/*
28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it. 28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it.
29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885
30 + */ 30 + */
31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
37 +#endif 37 +#endif
38 }; 38 };
39 39
40 typedef enum { 40 typedef enum {
41 @@ -360,15 +374,18 @@ 41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES _2022] = {
42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202 2 ,VALID_TERMINAL_2022 42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202 2 ,VALID_TERMINAL_2022
43 }; 43 };
44 44
45 - 45 -
46 /* Type def for refactoring changeState_2022 code*/ 46 /* Type def for refactoring changeState_2022 code*/
47 typedef enum{ 47 typedef enum{
48 #ifdef U_ENABLE_GENERIC_ISO_2022 48 #ifdef U_ENABLE_GENERIC_ISO_2022
49 ISO_2022=0, 49 ISO_2022=0,
50 #endif 50 #endif
51 +#if UCONFIG_NO_NON_HTML5_CONVERSION 51 +#if UCONFIG_NO_NON_HTML5_CONVERSION
52 + ISO_2022_JP=1 52 + ISO_2022_JP=1
53 +#else 53 +#else
54 ISO_2022_JP=1, 54 ISO_2022_JP=1,
55 ISO_2022_KR=2, 55 ISO_2022_KR=2,
56 ISO_2022_CN=3 56 ISO_2022_CN=3
57 +#endif 57 +#endif
58 } Variant2022; 58 } Variant2022;
59 59
60 /*********** ISO 2022 Converter Protos ***********/ 60 /*********** ISO 2022 Converter Protos ***********/
61 @@ -485,12 +502,15 @@ 61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, U ErrorCode *errorCode){
62 /* prevent indexing beyond jpCharsetMasks[] */ 62 /* prevent indexing beyond jpCharsetMasks[] */
63 myConverterData->version = version = 0; 63 myConverterData->version = version = 0;
64 } 64 }
65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
67 myConverterData->myConverterArray[ISO8859_7] = 67 myConverterData->myConverterArray[ISO8859_7] =
68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); 68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
69 } 69 }
70 +#endif 70 +#endif
71 myConverterData->myConverterArray[JISX208] = 71 myConverterData->myConverterArray[JISX208] =
72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro rCode); 72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro rCode);
73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
74 if(jpCharsetMasks[version]&CSM(JISX212)) { 74 if(jpCharsetMasks[version]&CSM(JISX212)) {
75 myConverterData->myConverterArray[JISX212] = 75 myConverterData->myConverterArray[JISX212] =
76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e rrorCode); 76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e rrorCode);
77 @@ -503,6 +523,7 @@ 77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){
78 myConverterData->myConverterArray[KSC5601] = 78 myConverterData->myConverterArray[KSC5601] =
79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e rrorCode); 79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e rrorCode);
80 } 80 }
81 +#endif 81 +#endif
82 82
83 /* set the function pointers to appropriate funtions */ 83 /* set the function pointers to appropriate funtions */
84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
85 @@ -513,6 +534,7 @@ 85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){
86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0' ); 86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0' );
87 myConverterData->name[len+1]='\0'; 87 myConverterData->name[len+1]='\0';
88 } 88 }
89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && 90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
91 (myLocale[2]=='_' || myLocale[2]=='\0')) 91 (myLocale[2]=='_' || myLocale[2]=='\0'))
92 { 92 {
93 @@ -582,6 +604,7 @@ 93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr rorCode *errorCode){
94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver sion=2"); 94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver sion=2");
95 } 95 }
96 } 96 }
97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION 97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION
98 else{ 98 else{
99 #ifdef U_ENABLE_GENERIC_ISO_2022 99 #ifdef U_ENABLE_GENERIC_ISO_2022
100 myConverterData->isFirstBuffer = TRUE; 100 myConverterData->isFirstBuffer = TRUE;
101 @@ -716,6 +739,7 @@ 101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= {
102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
103 }; 103 };
104 104
105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
106 /*************** to unicode *******************/ 106 /*************** to unicode *******************/
107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { 107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
108 /* 0 1 2 3 4 5 6 7 8 9 * / 108 /* 0 1 2 3 4 5 6 7 8 9 * /
109 @@ -728,6 +752,7 @@ 109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= {
110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE 110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST ATE
111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE 111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE
112 }; 112 };
113 +#endif 113 +#endif
114 114
115 115
116 static UCNV_TableStates_2022 116 static UCNV_TableStates_2022
117 @@ -880,6 +905,7 @@ 117 @@ -878,6 +903,7 @@ DONE:
118 } 118 }
119 break; 119 break;
120 /* case SS3_STATE: not used in ISO-2022-JP-x */ 120 /* case SS3_STATE: not used in ISO-2022-JP-x */
121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
122 case ISO8859_1: 122 case ISO8859_1:
123 case ISO8859_7: 123 case ISO8859_7:
124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) = = 0) { 124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) = = 0) {
125 @@ -889,6 +915,7 @@ 125 @@ -887,6 +913,7 @@ DONE:
126 myData2022->toU2022State.cs[2]=(int8_t)tempState; 126 myData2022->toU2022State.cs[2]=(int8_t)tempState;
127 } 127 }
128 break; 128 break;
129 +#endif 129 +#endif
130 default: 130 default:
131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) = = 0) { 131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) = = 0) {
132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
133 @@ -900,6 +927,7 @@ 133 @@ -898,6 +925,7 @@ DONE:
134 } 134 }
135 } 135 }
136 break; 136 break;
137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
138 case ISO_2022_CN: 138 case ISO_2022_CN:
139 { 139 {
140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; 140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset];
141 @@ -961,6 +989,7 @@ 141 @@ -959,6 +987,7 @@ DONE:
142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; 142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE;
143 } 143 }
144 break; 144 break;
145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
146 146
147 default: 147 default:
148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; 148 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
149 @@ -1381,12 +1410,16 @@ 149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv,
150 static const StateEnum jpCharsetPref[]={ 150 static const StateEnum jpCharsetPref[]={
151 ASCII, 151 ASCII,
152 JISX201, 152 JISX201,
153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
154 ISO8859_1, 154 ISO8859_1,
155 ISO8859_7, 155 ISO8859_7,
156 +#endif 156 +#endif
157 JISX208, 157 JISX208,
158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
159 JISX212, 159 JISX212,
160 GB2312, 160 GB2312,
161 KSC5601, 161 KSC5601,
162 +#endif 162 +#endif
163 HWKANA_7BIT 163 HWKANA_7BIT
164 }; 164 };
165 165
166 @@ -1756,6 +1789,7 @@ 166 @@ -1754,6 +1787,7 @@ getTrail:
167 g = 0; 167 g = 0;
168 } 168 }
169 break; 169 break;
170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
171 case ISO8859_1: 171 case ISO8859_1:
172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { 172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) {
173 targetValue = (uint32_t)sourceChar - 0x80; 173 targetValue = (uint32_t)sourceChar - 0x80;
174 @@ -1764,6 +1798,7 @@ 174 @@ -1762,6 +1796,7 @@ getTrail:
175 g = 2; 175 g = 2;
176 } 176 }
177 break; 177 break;
178 +#endif 178 +#endif
179 case HWKANA_7BIT: 179 case HWKANA_7BIT:
180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H WKANA_START)) { 180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H WKANA_START)) {
181 if(converterData->version==3) { 181 if(converterData->version==3) {
182 @@ -1825,6 +1860,7 @@ 182 @@ -1823,6 +1858,7 @@ getTrail:
183 useFallback = FALSE; 183 useFallback = FALSE;
184 } 184 }
185 break; 185 break;
186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
187 case ISO8859_7: 187 case ISO8859_7:
188 /* G0 SBCS forced to 7-bit output */ 188 /* G0 SBCS forced to 7-bit output */
189 len2 = MBCS_SINGLE_FROM_UCHAR32( 189 len2 = MBCS_SINGLE_FROM_UCHAR32(
190 @@ -1839,6 +1875,7 @@ 190 @@ -1837,6 +1873,7 @@ getTrail:
191 useFallback = FALSE; 191 useFallback = FALSE;
192 } 192 }
193 break; 193 break;
194 +#endif 194 +#endif
195 default: 195 default:
196 /* G0 DBCS */ 196 /* G0 DBCS */
197 len2 = MBCS_FROM_UCHAR32_ISO2022( 197 len2 = MBCS_FROM_UCHAR32_ISO2022(
198 @@ -1846,6 +1883,7 @@ 198 @@ -1844,6 +1881,7 @@ getTrail:
199 sourceChar, &value, 199 sourceChar, &value,
200 useFallback, MBCS_OUTPUT_2); 200 useFallback, MBCS_OUTPUT_2);
201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ 201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */
202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
203 if(cs0 == KSC5601) { 203 if(cs0 == KSC5601) {
204 /* 204 /*
205 * Check for valid bytes for the encoding scheme. 205 * Check for valid bytes for the encoding scheme.
206 @@ -1857,6 +1895,7 @@ 206 @@ -1855,6 +1893,7 @@ getTrail:
207 break; 207 break;
208 } 208 }
209 } 209 }
210 +#endif 210 +#endif
211 targetValue = value; 211 targetValue = value;
212 len = len2; 212 len = len2;
213 cs = cs0; 213 cs = cs0;
214 @@ -2150,6 +2189,7 @@ 214 @@ -2148,6 +2187,7 @@ escape:
215 targetUniChar = mySourceChar; 215 targetUniChar = mySourceChar;
216 } 216 }
217 break; 217 break;
218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
219 case ISO8859_1: 219 case ISO8859_1:
220 if(mySourceChar <= 0x7f) { 220 if(mySourceChar <= 0x7f) {
221 targetUniChar = mySourceChar + 0x80; 221 targetUniChar = mySourceChar + 0x80;
222 @@ -2168,6 +2208,7 @@ 222 @@ -2166,6 +2206,7 @@ escape:
223 /* return from a single-shift state to the previous one */ 223 /* return from a single-shift state to the previous one */
224 pToU2022State->g=pToU2022State->prevG; 224 pToU2022State->g=pToU2022State->prevG;
225 break; 225 break;
226 +#endif 226 +#endif
227 case JISX201: 227 case JISX201:
228 if(mySourceChar <= 0x7f) { 228 if(mySourceChar <= 0x7f) {
229 targetUniChar = jisx201ToU(mySourceChar); 229 targetUniChar = jisx201ToU(mySourceChar);
230 @@ -2207,9 +2248,11 @@ 230 @@ -2205,9 +2246,11 @@ getTrailByte:
231 } else { 231 } else {
232 /* Copy before we modify tmpSourceChar so toUni codeCallback() sees the correct bytes. */ 232 /* Copy before we modify tmpSourceChar so toUni codeCallback() sees the correct bytes. */
233 mySourceChar = tmpSourceChar; 233 mySourceChar = tmpSourceChar;
234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
235 if (cs == KSC5601) { 235 if (cs == KSC5601) {
236 tmpSourceChar += 0x8080; /* = _2022ToGR94D BCS(tmpSourceChar) */ 236 tmpSourceChar += 0x8080; /* = _2022ToGR94D BCS(tmpSourceChar) */
237 } 237 }
238 +#endif 238 +#endif
239 tempBuf[0] = (char)(tmpSourceChar >> 8); 239 tempBuf[0] = (char)(tmpSourceChar >> 8);
240 tempBuf[1] = (char)(tmpSourceChar); 240 tempBuf[1] = (char)(tmpSourceChar);
241 } 241 }
242 @@ -2271,6 +2314,7 @@ 242 @@ -2269,6 +2312,7 @@ endloop:
243 } 243 }
244 244
245 245
246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
247 /*************************************************************** 247 /***************************************************************
248 * Rules for ISO-2022-KR encoding 248 * Rules for ISO-2022-KR encoding
249 * i) The KSC5601 designator sequence should appear only once in a file, 249 * i) The KSC5601 designator sequence should appear only once in a file,
250 @@ -3414,6 +3458,7 @@ 250 @@ -3412,6 +3456,7 @@ endloop:
251 args->target = myTarget; 251 args->target = myTarget;
252 args->source = mySource; 252 args->source = mySource;
253 } 253 }
254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
255 255
256 static void 256 static void
257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError Code *err) { 257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError Code *err) {
258 @@ -3615,6 +3660,7 @@ 258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
259 /* include JIS X 0201 which is hardcoded */ 259 /* include JIS X 0201 which is hardcoded */
260 sa->add(sa->set, 0xa5); 260 sa->add(sa->set, 0xa5);
261 sa->add(sa->set, 0x203e); 261 sa->add(sa->set, 0x203e);
262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { 263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {
264 /* include Latin-1 for some variants of JP */ 264 /* include Latin-1 for some variants of JP */
265 sa->addRange(sa->set, 0, 0xff); 265 sa->addRange(sa->set, 0, 0xff);
266 @@ -3622,6 +3668,10 @@ 266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
267 /* include ASCII for JP */ 267 /* include ASCII for JP */
268 sa->addRange(sa->set, 0, 0x7f); 268 sa->addRange(sa->set, 0, 0x7f);
269 } 269 }
270 +#else 270 +#else
271 + /* include ASCII for JP */ 271 + /* include ASCII for JP */
272 + sa->addRange(sa->set, 0, 0x7f); 272 + sa->addRange(sa->set, 0, 0x7f);
273 +#endif 273 +#endif
274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_ AND_FALLBACK_SET) { 274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_ AND_FALLBACK_SET) {
275 /* 275 /*
276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))! =0 276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))! =0
277 @@ -3640,6 +3690,7 @@ 277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); 278 sa->addRange(sa->set, HWKANA_START, HWKANA_END);
279 } 279 }
280 break; 280 break;
281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
282 case 'c': 282 case 'c':
283 case 'z': 283 case 'z':
284 /* include ASCII for CN */ 284 /* include ASCII for CN */
285 @@ -3651,6 +3702,7 @@ 285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
286 cnvData->currentConverter, sa, which, pErrorCode); 286 cnvData->currentConverter, sa, which, pErrorCode);
287 /* the loop over myConverterArray[] will simply not find another conver ter */ 287 /* the loop over myConverterArray[] will simply not find another conver ter */
288 break; 288 break;
289 +#endif 289 +#endif
290 default: 290 default:
291 break; 291 break;
292 } 292 }
293 @@ -3671,10 +3723,16 @@ 293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { 294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {
295 UConverterSetFilter filter; 295 UConverterSetFilter filter;
296 if(cnvData->myConverterArray[i]!=NULL) { 296 if(cnvData->myConverterArray[i]!=NULL) {
297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
298 - cnvData->version==0 && i==CNS_11643 298 - cnvData->version==0 && i==CNS_11643
299 - ) { 299 - ) {
300 + if(cnvData->locale[0]=='j' && i==JISX208) { 300 + if(cnvData->locale[0]=='j' && i==JISX208) {
301 /* 301 + /*
302 + * Only add code points that map to Shift-JIS codes 302 + * Only add code points that map to Shift-JIS codes
303 + * corresponding to JIS X 0208. 303 + * corresponding to JIS X 0208.
304 + */ 304 + */
305 + filter=UCNV_SET_FILTER_SJIS; 305 + filter=UCNV_SET_FILTER_SJIS;
306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && 307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&
308 + cnvData->version==0 && i==CNS_11643) { 308 + cnvData->version==0 && i==CNS_11643) {
309 + /* 309 /*
310 * Version-specific for CN: 310 * Version-specific for CN:
311 * CN version 0 does not map CNS planes 3..7 although 311 * CN version 0 does not map CNS planes 3..7 although
312 * they are all available in the CNS conversion table; 312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv,
313 @@ -3682,18 +3740,13 @@
314 * The two versions create different Unicode sets. 313 * The two versions create different Unicode sets.
315 */ 314 */
316 filter=UCNV_SET_FILTER_2022_CN; 315 filter=UCNV_SET_FILTER_2022_CN;
317 - } else if(cnvData->locale[0]=='j' && i==JISX208) { 316 - } else if(cnvData->locale[0]=='j' && i==JISX208) {
318 - /* 317 - /*
319 - * Only add code points that map to Shift-JIS codes 318 - * Only add code points that map to Shift-JIS codes
320 - * corresponding to JIS X 0208. 319 - * corresponding to JIS X 0208.
321 - */ 320 - */
322 - filter=UCNV_SET_FILTER_SJIS; 321 - filter=UCNV_SET_FILTER_SJIS;
323 } else if(i==KSC5601) { 322 } else if(i==KSC5601) {
324 /* 323 /*
325 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o n multiple tables) 324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o n multiple tables)
326 * are broader than GR94. 325 * are broader than GR94.
327 */ 326 */
328 filter=UCNV_SET_FILTER_GR94DBCS; 327 filter=UCNV_SET_FILTER_GR94DBCS;
329 +#endif 328 +#endif
330 } else { 329 } else {
331 filter=UCNV_SET_FILTER_NONE; 330 filter=UCNV_SET_FILTER_NONE;
332 } 331 }
333 @@ -3831,6 +3884,7 @@ 332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={
334 333
335 } // namespace 334 } // namespace
336 335
337 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
338 /************* KR ***************/ 337 /************* KR ***************/
339 static const UConverterImpl _ISO2022KRImpl={ 338 static const UConverterImpl _ISO2022KRImpl={
340 UCNV_ISO_2022, 339 UCNV_ISO_2022,
341 @@ -3947,5 +4001,6 @@ 340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={
342 }; 341 };
343 342
344 } // namespace 343 } // namespace
345 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ 344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */
346 345
347 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
348 Index: source/common/ucnvbocu.cpp 347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp
349 =================================================================== 348 index 4940310..047f18a 100644
350 --- source/common/ucnvbocu.cpp» (revision 259715) 349 --- a/source/common/ucnv_bld.cpp
351 +++ source/common/ucnvbocu.cpp» (working copy) 350 +++ b/source/common/ucnv_bld.cpp
352 @@ -19,7 +19,7 @@ 351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={
353
354 #include "unicode/utypes.h"
355
356 -#if !UCONFIG_NO_CONVERSION
357 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
358
359 #include "unicode/ucnv.h"
360 #include "unicode/ucnv_cb.h"
361 Index: source/common/ucnvisci.c
362 ===================================================================
363 --- source/common/ucnvisci.c» (revision 259715)
364 +++ source/common/ucnvisci.c» (working copy)
365 @@ -17,7 +17,7 @@
366
367 #include "unicode/utypes.h"
368
369 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
370 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
371
372 #include "unicode/ucnv.h"
373 #include "unicode/ucnv_cb.h"
374 Index: source/common/ucnvscsu.c
375 ===================================================================
376 --- source/common/ucnvscsu.c» (revision 259715)
377 +++ source/common/ucnvscsu.c» (working copy)
378 @@ -21,7 +21,7 @@
379
380 #include "unicode/utypes.h"
381
382 -#if !UCONFIG_NO_CONVERSION
383 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
384
385 #include "unicode/ucnv.h"
386 #include "unicode/ucnv_cb.h"
387 Index: source/common/ucnv_u7.c
388 ===================================================================
389 --- source/common/ucnv_u7.c» (revision 259715)
390 +++ source/common/ucnv_u7.c» (working copy)
391 @@ -16,7 +16,7 @@
392
393 #include "unicode/utypes.h"
394
395 -#if !UCONFIG_NO_CONVERSION
396 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
397
398 #include "unicode/ucnv.h"
399 #include "ucnv_bld.h"
400 Index: source/common/unicode/uconfig.h
401 ===================================================================
402 --- source/common/unicode/uconfig.h» (revision 259715)
403 +++ source/common/unicode/uconfig.h» (working copy)
404 @@ -265,6 +265,14 @@
405 #endif
406
407 /**
408 + * This switch turns off all the converters NOT listed in
409 + * the encoding standard : http://encoding.spec.whatwg.org
410 + */
411 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
412 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0
413 +#endif
414 +
415 +/**
416 * \def UCONFIG_NO_LEGACY_CONVERSION
417 * This switch turns off all converters except for
418 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
419 Index: source/common/ucnv_bld.cpp
420 ===================================================================
421 --- source/common/ucnv_bld.cpp» (revision 259715)
422 +++ source/common/ucnv_bld.cpp» (working copy)
423 @@ -69,28 +69,41 @@
424 352
425 #if UCONFIG_NO_LEGACY_CONVERSION 353 #if UCONFIG_NO_LEGACY_CONVERSION
426 NULL, 354 NULL,
427 +#else 355 +#else
428 + &_ISO2022Data, 356 + &_ISO2022Data,
429 +#endif 357 +#endif
430 + 358 +
431 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION 359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
432 NULL, NULL, NULL, NULL, NULL, NULL, 360 NULL, NULL, NULL, NULL, NULL, NULL,
433 NULL, NULL, NULL, NULL, NULL, NULL, 361 NULL, NULL, NULL, NULL, NULL, NULL,
(...skipping 24 matching lines...) Expand all
458 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, 386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL,
459 +#else 387 +#else
460 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
461 +#endif 389 +#endif
462 390
463 -#if UCONFIG_NO_LEGACY_CONVERSION 391 -#if UCONFIG_NO_LEGACY_CONVERSION
464 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION 392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
465 NULL, 393 NULL,
466 #else 394 #else
467 &_CompoundTextData 395 &_CompoundTextData
468 @@ -105,18 +118,24 @@ 396 @@ -105,18 +118,24 @@ static struct {
469 const char *name; 397 const char *name;
470 const UConverterType type; 398 const UConverterType type;
471 } const cnvNameType[] = { 399 } const cnvNameType[] = {
472 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
473 { "bocu1", UCNV_BOCU1 }, 401 { "bocu1", UCNV_BOCU1 },
474 { "cesu8", UCNV_CESU8 }, 402 { "cesu8", UCNV_CESU8 },
475 -#if !UCONFIG_NO_LEGACY_CONVERSION 403 -#if !UCONFIG_NO_LEGACY_CONVERSION
476 +#endif 404 +#endif
477 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
478 { "hz",UCNV_HZ }, 406 { "hz",UCNV_HZ },
479 #endif 407 #endif
480 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
481 { "imapmailboxname", UCNV_IMAP_MAILBOX }, 409 { "imapmailboxname", UCNV_IMAP_MAILBOX },
410 -#if !UCONFIG_NO_LEGACY_CONVERSION
482 +#endif 411 +#endif
483 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
484 + { "iscii", UCNV_ISCII }, 413 { "iscii", UCNV_ISCII },
485 +#endif 414 +#endif
486 #if !UCONFIG_NO_LEGACY_CONVERSION 415 +#if !UCONFIG_NO_LEGACY_CONVERSION
487 - { "iscii", UCNV_ISCII },
488 { "iso2022", UCNV_ISO_2022 }, 416 { "iso2022", UCNV_ISO_2022 },
489 #endif 417 #endif
490 { "iso88591", UCNV_LATIN_1 }, 418 { "iso88591", UCNV_LATIN_1 },
491 -#if !UCONFIG_NO_LEGACY_CONVERSION 419 -#if !UCONFIG_NO_LEGACY_CONVERSION
492 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
493 { "lmbcs1", UCNV_LMBCS_1 }, 421 { "lmbcs1", UCNV_LMBCS_1 },
494 { "lmbcs11",UCNV_LMBCS_11 }, 422 { "lmbcs11",UCNV_LMBCS_11 },
495 { "lmbcs16",UCNV_LMBCS_16 }, 423 { "lmbcs16",UCNV_LMBCS_16 },
496 @@ -130,7 +149,9 @@ 424 @@ -130,7 +149,9 @@ static struct {
497 { "lmbcs6", UCNV_LMBCS_6 }, 425 { "lmbcs6", UCNV_LMBCS_6 },
498 { "lmbcs8", UCNV_LMBCS_8 }, 426 { "lmbcs8", UCNV_LMBCS_8 },
499 #endif 427 #endif
500 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
501 { "scsu", UCNV_SCSU }, 429 { "scsu", UCNV_SCSU },
502 +#endif 430 +#endif
503 { "usascii", UCNV_US_ASCII }, 431 { "usascii", UCNV_US_ASCII },
504 { "utf16", UCNV_UTF16 }, 432 { "utf16", UCNV_UTF16 },
505 { "utf16be", UCNV_UTF16_BigEndian }, 433 { "utf16be", UCNV_UTF16_BigEndian },
506 @@ -152,9 +173,13 @@ 434 @@ -152,9 +173,13 @@ static struct {
507 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, 435 { "utf32oppositeendian", UCNV_UTF32_BigEndian },
508 { "utf32platformendian", UCNV_UTF32_LittleEndian }, 436 { "utf32platformendian", UCNV_UTF32_LittleEndian },
509 #endif 437 #endif
510 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
511 { "utf7", UCNV_UTF7 }, 439 { "utf7", UCNV_UTF7 },
512 +#endif 440 +#endif
513 { "utf8", UCNV_UTF8 }, 441 { "utf8", UCNV_UTF8 },
514 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
515 { "x11compoundtext", UCNV_COMPOUND_TEXT} 443 { "x11compoundtext", UCNV_COMPOUND_TEXT}
516 +#endif 444 +#endif
517 }; 445 };
518 446
519 447
520 Index: source/common/ucnv_u8.c 448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h
521 =================================================================== 449 index 402e2c9..5fad446 100644
522 --- source/common/ucnv_u8.c» (revision 259715) 450 --- a/source/common/ucnv_cnv.h
523 +++ source/common/ucnv_u8.c» (working copy) 451 +++ b/source/common/ucnv_cnv.h
524 @@ -87,6 +87,15 @@ 452 @@ -256,11 +256,15 @@ struct UConverterImpl {
453 extern const UConverterSharedData
454 _MBCSData, _Latin1Data,
455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData,
456 - _ISO2022Data,
457 + _ISO2022Data,
458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
461 _HZData,_ISCIIData, _SCSUData, _ASCIIData,
462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
463 +#else
464 + _ASCIIData, _UTF16Data, _UTF32Data;
465 +#endif
466
467 U_CDECL_END
468
469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c
470 index ec0e9c2..e723fa6 100644
471 --- a/source/common/ucnv_ct.c
472 +++ b/source/common/ucnv_ct.c
473 @@ -14,7 +14,7 @@
474
475 #include "unicode/utypes.h"
476
477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
479
480 #include "unicode/ucnv.h"
481 #include "unicode/uset.h"
482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c
483 index 1d921dd..a4fccee 100644
484 --- a/source/common/ucnv_lmb.c
485 +++ b/source/common/ucnv_lmb.c
486 @@ -25,7 +25,7 @@
487
488 #include "unicode/utypes.h"
489
490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
492
493 #include "unicode/ucnv_err.h"
494 #include "unicode/ucnv.h"
495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c
496 index 42943f4..6466b87 100644
497 --- a/source/common/ucnv_u7.c
498 +++ b/source/common/ucnv_u7.c
499 @@ -16,7 +16,7 @@
500
501 #include "unicode/utypes.h"
502
503 -#if !UCONFIG_NO_CONVERSION
504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
505
506 #include "unicode/ucnv.h"
507 #include "ucnv_bld.h"
508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c
509 index 8ee9fe5..24205f5 100644
510 --- a/source/common/ucnv_u8.c
511 +++ b/source/common/ucnv_u8.c
512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = {
525 static const uint32_t 513 static const uint32_t
526 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; 514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
527 515
528 +static UBool hasCESU8Data(const UConverter *cnv) 516 +static UBool hasCESU8Data(const UConverter *cnv)
529 +{ 517 +{
530 +#if UCONFIG_NO_NON_HTML5_CONVERSION 518 +#if UCONFIG_NO_NON_HTML5_CONVERSION
531 + return FALSE; 519 + return FALSE;
532 +#else 520 +#else
533 + return (UBool)(cnv->sharedData == &_CESU8Data); 521 + return (UBool)(cnv->sharedData == &_CESU8Data);
534 +#endif 522 +#endif
535 +} 523 +}
536 + 524 +
537 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
538 UErrorCode * err) 526 UErrorCode * err)
539 { 527 {
540 @@ -96,10 +105,10 @@ 528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
541 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 529 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
542 const UChar *targetLimit = args->targetLimit; 530 const UChar *targetLimit = args->targetLimit;
543 unsigned char *toUBytes = cnv->toUBytes; 531 unsigned char *toUBytes = cnv->toUBytes;
544 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 532 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
545 + UBool isCESU8 = hasCESU8Data(cnv); 533 + UBool isCESU8 = hasCESU8Data(cnv);
546 uint32_t ch, ch2 = 0; 534 uint32_t ch, ch2 = 0;
547 int32_t i, inBytes; 535 int32_t i, inBytes;
548 - 536 -
549 + 537 +
550 /* Restore size of current sequence */ 538 /* Restore size of current sequence */
551 if (cnv->toUnicodeStatus && myTarget < targetLimit) 539 if (cnv->toUnicodeStatus && myTarget < targetLimit)
552 { 540 {
553 @@ -226,7 +235,7 @@ 541 @@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToU nicodeArgs * args,
554 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 542 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
555 const UChar *targetLimit = args->targetLimit; 543 const UChar *targetLimit = args->targetLimit;
556 unsigned char *toUBytes = cnv->toUBytes; 544 unsigned char *toUBytes = cnv->toUBytes;
557 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 545 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
558 + UBool isCESU8 = hasCESU8Data(cnv); 546 + UBool isCESU8 = hasCESU8Data(cnv);
559 uint32_t ch, ch2 = 0; 547 uint32_t ch, ch2 = 0;
560 int32_t i, inBytes; 548 int32_t i, inBytes;
561 549
562 @@ -357,7 +366,7 @@ 550 @@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArg s * args,
563 UChar32 ch; 551 UChar32 ch;
564 uint8_t tempBuf[4]; 552 uint8_t tempBuf[4];
565 int32_t indexToWrite; 553 int32_t indexToWrite;
566 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 554 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
567 + UBool isNotCESU8 = !hasCESU8Data(cnv); 555 + UBool isNotCESU8 = !hasCESU8Data(cnv);
568 556
569 if (cnv->fromUChar32 && myTarget < targetLimit) 557 if (cnv->fromUChar32 && myTarget < targetLimit)
570 { 558 {
571 @@ -473,7 +482,7 @@ 559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter FromUnicodeArgs * ar
572 int32_t offsetNum, nextSourceIndex; 560 int32_t offsetNum, nextSourceIndex;
573 int32_t indexToWrite; 561 int32_t indexToWrite;
574 uint8_t tempBuf[4]; 562 uint8_t tempBuf[4];
575 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
576 + UBool isNotCESU8 = !hasCESU8Data(cnv); 564 + UBool isNotCESU8 = !hasCESU8Data(cnv);
577 565
578 if (cnv->fromUChar32 && myTarget < targetLimit) 566 if (cnv->fromUChar32 && myTarget < targetLimit)
579 { 567 {
580 Index: source/common/unicode/urename.h 568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp
581 =================================================================== 569 index b97d666..281d6d9 100644
582 --- source/common/unicode/urename.h» (revision 259715) 570 --- a/source/common/ucnvbocu.cpp
583 +++ source/common/unicode/urename.h» (working copy) 571 +++ b/source/common/ucnvbocu.cpp
572 @@ -19,7 +19,7 @@
573
574 #include "unicode/utypes.h"
575
576 -#if !UCONFIG_NO_CONVERSION
577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
578
579 #include "unicode/ucnv.h"
580 #include "unicode/ucnv_cb.h"
581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c
582 index 3760c39..51825e2 100644
583 --- a/source/common/ucnvhz.c
584 +++ b/source/common/ucnvhz.c
585 @@ -16,7 +16,7 @@
586
587 #include "unicode/utypes.h"
588
589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
591
592 #include "cmemory.h"
593 #include "unicode/ucnv.h"
594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={
595 0
596 };
597
598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF IG_NO_NON_HTML5_CONVERSION */
600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c
601 index fe61d40..16fd0a3 100644
602 --- a/source/common/ucnvisci.c
603 +++ b/source/common/ucnvisci.c
604 @@ -17,7 +17,7 @@
605
606 #include "unicode/utypes.h"
607
608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
610
611 #include "unicode/ucnv.h"
612 #include "unicode/ucnv_cb.h"
613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c
614 index c6e96e1..a6f8c9e 100644
615 --- a/source/common/ucnvscsu.c
616 +++ b/source/common/ucnvscsu.c
617 @@ -21,7 +21,7 @@
618
619 #include "unicode/utypes.h"
620
621 -#if !UCONFIG_NO_CONVERSION
622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
623
624 #include "unicode/ucnv.h"
625 #include "unicode/ucnv_cb.h"
626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h
627 index ed073b6..8df56e6 100644
628 --- a/source/common/unicode/uconfig.h
629 +++ b/source/common/unicode/uconfig.h
630 @@ -270,6 +270,14 @@
631 #endif
632
633 /**
634 + * This switch turns off all the converters NOT listed in
635 + * the encoding standard : http://encoding.spec.whatwg.org
636 + */
637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0
639 +#endif
640 +
641 +/**
642 * \def UCONFIG_NO_LEGACY_CONVERSION
643 * This switch turns off all converters except for
644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h
646 index a817262..89becca 100644
647 --- a/source/common/unicode/urename.h
648 +++ b/source/common/unicode/urename.h
584 @@ -73,12 +73,14 @@ 649 @@ -73,12 +73,14 @@
585 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) 650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
586 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) 651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
587 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) 652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
588 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
589 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) 654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
590 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) 655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
591 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) 656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
592 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) 657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
593 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) 658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
(...skipping 14 matching lines...) Expand all
608 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) 673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
609 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) 674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
610 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) 675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
611 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) 676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
612 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
613 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) 678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
614 +#endif 679 +#endif
615 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) 680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
616 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) 681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
617 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) 682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
618 Index: source/common/ucnv_cnv.h 683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp
619 =================================================================== 684 index 3efbd49..ba5b18c 100644
620 --- source/common/ucnv_cnv.h» (revision 259715) 685 --- a/source/i18n/csdetect.cpp
621 +++ source/common/ucnv_cnv.h» (working copy) 686 +++ b/source/i18n/csdetect.cpp
622 @@ -256,11 +256,15 @@ 687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
623 extern const UConverterSharedData 688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
624 _MBCSData, _Latin1Data, 689
625 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, 690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
626 - _ISO2022Data,
627 + _ISO2022Data,
628 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
629 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6 , 692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
630 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1 9, 693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
631 _HZData,_ISCIIData, _SCSUData, _ASCIIData, 694
632 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp oundTextData; 695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) {
633 +#else 696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
634 + _ASCIIData, _UTF16Data, _UTF32Data; 697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
635 +#endif 699 +#endif
700 };
701 int32_t rCount = ARRAY_SIZE(tempArray);
636 702
637 U_CDECL_END 703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp
638 704 index 3db0bc9..be3eafa 100644
639 Index: source/common/ucnv_lmb.c 705 --- a/source/i18n/csr2022.cpp
640 =================================================================== 706 +++ b/source/i18n/csr2022.cpp
641 --- source/common/ucnv_lmb.c» (revision 291619) 707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = {
642 +++ source/common/ucnv_lmb.c» (working copy)
643 @@ -25,7 +25,7 @@
644
645 #include "unicode/utypes.h"
646
647 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
648 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
649
650 #include "unicode/ucnv_err.h"
651 #include "unicode/ucnv.h"
652 Index: source/common/ucnvhz.c
653 ===================================================================
654 --- source/common/ucnvhz.c» (revision 291619)
655 +++ source/common/ucnvhz.c» (working copy)
656 @@ -16,7 +16,7 @@
657
658 #include "unicode/utypes.h"
659
660 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
661 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
662
663 #include "cmemory.h"
664 #include "unicode/ucnv.h"
665 @@ -637,4 +637,4 @@
666 0
667 };
668
669 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
670 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF IG_NO_NON_HTML5_CONVERSION */
671 Index: source/common/ucnv_ct.c
672 ===================================================================
673 --- source/common/ucnv_ct.c» (revision 291619)
674 +++ source/common/ucnv_ct.c» (working copy)
675 @@ -14,7 +14,7 @@
676
677 #include "unicode/utypes.h"
678
679 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
680 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_ HTML5_CONVERSION
681
682 #include "unicode/ucnv.h"
683 #include "unicode/uset.h"
684 Index: source/i18n/csrsbcs.h
685 ===================================================================
686 --- source/i18n/csrsbcs.h» (revision 291619)
687 +++ source/i18n/csrsbcs.h» (working copy)
688 @@ -50,6 +50,7 @@
689
690 };
691
692 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
693 class NGramParser_IBM420 : public NGramParser
694 {
695 private:
696 @@ -61,6 +62,7 @@
697 public:
698 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
699 };
700 +#endif
701
702
703 class CharsetRecog_sbcs : public CharsetRecognizer
704 @@ -229,6 +231,7 @@
705 virtual UBool match(InputText *det, CharsetMatch *results) const;
706 };
707
708 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
709 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
710 {
711 public:
712 @@ -280,6 +283,7 @@
713
714 virtual UBool match(InputText *det, CharsetMatch *results) const;
715 };
716 +#endif
717
718 U_NAMESPACE_END
719
720 Index: source/i18n/csr2022.h
721 ===================================================================
722 --- source/i18n/csr2022.h» (revision 291619)
723 +++ source/i18n/csr2022.h» (working copy)
724 @@ -65,6 +65,7 @@
725 UBool match(InputText *textIn, CharsetMatch *results) const;
726 };
727
728 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
729 class CharsetRecog_2022KR :public CharsetRecog_2022 {
730 public:
731 virtual ~CharsetRecog_2022KR();
732 @@ -84,6 +85,7 @@
733
734 UBool match(InputText *textIn, CharsetMatch *results) const;
735 };
736 +#endif
737
738 U_NAMESPACE_END
739
740 Index: source/i18n/csr2022.cpp
741 ===================================================================
742 --- source/i18n/csr2022.cpp» (revision 291619)
743 +++ source/i18n/csr2022.cpp» (working copy)
744 @@ -119,6 +119,7 @@
745 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7
746 }; 709 };
747 710
748 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
749 static const uint8_t escapeSequences_2022KR[][5] = { 712 static const uint8_t escapeSequences_2022KR[][5] = {
750 {0x1b, 0x24, 0x29, 0x43, 0x00} 713 {0x1b, 0x24, 0x29, 0x43, 0x00}
751 }; 714 };
752 @@ -136,6 +137,7 @@ 715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = {
753 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2
754 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3
755 }; 718 };
756 +#endif 719 +#endif
757 720
758 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} 721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {}
759 722
760 @@ -152,6 +154,7 @@ 723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetM atch *results) const
761 return (confidence > 0); 724 return (confidence > 0);
762 } 725 }
763 726
764 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
765 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} 728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {}
766 729
767 const char *CharsetRecog_2022KR::getName() const { 730 const char *CharsetRecog_2022KR::getName() const {
768 @@ -181,6 +184,7 @@ 731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetM atch *results) const
769 results->set(textIn, this, confidence); 732 results->set(textIn, this, confidence);
770 return (confidence > 0); 733 return (confidence > 0);
771 } 734 }
772 +#endif 735 +#endif
773 736
774 CharsetRecog_2022::~CharsetRecog_2022() { 737 CharsetRecog_2022::~CharsetRecog_2022() {
775 // nothing to do 738 // nothing to do
776 Index: source/i18n/csdetect.cpp 739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h
777 =================================================================== 740 index 2ac2b87..dad22c7 100644
778 --- source/i18n/csdetect.cpp» (revision 291619) 741 --- a/source/i18n/csr2022.h
779 +++ source/i18n/csdetect.cpp» (working copy) 742 +++ b/source/i18n/csr2022.h
780 @@ -110,6 +110,7 @@ 743 @@ -65,6 +65,7 @@ public:
781 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), 744 UBool match(InputText *textIn, CharsetMatch *results) const;
745 };
782 746
783 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
784 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
785 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), 748 class CharsetRecog_2022KR :public CharsetRecog_2022 {
786 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), 749 public:
750 virtual ~CharsetRecog_2022KR();
751 @@ -84,6 +85,7 @@ public:
787 752
788 @@ -117,6 +118,7 @@ 753 UBool match(InputText *textIn, CharsetMatch *results) const;
789 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), 754 };
790 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
791 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
792 +#endif 755 +#endif
793 };
794 int32_t rCount = ARRAY_SIZE(tempArray);
795 756
796 Index: source/i18n/csrsbcs.cpp 757 U_NAMESPACE_END
797 =================================================================== 758
798 --- source/i18n/csrsbcs.cpp» (revision 291619) 759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp
799 +++ source/i18n/csrsbcs.cpp» (working copy) 760 index d03367c..7b70dc1 100644
800 @@ -137,6 +137,7 @@ 761 --- a/source/i18n/csrsbcs.cpp
762 +++ b/source/i18n/csrsbcs.cpp
763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det)
801 return (int32_t) (rawPercent * 300.0); 764 return (int32_t) (rawPercent * 300.0);
802 } 765 }
803 766
804 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
805 static const uint8_t unshapeMap_IBM420[] = { 768 static const uint8_t unshapeMap_IBM420[] = {
806 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ 769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */
807 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0 x40, 0x40, 0x40, 0x40, 0x40, 770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0 x40, 0x40, 0x40, 0x40, 0x40,
808 @@ -232,6 +233,7 @@ 771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det)
809 } 772 }
810 } 773 }
811 } 774 }
812 +#endif 775 +#endif
813 776
814 CharsetRecog_sbcs::CharsetRecog_sbcs() 777 CharsetRecog_sbcs::CharsetRecog_sbcs()
815 { 778 {
816 @@ -624,6 +626,7 @@ 779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = {
817 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, 780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
818 }; 781 };
819 782
820 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
821 static const int32_t ngrams_IBM424_he_rtl[] = { 784 static const int32_t ngrams_IBM424_he_rtl[] = {
822 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404 546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405 641, 785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404 546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405 641,
823 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454 056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514 045, 786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454 056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514 045,
824 @@ -691,6 +694,7 @@ 787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= {
825 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0 xEB, 0x40, 0xED, 0xEE, 0xEF, 788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0 xEB, 0x40, 0xED, 0xEE, 0xEF,
826 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0 xFB, 0xFC, 0xFD, 0xFE, 0x40, 789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0 xFB, 0xFC, 0xFD, 0xFE, 0x40,
827 }; 790 };
828 +#endif 791 +#endif
829 792
830 //ISO-8859-1,2,5,6,7,8,9 Ngrams 793 //ISO-8859-1,2,5,6,7,8,9 Ngrams
831 794
832 @@ -1155,6 +1159,7 @@ 795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, Charse tMatch *results) const
833 return (confidence > 0); 796 return (confidence > 0);
834 } 797 }
835 798
836 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
837 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() 800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
838 { 801 {
839 // nothing to do 802 // nothing to do
840 @@ -1253,6 +1258,7 @@ 803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results
841 results->set(textIn, this, confidence); 804 results->set(textIn, this, confidence);
842 return (confidence > 0); 805 return (confidence > 0);
843 } 806 }
844 +#endif 807 +#endif
845 808
846 U_NAMESPACE_END 809 U_NAMESPACE_END
847 #endif 810 #endif
811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h
812 index 2579c02..7789f9b 100644
813 --- a/source/i18n/csrsbcs.h
814 +++ b/source/i18n/csrsbcs.h
815 @@ -50,6 +50,7 @@ public:
816
817 };
818
819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
820 class NGramParser_IBM420 : public NGramParser
821 {
822 private:
823 @@ -61,6 +62,7 @@ private:
824 public:
825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
826 };
827 +#endif
828
829
830 class CharsetRecog_sbcs : public CharsetRecognizer
831 @@ -229,6 +231,7 @@ public:
832 virtual UBool match(InputText *det, CharsetMatch *results) const;
833 };
834
835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION
836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
837 {
838 public:
839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM42 0_ar {
840
841 virtual UBool match(InputText *det, CharsetMatch *results) const;
842 };
843 +#endif
844
845 U_NAMESPACE_END
846
OLDNEW
« no previous file with comments | « README.chromium ('k') | scripts/big5_gen.sh » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698