OLD | NEW |
1 Index: source/common/ucnv2022.cpp | 1 diff --git a/source/common/ucnv2022.cpp b/source/common/ucnv2022.cpp |
2 =================================================================== | 2 index 9556dd2..7b8df9b 100644 |
3 --- source/common/ucnv2022.cpp» (revision 259715) | 3 --- a/source/common/ucnv2022.cpp |
4 +++ source/common/ucnv2022.cpp» (working copy) | 4 +++ b/source/common/ucnv2022.cpp |
5 @@ -154,7 +154,11 @@ | 5 @@ -152,7 +152,11 @@ typedef enum { |
6 } StateEnum; | 6 } StateEnum; |
7 | 7 |
8 /* is the StateEnum charset value for a DBCS charset? */ | 8 /* is the StateEnum charset value for a DBCS charset? */ |
9 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 9 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) | 10 +#define IS_JP_DBCS(cs) (JISX208==(cs)) |
11 +#else | 11 +#else |
12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) | 12 #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
13 +#endif | 13 +#endif |
14 | 14 |
15 #define CSM(cs) ((uint16_t)1<<(cs)) | 15 #define CSM(cs) ((uint16_t)1<<(cs)) |
16 | 16 |
17 @@ -167,13 +171,23 @@ | 17 @@ -165,13 +169,23 @@ typedef enum { |
18 * all versions, not just JIS7 and JIS8. | 18 * all versions, not just JIS7 and JIS8. |
19 * - ICU does not distinguish between different versions of JIS X 0208. | 19 * - ICU does not distinguish between different versions of JIS X 0208. |
20 */ | 20 */ |
21 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 21 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
22 +enum { MAX_JA_VERSION=0 }; | 22 +enum { MAX_JA_VERSION=0 }; |
23 +#else | 23 +#else |
24 enum { MAX_JA_VERSION=4 }; | 24 enum { MAX_JA_VERSION=4 }; |
25 +#endif | 25 +#endif |
26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ | 26 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
27 +/* | 27 +/* |
28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it. | 28 + * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 | 29 + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
30 + */ | 30 + */ |
31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), | 31 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 32 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), | 33 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 34 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), | 35 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) | 36 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB23
12)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
37 +#endif | 37 +#endif |
38 }; | 38 }; |
39 | 39 |
40 typedef enum { | 40 typedef enum { |
41 @@ -360,15 +374,18 @@ | 41 @@ -358,15 +372,18 @@ static const int8_t escSeqStateTable_Value_2022[MAX_STATES
_2022] = { |
42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202
2 ,VALID_TERMINAL_2022 | 42 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_202
2 ,VALID_TERMINAL_2022 |
43 }; | 43 }; |
44 | 44 |
45 - | 45 - |
46 /* Type def for refactoring changeState_2022 code*/ | 46 /* Type def for refactoring changeState_2022 code*/ |
47 typedef enum{ | 47 typedef enum{ |
48 #ifdef U_ENABLE_GENERIC_ISO_2022 | 48 #ifdef U_ENABLE_GENERIC_ISO_2022 |
49 ISO_2022=0, | 49 ISO_2022=0, |
50 #endif | 50 #endif |
51 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 51 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
52 + ISO_2022_JP=1 | 52 + ISO_2022_JP=1 |
53 +#else | 53 +#else |
54 ISO_2022_JP=1, | 54 ISO_2022_JP=1, |
55 ISO_2022_KR=2, | 55 ISO_2022_KR=2, |
56 ISO_2022_CN=3 | 56 ISO_2022_CN=3 |
57 +#endif | 57 +#endif |
58 } Variant2022; | 58 } Variant2022; |
59 | 59 |
60 /*********** ISO 2022 Converter Protos ***********/ | 60 /*********** ISO 2022 Converter Protos ***********/ |
61 @@ -485,12 +502,15 @@ | 61 @@ -483,12 +500,15 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, U
ErrorCode *errorCode){ |
62 /* prevent indexing beyond jpCharsetMasks[] */ | 62 /* prevent indexing beyond jpCharsetMasks[] */ |
63 myConverterData->version = version = 0; | 63 myConverterData->version = version = 0; |
64 } | 64 } |
65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 65 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { | 66 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
67 myConverterData->myConverterArray[ISO8859_7] = | 67 myConverterData->myConverterArray[ISO8859_7] = |
68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs,
errorCode); | 68 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs,
errorCode); |
69 } | 69 } |
70 +#endif | 70 +#endif |
71 myConverterData->myConverterArray[JISX208] = | 71 myConverterData->myConverterArray[JISX208] = |
72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro
rCode); | 72 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, erro
rCode); |
73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 73 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
74 if(jpCharsetMasks[version]&CSM(JISX212)) { | 74 if(jpCharsetMasks[version]&CSM(JISX212)) { |
75 myConverterData->myConverterArray[JISX212] = | 75 myConverterData->myConverterArray[JISX212] = |
76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e
rrorCode); | 76 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, e
rrorCode); |
77 @@ -503,6 +523,7 @@ | 77 @@ -501,6 +521,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ |
78 myConverterData->myConverterArray[KSC5601] = | 78 myConverterData->myConverterArray[KSC5601] = |
79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e
rrorCode); | 79 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, e
rrorCode); |
80 } | 80 } |
81 +#endif | 81 +#endif |
82 | 82 |
83 /* set the function pointers to appropriate funtions */ | 83 /* set the function pointers to appropriate funtions */ |
84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); | 84 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
85 @@ -513,6 +534,7 @@ | 85 @@ -511,6 +532,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ |
86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); | 86 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'
); |
87 myConverterData->name[len+1]='\0'; | 87 myConverterData->name[len+1]='\0'; |
88 } | 88 } |
89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 89 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && | 90 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
91 (myLocale[2]=='_' || myLocale[2]=='\0')) | 91 (myLocale[2]=='_' || myLocale[2]=='\0')) |
92 { | 92 { |
93 @@ -582,6 +604,7 @@ | 93 @@ -580,6 +602,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UEr
rorCode *errorCode){ |
94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); | 94 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,ver
sion=2"); |
95 } | 95 } |
96 } | 96 } |
97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION | 97 +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
98 else{ | 98 else{ |
99 #ifdef U_ENABLE_GENERIC_ISO_2022 | 99 #ifdef U_ENABLE_GENERIC_ISO_2022 |
100 myConverterData->isFirstBuffer = TRUE; | 100 myConverterData->isFirstBuffer = TRUE; |
101 @@ -716,6 +739,7 @@ | 101 @@ -714,6 +737,7 @@ static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { |
102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 102 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
103 }; | 103 }; |
104 | 104 |
105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 105 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
106 /*************** to unicode *******************/ | 106 /*************** to unicode *******************/ |
107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { | 107 static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
108 /* 0 1 2 3 4
5 6 7 8 9 *
/ | 108 /* 0 1 2 3 4
5 6 7 8 9 *
/ |
109 @@ -728,6 +752,7 @@ | 109 @@ -726,6 +750,7 @@ static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE | 110 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_ST
ATE |
111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE | 111 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
112 }; | 112 }; |
113 +#endif | 113 +#endif |
114 | 114 |
115 | 115 |
116 static UCNV_TableStates_2022 | 116 static UCNV_TableStates_2022 |
117 @@ -880,6 +905,7 @@ | 117 @@ -878,6 +903,7 @@ DONE: |
118 } | 118 } |
119 break; | 119 break; |
120 /* case SS3_STATE: not used in ISO-2022-JP-x */ | 120 /* case SS3_STATE: not used in ISO-2022-JP-x */ |
121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 121 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
122 case ISO8859_1: | 122 case ISO8859_1: |
123 case ISO8859_7: | 123 case ISO8859_7: |
124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { | 124 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { |
125 @@ -889,6 +915,7 @@ | 125 @@ -887,6 +913,7 @@ DONE: |
126 myData2022->toU2022State.cs[2]=(int8_t)tempState; | 126 myData2022->toU2022State.cs[2]=(int8_t)tempState; |
127 } | 127 } |
128 break; | 128 break; |
129 +#endif | 129 +#endif |
130 default: | 130 default: |
131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { | 131 if((jpCharsetMasks[myData2022->version] & CSM(tempState)) =
= 0) { |
132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 132 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
133 @@ -900,6 +927,7 @@ | 133 @@ -898,6 +925,7 @@ DONE: |
134 } | 134 } |
135 } | 135 } |
136 break; | 136 break; |
137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 137 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
138 case ISO_2022_CN: | 138 case ISO_2022_CN: |
139 { | 139 { |
140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; | 140 StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
141 @@ -961,6 +989,7 @@ | 141 @@ -959,6 +987,7 @@ DONE: |
142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; | 142 *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
143 } | 143 } |
144 break; | 144 break; |
145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 145 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
146 | 146 |
147 default: | 147 default: |
148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; | 148 *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
149 @@ -1381,12 +1410,16 @@ | 149 @@ -1379,12 +1408,16 @@ toUnicodeCallback(UConverter *cnv, |
150 static const StateEnum jpCharsetPref[]={ | 150 static const StateEnum jpCharsetPref[]={ |
151 ASCII, | 151 ASCII, |
152 JISX201, | 152 JISX201, |
153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 153 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
154 ISO8859_1, | 154 ISO8859_1, |
155 ISO8859_7, | 155 ISO8859_7, |
156 +#endif | 156 +#endif |
157 JISX208, | 157 JISX208, |
158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 158 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
159 JISX212, | 159 JISX212, |
160 GB2312, | 160 GB2312, |
161 KSC5601, | 161 KSC5601, |
162 +#endif | 162 +#endif |
163 HWKANA_7BIT | 163 HWKANA_7BIT |
164 }; | 164 }; |
165 | 165 |
166 @@ -1756,6 +1789,7 @@ | 166 @@ -1754,6 +1787,7 @@ getTrail: |
167 g = 0; | 167 g = 0; |
168 } | 168 } |
169 break; | 169 break; |
170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 170 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
171 case ISO8859_1: | 171 case ISO8859_1: |
172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { | 172 if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
173 targetValue = (uint32_t)sourceChar - 0x80; | 173 targetValue = (uint32_t)sourceChar - 0x80; |
174 @@ -1764,6 +1798,7 @@ | 174 @@ -1762,6 +1796,7 @@ getTrail: |
175 g = 2; | 175 g = 2; |
176 } | 176 } |
177 break; | 177 break; |
178 +#endif | 178 +#endif |
179 case HWKANA_7BIT: | 179 case HWKANA_7BIT: |
180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H
WKANA_START)) { | 180 if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - H
WKANA_START)) { |
181 if(converterData->version==3) { | 181 if(converterData->version==3) { |
182 @@ -1825,6 +1860,7 @@ | 182 @@ -1823,6 +1858,7 @@ getTrail: |
183 useFallback = FALSE; | 183 useFallback = FALSE; |
184 } | 184 } |
185 break; | 185 break; |
186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 186 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
187 case ISO8859_7: | 187 case ISO8859_7: |
188 /* G0 SBCS forced to 7-bit output */ | 188 /* G0 SBCS forced to 7-bit output */ |
189 len2 = MBCS_SINGLE_FROM_UCHAR32( | 189 len2 = MBCS_SINGLE_FROM_UCHAR32( |
190 @@ -1839,6 +1875,7 @@ | 190 @@ -1837,6 +1873,7 @@ getTrail: |
191 useFallback = FALSE; | 191 useFallback = FALSE; |
192 } | 192 } |
193 break; | 193 break; |
194 +#endif | 194 +#endif |
195 default: | 195 default: |
196 /* G0 DBCS */ | 196 /* G0 DBCS */ |
197 len2 = MBCS_FROM_UCHAR32_ISO2022( | 197 len2 = MBCS_FROM_UCHAR32_ISO2022( |
198 @@ -1846,6 +1883,7 @@ | 198 @@ -1844,6 +1881,7 @@ getTrail: |
199 sourceChar, &value, | 199 sourceChar, &value, |
200 useFallback, MBCS_OUTPUT_2); | 200 useFallback, MBCS_OUTPUT_2); |
201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ | 201 if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept
DBCS: abs(len)==2 */ |
202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 202 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
203 if(cs0 == KSC5601) { | 203 if(cs0 == KSC5601) { |
204 /* | 204 /* |
205 * Check for valid bytes for the encoding scheme. | 205 * Check for valid bytes for the encoding scheme. |
206 @@ -1857,6 +1895,7 @@ | 206 @@ -1855,6 +1893,7 @@ getTrail: |
207 break; | 207 break; |
208 } | 208 } |
209 } | 209 } |
210 +#endif | 210 +#endif |
211 targetValue = value; | 211 targetValue = value; |
212 len = len2; | 212 len = len2; |
213 cs = cs0; | 213 cs = cs0; |
214 @@ -2150,6 +2189,7 @@ | 214 @@ -2148,6 +2187,7 @@ escape: |
215 targetUniChar = mySourceChar; | 215 targetUniChar = mySourceChar; |
216 } | 216 } |
217 break; | 217 break; |
218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 218 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
219 case ISO8859_1: | 219 case ISO8859_1: |
220 if(mySourceChar <= 0x7f) { | 220 if(mySourceChar <= 0x7f) { |
221 targetUniChar = mySourceChar + 0x80; | 221 targetUniChar = mySourceChar + 0x80; |
222 @@ -2168,6 +2208,7 @@ | 222 @@ -2166,6 +2206,7 @@ escape: |
223 /* return from a single-shift state to the previous one */ | 223 /* return from a single-shift state to the previous one */ |
224 pToU2022State->g=pToU2022State->prevG; | 224 pToU2022State->g=pToU2022State->prevG; |
225 break; | 225 break; |
226 +#endif | 226 +#endif |
227 case JISX201: | 227 case JISX201: |
228 if(mySourceChar <= 0x7f) { | 228 if(mySourceChar <= 0x7f) { |
229 targetUniChar = jisx201ToU(mySourceChar); | 229 targetUniChar = jisx201ToU(mySourceChar); |
230 @@ -2207,9 +2248,11 @@ | 230 @@ -2205,9 +2246,11 @@ getTrailByte: |
231 } else { | 231 } else { |
232 /* Copy before we modify tmpSourceChar so toUni
codeCallback() sees the correct bytes. */ | 232 /* Copy before we modify tmpSourceChar so toUni
codeCallback() sees the correct bytes. */ |
233 mySourceChar = tmpSourceChar; | 233 mySourceChar = tmpSourceChar; |
234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 234 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
235 if (cs == KSC5601) { | 235 if (cs == KSC5601) { |
236 tmpSourceChar += 0x8080; /* = _2022ToGR94D
BCS(tmpSourceChar) */ | 236 tmpSourceChar += 0x8080; /* = _2022ToGR94D
BCS(tmpSourceChar) */ |
237 } | 237 } |
238 +#endif | 238 +#endif |
239 tempBuf[0] = (char)(tmpSourceChar >> 8); | 239 tempBuf[0] = (char)(tmpSourceChar >> 8); |
240 tempBuf[1] = (char)(tmpSourceChar); | 240 tempBuf[1] = (char)(tmpSourceChar); |
241 } | 241 } |
242 @@ -2271,6 +2314,7 @@ | 242 @@ -2269,6 +2312,7 @@ endloop: |
243 } | 243 } |
244 | 244 |
245 | 245 |
246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 246 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
247 /*************************************************************** | 247 /*************************************************************** |
248 * Rules for ISO-2022-KR encoding | 248 * Rules for ISO-2022-KR encoding |
249 * i) The KSC5601 designator sequence should appear only once in a file, | 249 * i) The KSC5601 designator sequence should appear only once in a file, |
250 @@ -3414,6 +3458,7 @@ | 250 @@ -3412,6 +3456,7 @@ endloop: |
251 args->target = myTarget; | 251 args->target = myTarget; |
252 args->source = mySource; | 252 args->source = mySource; |
253 } | 253 } |
254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 254 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
255 | 255 |
256 static void | 256 static void |
257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError
Code *err) { | 257 _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UError
Code *err) { |
258 @@ -3615,6 +3660,7 @@ | 258 @@ -3613,6 +3658,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
259 /* include JIS X 0201 which is hardcoded */ | 259 /* include JIS X 0201 which is hardcoded */ |
260 sa->add(sa->set, 0xa5); | 260 sa->add(sa->set, 0xa5); |
261 sa->add(sa->set, 0x203e); | 261 sa->add(sa->set, 0x203e); |
262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 262 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { | 263 if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
264 /* include Latin-1 for some variants of JP */ | 264 /* include Latin-1 for some variants of JP */ |
265 sa->addRange(sa->set, 0, 0xff); | 265 sa->addRange(sa->set, 0, 0xff); |
266 @@ -3622,6 +3668,10 @@ | 266 @@ -3620,6 +3666,10 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
267 /* include ASCII for JP */ | 267 /* include ASCII for JP */ |
268 sa->addRange(sa->set, 0, 0x7f); | 268 sa->addRange(sa->set, 0, 0x7f); |
269 } | 269 } |
270 +#else | 270 +#else |
271 + /* include ASCII for JP */ | 271 + /* include ASCII for JP */ |
272 + sa->addRange(sa->set, 0, 0x7f); | 272 + sa->addRange(sa->set, 0, 0x7f); |
273 +#endif | 273 +#endif |
274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_
AND_FALLBACK_SET) { | 274 if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_
AND_FALLBACK_SET) { |
275 /* | 275 /* |
276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!
=0 | 276 * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!
=0 |
277 @@ -3640,6 +3690,7 @@ | 277 @@ -3638,6 +3688,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); | 278 sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
279 } | 279 } |
280 break; | 280 break; |
281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 281 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
282 case 'c': | 282 case 'c': |
283 case 'z': | 283 case 'z': |
284 /* include ASCII for CN */ | 284 /* include ASCII for CN */ |
285 @@ -3651,6 +3702,7 @@ | 285 @@ -3649,6 +3700,7 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
286 cnvData->currentConverter, sa, which, pErrorCode); | 286 cnvData->currentConverter, sa, which, pErrorCode); |
287 /* the loop over myConverterArray[] will simply not find another conver
ter */ | 287 /* the loop over myConverterArray[] will simply not find another conver
ter */ |
288 break; | 288 break; |
289 +#endif | 289 +#endif |
290 default: | 290 default: |
291 break; | 291 break; |
292 } | 292 } |
293 @@ -3671,10 +3723,16 @@ | 293 @@ -3669,9 +3721,15 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { | 294 for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
295 UConverterSetFilter filter; | 295 UConverterSetFilter filter; |
296 if(cnvData->myConverterArray[i]!=NULL) { | 296 if(cnvData->myConverterArray[i]!=NULL) { |
297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 297 - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
298 - cnvData->version==0 && i==CNS_11643 | 298 - cnvData->version==0 && i==CNS_11643 |
299 - ) { | 299 - ) { |
300 + if(cnvData->locale[0]=='j' && i==JISX208) { | 300 + if(cnvData->locale[0]=='j' && i==JISX208) { |
301 /* | 301 + /* |
302 + * Only add code points that map to Shift-JIS codes | 302 + * Only add code points that map to Shift-JIS codes |
303 + * corresponding to JIS X 0208. | 303 + * corresponding to JIS X 0208. |
304 + */ | 304 + */ |
305 + filter=UCNV_SET_FILTER_SJIS; | 305 + filter=UCNV_SET_FILTER_SJIS; |
306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 306 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && | 307 + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
308 + cnvData->version==0 && i==CNS_11643) { | 308 + cnvData->version==0 && i==CNS_11643) { |
309 + /* | 309 /* |
310 * Version-specific for CN: | 310 * Version-specific for CN: |
311 * CN version 0 does not map CNS planes 3..7 although | 311 * CN version 0 does not map CNS planes 3..7 although |
312 * they are all available in the CNS conversion table; | 312 @@ -3680,18 +3738,13 @@ _ISO_2022_GetUnicodeSet(const UConverter *cnv, |
313 @@ -3682,18 +3740,13 @@ | |
314 * The two versions create different Unicode sets. | 313 * The two versions create different Unicode sets. |
315 */ | 314 */ |
316 filter=UCNV_SET_FILTER_2022_CN; | 315 filter=UCNV_SET_FILTER_2022_CN; |
317 - } else if(cnvData->locale[0]=='j' && i==JISX208) { | 316 - } else if(cnvData->locale[0]=='j' && i==JISX208) { |
318 - /* | 317 - /* |
319 - * Only add code points that map to Shift-JIS codes | 318 - * Only add code points that map to Shift-JIS codes |
320 - * corresponding to JIS X 0208. | 319 - * corresponding to JIS X 0208. |
321 - */ | 320 - */ |
322 - filter=UCNV_SET_FILTER_SJIS; | 321 - filter=UCNV_SET_FILTER_SJIS; |
323 } else if(i==KSC5601) { | 322 } else if(i==KSC5601) { |
324 /* | 323 /* |
325 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o
n multiple tables) | 324 * Some of the KSC 5601 tables (convrtrs.txt has this aliases o
n multiple tables) |
326 * are broader than GR94. | 325 * are broader than GR94. |
327 */ | 326 */ |
328 filter=UCNV_SET_FILTER_GR94DBCS; | 327 filter=UCNV_SET_FILTER_GR94DBCS; |
329 +#endif | 328 +#endif |
330 } else { | 329 } else { |
331 filter=UCNV_SET_FILTER_NONE; | 330 filter=UCNV_SET_FILTER_NONE; |
332 } | 331 } |
333 @@ -3831,6 +3884,7 @@ | 332 @@ -3829,6 +3882,7 @@ const UConverterSharedData _ISO2022JPData={ |
334 | 333 |
335 } // namespace | 334 } // namespace |
336 | 335 |
337 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 336 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
338 /************* KR ***************/ | 337 /************* KR ***************/ |
339 static const UConverterImpl _ISO2022KRImpl={ | 338 static const UConverterImpl _ISO2022KRImpl={ |
340 UCNV_ISO_2022, | 339 UCNV_ISO_2022, |
341 @@ -3947,5 +4001,6 @@ | 340 @@ -3945,5 +3999,6 @@ const UConverterSharedData _ISO2022CNData={ |
342 }; | 341 }; |
343 | 342 |
344 } // namespace | 343 } // namespace |
345 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ | 344 +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
346 | 345 |
347 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | 346 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
348 Index: source/common/ucnvbocu.cpp | 347 diff --git a/source/common/ucnv_bld.cpp b/source/common/ucnv_bld.cpp |
349 =================================================================== | 348 index 4940310..047f18a 100644 |
350 --- source/common/ucnvbocu.cpp» (revision 259715) | 349 --- a/source/common/ucnv_bld.cpp |
351 +++ source/common/ucnvbocu.cpp» (working copy) | 350 +++ b/source/common/ucnv_bld.cpp |
352 @@ -19,7 +19,7 @@ | 351 @@ -69,28 +69,41 @@ converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ |
353 | |
354 #include "unicode/utypes.h" | |
355 | |
356 -#if !UCONFIG_NO_CONVERSION | |
357 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
358 | |
359 #include "unicode/ucnv.h" | |
360 #include "unicode/ucnv_cb.h" | |
361 Index: source/common/ucnvisci.c | |
362 =================================================================== | |
363 --- source/common/ucnvisci.c» (revision 259715) | |
364 +++ source/common/ucnvisci.c» (working copy) | |
365 @@ -17,7 +17,7 @@ | |
366 | |
367 #include "unicode/utypes.h" | |
368 | |
369 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
370 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
371 | |
372 #include "unicode/ucnv.h" | |
373 #include "unicode/ucnv_cb.h" | |
374 Index: source/common/ucnvscsu.c | |
375 =================================================================== | |
376 --- source/common/ucnvscsu.c» (revision 259715) | |
377 +++ source/common/ucnvscsu.c» (working copy) | |
378 @@ -21,7 +21,7 @@ | |
379 | |
380 #include "unicode/utypes.h" | |
381 | |
382 -#if !UCONFIG_NO_CONVERSION | |
383 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
384 | |
385 #include "unicode/ucnv.h" | |
386 #include "unicode/ucnv_cb.h" | |
387 Index: source/common/ucnv_u7.c | |
388 =================================================================== | |
389 --- source/common/ucnv_u7.c» (revision 259715) | |
390 +++ source/common/ucnv_u7.c» (working copy) | |
391 @@ -16,7 +16,7 @@ | |
392 | |
393 #include "unicode/utypes.h" | |
394 | |
395 -#if !UCONFIG_NO_CONVERSION | |
396 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | |
397 | |
398 #include "unicode/ucnv.h" | |
399 #include "ucnv_bld.h" | |
400 Index: source/common/unicode/uconfig.h | |
401 =================================================================== | |
402 --- source/common/unicode/uconfig.h» (revision 259715) | |
403 +++ source/common/unicode/uconfig.h» (working copy) | |
404 @@ -265,6 +265,14 @@ | |
405 #endif | |
406 | |
407 /** | |
408 + * This switch turns off all the converters NOT listed in | |
409 + * the encoding standard : http://encoding.spec.whatwg.org | |
410 + */ | |
411 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION | |
412 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 | |
413 +#endif | |
414 + | |
415 +/** | |
416 * \def UCONFIG_NO_LEGACY_CONVERSION | |
417 * This switch turns off all converters except for | |
418 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) | |
419 Index: source/common/ucnv_bld.cpp | |
420 =================================================================== | |
421 --- source/common/ucnv_bld.cpp» (revision 259715) | |
422 +++ source/common/ucnv_bld.cpp» (working copy) | |
423 @@ -69,28 +69,41 @@ | |
424 | 352 |
425 #if UCONFIG_NO_LEGACY_CONVERSION | 353 #if UCONFIG_NO_LEGACY_CONVERSION |
426 NULL, | 354 NULL, |
427 +#else | 355 +#else |
428 + &_ISO2022Data, | 356 + &_ISO2022Data, |
429 +#endif | 357 +#endif |
430 + | 358 + |
431 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | 359 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
432 NULL, NULL, NULL, NULL, NULL, NULL, | 360 NULL, NULL, NULL, NULL, NULL, NULL, |
433 NULL, NULL, NULL, NULL, NULL, NULL, | 361 NULL, NULL, NULL, NULL, NULL, NULL, |
(...skipping 24 matching lines...) Expand all Loading... |
458 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, | 386 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, |
459 +#else | 387 +#else |
460 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, | 388 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
461 +#endif | 389 +#endif |
462 | 390 |
463 -#if UCONFIG_NO_LEGACY_CONVERSION | 391 -#if UCONFIG_NO_LEGACY_CONVERSION |
464 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION | 392 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
465 NULL, | 393 NULL, |
466 #else | 394 #else |
467 &_CompoundTextData | 395 &_CompoundTextData |
468 @@ -105,18 +118,24 @@ | 396 @@ -105,18 +118,24 @@ static struct { |
469 const char *name; | 397 const char *name; |
470 const UConverterType type; | 398 const UConverterType type; |
471 } const cnvNameType[] = { | 399 } const cnvNameType[] = { |
472 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 400 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
473 { "bocu1", UCNV_BOCU1 }, | 401 { "bocu1", UCNV_BOCU1 }, |
474 { "cesu8", UCNV_CESU8 }, | 402 { "cesu8", UCNV_CESU8 }, |
475 -#if !UCONFIG_NO_LEGACY_CONVERSION | 403 -#if !UCONFIG_NO_LEGACY_CONVERSION |
476 +#endif | 404 +#endif |
477 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 405 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
478 { "hz",UCNV_HZ }, | 406 { "hz",UCNV_HZ }, |
479 #endif | 407 #endif |
480 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 408 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
481 { "imapmailboxname", UCNV_IMAP_MAILBOX }, | 409 { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
| 410 -#if !UCONFIG_NO_LEGACY_CONVERSION |
482 +#endif | 411 +#endif |
483 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 412 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
484 + { "iscii", UCNV_ISCII }, | 413 { "iscii", UCNV_ISCII }, |
485 +#endif | 414 +#endif |
486 #if !UCONFIG_NO_LEGACY_CONVERSION | 415 +#if !UCONFIG_NO_LEGACY_CONVERSION |
487 - { "iscii", UCNV_ISCII }, | |
488 { "iso2022", UCNV_ISO_2022 }, | 416 { "iso2022", UCNV_ISO_2022 }, |
489 #endif | 417 #endif |
490 { "iso88591", UCNV_LATIN_1 }, | 418 { "iso88591", UCNV_LATIN_1 }, |
491 -#if !UCONFIG_NO_LEGACY_CONVERSION | 419 -#if !UCONFIG_NO_LEGACY_CONVERSION |
492 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION | 420 +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
493 { "lmbcs1", UCNV_LMBCS_1 }, | 421 { "lmbcs1", UCNV_LMBCS_1 }, |
494 { "lmbcs11",UCNV_LMBCS_11 }, | 422 { "lmbcs11",UCNV_LMBCS_11 }, |
495 { "lmbcs16",UCNV_LMBCS_16 }, | 423 { "lmbcs16",UCNV_LMBCS_16 }, |
496 @@ -130,7 +149,9 @@ | 424 @@ -130,7 +149,9 @@ static struct { |
497 { "lmbcs6", UCNV_LMBCS_6 }, | 425 { "lmbcs6", UCNV_LMBCS_6 }, |
498 { "lmbcs8", UCNV_LMBCS_8 }, | 426 { "lmbcs8", UCNV_LMBCS_8 }, |
499 #endif | 427 #endif |
500 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 428 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
501 { "scsu", UCNV_SCSU }, | 429 { "scsu", UCNV_SCSU }, |
502 +#endif | 430 +#endif |
503 { "usascii", UCNV_US_ASCII }, | 431 { "usascii", UCNV_US_ASCII }, |
504 { "utf16", UCNV_UTF16 }, | 432 { "utf16", UCNV_UTF16 }, |
505 { "utf16be", UCNV_UTF16_BigEndian }, | 433 { "utf16be", UCNV_UTF16_BigEndian }, |
506 @@ -152,9 +173,13 @@ | 434 @@ -152,9 +173,13 @@ static struct { |
507 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, | 435 { "utf32oppositeendian", UCNV_UTF32_BigEndian }, |
508 { "utf32platformendian", UCNV_UTF32_LittleEndian }, | 436 { "utf32platformendian", UCNV_UTF32_LittleEndian }, |
509 #endif | 437 #endif |
510 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 438 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
511 { "utf7", UCNV_UTF7 }, | 439 { "utf7", UCNV_UTF7 }, |
512 +#endif | 440 +#endif |
513 { "utf8", UCNV_UTF8 }, | 441 { "utf8", UCNV_UTF8 }, |
514 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 442 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
515 { "x11compoundtext", UCNV_COMPOUND_TEXT} | 443 { "x11compoundtext", UCNV_COMPOUND_TEXT} |
516 +#endif | 444 +#endif |
517 }; | 445 }; |
518 | 446 |
519 | 447 |
520 Index: source/common/ucnv_u8.c | 448 diff --git a/source/common/ucnv_cnv.h b/source/common/ucnv_cnv.h |
521 =================================================================== | 449 index 402e2c9..5fad446 100644 |
522 --- source/common/ucnv_u8.c» (revision 259715) | 450 --- a/source/common/ucnv_cnv.h |
523 +++ source/common/ucnv_u8.c» (working copy) | 451 +++ b/source/common/ucnv_cnv.h |
524 @@ -87,6 +87,15 @@ | 452 @@ -256,11 +256,15 @@ struct UConverterImpl { |
| 453 extern const UConverterSharedData |
| 454 _MBCSData, _Latin1Data, |
| 455 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, |
| 456 - _ISO2022Data, |
| 457 + _ISO2022Data, |
| 458 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 459 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6
, |
| 460 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1
9, |
| 461 _HZData,_ISCIIData, _SCSUData, _ASCIIData, |
| 462 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp
oundTextData; |
| 463 +#else |
| 464 + _ASCIIData, _UTF16Data, _UTF32Data; |
| 465 +#endif |
| 466 |
| 467 U_CDECL_END |
| 468 |
| 469 diff --git a/source/common/ucnv_ct.c b/source/common/ucnv_ct.c |
| 470 index ec0e9c2..e723fa6 100644 |
| 471 --- a/source/common/ucnv_ct.c |
| 472 +++ b/source/common/ucnv_ct.c |
| 473 @@ -14,7 +14,7 @@ |
| 474 |
| 475 #include "unicode/utypes.h" |
| 476 |
| 477 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 478 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 479 |
| 480 #include "unicode/ucnv.h" |
| 481 #include "unicode/uset.h" |
| 482 diff --git a/source/common/ucnv_lmb.c b/source/common/ucnv_lmb.c |
| 483 index 1d921dd..a4fccee 100644 |
| 484 --- a/source/common/ucnv_lmb.c |
| 485 +++ b/source/common/ucnv_lmb.c |
| 486 @@ -25,7 +25,7 @@ |
| 487 |
| 488 #include "unicode/utypes.h" |
| 489 |
| 490 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 491 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 492 |
| 493 #include "unicode/ucnv_err.h" |
| 494 #include "unicode/ucnv.h" |
| 495 diff --git a/source/common/ucnv_u7.c b/source/common/ucnv_u7.c |
| 496 index 42943f4..6466b87 100644 |
| 497 --- a/source/common/ucnv_u7.c |
| 498 +++ b/source/common/ucnv_u7.c |
| 499 @@ -16,7 +16,7 @@ |
| 500 |
| 501 #include "unicode/utypes.h" |
| 502 |
| 503 -#if !UCONFIG_NO_CONVERSION |
| 504 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| 505 |
| 506 #include "unicode/ucnv.h" |
| 507 #include "ucnv_bld.h" |
| 508 diff --git a/source/common/ucnv_u8.c b/source/common/ucnv_u8.c |
| 509 index 8ee9fe5..24205f5 100644 |
| 510 --- a/source/common/ucnv_u8.c |
| 511 +++ b/source/common/ucnv_u8.c |
| 512 @@ -87,6 +87,15 @@ static const int8_t bytesFromUTF8[256] = { |
525 static const uint32_t | 513 static const uint32_t |
526 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; | 514 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; |
527 | 515 |
528 +static UBool hasCESU8Data(const UConverter *cnv) | 516 +static UBool hasCESU8Data(const UConverter *cnv) |
529 +{ | 517 +{ |
530 +#if UCONFIG_NO_NON_HTML5_CONVERSION | 518 +#if UCONFIG_NO_NON_HTML5_CONVERSION |
531 + return FALSE; | 519 + return FALSE; |
532 +#else | 520 +#else |
533 + return (UBool)(cnv->sharedData == &_CESU8Data); | 521 + return (UBool)(cnv->sharedData == &_CESU8Data); |
534 +#endif | 522 +#endif |
535 +} | 523 +} |
536 + | 524 + |
537 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, | 525 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, |
538 UErrorCode * err) | 526 UErrorCode * err) |
539 { | 527 { |
540 @@ -96,10 +105,10 @@ | 528 @@ -96,10 +105,10 @@ static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs *
args, |
541 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | 529 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
542 const UChar *targetLimit = args->targetLimit; | 530 const UChar *targetLimit = args->targetLimit; |
543 unsigned char *toUBytes = cnv->toUBytes; | 531 unsigned char *toUBytes = cnv->toUBytes; |
544 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | 532 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); |
545 + UBool isCESU8 = hasCESU8Data(cnv); | 533 + UBool isCESU8 = hasCESU8Data(cnv); |
546 uint32_t ch, ch2 = 0; | 534 uint32_t ch, ch2 = 0; |
547 int32_t i, inBytes; | 535 int32_t i, inBytes; |
548 - | 536 - |
549 + | 537 + |
550 /* Restore size of current sequence */ | 538 /* Restore size of current sequence */ |
551 if (cnv->toUnicodeStatus && myTarget < targetLimit) | 539 if (cnv->toUnicodeStatus && myTarget < targetLimit) |
552 { | 540 { |
553 @@ -226,7 +235,7 @@ | 541 @@ -226,7 +235,7 @@ static void ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToU
nicodeArgs * args, |
554 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; | 542 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
555 const UChar *targetLimit = args->targetLimit; | 543 const UChar *targetLimit = args->targetLimit; |
556 unsigned char *toUBytes = cnv->toUBytes; | 544 unsigned char *toUBytes = cnv->toUBytes; |
557 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); | 545 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); |
558 + UBool isCESU8 = hasCESU8Data(cnv); | 546 + UBool isCESU8 = hasCESU8Data(cnv); |
559 uint32_t ch, ch2 = 0; | 547 uint32_t ch, ch2 = 0; |
560 int32_t i, inBytes; | 548 int32_t i, inBytes; |
561 | 549 |
562 @@ -357,7 +366,7 @@ | 550 @@ -357,7 +366,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArg
s * args, |
563 UChar32 ch; | 551 UChar32 ch; |
564 uint8_t tempBuf[4]; | 552 uint8_t tempBuf[4]; |
565 int32_t indexToWrite; | 553 int32_t indexToWrite; |
566 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 554 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); |
567 + UBool isNotCESU8 = !hasCESU8Data(cnv); | 555 + UBool isNotCESU8 = !hasCESU8Data(cnv); |
568 | 556 |
569 if (cnv->fromUChar32 && myTarget < targetLimit) | 557 if (cnv->fromUChar32 && myTarget < targetLimit) |
570 { | 558 { |
571 @@ -473,7 +482,7 @@ | 559 @@ -473,7 +482,7 @@ U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverter
FromUnicodeArgs * ar |
572 int32_t offsetNum, nextSourceIndex; | 560 int32_t offsetNum, nextSourceIndex; |
573 int32_t indexToWrite; | 561 int32_t indexToWrite; |
574 uint8_t tempBuf[4]; | 562 uint8_t tempBuf[4]; |
575 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); | 563 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); |
576 + UBool isNotCESU8 = !hasCESU8Data(cnv); | 564 + UBool isNotCESU8 = !hasCESU8Data(cnv); |
577 | 565 |
578 if (cnv->fromUChar32 && myTarget < targetLimit) | 566 if (cnv->fromUChar32 && myTarget < targetLimit) |
579 { | 567 { |
580 Index: source/common/unicode/urename.h | 568 diff --git a/source/common/ucnvbocu.cpp b/source/common/ucnvbocu.cpp |
581 =================================================================== | 569 index b97d666..281d6d9 100644 |
582 --- source/common/unicode/urename.h» (revision 259715) | 570 --- a/source/common/ucnvbocu.cpp |
583 +++ source/common/unicode/urename.h» (working copy) | 571 +++ b/source/common/ucnvbocu.cpp |
| 572 @@ -19,7 +19,7 @@ |
| 573 |
| 574 #include "unicode/utypes.h" |
| 575 |
| 576 -#if !UCONFIG_NO_CONVERSION |
| 577 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| 578 |
| 579 #include "unicode/ucnv.h" |
| 580 #include "unicode/ucnv_cb.h" |
| 581 diff --git a/source/common/ucnvhz.c b/source/common/ucnvhz.c |
| 582 index 3760c39..51825e2 100644 |
| 583 --- a/source/common/ucnvhz.c |
| 584 +++ b/source/common/ucnvhz.c |
| 585 @@ -16,7 +16,7 @@ |
| 586 |
| 587 #include "unicode/utypes.h" |
| 588 |
| 589 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 590 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 591 |
| 592 #include "cmemory.h" |
| 593 #include "unicode/ucnv.h" |
| 594 @@ -635,4 +635,4 @@ const UConverterSharedData _HZData={ |
| 595 0 |
| 596 }; |
| 597 |
| 598 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| 599 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF
IG_NO_NON_HTML5_CONVERSION */ |
| 600 diff --git a/source/common/ucnvisci.c b/source/common/ucnvisci.c |
| 601 index fe61d40..16fd0a3 100644 |
| 602 --- a/source/common/ucnvisci.c |
| 603 +++ b/source/common/ucnvisci.c |
| 604 @@ -17,7 +17,7 @@ |
| 605 |
| 606 #include "unicode/utypes.h" |
| 607 |
| 608 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| 609 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION |
| 610 |
| 611 #include "unicode/ucnv.h" |
| 612 #include "unicode/ucnv_cb.h" |
| 613 diff --git a/source/common/ucnvscsu.c b/source/common/ucnvscsu.c |
| 614 index c6e96e1..a6f8c9e 100644 |
| 615 --- a/source/common/ucnvscsu.c |
| 616 +++ b/source/common/ucnvscsu.c |
| 617 @@ -21,7 +21,7 @@ |
| 618 |
| 619 #include "unicode/utypes.h" |
| 620 |
| 621 -#if !UCONFIG_NO_CONVERSION |
| 622 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| 623 |
| 624 #include "unicode/ucnv.h" |
| 625 #include "unicode/ucnv_cb.h" |
| 626 diff --git a/source/common/unicode/uconfig.h b/source/common/unicode/uconfig.h |
| 627 index ed073b6..8df56e6 100644 |
| 628 --- a/source/common/unicode/uconfig.h |
| 629 +++ b/source/common/unicode/uconfig.h |
| 630 @@ -270,6 +270,14 @@ |
| 631 #endif |
| 632 |
| 633 /** |
| 634 + * This switch turns off all the converters NOT listed in |
| 635 + * the encoding standard : http://encoding.spec.whatwg.org |
| 636 + */ |
| 637 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION |
| 638 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 |
| 639 +#endif |
| 640 + |
| 641 +/** |
| 642 * \def UCONFIG_NO_LEGACY_CONVERSION |
| 643 * This switch turns off all converters except for |
| 644 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) |
| 645 diff --git a/source/common/unicode/urename.h b/source/common/unicode/urename.h |
| 646 index a817262..89becca 100644 |
| 647 --- a/source/common/unicode/urename.h |
| 648 +++ b/source/common/unicode/urename.h |
584 @@ -73,12 +73,14 @@ | 649 @@ -73,12 +73,14 @@ |
585 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) | 650 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) |
586 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) | 651 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) |
587 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) | 652 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) |
588 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 653 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
589 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) | 654 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) |
590 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) | 655 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) |
591 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) | 656 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) |
592 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) | 657 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) |
593 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) | 658 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) |
(...skipping 14 matching lines...) Expand all Loading... |
608 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) | 673 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) |
609 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) | 674 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) |
610 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) | 675 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) |
611 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) | 676 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) |
612 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 677 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
613 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) | 678 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) |
614 +#endif | 679 +#endif |
615 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) | 680 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) |
616 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) | 681 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) |
617 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) | 682 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) |
618 Index: source/common/ucnv_cnv.h | 683 diff --git a/source/i18n/csdetect.cpp b/source/i18n/csdetect.cpp |
619 =================================================================== | 684 index 3efbd49..ba5b18c 100644 |
620 --- source/common/ucnv_cnv.h» (revision 259715) | 685 --- a/source/i18n/csdetect.cpp |
621 +++ source/common/ucnv_cnv.h» (working copy) | 686 +++ b/source/i18n/csdetect.cpp |
622 @@ -256,11 +256,15 @@ | 687 @@ -110,6 +110,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { |
623 extern const UConverterSharedData | 688 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), |
624 _MBCSData, _Latin1Data, | 689 |
625 _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, | 690 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), |
626 - _ISO2022Data, | |
627 + _ISO2022Data, | |
628 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 691 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
629 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6
, | 692 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), |
630 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData1
9, | 693 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), |
631 _HZData,_ISCIIData, _SCSUData, _ASCIIData, | 694 |
632 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _Comp
oundTextData; | 695 @@ -117,6 +118,7 @@ static void U_CALLCONV initRecognizers(UErrorCode &status) { |
633 +#else | 696 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), |
634 + _ASCIIData, _UTF16Data, _UTF32Data; | 697 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), |
| 698 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) |
635 +#endif | 699 +#endif |
| 700 }; |
| 701 int32_t rCount = ARRAY_SIZE(tempArray); |
636 | 702 |
637 U_CDECL_END | 703 diff --git a/source/i18n/csr2022.cpp b/source/i18n/csr2022.cpp |
638 | 704 index 3db0bc9..be3eafa 100644 |
639 Index: source/common/ucnv_lmb.c | 705 --- a/source/i18n/csr2022.cpp |
640 =================================================================== | 706 +++ b/source/i18n/csr2022.cpp |
641 --- source/common/ucnv_lmb.c» (revision 291619) | 707 @@ -119,6 +119,7 @@ static const uint8_t escapeSequences_2022JP[][5] = { |
642 +++ source/common/ucnv_lmb.c» (working copy) | |
643 @@ -25,7 +25,7 @@ | |
644 | |
645 #include "unicode/utypes.h" | |
646 | |
647 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
648 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
649 | |
650 #include "unicode/ucnv_err.h" | |
651 #include "unicode/ucnv.h" | |
652 Index: source/common/ucnvhz.c | |
653 =================================================================== | |
654 --- source/common/ucnvhz.c» (revision 291619) | |
655 +++ source/common/ucnvhz.c» (working copy) | |
656 @@ -16,7 +16,7 @@ | |
657 | |
658 #include "unicode/utypes.h" | |
659 | |
660 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
661 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
662 | |
663 #include "cmemory.h" | |
664 #include "unicode/ucnv.h" | |
665 @@ -637,4 +637,4 @@ | |
666 0 | |
667 }; | |
668 | |
669 -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ | |
670 +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONF
IG_NO_NON_HTML5_CONVERSION */ | |
671 Index: source/common/ucnv_ct.c | |
672 =================================================================== | |
673 --- source/common/ucnv_ct.c» (revision 291619) | |
674 +++ source/common/ucnv_ct.c» (working copy) | |
675 @@ -14,7 +14,7 @@ | |
676 | |
677 #include "unicode/utypes.h" | |
678 | |
679 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION | |
680 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_
HTML5_CONVERSION | |
681 | |
682 #include "unicode/ucnv.h" | |
683 #include "unicode/uset.h" | |
684 Index: source/i18n/csrsbcs.h | |
685 =================================================================== | |
686 --- source/i18n/csrsbcs.h» (revision 291619) | |
687 +++ source/i18n/csrsbcs.h» (working copy) | |
688 @@ -50,6 +50,7 @@ | |
689 | |
690 }; | |
691 | |
692 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
693 class NGramParser_IBM420 : public NGramParser | |
694 { | |
695 private: | |
696 @@ -61,6 +62,7 @@ | |
697 public: | |
698 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); | |
699 }; | |
700 +#endif | |
701 | |
702 | |
703 class CharsetRecog_sbcs : public CharsetRecognizer | |
704 @@ -229,6 +231,7 @@ | |
705 virtual UBool match(InputText *det, CharsetMatch *results) const; | |
706 }; | |
707 | |
708 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
709 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs | |
710 { | |
711 public: | |
712 @@ -280,6 +283,7 @@ | |
713 | |
714 virtual UBool match(InputText *det, CharsetMatch *results) const; | |
715 }; | |
716 +#endif | |
717 | |
718 U_NAMESPACE_END | |
719 | |
720 Index: source/i18n/csr2022.h | |
721 =================================================================== | |
722 --- source/i18n/csr2022.h» (revision 291619) | |
723 +++ source/i18n/csr2022.h» (working copy) | |
724 @@ -65,6 +65,7 @@ | |
725 UBool match(InputText *textIn, CharsetMatch *results) const; | |
726 }; | |
727 | |
728 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | |
729 class CharsetRecog_2022KR :public CharsetRecog_2022 { | |
730 public: | |
731 virtual ~CharsetRecog_2022KR(); | |
732 @@ -84,6 +85,7 @@ | |
733 | |
734 UBool match(InputText *textIn, CharsetMatch *results) const; | |
735 }; | |
736 +#endif | |
737 | |
738 U_NAMESPACE_END | |
739 | |
740 Index: source/i18n/csr2022.cpp | |
741 =================================================================== | |
742 --- source/i18n/csr2022.cpp» (revision 291619) | |
743 +++ source/i18n/csr2022.cpp» (working copy) | |
744 @@ -119,6 +119,7 @@ | |
745 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 | 708 {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
746 }; | 709 }; |
747 | 710 |
748 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 711 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
749 static const uint8_t escapeSequences_2022KR[][5] = { | 712 static const uint8_t escapeSequences_2022KR[][5] = { |
750 {0x1b, 0x24, 0x29, 0x43, 0x00} | 713 {0x1b, 0x24, 0x29, 0x43, 0x00} |
751 }; | 714 }; |
752 @@ -136,6 +137,7 @@ | 715 @@ -136,6 +137,7 @@ static const uint8_t escapeSequences_2022CN[][5] = { |
753 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 | 716 {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
754 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 | 717 {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
755 }; | 718 }; |
756 +#endif | 719 +#endif |
757 | 720 |
758 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} | 721 CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
759 | 722 |
760 @@ -152,6 +154,7 @@ | 723 @@ -152,6 +154,7 @@ UBool CharsetRecog_2022JP::match(InputText *textIn, CharsetM
atch *results) const |
761 return (confidence > 0); | 724 return (confidence > 0); |
762 } | 725 } |
763 | 726 |
764 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 727 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
765 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} | 728 CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
766 | 729 |
767 const char *CharsetRecog_2022KR::getName() const { | 730 const char *CharsetRecog_2022KR::getName() const { |
768 @@ -181,6 +184,7 @@ | 731 @@ -181,6 +184,7 @@ UBool CharsetRecog_2022CN::match(InputText *textIn, CharsetM
atch *results) const |
769 results->set(textIn, this, confidence); | 732 results->set(textIn, this, confidence); |
770 return (confidence > 0); | 733 return (confidence > 0); |
771 } | 734 } |
772 +#endif | 735 +#endif |
773 | 736 |
774 CharsetRecog_2022::~CharsetRecog_2022() { | 737 CharsetRecog_2022::~CharsetRecog_2022() { |
775 // nothing to do | 738 // nothing to do |
776 Index: source/i18n/csdetect.cpp | 739 diff --git a/source/i18n/csr2022.h b/source/i18n/csr2022.h |
777 =================================================================== | 740 index 2ac2b87..dad22c7 100644 |
778 --- source/i18n/csdetect.cpp» (revision 291619) | 741 --- a/source/i18n/csr2022.h |
779 +++ source/i18n/csdetect.cpp» (working copy) | 742 +++ b/source/i18n/csr2022.h |
780 @@ -110,6 +110,7 @@ | 743 @@ -65,6 +65,7 @@ public: |
781 new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), | 744 UBool match(InputText *textIn, CharsetMatch *results) const; |
| 745 }; |
782 | 746 |
783 new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), | |
784 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 747 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
785 new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), | 748 class CharsetRecog_2022KR :public CharsetRecog_2022 { |
786 new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), | 749 public: |
| 750 virtual ~CharsetRecog_2022KR(); |
| 751 @@ -84,6 +85,7 @@ public: |
787 | 752 |
788 @@ -117,6 +118,7 @@ | 753 UBool match(InputText *textIn, CharsetMatch *results) const; |
789 new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), | 754 }; |
790 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), | |
791 new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) | |
792 +#endif | 755 +#endif |
793 }; | |
794 int32_t rCount = ARRAY_SIZE(tempArray); | |
795 | 756 |
796 Index: source/i18n/csrsbcs.cpp | 757 U_NAMESPACE_END |
797 =================================================================== | 758 |
798 --- source/i18n/csrsbcs.cpp» (revision 291619) | 759 diff --git a/source/i18n/csrsbcs.cpp b/source/i18n/csrsbcs.cpp |
799 +++ source/i18n/csrsbcs.cpp» (working copy) | 760 index d03367c..7b70dc1 100644 |
800 @@ -137,6 +137,7 @@ | 761 --- a/source/i18n/csrsbcs.cpp |
| 762 +++ b/source/i18n/csrsbcs.cpp |
| 763 @@ -137,6 +137,7 @@ int32_t NGramParser::parse(InputText *det) |
801 return (int32_t) (rawPercent * 300.0); | 764 return (int32_t) (rawPercent * 300.0); |
802 } | 765 } |
803 | 766 |
804 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 767 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
805 static const uint8_t unshapeMap_IBM420[] = { | 768 static const uint8_t unshapeMap_IBM420[] = { |
806 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A
-B -C -D -E -F */ | 769 /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A
-B -C -D -E -F */ |
807 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
x40, 0x40, 0x40, 0x40, 0x40, | 770 /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
x40, 0x40, 0x40, 0x40, 0x40, |
808 @@ -232,6 +233,7 @@ | 771 @@ -232,6 +233,7 @@ void NGramParser_IBM420::parseCharacters(InputText *det) |
809 } | 772 } |
810 } | 773 } |
811 } | 774 } |
812 +#endif | 775 +#endif |
813 | 776 |
814 CharsetRecog_sbcs::CharsetRecog_sbcs() | 777 CharsetRecog_sbcs::CharsetRecog_sbcs() |
815 { | 778 { |
816 @@ -624,6 +626,7 @@ | 779 @@ -624,6 +626,7 @@ static const uint8_t charMap_KOI8_R[] = { |
817 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, | 780 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
818 }; | 781 }; |
819 | 782 |
820 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 783 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
821 static const int32_t ngrams_IBM424_he_rtl[] = { | 784 static const int32_t ngrams_IBM424_he_rtl[] = { |
822 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404
546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405
641, | 785 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404
546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405
641, |
823 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454
056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514
045, | 786 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454
056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514
045, |
824 @@ -691,6 +694,7 @@ | 787 @@ -691,6 +694,7 @@ static const uint8_t charMap_IBM420_ar[]= { |
825 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0
xEB, 0x40, 0xED, 0xEE, 0xEF, | 788 /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0
xEB, 0x40, 0xED, 0xEE, 0xEF, |
826 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
xFB, 0xFC, 0xFD, 0xFE, 0x40, | 789 /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0
xFB, 0xFC, 0xFD, 0xFE, 0x40, |
827 }; | 790 }; |
828 +#endif | 791 +#endif |
829 | 792 |
830 //ISO-8859-1,2,5,6,7,8,9 Ngrams | 793 //ISO-8859-1,2,5,6,7,8,9 Ngrams |
831 | 794 |
832 @@ -1155,6 +1159,7 @@ | 795 @@ -1155,6 +1159,7 @@ UBool CharsetRecog_KOI8_R::match(InputText *textIn, Charse
tMatch *results) const |
833 return (confidence > 0); | 796 return (confidence > 0); |
834 } | 797 } |
835 | 798 |
836 +#if !UCONFIG_NO_NON_HTML5_CONVERSION | 799 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
837 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() | 800 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() |
838 { | 801 { |
839 // nothing to do | 802 // nothing to do |
840 @@ -1253,6 +1258,7 @@ | 803 @@ -1253,6 +1258,7 @@ UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn,
CharsetMatch *results |
841 results->set(textIn, this, confidence); | 804 results->set(textIn, this, confidence); |
842 return (confidence > 0); | 805 return (confidence > 0); |
843 } | 806 } |
844 +#endif | 807 +#endif |
845 | 808 |
846 U_NAMESPACE_END | 809 U_NAMESPACE_END |
847 #endif | 810 #endif |
| 811 diff --git a/source/i18n/csrsbcs.h b/source/i18n/csrsbcs.h |
| 812 index 2579c02..7789f9b 100644 |
| 813 --- a/source/i18n/csrsbcs.h |
| 814 +++ b/source/i18n/csrsbcs.h |
| 815 @@ -50,6 +50,7 @@ public: |
| 816 |
| 817 }; |
| 818 |
| 819 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 820 class NGramParser_IBM420 : public NGramParser |
| 821 { |
| 822 private: |
| 823 @@ -61,6 +62,7 @@ private: |
| 824 public: |
| 825 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); |
| 826 }; |
| 827 +#endif |
| 828 |
| 829 |
| 830 class CharsetRecog_sbcs : public CharsetRecognizer |
| 831 @@ -229,6 +231,7 @@ public: |
| 832 virtual UBool match(InputText *det, CharsetMatch *results) const; |
| 833 }; |
| 834 |
| 835 +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| 836 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs |
| 837 { |
| 838 public: |
| 839 @@ -280,6 +283,7 @@ class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM42
0_ar { |
| 840 |
| 841 virtual UBool match(InputText *det, CharsetMatch *results) const; |
| 842 }; |
| 843 +#endif |
| 844 |
| 845 U_NAMESPACE_END |
| 846 |
OLD | NEW |