Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Side by Side Diff: Source/wtf/text/TextCodecICU.cpp

Issue 1167523003: Define a variable to distinguish system_icu from bundled_icu in Blink (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: fix the comment in BUILD.gn Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « Source/wtf/text/TextCodecICU.h ('k') | Source/wtf/unicode/CharacterNames.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
64 // We register Hebrew with logical ordering using a separate name. 64 // We register Hebrew with logical ordering using a separate name.
65 // Otherwise, this would share the same canonical name as the 65 // Otherwise, this would share the same canonical name as the
66 // visual ordering case, and then TextEncoding could not tell them 66 // visual ordering case, and then TextEncoding could not tell them
67 // apart; ICU treats these names as synonyms. 67 // apart; ICU treats these names as synonyms.
68 registrar("ISO-8859-8-I", "ISO-8859-8-I"); 68 registrar("ISO-8859-8-I", "ISO-8859-8-I");
69 69
70 int32_t numEncodings = ucnv_countAvailable(); 70 int32_t numEncodings = ucnv_countAvailable();
71 for (int32_t i = 0; i < numEncodings; ++i) { 71 for (int32_t i = 0; i < numEncodings; ++i) {
72 const char* name = ucnv_getAvailableName(i); 72 const char* name = ucnv_getAvailableName(i);
73 UErrorCode error = U_ZERO_ERROR; 73 UErrorCode error = U_ZERO_ERROR;
74 // Try MIME before trying IANA to pick up commonly used names like 74 #if !defined(USING_SYSTEM_ICU)
75 // 'EUC-JP' instead of horrendously long names like 75 const char* primaryStandard = "HTML";
76 // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. 76 const char* secondaryStandard = "MIME";
77 const char* standardName = ucnv_getStandardName(name, "MIME", &error); 77 #else
78 if (!U_SUCCESS(error) || !standardName) { 78 const char* primaryStandard = "MIME";
79 const char* secondaryStandard = "IANA";
80 #endif
81 const char* standardName = ucnv_getStandardName(name, primaryStandard, & error);
82 if (U_FAILURE(error) || !standardName) {
79 error = U_ZERO_ERROR; 83 error = U_ZERO_ERROR;
80 // Try IANA to pick up 'windows-12xx' and other names 84 // Try IANA to pick up 'windows-12xx' and other names
81 // which are not preferred MIME names but are widely used. 85 // which are not preferred MIME names but are widely used.
82 standardName = ucnv_getStandardName(name, "IANA", &error); 86 standardName = ucnv_getStandardName(name, secondaryStandard, &error) ;
83 if (!U_SUCCESS(error) || !standardName) 87 if (U_FAILURE(error) || !standardName)
84 continue; 88 continue;
85 } 89 }
86 90
87 // A number of these aliases are handled in Chrome's copy of ICU, but 91 // A number of these aliases are handled in Chrome's copy of ICU, but
88 // Chromium can be compiled with the system ICU. 92 // Chromium can be compiled with the system ICU.
89 93
90 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. 94 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
91 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding 95 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
92 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. 96 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
97 #if defined(USING_SYSTEM_ICU)
93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80 ")) 98 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80 "))
94 standardName = "GBK"; 99 standardName = "GBK";
95 // Similarly, EUC-KR encodings all map to an extended version, but 100 // Similarly, EUC-KR encodings all map to an extended version, but
96 // per HTML5, the canonical name still should be EUC-KR. 101 // per HTML5, the canonical name still should be EUC-KR.
97 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5 601") || !strcmp(standardName, "cp1363")) 102 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5 601") || !strcmp(standardName, "cp1363"))
98 standardName = "EUC-KR"; 103 standardName = "EUC-KR";
99 // And so on. 104 // And so on.
100 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is return ed in different case by ICU 3.2 and 3.6. 105 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is return ed in different case by ICU 3.2 and 3.6.
101 standardName = "windows-1254"; 106 standardName = "windows-1254";
102 else if (!strcmp(standardName, "TIS-620")) 107 else if (!strcmp(standardName, "TIS-620"))
103 standardName = "windows-874"; 108 standardName = "windows-874";
109 #endif
104 110
105 registrar(standardName, standardName); 111 registrar(standardName, standardName);
106 112
107 uint16_t numAliases = ucnv_countAliases(name, &error); 113 uint16_t numAliases = ucnv_countAliases(name, &error);
108 ASSERT(U_SUCCESS(error)); 114 ASSERT(U_SUCCESS(error));
109 if (U_SUCCESS(error)) 115 if (U_SUCCESS(error))
110 for (uint16_t j = 0; j < numAliases; ++j) { 116 for (uint16_t j = 0; j < numAliases; ++j) {
111 error = U_ZERO_ERROR; 117 error = U_ZERO_ERROR;
112 const char* alias = ucnv_getAlias(name, j, &error); 118 const char* alias = ucnv_getAlias(name, j, &error);
113 ASSERT(U_SUCCESS(error)); 119 ASSERT(U_SUCCESS(error));
114 if (U_SUCCESS(error) && alias != standardName) 120 if (U_SUCCESS(error) && alias != standardName)
115 registrar(alias, standardName); 121 registrar(alias, standardName);
116 } 122 }
117 } 123 }
118 124
125 // These two entries have to be added here because ICU's converter table
126 // cannot have both ISO-8859-8-I and ISO-8859-8.
127 registrar("csISO88598I", "ISO-8859-8-I");
128 registrar("logical", "ISO-8859-8-I");
129
130 #if defined(USING_SYSTEM_ICU)
119 // Additional alias for MacCyrillic not present in ICU. 131 // Additional alias for MacCyrillic not present in ICU.
120 registrar("maccyrillic", "x-mac-cyrillic"); 132 registrar("maccyrillic", "x-mac-cyrillic");
121 133
122 // Additional aliases that historically were present in the encoding 134 // Additional aliases that historically were present in the encoding
123 // table in WebKit on Macintosh that don't seem to be present in ICU. 135 // table in WebKit on Macintosh that don't seem to be present in ICU.
124 // Perhaps we can prove these are not used on the web and remove them. 136 // Perhaps we can prove these are not used on the web and remove them.
125 // Or perhaps we can get them added to ICU. 137 // Or perhaps we can get them added to ICU.
126 registrar("x-mac-roman", "macintosh"); 138 registrar("x-mac-roman", "macintosh");
127 registrar("x-mac-ukrainian", "x-mac-cyrillic"); 139 registrar("x-mac-ukrainian", "x-mac-cyrillic");
128 registrar("cn-big5", "Big5"); 140 registrar("cn-big5", "Big5");
129 registrar("x-x-big5", "Big5"); 141 registrar("x-x-big5", "Big5");
130 registrar("cn-gb", "GBK"); 142 registrar("cn-gb", "GBK");
131 registrar("csgb231280", "GBK"); 143 registrar("csgb231280", "GBK");
132 registrar("x-euc-cn", "GBK"); 144 registrar("x-euc-cn", "GBK");
133 registrar("x-gbk", "GBK"); 145 registrar("x-gbk", "GBK");
134 registrar("csISO88598I", "ISO-8859-8-I");
135 registrar("koi", "KOI8-R"); 146 registrar("koi", "KOI8-R");
136 registrar("logical", "ISO-8859-8-I");
137 registrar("visual", "ISO-8859-8"); 147 registrar("visual", "ISO-8859-8");
138 registrar("winarabic", "windows-1256"); 148 registrar("winarabic", "windows-1256");
139 registrar("winbaltic", "windows-1257"); 149 registrar("winbaltic", "windows-1257");
140 registrar("wincyrillic", "windows-1251"); 150 registrar("wincyrillic", "windows-1251");
141 registrar("iso-8859-11", "windows-874"); 151 registrar("iso-8859-11", "windows-874");
142 registrar("iso8859-11", "windows-874"); 152 registrar("iso8859-11", "windows-874");
143 registrar("dos-874", "windows-874"); 153 registrar("dos-874", "windows-874");
144 registrar("wingreek", "windows-1253"); 154 registrar("wingreek", "windows-1253");
145 registrar("winhebrew", "windows-1255"); 155 registrar("winhebrew", "windows-1255");
146 registrar("winlatin2", "windows-1250"); 156 registrar("winlatin2", "windows-1250");
(...skipping 22 matching lines...) Expand all
169 registrar("ISO8859-13", "ISO-8859-13"); 179 registrar("ISO8859-13", "ISO-8859-13");
170 registrar("ISO8859-14", "ISO-8859-14"); 180 registrar("ISO8859-14", "ISO-8859-14");
171 registrar("ISO8859-15", "ISO-8859-15"); 181 registrar("ISO8859-15", "ISO-8859-15");
172 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label 182 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label
173 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/ ). 183 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/ ).
174 184
175 // Additional aliases present in the WHATWG Encoding Standard 185 // Additional aliases present in the WHATWG Encoding Standard
176 // and Firefox (as of Oct 2014), but not in the upstream ICU. 186 // and Firefox (as of Oct 2014), but not in the upstream ICU.
177 // Three entries for windows-1252 need not be listed here because 187 // Three entries for windows-1252 need not be listed here because
178 // TextCodecLatin1 registers them. 188 // TextCodecLatin1 registers them.
179 // FIXME: We may introduce SYSTEM_ICU and enclose this block
180 // with |#if SYSTEM_ICU| because Chromium's ICU has them all.
181 registrar("csiso58gb231280", "GBK"); 189 registrar("csiso58gb231280", "GBK");
182 registrar("csiso88596e", "ISO-8859-6"); 190 registrar("csiso88596e", "ISO-8859-6");
183 registrar("csiso88596i", "ISO-8859-6"); 191 registrar("csiso88596i", "ISO-8859-6");
184 registrar("csiso88598e", "ISO-8859-8"); 192 registrar("csiso88598e", "ISO-8859-8");
185 registrar("gb_2312", "GBK"); 193 registrar("gb_2312", "GBK");
186 registrar("iso88592", "ISO-8859-2"); 194 registrar("iso88592", "ISO-8859-2");
187 registrar("iso88593", "ISO-8859-3"); 195 registrar("iso88593", "ISO-8859-3");
188 registrar("iso88594", "ISO-8859-4"); 196 registrar("iso88594", "ISO-8859-4");
189 registrar("iso88595", "ISO-8859-5"); 197 registrar("iso88595", "ISO-8859-5");
190 registrar("iso88596", "ISO-8859-6"); 198 registrar("iso88596", "ISO-8859-6");
(...skipping 14 matching lines...) Expand all
205 registrar("iso_8859-8", "ISO-8859-8"); 213 registrar("iso_8859-8", "ISO-8859-8");
206 registrar("iso_8859-9", "windows-1254"); 214 registrar("iso_8859-9", "windows-1254");
207 registrar("iso_8859-15", "ISO-8859-15"); 215 registrar("iso_8859-15", "ISO-8859-15");
208 registrar("koi8_r", "KOI8-R"); 216 registrar("koi8_r", "KOI8-R");
209 registrar("x-cp1253", "windows-1253"); 217 registrar("x-cp1253", "windows-1253");
210 registrar("x-cp1254", "windows-1254"); 218 registrar("x-cp1254", "windows-1254");
211 registrar("x-cp1255", "windows-1255"); 219 registrar("x-cp1255", "windows-1255");
212 registrar("x-cp1256", "windows-1256"); 220 registrar("x-cp1256", "windows-1256");
213 registrar("x-cp1257", "windows-1257"); 221 registrar("x-cp1257", "windows-1257");
214 registrar("x-cp1258", "windows-1258"); 222 registrar("x-cp1258", "windows-1258");
223 #endif
215 } 224 }
216 225
217 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar) 226 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
218 { 227 {
219 // See comment above in registerEncodingNames. 228 // See comment above in registerEncodingNames.
220 registrar("ISO-8859-8-I", create, 0); 229 registrar("ISO-8859-8-I", create, 0);
221 230
222 int32_t numEncodings = ucnv_countAvailable(); 231 int32_t numEncodings = ucnv_countAvailable();
223 for (int32_t i = 0; i < numEncodings; ++i) { 232 for (int32_t i = 0; i < numEncodings; ++i) {
224 const char* name = ucnv_getAvailableName(i); 233 const char* name = ucnv_getAvailableName(i);
225 UErrorCode error = U_ZERO_ERROR; 234 UErrorCode error = U_ZERO_ERROR;
226 const char* standardName = ucnv_getStandardName(name, "MIME", &error); 235 const char* standardName = ucnv_getStandardName(name, "MIME", &error);
227 if (!U_SUCCESS(error) || !standardName) { 236 if (!U_SUCCESS(error) || !standardName) {
228 error = U_ZERO_ERROR; 237 error = U_ZERO_ERROR;
229 standardName = ucnv_getStandardName(name, "IANA", &error); 238 standardName = ucnv_getStandardName(name, "IANA", &error);
230 if (!U_SUCCESS(error) || !standardName) 239 if (!U_SUCCESS(error) || !standardName)
231 continue; 240 continue;
232 } 241 }
233 registrar(standardName, create, 0); 242 registrar(standardName, create, 0);
234 } 243 }
235 } 244 }
236 245
237 TextCodecICU::TextCodecICU(const TextEncoding& encoding) 246 TextCodecICU::TextCodecICU(const TextEncoding& encoding)
238 : m_encoding(encoding) 247 : m_encoding(encoding)
239 , m_converterICU(0) 248 , m_converterICU(0)
249 #if defined(USING_SYSTEM_ICU)
240 , m_needsGBKFallbacks(false) 250 , m_needsGBKFallbacks(false)
251 #endif
241 { 252 {
242 } 253 }
243 254
244 TextCodecICU::~TextCodecICU() 255 TextCodecICU::~TextCodecICU()
245 { 256 {
246 releaseICUConverter(); 257 releaseICUConverter();
247 } 258 }
248 259
249 void TextCodecICU::releaseICUConverter() const 260 void TextCodecICU::releaseICUConverter() const
250 { 261 {
251 if (m_converterICU) { 262 if (m_converterICU) {
252 UConverter*& cachedConverter = cachedConverterICU(); 263 UConverter*& cachedConverter = cachedConverterICU();
253 if (cachedConverter) 264 if (cachedConverter)
254 ucnv_close(cachedConverter); 265 ucnv_close(cachedConverter);
255 cachedConverter = m_converterICU; 266 cachedConverter = m_converterICU;
256 m_converterICU = 0; 267 m_converterICU = 0;
257 } 268 }
258 } 269 }
259 270
260 void TextCodecICU::createICUConverter() const 271 void TextCodecICU::createICUConverter() const
261 { 272 {
262 ASSERT(!m_converterICU); 273 ASSERT(!m_converterICU);
263 274
275 #if defined(USING_SYSTEM_ICU)
264 const char* name = m_encoding.name(); 276 const char* name = m_encoding.name();
265 m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3]; 277 m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];
278 #endif
266 279
267 UErrorCode err; 280 UErrorCode err;
268 281
269 UConverter*& cachedConverter = cachedConverterICU(); 282 UConverter*& cachedConverter = cachedConverterICU();
270 if (cachedConverter) { 283 if (cachedConverter) {
271 err = U_ZERO_ERROR; 284 err = U_ZERO_ERROR;
272 const char* cachedName = ucnv_getName(cachedConverter, &err); 285 const char* cachedName = ucnv_getName(cachedConverter, &err);
273 if (U_SUCCESS(err) && m_encoding == cachedName) { 286 if (U_SUCCESS(err) && m_encoding == cachedName) {
274 m_converterICU = cachedConverter; 287 m_converterICU = cachedConverter;
275 cachedConverter = 0; 288 cachedConverter = 0;
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
360 } while (err == U_BUFFER_OVERFLOW_ERROR); 373 } while (err == U_BUFFER_OVERFLOW_ERROR);
361 374
362 if (U_FAILURE(err)) { 375 if (U_FAILURE(err)) {
363 // flush the converter so it can be reused, and not be bothered by this error. 376 // flush the converter so it can be reused, and not be bothered by this error.
364 do { 377 do {
365 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err); 378 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err);
366 } while (source < sourceLimit); 379 } while (source < sourceLimit);
367 sawError = true; 380 sawError = true;
368 } 381 }
369 382
383 #if !defined(USING_SYSTEM_ICU)
384 // Chrome's copy of ICU does not have the issue described below.
385 return result.toString();
386 #else
370 String resultString = result.toString(); 387 String resultString = result.toString();
371 388
372 // <http://bugs.webkit.org/show_bug.cgi?id=17014> 389 // <http://bugs.webkit.org/show_bug.cgi?id=17014>
373 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5. 390 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
374 if (!strcmp(m_encoding.name(), "GBK") || !strcasecmp(m_encoding.name(), "gb1 8030")) 391 if (!strcmp(m_encoding.name(), "GBK")) {
375 resultString.replace(0xE5E5, ideographicSpaceCharacter); 392 if (!strcasecmp(m_encoding.name(), "gb18030"))
393 resultString.replace(0xE5E5, ideographicSpaceCharacter);
394 // Make GBK compliant to the encoding spec and align with GB18030
395 resultString.replace(0x01F9, 0xE7C8);
396 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3
397 // is resolved, add U+1E3F => 0xE7C7.
398 }
376 399
377 return resultString; 400 return resultString;
401 #endif
378 } 402 }
379 403
380 // We need to apply these fallbacks ourselves as they are not currently supported by ICU and 404 #if defined(USING_SYSTEM_ICU)
381 // they were provided by the old TEC encoding path. Needed to fix <rdar://problem/4708689>. 405 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding
406 // spec, but ICU converter does not have them.
382 static UChar fallbackForGBK(UChar32 character) 407 static UChar fallbackForGBK(UChar32 character)
383 { 408 {
384 switch (character) { 409 switch (character) {
385 case 0x01F9: 410 case 0x01F9:
386 return 0xE7C8; 411 return 0xE7C8; // mapped to xA8xBF by ICU.
387 case 0x1E3F: 412 case 0x1E3F:
388 return 0xE7C7; 413 return 0xE7C7; // mapped to xA8xBC by ICU.
389 case 0x22EF:
390 return 0x2026;
391 case 0x301C:
392 return 0xFF5E;
393 } 414 }
394 return 0; 415 return 0;
395 } 416 }
417 #endif
396 418
397 // Invalid character handler when writing escaped entities for unrepresentable 419 // Invalid character handler when writing escaped entities for unrepresentable
398 // characters. See the declaration of TextCodec::encode for more. 420 // characters. See the declaration of TextCodec::encode for more.
399 static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeA rgs* fromUArgs, const UChar* codeUnits, int32_t length, 421 static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeA rgs* fromUArgs, const UChar* codeUnits, int32_t length,
400 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) 422 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
401 { 423 {
402 if (reason == UCNV_UNASSIGNED) { 424 if (reason == UCNV_UNASSIGNED) {
403 *err = U_ZERO_ERROR; 425 *err = U_ZERO_ERROR;
404 426
405 UnencodableReplacementArray entity; 427 UnencodableReplacementArray entity;
406 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod edEntitiesForUnencodables, entity); 428 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod edEntitiesForUnencodables, entity);
407 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); 429 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);
408 } else 430 } else
409 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP oint, reason, err); 431 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP oint, reason, err);
410 } 432 }
411 433
434 #if defined(USING_SYSTEM_ICU)
412 // Substitutes special GBK characters, escaping all other unassigned entities. 435 // Substitutes special GBK characters, escaping all other unassigned entities.
413 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr omUArgs, const UChar* codeUnits, int32_t length, 436 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr omUArgs, const UChar* codeUnits, int32_t length,
414 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) 437 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)
415 { 438 {
416 UChar outChar; 439 UChar outChar;
417 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { 440 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {
418 const UChar* source = &outChar; 441 const UChar* source = &outChar;
419 *err = U_ZERO_ERROR; 442 *err = U_ZERO_ERROR;
420 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); 443 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
421 return; 444 return;
(...skipping 23 matching lines...) Expand all
445 { 468 {
446 UChar outChar; 469 UChar outChar;
447 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { 470 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {
448 const UChar* source = &outChar; 471 const UChar* source = &outChar;
449 *err = U_ZERO_ERROR; 472 *err = U_ZERO_ERROR;
450 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); 473 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
451 return; 474 return;
452 } 475 }
453 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP oint, reason, err); 476 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP oint, reason, err);
454 } 477 }
478 #endif // USING_SYSTEM_ICU
455 479
456 class TextCodecInput { 480 class TextCodecInput {
457 public: 481 public:
458 TextCodecInput(const TextEncoding& encoding, const UChar* characters, size_t length) 482 TextCodecInput(const TextEncoding& encoding, const UChar* characters, size_t length)
459 : m_begin(characters) 483 : m_begin(characters)
460 , m_end(characters + length) 484 , m_end(characters + length)
461 { } 485 { }
462 486
463 TextCodecInput(const TextEncoding& encoding, const LChar* characters, size_t length) 487 TextCodecInput(const TextEncoding& encoding, const LChar* characters, size_t length)
464 { 488 {
(...skipping 16 matching lines...) Expand all
481 CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHan dling handling) 505 CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHan dling handling)
482 { 506 {
483 const UChar* source = input.begin(); 507 const UChar* source = input.begin();
484 const UChar* end = input.end(); 508 const UChar* end = input.end();
485 509
486 UErrorCode err = U_ZERO_ERROR; 510 UErrorCode err = U_ZERO_ERROR;
487 511
488 switch (handling) { 512 switch (handling) {
489 case QuestionMarksForUnencodables: 513 case QuestionMarksForUnencodables:
490 ucnv_setSubstChars(m_converterICU, "?", 1, &err); 514 ucnv_setSubstChars(m_converterICU, "?", 1, &err);
515 #if !defined(USING_SYSTEM_ICU)
516 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUT E, 0, 0, 0, &err);
517 #else
491 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallb ackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); 518 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallb ackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);
519 #endif
492 break; 520 break;
493 case EntitiesForUnencodables: 521 case EntitiesForUnencodables:
522 #if !defined(USING_SYSTEM_ICU)
523 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, U CNV_ESCAPE_XML_DEC, 0, 0, &err);
524 #else
494 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallb ackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); 525 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallb ackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);
526 #endif
495 break; 527 break;
496 case URLEncodedEntitiesForUnencodables: 528 case URLEncodedEntitiesForUnencodables:
529 #if !defined(USING_SYSTEM_ICU)
530 ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0 , 0, &err);
531 #else
497 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEs capedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err); 532 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEs capedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);
533 #endif
498 break; 534 break;
499 } 535 }
500 536
501 ASSERT(U_SUCCESS(err)); 537 ASSERT(U_SUCCESS(err));
502 if (U_FAILURE(err)) 538 if (U_FAILURE(err))
503 return CString(); 539 return CString();
504 540
505 Vector<char> result; 541 Vector<char> result;
506 size_t size = 0; 542 size_t size = 0;
507 do { 543 do {
(...skipping 30 matching lines...) Expand all
538 { 574 {
539 return encodeCommon(characters, length, handling); 575 return encodeCommon(characters, length, handling);
540 } 576 }
541 577
542 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling) 578 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling)
543 { 579 {
544 return encodeCommon(characters, length, handling); 580 return encodeCommon(characters, length, handling);
545 } 581 }
546 582
547 } // namespace WTF 583 } // namespace WTF
OLDNEW
« no previous file with comments | « Source/wtf/text/TextCodecICU.h ('k') | Source/wtf/unicode/CharacterNames.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698