Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(350)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecICU.cpp

Issue 1611343002: wtf reformat test Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: pydent Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> 3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 22 matching lines...) Expand all
33 #include "wtf/text/CString.h" 33 #include "wtf/text/CString.h"
34 #include "wtf/text/CharacterNames.h" 34 #include "wtf/text/CharacterNames.h"
35 #include "wtf/text/StringBuilder.h" 35 #include "wtf/text/StringBuilder.h"
36 #include <unicode/ucnv.h> 36 #include <unicode/ucnv.h>
37 #include <unicode/ucnv_cb.h> 37 #include <unicode/ucnv_cb.h>
38 38
39 namespace WTF { 39 namespace WTF {
40 40
41 const size_t ConversionBufferSize = 16384; 41 const size_t ConversionBufferSize = 16384;
42 42
43 ICUConverterWrapper::~ICUConverterWrapper() 43 ICUConverterWrapper::~ICUConverterWrapper() {
44 if (converter)
45 ucnv_close(converter);
46 }
47
48 static UConverter*& cachedConverterICU() {
49 return wtfThreadData().cachedConverterICU().converter;
50 }
51
52 PassOwnPtr<TextCodec> TextCodecICU::create(const TextEncoding& encoding,
53 const void*) {
54 return adoptPtr(new TextCodecICU(encoding));
55 }
56
57 void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar) {
58 // We register Hebrew with logical ordering using a separate name.
59 // Otherwise, this would share the same canonical name as the
60 // visual ordering case, and then TextEncoding could not tell them
61 // apart; ICU treats these names as synonyms.
62 registrar("ISO-8859-8-I", "ISO-8859-8-I");
63
64 int32_t numEncodings = ucnv_countAvailable();
65 for (int32_t i = 0; i < numEncodings; ++i) {
66 const char* name = ucnv_getAvailableName(i);
67 UErrorCode error = U_ZERO_ERROR;
68 #if !defined(USING_SYSTEM_ICU)
69 const char* primaryStandard = "HTML";
70 const char* secondaryStandard = "MIME";
71 #else
72 const char* primaryStandard = "MIME";
73 const char* secondaryStandard = "IANA";
74 #endif
75 const char* standardName =
76 ucnv_getStandardName(name, primaryStandard, &error);
77 if (U_FAILURE(error) || !standardName) {
78 error = U_ZERO_ERROR;
79 // Try IANA to pick up 'windows-12xx' and other names
80 // which are not preferred MIME names but are widely used.
81 standardName = ucnv_getStandardName(name, secondaryStandard, &error);
82 if (U_FAILURE(error) || !standardName)
83 continue;
84 }
85
86 // A number of these aliases are handled in Chrome's copy of ICU, but
87 // Chromium can be compiled with the system ICU.
88
89 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
90 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
91 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
92 #if defined(USING_SYSTEM_ICU)
93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80"))
94 standardName = "GBK";
95 // Similarly, EUC-KR encodings all map to an extended version, but
96 // per HTML5, the canonical name still should be EUC-KR.
97 else if (!strcmp(standardName, "EUC-KR") ||
98 !strcmp(standardName, "KSC_5601") ||
99 !strcmp(standardName, "cp1363"))
100 standardName = "EUC-KR";
101 // And so on.
102 else if (
103 !strcasecmp(
104 standardName,
105 "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.
106 standardName = "windows-1254";
107 else if (!strcmp(standardName, "TIS-620"))
108 standardName = "windows-874";
109 #endif
110
111 registrar(standardName, standardName);
112
113 uint16_t numAliases = ucnv_countAliases(name, &error);
114 ASSERT(U_SUCCESS(error));
115 if (U_SUCCESS(error))
116 for (uint16_t j = 0; j < numAliases; ++j) {
117 error = U_ZERO_ERROR;
118 const char* alias = ucnv_getAlias(name, j, &error);
119 ASSERT(U_SUCCESS(error));
120 if (U_SUCCESS(error) && alias != standardName)
121 registrar(alias, standardName);
122 }
123 }
124
125 // These two entries have to be added here because ICU's converter table
126 // cannot have both ISO-8859-8-I and ISO-8859-8.
127 registrar("csISO88598I", "ISO-8859-8-I");
128 registrar("logical", "ISO-8859-8-I");
129
130 #if defined(USING_SYSTEM_ICU)
131 // Additional alias for MacCyrillic not present in ICU.
132 registrar("maccyrillic", "x-mac-cyrillic");
133
134 // Additional aliases that historically were present in the encoding
135 // table in WebKit on Macintosh that don't seem to be present in ICU.
136 // Perhaps we can prove these are not used on the web and remove them.
137 // Or perhaps we can get them added to ICU.
138 registrar("x-mac-roman", "macintosh");
139 registrar("x-mac-ukrainian", "x-mac-cyrillic");
140 registrar("cn-big5", "Big5");
141 registrar("x-x-big5", "Big5");
142 registrar("cn-gb", "GBK");
143 registrar("csgb231280", "GBK");
144 registrar("x-euc-cn", "GBK");
145 registrar("x-gbk", "GBK");
146 registrar("koi", "KOI8-R");
147 registrar("visual", "ISO-8859-8");
148 registrar("winarabic", "windows-1256");
149 registrar("winbaltic", "windows-1257");
150 registrar("wincyrillic", "windows-1251");
151 registrar("iso-8859-11", "windows-874");
152 registrar("iso8859-11", "windows-874");
153 registrar("dos-874", "windows-874");
154 registrar("wingreek", "windows-1253");
155 registrar("winhebrew", "windows-1255");
156 registrar("winlatin2", "windows-1250");
157 registrar("winturkish", "windows-1254");
158 registrar("winvietnamese", "windows-1258");
159 registrar("x-cp1250", "windows-1250");
160 registrar("x-cp1251", "windows-1251");
161 registrar("x-euc", "EUC-JP");
162 registrar("x-windows-949", "EUC-KR");
163 registrar("KSC5601", "EUC-KR");
164 registrar("x-uhc", "EUC-KR");
165 registrar("shift-jis", "Shift_JIS");
166
167 // Alternative spelling of ISO encoding names.
168 registrar("ISO8859-1", "ISO-8859-1");
169 registrar("ISO8859-2", "ISO-8859-2");
170 registrar("ISO8859-3", "ISO-8859-3");
171 registrar("ISO8859-4", "ISO-8859-4");
172 registrar("ISO8859-5", "ISO-8859-5");
173 registrar("ISO8859-6", "ISO-8859-6");
174 registrar("ISO8859-7", "ISO-8859-7");
175 registrar("ISO8859-8", "ISO-8859-8");
176 registrar("ISO8859-8-I", "ISO-8859-8-I");
177 registrar("ISO8859-9", "ISO-8859-9");
178 registrar("ISO8859-10", "ISO-8859-10");
179 registrar("ISO8859-13", "ISO-8859-13");
180 registrar("ISO8859-14", "ISO-8859-14");
181 registrar("ISO8859-15", "ISO-8859-15");
182 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label
183 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/ ).
184
185 // Additional aliases present in the WHATWG Encoding Standard
186 // and Firefox (as of Oct 2014), but not in the upstream ICU.
187 // Three entries for windows-1252 need not be listed here because
188 // TextCodecLatin1 registers them.
189 registrar("csiso58gb231280", "GBK");
190 registrar("csiso88596e", "ISO-8859-6");
191 registrar("csiso88596i", "ISO-8859-6");
192 registrar("csiso88598e", "ISO-8859-8");
193 registrar("gb_2312", "GBK");
194 registrar("iso88592", "ISO-8859-2");
195 registrar("iso88593", "ISO-8859-3");
196 registrar("iso88594", "ISO-8859-4");
197 registrar("iso88595", "ISO-8859-5");
198 registrar("iso88596", "ISO-8859-6");
199 registrar("iso88597", "ISO-8859-7");
200 registrar("iso88598", "ISO-8859-8");
201 registrar("iso88599", "windows-1254");
202 registrar("iso885910", "ISO-8859-10");
203 registrar("iso885911", "windows-874");
204 registrar("iso885913", "ISO-8859-13");
205 registrar("iso885914", "ISO-8859-14");
206 registrar("iso885915", "ISO-8859-15");
207 registrar("iso_8859-2", "ISO-8859-2");
208 registrar("iso_8859-3", "ISO-8859-3");
209 registrar("iso_8859-4", "ISO-8859-4");
210 registrar("iso_8859-5", "ISO-8859-5");
211 registrar("iso_8859-6", "ISO-8859-6");
212 registrar("iso_8859-7", "ISO-8859-7");
213 registrar("iso_8859-8", "ISO-8859-8");
214 registrar("iso_8859-9", "windows-1254");
215 registrar("iso_8859-15", "ISO-8859-15");
216 registrar("koi8_r", "KOI8-R");
217 registrar("x-cp1253", "windows-1253");
218 registrar("x-cp1254", "windows-1254");
219 registrar("x-cp1255", "windows-1255");
220 registrar("x-cp1256", "windows-1256");
221 registrar("x-cp1257", "windows-1257");
222 registrar("x-cp1258", "windows-1258");
223 #endif
224 }
225
226 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar) {
227 // See comment above in registerEncodingNames.
228 registrar("ISO-8859-8-I", create, 0);
229
230 int32_t numEncodings = ucnv_countAvailable();
231 for (int32_t i = 0; i < numEncodings; ++i) {
232 const char* name = ucnv_getAvailableName(i);
233 UErrorCode error = U_ZERO_ERROR;
234 const char* standardName = ucnv_getStandardName(name, "MIME", &error);
235 if (!U_SUCCESS(error) || !standardName) {
236 error = U_ZERO_ERROR;
237 standardName = ucnv_getStandardName(name, "IANA", &error);
238 if (!U_SUCCESS(error) || !standardName)
239 continue;
240 }
241 registrar(standardName, create, 0);
242 }
243 }
244
245 TextCodecICU::TextCodecICU(const TextEncoding& encoding)
246 : m_encoding(encoding),
247 m_converterICU(0)
248 #if defined(USING_SYSTEM_ICU)
249 ,
250 m_needsGBKFallbacks(false)
251 #endif
44 { 252 {
45 if (converter) 253 }
46 ucnv_close(converter); 254
47 } 255 TextCodecICU::~TextCodecICU() {
48 256 releaseICUConverter();
49 static UConverter*& cachedConverterICU() 257 }
50 { 258
51 return wtfThreadData().cachedConverterICU().converter; 259 void TextCodecICU::releaseICUConverter() const {
52 } 260 if (m_converterICU) {
53 261 UConverter*& cachedConverter = cachedConverterICU();
54 PassOwnPtr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const v oid*) 262 if (cachedConverter)
55 { 263 ucnv_close(cachedConverter);
56 return adoptPtr(new TextCodecICU(encoding)); 264 cachedConverter = m_converterICU;
57 } 265 m_converterICU = 0;
58 266 }
59 void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar) 267 }
60 { 268
61 // We register Hebrew with logical ordering using a separate name. 269 void TextCodecICU::createICUConverter() const {
62 // Otherwise, this would share the same canonical name as the 270 ASSERT(!m_converterICU);
63 // visual ordering case, and then TextEncoding could not tell them 271
64 // apart; ICU treats these names as synonyms. 272 #if defined(USING_SYSTEM_ICU)
65 registrar("ISO-8859-8-I", "ISO-8859-8-I"); 273 const char* name = m_encoding.name();
66 274 m_needsGBKFallbacks =
67 int32_t numEncodings = ucnv_countAvailable(); 275 name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];
68 for (int32_t i = 0; i < numEncodings; ++i) { 276 #endif
69 const char* name = ucnv_getAvailableName(i); 277
70 UErrorCode error = U_ZERO_ERROR; 278 UErrorCode err;
279
280 UConverter*& cachedConverter = cachedConverterICU();
281 if (cachedConverter) {
282 err = U_ZERO_ERROR;
283 const char* cachedName = ucnv_getName(cachedConverter, &err);
284 if (U_SUCCESS(err) && m_encoding == cachedName) {
285 m_converterICU = cachedConverter;
286 cachedConverter = 0;
287 return;
288 }
289 }
290
291 err = U_ZERO_ERROR;
292 m_converterICU = ucnv_open(m_encoding.name(), &err);
293 #if !LOG_DISABLED
294 if (err == U_AMBIGUOUS_ALIAS_WARNING)
295 WTF_LOG_ERROR("ICU ambiguous alias warning for encoding: %s",
296 m_encoding.name());
297 #endif
298 if (m_converterICU)
299 ucnv_setFallback(m_converterICU, TRUE);
300 }
301
302 int TextCodecICU::decodeToBuffer(UChar* target,
303 UChar* targetLimit,
304 const char*& source,
305 const char* sourceLimit,
306 int32_t* offsets,
307 bool flush,
308 UErrorCode& err) {
309 UChar* targetStart = target;
310 err = U_ZERO_ERROR;
311 ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit,
312 offsets, flush, &err);
313 return target - targetStart;
314 }
315
316 class ErrorCallbackSetter final {
317 STACK_ALLOCATED();
318
319 public:
320 ErrorCallbackSetter(UConverter* converter, bool stopOnError)
321 : m_converter(converter), m_shouldStopOnEncodingErrors(stopOnError) {
322 if (m_shouldStopOnEncodingErrors) {
323 UErrorCode err = U_ZERO_ERROR;
324 ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
325 UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
326 &m_savedContext, &err);
327 ASSERT(err == U_ZERO_ERROR);
328 }
329 }
330 ~ErrorCallbackSetter() {
331 if (m_shouldStopOnEncodingErrors) {
332 UErrorCode err = U_ZERO_ERROR;
333 const void* oldContext;
334 UConverterToUCallback oldAction;
335 ucnv_setToUCallBack(m_converter, m_savedAction, m_savedContext,
336 &oldAction, &oldContext, &err);
337 ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
338 ASSERT(!strcmp(static_cast<const char*>(oldContext),
339 UCNV_SUB_STOP_ON_ILLEGAL));
340 ASSERT(err == U_ZERO_ERROR);
341 }
342 }
343
344 private:
345 UConverter* m_converter;
346 bool m_shouldStopOnEncodingErrors;
347 const void* m_savedContext;
348 UConverterToUCallback m_savedAction;
349 };
350
351 String TextCodecICU::decode(const char* bytes,
352 size_t length,
353 FlushBehavior flush,
354 bool stopOnError,
355 bool& sawError) {
356 // Get a converter for the passed-in encoding.
357 if (!m_converterICU) {
358 createICUConverter();
359 ASSERT(m_converterICU);
360 if (!m_converterICU) {
361 WTF_LOG_ERROR(
362 "error creating ICU encoder even though encoding was in table");
363 return String();
364 }
365 }
366
367 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
368
369 StringBuilder result;
370
371 UChar buffer[ConversionBufferSize];
372 UChar* bufferLimit = buffer + ConversionBufferSize;
373 const char* source = reinterpret_cast<const char*>(bytes);
374 const char* sourceLimit = source + length;
375 int32_t* offsets = nullptr;
376 UErrorCode err = U_ZERO_ERROR;
377
378 do {
379 int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit,
380 offsets, flush != DoNotFlush, err);
381 result.append(buffer, ucharsDecoded);
382 } while (err == U_BUFFER_OVERFLOW_ERROR);
383
384 if (U_FAILURE(err)) {
385 // flush the converter so it can be reused, and not be bothered by this error.
386 do {
387 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true,
388 err);
389 } while (source < sourceLimit);
390 sawError = true;
391 }
392
71 #if !defined(USING_SYSTEM_ICU) 393 #if !defined(USING_SYSTEM_ICU)
72 const char* primaryStandard = "HTML"; 394 // Chrome's copy of ICU does not have the issue described below.
73 const char* secondaryStandard = "MIME"; 395 return result.toString();
74 #else 396 #else
75 const char* primaryStandard = "MIME"; 397 String resultString = result.toString();
76 const char* secondaryStandard = "IANA"; 398
77 #endif 399 // <http://bugs.webkit.org/show_bug.cgi?id=17014>
78 const char* standardName = ucnv_getStandardName(name, primaryStandard, & error); 400 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
79 if (U_FAILURE(error) || !standardName) { 401 if (!strcmp(m_encoding.name(), "GBK")) {
80 error = U_ZERO_ERROR; 402 if (!strcasecmp(m_encoding.name(), "gb18030"))
81 // Try IANA to pick up 'windows-12xx' and other names 403 resultString.replace(0xE5E5, ideographicSpaceCharacter);
82 // which are not preferred MIME names but are widely used. 404 // Make GBK compliant to the encoding spec and align with GB18030
83 standardName = ucnv_getStandardName(name, secondaryStandard, &error) ; 405 resultString.replace(0x01F9, 0xE7C8);
84 if (U_FAILURE(error) || !standardName) 406 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3
85 continue; 407 // is resolved, add U+1E3F => 0xE7C7.
86 } 408 }
87 409
88 // A number of these aliases are handled in Chrome's copy of ICU, but 410 return resultString;
89 // Chromium can be compiled with the system ICU.
90
91 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
92 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
93 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
94 #if defined(USING_SYSTEM_ICU)
95 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80 "))
96 standardName = "GBK";
97 // Similarly, EUC-KR encodings all map to an extended version, but
98 // per HTML5, the canonical name still should be EUC-KR.
99 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5 601") || !strcmp(standardName, "cp1363"))
100 standardName = "EUC-KR";
101 // And so on.
102 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.
103 standardName = "windows-1254";
104 else if (!strcmp(standardName, "TIS-620"))
105 standardName = "windows-874";
106 #endif
107
108 registrar(standardName, standardName);
109
110 uint16_t numAliases = ucnv_countAliases(name, &error);
111 ASSERT(U_SUCCESS(error));
112 if (U_SUCCESS(error))
113 for (uint16_t j = 0; j < numAliases; ++j) {
114 error = U_ZERO_ERROR;
115 const char* alias = ucnv_getAlias(name, j, &error);
116 ASSERT(U_SUCCESS(error));
117 if (U_SUCCESS(error) && alias != standardName)
118 registrar(alias, standardName);
119 }
120 }
121
122 // These two entries have to be added here because ICU's converter table
123 // cannot have both ISO-8859-8-I and ISO-8859-8.
124 registrar("csISO88598I", "ISO-8859-8-I");
125 registrar("logical", "ISO-8859-8-I");
126
127 #if defined(USING_SYSTEM_ICU)
128 // Additional alias for MacCyrillic not present in ICU.
129 registrar("maccyrillic", "x-mac-cyrillic");
130
131 // Additional aliases that historically were present in the encoding
132 // table in WebKit on Macintosh that don't seem to be present in ICU.
133 // Perhaps we can prove these are not used on the web and remove them.
134 // Or perhaps we can get them added to ICU.
135 registrar("x-mac-roman", "macintosh");
136 registrar("x-mac-ukrainian", "x-mac-cyrillic");
137 registrar("cn-big5", "Big5");
138 registrar("x-x-big5", "Big5");
139 registrar("cn-gb", "GBK");
140 registrar("csgb231280", "GBK");
141 registrar("x-euc-cn", "GBK");
142 registrar("x-gbk", "GBK");
143 registrar("koi", "KOI8-R");
144 registrar("visual", "ISO-8859-8");
145 registrar("winarabic", "windows-1256");
146 registrar("winbaltic", "windows-1257");
147 registrar("wincyrillic", "windows-1251");
148 registrar("iso-8859-11", "windows-874");
149 registrar("iso8859-11", "windows-874");
150 registrar("dos-874", "windows-874");
151 registrar("wingreek", "windows-1253");
152 registrar("winhebrew", "windows-1255");
153 registrar("winlatin2", "windows-1250");
154 registrar("winturkish", "windows-1254");
155 registrar("winvietnamese", "windows-1258");
156 registrar("x-cp1250", "windows-1250");
157 registrar("x-cp1251", "windows-1251");
158 registrar("x-euc", "EUC-JP");
159 registrar("x-windows-949", "EUC-KR");
160 registrar("KSC5601", "EUC-KR");
161 registrar("x-uhc", "EUC-KR");
162 registrar("shift-jis", "Shift_JIS");
163
164 // Alternative spelling of ISO encoding names.
165 registrar("ISO8859-1", "ISO-8859-1");
166 registrar("ISO8859-2", "ISO-8859-2");
167 registrar("ISO8859-3", "ISO-8859-3");
168 registrar("ISO8859-4", "ISO-8859-4");
169 registrar("ISO8859-5", "ISO-8859-5");
170 registrar("ISO8859-6", "ISO-8859-6");
171 registrar("ISO8859-7", "ISO-8859-7");
172 registrar("ISO8859-8", "ISO-8859-8");
173 registrar("ISO8859-8-I", "ISO-8859-8-I");
174 registrar("ISO8859-9", "ISO-8859-9");
175 registrar("ISO8859-10", "ISO-8859-10");
176 registrar("ISO8859-13", "ISO-8859-13");
177 registrar("ISO8859-14", "ISO-8859-14");
178 registrar("ISO8859-15", "ISO-8859-15");
179 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label
180 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/ ).
181
182 // Additional aliases present in the WHATWG Encoding Standard
183 // and Firefox (as of Oct 2014), but not in the upstream ICU.
184 // Three entries for windows-1252 need not be listed here because
185 // TextCodecLatin1 registers them.
186 registrar("csiso58gb231280", "GBK");
187 registrar("csiso88596e", "ISO-8859-6");
188 registrar("csiso88596i", "ISO-8859-6");
189 registrar("csiso88598e", "ISO-8859-8");
190 registrar("gb_2312", "GBK");
191 registrar("iso88592", "ISO-8859-2");
192 registrar("iso88593", "ISO-8859-3");
193 registrar("iso88594", "ISO-8859-4");
194 registrar("iso88595", "ISO-8859-5");
195 registrar("iso88596", "ISO-8859-6");
196 registrar("iso88597", "ISO-8859-7");
197 registrar("iso88598", "ISO-8859-8");
198 registrar("iso88599", "windows-1254");
199 registrar("iso885910", "ISO-8859-10");
200 registrar("iso885911", "windows-874");
201 registrar("iso885913", "ISO-8859-13");
202 registrar("iso885914", "ISO-8859-14");
203 registrar("iso885915", "ISO-8859-15");
204 registrar("iso_8859-2", "ISO-8859-2");
205 registrar("iso_8859-3", "ISO-8859-3");
206 registrar("iso_8859-4", "ISO-8859-4");
207 registrar("iso_8859-5", "ISO-8859-5");
208 registrar("iso_8859-6", "ISO-8859-6");
209 registrar("iso_8859-7", "ISO-8859-7");
210 registrar("iso_8859-8", "ISO-8859-8");
211 registrar("iso_8859-9", "windows-1254");
212 registrar("iso_8859-15", "ISO-8859-15");
213 registrar("koi8_r", "KOI8-R");
214 registrar("x-cp1253", "windows-1253");
215 registrar("x-cp1254", "windows-1254");
216 registrar("x-cp1255", "windows-1255");
217 registrar("x-cp1256", "windows-1256");
218 registrar("x-cp1257", "windows-1257");
219 registrar("x-cp1258", "windows-1258");
220 #endif
221 }
222
223 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
224 {
225 // See comment above in registerEncodingNames.
226 registrar("ISO-8859-8-I", create, 0);
227
228 int32_t numEncodings = ucnv_countAvailable();
229 for (int32_t i = 0; i < numEncodings; ++i) {
230 const char* name = ucnv_getAvailableName(i);
231 UErrorCode error = U_ZERO_ERROR;
232 const char* standardName = ucnv_getStandardName(name, "MIME", &error);
233 if (!U_SUCCESS(error) || !standardName) {
234 error = U_ZERO_ERROR;
235 standardName = ucnv_getStandardName(name, "IANA", &error);
236 if (!U_SUCCESS(error) || !standardName)
237 continue;
238 }
239 registrar(standardName, create, 0);
240 }
241 }
242
243 TextCodecICU::TextCodecICU(const TextEncoding& encoding)
244 : m_encoding(encoding)
245 , m_converterICU(0)
246 #if defined(USING_SYSTEM_ICU)
247 , m_needsGBKFallbacks(false)
248 #endif
249 {
250 }
251
252 TextCodecICU::~TextCodecICU()
253 {
254 releaseICUConverter();
255 }
256
257 void TextCodecICU::releaseICUConverter() const
258 {
259 if (m_converterICU) {
260 UConverter*& cachedConverter = cachedConverterICU();
261 if (cachedConverter)
262 ucnv_close(cachedConverter);
263 cachedConverter = m_converterICU;
264 m_converterICU = 0;
265 }
266 }
267
268 void TextCodecICU::createICUConverter() const
269 {
270 ASSERT(!m_converterICU);
271
272 #if defined(USING_SYSTEM_ICU)
273 const char* name = m_encoding.name();
274 m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];
275 #endif
276
277 UErrorCode err;
278
279 UConverter*& cachedConverter = cachedConverterICU();
280 if (cachedConverter) {
281 err = U_ZERO_ERROR;
282 const char* cachedName = ucnv_getName(cachedConverter, &err);
283 if (U_SUCCESS(err) && m_encoding == cachedName) {
284 m_converterICU = cachedConverter;
285 cachedConverter = 0;
286 return;
287 }
288 }
289
290 err = U_ZERO_ERROR;
291 m_converterICU = ucnv_open(m_encoding.name(), &err);
292 #if !LOG_DISABLED
293 if (err == U_AMBIGUOUS_ALIAS_WARNING)
294 WTF_LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding .name());
295 #endif
296 if (m_converterICU)
297 ucnv_setFallback(m_converterICU, TRUE);
298 }
299
300 int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)
301 {
302 UChar* targetStart = target;
303 err = U_ZERO_ERROR;
304 ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, o ffsets, flush, &err);
305 return target - targetStart;
306 }
307
308 class ErrorCallbackSetter final {
309 STACK_ALLOCATED();
310 public:
311 ErrorCallbackSetter(UConverter* converter, bool stopOnError)
312 : m_converter(converter)
313 , m_shouldStopOnEncodingErrors(stopOnError)
314 {
315 if (m_shouldStopOnEncodingErrors) {
316 UErrorCode err = U_ZERO_ERROR;
317 ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,
318 UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,
319 &m_savedContext, &err);
320 ASSERT(err == U_ZERO_ERROR);
321 }
322 }
323 ~ErrorCallbackSetter()
324 {
325 if (m_shouldStopOnEncodingErrors) {
326 UErrorCode err = U_ZERO_ERROR;
327 const void* oldContext;
328 UConverterToUCallback oldAction;
329 ucnv_setToUCallBack(m_converter, m_savedAction, m_savedContext, &old Action, &oldContext, &err);
330 ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);
331 ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_O N_ILLEGAL));
332 ASSERT(err == U_ZERO_ERROR);
333 }
334 }
335
336 private:
337 UConverter* m_converter;
338 bool m_shouldStopOnEncodingErrors;
339 const void* m_savedContext;
340 UConverterToUCallback m_savedAction;
341 };
342
343 String TextCodecICU::decode(const char* bytes, size_t length, FlushBehavior flus h, bool stopOnError, bool& sawError)
344 {
345 // Get a converter for the passed-in encoding.
346 if (!m_converterICU) {
347 createICUConverter();
348 ASSERT(m_converterICU);
349 if (!m_converterICU) {
350 WTF_LOG_ERROR("error creating ICU encoder even though encoding was i n table");
351 return String();
352 }
353 }
354
355 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);
356
357 StringBuilder result;
358
359 UChar buffer[ConversionBufferSize];
360 UChar* bufferLimit = buffer + ConversionBufferSize;
361 const char* source = reinterpret_cast<const char*>(bytes);
362 const char* sourceLimit = source + length;
363 int32_t* offsets = nullptr;
364 UErrorCode err = U_ZERO_ERROR;
365
366 do {
367 int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLi mit, offsets, flush != DoNotFlush, err);
368 result.append(buffer, ucharsDecoded);
369 } while (err == U_BUFFER_OVERFLOW_ERROR);
370
371 if (U_FAILURE(err)) {
372 // flush the converter so it can be reused, and not be bothered by this error.
373 do {
374 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, tr ue, err);
375 } while (source < sourceLimit);
376 sawError = true;
377 }
378
379 #if !defined(USING_SYSTEM_ICU)
380 // Chrome's copy of ICU does not have the issue described below.
381 return result.toString();
382 #else
383 String resultString = result.toString();
384
385 // <http://bugs.webkit.org/show_bug.cgi?id=17014>
386 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.
387 if (!strcmp(m_encoding.name(), "GBK")) {
388 if (!strcasecmp(m_encoding.name(), "gb18030"))
389 resultString.replace(0xE5E5, ideographicSpaceCharacter);
390 // Make GBK compliant to the encoding spec and align with GB18030
391 resultString.replace(0x01F9, 0xE7C8);
392 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3
393 // is resolved, add U+1E3F => 0xE7C7.
394 }
395
396 return resultString;
397 #endif 411 #endif
398 } 412 }
399 413
400 #if defined(USING_SYSTEM_ICU) 414 #if defined(USING_SYSTEM_ICU)
401 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding 415 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding
402 // spec, but ICU converter does not have them. 416 // spec, but ICU converter does not have them.
403 static UChar fallbackForGBK(UChar32 character) 417 static UChar fallbackForGBK(UChar32 character) {
404 { 418 switch (character) {
405 switch (character) {
406 case 0x01F9: 419 case 0x01F9:
407 return 0xE7C8; // mapped to xA8xBF by ICU. 420 return 0xE7C8; // mapped to xA8xBF by ICU.
408 case 0x1E3F: 421 case 0x1E3F:
409 return 0xE7C7; // mapped to xA8xBC by ICU. 422 return 0xE7C7; // mapped to xA8xBC by ICU.
410 } 423 }
411 return 0; 424 return 0;
412 } 425 }
413 #endif 426 #endif
414 427
415 // Invalid character handler when writing escaped entities for unrepresentable 428 // Invalid character handler when writing escaped entities for unrepresentable
416 // characters. See the declaration of TextCodec::encode for more. 429 // characters. See the declaration of TextCodec::encode for more.
417 static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeA rgs* fromUArgs, const UChar* codeUnits, int32_t length, 430 static void urlEscapedEntityCallback(const void* context,
418 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) 431 UConverterFromUnicodeArgs* fromUArgs,
419 { 432 const UChar* codeUnits,
420 if (reason == UCNV_UNASSIGNED) { 433 int32_t length,
421 *err = U_ZERO_ERROR; 434 UChar32 codePoint,
422 435 UConverterCallbackReason reason,
423 UnencodableReplacementArray entity; 436 UErrorCode* err) {
424 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncod edEntitiesForUnencodables, entity); 437 if (reason == UCNV_UNASSIGNED) {
425 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err); 438 *err = U_ZERO_ERROR;
426 } else { 439
427 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codeP oint, reason, err); 440 UnencodableReplacementArray entity;
428 } 441 int entityLen = TextCodec::getUnencodableReplacement(
442 codePoint, URLEncodedEntitiesForUnencodables, entity);
443 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);
444 } else {
445 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length,
446 codePoint, reason, err);
447 }
429 } 448 }
430 449
431 #if defined(USING_SYSTEM_ICU) 450 #if defined(USING_SYSTEM_ICU)
432 // Substitutes special GBK characters, escaping all other unassigned entities. 451 // Substitutes special GBK characters, escaping all other unassigned entities.
433 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fr omUArgs, const UChar* codeUnits, int32_t length, 452 static void gbkCallbackEscape(const void* context,
434 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) 453 UConverterFromUnicodeArgs* fromUArgs,
435 { 454 const UChar* codeUnits,
436 UChar outChar; 455 int32_t length,
437 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { 456 UChar32 codePoint,
438 const UChar* source = &outChar; 457 UConverterCallbackReason reason,
439 *err = U_ZERO_ERROR; 458 UErrorCode* err) {
440 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); 459 UChar outChar;
441 return; 460 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {
442 } 461 const UChar* source = &outChar;
443 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint , reason, err); 462 *err = U_ZERO_ERROR;
463 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
464 return;
465 }
466 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint,
467 reason, err);
444 } 468 }
445 469
446 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution. 470 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution.
447 static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicod eArgs* fromUArgs, const UChar* codeUnits, int32_t length, 471 static void gbkUrlEscapedEntityCallack(const void* context,
448 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) 472 UConverterFromUnicodeArgs* fromUArgs,
449 { 473 const UChar* codeUnits,
450 if (reason == UCNV_UNASSIGNED) { 474 int32_t length,
451 if (UChar outChar = fallbackForGBK(codePoint)) { 475 UChar32 codePoint,
452 const UChar* source = &outChar; 476 UConverterCallbackReason reason,
453 *err = U_ZERO_ERROR; 477 UErrorCode* err) {
454 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); 478 if (reason == UCNV_UNASSIGNED) {
455 return; 479 if (UChar outChar = fallbackForGBK(codePoint)) {
456 } 480 const UChar* source = &outChar;
457 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoin t, reason, err); 481 *err = U_ZERO_ERROR;
458 return; 482 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
459 } 483 return;
460 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint , reason, err); 484 }
461 } 485 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint,
462 486 reason, err);
463 static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs * fromUArgs, const UChar* codeUnits, int32_t length, 487 return;
464 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err) 488 }
465 { 489 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint,
466 UChar outChar; 490 reason, err);
467 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) { 491 }
468 const UChar* source = &outChar; 492
469 *err = U_ZERO_ERROR; 493 static void gbkCallbackSubstitute(const void* context,
470 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err); 494 UConverterFromUnicodeArgs* fromUArgs,
471 return; 495 const UChar* codeUnits,
472 } 496 int32_t length,
473 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codeP oint, reason, err); 497 UChar32 codePoint,
474 } 498 UConverterCallbackReason reason,
475 #endif // USING_SYSTEM_ICU 499 UErrorCode* err) {
500 UChar outChar;
501 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {
502 const UChar* source = &outChar;
503 *err = U_ZERO_ERROR;
504 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);
505 return;
506 }
507 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length,
508 codePoint, reason, err);
509 }
510 #endif // USING_SYSTEM_ICU
476 511
477 class TextCodecInput final { 512 class TextCodecInput final {
478 STACK_ALLOCATED(); 513 STACK_ALLOCATED();
479 public: 514
480 TextCodecInput(const TextEncoding& encoding, const UChar* characters, size_t length) 515 public:
481 : m_begin(characters) 516 TextCodecInput(const TextEncoding& encoding,
482 , m_end(characters + length) 517 const UChar* characters,
483 { } 518 size_t length)
484 519 : m_begin(characters), m_end(characters + length) {}
485 TextCodecInput(const TextEncoding& encoding, const LChar* characters, size_t length) 520
486 { 521 TextCodecInput(const TextEncoding& encoding,
487 m_buffer.reserveInitialCapacity(length); 522 const LChar* characters,
488 for (size_t i = 0; i < length; ++i) 523 size_t length) {
489 m_buffer.append(characters[i]); 524 m_buffer.reserveInitialCapacity(length);
490 m_begin = m_buffer.data(); 525 for (size_t i = 0; i < length; ++i)
491 m_end = m_begin + m_buffer.size(); 526 m_buffer.append(characters[i]);
492 } 527 m_begin = m_buffer.data();
493 528 m_end = m_begin + m_buffer.size();
494 const UChar* begin() const { return m_begin; } 529 }
495 const UChar* end() const { return m_end; } 530
496 531 const UChar* begin() const { return m_begin; }
497 private: 532 const UChar* end() const { return m_end; }
498 const UChar* m_begin; 533
499 const UChar* m_end; 534 private:
500 Vector<UChar> m_buffer; 535 const UChar* m_begin;
536 const UChar* m_end;
537 Vector<UChar> m_buffer;
501 }; 538 };
502 539
503 CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHan dling handling) 540 CString TextCodecICU::encodeInternal(const TextCodecInput& input,
504 { 541 UnencodableHandling handling) {
505 const UChar* source = input.begin(); 542 const UChar* source = input.begin();
506 const UChar* end = input.end(); 543 const UChar* end = input.end();
507 544
508 UErrorCode err = U_ZERO_ERROR; 545 UErrorCode err = U_ZERO_ERROR;
509 546
510 switch (handling) { 547 switch (handling) {
511 case QuestionMarksForUnencodables: 548 case QuestionMarksForUnencodables:
512 ucnv_setSubstChars(m_converterICU, "?", 1, &err); 549 ucnv_setSubstChars(m_converterICU, "?", 1, &err);
513 #if !defined(USING_SYSTEM_ICU) 550 #if !defined(USING_SYSTEM_ICU)
514 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0 , 0, 0, &err); 551 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
552 0, 0, &err);
515 #else 553 #else
516 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackS ubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err); 554 ucnv_setFromUCallBack(
517 #endif 555 m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute
518 break; 556 : UCNV_FROM_U_CALLBACK_SUBSTITUTE,
557 0, 0, 0, &err);
558 #endif
559 break;
519 case EntitiesForUnencodables: 560 case EntitiesForUnencodables:
520 #if !defined(USING_SYSTEM_ICU) 561 #if !defined(USING_SYSTEM_ICU)
521 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ ESCAPE_XML_DEC, 0, 0, &err); 562 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE,
563 UCNV_ESCAPE_XML_DEC, 0, 0, &err);
522 #else 564 #else
523 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackE scape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err); 565 ucnv_setFromUCallBack(
524 #endif 566 m_converterICU,
525 break; 567 m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE,
568 UCNV_ESCAPE_XML_DEC, 0, 0, &err);
569 #endif
570 break;
526 case URLEncodedEntitiesForUnencodables: 571 case URLEncodedEntitiesForUnencodables:
527 #if !defined(USING_SYSTEM_ICU) 572 #if !defined(USING_SYSTEM_ICU)
528 ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0, &err); 573 ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0,
574 &err);
529 #else 575 #else
530 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscape dEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err); 576 ucnv_setFromUCallBack(m_converterICU,
531 #endif 577 m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack
532 break; 578 : urlEscapedEntityCallback,
533 } 579 0, 0, 0, &err);
534 580 #endif
535 ASSERT(U_SUCCESS(err)); 581 break;
536 if (U_FAILURE(err)) 582 }
537 return CString(); 583
538 584 ASSERT(U_SUCCESS(err));
539 Vector<char> result; 585 if (U_FAILURE(err))
540 size_t size = 0; 586 return CString();
541 do { 587
542 char buffer[ConversionBufferSize]; 588 Vector<char> result;
543 char* target = buffer; 589 size_t size = 0;
544 char* targetLimit = target + ConversionBufferSize; 590 do {
545 err = U_ZERO_ERROR; 591 char buffer[ConversionBufferSize];
546 ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0, true, &err); 592 char* target = buffer;
547 size_t count = target - buffer; 593 char* targetLimit = target + ConversionBufferSize;
548 result.grow(size + count); 594 err = U_ZERO_ERROR;
549 memcpy(result.data() + size, buffer, count); 595 ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0,
550 size += count; 596 true, &err);
551 } while (err == U_BUFFER_OVERFLOW_ERROR); 597 size_t count = target - buffer;
552 598 result.grow(size + count);
553 return CString(result.data(), size); 599 memcpy(result.data() + size, buffer, count);
554 } 600 size += count;
555 601 } while (err == U_BUFFER_OVERFLOW_ERROR);
556 template<typename CharType> 602
557 CString TextCodecICU::encodeCommon(const CharType* characters, size_t length, Un encodableHandling handling) 603 return CString(result.data(), size);
558 { 604 }
559 if (!length) 605
560 return ""; 606 template <typename CharType>
561 607 CString TextCodecICU::encodeCommon(const CharType* characters,
562 if (!m_converterICU) 608 size_t length,
563 createICUConverter(); 609 UnencodableHandling handling) {
564 if (!m_converterICU) 610 if (!length)
565 return CString(); 611 return "";
566 612
567 TextCodecInput input(m_encoding, characters, length); 613 if (!m_converterICU)
568 return encodeInternal(input, handling); 614 createICUConverter();
569 } 615 if (!m_converterICU)
570 616 return CString();
571 CString TextCodecICU::encode(const UChar* characters, size_t length, Unencodable Handling handling) 617
572 { 618 TextCodecInput input(m_encoding, characters, length);
573 return encodeCommon(characters, length, handling); 619 return encodeInternal(input, handling);
574 } 620 }
575 621
576 CString TextCodecICU::encode(const LChar* characters, size_t length, Unencodable Handling handling) 622 CString TextCodecICU::encode(const UChar* characters,
577 { 623 size_t length,
578 return encodeCommon(characters, length, handling); 624 UnencodableHandling handling) {
579 } 625 return encodeCommon(characters, length, handling);
580 626 }
581 } // namespace WTF 627
628 CString TextCodecICU::encode(const LChar* characters,
629 size_t length,
630 UnencodableHandling handling) {
631 return encodeCommon(characters, length, handling);
632 }
633
634 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextCodecICU.h ('k') | third_party/WebKit/Source/wtf/text/TextCodecLatin1.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698