third_party/WebKit/Source/wtf/text/TextCodecICU.cpp - Issue 1611343002: wtf reformat test

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecICU.cpp

Issue 1611343002: wtf reformat test Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: pydent Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.	2 * Copyright (C) 2004, 2006, 2007, 2008, 2011 Apple Inc. All rights reserved.

3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>	3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>

4 *	4 *

5 * Redistribution and use in source and binary forms, with or without	5 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	6 * modification, are permitted provided that the following conditions

7 * are met:	7 * are met:

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 22 matching lines...) Expand all Loading...
33 #include "wtf/text/CString.h"	33 #include "wtf/text/CString.h"

34 #include "wtf/text/CharacterNames.h"	34 #include "wtf/text/CharacterNames.h"

35 #include "wtf/text/StringBuilder.h"	35 #include "wtf/text/StringBuilder.h"

36 #include <unicode/ucnv.h>	36 #include <unicode/ucnv.h>

37 #include <unicode/ucnv_cb.h>	37 #include <unicode/ucnv_cb.h>

38	38

39 namespace WTF {	39 namespace WTF {

40	40

41 const size_t ConversionBufferSize = 16384;	41 const size_t ConversionBufferSize = 16384;

42	42

43 ICUConverterWrapper::~ICUConverterWrapper()	43 ICUConverterWrapper::~ICUConverterWrapper() {

	44 if (converter)

	45 ucnv_close(converter);

	46 }

	47

	48 static UConverter*& cachedConverterICU() {

	49 return wtfThreadData().cachedConverterICU().converter;

	50 }

	51

	52 PassOwnPtr<TextCodec> TextCodecICU::create(const TextEncoding& encoding,

	53 const void*) {

	54 return adoptPtr(new TextCodecICU(encoding));

	55 }

	56

	57 void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar) {

	58 // We register Hebrew with logical ordering using a separate name.

	59 // Otherwise, this would share the same canonical name as the

	60 // visual ordering case, and then TextEncoding could not tell them

	61 // apart; ICU treats these names as synonyms.

	62 registrar("ISO-8859-8-I", "ISO-8859-8-I");

	63

	64 int32_t numEncodings = ucnv_countAvailable();

	65 for (int32_t i = 0; i < numEncodings; ++i) {

	66 const char* name = ucnv_getAvailableName(i);

	67 UErrorCode error = U_ZERO_ERROR;

	68 #if !defined(USING_SYSTEM_ICU)

	69 const char* primaryStandard = "HTML";

	70 const char* secondaryStandard = "MIME";

	71 #else

	72 const char* primaryStandard = "MIME";

	73 const char* secondaryStandard = "IANA";

	74 #endif

	75 const char* standardName =

	76 ucnv_getStandardName(name, primaryStandard, &error);

	77 if (U_FAILURE(error) || !standardName) {

	78 error = U_ZERO_ERROR;

	79 // Try IANA to pick up 'windows-12xx' and other names

	80 // which are not preferred MIME names but are widely used.

	81 standardName = ucnv_getStandardName(name, secondaryStandard, &error);

	82 if (U_FAILURE(error) || !standardName)

	83 continue;

	84 }

	85

	86 // A number of these aliases are handled in Chrome's copy of ICU, but

	87 // Chromium can be compiled with the system ICU.

	88

	89 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.

	90 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding

	91 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.

	92 #if defined(USING_SYSTEM_ICU)

	93 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80"))

	94 standardName = "GBK";

	95 // Similarly, EUC-KR encodings all map to an extended version, but

	96 // per HTML5, the canonical name still should be EUC-KR.

	97 else if (!strcmp(standardName, "EUC-KR") ||

	98 !strcmp(standardName, "KSC_5601") ||

	99 !strcmp(standardName, "cp1363"))

	100 standardName = "EUC-KR";

	101 // And so on.

	102 else if (

	103 !strcasecmp(

	104 standardName,

	105 "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.

	106 standardName = "windows-1254";

	107 else if (!strcmp(standardName, "TIS-620"))

	108 standardName = "windows-874";

	109 #endif

	110

	111 registrar(standardName, standardName);

	112

	113 uint16_t numAliases = ucnv_countAliases(name, &error);

	114 ASSERT(U_SUCCESS(error));

	115 if (U_SUCCESS(error))

	116 for (uint16_t j = 0; j < numAliases; ++j) {

	117 error = U_ZERO_ERROR;

	118 const char* alias = ucnv_getAlias(name, j, &error);

	119 ASSERT(U_SUCCESS(error));

	120 if (U_SUCCESS(error) && alias != standardName)

	121 registrar(alias, standardName);

	122 }

	123 }

	124

	125 // These two entries have to be added here because ICU's converter table

	126 // cannot have both ISO-8859-8-I and ISO-8859-8.

	127 registrar("csISO88598I", "ISO-8859-8-I");

	128 registrar("logical", "ISO-8859-8-I");

	129

	130 #if defined(USING_SYSTEM_ICU)

	131 // Additional alias for MacCyrillic not present in ICU.

	132 registrar("maccyrillic", "x-mac-cyrillic");

	133

	134 // Additional aliases that historically were present in the encoding

	135 // table in WebKit on Macintosh that don't seem to be present in ICU.

	136 // Perhaps we can prove these are not used on the web and remove them.

	137 // Or perhaps we can get them added to ICU.

	138 registrar("x-mac-roman", "macintosh");

	139 registrar("x-mac-ukrainian", "x-mac-cyrillic");

	140 registrar("cn-big5", "Big5");

	141 registrar("x-x-big5", "Big5");

	142 registrar("cn-gb", "GBK");

	143 registrar("csgb231280", "GBK");

	144 registrar("x-euc-cn", "GBK");

	145 registrar("x-gbk", "GBK");

	146 registrar("koi", "KOI8-R");

	147 registrar("visual", "ISO-8859-8");

	148 registrar("winarabic", "windows-1256");

	149 registrar("winbaltic", "windows-1257");

	150 registrar("wincyrillic", "windows-1251");

	151 registrar("iso-8859-11", "windows-874");

	152 registrar("iso8859-11", "windows-874");

	153 registrar("dos-874", "windows-874");

	154 registrar("wingreek", "windows-1253");

	155 registrar("winhebrew", "windows-1255");

	156 registrar("winlatin2", "windows-1250");

	157 registrar("winturkish", "windows-1254");

	158 registrar("winvietnamese", "windows-1258");

	159 registrar("x-cp1250", "windows-1250");

	160 registrar("x-cp1251", "windows-1251");

	161 registrar("x-euc", "EUC-JP");

	162 registrar("x-windows-949", "EUC-KR");

	163 registrar("KSC5601", "EUC-KR");

	164 registrar("x-uhc", "EUC-KR");

	165 registrar("shift-jis", "Shift_JIS");

	166

	167 // Alternative spelling of ISO encoding names.

	168 registrar("ISO8859-1", "ISO-8859-1");

	169 registrar("ISO8859-2", "ISO-8859-2");

	170 registrar("ISO8859-3", "ISO-8859-3");

	171 registrar("ISO8859-4", "ISO-8859-4");

	172 registrar("ISO8859-5", "ISO-8859-5");

	173 registrar("ISO8859-6", "ISO-8859-6");

	174 registrar("ISO8859-7", "ISO-8859-7");

	175 registrar("ISO8859-8", "ISO-8859-8");

	176 registrar("ISO8859-8-I", "ISO-8859-8-I");

	177 registrar("ISO8859-9", "ISO-8859-9");

	178 registrar("ISO8859-10", "ISO-8859-10");

	179 registrar("ISO8859-13", "ISO-8859-13");

	180 registrar("ISO8859-14", "ISO-8859-14");

	181 registrar("ISO8859-15", "ISO-8859-15");

	182 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label

	183 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/).

	184

	185 // Additional aliases present in the WHATWG Encoding Standard

	186 // and Firefox (as of Oct 2014), but not in the upstream ICU.

	187 // Three entries for windows-1252 need not be listed here because

	188 // TextCodecLatin1 registers them.

	189 registrar("csiso58gb231280", "GBK");

	190 registrar("csiso88596e", "ISO-8859-6");

	191 registrar("csiso88596i", "ISO-8859-6");

	192 registrar("csiso88598e", "ISO-8859-8");

	193 registrar("gb_2312", "GBK");

	194 registrar("iso88592", "ISO-8859-2");

	195 registrar("iso88593", "ISO-8859-3");

	196 registrar("iso88594", "ISO-8859-4");

	197 registrar("iso88595", "ISO-8859-5");

	198 registrar("iso88596", "ISO-8859-6");

	199 registrar("iso88597", "ISO-8859-7");

	200 registrar("iso88598", "ISO-8859-8");

	201 registrar("iso88599", "windows-1254");

	202 registrar("iso885910", "ISO-8859-10");

	203 registrar("iso885911", "windows-874");

	204 registrar("iso885913", "ISO-8859-13");

	205 registrar("iso885914", "ISO-8859-14");

	206 registrar("iso885915", "ISO-8859-15");

	207 registrar("iso_8859-2", "ISO-8859-2");

	208 registrar("iso_8859-3", "ISO-8859-3");

	209 registrar("iso_8859-4", "ISO-8859-4");

	210 registrar("iso_8859-5", "ISO-8859-5");

	211 registrar("iso_8859-6", "ISO-8859-6");

	212 registrar("iso_8859-7", "ISO-8859-7");

	213 registrar("iso_8859-8", "ISO-8859-8");

	214 registrar("iso_8859-9", "windows-1254");

	215 registrar("iso_8859-15", "ISO-8859-15");

	216 registrar("koi8_r", "KOI8-R");

	217 registrar("x-cp1253", "windows-1253");

	218 registrar("x-cp1254", "windows-1254");

	219 registrar("x-cp1255", "windows-1255");

	220 registrar("x-cp1256", "windows-1256");

	221 registrar("x-cp1257", "windows-1257");

	222 registrar("x-cp1258", "windows-1258");

	223 #endif

	224 }

	225

	226 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar) {

	227 // See comment above in registerEncodingNames.

	228 registrar("ISO-8859-8-I", create, 0);

	229

	230 int32_t numEncodings = ucnv_countAvailable();

	231 for (int32_t i = 0; i < numEncodings; ++i) {

	232 const char* name = ucnv_getAvailableName(i);

	233 UErrorCode error = U_ZERO_ERROR;

	234 const char* standardName = ucnv_getStandardName(name, "MIME", &error);

	235 if (!U_SUCCESS(error) || !standardName) {

	236 error = U_ZERO_ERROR;

	237 standardName = ucnv_getStandardName(name, "IANA", &error);

	238 if (!U_SUCCESS(error) || !standardName)

	239 continue;

	240 }

	241 registrar(standardName, create, 0);

	242 }

	243 }

	244

	245 TextCodecICU::TextCodecICU(const TextEncoding& encoding)

	246 : m_encoding(encoding),

	247 m_converterICU(0)

	248 #if defined(USING_SYSTEM_ICU)

	249 ,

	250 m_needsGBKFallbacks(false)

	251 #endif

44 {	252 {

45 if (converter)	253 }

46 ucnv_close(converter);	254

47 }	255 TextCodecICU::~TextCodecICU() {

48	256 releaseICUConverter();

49 static UConverter*& cachedConverterICU()	257 }

50 {	258

51 return wtfThreadData().cachedConverterICU().converter;	259 void TextCodecICU::releaseICUConverter() const {

52 }	260 if (m_converterICU) {

53	261 UConverter*& cachedConverter = cachedConverterICU();

54 PassOwnPtr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const void*)	262 if (cachedConverter)

55 {	263 ucnv_close(cachedConverter);

56 return adoptPtr(new TextCodecICU(encoding));	264 cachedConverter = m_converterICU;

57 }	265 m_converterICU = 0;

58	266 }

59 void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)	267 }

60 {	268

61 // We register Hebrew with logical ordering using a separate name.	269 void TextCodecICU::createICUConverter() const {

62 // Otherwise, this would share the same canonical name as the	270 ASSERT(!m_converterICU);

63 // visual ordering case, and then TextEncoding could not tell them	271

64 // apart; ICU treats these names as synonyms.	272 #if defined(USING_SYSTEM_ICU)

65 registrar("ISO-8859-8-I", "ISO-8859-8-I");	273 const char* name = m_encoding.name();

66	274 m_needsGBKFallbacks =

67 int32_t numEncodings = ucnv_countAvailable();	275 name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];

68 for (int32_t i = 0; i < numEncodings; ++i) {	276 #endif

69 const char* name = ucnv_getAvailableName(i);	277

70 UErrorCode error = U_ZERO_ERROR;	278 UErrorCode err;

	279

	280 UConverter*& cachedConverter = cachedConverterICU();

	281 if (cachedConverter) {

	282 err = U_ZERO_ERROR;

	283 const char* cachedName = ucnv_getName(cachedConverter, &err);

	284 if (U_SUCCESS(err) && m_encoding == cachedName) {

	285 m_converterICU = cachedConverter;

	286 cachedConverter = 0;

	287 return;

	288 }

	289 }

	290

	291 err = U_ZERO_ERROR;

	292 m_converterICU = ucnv_open(m_encoding.name(), &err);

	293 #if !LOG_DISABLED

	294 if (err == U_AMBIGUOUS_ALIAS_WARNING)

	295 WTF_LOG_ERROR("ICU ambiguous alias warning for encoding: %s",

	296 m_encoding.name());

	297 #endif

	298 if (m_converterICU)

	299 ucnv_setFallback(m_converterICU, TRUE);

	300 }

	301

	302 int TextCodecICU::decodeToBuffer(UChar* target,

	303 UChar* targetLimit,

	304 const char*& source,

	305 const char* sourceLimit,

	306 int32_t* offsets,

	307 bool flush,

	308 UErrorCode& err) {

	309 UChar* targetStart = target;

	310 err = U_ZERO_ERROR;

	311 ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit,

	312 offsets, flush, &err);

	313 return target - targetStart;

	314 }

	315

	316 class ErrorCallbackSetter final {

	317 STACK_ALLOCATED();

	318

	319 public:

	320 ErrorCallbackSetter(UConverter* converter, bool stopOnError)

	321 : m_converter(converter), m_shouldStopOnEncodingErrors(stopOnError) {

	322 if (m_shouldStopOnEncodingErrors) {

	323 UErrorCode err = U_ZERO_ERROR;

	324 ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,

	325 UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,

	326 &m_savedContext, &err);

	327 ASSERT(err == U_ZERO_ERROR);

	328 }

	329 }

	330 ~ErrorCallbackSetter() {

	331 if (m_shouldStopOnEncodingErrors) {

	332 UErrorCode err = U_ZERO_ERROR;

	333 const void* oldContext;

	334 UConverterToUCallback oldAction;

	335 ucnv_setToUCallBack(m_converter, m_savedAction, m_savedContext,

	336 &oldAction, &oldContext, &err);

	337 ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);

	338 ASSERT(!strcmp(static_cast<const char*>(oldContext),

	339 UCNV_SUB_STOP_ON_ILLEGAL));

	340 ASSERT(err == U_ZERO_ERROR);

	341 }

	342 }

	343

	344 private:

	345 UConverter* m_converter;

	346 bool m_shouldStopOnEncodingErrors;

	347 const void* m_savedContext;

	348 UConverterToUCallback m_savedAction;

	349 };

	350

	351 String TextCodecICU::decode(const char* bytes,

	352 size_t length,

	353 FlushBehavior flush,

	354 bool stopOnError,

	355 bool& sawError) {

	356 // Get a converter for the passed-in encoding.

	357 if (!m_converterICU) {

	358 createICUConverter();

	359 ASSERT(m_converterICU);

	360 if (!m_converterICU) {

	361 WTF_LOG_ERROR(

	362 "error creating ICU encoder even though encoding was in table");

	363 return String();

	364 }

	365 }

	366

	367 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);

	368

	369 StringBuilder result;

	370

	371 UChar buffer[ConversionBufferSize];

	372 UChar* bufferLimit = buffer + ConversionBufferSize;

	373 const char* source = reinterpret_cast<const char*>(bytes);

	374 const char* sourceLimit = source + length;

	375 int32_t* offsets = nullptr;

	376 UErrorCode err = U_ZERO_ERROR;

	377

	378 do {

	379 int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit,

	380 offsets, flush != DoNotFlush, err);

	381 result.append(buffer, ucharsDecoded);

	382 } while (err == U_BUFFER_OVERFLOW_ERROR);

	383

	384 if (U_FAILURE(err)) {

	385 // flush the converter so it can be reused, and not be bothered by this error.

	386 do {

	387 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true,

	388 err);

	389 } while (source < sourceLimit);

	390 sawError = true;

	391 }

	392

71 #if !defined(USING_SYSTEM_ICU)	393 #if !defined(USING_SYSTEM_ICU)

72 const char* primaryStandard = "HTML";	394 // Chrome's copy of ICU does not have the issue described below.

73 const char* secondaryStandard = "MIME";	395 return result.toString();

74 #else	396 #else

75 const char* primaryStandard = "MIME";	397 String resultString = result.toString();

76 const char* secondaryStandard = "IANA";	398

77 #endif	399 // <http://bugs.webkit.org/show_bug.cgi?id=17014>

78 const char* standardName = ucnv_getStandardName(name, primaryStandard, &error);	400 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.

79 if (U_FAILURE(error) || !standardName) {	401 if (!strcmp(m_encoding.name(), "GBK")) {

80 error = U_ZERO_ERROR;	402 if (!strcasecmp(m_encoding.name(), "gb18030"))

81 // Try IANA to pick up 'windows-12xx' and other names	403 resultString.replace(0xE5E5, ideographicSpaceCharacter);

82 // which are not preferred MIME names but are widely used.	404 // Make GBK compliant to the encoding spec and align with GB18030

83 standardName = ucnv_getStandardName(name, secondaryStandard, &error);	405 resultString.replace(0x01F9, 0xE7C8);

84 if (U_FAILURE(error) || !standardName)	406 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3

85 continue;	407 // is resolved, add U+1E3F => 0xE7C7.

86 }	408 }

87	409

88 // A number of these aliases are handled in Chrome's copy of ICU, but	410 return resultString;

89 // Chromium can be compiled with the system ICU.

90

91 // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.

92 // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding

93 // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.

94 #if defined(USING_SYSTEM_ICU)

95 if (!strcmp(standardName, "GB2312") || !strcmp(standardName, "GB_2312-80"))

96 standardName = "GBK";

97 // Similarly, EUC-KR encodings all map to an extended version, but

98 // per HTML5, the canonical name still should be EUC-KR.

99 else if (!strcmp(standardName, "EUC-KR") || !strcmp(standardName, "KSC_5601") || !strcmp(standardName, "cp1363"))

100 standardName = "EUC-KR";

101 // And so on.

102 else if (!strcasecmp(standardName, "iso-8859-9")) // This name is returned in different case by ICU 3.2 and 3.6.

103 standardName = "windows-1254";

104 else if (!strcmp(standardName, "TIS-620"))

105 standardName = "windows-874";

106 #endif

107

108 registrar(standardName, standardName);

109

110 uint16_t numAliases = ucnv_countAliases(name, &error);

111 ASSERT(U_SUCCESS(error));

112 if (U_SUCCESS(error))

113 for (uint16_t j = 0; j < numAliases; ++j) {

114 error = U_ZERO_ERROR;

115 const char* alias = ucnv_getAlias(name, j, &error);

116 ASSERT(U_SUCCESS(error));

117 if (U_SUCCESS(error) && alias != standardName)

118 registrar(alias, standardName);

119 }

120 }

121

122 // These two entries have to be added here because ICU's converter table

123 // cannot have both ISO-8859-8-I and ISO-8859-8.

124 registrar("csISO88598I", "ISO-8859-8-I");

125 registrar("logical", "ISO-8859-8-I");

126

127 #if defined(USING_SYSTEM_ICU)

128 // Additional alias for MacCyrillic not present in ICU.

129 registrar("maccyrillic", "x-mac-cyrillic");

130

131 // Additional aliases that historically were present in the encoding

132 // table in WebKit on Macintosh that don't seem to be present in ICU.

133 // Perhaps we can prove these are not used on the web and remove them.

134 // Or perhaps we can get them added to ICU.

135 registrar("x-mac-roman", "macintosh");

136 registrar("x-mac-ukrainian", "x-mac-cyrillic");

137 registrar("cn-big5", "Big5");

138 registrar("x-x-big5", "Big5");

139 registrar("cn-gb", "GBK");

140 registrar("csgb231280", "GBK");

141 registrar("x-euc-cn", "GBK");

142 registrar("x-gbk", "GBK");

143 registrar("koi", "KOI8-R");

144 registrar("visual", "ISO-8859-8");

145 registrar("winarabic", "windows-1256");

146 registrar("winbaltic", "windows-1257");

147 registrar("wincyrillic", "windows-1251");

148 registrar("iso-8859-11", "windows-874");

149 registrar("iso8859-11", "windows-874");

150 registrar("dos-874", "windows-874");

151 registrar("wingreek", "windows-1253");

152 registrar("winhebrew", "windows-1255");

153 registrar("winlatin2", "windows-1250");

154 registrar("winturkish", "windows-1254");

155 registrar("winvietnamese", "windows-1258");

156 registrar("x-cp1250", "windows-1250");

157 registrar("x-cp1251", "windows-1251");

158 registrar("x-euc", "EUC-JP");

159 registrar("x-windows-949", "EUC-KR");

160 registrar("KSC5601", "EUC-KR");

161 registrar("x-uhc", "EUC-KR");

162 registrar("shift-jis", "Shift_JIS");

163

164 // Alternative spelling of ISO encoding names.

165 registrar("ISO8859-1", "ISO-8859-1");

166 registrar("ISO8859-2", "ISO-8859-2");

167 registrar("ISO8859-3", "ISO-8859-3");

168 registrar("ISO8859-4", "ISO-8859-4");

169 registrar("ISO8859-5", "ISO-8859-5");

170 registrar("ISO8859-6", "ISO-8859-6");

171 registrar("ISO8859-7", "ISO-8859-7");

172 registrar("ISO8859-8", "ISO-8859-8");

173 registrar("ISO8859-8-I", "ISO-8859-8-I");

174 registrar("ISO8859-9", "ISO-8859-9");

175 registrar("ISO8859-10", "ISO-8859-10");

176 registrar("ISO8859-13", "ISO-8859-13");

177 registrar("ISO8859-14", "ISO-8859-14");

178 registrar("ISO8859-15", "ISO-8859-15");

179 // No need to have an entry for ISO8859-16. ISO-8859-16 has just one label

180 // listed in WHATWG Encoding Living Standard (http://encoding.spec.whatwg.org/).

181

182 // Additional aliases present in the WHATWG Encoding Standard

183 // and Firefox (as of Oct 2014), but not in the upstream ICU.

184 // Three entries for windows-1252 need not be listed here because

185 // TextCodecLatin1 registers them.

186 registrar("csiso58gb231280", "GBK");

187 registrar("csiso88596e", "ISO-8859-6");

188 registrar("csiso88596i", "ISO-8859-6");

189 registrar("csiso88598e", "ISO-8859-8");

190 registrar("gb_2312", "GBK");

191 registrar("iso88592", "ISO-8859-2");

192 registrar("iso88593", "ISO-8859-3");

193 registrar("iso88594", "ISO-8859-4");

194 registrar("iso88595", "ISO-8859-5");

195 registrar("iso88596", "ISO-8859-6");

196 registrar("iso88597", "ISO-8859-7");

197 registrar("iso88598", "ISO-8859-8");

198 registrar("iso88599", "windows-1254");

199 registrar("iso885910", "ISO-8859-10");

200 registrar("iso885911", "windows-874");

201 registrar("iso885913", "ISO-8859-13");

202 registrar("iso885914", "ISO-8859-14");

203 registrar("iso885915", "ISO-8859-15");

204 registrar("iso_8859-2", "ISO-8859-2");

205 registrar("iso_8859-3", "ISO-8859-3");

206 registrar("iso_8859-4", "ISO-8859-4");

207 registrar("iso_8859-5", "ISO-8859-5");

208 registrar("iso_8859-6", "ISO-8859-6");

209 registrar("iso_8859-7", "ISO-8859-7");

210 registrar("iso_8859-8", "ISO-8859-8");

211 registrar("iso_8859-9", "windows-1254");

212 registrar("iso_8859-15", "ISO-8859-15");

213 registrar("koi8_r", "KOI8-R");

214 registrar("x-cp1253", "windows-1253");

215 registrar("x-cp1254", "windows-1254");

216 registrar("x-cp1255", "windows-1255");

217 registrar("x-cp1256", "windows-1256");

218 registrar("x-cp1257", "windows-1257");

219 registrar("x-cp1258", "windows-1258");

220 #endif

221 }

222

223 void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)

224 {

225 // See comment above in registerEncodingNames.

226 registrar("ISO-8859-8-I", create, 0);

227

228 int32_t numEncodings = ucnv_countAvailable();

229 for (int32_t i = 0; i < numEncodings; ++i) {

230 const char* name = ucnv_getAvailableName(i);

231 UErrorCode error = U_ZERO_ERROR;

232 const char* standardName = ucnv_getStandardName(name, "MIME", &error);

233 if (!U_SUCCESS(error) || !standardName) {

234 error = U_ZERO_ERROR;

235 standardName = ucnv_getStandardName(name, "IANA", &error);

236 if (!U_SUCCESS(error) || !standardName)

237 continue;

238 }

239 registrar(standardName, create, 0);

240 }

241 }

242

243 TextCodecICU::TextCodecICU(const TextEncoding& encoding)

244 : m_encoding(encoding)

245 , m_converterICU(0)

246 #if defined(USING_SYSTEM_ICU)

247 , m_needsGBKFallbacks(false)

248 #endif

249 {

250 }

251

252 TextCodecICU::~TextCodecICU()

253 {

254 releaseICUConverter();

255 }

256

257 void TextCodecICU::releaseICUConverter() const

258 {

259 if (m_converterICU) {

260 UConverter*& cachedConverter = cachedConverterICU();

261 if (cachedConverter)

262 ucnv_close(cachedConverter);

263 cachedConverter = m_converterICU;

264 m_converterICU = 0;

265 }

266 }

267

268 void TextCodecICU::createICUConverter() const

269 {

270 ASSERT(!m_converterICU);

271

272 #if defined(USING_SYSTEM_ICU)

273 const char* name = m_encoding.name();

274 m_needsGBKFallbacks = name[0] == 'G' && name[1] == 'B' && name[2] == 'K' && !name[3];

275 #endif

276

277 UErrorCode err;

278

279 UConverter*& cachedConverter = cachedConverterICU();

280 if (cachedConverter) {

281 err = U_ZERO_ERROR;

282 const char* cachedName = ucnv_getName(cachedConverter, &err);

283 if (U_SUCCESS(err) && m_encoding == cachedName) {

284 m_converterICU = cachedConverter;

285 cachedConverter = 0;

286 return;

287 }

288 }

289

290 err = U_ZERO_ERROR;

291 m_converterICU = ucnv_open(m_encoding.name(), &err);

292 #if !LOG_DISABLED

293 if (err == U_AMBIGUOUS_ALIAS_WARNING)

294 WTF_LOG_ERROR("ICU ambiguous alias warning for encoding: %s", m_encoding.name());

295 #endif

296 if (m_converterICU)

297 ucnv_setFallback(m_converterICU, TRUE);

298 }

299

300 int TextCodecICU::decodeToBuffer(UChar* target, UChar* targetLimit, const char*& source, const char* sourceLimit, int32_t* offsets, bool flush, UErrorCode& err)

301 {

302 UChar* targetStart = target;

303 err = U_ZERO_ERROR;

304 ucnv_toUnicode(m_converterICU, &target, targetLimit, &source, sourceLimit, offsets, flush, &err);

305 return target - targetStart;

306 }

307

308 class ErrorCallbackSetter final {

309 STACK_ALLOCATED();

310 public:

311 ErrorCallbackSetter(UConverter* converter, bool stopOnError)

312 : m_converter(converter)

313 , m_shouldStopOnEncodingErrors(stopOnError)

314 {

315 if (m_shouldStopOnEncodingErrors) {

316 UErrorCode err = U_ZERO_ERROR;

317 ucnv_setToUCallBack(m_converter, UCNV_TO_U_CALLBACK_SUBSTITUTE,

318 UCNV_SUB_STOP_ON_ILLEGAL, &m_savedAction,

319 &m_savedContext, &err);

320 ASSERT(err == U_ZERO_ERROR);

321 }

322 }

323 ~ErrorCallbackSetter()

324 {

325 if (m_shouldStopOnEncodingErrors) {

326 UErrorCode err = U_ZERO_ERROR;

327 const void* oldContext;

328 UConverterToUCallback oldAction;

329 ucnv_setToUCallBack(m_converter, m_savedAction, m_savedContext, &oldAction, &oldContext, &err);

330 ASSERT(oldAction == UCNV_TO_U_CALLBACK_SUBSTITUTE);

331 ASSERT(!strcmp(static_cast<const char*>(oldContext), UCNV_SUB_STOP_ON_ILLEGAL));

332 ASSERT(err == U_ZERO_ERROR);

333 }

334 }

335

336 private:

337 UConverter* m_converter;

338 bool m_shouldStopOnEncodingErrors;

339 const void* m_savedContext;

340 UConverterToUCallback m_savedAction;

341 };

342

343 String TextCodecICU::decode(const char* bytes, size_t length, FlushBehavior flush, bool stopOnError, bool& sawError)

344 {

345 // Get a converter for the passed-in encoding.

346 if (!m_converterICU) {

347 createICUConverter();

348 ASSERT(m_converterICU);

349 if (!m_converterICU) {

350 WTF_LOG_ERROR("error creating ICU encoder even though encoding was in table");

351 return String();

352 }

353 }

354

355 ErrorCallbackSetter callbackSetter(m_converterICU, stopOnError);

356

357 StringBuilder result;

358

359 UChar buffer[ConversionBufferSize];

360 UChar* bufferLimit = buffer + ConversionBufferSize;

361 const char* source = reinterpret_cast<const char*>(bytes);

362 const char* sourceLimit = source + length;

363 int32_t* offsets = nullptr;

364 UErrorCode err = U_ZERO_ERROR;

365

366 do {

367 int ucharsDecoded = decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, flush != DoNotFlush, err);

368 result.append(buffer, ucharsDecoded);

369 } while (err == U_BUFFER_OVERFLOW_ERROR);

370

371 if (U_FAILURE(err)) {

372 // flush the converter so it can be reused, and not be bothered by this error.

373 do {

374 decodeToBuffer(buffer, bufferLimit, source, sourceLimit, offsets, true, err);

375 } while (source < sourceLimit);

376 sawError = true;

377 }

378

379 #if !defined(USING_SYSTEM_ICU)

380 // Chrome's copy of ICU does not have the issue described below.

381 return result.toString();

382 #else

383 String resultString = result.toString();

384

385 // <http://bugs.webkit.org/show_bug.cgi?id=17014>

386 // Simplified Chinese pages use the code A3A0 to mean "full-width space", but ICU decodes it as U+E5E5.

387 if (!strcmp(m_encoding.name(), "GBK")) {

388 if (!strcasecmp(m_encoding.name(), "gb18030"))

389 resultString.replace(0xE5E5, ideographicSpaceCharacter);

390 // Make GBK compliant to the encoding spec and align with GB18030

391 resultString.replace(0x01F9, 0xE7C8);

392 // FIXME: Once https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3

393 // is resolved, add U+1E3F => 0xE7C7.

394 }

395

396 return resultString;

397 #endif	411 #endif

398 }	412 }

399	413

400 #if defined(USING_SYSTEM_ICU)	414 #if defined(USING_SYSTEM_ICU)

401 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding	415 // U+01F9 and U+1E3F have to be mapped to xA8xBF and xA8xBC per the encoding

402 // spec, but ICU converter does not have them.	416 // spec, but ICU converter does not have them.

403 static UChar fallbackForGBK(UChar32 character)	417 static UChar fallbackForGBK(UChar32 character) {

404 {	418 switch (character) {

405 switch (character) {

406 case 0x01F9:	419 case 0x01F9:

407 return 0xE7C8; // mapped to xA8xBF by ICU.	420 return 0xE7C8; // mapped to xA8xBF by ICU.

408 case 0x1E3F:	421 case 0x1E3F:

409 return 0xE7C7; // mapped to xA8xBC by ICU.	422 return 0xE7C7; // mapped to xA8xBC by ICU.

410 }	423 }

411 return 0;	424 return 0;

412 }	425 }

413 #endif	426 #endif

414	427

415 // Invalid character handler when writing escaped entities for unrepresentable	428 // Invalid character handler when writing escaped entities for unrepresentable

416 // characters. See the declaration of TextCodec::encode for more.	429 // characters. See the declaration of TextCodec::encode for more.

417 static void urlEscapedEntityCallback(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,	430 static void urlEscapedEntityCallback(const void* context,

418 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)	431 UConverterFromUnicodeArgs* fromUArgs,

419 {	432 const UChar* codeUnits,

420 if (reason == UCNV_UNASSIGNED) {	433 int32_t length,

421 *err = U_ZERO_ERROR;	434 UChar32 codePoint,

422	435 UConverterCallbackReason reason,

423 UnencodableReplacementArray entity;	436 UErrorCode* err) {

424 int entityLen = TextCodec::getUnencodableReplacement(codePoint, URLEncodedEntitiesForUnencodables, entity);	437 if (reason == UCNV_UNASSIGNED) {

425 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);	438 *err = U_ZERO_ERROR;

426 } else {	439

427 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);	440 UnencodableReplacementArray entity;

428 }	441 int entityLen = TextCodec::getUnencodableReplacement(

	442 codePoint, URLEncodedEntitiesForUnencodables, entity);

	443 ucnv_cbFromUWriteBytes(fromUArgs, entity, entityLen, 0, err);

	444 } else {

	445 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length,

	446 codePoint, reason, err);

	447 }

429 }	448 }

430	449

431 #if defined(USING_SYSTEM_ICU)	450 #if defined(USING_SYSTEM_ICU)

432 // Substitutes special GBK characters, escaping all other unassigned entities.	451 // Substitutes special GBK characters, escaping all other unassigned entities.

433 static void gbkCallbackEscape(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,	452 static void gbkCallbackEscape(const void* context,

434 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)	453 UConverterFromUnicodeArgs* fromUArgs,

435 {	454 const UChar* codeUnits,

436 UChar outChar;	455 int32_t length,

437 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {	456 UChar32 codePoint,

438 const UChar* source = &outChar;	457 UConverterCallbackReason reason,

439 *err = U_ZERO_ERROR;	458 UErrorCode* err) {

440 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);	459 UChar outChar;

441 return;	460 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {

442 }	461 const UChar* source = &outChar;

443 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);	462 *err = U_ZERO_ERROR;

	463 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);

	464 return;

	465 }

	466 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint,

	467 reason, err);

444 }	468 }

445	469

446 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution.	470 // Combines both gbkUrlEscapedEntityCallback and GBK character substitution.

447 static void gbkUrlEscapedEntityCallack(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,	471 static void gbkUrlEscapedEntityCallack(const void* context,

448 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)	472 UConverterFromUnicodeArgs* fromUArgs,

449 {	473 const UChar* codeUnits,

450 if (reason == UCNV_UNASSIGNED) {	474 int32_t length,

451 if (UChar outChar = fallbackForGBK(codePoint)) {	475 UChar32 codePoint,

452 const UChar* source = &outChar;	476 UConverterCallbackReason reason,

453 *err = U_ZERO_ERROR;	477 UErrorCode* err) {

454 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);	478 if (reason == UCNV_UNASSIGNED) {

455 return;	479 if (UChar outChar = fallbackForGBK(codePoint)) {

456 }	480 const UChar* source = &outChar;

457 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint, reason, err);	481 *err = U_ZERO_ERROR;

458 return;	482 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);

459 }	483 return;

460 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint, reason, err);	484 }

461 }	485 urlEscapedEntityCallback(context, fromUArgs, codeUnits, length, codePoint,

462	486 reason, err);

463 static void gbkCallbackSubstitute(const void* context, UConverterFromUnicodeArgs* fromUArgs, const UChar* codeUnits, int32_t length,	487 return;

464 UChar32 codePoint, UConverterCallbackReason reason, UErrorCode* err)	488 }

465 {	489 UCNV_FROM_U_CALLBACK_ESCAPE(context, fromUArgs, codeUnits, length, codePoint,

466 UChar outChar;	490 reason, err);

467 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {	491 }

468 const UChar* source = &outChar;	492

469 *err = U_ZERO_ERROR;	493 static void gbkCallbackSubstitute(const void* context,

470 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);	494 UConverterFromUnicodeArgs* fromUArgs,

471 return;	495 const UChar* codeUnits,

472 }	496 int32_t length,

473 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, err);	497 UChar32 codePoint,

474 }	498 UConverterCallbackReason reason,

475 #endif // USING_SYSTEM_ICU	499 UErrorCode* err) {

	500 UChar outChar;

	501 if (reason == UCNV_UNASSIGNED && (outChar = fallbackForGBK(codePoint))) {

	502 const UChar* source = &outChar;

	503 *err = U_ZERO_ERROR;

	504 ucnv_cbFromUWriteUChars(fromUArgs, &source, source + 1, 0, err);

	505 return;

	506 }

	507 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length,

	508 codePoint, reason, err);

	509 }

	510 #endif // USING_SYSTEM_ICU

476	511

477 class TextCodecInput final {	512 class TextCodecInput final {

478 STACK_ALLOCATED();	513 STACK_ALLOCATED();

479 public:	514

480 TextCodecInput(const TextEncoding& encoding, const UChar* characters, size_t length)	515 public:

481 : m_begin(characters)	516 TextCodecInput(const TextEncoding& encoding,

482 , m_end(characters + length)	517 const UChar* characters,

483 { }	518 size_t length)

484	519 : m_begin(characters), m_end(characters + length) {}

485 TextCodecInput(const TextEncoding& encoding, const LChar* characters, size_t length)	520

486 {	521 TextCodecInput(const TextEncoding& encoding,

487 m_buffer.reserveInitialCapacity(length);	522 const LChar* characters,

488 for (size_t i = 0; i < length; ++i)	523 size_t length) {

489 m_buffer.append(characters[i]);	524 m_buffer.reserveInitialCapacity(length);

490 m_begin = m_buffer.data();	525 for (size_t i = 0; i < length; ++i)

491 m_end = m_begin + m_buffer.size();	526 m_buffer.append(characters[i]);

492 }	527 m_begin = m_buffer.data();

493	528 m_end = m_begin + m_buffer.size();

494 const UChar* begin() const { return m_begin; }	529 }

495 const UChar* end() const { return m_end; }	530

496	531 const UChar* begin() const { return m_begin; }

497 private:	532 const UChar* end() const { return m_end; }

498 const UChar* m_begin;	533

499 const UChar* m_end;	534 private:

500 Vector<UChar> m_buffer;	535 const UChar* m_begin;

	536 const UChar* m_end;

	537 Vector<UChar> m_buffer;

501 };	538 };

502	539

503 CString TextCodecICU::encodeInternal(const TextCodecInput& input, UnencodableHandling handling)	540 CString TextCodecICU::encodeInternal(const TextCodecInput& input,

504 {	541 UnencodableHandling handling) {

505 const UChar* source = input.begin();	542 const UChar* source = input.begin();

506 const UChar* end = input.end();	543 const UChar* end = input.end();

507	544

508 UErrorCode err = U_ZERO_ERROR;	545 UErrorCode err = U_ZERO_ERROR;

509	546

510 switch (handling) {	547 switch (handling) {

511 case QuestionMarksForUnencodables:	548 case QuestionMarksForUnencodables:

512 ucnv_setSubstChars(m_converterICU, "?", 1, &err);	549 ucnv_setSubstChars(m_converterICU, "?", 1, &err);

513 #if !defined(USING_SYSTEM_ICU)	550 #if !defined(USING_SYSTEM_ICU)

514 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);	551 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,

	552 0, 0, &err);

515 #else	553 #else

516 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute : UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 0, 0, &err);	554 ucnv_setFromUCallBack(

517 #endif	555 m_converterICU, m_needsGBKFallbacks ? gbkCallbackSubstitute

518 break;	556 : UCNV_FROM_U_CALLBACK_SUBSTITUTE,

	557 0, 0, 0, &err);

	558 #endif

	559 break;

519 case EntitiesForUnencodables:	560 case EntitiesForUnencodables:

520 #if !defined(USING_SYSTEM_ICU)	561 #if !defined(USING_SYSTEM_ICU)

521 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);	562 ucnv_setFromUCallBack(m_converterICU, UCNV_FROM_U_CALLBACK_ESCAPE,

	563 UCNV_ESCAPE_XML_DEC, 0, 0, &err);

522 #else	564 #else

523 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 0, 0, &err);	565 ucnv_setFromUCallBack(

524 #endif	566 m_converterICU,

525 break;	567 m_needsGBKFallbacks ? gbkCallbackEscape : UCNV_FROM_U_CALLBACK_ESCAPE,

	568 UCNV_ESCAPE_XML_DEC, 0, 0, &err);

	569 #endif

	570 break;

526 case URLEncodedEntitiesForUnencodables:	571 case URLEncodedEntitiesForUnencodables:

527 #if !defined(USING_SYSTEM_ICU)	572 #if !defined(USING_SYSTEM_ICU)

528 ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0, &err);	573 ucnv_setFromUCallBack(m_converterICU, urlEscapedEntityCallback, 0, 0, 0,

	574 &err);

529 #else	575 #else

530 ucnv_setFromUCallBack(m_converterICU, m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack : urlEscapedEntityCallback, 0, 0, 0, &err);	576 ucnv_setFromUCallBack(m_converterICU,

531 #endif	577 m_needsGBKFallbacks ? gbkUrlEscapedEntityCallack

532 break;	578 : urlEscapedEntityCallback,

533 }	579 0, 0, 0, &err);

534	580 #endif

535 ASSERT(U_SUCCESS(err));	581 break;

536 if (U_FAILURE(err))	582 }

537 return CString();	583

538	584 ASSERT(U_SUCCESS(err));

539 Vector<char> result;	585 if (U_FAILURE(err))

540 size_t size = 0;	586 return CString();

541 do {	587

542 char buffer[ConversionBufferSize];	588 Vector<char> result;

543 char* target = buffer;	589 size_t size = 0;

544 char* targetLimit = target + ConversionBufferSize;	590 do {

545 err = U_ZERO_ERROR;	591 char buffer[ConversionBufferSize];

546 ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0, true, &err);	592 char* target = buffer;

547 size_t count = target - buffer;	593 char* targetLimit = target + ConversionBufferSize;

548 result.grow(size + count);	594 err = U_ZERO_ERROR;

549 memcpy(result.data() + size, buffer, count);	595 ucnv_fromUnicode(m_converterICU, &target, targetLimit, &source, end, 0,

550 size += count;	596 true, &err);

551 } while (err == U_BUFFER_OVERFLOW_ERROR);	597 size_t count = target - buffer;

552	598 result.grow(size + count);

553 return CString(result.data(), size);	599 memcpy(result.data() + size, buffer, count);

554 }	600 size += count;

555	601 } while (err == U_BUFFER_OVERFLOW_ERROR);

556 template<typename CharType>	602

557 CString TextCodecICU::encodeCommon(const CharType* characters, size_t length, UnencodableHandling handling)	603 return CString(result.data(), size);

558 {	604 }

559 if (!length)	605

560 return "";	606 template <typename CharType>

561	607 CString TextCodecICU::encodeCommon(const CharType* characters,

562 if (!m_converterICU)	608 size_t length,

563 createICUConverter();	609 UnencodableHandling handling) {

564 if (!m_converterICU)	610 if (!length)

565 return CString();	611 return "";

566	612

567 TextCodecInput input(m_encoding, characters, length);	613 if (!m_converterICU)

568 return encodeInternal(input, handling);	614 createICUConverter();

569 }	615 if (!m_converterICU)

570	616 return CString();

571 CString TextCodecICU::encode(const UChar* characters, size_t length, UnencodableHandling handling)	617

572 {	618 TextCodecInput input(m_encoding, characters, length);

573 return encodeCommon(characters, length, handling);	619 return encodeInternal(input, handling);

574 }	620 }

575	621

576 CString TextCodecICU::encode(const LChar* characters, size_t length, UnencodableHandling handling)	622 CString TextCodecICU::encode(const UChar* characters,

577 {	623 size_t length,

578 return encodeCommon(characters, length, handling);	624 UnencodableHandling handling) {

579 }	625 return encodeCommon(characters, length, handling);

580	626 }

581 } // namespace WTF	627

	628 CString TextCodecICU::encode(const LChar* characters,

	629 size_t length,

	630 UnencodableHandling handling) {

	631 return encodeCommon(characters, length, handling);

	632 }

	633

	634 } // namespace WTF

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/wtf/text/TextCodecICU.h ('k') | third_party/WebKit/Source/wtf/text/TextCodecLatin1.h » ('j') | no next file with comments »