third_party/WebKit/Source/wtf/text/TextCodecLatin1.cpp - Issue 2470043004: Fix vector resize logic when encoding non-BMP chars to latin1 as ?s

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecLatin1.cpp

Issue 2470043004: Fix vector resize logic when encoding non-BMP chars to latin1 as ?s (Closed)

Patch Set: rebased Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.	2 * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions	5 * modification, are permitted provided that the following conditions

6 * are met:	6 * are met:

7 * 1. Redistributions of source code must retain the above copyright	7 * 1. Redistributions of source code must retain the above copyright

8 * notice, this list of conditions and the following disclaimer.	8 * notice, this list of conditions and the following disclaimer.

9 * 2. Redistributions in binary form must reproduce the above copyright	9 * 2. Redistributions in binary form must reproduce the above copyright

10 * notice, this list of conditions and the following disclaimer in the	10 * notice, this list of conditions and the following disclaimer in the

(...skipping 188 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
199 ++destination16;	199 ++destination16;

200 }	200 }

201	201

202 return result16;	202 return result16;

203 }	203 }

204	204

205 template <typename CharType>	205 template <typename CharType>

206 static CString encodeComplexWindowsLatin1(const CharType* characters,	206 static CString encodeComplexWindowsLatin1(const CharType* characters,

207 size_t length,	207 size_t length,

208 UnencodableHandling handling) {	208 UnencodableHandling handling) {

209 Vector<char> result(length);	209 size_t targetLength = length;

	210 Vector<char> result(targetLength);

210 char* bytes = result.data();	211 char* bytes = result.data();

211	212

212 size_t resultLength = 0;	213 size_t resultLength = 0;

213 for (size_t i = 0; i < length;) {	214 for (size_t i = 0; i < length;) {

214 UChar32 c;	215 UChar32 c;

	216 // If CharType is LChar the U16_NEXT call reads a byte and increments;

	217 // since the convention is that LChar is already latin1 this is safe.

215 U16_NEXT(characters, i, length, c);	218 U16_NEXT(characters, i, length, c);

	219 // If input was a surrogate pair (non-BMP character) then we overestimated

	220 // the length.

	221 if (c > 0xffff)

	222 --targetLength;

216 unsigned char b = static_cast<unsigned char>(c);	223 unsigned char b = static_cast<unsigned char>(c);

217 // Do an efficient check to detect characters other than 00-7F and A0-FF.	224 // Do an efficient check to detect characters other than 00-7F and A0-FF.

218 if (b != c || (c & 0xE0) == 0x80) {	225 if (b != c || (c & 0xE0) == 0x80) {

219 // Look for a way to encode this with Windows Latin-1.	226 // Look for a way to encode this with Windows Latin-1.

220 for (b = 0x80; b < 0xA0; ++b) {	227 for (b = 0x80; b < 0xA0; ++b) {

221 if (table[b] == c)	228 if (table[b] == c)

222 goto gotByte;	229 goto gotByte;

223 }	230 }

224 // No way to encode this character with Windows Latin-1.	231 // No way to encode this character with Windows Latin-1.

225 UnencodableReplacementArray replacement;	232 UnencodableReplacementArray replacement;

226 int replacementLength =	233 int replacementLength =

227 TextCodec::getUnencodableReplacement(c, handling, replacement);	234 TextCodec::getUnencodableReplacement(c, handling, replacement);

228 result.grow(resultLength + replacementLength + length - i);	235 DCHECK_GT(replacementLength, 0);

229 bytes = result.data();	236 // Only one char was initially reserved per input character, so grow if

	237 // necessary. Note that in the case of surrogate pairs and

	238 // QuestionMarksForUnencodables the result length may be shorter than

	239 // the input length.

	240 targetLength += replacementLength - 1;

	241 if (targetLength > result.size()) {

	242 result.grow(targetLength);

	243 bytes = result.data();

	244 }

230 memcpy(bytes + resultLength, replacement, replacementLength);	245 memcpy(bytes + resultLength, replacement, replacementLength);

231 resultLength += replacementLength;	246 resultLength += replacementLength;

232 continue;	247 continue;

233 }	248 }

234 gotByte:	249 gotByte:

235 bytes[resultLength++] = b;	250 bytes[resultLength++] = b;

236 }	251 }

237	252

238 return CString(bytes, resultLength);	253 return CString(bytes, resultLength);

239 }	254 }

(...skipping 29 matching lines...) Expand all Loading...
269 return encodeCommon(characters, length, handling);	284 return encodeCommon(characters, length, handling);

270 }	285 }

271	286

272 CString TextCodecLatin1::encode(const LChar* characters,	287 CString TextCodecLatin1::encode(const LChar* characters,

273 size_t length,	288 size_t length,

274 UnencodableHandling handling) {	289 UnencodableHandling handling) {

275 return encodeCommon(characters, length, handling);	290 return encodeCommon(characters, length, handling);

276 }	291 }

277	292

278 } // namespace WTF	293 } // namespace WTF

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/wtf/BUILD.gn ('k') | third_party/WebKit/Source/wtf/text/TextCodecLatin1Test.cpp » ('j') | no next file with comments »