Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(934)

Side by Side Diff: third_party/WebKit/Source/wtf/text/TextCodecLatin1.cpp

Issue 2470043004: Fix vector resize logic when encoding non-BMP chars to latin1 as ?s (Closed)
Patch Set: rebased Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved. 2 * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions 5 * modification, are permitted provided that the following conditions
6 * are met: 6 * are met:
7 * 1. Redistributions of source code must retain the above copyright 7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer. 8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright 9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the 10 * notice, this list of conditions and the following disclaimer in the
(...skipping 188 matching lines...) Expand 10 before | Expand all | Expand 10 after
199 ++destination16; 199 ++destination16;
200 } 200 }
201 201
202 return result16; 202 return result16;
203 } 203 }
204 204
205 template <typename CharType> 205 template <typename CharType>
206 static CString encodeComplexWindowsLatin1(const CharType* characters, 206 static CString encodeComplexWindowsLatin1(const CharType* characters,
207 size_t length, 207 size_t length,
208 UnencodableHandling handling) { 208 UnencodableHandling handling) {
209 Vector<char> result(length); 209 size_t targetLength = length;
210 Vector<char> result(targetLength);
210 char* bytes = result.data(); 211 char* bytes = result.data();
211 212
212 size_t resultLength = 0; 213 size_t resultLength = 0;
213 for (size_t i = 0; i < length;) { 214 for (size_t i = 0; i < length;) {
214 UChar32 c; 215 UChar32 c;
216 // If CharType is LChar the U16_NEXT call reads a byte and increments;
217 // since the convention is that LChar is already latin1 this is safe.
215 U16_NEXT(characters, i, length, c); 218 U16_NEXT(characters, i, length, c);
219 // If input was a surrogate pair (non-BMP character) then we overestimated
220 // the length.
221 if (c > 0xffff)
222 --targetLength;
216 unsigned char b = static_cast<unsigned char>(c); 223 unsigned char b = static_cast<unsigned char>(c);
217 // Do an efficient check to detect characters other than 00-7F and A0-FF. 224 // Do an efficient check to detect characters other than 00-7F and A0-FF.
218 if (b != c || (c & 0xE0) == 0x80) { 225 if (b != c || (c & 0xE0) == 0x80) {
219 // Look for a way to encode this with Windows Latin-1. 226 // Look for a way to encode this with Windows Latin-1.
220 for (b = 0x80; b < 0xA0; ++b) { 227 for (b = 0x80; b < 0xA0; ++b) {
221 if (table[b] == c) 228 if (table[b] == c)
222 goto gotByte; 229 goto gotByte;
223 } 230 }
224 // No way to encode this character with Windows Latin-1. 231 // No way to encode this character with Windows Latin-1.
225 UnencodableReplacementArray replacement; 232 UnencodableReplacementArray replacement;
226 int replacementLength = 233 int replacementLength =
227 TextCodec::getUnencodableReplacement(c, handling, replacement); 234 TextCodec::getUnencodableReplacement(c, handling, replacement);
228 result.grow(resultLength + replacementLength + length - i); 235 DCHECK_GT(replacementLength, 0);
229 bytes = result.data(); 236 // Only one char was initially reserved per input character, so grow if
237 // necessary. Note that in the case of surrogate pairs and
238 // QuestionMarksForUnencodables the result length may be shorter than
239 // the input length.
240 targetLength += replacementLength - 1;
241 if (targetLength > result.size()) {
242 result.grow(targetLength);
243 bytes = result.data();
244 }
230 memcpy(bytes + resultLength, replacement, replacementLength); 245 memcpy(bytes + resultLength, replacement, replacementLength);
231 resultLength += replacementLength; 246 resultLength += replacementLength;
232 continue; 247 continue;
233 } 248 }
234 gotByte: 249 gotByte:
235 bytes[resultLength++] = b; 250 bytes[resultLength++] = b;
236 } 251 }
237 252
238 return CString(bytes, resultLength); 253 return CString(bytes, resultLength);
239 } 254 }
(...skipping 29 matching lines...) Expand all
269 return encodeCommon(characters, length, handling); 284 return encodeCommon(characters, length, handling);
270 } 285 }
271 286
272 CString TextCodecLatin1::encode(const LChar* characters, 287 CString TextCodecLatin1::encode(const LChar* characters,
273 size_t length, 288 size_t length,
274 UnencodableHandling handling) { 289 UnencodableHandling handling) {
275 return encodeCommon(characters, length, handling); 290 return encodeCommon(characters, length, handling);
276 } 291 }
277 292
278 } // namespace WTF 293 } // namespace WTF
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/wtf/BUILD.gn ('k') | third_party/WebKit/Source/wtf/text/TextCodecLatin1Test.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698