Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp

Issue 2212393003: Fix BOM handling in TextResourceDecoder on partial data (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) 4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)
5 5
6 This library is free software; you can redistribute it and/or 6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public 7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either 8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version. 9 version 2 of the License, or (at your option) any later version.
10 10
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
207 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 207 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
208 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 208 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
209 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 209 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
210 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; 210 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;
211 211
212 // Check for the BOM. 212 // Check for the BOM.
213 if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { 213 if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
214 setEncoding(UTF8Encoding(), AutoDetectedEncoding); 214 setEncoding(UTF8Encoding(), AutoDetectedEncoding);
215 lengthOfBOM = 3; 215 lengthOfBOM = 3;
216 } else if (m_encodingDetectionOption != AlwaysUseUTF8ForText) { 216 } else if (m_encodingDetectionOption != AlwaysUseUTF8ForText) {
217 if (c1 == 0xFF && c2 == 0xFE) { 217 if (c1 == 0xFF && c2 == 0xFE && bufferLength + len >= 4) {
218 if (c3 || c4) { 218 if (c3 || c4) {
219 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 219 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
220 lengthOfBOM = 2; 220 lengthOfBOM = 2;
221 } else { 221 } else {
222 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); 222 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
223 lengthOfBOM = 4; 223 lengthOfBOM = 4;
224 } 224 }
225 } else if (c1 == 0xFE && c2 == 0xFF) { 225 } else if (c1 == 0xFE && c2 == 0xFF) {
226 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 226 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
227 lengthOfBOM = 2; 227 lengthOfBOM = 2;
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
362 { 362 {
363 // Just checking m_hintEncoding suffices here because it's only set 363 // Just checking m_hintEncoding suffices here because it's only set
364 // in setHintEncoding when the source is AutoDetectedEncoding. 364 // in setHintEncoding when the source is AutoDetectedEncoding.
365 return m_encodingDetectionOption == UseAllAutoDetection 365 return m_encodingDetectionOption == UseAllAutoDetection
366 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); 366 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding));
367 } 367 }
368 368
369 String TextResourceDecoder::decode(const char* data, size_t len) 369 String TextResourceDecoder::decode(const char* data, size_t len)
370 { 370 {
371 size_t lengthOfBOM = 0; 371 size_t lengthOfBOM = 0;
372 if (!m_checkedForBOM) 372 if (!m_checkedForBOM) {
373 lengthOfBOM = checkForBOM(data, len); 373 lengthOfBOM = checkForBOM(data, len);
374 if (!m_checkedForBOM) {
kouhei (in TOK) 2016/08/06 01:16:31 Would you add a comment above why we need this double check?
tzik 2016/08/07 10:18:45 Done.
375 DCHECK_EQ(0u, lengthOfBOM);
376 m_buffer.append(data, len);
377 return emptyString();
378 }
379 }
380 DCHECK_LE(lengthOfBOM, m_buffer.size() + len);
374 381
375 bool movedDataToBuffer = false; 382 bool movedDataToBuffer = false;
376 383
377 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { 384 if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
378 if (!checkForCSSCharset(data, len, movedDataToBuffer)) 385 if (!checkForCSSCharset(data, len, movedDataToBuffer))
379 return emptyString(); 386 return emptyString();
380 } 387 }
381 388
382 // We check XML declaration in HTML content only if there is enough data available 389 // We check XML declaration in HTML content only if there is enough data available
383 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) { 390 if (((m_contentType == HTMLContent && len >= minimumLengthOfXMLDeclaration) || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
435 m_codec = newTextCodec(m_encoding); 442 m_codec = newTextCodec(m_encoding);
436 443
437 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); 444 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
438 m_buffer.clear(); 445 m_buffer.clear();
439 m_codec.reset(); 446 m_codec.reset();
440 m_checkedForBOM = false; // Skip BOM again when re-decoding. 447 m_checkedForBOM = false; // Skip BOM again when re-decoding.
441 return result; 448 return result;
442 } 449 }
443 450
444 } // namespace blink 451 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/core.gypi ('k') | third_party/WebKit/Source/core/html/parser/TextResourceDecoderTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698