Source/core/html/parser/TextResourceDecoder.cpp - Issue 74513003: Moved text decoding to the parser thread

Side by Side Diff: Source/core/html/parser/TextResourceDecoder.cpp

Issue 74513003: Moved text decoding to the parser thread (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@parserthread_step25

Patch Set: Removed AtomicString from HTMLMetaCharsetParser Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)	2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)

3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.	3 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.

4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)	4 Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)

5	5

6 This library is free software; you can redistribute it and/or	6 This library is free software; you can redistribute it and/or

7 modify it under the terms of the GNU Library General Public	7 modify it under the terms of the GNU Library General Public

8 License as published by the Free Software Foundation; either	8 License as published by the Free Software Foundation; either

9 version 2 of the License, or (at your option) any later version.	9 version 2 of the License, or (at your option) any later version.

10	10

11 This library is distributed in the hope that it will be useful,	11 This library is distributed in the hope that it will be useful,

12 but WITHOUT ANY WARRANTY; without even the implied warranty of	12 but WITHOUT ANY WARRANTY; without even the implied warranty of

13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU	13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 Library General Public License for more details.	14 Library General Public License for more details.

15	15

16 You should have received a copy of the GNU Library General Public License	16 You should have received a copy of the GNU Library General Public License

17 along with this library; see the file COPYING.LIB. If not, write to	17 along with this library; see the file COPYING.LIB. If not, write to

18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,	18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

19 Boston, MA 02110-1301, USA.	19 Boston, MA 02110-1301, USA.

20 */	20 */

21	21

22	22

23 #include "config.h"	23 #include "config.h"

24 #include "core/fetch/TextResourceDecoder.h"	24 #include "core/html/parser/TextResourceDecoder.h"

25	25

26 #include "HTMLNames.h"	26 #include "HTMLNames.h"

27 #include "core/dom/DOMImplementation.h"	27 #include "core/dom/DOMImplementation.h"

28 #include "core/html/parser/HTMLMetaCharsetParser.h"	28 #include "core/html/parser/HTMLMetaCharsetParser.h"

29 #include "platform/text/TextEncodingDetector.h"	29 #include "platform/text/TextEncodingDetector.h"

30 #include "wtf/StringExtras.h"	30 #include "wtf/StringExtras.h"

31 #include "wtf/text/TextCodec.h"	31 #include "wtf/text/TextCodec.h"

32 #include "wtf/text/TextEncoding.h"	32 #include "wtf/text/TextEncoding.h"

33 #include "wtf/text/TextEncodingRegistry.h"	33 #include "wtf/text/TextEncodingRegistry.h"

34	34

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
85 {	85 {

86 Vector<char, 64> buffer(length + 1);	86 Vector<char, 64> buffer(length + 1);

87 memcpy(buffer.data(), encodingName, length);	87 memcpy(buffer.data(), encodingName, length);

88 buffer[length] = '\0';	88 buffer[length] = '\0';

89 return buffer.data();	89 return buffer.data();

90 }	90 }

91	91

92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)	92 TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)

93 {	93 {

94 if (equalIgnoringCase(mimeType, "text/css"))	94 if (equalIgnoringCase(mimeType, "text/css"))

95 return CSS;	95 return CSSContent;

96 if (equalIgnoringCase(mimeType, "text/html"))	96 if (equalIgnoringCase(mimeType, "text/html"))

97 return HTML;	97 return HTMLContent;

98 if (DOMImplementation::isXMLMIMEType(mimeType))	98 if (DOMImplementation::isXMLMIMEType(mimeType))

99 return XML;	99 return XMLContent;

100 return PlainText;	100 return PlainTextContent;

101 }	101 }

102	102

103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding)	103 const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding)

104 {	104 {

105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII	105 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII

106 // for text/xml. This matches Firefox.	106 // for text/xml. This matches Firefox.

107 if (contentType == XML)	107 if (contentType == XMLContent)

108 return UTF8Encoding();	108 return UTF8Encoding();

109 if (!specifiedDefaultEncoding.isValid())	109 if (!specifiedDefaultEncoding.isValid())

110 return Latin1Encoding();	110 return Latin1Encoding();

111 return specifiedDefaultEncoding;	111 return specifiedDefaultEncoding;

112 }	112 }

113	113

114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)	114 TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)

115 : m_contentType(determineContentType(mimeType))	115 : m_contentType(determineContentType(mimeType))

116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))	116 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))

117 , m_source(DefaultEncoding)	117 , m_source(DefaultEncoding)

(...skipping 244 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
362 }	362 }

363	363

364 String TextResourceDecoder::decode(const char* data, size_t len)	364 String TextResourceDecoder::decode(const char* data, size_t len)

365 {	365 {

366 size_t lengthOfBOM = 0;	366 size_t lengthOfBOM = 0;

367 if (!m_checkedForBOM)	367 if (!m_checkedForBOM)

368 lengthOfBOM = checkForBOM(data, len);	368 lengthOfBOM = checkForBOM(data, len);

369	369

370 bool movedDataToBuffer = false;	370 bool movedDataToBuffer = false;

371	371

372 if (m_contentType == CSS && !m_checkedForCSSCharset) {	372 if (m_contentType == CSSContent && !m_checkedForCSSCharset) {

373 if (!checkForCSSCharset(data, len, movedDataToBuffer))	373 if (!checkForCSSCharset(data, len, movedDataToBuffer))

374 return emptyString();	374 return emptyString();

375 }	375 }

376	376

377 if ((m_contentType == HTML || m_contentType == XML) && !m_checkedForXMLCharset) {	377 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_checkedForXMLCharset) {

378 if (!checkForXMLCharset(data, len, movedDataToBuffer))	378 if (!checkForXMLCharset(data, len, movedDataToBuffer))

379 return emptyString();	379 return emptyString();

380 }	380 }

381	381

382 const char* dataForDecode = data + lengthOfBOM;	382 const char* dataForDecode = data + lengthOfBOM;

383 size_t lengthForDecode = len - lengthOfBOM;	383 size_t lengthForDecode = len - lengthOfBOM;

384	384

385 if (!m_buffer.isEmpty()) {	385 if (!m_buffer.isEmpty()) {

386 if (!movedDataToBuffer) {	386 if (!movedDataToBuffer) {

387 size_t oldSize = m_buffer.size();	387 size_t oldSize = m_buffer.size();

388 m_buffer.grow(oldSize + len);	388 m_buffer.grow(oldSize + len);

389 memcpy(m_buffer.data() + oldSize, data, len);	389 memcpy(m_buffer.data() + oldSize, data, len);

390 }	390 }

391	391

392 dataForDecode = m_buffer.data() + lengthOfBOM;	392 dataForDecode = m_buffer.data() + lengthOfBOM;

393 lengthForDecode = m_buffer.size() - lengthOfBOM;	393 lengthForDecode = m_buffer.size() - lengthOfBOM;

394 }	394 }

395	395

396 if (m_contentType == HTML && !m_checkedForMetaCharset)	396 if (m_contentType == HTMLContent && !m_checkedForMetaCharset)

397 checkForMetaCharset(dataForDecode, lengthForDecode);	397 checkForMetaCharset(dataForDecode, lengthForDecode);

398	398

399 if (shouldAutoDetect()) {	399 if (shouldAutoDetect()) {

400 WTF::TextEncoding detectedEncoding;	400 WTF::TextEncoding detectedEncoding;

401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))	401 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))

402 setEncoding(detectedEncoding, EncodingFromContentSniffing);	402 setEncoding(detectedEncoding, EncodingFromContentSniffing);

403 }	403 }

404	404

405 ASSERT(m_encoding.isValid());	405 ASSERT(m_encoding.isValid());

406	406

407 if (!m_codec)	407 if (!m_codec)

408 m_codec = newTextCodec(m_encoding);	408 m_codec = newTextCodec(m_encoding);

409	409

410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);	410 String result = m_codec->decode(dataForDecode, lengthForDecode, false, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);

411	411

412 m_buffer.clear();	412 m_buffer.clear();

413 return result;	413 return result;

414 }	414 }

415	415

416 String TextResourceDecoder::flush()	416 String TextResourceDecoder::flush()

417 {	417 {

418 // If we can not identify the encoding even after a document is completely	418 // If we can not identify the encoding even after a document is completely

419 // loaded, we need to detect the encoding if other conditions for	419 // loaded, we need to detect the encoding if other conditions for

420 // autodetection is satisfied.	420 // autodetection is satisfied.

421 if (m_buffer.size() && shouldAutoDetect()	421 if (m_buffer.size() && shouldAutoDetect()

422 && ((!m_checkedForXMLCharset && (m_contentType == HTML || m_contentType == XML)) || (!m_checkedForCSSCharset && (m_contentType == CSS)))) {	422 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {

423 WTF::TextEncoding detectedEncoding;	423 WTF::TextEncoding detectedEncoding;

424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))	424 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))

425 setEncoding(detectedEncoding, EncodingFromContentSniffing);	425 setEncoding(detectedEncoding, EncodingFromContentSniffing);

426 }	426 }

427	427

428 if (!m_codec)	428 if (!m_codec)

429 m_codec = newTextCodec(m_encoding);	429 m_codec = newTextCodec(m_encoding);

430	430

431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XML && !m_useLenientXMLDecoding, m_sawError);	431 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), true, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);

432 m_buffer.clear();	432 m_buffer.clear();

433 m_codec.clear();	433 m_codec.clear();

434 m_checkedForBOM = false; // Skip BOM again when re-decoding.	434 m_checkedForBOM = false; // Skip BOM again when re-decoding.

435 return result;	435 return result;

436 }	436 }

437	437

438 }	438 }

OLD	NEW

« Source/core/html/parser/HTMLTokenizer.h ('K') | « Source/core/html/parser/TextResourceDecoder.h ('k') | Source/core/html/parser/XSSAuditor.h » ('j') | no next file with comments »