Source/core/html/parser/BackgroundHTMLParser.cpp - Issue 100563004: Redirect HTML resource bytes directly to parser thread (Blink side CL)

Side by Side Diff: Source/core/html/parser/BackgroundHTMLParser.cpp

Issue 100563004: Redirect HTML resource bytes directly to parser thread (Blink side CL) (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@parserthread_decodermove

Patch Set: Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.	2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.

3 *	3 *

4 * Redistribution and use in source and binary forms, with or without	4 * Redistribution and use in source and binary forms, with or without

5 * modification, are permitted provided that the following conditions	5 * modification, are permitted provided that the following conditions

6 * are met:	6 * are met:

7 * 1. Redistributions of source code must retain the above copyright	7 * 1. Redistributions of source code must retain the above copyright

8 * notice, this list of conditions and the following disclaimer.	8 * notice, this list of conditions and the following disclaimer.

9 * 2. Redistributions in binary form must reproduce the above copyright	9 * 2. Redistributions in binary form must reproduce the above copyright

10 * notice, this list of conditions and the following disclaimer in the	10 * notice, this list of conditions and the following disclaimer in the

11 * documentation and/or other materials provided with the distribution.	11 * documentation and/or other materials provided with the distribution.

12 *	12 *

13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY	13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY

14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE	14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR	15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR

16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR	16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR

17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,	17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,	18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR	19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY	20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY

21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

24 */	24 */

25	25

26 #include "config.h"	26 #include "config.h"

27 #include "core/html/parser/BackgroundHTMLParser.h"	27 #include "core/html/parser/BackgroundHTMLParser.h"

28	28

29 #include "core/html/parser/HTMLDocumentParser.h"	29 #include "core/html/parser/HTMLDocumentParser.h"

30 #include "core/html/parser/HTMLParserThread.h"

31 #include "core/html/parser/TextResourceDecoder.h"	30 #include "core/html/parser/TextResourceDecoder.h"

32 #include "core/html/parser/XSSAuditor.h"	31 #include "core/html/parser/XSSAuditor.h"

	32 #include "platform/SharedBuffer.h"

33 #include "wtf/MainThread.h"	33 #include "wtf/MainThread.h"

34 #include "wtf/text/TextPosition.h"	34 #include "wtf/text/TextPosition.h"

35	35

36 namespace WebCore {	36 namespace WebCore {

37	37

38 // On a network with high latency and high bandwidth, using a device	38 // On a network with high latency and high bandwidth, using a device

39 // with a fast CPU, we could end up speculatively tokenizing	39 // with a fast CPU, we could end up speculatively tokenizing

40 // the whole document, well ahead of when the main-thread actually needs it.	40 // the whole document, well ahead of when the main-thread actually needs it.

41 // This is a waste of memory (and potentially time if the speculation fails).	41 // This is a waste of memory (and potentially time if the speculation fails).

42 // So we limit our outstanding tokens arbitrarily to 10,000.	42 // So we limit our outstanding tokens arbitrarily to 10,000.

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
87 : m_weakFactory(reference, this)	87 : m_weakFactory(reference, this)

88 , m_token(adoptPtr(new HTMLToken))	88 , m_token(adoptPtr(new HTMLToken))

89 , m_tokenizer(HTMLTokenizer::create(config->options))	89 , m_tokenizer(HTMLTokenizer::create(config->options))

90 , m_treeBuilderSimulator(config->options)	90 , m_treeBuilderSimulator(config->options)

91 , m_options(config->options)	91 , m_options(config->options)

92 , m_parser(config->parser)	92 , m_parser(config->parser)

93 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream))	93 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream))

94 , m_xssAuditor(config->xssAuditor.release())	94 , m_xssAuditor(config->xssAuditor.release())

95 , m_preloadScanner(config->preloadScanner.release())	95 , m_preloadScanner(config->preloadScanner.release())

96 , m_decoder(config->decoder.release())	96 , m_decoder(config->decoder.release())

	97 , m_resourceProvider(config->resourceProvider.release())

	98 , m_receivingDataOnlyFromResourceProvider(false)

	99 , m_queuedData(SharedBuffer::create())

97 {	100 {

	101 // Tell the resource provider that we're ready to start receiving

	102 // chunks of data directly from the parser thread.

	103 if (m_resourceProvider)

	104 m_resourceProvider->setBackgroundClient(this);

98 }	105 }

99	106

100 BackgroundHTMLParser::~BackgroundHTMLParser()	107 BackgroundHTMLParser::~BackgroundHTMLParser()

101 {	108 {

	109 if (m_resourceProvider) {

	110 // Tell the resource provider that calling into this instance

	111 // is no longer safe and any further data received should be

	112 // ignored.

	113 m_resourceProvider->setBackgroundClient(0);

	114 // The resource provider needs to be destructed on the main thread.

	115 // Note we can only call static functions on the HTMLDocumentParser at this point

	116 // as the weakptr we have for it is likely invalidated now.

	117 callOnMainThread(bind(&HTMLDocumentParser::destroyResourceProvider, m_resourceProvider.release()));

	118 }

	119 }

	120

	121 void BackgroundHTMLParser::didReceivedData(const char* data, size_t length)

	122 {

	123 if (m_receivingDataOnlyFromResourceProvider && m_decoder) {

	124 updateDocument(m_decoder->decode(data, length));

	125 return;

	126 }

	127

	128 // If the parser thread is not standalone yet, it means we may get

	129 // further data packets from the main thread and will need to queue

	130 // up any data coming directly from the parser thread until we're

	131 // sure we're complete. This is also necessary if we've lost our

	132 // decoder, until we get a new one passed from the main thread.

	133 m_queuedData->append(data, length);

102 }	134 }

103	135

104 void BackgroundHTMLParser::append(const String& input)	136 void BackgroundHTMLParser::append(const String& input)

105 {	137 {

106 ASSERT(!m_input.current().isClosed());	138 ASSERT(!m_input.current().isClosed());

107 m_input.append(input);	139 m_input.append(input);

108 pumpTokenizer();	140 pumpTokenizer();

109 }	141 }

110	142

111 void BackgroundHTMLParser::appendBytes(PassOwnPtr<Vector<char> > buffer)	143 void BackgroundHTMLParser::appendBytes(PassOwnPtr<Vector<char> > buffer)

112 {	144 {

	145 ASSERT(!m_receivingDataOnlyFromResourceProvider);

113 updateDocument(m_decoder->decode(buffer->data(), buffer->size()));	146 updateDocument(m_decoder->decode(buffer->data(), buffer->size()));

114 }	147 }

115	148

116 void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder)	149 void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder)

117 {	150 {

118 m_decoder = decoder;	151 m_decoder = decoder;

	152

	153 // If our decoder got reset in mid-stream and got recreated, we may have

	154 // some pending data here.

	155 if (m_decoder)

	156 flushQueuedData();

119 }	157 }

120	158

121 void BackgroundHTMLParser::flush()	159 void BackgroundHTMLParser::flush()

122 {	160 {

123 updateDocument(m_decoder->flush());	161 updateDocument(m_decoder->flush());

124 }	162 }

125	163

126 void BackgroundHTMLParser::updateDocument(const String& decodedData)	164 void BackgroundHTMLParser::updateDocument(const String& decodedData)

127 {	165 {

128 DocumentEncodingData encodingData(*m_decoder.get());	166 DocumentEncodingData encodingData(*m_decoder.get());

129	167

130 if (encodingData != m_lastSeenEncodingData) {	168 if (encodingData != m_lastSeenEncodingData) {

131 m_lastSeenEncodingData = encodingData;	169 m_lastSeenEncodingData = encodingData;

132

133 m_xssAuditor->setEncoding(encodingData.encoding());	170 m_xssAuditor->setEncoding(encodingData.encoding());

134 callOnMainThread(bind(&HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser, m_parser, encodingData));	171 callOnMainThread(bind(&HTMLDocumentParser::didReceiveEncodingDataFromBackgroundParser, m_parser, encodingData));

135 }	172 }

136	173

137 if (decodedData.isEmpty())	174 if (decodedData.isEmpty())

138 return;	175 return;

139	176

140 append(decodedData);	177 append(decodedData);

141 }	178 }

142	179

(...skipping 20 matching lines...) Expand all Loading...
163 {	200 {

164 markEndOfFile();	201 markEndOfFile();

165 pumpTokenizer();	202 pumpTokenizer();

166 }	203 }

167	204

168 void BackgroundHTMLParser::stop()	205 void BackgroundHTMLParser::stop()

169 {	206 {

170 delete this;	207 delete this;

171 }	208 }

172	209

	210 void BackgroundHTMLParser::flushQueuedData()

	211 {

	212 // We only process queued chunks if we have a decoder and if we know

	213 // we won't be receiving any more chunks for the main thread. Otherwise

	214 // we hang on to the chunks for a bit longer.

	215 if (!m_decoder || !m_receivingDataOnlyFromResourceProvider)

	216 return;

	217

	218 const char* data;

	219 size_t position = 0;

	220 while (size_t length = m_queuedData->getSomeData(data, position)) {

	221 updateDocument(m_decoder->decode(data, length));

	222 position += length;

	223 }

	224

	225 m_queuedData->clear();

	226 }

	227

	228 void BackgroundHTMLParser::didSwitchedToBackgroundClient()

	229 {

	230 m_receivingDataOnlyFromResourceProvider = true;

	231

	232 // At this point we know for sure that no further data will

	233 // be coming from the main thread, so we can process anything

	234 // we've received directly on the parser thread in the meantime.

	235 flushQueuedData();

	236 }

	237

173 void BackgroundHTMLParser::forcePlaintextForTextDocument()	238 void BackgroundHTMLParser::forcePlaintextForTextDocument()

174 {	239 {

175 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)	240 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)

176 // to force us into the PLAINTEXT state w/o using a <plaintext> tag.	241 // to force us into the PLAINTEXT state w/o using a <plaintext> tag.

177 // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.	242 // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.

178 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);	243 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);

179 }	244 }

180	245

181 void BackgroundHTMLParser::markEndOfFile()	246 void BackgroundHTMLParser::markEndOfFile()

182 {	247 {

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
244 chunk->treeBuilderState = m_treeBuilderSimulator.state();	309 chunk->treeBuilderState = m_treeBuilderSimulator.state();

245 chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size());	310 chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size());

246 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint();	311 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint();

247 chunk->tokens = m_pendingTokens.release();	312 chunk->tokens = m_pendingTokens.release();

248 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release()));	313 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgroundParser, m_parser, chunk.release()));

249	314

250 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream);	315 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream);

251 }	316 }

252	317

253 }	318 }

OLD	NEW