OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. | 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
11 * documentation and/or other materials provided with the distribution. | 11 * documentation and/or other materials provided with the distribution. |
12 * | 12 * |
13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY | 13 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY |
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR | 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR |
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
24 */ | 24 */ |
25 | 25 |
26 #include "config.h" | 26 #include "config.h" |
27 #include "core/html/parser/BackgroundHTMLParser.h" | 27 #include "core/html/parser/BackgroundHTMLParser.h" |
28 | 28 |
29 #include "core/html/parser/HTMLDocumentParser.h" | 29 #include "core/html/parser/HTMLDocumentParser.h" |
30 #include "core/html/parser/HTMLParserThread.h" | |
31 #include "core/html/parser/TextResourceDecoder.h" | 30 #include "core/html/parser/TextResourceDecoder.h" |
32 #include "core/html/parser/XSSAuditor.h" | 31 #include "core/html/parser/XSSAuditor.h" |
| 32 #include "platform/SharedBuffer.h" |
33 #include "wtf/MainThread.h" | 33 #include "wtf/MainThread.h" |
34 #include "wtf/text/TextPosition.h" | 34 #include "wtf/text/TextPosition.h" |
35 | 35 |
36 namespace WebCore { | 36 namespace WebCore { |
37 | 37 |
38 // On a network with high latency and high bandwidth, using a device | 38 // On a network with high latency and high bandwidth, using a device |
39 // with a fast CPU, we could end up speculatively tokenizing | 39 // with a fast CPU, we could end up speculatively tokenizing |
40 // the whole document, well ahead of when the main-thread actually needs it. | 40 // the whole document, well ahead of when the main-thread actually needs it. |
41 // This is a waste of memory (and potentially time if the speculation fails). | 41 // This is a waste of memory (and potentially time if the speculation fails). |
42 // So we limit our outstanding tokens arbitrarily to 10,000. | 42 // So we limit our outstanding tokens arbitrarily to 10,000. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
87 : m_weakFactory(reference, this) | 87 : m_weakFactory(reference, this) |
88 , m_token(adoptPtr(new HTMLToken)) | 88 , m_token(adoptPtr(new HTMLToken)) |
89 , m_tokenizer(HTMLTokenizer::create(config->options)) | 89 , m_tokenizer(HTMLTokenizer::create(config->options)) |
90 , m_treeBuilderSimulator(config->options) | 90 , m_treeBuilderSimulator(config->options) |
91 , m_options(config->options) | 91 , m_options(config->options) |
92 , m_parser(config->parser) | 92 , m_parser(config->parser) |
93 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) | 93 , m_pendingTokens(adoptPtr(new CompactHTMLTokenStream)) |
94 , m_xssAuditor(config->xssAuditor.release()) | 94 , m_xssAuditor(config->xssAuditor.release()) |
95 , m_preloadScanner(config->preloadScanner.release()) | 95 , m_preloadScanner(config->preloadScanner.release()) |
96 , m_decoder(config->decoder.release()) | 96 , m_decoder(config->decoder.release()) |
| 97 , m_resourceProvider(config->resourceProvider.release()) |
| 98 , m_receivingDataOnlyFromResourceProvider(false) |
| 99 , m_queuedData(SharedBuffer::create()) |
97 { | 100 { |
| 101 // Tell the resource provider that we're ready to start receiving |
| 102 // chunks of data directly from the parser thread. |
| 103 if (m_resourceProvider) |
| 104 m_resourceProvider->setBackgroundClient(this); |
98 } | 105 } |
99 | 106 |
100 BackgroundHTMLParser::~BackgroundHTMLParser() | 107 BackgroundHTMLParser::~BackgroundHTMLParser() |
101 { | 108 { |
| 109 if (m_resourceProvider) { |
| 110 // Tell the resource provider that calling into this instance |
| 111 // is no longer safe and any further data received should be |
| 112 // ignored. |
| 113 m_resourceProvider->setBackgroundClient(0); |
| 114 // The resource provider needs to be destructed on the main thread. |
| 115 // Note we can only call static functions on the HTMLDocumentParser at t
his point |
| 116 // as the weakptr we have for it is likely invalidated now. |
| 117 callOnMainThread(bind(&HTMLDocumentParser::destroyResourceProvider, m_re
sourceProvider.release())); |
| 118 } |
| 119 } |
| 120 |
| 121 void BackgroundHTMLParser::didReceivedData(const char* data, size_t length) |
| 122 { |
| 123 if (m_receivingDataOnlyFromResourceProvider && m_decoder) { |
| 124 updateDocument(m_decoder->decode(data, length)); |
| 125 return; |
| 126 } |
| 127 |
| 128 // If the parser thread is not standalone yet, it means we may get |
| 129 // further data packets from the main thread and will need to queue |
| 130 // up any data coming directly from the parser thread until we're |
| 131 // sure we're complete. This is also necessary if we've lost our |
| 132 // decoder, until we get a new one passed from the main thread. |
| 133 m_queuedData->append(data, length); |
102 } | 134 } |
103 | 135 |
104 void BackgroundHTMLParser::append(const String& input) | 136 void BackgroundHTMLParser::append(const String& input) |
105 { | 137 { |
106 ASSERT(!m_input.current().isClosed()); | 138 ASSERT(!m_input.current().isClosed()); |
107 m_input.append(input); | 139 m_input.append(input); |
108 pumpTokenizer(); | 140 pumpTokenizer(); |
109 } | 141 } |
110 | 142 |
111 void BackgroundHTMLParser::appendBytes(PassOwnPtr<Vector<char> > buffer) | 143 void BackgroundHTMLParser::appendBytes(PassOwnPtr<Vector<char> > buffer) |
112 { | 144 { |
| 145 ASSERT(!m_receivingDataOnlyFromResourceProvider); |
113 updateDocument(m_decoder->decode(buffer->data(), buffer->size())); | 146 updateDocument(m_decoder->decode(buffer->data(), buffer->size())); |
114 } | 147 } |
115 | 148 |
116 void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder) | 149 void BackgroundHTMLParser::setDecoder(PassOwnPtr<TextResourceDecoder> decoder) |
117 { | 150 { |
118 m_decoder = decoder; | 151 m_decoder = decoder; |
| 152 |
| 153 // If our decoder was reset mid-stream and then recreated, we may have |
| 154 // queued data that is still waiting to be decoded. |
| 155 if (m_decoder) |
| 156 flushQueuedData(); |
119 } | 157 } |
120 | 158 |
121 void BackgroundHTMLParser::flush() | 159 void BackgroundHTMLParser::flush() |
122 { | 160 { |
123 updateDocument(m_decoder->flush()); | 161 updateDocument(m_decoder->flush()); |
124 } | 162 } |
125 | 163 |
126 void BackgroundHTMLParser::updateDocument(const String& decodedData) | 164 void BackgroundHTMLParser::updateDocument(const String& decodedData) |
127 { | 165 { |
128 DocumentEncodingData encodingData(*m_decoder.get()); | 166 DocumentEncodingData encodingData(*m_decoder.get()); |
129 | 167 |
130 if (encodingData != m_lastSeenEncodingData) { | 168 if (encodingData != m_lastSeenEncodingData) { |
131 m_lastSeenEncodingData = encodingData; | 169 m_lastSeenEncodingData = encodingData; |
132 | |
133 m_xssAuditor->setEncoding(encodingData.encoding()); | 170 m_xssAuditor->setEncoding(encodingData.encoding()); |
134 callOnMainThread(bind(&HTMLDocumentParser::didReceiveEncodingDataFromBac
kgroundParser, m_parser, encodingData)); | 171 callOnMainThread(bind(&HTMLDocumentParser::didReceiveEncodingDataFromBac
kgroundParser, m_parser, encodingData)); |
135 } | 172 } |
136 | 173 |
137 if (decodedData.isEmpty()) | 174 if (decodedData.isEmpty()) |
138 return; | 175 return; |
139 | 176 |
140 append(decodedData); | 177 append(decodedData); |
141 } | 178 } |
142 | 179 |
(...skipping 20 matching lines...) Expand all Loading... |
163 { | 200 { |
164 markEndOfFile(); | 201 markEndOfFile(); |
165 pumpTokenizer(); | 202 pumpTokenizer(); |
166 } | 203 } |
167 | 204 |
168 void BackgroundHTMLParser::stop() | 205 void BackgroundHTMLParser::stop() |
169 { | 206 { |
170 delete this; | 207 delete this; |
171 } | 208 } |
172 | 209 |
| 210 void BackgroundHTMLParser::flushQueuedData() |
| 211 { |
| 212 // We only process queued chunks if we have a decoder and if we know |
| 213 // we won't be receiving any more chunks from the main thread. Otherwise |
| 214 // we hang on to the chunks for a bit longer. |
| 215 if (!m_decoder || !m_receivingDataOnlyFromResourceProvider) |
| 216 return; |
| 217 |
| 218 const char* data; |
| 219 size_t position = 0; |
| 220 while (size_t length = m_queuedData->getSomeData(data, position)) { |
| 221 updateDocument(m_decoder->decode(data, length)); |
| 222 position += length; |
| 223 } |
| 224 |
| 225 m_queuedData->clear(); |
| 226 } |
| 227 |
| 228 void BackgroundHTMLParser::didSwitchedToBackgroundClient() |
| 229 { |
| 230 m_receivingDataOnlyFromResourceProvider = true; |
| 231 |
| 232 // At this point we know for sure that no further data will |
| 233 // be coming from the main thread, so we can process anything |
| 234 // we've received directly on the parser thread in the meantime. |
| 235 flushQueuedData(); |
| 236 } |
| 237 |
173 void BackgroundHTMLParser::forcePlaintextForTextDocument() | 238 void BackgroundHTMLParser::forcePlaintextForTextDocument() |
174 { | 239 { |
175 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentPa
rser) | 240 // This is only used by the TextDocumentParser (a subclass of HTMLDocumentPa
rser) |
176 // to force us into the PLAINTEXT state w/o using a <plaintext> tag. | 241 // to force us into the PLAINTEXT state w/o using a <plaintext> tag. |
177 // The TextDocumentParser uses a <pre> tag for historical/compatibility reas
ons. | 242 // The TextDocumentParser uses a <pre> tag for historical/compatibility reas
ons. |
178 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); | 243 m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState); |
179 } | 244 } |
180 | 245 |
181 void BackgroundHTMLParser::markEndOfFile() | 246 void BackgroundHTMLParser::markEndOfFile() |
182 { | 247 { |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 chunk->treeBuilderState = m_treeBuilderSimulator.state(); | 309 chunk->treeBuilderState = m_treeBuilderSimulator.state(); |
245 chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size()); | 310 chunk->inputCheckpoint = m_input.createCheckpoint(m_pendingTokens->size()); |
246 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint(); | 311 chunk->preloadScannerCheckpoint = m_preloadScanner->createCheckpoint(); |
247 chunk->tokens = m_pendingTokens.release(); | 312 chunk->tokens = m_pendingTokens.release(); |
248 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgrou
ndParser, m_parser, chunk.release())); | 313 callOnMainThread(bind(&HTMLDocumentParser::didReceiveParsedChunkFromBackgrou
ndParser, m_parser, chunk.release())); |
249 | 314 |
250 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream); | 315 m_pendingTokens = adoptPtr(new CompactHTMLTokenStream); |
251 } | 316 } |
252 | 317 |
253 } | 318 } |
OLD | NEW |