Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(542)

Side by Side Diff: sky/engine/core/html/parser/HTMLConstructionSite.cpp

Issue 1215103007: Remove remaining HTML elements (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "sky/engine/core/html/parser/HTMLConstructionSite.h"
28
29 #include <limits>
30 #include "gen/sky/core/HTMLElementFactory.h"
31 #include "sky/engine/core/dom/DocumentFragment.h"
32 #include "sky/engine/core/dom/Element.h"
33 #include "sky/engine/core/dom/Text.h"
34 #include "sky/engine/core/frame/LocalFrame.h"
35 #include "sky/engine/core/html/HTMLScriptElement.h"
36 #include "sky/engine/core/html/HTMLTemplateElement.h"
37 #include "sky/engine/core/html/parser/AtomicHTMLToken.h"
38 #include "sky/engine/core/html/parser/HTMLParserIdioms.h"
39 #include "sky/engine/core/html/parser/HTMLToken.h"
40 #include "sky/engine/core/loader/FrameLoaderClient.h"
41 #include "sky/engine/platform/NotImplemented.h"
42 #include "sky/engine/platform/text/TextBreakIterator.h"
43
44 namespace blink {
45
// Open-element stack depth beyond which attachLater() stops nesting new
// nodes deeper and attaches them one level higher instead.
static const unsigned maximumHTMLParserDOMTreeDepth = 512;
47
48 static inline void setAttributes(Element* element, AtomicHTMLToken* token)
49 {
50 element->parserSetAttributes(token->attributes());
51 }
52
53 static bool shouldUseLengthLimit(const ContainerNode& node)
54 {
55 return !isHTMLScriptElement(node)
56 && !isHTMLStyleElement(node);
57 }
58
59 static unsigned textLengthLimitForContainer(const ContainerNode& node)
60 {
61 return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_ limits<unsigned>::max();
62 }
63
64 static inline bool isAllWhitespace(const String& string)
65 {
66 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >();
67 }
68
69 static inline void insert(HTMLConstructionSiteTask& task)
70 {
71 if (isHTMLTemplateElement(*task.parent))
72 task.parent = toHTMLTemplateElement(task.parent.get())->content();
73 task.parent->parserAppendChild(task.child.get());
74 }
75
76 static inline void executeInsertTask(HTMLConstructionSiteTask& task)
77 {
78 ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
79 insert(task);
80 }
81
82 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
83 {
84 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
85 ASSERT(task.child->isTextNode());
86
87 // Merge text nodes into previous ones if possible:
88 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construc tion.html#insert-a-character
89 Text* newText = toText(task.child.get());
90 Node* previousChild = task.parent->lastChild();
91 if (previousChild && previousChild->isTextNode()) {
92 Text* previousText = toText(previousChild);
93 unsigned lengthLimit = textLengthLimitForContainer(*task.parent);
94 if (previousText->length() + newText->length() < lengthLimit) {
95 previousText->parserAppendData(newText->data());
96 return;
97 }
98 }
99
100 insert(task);
101 }
102
103 static inline void executeTask(HTMLConstructionSiteTask& task)
104 {
105 if (task.operation == HTMLConstructionSiteTask::Insert)
106 return executeInsertTask(task);
107
108 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
109 return executeInsertTextTask(task);
110 }
111
112 // This is only needed for TextDocuments where we might have text nodes
113 // approaching the default length limit (~64k) and we don't want to
114 // break a text node in the middle of a combining character.
115 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned curr entPosition, unsigned proposedBreakIndex)
116 {
117 ASSERT(currentPosition < proposedBreakIndex);
118 ASSERT(proposedBreakIndex <= string.length());
119 // The end of the string is always a valid break.
120 if (proposedBreakIndex == string.length())
121 return proposedBreakIndex;
122
123 // Latin-1 does not have breakable boundaries. If we ever moved to a differn et 8-bit encoding this could be wrong.
124 if (string.is8Bit())
125 return proposedBreakIndex;
126
127 const UChar* breakSearchCharacters = string.characters16() + currentPosition ;
128 // We need at least two characters look-ahead to account for UTF-16 surrogat es, but can't search off the end of the buffer!
129 unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition + 2, string.length() - currentPosition);
130 NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength) ;
131
132 if (it.isBreak(proposedBreakIndex - currentPosition))
133 return proposedBreakIndex;
134
135 int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - curren tPosition);
136 if (adjustedBreakIndexInSubstring > 0)
137 return currentPosition + adjustedBreakIndexInSubstring;
138 // We failed to find a breakable point, let the caller figure out what to do .
139 return 0;
140 }
141
142 void HTMLConstructionSite::flushPendingText()
143 {
144 if (m_pendingText.isEmpty())
145 return;
146
147 PendingText pendingText;
148 // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
149 m_pendingText.swap(pendingText);
150 ASSERT(m_pendingText.isEmpty());
151
152 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is n ecessary
153 // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
154 unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent);
155
156 unsigned currentPosition = 0;
157 const StringBuilder& string = pendingText.stringBuilder;
158 while (currentPosition < string.length()) {
159 unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, st ring.length());
160 unsigned breakIndex = findBreakIndexBetween(string, currentPosition, pro posedBreakIndex);
161 ASSERT(breakIndex <= string.length());
162 String substring = string.substring(currentPosition, breakIndex - curren tPosition);
163
164 ASSERT(breakIndex > currentPosition);
165 ASSERT(breakIndex - currentPosition == substring.length());
166 currentPosition = breakIndex;
167
168 if (isAllWhitespace(substring)) {
169 // Ignore whitespace nodes not inside inside a <t>. If we're splitti ng
170 // a text node this isn't really a whitespace node and we can't igno re
171 // it either.
172 if (!m_openElements.preserveWhiteSpace() && string.length() == subst ring.length())
173 continue;
174
175 // Strings composed entirely of whitespace are likely to be repeated .
176 // Turn them into AtomicString so we share a single string for each.
177 substring = AtomicString(substring).string();
178 }
179
180 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
181 task.parent = pendingText.parent;
182 task.child = Text::create(task.parent->document(), substring);
183 queueTask(task);
184 ASSERT(toText(task.child.get())->length() == substring.length());
185 }
186 }
187
188 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
189 {
190 flushPendingText();
191 ASSERT(m_pendingText.isEmpty());
192 m_taskQueue.append(task);
193 }
194
195 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> p rpChild, bool selfClosing)
196 {
197 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
198 task.parent = parent;
199 task.child = prpChild;
200 task.selfClosing = selfClosing;
201
202 // Add as a sibling of the parent if we have reached the maximum depth allow ed.
203 if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.pare nt->parentNode())
204 task.parent = task.parent->parentNode();
205
206 ASSERT(task.parent);
207 queueTask(task);
208 }
209
210 void HTMLConstructionSite::executeQueuedTasks()
211 {
212 // This has no affect on pendingText, and we may have pendingText
213 // remaining after executing all other queued tasks.
214 const size_t size = m_taskQueue.size();
215 if (!size)
216 return;
217
218 // Copy the task queue into a local variable in case executeTask
219 // re-enters the parser.
220 TaskQueue queue;
221 queue.swap(m_taskQueue);
222
223 for (size_t i = 0; i < size; ++i)
224 executeTask(queue[i]);
225
226 // We might be detached now.
227 }
228
229 HTMLConstructionSite::HTMLConstructionSite(Document* document)
230 : m_document(document)
231 , m_attachmentRoot(document)
232 {
233 }
234
235 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment)
236 : m_document(&fragment->document())
237 , m_attachmentRoot(fragment)
238 {
239 }
240
241 HTMLConstructionSite::~HTMLConstructionSite()
242 {
243 // Depending on why we're being destroyed it might be OK
244 // to forget queued tasks, but currently we don't expect to.
245 ASSERT(m_taskQueue.isEmpty());
246 // Currently we assume that text will never be the last token in the
247 // document and that we'll always queue some additional task to cause it to flush.
248 ASSERT(m_pendingText.isEmpty());
249 }
250
251 void HTMLConstructionSite::detach()
252 {
253 // FIXME: We'd like to ASSERT here that we're canceling and not just discard ing
254 // text that really should have made it into the DOM earlier, but there
255 // doesn't seem to be a nice way to do that.
256 m_pendingText.discard();
257 m_document = nullptr;
258 m_attachmentRoot = nullptr;
259 }
260
261 void HTMLConstructionSite::processEndOfFile()
262 {
263 flush();
264 openElements()->popAll();
265 }
266
267 void HTMLConstructionSite::finishedParsing()
268 {
269 // We shouldn't have any queued tasks but we might have pending text which w e need to promote to tasks and execute.
270 ASSERT(m_taskQueue.isEmpty());
271 flush();
272 m_document->finishedParsing();
273 }
274
275 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
276 {
277 RefPtr<Element> element = createElement(token);
278 attachLater(currentNode(), element);
279 m_openElements.push(element.release());
280 }
281
282 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
283 {
284 ASSERT(token->type() == HTMLToken::StartTag);
285 // Normally HTMLElementStack is responsible for calling finishParsingChildre n,
286 // but self-closing elements are never in the element stack so the stack
287 // doesn't get a chance to tell them that we're done parsing their children.
288 attachLater(currentNode(), createElement(token), true);
289 // FIXME: Do we want to acknowledge the token's self-closing flag?
290 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization. html#acknowledge-self-closing-flag
291 }
292
293 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
294 {
295 RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentF orCurrentNode());
296 setAttributes(element.get(), token);
297 attachLater(currentNode(), element);
298 m_openElements.push(element.release());
299 }
300
301 void HTMLConstructionSite::insertTextNode(const String& string)
302 {
303 HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
304 dummyTask.parent = currentNode();
305
306 // FIXME: This probably doesn't need to be done both here and in insert(Task ).
307 if (isHTMLTemplateElement(*dummyTask.parent))
308 dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->conten t();
309
310 // Unclear when parent != case occurs. Somehow we insert text into two separ ate
311 // nodes while processing the same Token. When it happens we have to flush t he
312 // pending text into the task queue before making more.
313 if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent))
314 flushPendingText();
315 m_pendingText.append(dummyTask.parent, string);
316 }
317
318 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode()
319 {
320 if (isHTMLTemplateElement(*currentNode()))
321 return toHTMLTemplateElement(currentElement())->content()->document();
322 return currentNode()->document();
323 }
324
325 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token)
326 {
327 Document& document = ownerDocumentForCurrentNode();
328 RefPtr<Element> element = HTMLElementFactory::createElement(token->name(), d ocument, true);
329 setAttributes(element.get(), token);
330 return element.release();
331 }
332
333 }
OLDNEW
« no previous file with comments | « sky/engine/core/html/parser/HTMLConstructionSite.h ('k') | sky/engine/core/html/parser/HTMLDocumentParser.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698