| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | |
| 3 * Copyright (C) 2011 Apple Inc. All rights reserved. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without | |
| 6 * modification, are permitted provided that the following conditions | |
| 7 * are met: | |
| 8 * 1. Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 * notice, this list of conditions and the following disclaimer in the | |
| 12 * documentation and/or other materials provided with the distribution. | |
| 13 * | |
| 14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY | |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR | |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 */ | |
| 26 | |
| 27 #include "sky/engine/core/html/parser/HTMLConstructionSite.h" | |
| 28 | |
| 29 #include <limits> | |
| 30 #include "gen/sky/core/HTMLElementFactory.h" | |
| 31 #include "sky/engine/core/dom/DocumentFragment.h" | |
| 32 #include "sky/engine/core/dom/Element.h" | |
| 33 #include "sky/engine/core/dom/Text.h" | |
| 34 #include "sky/engine/core/frame/LocalFrame.h" | |
| 35 #include "sky/engine/core/html/HTMLScriptElement.h" | |
| 36 #include "sky/engine/core/html/HTMLTemplateElement.h" | |
| 37 #include "sky/engine/core/html/parser/AtomicHTMLToken.h" | |
| 38 #include "sky/engine/core/html/parser/HTMLParserIdioms.h" | |
| 39 #include "sky/engine/core/html/parser/HTMLToken.h" | |
| 40 #include "sky/engine/core/loader/FrameLoaderClient.h" | |
| 41 #include "sky/engine/platform/NotImplemented.h" | |
| 42 #include "sky/engine/platform/text/TextBreakIterator.h" | |
| 43 | |
| 44 namespace blink { | |
| 45 | |
// Open-element stack depth beyond which attachLater() stops nesting new nodes
// deeper and attaches them one level up instead.
static const unsigned maximumHTMLParserDOMTreeDepth = 512u;
| 47 | |
| 48 static inline void setAttributes(Element* element, AtomicHTMLToken* token) | |
| 49 { | |
| 50 element->parserSetAttributes(token->attributes()); | |
| 51 } | |
| 52 | |
| 53 static bool shouldUseLengthLimit(const ContainerNode& node) | |
| 54 { | |
| 55 return !isHTMLScriptElement(node) | |
| 56 && !isHTMLStyleElement(node); | |
| 57 } | |
| 58 | |
| 59 static unsigned textLengthLimitForContainer(const ContainerNode& node) | |
| 60 { | |
| 61 return shouldUseLengthLimit(node) ? Text::defaultLengthLimit : std::numeric_
limits<unsigned>::max(); | |
| 62 } | |
| 63 | |
| 64 static inline bool isAllWhitespace(const String& string) | |
| 65 { | |
| 66 return string.isAllSpecialCharacters<isHTMLSpace<UChar> >(); | |
| 67 } | |
| 68 | |
| 69 static inline void insert(HTMLConstructionSiteTask& task) | |
| 70 { | |
| 71 if (isHTMLTemplateElement(*task.parent)) | |
| 72 task.parent = toHTMLTemplateElement(task.parent.get())->content(); | |
| 73 task.parent->parserAppendChild(task.child.get()); | |
| 74 } | |
| 75 | |
| 76 static inline void executeInsertTask(HTMLConstructionSiteTask& task) | |
| 77 { | |
| 78 ASSERT(task.operation == HTMLConstructionSiteTask::Insert); | |
| 79 insert(task); | |
| 80 } | |
| 81 | |
| 82 static inline void executeInsertTextTask(HTMLConstructionSiteTask& task) | |
| 83 { | |
| 84 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText); | |
| 85 ASSERT(task.child->isTextNode()); | |
| 86 | |
| 87 // Merge text nodes into previous ones if possible: | |
| 88 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construc
tion.html#insert-a-character | |
| 89 Text* newText = toText(task.child.get()); | |
| 90 Node* previousChild = task.parent->lastChild(); | |
| 91 if (previousChild && previousChild->isTextNode()) { | |
| 92 Text* previousText = toText(previousChild); | |
| 93 unsigned lengthLimit = textLengthLimitForContainer(*task.parent); | |
| 94 if (previousText->length() + newText->length() < lengthLimit) { | |
| 95 previousText->parserAppendData(newText->data()); | |
| 96 return; | |
| 97 } | |
| 98 } | |
| 99 | |
| 100 insert(task); | |
| 101 } | |
| 102 | |
| 103 static inline void executeTask(HTMLConstructionSiteTask& task) | |
| 104 { | |
| 105 if (task.operation == HTMLConstructionSiteTask::Insert) | |
| 106 return executeInsertTask(task); | |
| 107 | |
| 108 ASSERT(task.operation == HTMLConstructionSiteTask::InsertText); | |
| 109 return executeInsertTextTask(task); | |
| 110 } | |
| 111 | |
| 112 // This is only needed for TextDocuments where we might have text nodes | |
| 113 // approaching the default length limit (~64k) and we don't want to | |
| 114 // break a text node in the middle of a combining character. | |
| 115 static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned curr
entPosition, unsigned proposedBreakIndex) | |
| 116 { | |
| 117 ASSERT(currentPosition < proposedBreakIndex); | |
| 118 ASSERT(proposedBreakIndex <= string.length()); | |
| 119 // The end of the string is always a valid break. | |
| 120 if (proposedBreakIndex == string.length()) | |
| 121 return proposedBreakIndex; | |
| 122 | |
| 123 // Latin-1 does not have breakable boundaries. If we ever moved to a differn
et 8-bit encoding this could be wrong. | |
| 124 if (string.is8Bit()) | |
| 125 return proposedBreakIndex; | |
| 126 | |
| 127 const UChar* breakSearchCharacters = string.characters16() + currentPosition
; | |
| 128 // We need at least two characters look-ahead to account for UTF-16 surrogat
es, but can't search off the end of the buffer! | |
| 129 unsigned breakSearchLength = std::min(proposedBreakIndex - currentPosition +
2, string.length() - currentPosition); | |
| 130 NonSharedCharacterBreakIterator it(breakSearchCharacters, breakSearchLength)
; | |
| 131 | |
| 132 if (it.isBreak(proposedBreakIndex - currentPosition)) | |
| 133 return proposedBreakIndex; | |
| 134 | |
| 135 int adjustedBreakIndexInSubstring = it.preceding(proposedBreakIndex - curren
tPosition); | |
| 136 if (adjustedBreakIndexInSubstring > 0) | |
| 137 return currentPosition + adjustedBreakIndexInSubstring; | |
| 138 // We failed to find a breakable point, let the caller figure out what to do
. | |
| 139 return 0; | |
| 140 } | |
| 141 | |
| 142 void HTMLConstructionSite::flushPendingText() | |
| 143 { | |
| 144 if (m_pendingText.isEmpty()) | |
| 145 return; | |
| 146 | |
| 147 PendingText pendingText; | |
| 148 // Hold onto the current pending text on the stack so that queueTask doesn't
recurse infinitely. | |
| 149 m_pendingText.swap(pendingText); | |
| 150 ASSERT(m_pendingText.isEmpty()); | |
| 151 | |
| 152 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is n
ecessary | |
| 153 // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898 | |
| 154 unsigned lengthLimit = textLengthLimitForContainer(*pendingText.parent); | |
| 155 | |
| 156 unsigned currentPosition = 0; | |
| 157 const StringBuilder& string = pendingText.stringBuilder; | |
| 158 while (currentPosition < string.length()) { | |
| 159 unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, st
ring.length()); | |
| 160 unsigned breakIndex = findBreakIndexBetween(string, currentPosition, pro
posedBreakIndex); | |
| 161 ASSERT(breakIndex <= string.length()); | |
| 162 String substring = string.substring(currentPosition, breakIndex - curren
tPosition); | |
| 163 | |
| 164 ASSERT(breakIndex > currentPosition); | |
| 165 ASSERT(breakIndex - currentPosition == substring.length()); | |
| 166 currentPosition = breakIndex; | |
| 167 | |
| 168 if (isAllWhitespace(substring)) { | |
| 169 // Ignore whitespace nodes not inside inside a <t>. If we're splitti
ng | |
| 170 // a text node this isn't really a whitespace node and we can't igno
re | |
| 171 // it either. | |
| 172 if (!m_openElements.preserveWhiteSpace() && string.length() == subst
ring.length()) | |
| 173 continue; | |
| 174 | |
| 175 // Strings composed entirely of whitespace are likely to be repeated
. | |
| 176 // Turn them into AtomicString so we share a single string for each. | |
| 177 substring = AtomicString(substring).string(); | |
| 178 } | |
| 179 | |
| 180 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText); | |
| 181 task.parent = pendingText.parent; | |
| 182 task.child = Text::create(task.parent->document(), substring); | |
| 183 queueTask(task); | |
| 184 ASSERT(toText(task.child.get())->length() == substring.length()); | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task) | |
| 189 { | |
| 190 flushPendingText(); | |
| 191 ASSERT(m_pendingText.isEmpty()); | |
| 192 m_taskQueue.append(task); | |
| 193 } | |
| 194 | |
| 195 void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> p
rpChild, bool selfClosing) | |
| 196 { | |
| 197 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert); | |
| 198 task.parent = parent; | |
| 199 task.child = prpChild; | |
| 200 task.selfClosing = selfClosing; | |
| 201 | |
| 202 // Add as a sibling of the parent if we have reached the maximum depth allow
ed. | |
| 203 if (m_openElements.stackDepth() > maximumHTMLParserDOMTreeDepth && task.pare
nt->parentNode()) | |
| 204 task.parent = task.parent->parentNode(); | |
| 205 | |
| 206 ASSERT(task.parent); | |
| 207 queueTask(task); | |
| 208 } | |
| 209 | |
| 210 void HTMLConstructionSite::executeQueuedTasks() | |
| 211 { | |
| 212 // This has no affect on pendingText, and we may have pendingText | |
| 213 // remaining after executing all other queued tasks. | |
| 214 const size_t size = m_taskQueue.size(); | |
| 215 if (!size) | |
| 216 return; | |
| 217 | |
| 218 // Copy the task queue into a local variable in case executeTask | |
| 219 // re-enters the parser. | |
| 220 TaskQueue queue; | |
| 221 queue.swap(m_taskQueue); | |
| 222 | |
| 223 for (size_t i = 0; i < size; ++i) | |
| 224 executeTask(queue[i]); | |
| 225 | |
| 226 // We might be detached now. | |
| 227 } | |
| 228 | |
| 229 HTMLConstructionSite::HTMLConstructionSite(Document* document) | |
| 230 : m_document(document) | |
| 231 , m_attachmentRoot(document) | |
| 232 { | |
| 233 } | |
| 234 | |
| 235 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment) | |
| 236 : m_document(&fragment->document()) | |
| 237 , m_attachmentRoot(fragment) | |
| 238 { | |
| 239 } | |
| 240 | |
| 241 HTMLConstructionSite::~HTMLConstructionSite() | |
| 242 { | |
| 243 // Depending on why we're being destroyed it might be OK | |
| 244 // to forget queued tasks, but currently we don't expect to. | |
| 245 ASSERT(m_taskQueue.isEmpty()); | |
| 246 // Currently we assume that text will never be the last token in the | |
| 247 // document and that we'll always queue some additional task to cause it to
flush. | |
| 248 ASSERT(m_pendingText.isEmpty()); | |
| 249 } | |
| 250 | |
| 251 void HTMLConstructionSite::detach() | |
| 252 { | |
| 253 // FIXME: We'd like to ASSERT here that we're canceling and not just discard
ing | |
| 254 // text that really should have made it into the DOM earlier, but there | |
| 255 // doesn't seem to be a nice way to do that. | |
| 256 m_pendingText.discard(); | |
| 257 m_document = nullptr; | |
| 258 m_attachmentRoot = nullptr; | |
| 259 } | |
| 260 | |
| 261 void HTMLConstructionSite::processEndOfFile() | |
| 262 { | |
| 263 flush(); | |
| 264 openElements()->popAll(); | |
| 265 } | |
| 266 | |
| 267 void HTMLConstructionSite::finishedParsing() | |
| 268 { | |
| 269 // We shouldn't have any queued tasks but we might have pending text which w
e need to promote to tasks and execute. | |
| 270 ASSERT(m_taskQueue.isEmpty()); | |
| 271 flush(); | |
| 272 m_document->finishedParsing(); | |
| 273 } | |
| 274 | |
| 275 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token) | |
| 276 { | |
| 277 RefPtr<Element> element = createElement(token); | |
| 278 attachLater(currentNode(), element); | |
| 279 m_openElements.push(element.release()); | |
| 280 } | |
| 281 | |
| 282 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token) | |
| 283 { | |
| 284 ASSERT(token->type() == HTMLToken::StartTag); | |
| 285 // Normally HTMLElementStack is responsible for calling finishParsingChildre
n, | |
| 286 // but self-closing elements are never in the element stack so the stack | |
| 287 // doesn't get a chance to tell them that we're done parsing their children. | |
| 288 attachLater(currentNode(), createElement(token), true); | |
| 289 // FIXME: Do we want to acknowledge the token's self-closing flag? | |
| 290 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.
html#acknowledge-self-closing-flag | |
| 291 } | |
| 292 | |
| 293 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token) | |
| 294 { | |
| 295 RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(ownerDocumentF
orCurrentNode()); | |
| 296 setAttributes(element.get(), token); | |
| 297 attachLater(currentNode(), element); | |
| 298 m_openElements.push(element.release()); | |
| 299 } | |
| 300 | |
| 301 void HTMLConstructionSite::insertTextNode(const String& string) | |
| 302 { | |
| 303 HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert); | |
| 304 dummyTask.parent = currentNode(); | |
| 305 | |
| 306 // FIXME: This probably doesn't need to be done both here and in insert(Task
). | |
| 307 if (isHTMLTemplateElement(*dummyTask.parent)) | |
| 308 dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->conten
t(); | |
| 309 | |
| 310 // Unclear when parent != case occurs. Somehow we insert text into two separ
ate | |
| 311 // nodes while processing the same Token. When it happens we have to flush t
he | |
| 312 // pending text into the task queue before making more. | |
| 313 if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent)) | |
| 314 flushPendingText(); | |
| 315 m_pendingText.append(dummyTask.parent, string); | |
| 316 } | |
| 317 | |
| 318 inline Document& HTMLConstructionSite::ownerDocumentForCurrentNode() | |
| 319 { | |
| 320 if (isHTMLTemplateElement(*currentNode())) | |
| 321 return toHTMLTemplateElement(currentElement())->content()->document(); | |
| 322 return currentNode()->document(); | |
| 323 } | |
| 324 | |
| 325 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token) | |
| 326 { | |
| 327 Document& document = ownerDocumentForCurrentNode(); | |
| 328 RefPtr<Element> element = HTMLElementFactory::createElement(token->name(), d
ocument, true); | |
| 329 setAttributes(element.get(), token); | |
| 330 return element.release(); | |
| 331 } | |
| 332 | |
| 333 } | |
| OLD | NEW |