Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1220)

Unified Diff: Source/core/html/parser/HTMLConstructionSite.cpp

Issue 26129005: Rewrite Text node attaching to not be N^2 (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Seems to actually work Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Source/core/html/parser/HTMLConstructionSite.cpp
diff --git a/Source/core/html/parser/HTMLConstructionSite.cpp b/Source/core/html/parser/HTMLConstructionSite.cpp
index 4aba70834d78c1674af72c38dd1be2a022677ff5..b80189f54715aee12af967b738155b3c8beb3f07 100644
--- a/Source/core/html/parser/HTMLConstructionSite.cpp
+++ b/Source/core/html/parser/HTMLConstructionSite.cpp
@@ -119,6 +119,27 @@ static inline void executeInsertTask(HTMLConstructionSiteTask& task)
task.child->finishParsingChildren();
}
+static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)
+{
+ ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);
+ ASSERT(task.child->isTextNode());
+
+ // Merge text nodes into previous ones if possible:
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
+ Text* newText = toText(task.child.get());
+ Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
+ if (previousChild && previousChild->isTextNode()) {
+ Text* previousText = toText(previousChild);
+ unsigned lengthLimit = textLengthLimitForContainer(task.parent.get());
+ if (previousText->length() + newText->length() < lengthLimit) {
+ previousText->parserAppendData(newText->data());
+ return;
+ }
+ }
+
+ insert(task);
+}
+
static inline void executeReparentTask(HTMLConstructionSiteTask& task)
{
ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
@@ -149,6 +170,9 @@ void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
if (task.operation == HTMLConstructionSiteTask::Insert)
return executeInsertTask(task);
+ if (task.operation == HTMLConstructionSiteTask::InsertText)
+ return executeInsertTextTask(task);
+
// All the cases below this point are only used by the adoption agency.
if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)
@@ -166,7 +190,7 @@ void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)
// This is only needed for TextDocuments where we might have text nodes
// approaching the default length limit (~64k) and we don't want to
// break a text node in the middle of a combining character.
-static unsigned findBreakIndexBetween(const String& string, unsigned currentPosition, unsigned proposedBreakIndex)
+static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)
{
ASSERT(currentPosition < proposedBreakIndex);
ASSERT(proposedBreakIndex <= string.length());
@@ -202,8 +226,46 @@ static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whites
return string;
}
+void HTMLConstructionSite::flushPendingText()
+{
+ if (m_pendingText.isEmpty())
+ return;
+
+ PendingText pendingText;
+ // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.
+ m_pendingText.swap(pendingText);
+ ASSERT(m_pendingText.isEmpty());
+
+ // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
+ // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
+ unsigned lengthLimit = textLengthLimitForContainer(pendingText.parent.get());
+
+ unsigned currentPosition = 0;
+ const StringBuilder& string = pendingText.stringBuilder;
+ while (currentPosition < string.length()) {
+ unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
+ unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
+ ASSERT(breakIndex <= string.length());
+ String substring = string.substring(currentPosition, breakIndex - currentPosition);
+ substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);
+
+ HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);
+ task.parent = pendingText.parent;
+ task.nextChild = pendingText.nextChild;
+ task.child = Text::create(task.parent->document(), substring);
+ queueTask(task);
+
+ ASSERT(breakIndex > currentPosition);
+ ASSERT(breakIndex - currentPosition == substring.length());
+ ASSERT(toText(task.child.get())->length() == substring.length());
+ currentPosition = breakIndex;
+ }
+}
+
void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)
{
+ flushPendingText();
+ ASSERT(m_pendingText.isEmpty());
m_taskQueue.append(task);
}
@@ -232,6 +294,8 @@ void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> p
void HTMLConstructionSite::executeQueuedTasks()
{
+ // This has no affect on pendingText, and we may have pendingText
+ // remaining after executing all other queued tasks.
const size_t size = m_taskQueue.size();
if (!size)
return;
@@ -274,10 +338,19 @@ HTMLConstructionSite::~HTMLConstructionSite()
// Depending on why we're being destroyed it might be OK
// to forget queued tasks, but currently we don't expect to.
ASSERT(m_taskQueue.isEmpty());
+ // Currently we assume that text will never be the last token in the
+ // document and that we'll always queue some additional task text to cause it to flush.
esprehn 2013/10/11 00:02:56 text task? "task text" is weird.
abarth-chromium 2013/10/11 18:54:04 "task text" -> task
+ ASSERT(m_pendingText.isEmpty());
}
void HTMLConstructionSite::detach()
{
+ // FIXME: We'd like to ASSERT here that we're canceling and not just discarding
+ // text that really should have made it into the DOM earlier, but there
+ // doesn't seem to be a nice way to do that.
+ PendingText discardedText;
+ m_pendingText.swap(discardedText);
esprehn 2013/10/11 00:02:56 Adding a discard() method would be prettier.
abarth-chromium 2013/10/11 18:54:04 Agreed. It can use swap internally if that's help
+
m_document = 0;
m_attachmentRoot = 0;
}
@@ -444,15 +517,24 @@ void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, c
setCompatibilityMode(Document::NoQuirksMode);
}
+void HTMLConstructionSite::flush()
+{
+ flushPendingText();
+ executeQueuedTasks();
+}
+
void HTMLConstructionSite::processEndOfFile()
{
ASSERT(currentNode());
+ flush();
openElements()->popAll();
}
void HTMLConstructionSite::finishedParsing()
{
+ // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.
ASSERT(m_taskQueue.isEmpty());
+ flush();
m_document->finishedParsing();
}
@@ -586,63 +668,22 @@ void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const At
void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)
{
- HTMLConstructionSiteTask protoTask(HTMLConstructionSiteTask::Insert);
- protoTask.parent = currentNode();
+ HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);
+ dummyTask.parent = currentNode();
if (shouldFosterParent())
- findFosterSite(protoTask);
+ findFosterSite(dummyTask);
// FIXME: This probably doesn't need to be done both here and in insert(Task).
- if (protoTask.parent->hasTagName(templateTag))
- protoTask.parent = toHTMLTemplateElement(protoTask.parent.get())->content();
-
- // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary
- // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898
- unsigned lengthLimit = textLengthLimitForContainer(protoTask.parent.get());
- unsigned currentPosition = 0;
-
- // Merge text nodes into previous ones if possible:
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
- Node* previousChild = protoTask.nextChild ? protoTask.nextChild->previousSibling() : protoTask.parent->lastChild();
- if (previousChild && previousChild->isTextNode()) {
- Text* previousText = toText(previousChild);
- unsigned appendLengthLimit = lengthLimit - previousText->length();
-
- unsigned proposedBreakIndex = std::min(currentPosition + appendLengthLimit, string.length());
- unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
- ASSERT(breakIndex <= string.length());
- // If we didn't find a breable piece to append, forget it.
- if (breakIndex) {
- String substring = string.substring(currentPosition, breakIndex - currentPosition);
- substring = atomizeIfAllWhitespace(substring, whitespaceMode);
- previousText->parserAppendData(substring);
- currentPosition += substring.length();
- }
- }
-
- while (currentPosition < string.length()) {
- unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());
- unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);
- // We failed to find a breakable boudary between the minimum and the proposed, just give up and break at the proposed index.
- // We could go searching after the proposed index, but current callers are attempting to break after 65k chars!
- // 65k of unbreakable characters isn't worth trying to handle "correctly".
- if (!breakIndex)
- breakIndex = proposedBreakIndex;
- ASSERT(breakIndex <= string.length());
- String substring = string.substring(currentPosition, breakIndex - currentPosition);
- substring = atomizeIfAllWhitespace(substring, whitespaceMode);
-
- HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
- task.parent = protoTask.parent;
- task.nextChild = protoTask.nextChild;
- task.child = Text::create(task.parent->document(), substring);
- queueTask(task);
-
- ASSERT(breakIndex > currentPosition);
- ASSERT(breakIndex - currentPosition == substring.length());
- ASSERT(toText(task.child.get())->length() == substring.length());
- currentPosition = breakIndex;
- }
+ if (dummyTask.parent->hasTagName(templateTag))
+ dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();
+
+ // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.
+ // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"
+ // In either case we have to flush the pending text into the task queue before making more.
+ if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent || m_pendingText.nextChild != dummyTask.nextChild))
+ flushPendingText();
+ m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);
}
void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)

Powered by Google App Engine
This is Rietveld 408576698