Source/core/html/parser/HTMLConstructionSite.cpp - Issue 26129005: Rewrite Text node attaching to not be N^2

Unified Diff: Source/core/html/parser/HTMLConstructionSite.cpp

Issue 26129005: Rewrite Text node attaching to not be N^2 (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Flush for foriegn content also Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: Source/core/html/parser/HTMLConstructionSite.cpp

diff --git a/Source/core/html/parser/HTMLConstructionSite.cpp b/Source/core/html/parser/HTMLConstructionSite.cpp

index f0ac5ad2b35da009a18d4e4782d52e93480c9c89..144bbd85d80dea55b93d4018db68c7d92b808f75 100644

--- a/Source/core/html/parser/HTMLConstructionSite.cpp

+++ b/Source/core/html/parser/HTMLConstructionSite.cpp

@@ -119,6 +119,27 @@ static inline void executeInsertTask(HTMLConstructionSiteTask& task)

task.child->finishParsingChildren();

}

+static inline void executeInsertTextTask(HTMLConstructionSiteTask& task)

+ ASSERT(task.operation == HTMLConstructionSiteTask::InsertText);

+ ASSERT(task.child->isTextNode());

+ // Merge text nodes into previous ones if possible:

+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character

+ Text* newText = toText(task.child.get());

+ Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();

+ if (previousChild && previousChild->isTextNode()) {

+ Text* previousText = toText(previousChild);

+ unsigned lengthLimit = textLengthLimitForContainer(task.parent.get());

+ if (previousText->length() + newText->length() < lengthLimit) {

+ previousText->parserAppendData(newText->data());

+ return;

+ }

+ insert(task);

static inline void executeReparentTask(HTMLConstructionSiteTask& task)

{

ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);

@@ -149,6 +170,9 @@ void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)

if (task.operation == HTMLConstructionSiteTask::Insert)

return executeInsertTask(task);

+ if (task.operation == HTMLConstructionSiteTask::InsertText)

+ return executeInsertTextTask(task);

// All the cases below this point are only used by the adoption agency.

if (task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild)

@@ -166,7 +190,7 @@ void HTMLConstructionSite::executeTask(HTMLConstructionSiteTask& task)

// This is only needed for TextDocuments where we might have text nodes

// approaching the default length limit (~64k) and we don't want to

// break a text node in the middle of a combining character.

-static unsigned findBreakIndexBetween(const String& string, unsigned currentPosition, unsigned proposedBreakIndex)

+static unsigned findBreakIndexBetween(const StringBuilder& string, unsigned currentPosition, unsigned proposedBreakIndex)

{

ASSERT(currentPosition < proposedBreakIndex);

ASSERT(proposedBreakIndex <= string.length());

@@ -202,8 +226,46 @@ static String atomizeIfAllWhitespace(const String& string, WhitespaceMode whites

return string;

}

+void HTMLConstructionSite::flushPendingText()

+ if (m_pendingText.isEmpty())

+ return;

+ PendingText pendingText;

+ // Hold onto the current pending text on the stack so that queueTask doesn't recurse infinitely.

+ m_pendingText.swap(pendingText);

+ ASSERT(m_pendingText.isEmpty());

+ // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary

+ // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898

+ unsigned lengthLimit = textLengthLimitForContainer(pendingText.parent.get());

+ unsigned currentPosition = 0;

+ const StringBuilder& string = pendingText.stringBuilder;

+ while (currentPosition < string.length()) {

+ unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());

+ unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);

+ ASSERT(breakIndex <= string.length());

+ String substring = string.substring(currentPosition, breakIndex - currentPosition);

+ substring = atomizeIfAllWhitespace(substring, pendingText.whitespaceMode);

+ HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertText);

+ task.parent = pendingText.parent;

+ task.nextChild = pendingText.nextChild;

+ task.child = Text::create(task.parent->document(), substring);

+ queueTask(task);

+ ASSERT(breakIndex > currentPosition);

+ ASSERT(breakIndex - currentPosition == substring.length());

+ ASSERT(toText(task.child.get())->length() == substring.length());

+ currentPosition = breakIndex;

+ }

void HTMLConstructionSite::queueTask(const HTMLConstructionSiteTask& task)

{

+ flushPendingText();

+ ASSERT(m_pendingText.isEmpty());

m_taskQueue.append(task);

}

@@ -232,6 +294,8 @@ void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> p

void HTMLConstructionSite::executeQueuedTasks()

{

+ // This has no affect on pendingText, and we may have pendingText

+ // remaining after executing all other queued tasks.

const size_t size = m_taskQueue.size();

if (!size)

return;

@@ -274,10 +338,17 @@ HTMLConstructionSite::~HTMLConstructionSite()

// Depending on why we're being destroyed it might be OK

// to forget queued tasks, but currently we don't expect to.

ASSERT(m_taskQueue.isEmpty());

+ // Currently we assume that text will never be the last token in the

+ // document and that we'll always queue some additional task to cause it to flush.

+ ASSERT(m_pendingText.isEmpty());

}

void HTMLConstructionSite::detach()

{

+ // FIXME: We'd like to ASSERT here that we're canceling and not just discarding

+ // text that really should have made it into the DOM earlier, but there

+ // doesn't seem to be a nice way to do that.

+ m_pendingText.discard();

m_document = 0;

m_attachmentRoot = 0;

}

@@ -447,12 +518,15 @@ void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, c

void HTMLConstructionSite::processEndOfFile()

{

ASSERT(currentNode());

+ flush();

openElements()->popAll();

}

void HTMLConstructionSite::finishedParsing()

{

+ // We shouldn't have any queued tasks but we might have pending text which we need to promote to tasks and execute.

ASSERT(m_taskQueue.isEmpty());

+ flush();

m_document->finishedParsing();

}

@@ -586,63 +660,22 @@ void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const At

void HTMLConstructionSite::insertTextNode(const String& string, WhitespaceMode whitespaceMode)

{

- HTMLConstructionSiteTask protoTask(HTMLConstructionSiteTask::Insert);

- protoTask.parent = currentNode();

+ HTMLConstructionSiteTask dummyTask(HTMLConstructionSiteTask::Insert);

+ dummyTask.parent = currentNode();

if (shouldFosterParent())

- findFosterSite(protoTask);

+ findFosterSite(dummyTask);

// FIXME: This probably doesn't need to be done both here and in insert(Task).

- if (protoTask.parent->hasTagName(templateTag))

- protoTask.parent = toHTMLTemplateElement(protoTask.parent.get())->content();

- // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is necessary

- // for performance, see: https://bugs.webkit.org/show_bug.cgi?id=55898

- unsigned lengthLimit = textLengthLimitForContainer(protoTask.parent.get());

- unsigned currentPosition = 0;

- // Merge text nodes into previous ones if possible:

- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character

- Node* previousChild = protoTask.nextChild ? protoTask.nextChild->previousSibling() : protoTask.parent->lastChild();

- if (previousChild && previousChild->isTextNode()) {

- Text* previousText = toText(previousChild);

- unsigned appendLengthLimit = lengthLimit - previousText->length();

- unsigned proposedBreakIndex = std::min(currentPosition + appendLengthLimit, string.length());

- unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);

- ASSERT(breakIndex <= string.length());

- // If we didn't find a breable piece to append, forget it.

- if (breakIndex) {

- String substring = string.substring(currentPosition, breakIndex - currentPosition);

- substring = atomizeIfAllWhitespace(substring, whitespaceMode);

- previousText->parserAppendData(substring);

- currentPosition += substring.length();

- }

- while (currentPosition < string.length()) {

- unsigned proposedBreakIndex = std::min(currentPosition + lengthLimit, string.length());

- unsigned breakIndex = findBreakIndexBetween(string, currentPosition, proposedBreakIndex);

- // We failed to find a breakable boudary between the minimum and the proposed, just give up and break at the proposed index.

- // We could go searching after the proposed index, but current callers are attempting to break after 65k chars!

- // 65k of unbreakable characters isn't worth trying to handle "correctly".

- if (!breakIndex)

- breakIndex = proposedBreakIndex;

- ASSERT(breakIndex <= string.length());

- String substring = string.substring(currentPosition, breakIndex - currentPosition);

- substring = atomizeIfAllWhitespace(substring, whitespaceMode);

- HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);

- task.parent = protoTask.parent;

- task.nextChild = protoTask.nextChild;

- task.child = Text::create(task.parent->document(), substring);

- queueTask(task);

- ASSERT(breakIndex > currentPosition);

- ASSERT(breakIndex - currentPosition == substring.length());

- ASSERT(toText(task.child)->length() == substring.length());

- currentPosition = breakIndex;

- }

+ if (dummyTask.parent->hasTagName(templateTag))

+ dummyTask.parent = toHTMLTemplateElement(dummyTask.parent.get())->content();

+ // Unclear when parent != case occurs. Somehow we insert text into two separate nodes while processing the same Token.

+ // The nextChild != dummy.nextChild case occurs whenever foster parenting happened and we hit a new text node "<table>a</table>b"

+ // In either case we have to flush the pending text into the task queue before making more.

+ if (!m_pendingText.isEmpty() && (m_pendingText.parent != dummyTask.parent || m_pendingText.nextChild != dummyTask.nextChild))

+ flushPendingText();

+ m_pendingText.append(dummyTask.parent, dummyTask.nextChild, string, whitespaceMode);

}

void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord* newParent, HTMLElementStack::ElementRecord* child)

« no previous file with comments | « Source/core/html/parser/HTMLConstructionSite.h ('k') | Source/core/html/parser/HTMLDocumentParser.cpp » ('j') | no next file with comments »