Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(412)

Unified Diff: third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp

Issue 2366203006: Revert r403630 which broke parsing on some sites. (Closed)
Patch Set: Revert r403630 (by hand) Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
index 9e0703333a4871dcc5cf76c47a9893afc981ef4e..c520f936b695e82295b5148c828078a2e8d37fdb 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTreeBuilderSimulator.cpp
@@ -104,63 +104,28 @@ static bool tokenExitsMath(const CompactHTMLToken& token)
|| threadSafeMatch(tagName, MathMLNames::mtextTag);
}
-// We always push tokens which may be related to elements which are
-// HTML integration points. elementMayBeHTMLIntegrationPoint gives
-// conservative false positives. Specifically, annotation-xml end tags
-// may not be related to HTML integration points; it depends on the
-// opening tags' attributes. But elementMayBeHTMLIntegrationPoint
-// returns true for these elements.
-static bool elementMayBeHTMLIntegrationPoint(const String& tagName)
-{
- return threadSafeMatch(tagName, MathMLNames::annotation_xmlTag)
- || threadSafeMatch(tagName, SVGNames::descTag)
- || threadSafeMatch(tagName, SVGNames::foreignObjectTag)
- || threadSafeMatch(tagName, titleTag);
-}
-
-// https://html.spec.whatwg.org/#html-integration-point
-// See also HTMLElementStack::isHTMLIntegrationPoint
-static bool tokenStartsHTMLIntegrationPoint(const CompactHTMLToken& token)
-{
- if (token.type() != HTMLToken::StartTag)
- return false;
-
- const String& tagName = token.data();
- if (threadSafeMatch(tagName, MathMLNames::annotation_xmlTag)) {
- if (const CompactHTMLToken::Attribute* encoding = token.getAttributeItem(MathMLNames::encodingAttr)) {
- return equalIgnoringCase(encoding->value(), "text/html")
- || equalIgnoringCase(encoding->value(), "application/xhtml+xml");
- }
- return false;
- }
-
- return threadSafeMatch(tagName, SVGNames::descTag)
- || threadSafeMatch(tagName, SVGNames::foreignObjectTag)
- || threadSafeMatch(tagName, titleTag);
-}
-
HTMLTreeBuilderSimulator::HTMLTreeBuilderSimulator(const HTMLParserOptions& options)
: m_options(options)
{
- m_stack.append(StateFlags {HTML, false});
+ m_namespaceStack.append(HTML);
}
HTMLTreeBuilderSimulator::State HTMLTreeBuilderSimulator::stateFor(HTMLTreeBuilder* treeBuilder)
{
ASSERT(isMainThread());
- State stack;
+ State namespaceStack;
for (HTMLElementStack::ElementRecord* record = treeBuilder->openElements()->topRecord(); record; record = record->next()) {
- Namespace recordNamespace = HTML;
+ Namespace currentNamespace = HTML;
if (record->namespaceURI() == SVGNames::svgNamespaceURI)
- recordNamespace = SVG;
+ currentNamespace = SVG;
else if (record->namespaceURI() == MathMLNames::mathmlNamespaceURI)
- recordNamespace = MathML;
+ currentNamespace = MathML;
- if (stack.isEmpty() || static_cast<Namespace>(stack.last().ns) != recordNamespace || elementMayBeHTMLIntegrationPoint(record->stackItem()->localName()))
- stack.append(StateFlags {static_cast<unsigned>(recordNamespace), HTMLElementStack::isHTMLIntegrationPoint(record->stackItem())});
+ if (namespaceStack.isEmpty() || namespaceStack.last() != currentNamespace)
+ namespaceStack.append(currentNamespace);
}
- stack.reverse();
- return stack;
+ namespaceStack.reverse();
+ return namespaceStack;
}
HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::simulate(const CompactHTMLToken& token, HTMLTokenizer* tokenizer)
@@ -169,23 +134,16 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::simulate(cons
if (token.type() == HTMLToken::StartTag) {
const String& tagName = token.data();
- bool currentNodeIsHTMLIntegrationPoint = m_stack.last().isHTMLIntegrationPoint;
- Namespace currentNodeNamespace = currentNamespace();
-
- if (inForeignContent() && tokenExitsForeignContent(token))
- m_stack.removeLast();
-
if (threadSafeMatch(tagName, SVGNames::svgTag))
- m_stack.append(StateFlags {SVG, tokenStartsHTMLIntegrationPoint(token)});
- else if (threadSafeMatch(tagName, MathMLNames::mathTag))
- m_stack.append(StateFlags {MathML, tokenStartsHTMLIntegrationPoint(token)});
- else if ((currentNodeNamespace == SVG && tokenExitsSVG(token))
- || (currentNodeNamespace == MathML && tokenExitsMath(token)))
- m_stack.append(StateFlags {HTML, tokenStartsHTMLIntegrationPoint(token)});
- else if (elementMayBeHTMLIntegrationPoint(token.data()) != currentNodeIsHTMLIntegrationPoint)
- m_stack.append(StateFlags {static_cast<unsigned>(currentNodeNamespace), tokenStartsHTMLIntegrationPoint(token)});
-
- if (!inForeignContent() || currentNodeIsHTMLIntegrationPoint) {
+ m_namespaceStack.append(SVG);
+ if (threadSafeMatch(tagName, MathMLNames::mathTag))
+ m_namespaceStack.append(MathML);
+ if (inForeignContent() && tokenExitsForeignContent(token))
+ m_namespaceStack.removeLast();
+ if ((m_namespaceStack.last() == SVG && tokenExitsSVG(token))
+ || (m_namespaceStack.last() == MathML && tokenExitsMath(token)))
+ m_namespaceStack.append(HTML);
+ if (!inForeignContent()) {
// FIXME: This is just a copy of Tokenizer::updateStateFor which uses threadSafeMatches.
if (threadSafeMatch(tagName, textareaTag) || threadSafeMatch(tagName, titleTag)) {
tokenizer->setState(HTMLTokenizer::RCDATAState);
@@ -207,18 +165,11 @@ HTMLTreeBuilderSimulator::SimulatedToken HTMLTreeBuilderSimulator::simulate(cons
if (token.type() == HTMLToken::EndTag) {
const String& tagName = token.data();
- if ((currentNamespace() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
- || (currentNamespace() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
- || (stackContainsNamespace(SVG) && currentNamespace() == HTML && tokenExitsSVG(token))
- || (stackContainsNamespace(MathML) && currentNamespace() == HTML && tokenExitsMath(token))
- // By checking the namespace, the above tests subtly avoid
- // popping the base stack entry which is 'HTML'. Because
- // HTML title is an integration point, we must explicitly
- // check we are not popping the base entry when presented
- // malformed input like </title> with no opening tag.
- || (m_stack.size() > 1 && elementMayBeHTMLIntegrationPoint(token.data()) != static_cast<bool>(m_stack.last().isHTMLIntegrationPoint)))
- m_stack.removeLast();
-
+ if ((m_namespaceStack.last() == SVG && threadSafeMatch(tagName, SVGNames::svgTag))
+ || (m_namespaceStack.last() == MathML && threadSafeMatch(tagName, MathMLNames::mathTag))
+ || (m_namespaceStack.contains(SVG) && m_namespaceStack.last() == HTML && tokenExitsSVG(token))
+ || (m_namespaceStack.contains(MathML) && m_namespaceStack.last() == HTML && tokenExitsMath(token)))
+ m_namespaceStack.removeLast();
if (threadSafeMatch(tagName, scriptTag)) {
if (!inForeignContent())
tokenizer->setState(HTMLTokenizer::DataState);

Powered by Google App Engine
This is Rietveld 408576698