| Index: third_party/WebKit/WebCore/html/HTMLParser.cpp
|
| ===================================================================
|
| --- third_party/WebKit/WebCore/html/HTMLParser.cpp (revision 9310)
|
| +++ third_party/WebKit/WebCore/html/HTMLParser.cpp (working copy)
|
| @@ -1,1605 +1,1608 @@
|
| -/*
|
| - Copyright (C) 1997 Martin Jones (mjones@kde.org)
|
| - (C) 1997 Torben Weis (weis@kde.org)
|
| - (C) 1999,2001 Lars Knoll (knoll@kde.org)
|
| - (C) 2000,2001 Dirk Mueller (mueller@kde.org)
|
| - Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
|
| -
|
| - This library is free software; you can redistribute it and/or
|
| - modify it under the terms of the GNU Library General Public
|
| - License as published by the Free Software Foundation; either
|
| - version 2 of the License, or (at your option) any later version.
|
| -
|
| - This library is distributed in the hope that it will be useful,
|
| - but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| - Library General Public License for more details.
|
| -
|
| - You should have received a copy of the GNU Library General Public License
|
| - along with this library; see the file COPYING.LIB. If not, write to
|
| - the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
| - Boston, MA 02110-1301, USA.
|
| -*/
|
| -
|
| -#include "config.h"
|
| -#include "HTMLParser.h"
|
| -
|
| -#include "CharacterNames.h"
|
| -#include "CSSPropertyNames.h"
|
| -#include "CSSValueKeywords.h"
|
| -#include "Comment.h"
|
| -#include "Console.h"
|
| -#include "DOMWindow.h"
|
| -#include "DocumentFragment.h"
|
| -#include "DocumentType.h"
|
| -#include "Frame.h"
|
| -#include "HTMLBodyElement.h"
|
| -#include "HTMLDocument.h"
|
| -#include "HTMLDivElement.h"
|
| -#include "HTMLDListElement.h"
|
| -#include "HTMLElementFactory.h"
|
| -#include "HTMLFormElement.h"
|
| -#include "HTMLHeadElement.h"
|
| -#include "HTMLHRElement.h"
|
| -#include "HTMLHtmlElement.h"
|
| -#include "HTMLIsIndexElement.h"
|
| -#include "HTMLMapElement.h"
|
| -#include "HTMLNames.h"
|
| -#include "HTMLTableCellElement.h"
|
| -#include "HTMLTableRowElement.h"
|
| -#include "HTMLTableSectionElement.h"
|
| -#include "HTMLTokenizer.h"
|
| -#include "LocalizedStrings.h"
|
| -#include "Settings.h"
|
| -#include "Text.h"
|
| -#include <wtf/StdLibExtras.h>
|
| -
|
| -namespace WebCore {
|
| -
|
| -using namespace HTMLNames;
|
| -
|
| -static const unsigned cMaxRedundantTagDepth = 20;
|
| -static const unsigned cResidualStyleMaxDepth = 200;
|
| -
|
| -struct HTMLStackElem : Noncopyable {
|
| - HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx)
|
| - : tagName(t)
|
| - , level(lvl)
|
| - , strayTableContent(false)
|
| - , node(n)
|
| - , didRefNode(r)
|
| - , next(nx)
|
| - {
|
| - }
|
| -
|
| - void derefNode()
|
| - {
|
| - if (didRefNode)
|
| - node->deref();
|
| - }
|
| -
|
| - AtomicString tagName;
|
| - int level;
|
| - bool strayTableContent;
|
| - Node* node;
|
| - bool didRefNode;
|
| - HTMLStackElem* next;
|
| -};
|
| -
|
| -/**
|
| - * The parser parses tokenized input into the document, building up the
|
| - * document tree. If the document is well-formed, parsing it is straightforward.
|
| - *
|
| - * Unfortunately, we have to handle many HTML documents that are not well-formed,
|
| - * so the parser has to be tolerant about errors.
|
| - *
|
| - * We have to take care of at least the following error conditions:
|
| - *
|
| - * 1. The element being added is explicitly forbidden inside some outer tag.
|
| - * In this case we should close all tags up to the one, which forbids
|
| - * the element, and add it afterwards.
|
| - *
|
| - * 2. We are not allowed to add the element directly. It could be that
|
| - * the person writing the document forgot some tag in between (or that the
|
| - * tag in between is optional). This could be the case with the following
|
| - * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
|
| - *
|
| - * 3. We want to add a block element inside to an inline element. Close all
|
| - * inline elements up to the next higher block element.
|
| - *
|
| - * 4. If this doesn't help, close elements until we are allowed to add the
|
| - * element or ignore the tag.
|
| - *
|
| - */
|
| -
|
| -HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
|
| - : document(doc)
|
| - , current(doc)
|
| - , didRefCurrent(false)
|
| - , blockStack(0)
|
| - , m_hasPElementInScope(NotInScope)
|
| - , head(0)
|
| - , inBody(false)
|
| - , haveContent(false)
|
| - , haveFrameSet(false)
|
| - , m_isParsingFragment(false)
|
| - , m_reportErrors(reportErrors)
|
| - , m_handlingResidualStyleAcrossBlocks(false)
|
| - , inStrayTableContent(0)
|
| -{
|
| -}
|
| -
|
| -HTMLParser::HTMLParser(DocumentFragment* frag)
|
| - : document(frag->document())
|
| - , current(frag)
|
| - , didRefCurrent(true)
|
| - , blockStack(0)
|
| - , m_hasPElementInScope(NotInScope)
|
| - , head(0)
|
| - , inBody(true)
|
| - , haveContent(false)
|
| - , haveFrameSet(false)
|
| - , m_isParsingFragment(true)
|
| - , m_reportErrors(false)
|
| - , m_handlingResidualStyleAcrossBlocks(false)
|
| - , inStrayTableContent(0)
|
| -{
|
| - if (frag)
|
| - frag->ref();
|
| -}
|
| -
|
| -HTMLParser::~HTMLParser()
|
| -{
|
| - freeBlock();
|
| - if (didRefCurrent)
|
| - current->deref();
|
| -}
|
| -
|
| -void HTMLParser::reset()
|
| -{
|
| - ASSERT(!m_isParsingFragment);
|
| -
|
| - setCurrent(document);
|
| -
|
| - freeBlock();
|
| -
|
| - inBody = false;
|
| - haveFrameSet = false;
|
| - haveContent = false;
|
| - inStrayTableContent = 0;
|
| -
|
| - m_currentFormElement = 0;
|
| - m_currentMapElement = 0;
|
| - head = 0;
|
| - m_isindexElement = 0;
|
| -
|
| - m_skipModeTag = nullAtom;
|
| -}
|
| -
|
| -void HTMLParser::setCurrent(Node* newCurrent)
|
| -{
|
| - bool didRefNewCurrent = newCurrent && newCurrent != document;
|
| - if (didRefNewCurrent)
|
| - newCurrent->ref();
|
| - if (didRefCurrent)
|
| - current->deref();
|
| - current = newCurrent;
|
| - didRefCurrent = didRefNewCurrent;
|
| -}
|
| -
|
| -PassRefPtr<Node> HTMLParser::parseToken(Token* t)
|
| -{
|
| - if (!m_skipModeTag.isNull()) {
|
| - if (!t->beginTag && t->tagName == m_skipModeTag)
|
| - // Found the end tag for the current skip mode, so we're done skipping.
|
| - m_skipModeTag = nullAtom;
|
| - else if (current->localName() == t->tagName)
|
| - // Do not skip </iframe>.
|
| - // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
|
| - ;
|
| - else
|
| - return 0;
|
| - }
|
| -
|
| - // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
|
| - if (t->isCloseTag(brTag) && document->inCompatMode()) {
|
| - reportError(MalformedBRError);
|
| - t->beginTag = true;
|
| - }
|
| -
|
| - if (!t->beginTag) {
|
| - processCloseTag(t);
|
| - return 0;
|
| - }
|
| -
|
| - // Ignore spaces, if we're not inside a paragraph or other inline code.
|
| - // Do not alter the text if it is part of a scriptTag.
|
| - if (t->tagName == textAtom && t->text && current->localName() != scriptTag) {
|
| - if (inBody && !skipMode() && current->localName() != styleTag &&
|
| - current->localName() != titleTag && !t->text->containsOnlyWhitespace())
|
| - haveContent = true;
|
| -
|
| - RefPtr<Node> n;
|
| - String text = t->text.get();
|
| - unsigned charsLeft = text.length();
|
| - while (charsLeft) {
|
| - // split large blocks of text to nodes of manageable size
|
| - n = Text::createWithLengthLimit(document, text, charsLeft);
|
| - if (!insertNode(n.get(), t->selfClosingTag))
|
| - return 0;
|
| - }
|
| - return n;
|
| - }
|
| -
|
| - RefPtr<Node> n = getNode(t);
|
| - // just to be sure, and to catch currently unimplemented stuff
|
| - if (!n)
|
| - return 0;
|
| -
|
| - // set attributes
|
| - if (n->isHTMLElement()) {
|
| - HTMLElement* e = static_cast<HTMLElement*>(n.get());
|
| - e->setAttributeMap(t->attrs.get());
|
| -
|
| - // take care of optional close tags
|
| - if (e->endTagRequirement() == TagStatusOptional)
|
| - popBlock(t->tagName);
|
| -
|
| - // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
|
| - // syntax was used, report an error.
|
| - if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {
|
| - if (t->tagName == scriptTag)
|
| - reportError(IncorrectXMLCloseScriptWarning);
|
| - else
|
| - reportError(IncorrectXMLSelfCloseError, &t->tagName);
|
| - }
|
| - }
|
| -
|
| - if (!insertNode(n.get(), t->selfClosingTag)) {
|
| - // we couldn't insert the node
|
| -
|
| - if (n->isElementNode()) {
|
| - Element* e = static_cast<Element*>(n.get());
|
| - e->setAttributeMap(0);
|
| - }
|
| -
|
| - if (m_currentMapElement == n)
|
| - m_currentMapElement = 0;
|
| -
|
| - if (m_currentFormElement == n)
|
| - m_currentFormElement = 0;
|
| -
|
| - if (head == n)
|
| - head = 0;
|
| -
|
| - return 0;
|
| - }
|
| - return n;
|
| -}
|
| -
|
| -void HTMLParser::parseDoctypeToken(DoctypeToken* t)
|
| -{
|
| - // Ignore any doctype after the first. Ignore doctypes in fragments.
|
| - if (document->doctype() || m_isParsingFragment || current != document)
|
| - return;
|
| -
|
| - // Make a new doctype node and set it as our doctype.
|
| - document->addChild(DocumentType::create(document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
|
| -}
|
| -
|
| -static bool isTableSection(Node* n)
|
| -{
|
| - return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
|
| -}
|
| -
|
| -static bool isTablePart(Node* n)
|
| -{
|
| - return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
|
| - isTableSection(n);
|
| -}
|
| -
|
| -static bool isTableRelated(Node* n)
|
| -{
|
| - return n->hasTagName(tableTag) || isTablePart(n);
|
| -}
|
| -
|
| -static bool isScopingTag(const AtomicString& tagName)
|
| -{
|
| - return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag;
|
| -}
|
| -
|
| -bool HTMLParser::insertNode(Node* n, bool flat)
|
| -{
|
| - RefPtr<Node> protectNode(n);
|
| -
|
| - const AtomicString& localName = n->localName();
|
| - int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0;
|
| -
|
| - // <table> is never allowed inside stray table content. Always pop out of the stray table content
|
| - // and close up the first table, and then start the second table as a sibling.
|
| - if (inStrayTableContent && localName == tableTag)
|
| - popBlock(tableTag);
|
| -
|
| - // let's be stupid and just try to insert it.
|
| - // this should work if the document is well-formed
|
| - Node* newNode = current->addChild(n);
|
| - if (!newNode)
|
| - return handleError(n, flat, localName, tagPriority); // Try to handle the error.
|
| -
|
| - // don't push elements without end tags (e.g., <img>) on the stack
|
| - bool parentAttached = current->attached();
|
| - if (tagPriority > 0 && !flat) {
|
| - if (newNode == current) {
|
| - // This case should only be hit when a demoted <form> is placed inside a table.
|
| - ASSERT(localName == formTag);
|
| - reportError(FormInsideTablePartError, &current->localName());
|
| - } else {
|
| - // The pushBlock function transfers ownership of current to the block stack
|
| - // so we're guaranteed that didRefCurrent is false. The code below is an
|
| - // optimized version of setCurrent that takes advantage of that fact and also
|
| - // assumes that newNode is neither 0 nor a pointer to the document.
|
| - pushBlock(localName, tagPriority);
|
| - newNode->beginParsingChildren();
|
| - ASSERT(!didRefCurrent);
|
| - newNode->ref();
|
| - current = newNode;
|
| - didRefCurrent = true;
|
| - }
|
| - if (parentAttached && !n->attached() && !m_isParsingFragment)
|
| - n->attach();
|
| - } else {
|
| - if (parentAttached && !n->attached() && !m_isParsingFragment)
|
| - n->attach();
|
| - n->finishParsingChildren();
|
| - }
|
| -
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
|
| -{
|
| - // Error handling code. This is just ad hoc handling of specific parent/child combinations.
|
| - HTMLElement* e;
|
| - bool handled = false;
|
| -
|
| - // 1. Check out the element's tag name to decide how to deal with errors.
|
| - if (n->isHTMLElement()) {
|
| - HTMLElement* h = static_cast<HTMLElement*>(n);
|
| - if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
|
| - if (inStrayTableContent && !isTableRelated(current)) {
|
| - reportError(MisplacedTablePartError, &localName, &current->localName());
|
| - // pop out to the nearest enclosing table-related tag.
|
| - while (blockStack && !isTableRelated(current))
|
| - popOneBlock();
|
| - return insertNode(n);
|
| - }
|
| - } else if (h->hasLocalName(headTag)) {
|
| - if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) {
|
| - reportError(MisplacedHeadError);
|
| - return false;
|
| - }
|
| - } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
|
| - bool createdHead = false;
|
| - if (!head) {
|
| - createHead();
|
| - createdHead = true;
|
| - }
|
| - if (head) {
|
| - if (!createdHead)
|
| - reportError(MisplacedHeadContentError, &localName, &current->localName());
|
| - if (head->addChild(n)) {
|
| - if (!n->attached() && !m_isParsingFragment)
|
| - n->attach();
|
| - return true;
|
| - } else
|
| - return false;
|
| - }
|
| - } else if (h->hasLocalName(htmlTag)) {
|
| - if (!current->isDocumentNode() ) {
|
| - if (document->documentElement() && document->documentElement()->hasTagName(htmlTag)) {
|
| - reportError(RedundantHTMLBodyError, &localName);
|
| - // we have another <HTML> element.... apply attributes to existing one
|
| - // make sure we don't overwrite already existing attributes
|
| - NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
|
| - Element* existingHTML = static_cast<Element*>(document->documentElement());
|
| - NamedAttrMap* bmap = existingHTML->attributes(false);
|
| - for (unsigned l = 0; map && l < map->length(); ++l) {
|
| - Attribute* it = map->attributeItem(l);
|
| - if (!bmap->getAttributeItem(it->name()))
|
| - existingHTML->setAttribute(it->name(), it->value());
|
| - }
|
| - }
|
| - return false;
|
| - }
|
| - } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) {
|
| - bool createdHead = false;
|
| - if (!head) {
|
| - createHead();
|
| - createdHead = true;
|
| - }
|
| - if (head) {
|
| - Node* newNode = head->addChild(n);
|
| - if (!newNode) {
|
| - setSkipMode(h->tagQName());
|
| - return false;
|
| - }
|
| -
|
| - if (!createdHead)
|
| - reportError(MisplacedHeadContentError, &localName, &current->localName());
|
| -
|
| - pushBlock(localName, tagPriority);
|
| - newNode->beginParsingChildren();
|
| - setCurrent(newNode);
|
| - if (!n->attached() && !m_isParsingFragment)
|
| - n->attach();
|
| - return true;
|
| - }
|
| - if (inBody) {
|
| - setSkipMode(h->tagQName());
|
| - return false;
|
| - }
|
| - } else if (h->hasLocalName(bodyTag)) {
|
| - if (inBody && document->body()) {
|
| - // we have another <BODY> element.... apply attributes to existing one
|
| - // make sure we don't overwrite already existing attributes
|
| - // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
|
| - reportError(RedundantHTMLBodyError, &localName);
|
| - NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
|
| - Element* existingBody = document->body();
|
| - NamedAttrMap* bmap = existingBody->attributes(false);
|
| - for (unsigned l = 0; map && l < map->length(); ++l) {
|
| - Attribute* it = map->attributeItem(l);
|
| - if (!bmap->getAttributeItem(it->name()))
|
| - existingBody->setAttribute(it->name(), it->value());
|
| - }
|
| - return false;
|
| - }
|
| - else if (!current->isDocumentNode())
|
| - return false;
|
| - } else if (h->hasLocalName(areaTag)) {
|
| - if (m_currentMapElement) {
|
| - reportError(MisplacedAreaError, &current->localName());
|
| - m_currentMapElement->addChild(n);
|
| - if (!n->attached() && !m_isParsingFragment)
|
| - n->attach();
|
| - handled = true;
|
| - return true;
|
| - }
|
| - return false;
|
| - } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
|
| - if (isTableRelated(current)) {
|
| - while (blockStack && isTablePart(current))
|
| - popOneBlock();
|
| - return insertNode(n);
|
| - }
|
| - }
|
| - } else if (n->isCommentNode() && !head)
|
| - return false;
|
| -
|
| - // 2. Next we examine our currently active element to do some further error handling.
|
| - if (current->isHTMLElement()) {
|
| - HTMLElement* h = static_cast<HTMLElement*>(current);
|
| - const AtomicString& currentTagName = h->localName();
|
| - if (h->hasLocalName(htmlTag)) {
|
| - HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
|
| - if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
|
| - elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
|
| - elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
|
| - elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
|
| - elt->hasLocalName(baseTag))) {
|
| - if (!head) {
|
| - head = new HTMLHeadElement(headTag, document);
|
| - e = head;
|
| - insertNode(e);
|
| - handled = true;
|
| - }
|
| - } else {
|
| - if (n->isTextNode()) {
|
| - Text* t = static_cast<Text*>(n);
|
| - if (t->containsOnlyWhitespace())
|
| - return false;
|
| - }
|
| - if (!haveFrameSet) {
|
| - e = new HTMLBodyElement(bodyTag, document);
|
| - startBody();
|
| - insertNode(e);
|
| - handled = true;
|
| - } else
|
| - reportError(MisplacedFramesetContentError, &localName);
|
| - }
|
| - } else if (h->hasLocalName(headTag)) {
|
| - if (n->hasTagName(htmlTag))
|
| - return false;
|
| - else {
|
| - // This means the body starts here...
|
| - if (!haveFrameSet) {
|
| - popBlock(currentTagName);
|
| - e = new HTMLBodyElement(bodyTag, document);
|
| - startBody();
|
| - insertNode(e);
|
| - handled = true;
|
| - } else
|
| - reportError(MisplacedFramesetContentError, &localName);
|
| - }
|
| - } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag)
|
| - || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
|
| - reportError(MisplacedContentRetryError, &localName, &currentTagName);
|
| - popBlock(currentTagName);
|
| - handled = true;
|
| - } else if (h->hasLocalName(captionTag)) {
|
| - // Illegal content in a caption. Close the caption and try again.
|
| - reportError(MisplacedCaptionContentError, &localName);
|
| - popBlock(currentTagName);
|
| - if (isTablePart(n))
|
| - return insertNode(n, flat);
|
| - } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
|
| - if (n->hasTagName(tableTag)) {
|
| - reportError(MisplacedTableError, &currentTagName);
|
| - if (m_isParsingFragment && !h->hasLocalName(tableTag))
|
| - // fragment may contain table parts without <table> ancestor, pop them one by one
|
| - popBlock(h->localName());
|
| - popBlock(localName); // end the table
|
| - handled = true; // ...and start a new one
|
| - } else {
|
| - ExceptionCode ec = 0;
|
| - Node* node = current;
|
| - Node* parent = node->parentNode();
|
| - // A script may have removed the current node's parent from the DOM
|
| - // http://bugs.webkit.org/show_bug.cgi?id=7137
|
| - // FIXME: we should do real recovery here and re-parent with the correct node.
|
| - if (!parent)
|
| - return false;
|
| - Node* grandparent = parent->parentNode();
|
| -
|
| - if (n->isTextNode() ||
|
| - (h->hasLocalName(trTag) &&
|
| - isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
|
| - ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
|
| - !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
|
| - parent->hasTagName(tableTag))) {
|
| - node = (node->hasTagName(tableTag)) ? node :
|
| - ((node->hasTagName(trTag)) ? grandparent : parent);
|
| - // This can happen with fragments
|
| - if (!node)
|
| - return false;
|
| - Node* parent = node->parentNode();
|
| - if (!parent)
|
| - return false;
|
| - parent->insertBefore(n, node, ec);
|
| - if (!ec) {
|
| - reportError(StrayTableContentError, &localName, &currentTagName);
|
| - if (n->isHTMLElement() && tagPriority > 0 &&
|
| - !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
|
| - {
|
| - pushBlock(localName, tagPriority);
|
| - n->beginParsingChildren();
|
| - setCurrent(n);
|
| - inStrayTableContent++;
|
| - blockStack->strayTableContent = true;
|
| - }
|
| - return true;
|
| - }
|
| - }
|
| -
|
| - if (!ec) {
|
| - if (current->hasTagName(trTag)) {
|
| - reportError(TablePartRequiredError, &localName, &tdTag.localName());
|
| - e = new HTMLTableCellElement(tdTag, document);
|
| - } else if (current->hasTagName(tableTag)) {
|
| - // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
|
| - // and it isn't really a parse error per se.
|
| - e = new HTMLTableSectionElement(tbodyTag, document);
|
| - } else {
|
| - reportError(TablePartRequiredError, &localName, &trTag.localName());
|
| - e = new HTMLTableRowElement(trTag, document);
|
| - }
|
| -
|
| - insertNode(e);
|
| - handled = true;
|
| - }
|
| - }
|
| - } else if (h->hasLocalName(objectTag)) {
|
| - reportError(MisplacedContentRetryError, &localName, &currentTagName);
|
| - popBlock(objectTag);
|
| - handled = true;
|
| - } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
|
| - if (!isInline(n)) {
|
| - popBlock(currentTagName);
|
| - handled = true;
|
| - }
|
| - } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
|
| - if (localName == optgroupTag) {
|
| - popBlock(currentTagName);
|
| - handled = true;
|
| - } else if (localName == selectTag) {
|
| - // IE treats a nested select as </select>. Let's do the same
|
| - popBlock(localName);
|
| - }
|
| - } else if (h->hasLocalName(selectTag)) {
|
| - if (localName == inputTag || localName == textareaTag) {
|
| - reportError(MisplacedContentRetryError, &localName, &currentTagName);
|
| - popBlock(currentTagName);
|
| - handled = true;
|
| - }
|
| - } else if (h->hasLocalName(colgroupTag)) {
|
| - popBlock(currentTagName);
|
| - handled = true;
|
| - } else if (!h->hasLocalName(bodyTag)) {
|
| - if (isInline(current)) {
|
| - popInlineBlocks();
|
| - handled = true;
|
| - }
|
| - }
|
| - } else if (current->isDocumentNode()) {
|
| - if (n->isTextNode()) {
|
| - Text* t = static_cast<Text*>(n);
|
| - if (t->containsOnlyWhitespace())
|
| - return false;
|
| - }
|
| -
|
| - if (!document->documentElement()) {
|
| - e = new HTMLHtmlElement(htmlTag, document);
|
| - insertNode(e);
|
| - handled = true;
|
| - }
|
| - }
|
| -
|
| - // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
|
| - if (!handled) {
|
| - reportError(IgnoredContentError, &localName, &current->localName());
|
| - return false;
|
| - }
|
| - return insertNode(n);
|
| -}
|
| -
|
| -typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
|
| -typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
|
| -
|
| -bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - result = new Text(document, t->text.get());
|
| - return false;
|
| -}
|
| -
|
| -bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - result = new Comment(document, t->text.get());
|
| - return false;
|
| -}
|
| -
|
| -bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
|
| -{
|
| - if (!head || current->localName() == htmlTag) {
|
| - head = new HTMLHeadElement(headTag, document);
|
| - result = head;
|
| - } else
|
| - reportError(MisplacedHeadError);
|
| - return false;
|
| -}
|
| -
|
| -bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - // body no longer allowed if we have a frameset
|
| - if (haveFrameSet)
|
| - return false;
|
| - popBlock(headTag);
|
| - startBody();
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - popBlock(headTag);
|
| - if (inBody && !haveFrameSet && !haveContent) {
|
| - popBlock(bodyTag);
|
| - // ### actually for IE document.body returns the now hidden "body" element
|
| - // we can't implement that behaviour now because it could cause too many
|
| - // regressions and the headaches are not worth the work as long as there is
|
| - // no site actually relying on that detail (Dirk)
|
| - if (document->body())
|
| - document->body()->setAttribute(styleAttr, "display:none");
|
| - inBody = false;
|
| - }
|
| - if ((haveContent || haveFrameSet) && current->localName() == htmlTag)
|
| - return false;
|
| - haveFrameSet = true;
|
| - startBody();
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - // Only create a new form if we're not already inside one.
|
| - // This is consistent with other browsers' behavior.
|
| - if (!m_currentFormElement) {
|
| - m_currentFormElement = new HTMLFormElement(formTag, document);
|
| - result = m_currentFormElement;
|
| - pCloserCreateErrorCheck(t, result);
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - RefPtr<Node> n = handleIsindex(t);
|
| - if (!inBody)
|
| - m_isindexElement = n.release();
|
| - else {
|
| - t->selfClosingTag = true;
|
| - result = n.release();
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - pCloserCreateErrorCheck(t, result);
|
| - popBlock(dtTag);
|
| - popBlock(ddTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - pCloserCreateErrorCheck(t, result);
|
| - popBlock(ddTag);
|
| - popBlock(dtTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
|
| -{
|
| - popBlock(t->tagName);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| -{
|
| - pCloserCreateErrorCheck(t, result);
|
| - popBlock(t->tagName);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&)
|
| -{
|
| - return allowNestedRedundantTag(t->tagName);
|
| -}
|
| -
|
| -bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - popBlock(tdTag);
|
| - popBlock(thTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - popBlock(theadTag);
|
| - popBlock(tbodyTag);
|
| - popBlock(tfootTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - setSkipMode(noembedTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - setSkipMode(noframesTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - if (!m_isParsingFragment) {
|
| - Settings* settings = document->settings();
|
| - if (settings && settings->isJavaScriptEnabled())
|
| - setSkipMode(noscriptTag);
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - if (hasPElementInScope())
|
| - popBlock(pTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
|
| -{
|
| - if (document->inCompatMode())
|
| - return true;
|
| - if (hasPElementInScope())
|
| - popBlock(pTag);
|
| - return true;
|
| -}
|
| -
|
| -bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
|
| -{
|
| - m_currentMapElement = new HTMLMapElement(mapTag, document);
|
| - result = m_currentMapElement;
|
| - return false;
|
| -}
|
| -
|
| -PassRefPtr<Node> HTMLParser::getNode(Token* t)
|
| -{
|
| - // Init our error handling table.
|
| - DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ());
|
| - if (gFunctionMap.isEmpty()) {
|
| - gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| - gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
|
| - gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| - gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
|
| - gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
|
| - gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
|
| - gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
|
| - gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
|
| - gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
|
| - gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
|
| - gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck);
|
| - gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
|
| - gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| - gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
|
| - gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
|
| - gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
|
| - gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
|
| - gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck);
|
| - gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
|
| - gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
|
| - gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
|
| - gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
|
| - gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
|
| - gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
|
| - gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| - gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| - gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| - }
|
| -
|
| - bool proceed = true;
|
| - RefPtr<Node> result;
|
| - if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
|
| - proceed = (this->*errorCheckFunc)(t, result);
|
| - if (proceed)
|
| - result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), document, m_currentFormElement.get());
|
| - return result.release();
|
| -}
|
| -
|
| -bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName)
|
| -{
|
| - // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
|
| - // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
|
| - // nested tags of the same type before just ignoring them all together.
|
| - unsigned i = 0;
|
| - for (HTMLStackElem* curr = blockStack;
|
| - i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
|
| - curr = curr->next, i++) { }
|
| - return i != cMaxRedundantTagDepth;
|
| -}
|
| -
|
| -void HTMLParser::processCloseTag(Token* t)
|
| -{
|
| - // Support for really broken html.
|
| - // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
|
| - // let's rely on the end() call to close things.
|
| - if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
|
| - return;
|
| -
|
| - bool checkForCloseTagErrors = true;
|
| - if (t->tagName == formTag && m_currentFormElement) {
|
| - m_currentFormElement = 0;
|
| - checkForCloseTagErrors = false;
|
| - } else if (t->tagName == mapTag)
|
| - m_currentMapElement = 0;
|
| - else if (t->tagName == pTag)
|
| - checkForCloseTagErrors = false;
|
| -
|
| - HTMLStackElem* oldElem = blockStack;
|
| - popBlock(t->tagName, checkForCloseTagErrors);
|
| - if (oldElem == blockStack && t->tagName == pTag) {
|
| - // We encountered a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
|
| - // this as a valid break, i.e., <p></p>. So go ahead and make the empty
|
| - // paragraph.
|
| - t->beginTag = true;
|
| - parseToken(t);
|
| - popBlock(t->tagName);
|
| - reportError(StrayParagraphCloseError);
|
| - }
|
| -}
|
| -
|
| -bool HTMLParser::isHeaderTag(const AtomicString& tagName)
|
| -{
|
| - DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headerTags, ());
|
| - if (headerTags.isEmpty()) {
|
| - headerTags.add(h1Tag.localName().impl());
|
| - headerTags.add(h2Tag.localName().impl());
|
| - headerTags.add(h3Tag.localName().impl());
|
| - headerTags.add(h4Tag.localName().impl());
|
| - headerTags.add(h5Tag.localName().impl());
|
| - headerTags.add(h6Tag.localName().impl());
|
| - }
|
| -
|
| - return headerTags.contains(tagName.impl());
|
| -}
|
| -
|
| -bool HTMLParser::isInline(Node* node) const
|
| -{
|
| - if (node->isTextNode())
|
| - return true;
|
| -
|
| - if (node->isHTMLElement()) {
|
| - HTMLElement* e = static_cast<HTMLElement*>(node);
|
| - if (e->hasLocalName(aTag) || e->hasLocalName(fontTag) || e->hasLocalName(ttTag) ||
|
| - e->hasLocalName(uTag) || e->hasLocalName(bTag) || e->hasLocalName(iTag) ||
|
| - e->hasLocalName(sTag) || e->hasLocalName(strikeTag) || e->hasLocalName(bigTag) ||
|
| - e->hasLocalName(smallTag) || e->hasLocalName(emTag) || e->hasLocalName(strongTag) ||
|
| - e->hasLocalName(dfnTag) || e->hasLocalName(codeTag) || e->hasLocalName(sampTag) ||
|
| - e->hasLocalName(kbdTag) || e->hasLocalName(varTag) || e->hasLocalName(citeTag) ||
|
| - e->hasLocalName(abbrTag) || e->hasLocalName(acronymTag) || e->hasLocalName(subTag) ||
|
| - e->hasLocalName(supTag) || e->hasLocalName(spanTag) || e->hasLocalName(nobrTag) ||
|
| - e->hasLocalName(noframesTag) || e->hasLocalName(nolayerTag) ||
|
| - e->hasLocalName(noembedTag))
|
| - return true;
|
| - if (e->hasLocalName(noscriptTag) && !m_isParsingFragment) {
|
| - Settings* settings = document->settings();
|
| - if (settings && settings->isJavaScriptEnabled())
|
| - return true;
|
| - }
|
| - }
|
| -
|
| - return false;
|
| -}
|
| -
|
| -bool HTMLParser::isResidualStyleTag(const AtomicString& tagName)
|
| -{
|
| - DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ());
|
| - if (residualStyleTags.isEmpty()) {
|
| - residualStyleTags.add(aTag.localName().impl());
|
| - residualStyleTags.add(fontTag.localName().impl());
|
| - residualStyleTags.add(ttTag.localName().impl());
|
| - residualStyleTags.add(uTag.localName().impl());
|
| - residualStyleTags.add(bTag.localName().impl());
|
| - residualStyleTags.add(iTag.localName().impl());
|
| - residualStyleTags.add(sTag.localName().impl());
|
| - residualStyleTags.add(strikeTag.localName().impl());
|
| - residualStyleTags.add(bigTag.localName().impl());
|
| - residualStyleTags.add(smallTag.localName().impl());
|
| - residualStyleTags.add(emTag.localName().impl());
|
| - residualStyleTags.add(strongTag.localName().impl());
|
| - residualStyleTags.add(dfnTag.localName().impl());
|
| - residualStyleTags.add(codeTag.localName().impl());
|
| - residualStyleTags.add(sampTag.localName().impl());
|
| - residualStyleTags.add(kbdTag.localName().impl());
|
| - residualStyleTags.add(varTag.localName().impl());
|
| - residualStyleTags.add(nobrTag.localName().impl());
|
| - }
|
| -
|
| - return residualStyleTags.contains(tagName.impl());
|
| -}
|
| -
|
| -bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName)
|
| -{
|
| - DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ());
|
| - if (unaffectedTags.isEmpty()) {
|
| - unaffectedTags.add(bodyTag.localName().impl());
|
| - unaffectedTags.add(tableTag.localName().impl());
|
| - unaffectedTags.add(theadTag.localName().impl());
|
| - unaffectedTags.add(tbodyTag.localName().impl());
|
| - unaffectedTags.add(tfootTag.localName().impl());
|
| - unaffectedTags.add(trTag.localName().impl());
|
| - unaffectedTags.add(thTag.localName().impl());
|
| - unaffectedTags.add(tdTag.localName().impl());
|
| - unaffectedTags.add(captionTag.localName().impl());
|
| - unaffectedTags.add(colgroupTag.localName().impl());
|
| - unaffectedTags.add(colTag.localName().impl());
|
| - unaffectedTags.add(optionTag.localName().impl());
|
| - unaffectedTags.add(optgroupTag.localName().impl());
|
| - unaffectedTags.add(selectTag.localName().impl());
|
| - unaffectedTags.add(objectTag.localName().impl());
|
| - }
|
| -
|
| - return !unaffectedTags.contains(tagName.impl());
|
| -}
|
| -
|
| -void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
|
| -{
|
| - HTMLStackElem* maxElem = 0;
|
| - bool finished = false;
|
| - bool strayTableContent = elem->strayTableContent;
|
| -
|
| - m_handlingResidualStyleAcrossBlocks = true;
|
| - while (!finished) {
|
| - // Find the outermost element that crosses over to a higher level. If there exists another higher-level
|
| - // element, we will do another pass, until we have corrected the innermost one.
|
| - ExceptionCode ec = 0;
|
| - HTMLStackElem* curr = blockStack;
|
| - HTMLStackElem* prev = 0;
|
| - HTMLStackElem* prevMaxElem = 0;
|
| - maxElem = 0;
|
| - finished = true;
|
| - while (curr && curr != elem) {
|
| - if (curr->level > elem->level) {
|
| - if (!isAffectedByResidualStyle(curr->tagName))
|
| - return;
|
| - if (maxElem)
|
| - // We will need another pass.
|
| - finished = false;
|
| - maxElem = curr;
|
| - prevMaxElem = prev;
|
| - }
|
| -
|
| - prev = curr;
|
| - curr = curr->next;
|
| - }
|
| -
|
| - if (!curr || !maxElem)
|
| - return;
|
| -
|
| - Node* residualElem = prev->node;
|
| - Node* blockElem = prevMaxElem ? prevMaxElem->node : current;
|
| - Node* parentElem = elem->node;
|
| -
|
| - // Check to see if the reparenting that is going to occur is allowed according to the DOM.
|
| - // FIXME: We should either always allow it or perform an additional fixup instead of
|
| - // just bailing here.
|
| - // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
|
| - if (!parentElem->childAllowed(blockElem))
|
| - return;
|
| -
|
| - m_hasPElementInScope = Unknown;
|
| -
|
| - if (maxElem->node->parentNode() != elem->node) {
|
| - // Walk the stack and remove any elements that aren't residual style tags. These
|
| - // are basically just being closed up. Example:
|
| - // <font><span>Moo<p>Goo</font></p>.
|
| - // In the above example, the <span> doesn't need to be reopened. It can just close.
|
| - HTMLStackElem* currElem = maxElem->next;
|
| - HTMLStackElem* prevElem = maxElem;
|
| - while (currElem != elem) {
|
| - HTMLStackElem* nextElem = currElem->next;
|
| - if (!isResidualStyleTag(currElem->tagName)) {
|
| - prevElem->next = nextElem;
|
| - prevElem->derefNode();
|
| - prevElem->node = currElem->node;
|
| - prevElem->didRefNode = currElem->didRefNode;
|
| - delete currElem;
|
| - }
|
| - else
|
| - prevElem = currElem;
|
| - currElem = nextElem;
|
| - }
|
| -
|
| - // We have to reopen residual tags in between maxElem and elem. An example of this case is:
|
| - // <font><i>Moo<p>Foo</font>.
|
| - // In this case, we need to transform the part before the <p> into:
|
| - // <font><i>Moo</i></font><i>
|
| - // so that the <i> will remain open. This involves the modification of elements
|
| - // in the block stack.
|
| - // This will also affect how we ultimately reparent the block, since we want it to end up
|
| - // under the reopened residual tags (e.g., the <i> in the above example.)
|
| - RefPtr<Node> prevNode = 0;
|
| - currElem = maxElem;
|
| - while (currElem->node != residualElem) {
|
| - if (isResidualStyleTag(currElem->node->localName())) {
|
| - // Create a clone of this element.
|
| - // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
|
| - Node* currNode = currElem->node->cloneNode(false).releaseRef();
|
| - reportError(ResidualStyleError, &currNode->localName());
|
| -
|
| - // Change the stack element's node to point to the clone.
|
| - // The stack element adopts the reference we obtained above by calling release().
|
| - currElem->derefNode();
|
| - currElem->node = currNode;
|
| - currElem->didRefNode = true;
|
| -
|
| - // Attach the previous node as a child of this new node.
|
| - if (prevNode)
|
| - currNode->appendChild(prevNode, ec);
|
| - else // The new parent for the block element is going to be the innermost clone.
|
| - parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though.
|
| -
|
| - prevNode = currNode;
|
| - }
|
| -
|
| - currElem = currElem->next;
|
| - }
|
| -
|
| - // Now append the chain of new residual style elements if one exists.
|
| - if (prevNode)
|
| - elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
|
| - }
|
| -
|
| - // Check if the block is still in the tree. If it isn't, then we don't
|
| - // want to remove it from its parent (that would crash) or insert it into
|
| - // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
|
| - bool isBlockStillInTree = blockElem->parentNode();
|
| -
|
| - // We need to make a clone of |residualElem| and place it just inside |blockElem|.
|
| - // All content of |blockElem| is reparented to be under this clone. We then
|
| - // reparent |blockElem| using real DOM calls so that attachment/detachment will
|
| - // be performed to fix up the rendering tree.
|
| - // So for this example: <b>...<p>Foo</b>Goo</p>
|
| - // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
|
| - //
|
| - // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
|
| - if (isBlockStillInTree)
|
| - blockElem->parentNode()->removeChild(blockElem, ec);
|
| -
|
| - Node* newNodePtr = 0;
|
| - if (blockElem->firstChild()) {
|
| - // Step 2: Clone |residualElem|.
|
| - RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
|
| - newNodePtr = newNode.get();
|
| - reportError(ResidualStyleError, &newNode->localName());
|
| -
|
| - // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
|
| - // before we've put |newElem| into the document. That way we'll only do one attachment of all
|
| - // the new content (instead of a bunch of individual attachments).
|
| - Node* currNode = blockElem->firstChild();
|
| - while (currNode) {
|
| - Node* nextNode = currNode->nextSibling();
|
| - newNode->appendChild(currNode, ec);
|
| - currNode = nextNode;
|
| - }
|
| -
|
| - // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
|
| - // attachment can occur yet.
|
| - blockElem->appendChild(newNode.release(), ec);
|
| - } else
|
| - finished = true;
|
| -
|
| - // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
|
| - if (isBlockStillInTree)
|
| - parentElem->appendChild(blockElem, ec);
|
| -
|
| - // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update
|
| - // the node associated with the previous stack element so that when it gets popped,
|
| - // it doesn't make the residual element the next current node.
|
| - HTMLStackElem* currElem = maxElem;
|
| - HTMLStackElem* prevElem = 0;
|
| - while (currElem != elem) {
|
| - prevElem = currElem;
|
| - currElem = currElem->next;
|
| - }
|
| - prevElem->next = elem->next;
|
| - prevElem->derefNode();
|
| - prevElem->node = elem->node;
|
| - prevElem->didRefNode = elem->didRefNode;
|
| - if (!finished) {
|
| - // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
|
| - // in the stack. We do not do this for the innermost block, because in that case the new
|
| - // node is effectively no longer open.
|
| - elem->next = maxElem;
|
| - elem->node = prevMaxElem->node;
|
| - elem->didRefNode = prevMaxElem->didRefNode;
|
| - elem->strayTableContent = false;
|
| - prevMaxElem->next = elem;
|
| - ASSERT(newNodePtr);
|
| - prevMaxElem->node = newNodePtr;
|
| - prevMaxElem->didRefNode = false;
|
| - } else
|
| - delete elem;
|
| - }
|
| -
|
| - // FIXME: If we ever make a case like this work:
|
| - // <table><b><i><form></b></form></i></table>
|
| - // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
|
| - if (strayTableContent)
|
| - inStrayTableContent--;
|
| -
|
| - // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
|
| - // In the above example, Goo should stay italic.
|
| - // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
|
| -
|
| - HTMLStackElem* curr = blockStack;
|
| - HTMLStackElem* residualStyleStack = 0;
|
| - unsigned stackDepth = 1;
|
| - unsigned redundantStyleCount = 0;
|
| - while (curr && curr != maxElem) {
|
| - // We will actually schedule this tag for reopening
|
| - // after we complete the close of this entire block.
|
| - if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
|
| - // We've overloaded the use of stack elements and are just reusing the
|
| - // struct with a slightly different meaning to the variables. Instead of chaining
|
| - // from innermost to outermost, we build up a list of all the tags we need to reopen
|
| - // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
|
| - // to the outermost tag we need to reopen.
|
| - // We also set curr->node to be the actual element that corresponds to the ID stored in
|
| - // curr->id rather than the node that you should pop to when the element gets pulled off
|
| - // the stack.
|
| - if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
|
| - redundantStyleCount++;
|
| - else
|
| - redundantStyleCount = 0;
|
| -
|
| - if (redundantStyleCount < cMaxRedundantTagDepth)
|
| - moveOneBlockToStack(residualStyleStack);
|
| - else
|
| - popOneBlock();
|
| - } else
|
| - popOneBlock();
|
| -
|
| - curr = blockStack;
|
| - }
|
| -
|
| - reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.
|
| -
|
| - m_handlingResidualStyleAcrossBlocks = false;
|
| -}
|
| -
|
| -void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
|
| -{
|
| - // Loop for each tag that needs to be reopened.
|
| - while (elem) {
|
| - // Create a shallow clone of the DOM node for this element.
|
| - RefPtr<Node> newNode = elem->node->cloneNode(false);
|
| - reportError(ResidualStyleError, &newNode->localName());
|
| -
|
| - // Append the new node. In the malformed table case, we need to insert before the table,
|
| - // which will be the last child.
|
| - ExceptionCode ec = 0;
|
| - if (malformedTableParent)
|
| - malformedTableParent->insertBefore(newNode, malformedTableParent->lastChild(), ec);
|
| - else
|
| - current->appendChild(newNode, ec);
|
| - // FIXME: Is it really OK to ignore the exceptions here?
|
| -
|
| - // Now push a new stack element for this node we just created.
|
| - pushBlock(elem->tagName, elem->level);
|
| - newNode->beginParsingChildren();
|
| -
|
| - // Set our strayTableContent boolean if needed, so that the reopened tag also knows
|
| - // that it is inside a malformed table.
|
| - blockStack->strayTableContent = malformedTableParent != 0;
|
| - if (blockStack->strayTableContent)
|
| - inStrayTableContent++;
|
| -
|
| - // Clear our malformed table parent variable.
|
| - malformedTableParent = 0;
|
| -
|
| - // Update |current| manually to point to the new node.
|
| - setCurrent(newNode.get());
|
| -
|
| - // Advance to the next tag that needs to be reopened.
|
| - HTMLStackElem* next = elem->next;
|
| - elem->derefNode();
|
| - delete elem;
|
| - elem = next;
|
| - }
|
| -}
|
| -
|
| -void HTMLParser::pushBlock(const AtomicString& tagName, int level)
|
| -{
|
| - blockStack = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack);
|
| - didRefCurrent = false;
|
| - if (tagName == pTag)
|
| - m_hasPElementInScope = InScope;
|
| - else if (isScopingTag(tagName))
|
| - m_hasPElementInScope = NotInScope;
|
| -}
|
| -
|
| -void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
|
| -{
|
| - HTMLStackElem* elem = blockStack;
|
| -
|
| - int maxLevel = 0;
|
| -
|
| - while (elem && (elem->tagName != tagName)) {
|
| - if (maxLevel < elem->level)
|
| - maxLevel = elem->level;
|
| - elem = elem->next;
|
| - }
|
| -
|
| - if (!elem) {
|
| - if (reportErrors)
|
| - reportError(StrayCloseTagError, &tagName, 0, true);
|
| - return;
|
| - }
|
| -
|
| - if (maxLevel > elem->level) {
|
| - // We didn't match because the tag is in a different scope, e.g.,
|
| - // <b><p>Foo</b>. Try to correct the problem.
|
| - if (!isResidualStyleTag(tagName))
|
| - return;
|
| - return handleResidualStyleCloseTagAcrossBlocks(elem);
|
| - }
|
| -
|
| - bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
|
| - HTMLStackElem* residualStyleStack = 0;
|
| - Node* malformedTableParent = 0;
|
| -
|
| - elem = blockStack;
|
| - unsigned stackDepth = 1;
|
| - unsigned redundantStyleCount = 0;
|
| - while (elem) {
|
| - if (elem->tagName == tagName) {
|
| - int strayTable = inStrayTableContent;
|
| - popOneBlock();
|
| - elem = 0;
|
| -
|
| - // This element was the root of some malformed content just inside an implicit or
|
| - // explicit <tbody> or <tr>.
|
| - // If we end up needing to reopen residual style tags, the root of the reopened chain
|
| - // must also know that it is the root of malformed content inside a <tbody>/<tr>.
|
| - if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
|
| - Node* curr = current;
|
| - while (curr && !curr->hasTagName(tableTag))
|
| - curr = curr->parentNode();
|
| - malformedTableParent = curr ? curr->parentNode() : 0;
|
| - }
|
| - }
|
| - else {
|
| - if (m_currentFormElement && elem->tagName == formTag)
|
| - // A <form> is being closed prematurely (and this is
|
| - // malformed HTML). Set an attribute on the form to clear out its
|
| - // bottom margin.
|
| - m_currentFormElement->setMalformed(true);
|
| -
|
| - // Schedule this tag for reopening
|
| - // after we complete the close of this entire block.
|
| - if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
|
| - // We've overloaded the use of stack elements and are just reusing the
|
| - // struct with a slightly different meaning to the variables. Instead of chaining
|
| - // from innermost to outermost, we build up a list of all the tags we need to reopen
|
| - // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
|
| - // to the outermost tag we need to reopen.
|
| - // We also set elem->node to be the actual element that corresponds to the ID stored in
|
| - // elem->id rather than the node that you should pop to when the element gets pulled off
|
| - // the stack.
|
| - if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
|
| - redundantStyleCount++;
|
| - else
|
| - redundantStyleCount = 0;
|
| -
|
| - if (redundantStyleCount < cMaxRedundantTagDepth)
|
| - moveOneBlockToStack(residualStyleStack);
|
| - else
|
| - popOneBlock();
|
| - } else
|
| - popOneBlock();
|
| - elem = blockStack;
|
| - }
|
| - }
|
| -
|
| - reopenResidualStyleTags(residualStyleStack, malformedTableParent);
|
| -}
|
| -
|
| -inline HTMLStackElem* HTMLParser::popOneBlockCommon()
|
| -{
|
| - HTMLStackElem* elem = blockStack;
|
| -
|
| - // Form elements restore their state during the parsing process.
|
| - // Also, a few elements (<applet>, <object>) need to know when all child elements (<param>s) are available.
|
| - if (current && elem->node != current)
|
| - current->finishParsingChildren();
|
| -
|
| - blockStack = elem->next;
|
| - current = elem->node;
|
| - didRefCurrent = elem->didRefNode;
|
| -
|
| - if (elem->strayTableContent)
|
| - inStrayTableContent--;
|
| -
|
| - if (elem->tagName == pTag)
|
| - m_hasPElementInScope = NotInScope;
|
| - else if (isScopingTag(elem->tagName))
|
| - m_hasPElementInScope = Unknown;
|
| -
|
| - return elem;
|
| -}
|
| -
|
| -void HTMLParser::popOneBlock()
|
| -{
|
| - // Store the current node before popOneBlockCommon overwrites it.
|
| - Node* lastCurrent = current;
|
| - bool didRefLastCurrent = didRefCurrent;
|
| -
|
| - delete popOneBlockCommon();
|
| -
|
| - if (didRefLastCurrent)
|
| - lastCurrent->deref();
|
| -}
|
| -
|
| -void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
|
| -{
|
| - // We'll be using the stack element we're popping, but for the current node.
|
| - // See the two callers for details.
|
| -
|
| - // Store the current node before popOneBlockCommon overwrites it.
|
| - Node* lastCurrent = current;
|
| - bool didRefLastCurrent = didRefCurrent;
|
| -
|
| - // Pop the block, but don't deref the current node as popOneBlock does because
|
| - // we'll be using the pointer in the new stack element.
|
| - HTMLStackElem* elem = popOneBlockCommon();
|
| -
|
| - // Transfer the current node into the stack element.
|
| - // No need to deref the old elem->node because popOneBlockCommon transferred
|
| - // it into the current/didRefCurrent fields.
|
| - elem->node = lastCurrent;
|
| - elem->didRefNode = didRefLastCurrent;
|
| - elem->next = head;
|
| - head = elem;
|
| -}
|
| -
|
| -void HTMLParser::checkIfHasPElementInScope()
|
| -{
|
| - m_hasPElementInScope = NotInScope;
|
| - HTMLStackElem* elem = blockStack;
|
| - while (elem) {
|
| - const AtomicString& tagName = elem->tagName;
|
| - if (tagName == pTag) {
|
| - m_hasPElementInScope = InScope;
|
| - return;
|
| - } else if (isScopingTag(tagName))
|
| - return;
|
| - elem = elem->next;
|
| - }
|
| -}
|
| -
|
| -void HTMLParser::popInlineBlocks()
|
| -{
|
| - while (blockStack && isInline(current))
|
| - popOneBlock();
|
| -}
|
| -
|
| -void HTMLParser::freeBlock()
|
| -{
|
| - while (blockStack)
|
| - popOneBlock();
|
| -}
|
| -
|
| -void HTMLParser::createHead()
|
| -{
|
| - if (head || !document->documentElement())
|
| - return;
|
| -
|
| - head = new HTMLHeadElement(headTag, document);
|
| - HTMLElement* body = document->body();
|
| - ExceptionCode ec = 0;
|
| - document->documentElement()->insertBefore(head, body, ec);
|
| - if (ec)
|
| - head = 0;
|
| -
|
| - // If the body does not exist yet, then the <head> should be pushed as the current block.
|
| - if (head && !body) {
|
| - pushBlock(head->localName(), head->tagPriority());
|
| - setCurrent(head);
|
| - }
|
| -}
|
| -
|
| -PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
|
| -{
|
| - RefPtr<Node> n = new HTMLDivElement(divTag, document);
|
| -
|
| - NamedMappedAttrMap* attrs = t->attrs.get();
|
| -
|
| - RefPtr<HTMLIsIndexElement> isIndex = new HTMLIsIndexElement(isindexTag, document, m_currentFormElement.get());
|
| - isIndex->setAttributeMap(attrs);
|
| - isIndex->setAttribute(typeAttr, "khtml_isindex");
|
| -
|
| - String text = searchableIndexIntroduction();
|
| - if (attrs) {
|
| - if (Attribute* a = attrs->getAttributeItem(promptAttr))
|
| - text = a->value().string() + " ";
|
| - t->attrs = 0;
|
| - }
|
| -
|
| - n->addChild(new HTMLHRElement(hrTag, document));
|
| - n->addChild(new Text(document, text));
|
| - n->addChild(isIndex.release());
|
| - n->addChild(new HTMLHRElement(hrTag, document));
|
| -
|
| - return n.release();
|
| -}
|
| -
|
| -void HTMLParser::startBody()
|
| -{
|
| - if (inBody)
|
| - return;
|
| -
|
| - inBody = true;
|
| -
|
| - if (m_isindexElement) {
|
| - insertNode(m_isindexElement.get(), true /* don't descend into this node */);
|
| - m_isindexElement = 0;
|
| - }
|
| -}
|
| -
|
| -void HTMLParser::finished()
|
| -{
|
| - // In the case of a completely empty document, here's the place to create the HTML element.
|
| - if (current && current->isDocumentNode() && !document->documentElement())
|
| - insertNode(new HTMLHtmlElement(htmlTag, document));
|
| -
|
| - // This ensures that "current" is not left pointing to a node when the document is destroyed.
|
| - freeBlock();
|
| - setCurrent(0);
|
| -
|
| - // Warning, this may delete the tokenizer and parser, so don't try to do anything else after this.
|
| - if (!m_isParsingFragment)
|
| - document->finishedParsing();
|
| -}
|
| -
|
| -void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
|
| -{
|
| - Frame* frame = document->frame();
|
| - if (!frame)
|
| - return;
|
| -
|
| - HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
|
| - int lineNumber = htmlTokenizer->lineNumber() + 1;
|
| -
|
| - AtomicString tag1;
|
| - AtomicString tag2;
|
| - if (tagName1) {
|
| - if (*tagName1 == "#text")
|
| - tag1 = "Text";
|
| - else if (*tagName1 == "#comment")
|
| - tag1 = "<!-- comment -->";
|
| - else
|
| - tag1 = (closeTags ? "</" : "<") + *tagName1 + ">";
|
| - }
|
| - if (tagName2) {
|
| - if (*tagName2 == "#text")
|
| - tag2 = "Text";
|
| - else if (*tagName2 == "#comment")
|
| - tag2 = "<!-- comment -->";
|
| - else
|
| - tag2 = (closeTags ? "</" : "<") + *tagName2 + ">";
|
| - }
|
| -
|
| - const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
|
| - if (!errorMsg)
|
| - return;
|
| -
|
| - String message;
|
| - if (htmlTokenizer->processingContentWrittenByScript())
|
| - message += htmlParserDocumentWriteMessage();
|
| - message += errorMsg;
|
| - message.replace("%tag1", tag1);
|
| - message.replace("%tag2", tag2);
|
| -
|
| - frame->domWindow()->console()->addMessage(HTMLMessageSource,
|
| - isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
|
| - message, lineNumber, document->url().string());
|
| -}
|
| -
|
| -}
|
| -
|
| -
|
| +/*
|
| + Copyright (C) 1997 Martin Jones (mjones@kde.org)
|
| + (C) 1997 Torben Weis (weis@kde.org)
|
| + (C) 1999,2001 Lars Knoll (knoll@kde.org)
|
| + (C) 2000,2001 Dirk Mueller (mueller@kde.org)
|
| + Copyright (C) 2004, 2005, 2006, 2007 Apple Inc. All rights reserved.
|
| +
|
| + This library is free software; you can redistribute it and/or
|
| + modify it under the terms of the GNU Library General Public
|
| + License as published by the Free Software Foundation; either
|
| + version 2 of the License, or (at your option) any later version.
|
| +
|
| + This library is distributed in the hope that it will be useful,
|
| + but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| + Library General Public License for more details.
|
| +
|
| + You should have received a copy of the GNU Library General Public License
|
| + along with this library; see the file COPYING.LIB. If not, write to
|
| + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
| + Boston, MA 02110-1301, USA.
|
| +*/
|
| +
|
| +#include "config.h"
|
| +#include "HTMLParser.h"
|
| +
|
| +#include "CharacterNames.h"
|
| +#include "CSSPropertyNames.h"
|
| +#include "CSSValueKeywords.h"
|
| +#include "Comment.h"
|
| +#include "Console.h"
|
| +#include "DOMWindow.h"
|
| +#include "DocumentFragment.h"
|
| +#include "DocumentType.h"
|
| +#include "Frame.h"
|
| +#include "HTMLBodyElement.h"
|
| +#include "HTMLDocument.h"
|
| +#include "HTMLDivElement.h"
|
| +#include "HTMLDListElement.h"
|
| +#include "HTMLElementFactory.h"
|
| +#include "HTMLFormElement.h"
|
| +#include "HTMLHeadElement.h"
|
| +#include "HTMLHRElement.h"
|
| +#include "HTMLHtmlElement.h"
|
| +#include "HTMLIsIndexElement.h"
|
| +#include "HTMLMapElement.h"
|
| +#include "HTMLNames.h"
|
| +#include "HTMLTableCellElement.h"
|
| +#include "HTMLTableRowElement.h"
|
| +#include "HTMLTableSectionElement.h"
|
| +#include "HTMLTokenizer.h"
|
| +#include "LocalizedStrings.h"
|
| +#include "Settings.h"
|
| +#include "Text.h"
|
| +#include <wtf/StdLibExtras.h>
|
| +
|
| +namespace WebCore {
|
| +
|
| +using namespace HTMLNames;
|
| +
|
| +static const unsigned cMaxRedundantTagDepth = 20;
|
| +static const unsigned cResidualStyleMaxDepth = 200;
|
| +
|
| +struct HTMLStackElem : Noncopyable { // One entry of the parser's block stack; entries are chained through |next|.
|
| + HTMLStackElem(const AtomicString& t, int lvl, Node* n, bool r, HTMLStackElem* nx) // t: tag name, lvl: level, n: node, r: whether n is ref'd, nx: entry below this one.
|
| + : tagName(t)
|
| + , level(lvl)
|
| + , strayTableContent(false) // Always starts false; reopenResidualStyleTags() sets it after pushing when needed.
|
| + , node(n)
|
| + , didRefNode(r)
|
| + , next(nx)
|
| + {
|
| + }
|
| +
|
| + void derefNode() // Drops the reference on |node|, but only if this entry is flagged as holding one.
|
| + {
|
| + if (didRefNode)
|
| + node->deref(); // Neither |node| nor |didRefNode| is cleared here; callers overwrite or delete the entry afterwards.
|
| + }
|
| +
|
| + AtomicString tagName; // Tag name this entry was pushed for; popBlock() searches the stack by it.
|
| + int level; // Level handed to pushBlock(); popBlock() compares it against the levels of entries above.
|
| + bool strayTableContent; // When set, popping this entry decrements the parser's inStrayTableContent counter.
|
| + Node* node; // pushBlock() stores the parser's |current| here; popOneBlockCommon() restores |current| from it.
|
| + bool didRefNode; // True when |node| carries a reference that must be released via derefNode().
|
| + HTMLStackElem* next; // Next entry down the stack (0 at the bottom).
|
| +};
|
| +
|
| +/**
|
| + * The parser parses tokenized input into the document, building up the
|
| + * document tree. If the document is well-formed, parsing it is straightforward.
|
| + *
|
| + * Unfortunately, we have to handle many HTML documents that are not well-formed,
|
| + * so the parser has to be tolerant about errors.
|
| + *
|
| + * We have to take care of at least the following error conditions:
|
| + *
|
| + * 1. The element being added is explicitly forbidden inside some outer tag.
|
| + * In this case we should close all tags up to the one, which forbids
|
| + * the element, and add it afterwards.
|
| + *
|
| + * 2. We are not allowed to add the element directly. It could be that
|
| + * the person writing the document forgot some tag in between (or that the
|
| + * tag in between is optional). This could be the case with the following
|
| + * tags: HTML HEAD BODY TBODY TR TD LI (did I forget any?).
|
| + *
|
| + * 3. We want to add a block element inside to an inline element. Close all
|
| + * inline elements up to the next higher block element.
|
| + *
|
| + * 4. If this doesn't help, close elements until we are allowed to add the
|
| + * element or ignore the tag.
|
| + *
|
| + */
|
| +
|
| +HTMLParser::HTMLParser(HTMLDocument* doc, bool reportErrors)
|
| + : document(doc)
|
| + , current(doc)
|
| + , didRefCurrent(false)
|
| + , blockStack(0)
|
| + , m_hasPElementInScope(NotInScope)
|
| + , head(0)
|
| + , inBody(false)
|
| + , haveContent(false)
|
| + , haveFrameSet(false)
|
| + , m_isParsingFragment(false)
|
| + , m_reportErrors(reportErrors)
|
| + , m_handlingResidualStyleAcrossBlocks(false)
|
| + , inStrayTableContent(0)
|
| +{
|
| +}
|
| +
|
| +HTMLParser::HTMLParser(DocumentFragment* frag)
|
| + : document(frag->document())
|
| + , current(frag)
|
| + , didRefCurrent(true)
|
| + , blockStack(0)
|
| + , m_hasPElementInScope(NotInScope)
|
| + , head(0)
|
| + , inBody(true)
|
| + , haveContent(false)
|
| + , haveFrameSet(false)
|
| + , m_isParsingFragment(true)
|
| + , m_reportErrors(false)
|
| + , m_handlingResidualStyleAcrossBlocks(false)
|
| + , inStrayTableContent(0)
|
| +{
|
| + if (frag)
|
| + frag->ref();
|
| +}
|
| +
|
| +HTMLParser::~HTMLParser()
|
| +{
|
| + freeBlock();
|
| + if (didRefCurrent)
|
| + current->deref();
|
| +}
|
| +
|
| +void HTMLParser::reset()
|
| +{
|
| + ASSERT(!m_isParsingFragment);
|
| +
|
| + setCurrent(document);
|
| +
|
| + freeBlock();
|
| +
|
| + inBody = false;
|
| + haveFrameSet = false;
|
| + haveContent = false;
|
| + inStrayTableContent = 0;
|
| +
|
| + m_currentFormElement = 0;
|
| + m_currentMapElement = 0;
|
| + head = 0;
|
| + m_isindexElement = 0;
|
| +
|
| + m_skipModeTag = nullAtom;
|
| +}
|
| +
|
| +void HTMLParser::setCurrent(Node* newCurrent)
|
| +{
|
| + bool didRefNewCurrent = newCurrent && newCurrent != document;
|
| + if (didRefNewCurrent)
|
| + newCurrent->ref();
|
| + if (didRefCurrent)
|
| + current->deref();
|
| + current = newCurrent;
|
| + didRefCurrent = didRefNewCurrent;
|
| +}
|
| +
|
| +PassRefPtr<Node> HTMLParser::parseToken(Token* t)
|
| +{
|
| + if (!m_skipModeTag.isNull()) {
|
| + if (!t->beginTag && t->tagName == m_skipModeTag)
|
| + // Found the end tag for the current skip mode, so we're done skipping.
|
| + m_skipModeTag = nullAtom;
|
| + else if (current->localName() == t->tagName)
|
| + // Do not skip </iframe>.
|
| + // FIXME: What does that comment mean? How can it be right to parse a token without clearing m_skipModeTag?
|
| + ;
|
| + else
|
| + return 0;
|
| + }
|
| +
|
| + // Apparently some sites use </br> instead of <br>. Be compatible with IE and Firefox and treat this like <br>.
|
| + if (t->isCloseTag(brTag) && document->inCompatMode()) {
|
| + reportError(MalformedBRError);
|
| + t->beginTag = true;
|
| + }
|
| +
|
| + if (!t->beginTag) {
|
| + processCloseTag(t);
|
| + return 0;
|
| + }
|
| +
|
| + // Ignore spaces, if we're not inside a paragraph or other inline code.
|
| + // Do not alter the text if it is part of a scriptTag.
|
| + if (t->tagName == textAtom && t->text && current->localName() != scriptTag) {
|
| + if (inBody && !skipMode() && current->localName() != styleTag &&
|
| + current->localName() != titleTag && !t->text->containsOnlyWhitespace())
|
| + haveContent = true;
|
| +
|
| + RefPtr<Node> n;
|
| + String text = t->text.get();
|
| + unsigned charsLeft = text.length();
|
| + while (charsLeft) {
|
| + // split large blocks of text to nodes of manageable size
|
| + n = Text::createWithLengthLimit(document, text, charsLeft);
|
| + if (!insertNode(n.get(), t->selfClosingTag))
|
| + return 0;
|
| + }
|
| + return n;
|
| + }
|
| +
|
| + RefPtr<Node> n = getNode(t);
|
| + // just to be sure, and to catch currently unimplemented stuff
|
| + if (!n)
|
| + return 0;
|
| +
|
| + // set attributes
|
| + if (n->isHTMLElement()) {
|
| + HTMLElement* e = static_cast<HTMLElement*>(n.get());
|
| + e->setAttributeMap(t->attrs.get());
|
| +
|
| + // take care of optional close tags
|
| + if (e->endTagRequirement() == TagStatusOptional)
|
| + popBlock(t->tagName);
|
| +
|
| + // If the node does not have a forbidden end tag requirement, and if the broken XML self-closing
|
| + // syntax was used, report an error.
|
| + if (t->brokenXMLStyle && e->endTagRequirement() != TagStatusForbidden) {
|
| + if (t->tagName == scriptTag)
|
| + reportError(IncorrectXMLCloseScriptWarning);
|
| + else
|
| + reportError(IncorrectXMLSelfCloseError, &t->tagName);
|
| + }
|
| + }
|
| +
|
| + if (!insertNode(n.get(), t->selfClosingTag)) {
|
| + // we couldn't insert the node
|
| +
|
| + if (n->isElementNode()) {
|
| + Element* e = static_cast<Element*>(n.get());
|
| + e->setAttributeMap(0);
|
| + }
|
| +
|
| + if (m_currentMapElement == n)
|
| + m_currentMapElement = 0;
|
| +
|
| + if (m_currentFormElement == n)
|
| + m_currentFormElement = 0;
|
| +
|
| + if (head == n)
|
| + head = 0;
|
| +
|
| + return 0;
|
| + }
|
| + return n;
|
| +}
|
| +
|
| +void HTMLParser::parseDoctypeToken(DoctypeToken* t)
|
| +{
|
| + // Ignore any doctype after the first. Ignore doctypes in fragments.
|
| + if (document->doctype() || m_isParsingFragment || current != document)
|
| + return;
|
| +
|
| + // Make a new doctype node and set it as our doctype.
|
| + document->addChild(DocumentType::create(document, String::adopt(t->m_name), String::adopt(t->m_publicID), String::adopt(t->m_systemID)));
|
| +}
|
| +
|
| +static bool isTableSection(Node* n)
|
| +{
|
| + return n->hasTagName(tbodyTag) || n->hasTagName(tfootTag) || n->hasTagName(theadTag);
|
| +}
|
| +
|
| +static bool isTablePart(Node* n)
|
| +{
|
| + return n->hasTagName(trTag) || n->hasTagName(tdTag) || n->hasTagName(thTag) ||
|
| + isTableSection(n);
|
| +}
|
| +
|
| +static bool isTableRelated(Node* n)
|
| +{
|
| + return n->hasTagName(tableTag) || isTablePart(n);
|
| +}
|
| +
|
| +static bool isScopingTag(const AtomicString& tagName)
|
| +{
|
| + return tagName == appletTag || tagName == captionTag || tagName == tdTag || tagName == thTag || tagName == buttonTag || tagName == marqueeTag || tagName == objectTag || tagName == tableTag || tagName == htmlTag;
|
| +}
|
| +
|
| +bool HTMLParser::insertNode(Node* n, bool flat)
|
| +{
|
| + RefPtr<Node> protectNode(n);
|
| +
|
| + const AtomicString& localName = n->localName();
|
| + int tagPriority = n->isHTMLElement() ? static_cast<HTMLElement*>(n)->tagPriority() : 0;
|
| +
|
| + // <table> is never allowed inside stray table content. Always pop out of the stray table content
|
| + // and close up the first table, and then start the second table as a sibling.
|
| + if (inStrayTableContent && localName == tableTag)
|
| + popBlock(tableTag);
|
| +
|
| + // let's be stupid and just try to insert it.
|
| + // this should work if the document is well-formed
|
| + Node* newNode = current->addChild(n);
|
| + if (!newNode)
|
| + return handleError(n, flat, localName, tagPriority); // Try to handle the error.
|
| +
|
| + // don't push elements without end tags (e.g., <img>) on the stack
|
| + bool parentAttached = current->attached();
|
| + if (tagPriority > 0 && !flat) {
|
| + if (newNode == current) {
|
| + // This case should only be hit when a demoted <form> is placed inside a table.
|
| + ASSERT(localName == formTag);
|
| + reportError(FormInsideTablePartError, &current->localName());
|
| + } else {
|
| + // The pushBlock function transfers ownership of current to the block stack
|
| + // so we're guaranteed that didRefCurrent is false. The code below is an
|
| + // optimized version of setCurrent that takes advantage of that fact and also
|
| + // assumes that newNode is neither 0 nor a pointer to the document.
|
| + pushBlock(localName, tagPriority);
|
| + newNode->beginParsingChildren();
|
| + ASSERT(!didRefCurrent);
|
| + newNode->ref();
|
| + current = newNode;
|
| + didRefCurrent = true;
|
| + }
|
| + if (parentAttached && !n->attached() && !m_isParsingFragment)
|
| + n->attach();
|
| + } else {
|
| + if (parentAttached && !n->attached() && !m_isParsingFragment)
|
| + n->attach();
|
| + n->finishParsingChildren();
|
| + }
|
| +
|
| + if (localName == htmlTag && document->frame())
|
| + document->frame()->loader()->dispatchDocumentElementAvailable();
|
| +
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority)
|
| +{
|
| + // Error handling code. This is just ad hoc handling of specific parent/child combinations.
|
| + HTMLElement* e;
|
| + bool handled = false;
|
| +
|
| + // 1. Check out the element's tag name to decide how to deal with errors.
|
| + if (n->isHTMLElement()) {
|
| + HTMLElement* h = static_cast<HTMLElement*>(n);
|
| + if (h->hasLocalName(trTag) || h->hasLocalName(thTag) || h->hasLocalName(tdTag)) {
|
| + if (inStrayTableContent && !isTableRelated(current)) {
|
| + reportError(MisplacedTablePartError, &localName, &current->localName());
|
| + // pop out to the nearest enclosing table-related tag.
|
| + while (blockStack && !isTableRelated(current))
|
| + popOneBlock();
|
| + return insertNode(n);
|
| + }
|
| + } else if (h->hasLocalName(headTag)) {
|
| + if (!current->isDocumentNode() && !current->hasTagName(htmlTag)) {
|
| + reportError(MisplacedHeadError);
|
| + return false;
|
| + }
|
| + } else if (h->hasLocalName(metaTag) || h->hasLocalName(linkTag) || h->hasLocalName(baseTag)) {
|
| + bool createdHead = false;
|
| + if (!head) {
|
| + createHead();
|
| + createdHead = true;
|
| + }
|
| + if (head) {
|
| + if (!createdHead)
|
| + reportError(MisplacedHeadContentError, &localName, &current->localName());
|
| + if (head->addChild(n)) {
|
| + if (!n->attached() && !m_isParsingFragment)
|
| + n->attach();
|
| + return true;
|
| + } else
|
| + return false;
|
| + }
|
| + } else if (h->hasLocalName(htmlTag)) {
|
| + if (!current->isDocumentNode() ) {
|
| + if (document->documentElement() && document->documentElement()->hasTagName(htmlTag)) {
|
| + reportError(RedundantHTMLBodyError, &localName);
|
| + // we have another <HTML> element.... apply attributes to existing one
|
| + // make sure we don't overwrite already existing attributes
|
| + NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
|
| + Element* existingHTML = static_cast<Element*>(document->documentElement());
|
| + NamedAttrMap* bmap = existingHTML->attributes(false);
|
| + for (unsigned l = 0; map && l < map->length(); ++l) {
|
| + Attribute* it = map->attributeItem(l);
|
| + if (!bmap->getAttributeItem(it->name()))
|
| + existingHTML->setAttribute(it->name(), it->value());
|
| + }
|
| + }
|
| + return false;
|
| + }
|
| + } else if (h->hasLocalName(titleTag) || h->hasLocalName(styleTag)) {
|
| + bool createdHead = false;
|
| + if (!head) {
|
| + createHead();
|
| + createdHead = true;
|
| + }
|
| + if (head) {
|
| + Node* newNode = head->addChild(n);
|
| + if (!newNode) {
|
| + setSkipMode(h->tagQName());
|
| + return false;
|
| + }
|
| +
|
| + if (!createdHead)
|
| + reportError(MisplacedHeadContentError, &localName, &current->localName());
|
| +
|
| + pushBlock(localName, tagPriority);
|
| + newNode->beginParsingChildren();
|
| + setCurrent(newNode);
|
| + if (!n->attached() && !m_isParsingFragment)
|
| + n->attach();
|
| + return true;
|
| + }
|
| + if (inBody) {
|
| + setSkipMode(h->tagQName());
|
| + return false;
|
| + }
|
| + } else if (h->hasLocalName(bodyTag)) {
|
| + if (inBody && document->body()) {
|
| + // we have another <BODY> element.... apply attributes to existing one
|
| + // make sure we don't overwrite already existing attributes
|
| + // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
|
| + reportError(RedundantHTMLBodyError, &localName);
|
| + NamedAttrMap* map = static_cast<Element*>(n)->attributes(true);
|
| + Element* existingBody = document->body();
|
| + NamedAttrMap* bmap = existingBody->attributes(false);
|
| + for (unsigned l = 0; map && l < map->length(); ++l) {
|
| + Attribute* it = map->attributeItem(l);
|
| + if (!bmap->getAttributeItem(it->name()))
|
| + existingBody->setAttribute(it->name(), it->value());
|
| + }
|
| + return false;
|
| + }
|
| + else if (!current->isDocumentNode())
|
| + return false;
|
| + } else if (h->hasLocalName(areaTag)) {
|
| + if (m_currentMapElement) {
|
| + reportError(MisplacedAreaError, &current->localName());
|
| + m_currentMapElement->addChild(n);
|
| + if (!n->attached() && !m_isParsingFragment)
|
| + n->attach();
|
| + handled = true;
|
| + return true;
|
| + }
|
| + return false;
|
| + } else if (h->hasLocalName(colgroupTag) || h->hasLocalName(captionTag)) {
|
| + if (isTableRelated(current)) {
|
| + while (blockStack && isTablePart(current))
|
| + popOneBlock();
|
| + return insertNode(n);
|
| + }
|
| + }
|
| + } else if (n->isCommentNode() && !head)
|
| + return false;
|
| +
|
| + // 2. Next we examine our currently active element to do some further error handling.
|
| + if (current->isHTMLElement()) {
|
| + HTMLElement* h = static_cast<HTMLElement*>(current);
|
| + const AtomicString& currentTagName = h->localName();
|
| + if (h->hasLocalName(htmlTag)) {
|
| + HTMLElement* elt = n->isHTMLElement() ? static_cast<HTMLElement*>(n) : 0;
|
| + if (elt && (elt->hasLocalName(scriptTag) || elt->hasLocalName(styleTag) ||
|
| + elt->hasLocalName(metaTag) || elt->hasLocalName(linkTag) ||
|
| + elt->hasLocalName(objectTag) || elt->hasLocalName(embedTag) ||
|
| + elt->hasLocalName(titleTag) || elt->hasLocalName(isindexTag) ||
|
| + elt->hasLocalName(baseTag))) {
|
| + if (!head) {
|
| + head = new HTMLHeadElement(headTag, document);
|
| + e = head;
|
| + insertNode(e);
|
| + handled = true;
|
| + }
|
| + } else {
|
| + if (n->isTextNode()) {
|
| + Text* t = static_cast<Text*>(n);
|
| + if (t->containsOnlyWhitespace())
|
| + return false;
|
| + }
|
| + if (!haveFrameSet) {
|
| + e = new HTMLBodyElement(bodyTag, document);
|
| + startBody();
|
| + insertNode(e);
|
| + handled = true;
|
| + } else
|
| + reportError(MisplacedFramesetContentError, &localName);
|
| + }
|
| + } else if (h->hasLocalName(headTag)) {
|
| + if (n->hasTagName(htmlTag))
|
| + return false;
|
| + else {
|
| + // This means the body starts here...
|
| + if (!haveFrameSet) {
|
| + popBlock(currentTagName);
|
| + e = new HTMLBodyElement(bodyTag, document);
|
| + startBody();
|
| + insertNode(e);
|
| + handled = true;
|
| + } else
|
| + reportError(MisplacedFramesetContentError, &localName);
|
| + }
|
| + } else if (h->hasLocalName(addressTag) || h->hasLocalName(fontTag)
|
| + || h->hasLocalName(styleTag) || h->hasLocalName(titleTag)) {
|
| + reportError(MisplacedContentRetryError, &localName, &currentTagName);
|
| + popBlock(currentTagName);
|
| + handled = true;
|
| + } else if (h->hasLocalName(captionTag)) {
|
| + // Illegal content in a caption. Close the caption and try again.
|
| + reportError(MisplacedCaptionContentError, &localName);
|
| + popBlock(currentTagName);
|
| + if (isTablePart(n))
|
| + return insertNode(n, flat);
|
| + } else if (h->hasLocalName(tableTag) || h->hasLocalName(trTag) || isTableSection(h)) {
|
| + if (n->hasTagName(tableTag)) {
|
| + reportError(MisplacedTableError, &currentTagName);
|
| + if (m_isParsingFragment && !h->hasLocalName(tableTag))
|
| + // fragment may contain table parts without <table> ancestor, pop them one by one
|
| + popBlock(h->localName());
|
| + popBlock(localName); // end the table
|
| + handled = true; // ...and start a new one
|
| + } else {
|
| + ExceptionCode ec = 0;
|
| + Node* node = current;
|
| + Node* parent = node->parentNode();
|
| + // A script may have removed the current node's parent from the DOM
|
| + // http://bugs.webkit.org/show_bug.cgi?id=7137
|
| + // FIXME: we should do real recovery here and re-parent with the correct node.
|
| + if (!parent)
|
| + return false;
|
| + Node* grandparent = parent->parentNode();
|
| +
|
| + if (n->isTextNode() ||
|
| + (h->hasLocalName(trTag) &&
|
| + isTableSection(parent) && grandparent && grandparent->hasTagName(tableTag)) ||
|
| + ((!n->hasTagName(tdTag) && !n->hasTagName(thTag) &&
|
| + !n->hasTagName(formTag) && !n->hasTagName(scriptTag)) && isTableSection(node) &&
|
| + parent->hasTagName(tableTag))) {
|
| + node = (node->hasTagName(tableTag)) ? node :
|
| + ((node->hasTagName(trTag)) ? grandparent : parent);
|
| + // This can happen with fragments
|
| + if (!node)
|
| + return false;
|
| + Node* parent = node->parentNode();
|
| + if (!parent)
|
| + return false;
|
| + parent->insertBefore(n, node, ec);
|
| + if (!ec) {
|
| + reportError(StrayTableContentError, &localName, &currentTagName);
|
| + if (n->isHTMLElement() && tagPriority > 0 &&
|
| + !flat && static_cast<HTMLElement*>(n)->endTagRequirement() != TagStatusForbidden)
|
| + {
|
| + pushBlock(localName, tagPriority);
|
| + n->beginParsingChildren();
|
| + setCurrent(n);
|
| + inStrayTableContent++;
|
| + blockStack->strayTableContent = true;
|
| + }
|
| + return true;
|
| + }
|
| + }
|
| +
|
| + if (!ec) {
|
| + if (current->hasTagName(trTag)) {
|
| + reportError(TablePartRequiredError, &localName, &tdTag.localName());
|
| + e = new HTMLTableCellElement(tdTag, document);
|
| + } else if (current->hasTagName(tableTag)) {
|
| + // Don't report an error in this case, since making a <tbody> happens all the time when you have <table><tr>,
|
| + // and it isn't really a parse error per se.
|
| + e = new HTMLTableSectionElement(tbodyTag, document);
|
| + } else {
|
| + reportError(TablePartRequiredError, &localName, &trTag.localName());
|
| + e = new HTMLTableRowElement(trTag, document);
|
| + }
|
| +
|
| + insertNode(e);
|
| + handled = true;
|
| + }
|
| + }
|
| + } else if (h->hasLocalName(objectTag)) {
|
| + reportError(MisplacedContentRetryError, &localName, &currentTagName);
|
| + popBlock(objectTag);
|
| + handled = true;
|
| + } else if (h->hasLocalName(pTag) || isHeaderTag(currentTagName)) {
|
| + if (!isInline(n)) {
|
| + popBlock(currentTagName);
|
| + handled = true;
|
| + }
|
| + } else if (h->hasLocalName(optionTag) || h->hasLocalName(optgroupTag)) {
|
| + if (localName == optgroupTag) {
|
| + popBlock(currentTagName);
|
| + handled = true;
|
| + } else if (localName == selectTag) {
|
| + // IE treats a nested select as </select>. Let's do the same
|
| + popBlock(localName);
|
| + }
|
| + } else if (h->hasLocalName(selectTag)) {
|
| + if (localName == inputTag || localName == textareaTag) {
|
| + reportError(MisplacedContentRetryError, &localName, &currentTagName);
|
| + popBlock(currentTagName);
|
| + handled = true;
|
| + }
|
| + } else if (h->hasLocalName(colgroupTag)) {
|
| + popBlock(currentTagName);
|
| + handled = true;
|
| + } else if (!h->hasLocalName(bodyTag)) {
|
| + if (isInline(current)) {
|
| + popInlineBlocks();
|
| + handled = true;
|
| + }
|
| + }
|
| + } else if (current->isDocumentNode()) {
|
| + if (n->isTextNode()) {
|
| + Text* t = static_cast<Text*>(n);
|
| + if (t->containsOnlyWhitespace())
|
| + return false;
|
| + }
|
| +
|
| + if (!document->documentElement()) {
|
| + e = new HTMLHtmlElement(htmlTag, document);
|
| + insertNode(e);
|
| + handled = true;
|
| + }
|
| + }
|
| +
|
| + // 3. If we couldn't handle the error, just return false and attempt to error-correct again.
|
| + if (!handled) {
|
| + reportError(IgnoredContentError, &localName, &current->localName());
|
| + return false;
|
| + }
|
| + return insertNode(n);
|
| +}
|
| +
|
| +typedef bool (HTMLParser::*CreateErrorCheckFunc)(Token* t, RefPtr<Node>&);
|
| +typedef HashMap<AtomicStringImpl*, CreateErrorCheckFunc> FunctionMap;
|
| +
|
| +bool HTMLParser::textCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + result = new Text(document, t->text.get());
|
| + return false;
|
| +}
|
| +
|
| +bool HTMLParser::commentCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + result = new Comment(document, t->text.get());
|
| + return false;
|
| +}
|
| +
|
| +bool HTMLParser::headCreateErrorCheck(Token*, RefPtr<Node>& result)
|
| +{
|
| + if (!head || current->localName() == htmlTag) {
|
| + head = new HTMLHeadElement(headTag, document);
|
| + result = head;
|
| + } else
|
| + reportError(MisplacedHeadError);
|
| + return false;
|
| +}
|
| +
|
| +bool HTMLParser::bodyCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + // body no longer allowed if we have a frameset
|
| + if (haveFrameSet)
|
| + return false;
|
| + popBlock(headTag);
|
| + startBody();
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::framesetCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + popBlock(headTag);
|
| + if (inBody && !haveFrameSet && !haveContent) {
|
| + popBlock(bodyTag);
|
| + // ### actually for IE document.body returns the now hidden "body" element
|
| + // we can't implement that behaviour now because it could cause too many
|
| + // regressions and the headaches are not worth the work as long as there is
|
| + // no site actually relying on that detail (Dirk)
|
| + if (document->body())
|
| + document->body()->setAttribute(styleAttr, "display:none");
|
| + inBody = false;
|
| + }
|
| + if ((haveContent || haveFrameSet) && current->localName() == htmlTag)
|
| + return false;
|
| + haveFrameSet = true;
|
| + startBody();
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::formCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + // Only create a new form if we're not already inside one.
|
| + // This is consistent with other browsers' behavior.
|
| + if (!m_currentFormElement) {
|
| + m_currentFormElement = new HTMLFormElement(formTag, document);
|
| + result = m_currentFormElement;
|
| + pCloserCreateErrorCheck(t, result);
|
| + }
|
| + return false;
|
| +}
|
| +
|
| +bool HTMLParser::isindexCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + RefPtr<Node> n = handleIsindex(t);
|
| + if (!inBody)
|
| + m_isindexElement = n.release();
|
| + else {
|
| + t->selfClosingTag = true;
|
| + result = n.release();
|
| + }
|
| + return false;
|
| +}
|
| +
|
| +bool HTMLParser::selectCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::ddCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + pCloserCreateErrorCheck(t, result);
|
| + popBlock(dtTag);
|
| + popBlock(ddTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::dtCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + pCloserCreateErrorCheck(t, result);
|
| + popBlock(ddTag);
|
| + popBlock(dtTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::nestedCreateErrorCheck(Token* t, RefPtr<Node>&)
|
| +{
|
| + popBlock(t->tagName);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::nestedPCloserCreateErrorCheck(Token* t, RefPtr<Node>& result)
|
| +{
|
| + pCloserCreateErrorCheck(t, result);
|
| + popBlock(t->tagName);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::nestedStyleCreateErrorCheck(Token* t, RefPtr<Node>&)
|
| +{
|
| + return allowNestedRedundantTag(t->tagName);
|
| +}
|
| +
|
| +bool HTMLParser::tableCellCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + popBlock(tdTag);
|
| + popBlock(thTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::tableSectionCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + popBlock(theadTag);
|
| + popBlock(tbodyTag);
|
| + popBlock(tfootTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::noembedCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + setSkipMode(noembedTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::noframesCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + setSkipMode(noframesTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::noscriptCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + if (!m_isParsingFragment) {
|
| + Settings* settings = document->settings();
|
| + if (settings && settings->isJavaScriptEnabled())
|
| + setSkipMode(noscriptTag);
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::pCloserCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + if (hasPElementInScope())
|
| + popBlock(pTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::pCloserStrictCreateErrorCheck(Token*, RefPtr<Node>&)
|
| +{
|
| + if (document->inCompatMode())
|
| + return true;
|
| + if (hasPElementInScope())
|
| + popBlock(pTag);
|
| + return true;
|
| +}
|
| +
|
| +bool HTMLParser::mapCreateErrorCheck(Token*, RefPtr<Node>& result)
|
| +{
|
| + m_currentMapElement = new HTMLMapElement(mapTag, document);
|
| + result = m_currentMapElement;
|
| + return false;
|
| +}
|
| +
|
| +PassRefPtr<Node> HTMLParser::getNode(Token* t)
|
| +{
|
| + // Init our error handling table.
|
| + DEFINE_STATIC_LOCAL(FunctionMap, gFunctionMap, ());
|
| + if (gFunctionMap.isEmpty()) {
|
| + gFunctionMap.set(aTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| + gFunctionMap.set(addressTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(bTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(bigTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(blockquoteTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(bodyTag.localName().impl(), &HTMLParser::bodyCreateErrorCheck);
|
| + gFunctionMap.set(buttonTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| + gFunctionMap.set(centerTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(commentAtom.impl(), &HTMLParser::commentCreateErrorCheck);
|
| + gFunctionMap.set(ddTag.localName().impl(), &HTMLParser::ddCreateErrorCheck);
|
| + gFunctionMap.set(dirTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(divTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(dlTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(dtTag.localName().impl(), &HTMLParser::dtCreateErrorCheck);
|
| + gFunctionMap.set(formTag.localName().impl(), &HTMLParser::formCreateErrorCheck);
|
| + gFunctionMap.set(fieldsetTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(framesetTag.localName().impl(), &HTMLParser::framesetCreateErrorCheck);
|
| + gFunctionMap.set(h1Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(h2Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(h3Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(h4Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(h5Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(h6Tag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(headTag.localName().impl(), &HTMLParser::headCreateErrorCheck);
|
| + gFunctionMap.set(hrTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(iTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(isindexTag.localName().impl(), &HTMLParser::isindexCreateErrorCheck);
|
| + gFunctionMap.set(liTag.localName().impl(), &HTMLParser::nestedPCloserCreateErrorCheck);
|
| + gFunctionMap.set(listingTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(mapTag.localName().impl(), &HTMLParser::mapCreateErrorCheck);
|
| + gFunctionMap.set(menuTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(nobrTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| + gFunctionMap.set(noembedTag.localName().impl(), &HTMLParser::noembedCreateErrorCheck);
|
| + gFunctionMap.set(noframesTag.localName().impl(), &HTMLParser::noframesCreateErrorCheck);
|
| + gFunctionMap.set(noscriptTag.localName().impl(), &HTMLParser::noscriptCreateErrorCheck);
|
| + gFunctionMap.set(olTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(pTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(plaintextTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(preTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + gFunctionMap.set(sTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(selectTag.localName().impl(), &HTMLParser::selectCreateErrorCheck);
|
| + gFunctionMap.set(smallTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(strikeTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(tableTag.localName().impl(), &HTMLParser::pCloserStrictCreateErrorCheck);
|
| + gFunctionMap.set(tbodyTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
|
| + gFunctionMap.set(tdTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
|
| + gFunctionMap.set(textAtom.impl(), &HTMLParser::textCreateErrorCheck);
|
| + gFunctionMap.set(tfootTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
|
| + gFunctionMap.set(thTag.localName().impl(), &HTMLParser::tableCellCreateErrorCheck);
|
| + gFunctionMap.set(theadTag.localName().impl(), &HTMLParser::tableSectionCreateErrorCheck);
|
| + gFunctionMap.set(trTag.localName().impl(), &HTMLParser::nestedCreateErrorCheck);
|
| + gFunctionMap.set(ttTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(uTag.localName().impl(), &HTMLParser::nestedStyleCreateErrorCheck);
|
| + gFunctionMap.set(ulTag.localName().impl(), &HTMLParser::pCloserCreateErrorCheck);
|
| + }
|
| +
|
| + bool proceed = true;
|
| + RefPtr<Node> result;
|
| + if (CreateErrorCheckFunc errorCheckFunc = gFunctionMap.get(t->tagName.impl()))
|
| + proceed = (this->*errorCheckFunc)(t, result);
|
| + if (proceed)
|
| + result = HTMLElementFactory::createHTMLElement(QualifiedName(nullAtom, t->tagName, xhtmlNamespaceURI), document, m_currentFormElement.get());
|
| + return result.release();
|
| +}
|
| +
|
| +bool HTMLParser::allowNestedRedundantTag(const AtomicString& tagName)
|
| +{
|
| + // www.liceo.edu.mx is an example of a site that achieves a level of nesting of
|
| + // about 1500 tags, all from a bunch of <b>s. We will only allow at most 20
|
| + // nested tags of the same type before just ignoring them all together.
|
| + unsigned i = 0;
|
| + for (HTMLStackElem* curr = blockStack;
|
| + i < cMaxRedundantTagDepth && curr && curr->tagName == tagName;
|
| + curr = curr->next, i++) { }
|
| + return i != cMaxRedundantTagDepth;
|
| +}
|
| +
|
| +void HTMLParser::processCloseTag(Token* t)
|
| +{
|
| +    // Tolerate really broken markup: close tokens for <html>, <body> and comments are ignored,
|
| +    // because some pages close the body well before the document actually ends. The end() call
|
| +    // is relied upon to close those instead.
|
| +    if (t->tagName == htmlTag || t->tagName == bodyTag || t->tagName == commentAtom)
|
| +        return;
|
| +
|
| +    // Closing the form that is currently open resets the form pointer and suppresses close-tag
|
| +    // error reporting; a </p> suppresses the reporting as well; a </map> resets the map pointer.
|
| +    const bool closesCurrentForm = t->tagName == formTag && m_currentFormElement;
|
| +    if (closesCurrentForm)
|
| +        m_currentFormElement = 0;
|
| +    else if (t->tagName == mapTag)
|
| +        m_currentMapElement = 0;
|
| +    const bool checkForCloseTagErrors = !closesCurrentForm && !(t->tagName == pTag);
|
| +
|
| +    HTMLStackElem* stackTopBeforePop = blockStack;
|
| +    popBlock(t->tagName, checkForCloseTagErrors);
|
| +    if (stackTopBeforePop != blockStack || !(t->tagName == pTag))
|
| +        return;
|
| +
|
| +    // Nothing was popped, so this was a stray </p>. Amazingly Gecko, WinIE, and MacIE all treat
|
| +    // that as a valid break, i.e., <p></p>, so replay the token as an open tag and close the
|
| +    // resulting empty paragraph right away.
|
| +    t->beginTag = true;
|
| +    parseToken(t);
|
| +    popBlock(t->tagName);
|
| +    reportError(StrayParagraphCloseError);
|
| +}
|
| +
|
| +bool HTMLParser::isHeaderTag(const AtomicString& tagName)
|
| +{
|
| +    // The set of <h1> through <h6> local names is filled in on first use.
|
| +    DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, headerTags, ());
|
| +    if (headerTags.isEmpty()) {
|
| +        const QualifiedName* const headings[] = { &h1Tag, &h2Tag, &h3Tag, &h4Tag, &h5Tag, &h6Tag };
|
| +        for (size_t i = 0; i < sizeof(headings) / sizeof(headings[0]); ++i)
|
| +            headerTags.add(headings[i]->localName().impl());
|
| +    }
|
| +
|
| +    return headerTags.contains(tagName.impl());
|
| +}
|
| +
|
| +bool HTMLParser::isInline(Node* node) const
|
| +{
|
| +    // Text nodes always count as inline; anything else must be an HTML element to qualify.
|
| +    if (node->isTextNode())
|
| +        return true;
|
| +    if (!node->isHTMLElement())
|
| +        return false;
|
| +
|
| +    HTMLElement* element = static_cast<HTMLElement*>(node);
|
| +    if (element->hasLocalName(aTag) || element->hasLocalName(fontTag) || element->hasLocalName(ttTag)
|
| +        || element->hasLocalName(uTag) || element->hasLocalName(bTag) || element->hasLocalName(iTag)
|
| +        || element->hasLocalName(sTag) || element->hasLocalName(strikeTag) || element->hasLocalName(bigTag)
|
| +        || element->hasLocalName(smallTag) || element->hasLocalName(emTag) || element->hasLocalName(strongTag)
|
| +        || element->hasLocalName(dfnTag) || element->hasLocalName(codeTag) || element->hasLocalName(sampTag)
|
| +        || element->hasLocalName(kbdTag) || element->hasLocalName(varTag) || element->hasLocalName(citeTag)
|
| +        || element->hasLocalName(abbrTag) || element->hasLocalName(acronymTag) || element->hasLocalName(subTag)
|
| +        || element->hasLocalName(supTag) || element->hasLocalName(spanTag) || element->hasLocalName(nobrTag)
|
| +        || element->hasLocalName(noframesTag) || element->hasLocalName(nolayerTag)
|
| +        || element->hasLocalName(noembedTag))
|
| +        return true;
|
| +
|
| +    // <noscript> is inline only outside fragment parsing, and only when settings exist and
|
| +    // report JavaScript as enabled.
|
| +    if (!element->hasLocalName(noscriptTag) || m_isParsingFragment)
|
| +        return false;
|
| +    Settings* settings = document->settings();
|
| +    return settings && settings->isJavaScriptEnabled();
|
| +}
|
| +
|
| +bool HTMLParser::isResidualStyleTag(const AtomicString& tagName)
|
| +{
|
| +    // Membership set of the tags treated as residual style, filled in on first use.
|
| +    DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, residualStyleTags, ());
|
| +    if (residualStyleTags.isEmpty()) {
|
| +        const QualifiedName* const styleTags[] = {
|
| +            &aTag, &fontTag, &ttTag, &uTag, &bTag, &iTag,
|
| +            &sTag, &strikeTag, &bigTag, &smallTag, &emTag, &strongTag,
|
| +            &dfnTag, &codeTag, &sampTag, &kbdTag, &varTag, &nobrTag
|
| +        };
|
| +        for (size_t i = 0; i < sizeof(styleTags) / sizeof(styleTags[0]); ++i)
|
| +            residualStyleTags.add(styleTags[i]->localName().impl());
|
| +    }
|
| +
|
| +    return residualStyleTags.contains(tagName.impl());
|
| +}
|
| +
|
| +bool HTMLParser::isAffectedByResidualStyle(const AtomicString& tagName)
|
| +{
|
| +    // The set lists the tags that are NOT affected by residual style; every other tag is.
|
| +    // It is filled in on first use.
|
| +    DEFINE_STATIC_LOCAL(HashSet<AtomicStringImpl*>, unaffectedTags, ());
|
| +    if (unaffectedTags.isEmpty()) {
|
| +        const QualifiedName* const exemptTags[] = {
|
| +            &bodyTag, &tableTag, &theadTag, &tbodyTag, &tfootTag,
|
| +            &trTag, &thTag, &tdTag, &captionTag, &colgroupTag,
|
| +            &colTag, &optionTag, &optgroupTag, &selectTag, &objectTag
|
| +        };
|
| +        for (size_t i = 0; i < sizeof(exemptTags) / sizeof(exemptTags[0]); ++i)
|
| +            unaffectedTags.add(exemptTags[i]->localName().impl());
|
| +    }
|
| +
|
| +    return !unaffectedTags.contains(tagName.impl());
|
| +}
|
| +
|
| +// Handles the close tag of a residual style element whose stack entry |elem| lies below entries
|
| +// of a higher level on the block stack (popBlock() calls this for cases such as <b><p>Foo</b>).
|
| +// Each pass of the outer loop fixes up one crossing block: the block's children are moved under
|
| +// a shallow clone of the residual element, the block is reparented, and |elem| is unlinked from
|
| +// the stack. Afterwards the entries above the last handled block are popped, and the residual
|
| +// style tags among them are reopened (Step 7).
|
| +// |elem| is either deleted or relinked into the stack by this function.
|
| +void HTMLParser::handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem)
|
| +{
|
| + HTMLStackElem* maxElem = 0;
|
| + bool finished = false;
|
| + // Read up front because |elem| may be deleted or repurposed inside the loop below.
|
| + bool strayTableContent = elem->strayTableContent;
|
| +
|
| + // NOTE(review): the early returns inside the loop below leave this flag set to true; only the
|
| + // normal exit at the bottom of the function clears it. Confirm whether that is intended.
|
| + m_handlingResidualStyleAcrossBlocks = true;
|
| + while (!finished) {
|
| + // Find the outermost element that crosses over to a higher level. If there exists another higher-level
|
| + // element, we will do another pass, until we have corrected the innermost one.
|
| + ExceptionCode ec = 0;
|
| + HTMLStackElem* curr = blockStack;
|
| + HTMLStackElem* prev = 0;
|
| + HTMLStackElem* prevMaxElem = 0;
|
| + maxElem = 0;
|
| + finished = true;
|
| + while (curr && curr != elem) {
|
| + if (curr->level > elem->level) {
|
| + // A crossing element that is exempt from residual style aborts the whole fixup.
|
| + if (!isAffectedByResidualStyle(curr->tagName))
|
| + return;
|
| + if (maxElem)
|
| + // We will need another pass.
|
| + finished = false;
|
| + maxElem = curr;
|
| + prevMaxElem = prev;
|
| + }
|
| +
|
| + prev = curr;
|
| + curr = curr->next;
|
| + }
|
| +
|
| + // Bail if |elem| was not found on the stack or if nothing crosses over it.
|
| + if (!curr || !maxElem)
|
| + return;
|
| +
|
| + // |prev| is the stack entry directly above |elem|; its node is used as the residual element.
|
| + Node* residualElem = prev->node;
|
| + // The block is the node of the entry directly above |maxElem|, or |current| when |maxElem| is the stack top.
|
| + Node* blockElem = prevMaxElem ? prevMaxElem->node : current;
|
| + Node* parentElem = elem->node;
|
| +
|
| + // Check to see if the reparenting that is going to occur is allowed according to the DOM.
|
| + // FIXME: We should either always allow it or perform an additional fixup instead of
|
| + // just bailing here.
|
| + // Example: <p><font><center>blah</font></center></p> isn't doing a fixup right now.
|
| + if (!parentElem->childAllowed(blockElem))
|
| + return;
|
| +
|
| + // The stack is about to be rearranged, so the cached <p>-in-scope answer is discarded.
|
| + m_hasPElementInScope = Unknown;
|
| +
|
| + if (maxElem->node->parentNode() != elem->node) {
|
| + // Walk the stack and remove any elements that aren't residual style tags. These
|
| + // are basically just being closed up. Example:
|
| + // <font><span>Moo<p>Goo</font></p>.
|
| + // In the above example, the <span> doesn't need to be reopened. It can just close.
|
| + HTMLStackElem* currElem = maxElem->next;
|
| + HTMLStackElem* prevElem = maxElem;
|
| + while (currElem != elem) {
|
| + HTMLStackElem* nextElem = currElem->next;
|
| + if (!isResidualStyleTag(currElem->tagName)) {
|
| + // Unlink |currElem|; the entry above it takes over its node and its reference flag.
|
| + prevElem->next = nextElem;
|
| + prevElem->derefNode();
|
| + prevElem->node = currElem->node;
|
| + prevElem->didRefNode = currElem->didRefNode;
|
| + delete currElem;
|
| + }
|
| + else
|
| + prevElem = currElem;
|
| + currElem = nextElem;
|
| + }
|
| +
|
| + // We have to reopen residual tags in between maxElem and elem. An example of this case is:
|
| + // <font><i>Moo<p>Foo</font>.
|
| + // In this case, we need to transform the part before the <p> into:
|
| + // <font><i>Moo</i></font><i>
|
| + // so that the <i> will remain open. This involves the modification of elements
|
| + // in the block stack.
|
| + // This will also affect how we ultimately reparent the block, since we want it to end up
|
| + // under the reopened residual tags (e.g., the <i> in the above example.)
|
| + RefPtr<Node> prevNode = 0;
|
| + currElem = maxElem;
|
| + while (currElem->node != residualElem) {
|
| + if (isResidualStyleTag(currElem->node->localName())) {
|
| + // Create a clone of this element.
|
| + // We call releaseRef to get a raw pointer since we plan to hand over ownership to currElem.
|
| + Node* currNode = currElem->node->cloneNode(false).releaseRef();
|
| + reportError(ResidualStyleError, &currNode->localName());
|
| +
|
| + // Change the stack element's node to point to the clone.
|
| + // The stack element adopts the reference we obtained above by calling releaseRef().
|
| + currElem->derefNode();
|
| + currElem->node = currNode;
|
| + currElem->didRefNode = true;
|
| +
|
| + // Attach the previous node as a child of this new node.
|
| + if (prevNode)
|
| + currNode->appendChild(prevNode, ec);
|
| + else // The new parent for the block element is going to be the innermost clone.
|
| + parentElem = currNode; // FIXME: We shifted parentElem to be a residual inline. We never checked to see if blockElem could be legally placed inside the inline though.
|
| +
|
| + prevNode = currNode;
|
| + }
|
| +
|
| + currElem = currElem->next;
|
| + }
|
| +
|
| + // Now append the chain of new residual style elements if one exists.
|
| + if (prevNode)
|
| + elem->node->appendChild(prevNode, ec); // FIXME: This append can result in weird stuff happening, like an inline chain being put into a table section.
|
| + }
|
| +
|
| + // Check if the block is still in the tree. If it isn't, then we don't
|
| + // want to remove it from its parent (that would crash) or insert it into
|
| + // a new parent later. See http://bugs.webkit.org/show_bug.cgi?id=6778
|
| + bool isBlockStillInTree = blockElem->parentNode();
|
| +
|
| + // We need to make a clone of |residualElem| and place it just inside |blockElem|.
|
| + // All content of |blockElem| is reparented to be under this clone. We then
|
| + // reparent |blockElem| using real DOM calls so that attachment/detachment will
|
| + // be performed to fix up the rendering tree.
|
| + // So for this example: <b>...<p>Foo</b>Goo</p>
|
| + // The end result will be: <b>...</b><p><b>Foo</b>Goo</p>
|
| + //
|
| + // Step 1: Remove |blockElem| from its parent, doing a batch detach of all the kids.
|
| + if (isBlockStillInTree)
|
| + blockElem->parentNode()->removeChild(blockElem, ec);
|
| +
|
| + // Raw pointer to the clone made in Step 2; it stays null when |blockElem| has no children.
|
| + Node* newNodePtr = 0;
|
| + if (blockElem->firstChild()) {
|
| + // Step 2: Clone |residualElem|.
|
| + RefPtr<Node> newNode = residualElem->cloneNode(false); // Shallow clone. We don't pick up the same kids.
|
| + newNodePtr = newNode.get();
|
| + reportError(ResidualStyleError, &newNode->localName());
|
| +
|
| + // Step 3: Place |blockElem|'s children under |newNode|. Remove all of the children of |blockElem|
|
| + // before we've put |newNode| into the document. That way we'll only do one attachment of all
|
| + // the new content (instead of a bunch of individual attachments).
|
| + Node* currNode = blockElem->firstChild();
|
| + while (currNode) {
|
| + // Fetch the sibling first: appendChild() moves |currNode| out of |blockElem|.
|
| + Node* nextNode = currNode->nextSibling();
|
| + newNode->appendChild(currNode, ec);
|
| + currNode = nextNode;
|
| + }
|
| +
|
| + // Step 4: Place |newNode| under |blockElem|. |blockElem| is still out of the document, so no
|
| + // attachment can occur yet.
|
| + blockElem->appendChild(newNode.release(), ec);
|
| + } else
|
| + // An empty block ends the fixup even if more crossing elements were seen.
|
| + finished = true;
|
| +
|
| + // Step 5: Reparent |blockElem|. Now the full attachment of the fixed up tree takes place.
|
| + if (isBlockStillInTree)
|
| + parentElem->appendChild(blockElem, ec);
|
| +
|
| + // Step 6: Pull |elem| out of the stack, since it is no longer enclosing us. Also update
|
| + // the node associated with the previous stack element so that when it gets popped,
|
| + // it doesn't make the residual element the next current node.
|
| + HTMLStackElem* currElem = maxElem;
|
| + HTMLStackElem* prevElem = 0;
|
| + while (currElem != elem) {
|
| + prevElem = currElem;
|
| + currElem = currElem->next;
|
| + }
|
| + // |prevElem| cannot be null here: |maxElem| was found strictly above |elem| on the stack.
|
| + prevElem->next = elem->next;
|
| + prevElem->derefNode();
|
| + prevElem->node = elem->node;
|
| + prevElem->didRefNode = elem->didRefNode;
|
| + if (!finished) {
|
| + // Repurpose |elem| to represent |newNode| and insert it at the appropriate position
|
| + // in the stack. We do not do this for the innermost block, because in that case the new
|
| + // node is effectively no longer open.
|
| + // NOTE(review): |prevMaxElem| is dereferenced without a null check; this relies on a
|
| + // second crossing element having been seen above |maxElem| whenever |finished| is false.
|
| + elem->next = maxElem;
|
| + elem->node = prevMaxElem->node;
|
| + elem->didRefNode = prevMaxElem->didRefNode;
|
| + elem->strayTableContent = false;
|
| + prevMaxElem->next = elem;
|
| + ASSERT(newNodePtr);
|
| + prevMaxElem->node = newNodePtr;
|
| + prevMaxElem->didRefNode = false;
|
| + } else
|
| + delete elem;
|
| + }
|
| +
|
| + // FIXME: If we ever make a case like this work:
|
| + // <table><b><i><form></b></form></i></table>
|
| + // Then this check will be too simplistic. Right now the <i><form> chain will end up inside the <tbody>, which is pretty crazy.
|
| + if (strayTableContent)
|
| + inStrayTableContent--;
|
| +
|
| + // Step 7: Reopen intermediate inlines, e.g., <b><p><i>Foo</b>Goo</p>.
|
| + // In the above example, Goo should stay italic.
|
| + // We cap the number of tags we're willing to reopen based off cResidualStyleMaxDepth.
|
| +
|
| + HTMLStackElem* curr = blockStack;
|
| + HTMLStackElem* residualStyleStack = 0;
|
| + unsigned stackDepth = 1;
|
| + unsigned redundantStyleCount = 0;
|
| + while (curr && curr != maxElem) {
|
| + // We will actually schedule this tag for reopening
|
| + // after we complete the close of this entire block.
|
| + // Note that stackDepth is only incremented for residual style tags (short-circuit evaluation).
|
| + if (isResidualStyleTag(curr->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
|
| + // We've overloaded the use of stack elements and are just reusing the
|
| + // struct with a slightly different meaning to the variables. Instead of chaining
|
| + // from innermost to outermost, we build up a list of all the tags we need to reopen
|
| + // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
|
| + // to the outermost tag we need to reopen.
|
| + // We also set curr->node to be the actual element that corresponds to the tag name stored in
|
| + // curr->tagName rather than the node that you should pop to when the element gets pulled off
|
| + // the stack.
|
| + if (residualStyleStack && curr->tagName == residualStyleStack->tagName && curr->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
|
| + redundantStyleCount++;
|
| + else
|
| + redundantStyleCount = 0;
|
| +
|
| + // Runs of equivalent tags beyond cMaxRedundantTagDepth are dropped instead of being reopened.
|
| + if (redundantStyleCount < cMaxRedundantTagDepth)
|
| + moveOneBlockToStack(residualStyleStack);
|
| + else
|
| + popOneBlock();
|
| + } else
|
| + popOneBlock();
|
| +
|
| + // Both branches above removed the top entry, so continue from the new top of the stack.
|
| + curr = blockStack;
|
| + }
|
| +
|
| + reopenResidualStyleTags(residualStyleStack, 0); // Stray table content can't be an issue here, since some element above will always become the root of new stray table content.
|
| +
|
| + m_handlingResidualStyleAcrossBlocks = false;
|
| +}
|
| +
|
| +void HTMLParser::reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent)
|
| +{
|
| +    // Walk the list of tags scheduled for reopening; every entry is consumed and freed here.
|
| +    for (HTMLStackElem* following = 0; elem; elem = following) {
|
| +        // A shallow clone stands in for the element that is being reopened.
|
| +        RefPtr<Node> clone = elem->node->cloneNode(false);
|
| +        reportError(ResidualStyleError, &clone->localName());
|
| +
|
| +        // In the malformed table case the clone has to go in front of the table, which will be
|
| +        // the parent's last child; otherwise it is simply appended to |current|.
|
| +        // FIXME: Is it really OK to ignore the exceptions here?
|
| +        ExceptionCode ec = 0;
|
| +        if (!malformedTableParent)
|
| +            current->appendChild(clone, ec);
|
| +        else
|
| +            malformedTableParent->insertBefore(clone, malformedTableParent->lastChild(), ec);
|
| +
|
| +        // Open a stack entry for the node that was just created.
|
| +        pushBlock(elem->tagName, elem->level);
|
| +        clone->beginParsingChildren();
|
| +
|
| +        // The reopened tag must also know when it sits inside a malformed table.
|
| +        const bool insideMalformedTable = malformedTableParent != 0;
|
| +        blockStack->strayTableContent = insideMalformedTable;
|
| +        if (insideMalformedTable)
|
| +            inStrayTableContent++;
|
| +
|
| +        // Only the first reopened tag is inserted relative to the malformed table parent.
|
| +        malformedTableParent = 0;
|
| +
|
| +        // |current| is updated by hand to point at the clone.
|
| +        setCurrent(clone.get());
|
| +
|
| +        // Release this entry and move on to the next tag that needs reopening.
|
| +        following = elem->next;
|
| +        elem->derefNode();
|
| +        delete elem;
|
| +    }
|
| +}
|
| +
|
| +void HTMLParser::pushBlock(const AtomicString& tagName, int level)
|
| +{
|
| +    // The new entry records |current| and its reference flag, which it takes over from the parser.
|
| +    HTMLStackElem* pushed = new HTMLStackElem(tagName, level, current, didRefCurrent, blockStack);
|
| +    blockStack = pushed;
|
| +    didRefCurrent = false;
|
| +
|
| +    // Keep the cached <p>-in-scope state up to date.
|
| +    if (tagName == pTag) {
|
| +        m_hasPElementInScope = InScope;
|
| +        return;
|
| +    }
|
| +    if (isScopingTag(tagName))
|
| +        m_hasPElementInScope = NotInScope;
|
| +}
|
| +
|
| +// Closes the innermost open block named |tagName|, popping every stack entry above it as well.
|
| +// Residual style tags that get closed along the way are collected and reopened afterwards.
|
| +// If no entry named |tagName| is on the stack, nothing is popped; StrayCloseTagError is reported
|
| +// in that case when |reportErrors| is true.
|
| +void HTMLParser::popBlock(const AtomicString& tagName, bool reportErrors)
|
| +{
|
| + HTMLStackElem* elem = blockStack;
|
| +
|
| + // Highest level seen among the entries above the matching one.
|
| + int maxLevel = 0;
|
| +
|
| + while (elem && (elem->tagName != tagName)) {
|
| + if (maxLevel < elem->level)
|
| + maxLevel = elem->level;
|
| + elem = elem->next;
|
| + }
|
| +
|
| + // No open element with this name: the close tag is stray.
|
| + if (!elem) {
|
| + if (reportErrors)
|
| + reportError(StrayCloseTagError, &tagName, 0, true);
|
| + return;
|
| + }
|
| +
|
| + if (maxLevel > elem->level) {
|
| + // We didn't match because the tag is in a different scope, e.g.,
|
| + // <b><p>Foo</b>. Try to correct the problem.
|
| + // Only residual style tags get the fixup; any other tag's close is silently ignored here.
|
| + if (!isResidualStyleTag(tagName))
|
| + return;
|
| + return handleResidualStyleCloseTagAcrossBlocks(elem);
|
| + }
|
| +
|
| + bool isAffectedByStyle = isAffectedByResidualStyle(elem->tagName);
|
| + HTMLStackElem* residualStyleStack = 0;
|
| + Node* malformedTableParent = 0;
|
| +
|
| + // Second pass: pop from the top of the stack down to and including the matching entry.
|
| + elem = blockStack;
|
| + unsigned stackDepth = 1;
|
| + unsigned redundantStyleCount = 0;
|
| + while (elem) {
|
| + if (elem->tagName == tagName) {
|
| + // Snapshot the counter so a decrement caused by this pop can be detected below.
|
| + int strayTable = inStrayTableContent;
|
| + popOneBlock();
|
| + // The matching entry is gone; this ends the loop.
|
| + elem = 0;
|
| +
|
| + // This element was the root of some malformed content just inside an implicit or
|
| + // explicit <tbody> or <tr>.
|
| + // If we end up needing to reopen residual style tags, the root of the reopened chain
|
| + // must also know that it is the root of malformed content inside a <tbody>/<tr>.
|
| + if (strayTable && (inStrayTableContent < strayTable) && residualStyleStack) {
|
| + // Walk up to the nearest enclosing <table>; its parent is passed on for the reopened chain.
|
| + Node* curr = current;
|
| + while (curr && !curr->hasTagName(tableTag))
|
| + curr = curr->parentNode();
|
| + malformedTableParent = curr ? curr->parentNode() : 0;
|
| + }
|
| + }
|
| + else {
|
| + if (m_currentFormElement && elem->tagName == formTag)
|
| + // A <form> is being closed prematurely (and this is
|
| + // malformed HTML). Set an attribute on the form to clear out its
|
| + // bottom margin.
|
| + m_currentFormElement->setMalformed(true);
|
| +
|
| + // Schedule this tag for reopening
|
| + // after we complete the close of this entire block.
|
| + // Note that stackDepth is only incremented when the first two conditions hold (short-circuit evaluation).
|
| + if (isAffectedByStyle && isResidualStyleTag(elem->tagName) && stackDepth++ < cResidualStyleMaxDepth) {
|
| + // We've overloaded the use of stack elements and are just reusing the
|
| + // struct with a slightly different meaning to the variables. Instead of chaining
|
| + // from innermost to outermost, we build up a list of all the tags we need to reopen
|
| + // from the outermost to the innermost, i.e., residualStyleStack will end up pointing
|
| + // to the outermost tag we need to reopen.
|
| + // We also set elem->node to be the actual element that corresponds to the tag name stored in
|
| + // elem->tagName rather than the node that you should pop to when the element gets pulled off
|
| + // the stack.
|
| + if (residualStyleStack && elem->tagName == residualStyleStack->tagName && elem->node->attributes()->mapsEquivalent(residualStyleStack->node->attributes()))
|
| + redundantStyleCount++;
|
| + else
|
| + redundantStyleCount = 0;
|
| +
|
| + // Runs of equivalent tags beyond cMaxRedundantTagDepth are dropped instead of being reopened.
|
| + if (redundantStyleCount < cMaxRedundantTagDepth)
|
| + moveOneBlockToStack(residualStyleStack);
|
| + else
|
| + popOneBlock();
|
| + } else
|
| + popOneBlock();
|
| + // Both branches above removed the top entry, so continue from the new top of the stack.
|
| + elem = blockStack;
|
| + }
|
| + }
|
| +
|
| + reopenResidualStyleTags(residualStyleStack, malformedTableParent);
|
| +}
|
| +
|
| +inline HTMLStackElem* HTMLParser::popOneBlockCommon()
|
| +{
|
| +    HTMLStackElem* top = blockStack;
|
| +
|
| +    // Leaving |current|: form elements restore their state during parsing, and a few elements
|
| +    // (<applet>, <object>) need to know when all of their child elements (<param>s) are available.
|
| +    if (current && top->node != current)
|
| +        current->finishParsingChildren();
|
| +
|
| +    // Unlink the top entry and make the node it recorded the current one again.
|
| +    blockStack = top->next;
|
| +    current = top->node;
|
| +    didRefCurrent = top->didRefNode;
|
| +
|
| +    if (top->strayTableContent)
|
| +        inStrayTableContent--;
|
| +
|
| +    // Keep the cached <p>-in-scope state in step with the stack.
|
| +    if (top->tagName == pTag)
|
| +        m_hasPElementInScope = NotInScope;
|
| +    else if (isScopingTag(top->tagName))
|
| +        m_hasPElementInScope = Unknown;
|
| +
|
| +    // The caller decides whether the unlinked entry is deleted or reused.
|
| +    return top;
|
| +}
|
| +
|
| +void HTMLParser::popOneBlock()
|
| +{
|
| +    // popOneBlockCommon() overwrites current/didRefCurrent, so remember them first.
|
| +    Node* const nodeBeforePop = current;
|
| +    const bool refHeldBeforePop = didRefCurrent;
|
| +
|
| +    HTMLStackElem* popped = popOneBlockCommon();
|
| +    delete popped;
|
| +
|
| +    // Drop the reference the parser was holding on the node that was current before the pop.
|
| +    if (refHeldBeforePop)
|
| +        nodeBeforePop->deref();
|
| +}
|
| +
|
| +void HTMLParser::moveOneBlockToStack(HTMLStackElem*& head)
|
| +{
|
| +    // The stack element being popped is recycled, but to describe the node that was current
|
| +    // before the pop. See the two callers for how the resulting list is used.
|
| +
|
| +    // popOneBlockCommon() overwrites current/didRefCurrent, so remember them first.
|
| +    Node* const nodeBeforePop = current;
|
| +    const bool refHeldBeforePop = didRefCurrent;
|
| +
|
| +    // Unlike popOneBlock(), the node is not dereffed here, because the pointer lives on in the
|
| +    // recycled stack element.
|
| +    HTMLStackElem* recycled = popOneBlockCommon();
|
| +
|
| +    // The element's old node needs no deref: popOneBlockCommon() already transferred it into
|
| +    // the current/didRefCurrent fields. Store the previous current node and push the element
|
| +    // onto the front of the caller's list.
|
| +    recycled->node = nodeBeforePop;
|
| +    recycled->didRefNode = refHeldBeforePop;
|
| +    recycled->next = head;
|
| +    head = recycled;
|
| +}
|
| +
|
| +void HTMLParser::checkIfHasPElementInScope()
|
| +{
|
| +    // Recompute the cached state by scanning the stack from the top: a <p> entry found before
|
| +    // any scoping tag means InScope; hitting a scoping tag first, or running off the end of
|
| +    // the stack, leaves NotInScope.
|
| +    m_hasPElementInScope = NotInScope;
|
| +    for (HTMLStackElem* entry = blockStack; entry; entry = entry->next) {
|
| +        if (entry->tagName == pTag) {
|
| +            m_hasPElementInScope = InScope;
|
| +            return;
|
| +        }
|
| +        if (isScopingTag(entry->tagName))
|
| +            return;
|
| +    }
|
| +}
|
| +
|
| +void HTMLParser::popInlineBlocks()
|
| +{
|
| +    // Unwind the block stack for as long as the current node is an inline.
|
| +    for (;;) {
|
| +        if (!blockStack || !isInline(current))
|
| +            return;
|
| +        popOneBlock();
|
| +    }
|
| +}
|
| +
|
| +void HTMLParser::freeBlock()
|
| +{
|
| +    // Pop entries until the block stack is empty.
|
| +    if (!blockStack)
|
| +        return;
|
| +    do {
|
| +        popOneBlock();
|
| +    } while (blockStack);
|
| +}
|
| +
|
| +void HTMLParser::createHead()
|
| +{
|
| +    // Nothing to do when a head already exists or there is no document element to hold one.
|
| +    if (head)
|
| +        return;
|
| +    if (!document->documentElement())
|
| +        return;
|
| +
|
| +    // Create the <head> and insert it in front of the body (which may not exist yet).
|
| +    head = new HTMLHeadElement(headTag, document);
|
| +    HTMLElement* existingBody = document->body();
|
| +    ExceptionCode ec = 0;
|
| +    document->documentElement()->insertBefore(head, existingBody, ec);
|
| +    if (ec) {
|
| +        // The insertion failed, so forget the element again.
|
| +        head = 0;
|
| +        return;
|
| +    }
|
| +
|
| +    // If the body does not exist yet, then the <head> should be pushed as the current block.
|
| +    if (existingBody)
|
| +        return;
|
| +    pushBlock(head->localName(), head->tagPriority());
|
| +    setCurrent(head);
|
| +}
|
| +
|
| +PassRefPtr<Node> HTMLParser::handleIsindex(Token* t)
|
| +{
|
| +    // An <isindex> token is expanded into a <div> holding: <hr>, prompt text, the index field, <hr>.
|
| +    RefPtr<Node> wrapper = new HTMLDivElement(divTag, document);
|
| +
|
| +    NamedMappedAttrMap* tokenAttributes = t->attrs.get();
|
| +
|
| +    // The index field receives the token's attribute map plus a fixed type attribute.
|
| +    RefPtr<HTMLIsIndexElement> indexField = new HTMLIsIndexElement(isindexTag, document, m_currentFormElement.get());
|
| +    indexField->setAttributeMap(tokenAttributes);
|
| +    indexField->setAttribute(typeAttr, "khtml_isindex");
|
| +
|
| +    // A prompt attribute on the token replaces the default introduction text. The token's
|
| +    // attribute map is cleared once it has been consulted.
|
| +    String promptText = searchableIndexIntroduction();
|
| +    if (tokenAttributes) {
|
| +        Attribute* promptAttribute = tokenAttributes->getAttributeItem(promptAttr);
|
| +        if (promptAttribute)
|
| +            promptText = promptAttribute->value().string() + " ";
|
| +        t->attrs = 0;
|
| +    }
|
| +
|
| +    wrapper->addChild(new HTMLHRElement(hrTag, document));
|
| +    wrapper->addChild(new Text(document, promptText));
|
| +    wrapper->addChild(indexField.release());
|
| +    wrapper->addChild(new HTMLHRElement(hrTag, document));
|
| +
|
| +    return wrapper.release();
|
| +}
|
| +
|
| +void HTMLParser::startBody()
|
| +{
|
| +    // Entering the body happens only once.
|
| +    if (inBody)
|
| +        return;
|
| +    inBody = true;
|
| +
|
| +    // An isindex element that was held back is inserted now, then forgotten.
|
| +    if (!m_isindexElement)
|
| +        return;
|
| +    insertNode(m_isindexElement.get(), true /* don't descend into this node */);
|
| +    m_isindexElement = 0;
|
| +}
|
| +
|
| +void HTMLParser::finished()
|
| +{
|
| +    // A completely empty document still gets its HTML element, and this is the place to create it.
|
| +    const bool documentIsEmpty = current && current->isDocumentNode() && !document->documentElement();
|
| +    if (documentIsEmpty)
|
| +        insertNode(new HTMLHtmlElement(htmlTag, document));
|
| +
|
| +    // Make sure "current" is not left pointing to a node when the document is destroyed.
|
| +    freeBlock();
|
| +    setCurrent(0);
|
| +
|
| +    if (m_isParsingFragment)
|
| +        return;
|
| +
|
| +    // Warning, this may delete the tokenizer and parser, so nothing else may follow this call.
|
| +    document->finishedParsing();
|
| +}
|
| +
|
| +void HTMLParser::reportErrorToConsole(HTMLParserErrorCode errorCode, const AtomicString* tagName1, const AtomicString* tagName2, bool closeTags)
|
| +{
|
| + Frame* frame = document->frame();
|
| + if (!frame)
|
| + return;
|
| +
|
| + HTMLTokenizer* htmlTokenizer = static_cast<HTMLTokenizer*>(document->tokenizer());
|
| + int lineNumber = htmlTokenizer->lineNumber() + 1;
|
| +
|
| + AtomicString tag1;
|
| + AtomicString tag2;
|
| + if (tagName1) {
|
| + if (*tagName1 == "#text")
|
| + tag1 = "Text";
|
| + else if (*tagName1 == "#comment")
|
| + tag1 = "<!-- comment -->";
|
| + else
|
| + tag1 = (closeTags ? "</" : "<") + *tagName1 + ">";
|
| + }
|
| + if (tagName2) {
|
| + if (*tagName2 == "#text")
|
| + tag2 = "Text";
|
| + else if (*tagName2 == "#comment")
|
| + tag2 = "<!-- comment -->";
|
| + else
|
| + tag2 = (closeTags ? "</" : "<") + *tagName2 + ">";
|
| + }
|
| +
|
| + const char* errorMsg = htmlParserErrorMessageTemplate(errorCode);
|
| + if (!errorMsg)
|
| + return;
|
| +
|
| + String message;
|
| + if (htmlTokenizer->processingContentWrittenByScript())
|
| + message += htmlParserDocumentWriteMessage();
|
| + message += errorMsg;
|
| + message.replace("%tag1", tag1);
|
| + message.replace("%tag2", tag2);
|
| +
|
| + frame->domWindow()->console()->addMessage(HTMLMessageSource,
|
| + isWarning(errorCode) ? WarningMessageLevel : ErrorMessageLevel,
|
| + message, lineNumber, document->url().string());
|
| +}
|
| +
|
| +}
|
| +
|
| +
|
|
|