| Index: third_party/WebKit/Source/core/page/PageSerializer.cpp
|
| diff --git a/third_party/WebKit/Source/core/page/PageSerializer.cpp b/third_party/WebKit/Source/core/page/PageSerializer.cpp
|
| deleted file mode 100644
|
| index a3fe2fea67075906a0059065ed74c24180d8f5a8..0000000000000000000000000000000000000000
|
| --- a/third_party/WebKit/Source/core/page/PageSerializer.cpp
|
| +++ /dev/null
|
| @@ -1,489 +0,0 @@
|
| -/*
|
| - * Copyright (C) 2011 Google Inc. All rights reserved.
|
| - *
|
| - * Redistribution and use in source and binary forms, with or without
|
| - * modification, are permitted provided that the following conditions are
|
| - * met:
|
| - *
|
| - * * Redistributions of source code must retain the above copyright
|
| - * notice, this list of conditions and the following disclaimer.
|
| - * * Redistributions in binary form must reproduce the above
|
| - * copyright notice, this list of conditions and the following disclaimer
|
| - * in the documentation and/or other materials provided with the
|
| - * distribution.
|
| - * * Neither the name of Google Inc. nor the names of its
|
| - * contributors may be used to endorse or promote products derived from
|
| - * this software without specific prior written permission.
|
| - *
|
| - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| - */
|
| -
|
| -#include "core/page/PageSerializer.h"
|
| -
|
| -#include "core/HTMLNames.h"
|
| -#include "core/InputTypeNames.h"
|
| -#include "core/css/CSSFontFaceRule.h"
|
| -#include "core/css/CSSFontFaceSrcValue.h"
|
| -#include "core/css/CSSImageValue.h"
|
| -#include "core/css/CSSImportRule.h"
|
| -#include "core/css/CSSRuleList.h"
|
| -#include "core/css/CSSStyleDeclaration.h"
|
| -#include "core/css/CSSStyleRule.h"
|
| -#include "core/css/CSSValueList.h"
|
| -#include "core/css/StylePropertySet.h"
|
| -#include "core/css/StyleRule.h"
|
| -#include "core/css/StyleSheetContents.h"
|
| -#include "core/dom/Document.h"
|
| -#include "core/dom/Element.h"
|
| -#include "core/dom/Text.h"
|
| -#include "core/editing/serializers/MarkupAccumulator.h"
|
| -#include "core/fetch/FontResource.h"
|
| -#include "core/fetch/ImageResource.h"
|
| -#include "core/frame/LocalFrame.h"
|
| -#include "core/html/HTMLFrameElementBase.h"
|
| -#include "core/html/HTMLImageElement.h"
|
| -#include "core/html/HTMLInputElement.h"
|
| -#include "core/html/HTMLLinkElement.h"
|
| -#include "core/html/HTMLMetaElement.h"
|
| -#include "core/html/HTMLStyleElement.h"
|
| -#include "core/html/ImageDocument.h"
|
| -#include "core/page/Page.h"
|
| -#include "core/style/StyleFetchedImage.h"
|
| -#include "core/style/StyleImage.h"
|
| -#include "platform/SerializedResource.h"
|
| -#include "platform/graphics/Image.h"
|
| -#include "platform/heap/Handle.h"
|
| -#include "wtf/HashSet.h"
|
| -#include "wtf/OwnPtr.h"
|
| -#include "wtf/text/CString.h"
|
| -#include "wtf/text/StringBuilder.h"
|
| -#include "wtf/text/TextEncoding.h"
|
| -#include "wtf/text/WTFString.h"
|
| -
|
| -namespace blink {
|
| -
|
| -static bool shouldIgnoreElement(const Element& element)
|
| -{
|
| - if (isHTMLScriptElement(element))
|
| - return true;
|
| - if (isHTMLNoScriptElement(element))
|
| - return true;
|
| - return isHTMLMetaElement(element) && toHTMLMetaElement(element).computeEncoding().isValid();
|
| -}
|
| -
|
| -class SerializerMarkupAccumulator : public MarkupAccumulator {
|
| - STACK_ALLOCATED();
|
| -public:
|
| - SerializerMarkupAccumulator(PageSerializer::Delegate&, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>&);
|
| - ~SerializerMarkupAccumulator() override;
|
| -
|
| -protected:
|
| - void appendText(StringBuilder& out, Text&) override;
|
| - bool shouldIgnoreAttribute(const Attribute&) override;
|
| - void appendElement(StringBuilder& out, Element&, Namespaces*) override;
|
| - void appendAttribute(StringBuilder& out, const Element&, const Attribute&, Namespaces*) override;
|
| - void appendStartTag(Node&, Namespaces* = nullptr) override;
|
| - void appendEndTag(const Element&) override;
|
| -
|
| -private:
|
| - void appendAttributeValue(StringBuilder& out, const String& attributeValue);
|
| - void appendRewrittenAttribute(
|
| - StringBuilder& out,
|
| - const Element&,
|
| - const String& attributeName,
|
| - const String& attributeValue);
|
| -
|
| - PageSerializer::Delegate& m_delegate;
|
| - RawPtrWillBeMember<const Document> m_document;
|
| -
|
| - // FIXME: |PageSerializer| uses |m_nodes| for collecting nodes in document
|
| - // included into serialized text then extracts image, object, etc. The size
|
| - // of this vector isn't small for large document. It is better to use
|
| - // callback like functionality.
|
| - WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes;
|
| -
|
| - // Elements with links rewritten via appendAttribute method.
|
| - WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewrittenLinks;
|
| -};
|
| -
|
| -SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer::Delegate& delegate, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes)
|
| - : MarkupAccumulator(ResolveAllURLs)
|
| - , m_delegate(delegate)
|
| - , m_document(&document)
|
| - , m_nodes(nodes)
|
| -{
|
| -}
|
| -
|
| -SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
|
| -{
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text)
|
| -{
|
| - Element* parent = text.parentElement();
|
| - if (parent && !shouldIgnoreElement(*parent))
|
| - MarkupAccumulator::appendText(result, text);
|
| -}
|
| -
|
| -bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribute)
|
| -{
|
| - return m_delegate.shouldIgnoreAttribute(attribute);
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
|
| -{
|
| - if (!shouldIgnoreElement(element))
|
| - MarkupAccumulator::appendElement(result, element, namespaces);
|
| -
|
| - // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML
|
| - if (isHTMLHeadElement(element)) {
|
| - result.appendLiteral("<meta http-equiv=\"Content-Type\" content=\"");
|
| - appendAttributeValue(result, m_document->suggestedMIMEType());
|
| - result.appendLiteral("; charset=");
|
| - appendAttributeValue(result, m_document->characterSet());
|
| - if (m_document->isXHTMLDocument())
|
| - result.appendLiteral("\" />");
|
| - else
|
| - result.appendLiteral("\">");
|
| - }
|
| -
|
| - // FIXME: For object (plugins) tags and video tag we could replace them by an image of their current contents.
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendAttribute(
|
| - StringBuilder& out,
|
| - const Element& element,
|
| - const Attribute& attribute,
|
| - Namespaces* namespaces)
|
| -{
|
| - // Check if link rewriting can affect the attribute.
|
| - bool isLinkAttribute = element.hasLegalLinkAttribute(attribute.name());
|
| - bool isSrcDocAttribute = isHTMLFrameElementBase(element)
|
| - && attribute.name() == HTMLNames::srcdocAttr;
|
| - if (isLinkAttribute || isSrcDocAttribute) {
|
| - // Check if the delegate wants to do link rewriting for the element.
|
| - String newLinkForTheElement;
|
| - if (m_delegate.rewriteLink(element, newLinkForTheElement)) {
|
| - if (isLinkAttribute) {
|
| - // Rewrite element links.
|
| - appendRewrittenAttribute(
|
| - out, element, attribute.name().toString(), newLinkForTheElement);
|
| - } else {
|
| - ASSERT(isSrcDocAttribute);
|
| - // Emit src instead of srcdoc attribute for frame elements - we want the
|
| - // serialized subframe to use html contents from the link provided by
|
| - // Delegate::rewriteLink rather than html contents from srcdoc
|
| - // attribute.
|
| - appendRewrittenAttribute(
|
| - out, element, HTMLNames::srcAttr.localName(), newLinkForTheElement);
|
| - }
|
| - return;
|
| - }
|
| - }
|
| -
|
| - // Fallback to appending the original attribute.
|
| - MarkupAccumulator::appendAttribute(out, element, attribute, namespaces);
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendStartTag(Node& node, Namespaces* namespaces)
|
| -{
|
| - MarkupAccumulator::appendStartTag(node, namespaces);
|
| - m_nodes.append(&node);
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendEndTag(const Element& element)
|
| -{
|
| - if (!shouldIgnoreElement(element))
|
| - MarkupAccumulator::appendEndTag(element);
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendAttributeValue(
|
| - StringBuilder& out,
|
| - const String& attributeValue)
|
| -{
|
| - MarkupFormatter::appendAttributeValue(out, attributeValue, m_document->isHTMLDocument());
|
| -}
|
| -
|
| -void SerializerMarkupAccumulator::appendRewrittenAttribute(
|
| - StringBuilder& out,
|
| - const Element& element,
|
| - const String& attributeName,
|
| - const String& attributeValue)
|
| -{
|
| - if (m_elementsWithRewrittenLinks.contains(&element))
|
| - return;
|
| - m_elementsWithRewrittenLinks.add(&element);
|
| -
|
| - // Append the rewritten attribute.
|
| - // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this.
|
| - out.append(' ');
|
| - out.append(attributeName);
|
| - out.appendLiteral("=\"");
|
| - appendAttributeValue(out, attributeValue);
|
| - out.appendLiteral("\"");
|
| -}
|
| -
|
| -// TODO(tiger): Right now there is no support for rewriting URLs inside CSS
|
| -// documents which leads to bugs like <https://crbug.com/251898>. Not being
|
| -// able to rewrite URLs inside CSS documents means that resources imported from
|
| -// url(...) statements in CSS might not work when rewriting links for the
|
| -// "Webpage, Complete" method of saving a page. It will take some work but it
|
| -// needs to be done if we want to continue to support non-MHTML saved pages.
|
| -
|
| -PageSerializer::PageSerializer(
|
| - Vector<SerializedResource>& resources,
|
| - Delegate& delegate)
|
| - : m_resources(&resources)
|
| - , m_delegate(delegate)
|
| -{
|
| -}
|
| -
|
| -void PageSerializer::serializeFrame(const LocalFrame& frame)
|
| -{
|
| - ASSERT(frame.document());
|
| - Document& document = *frame.document();
|
| - KURL url = document.url();
|
| -
|
| - // If frame is an image document, add the image and don't continue
|
| - if (document.isImageDocument()) {
|
| - ImageDocument& imageDocument = toImageDocument(document);
|
| - addImageToResources(imageDocument.cachedImage(), url);
|
| - return;
|
| - }
|
| -
|
| - WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
|
| - SerializerMarkupAccumulator accumulator(m_delegate, document, serializedNodes);
|
| - String text = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode);
|
| -
|
| - CString frameHTML = document.encoding().encode(text, WTF::EntitiesForUnencodables);
|
| - m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
|
| -
|
| - for (Node* node: serializedNodes) {
|
| - ASSERT(node);
|
| - if (!node->isElementNode())
|
| - continue;
|
| -
|
| - Element& element = toElement(*node);
|
| - // We have to process in-line style as it might contain some resources (typically background images).
|
| - if (element.isStyledElement()) {
|
| - retrieveResourcesForProperties(element.inlineStyle(), document);
|
| - retrieveResourcesForProperties(element.presentationAttributeStyle(), document);
|
| - }
|
| -
|
| - if (isHTMLImageElement(element)) {
|
| - HTMLImageElement& imageElement = toHTMLImageElement(element);
|
| - KURL url = document.completeURL(imageElement.getAttribute(HTMLNames::srcAttr));
|
| - ImageResource* cachedImage = imageElement.cachedImage();
|
| - addImageToResources(cachedImage, url);
|
| - } else if (isHTMLInputElement(element)) {
|
| - HTMLInputElement& inputElement = toHTMLInputElement(element);
|
| - if (inputElement.type() == InputTypeNames::image && inputElement.imageLoader()) {
|
| - KURL url = inputElement.src();
|
| - ImageResource* cachedImage = inputElement.imageLoader()->image();
|
| - addImageToResources(cachedImage, url);
|
| - }
|
| - } else if (isHTMLLinkElement(element)) {
|
| - HTMLLinkElement& linkElement = toHTMLLinkElement(element);
|
| - if (CSSStyleSheet* sheet = linkElement.sheet()) {
|
| - KURL url = document.completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
|
| - serializeCSSStyleSheet(*sheet, url);
|
| - }
|
| - } else if (isHTMLStyleElement(element)) {
|
| - HTMLStyleElement& styleElement = toHTMLStyleElement(element);
|
| - if (CSSStyleSheet* sheet = styleElement.sheet())
|
| - serializeCSSStyleSheet(*sheet, KURL());
|
| - }
|
| - }
|
| -}
|
| -
|
| -void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KURL& url)
|
| -{
|
| - StringBuilder cssText;
|
| - cssText.appendLiteral("@charset \"");
|
| - cssText.append(styleSheet.contents()->charset().lower());
|
| - cssText.appendLiteral("\";\n\n");
|
| -
|
| - for (unsigned i = 0; i < styleSheet.length(); ++i) {
|
| - CSSRule* rule = styleSheet.item(i);
|
| - String itemText = rule->cssText();
|
| - if (!itemText.isEmpty()) {
|
| - cssText.append(itemText);
|
| - if (i < styleSheet.length() - 1)
|
| - cssText.appendLiteral("\n\n");
|
| - }
|
| -
|
| - // Some rules have resources associated with them that we need to retrieve.
|
| - serializeCSSRule(rule);
|
| - }
|
| -
|
| - if (shouldAddURL(url)) {
|
| - WTF::TextEncoding textEncoding(styleSheet.contents()->charset());
|
| - ASSERT(textEncoding.isValid());
|
| - String textString = cssText.toString();
|
| - CString text = textEncoding.encode(textString, WTF::EntitiesForUnencodables);
|
| - m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
|
| - m_resourceURLs.add(url);
|
| - }
|
| -}
|
| -
|
| -void PageSerializer::serializeCSSRule(CSSRule* rule)
|
| -{
|
| - ASSERT(rule->parentStyleSheet()->ownerDocument());
|
| - Document& document = *rule->parentStyleSheet()->ownerDocument();
|
| -
|
| - switch (rule->type()) {
|
| - case CSSRule::STYLE_RULE:
|
| - retrieveResourcesForProperties(&toCSSStyleRule(rule)->styleRule()->properties(), document);
|
| - break;
|
| -
|
| - case CSSRule::IMPORT_RULE: {
|
| - CSSImportRule* importRule = toCSSImportRule(rule);
|
| - KURL sheetBaseURL = rule->parentStyleSheet()->baseURL();
|
| - ASSERT(sheetBaseURL.isValid());
|
| - KURL importURL = KURL(sheetBaseURL, importRule->href());
|
| - if (m_resourceURLs.contains(importURL))
|
| - break;
|
| - if (importRule->styleSheet())
|
| - serializeCSSStyleSheet(*importRule->styleSheet(), importURL);
|
| - break;
|
| - }
|
| -
|
| - // Rules inheriting CSSGroupingRule
|
| - case CSSRule::MEDIA_RULE:
|
| - case CSSRule::SUPPORTS_RULE: {
|
| - CSSRuleList* ruleList = rule->cssRules();
|
| - for (unsigned i = 0; i < ruleList->length(); ++i)
|
| - serializeCSSRule(ruleList->item(i));
|
| - break;
|
| - }
|
| -
|
| - case CSSRule::FONT_FACE_RULE:
|
| - retrieveResourcesForProperties(&toCSSFontFaceRule(rule)->styleRule()->properties(), document);
|
| - break;
|
| -
|
| - // Rules in which no external resources can be referenced
|
| - case CSSRule::CHARSET_RULE:
|
| - case CSSRule::PAGE_RULE:
|
| - case CSSRule::KEYFRAMES_RULE:
|
| - case CSSRule::KEYFRAME_RULE:
|
| - case CSSRule::VIEWPORT_RULE:
|
| - break;
|
| -
|
| - default:
|
| - ASSERT_NOT_REACHED();
|
| - }
|
| -}
|
| -
|
| -bool PageSerializer::shouldAddURL(const KURL& url)
|
| -{
|
| - return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData()
|
| - && !m_delegate.shouldSkipResource(url);
|
| -}
|
| -
|
| -void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
|
| -{
|
| - if (!data) {
|
| - WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
|
| - return;
|
| - }
|
| -
|
| - String mimeType = resource->response().mimeType();
|
| - m_resources->append(SerializedResource(url, mimeType, data));
|
| - m_resourceURLs.add(url);
|
| -}
|
| -
|
| -void PageSerializer::addImageToResources(ImageResource* image, const KURL& url)
|
| -{
|
| - if (!shouldAddURL(url))
|
| - return;
|
| -
|
| - if (!image || !image->hasImage() || image->errorOccurred())
|
| - return;
|
| -
|
| - RefPtr<SharedBuffer> data = image->image()->data();
|
| - addToResources(image, data, url);
|
| -}
|
| -
|
| -void PageSerializer::addFontToResources(FontResource* font)
|
| -{
|
| - if (!font || !shouldAddURL(font->url()) || !font->isLoaded() || !font->resourceBuffer())
|
| - return;
|
| -
|
| - RefPtr<SharedBuffer> data(font->resourceBuffer());
|
| -
|
| - addToResources(font, data, font->url());
|
| -}
|
| -
|
| -void PageSerializer::retrieveResourcesForProperties(const StylePropertySet* styleDeclaration, Document& document)
|
| -{
|
| - if (!styleDeclaration)
|
| - return;
|
| -
|
| - // The background-image and list-style-image (for ul or ol) are the CSS properties
|
| - // that make use of images. We iterate to make sure we include any other
|
| - // image properties there might be.
|
| - unsigned propertyCount = styleDeclaration->propertyCount();
|
| - for (unsigned i = 0; i < propertyCount; ++i) {
|
| - RefPtrWillBeRawPtr<CSSValue> cssValue = styleDeclaration->propertyAt(i).value();
|
| - retrieveResourcesForCSSValue(cssValue.get(), document);
|
| - }
|
| -}
|
| -
|
| -void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document& document)
|
| -{
|
| - if (cssValue->isImageValue()) {
|
| - CSSImageValue* imageValue = toCSSImageValue(cssValue);
|
| - if (imageValue->isCachePending())
|
| - return;
|
| - StyleImage* styleImage = imageValue->cachedImage();
|
| - if (!styleImage || !styleImage->isImageResource())
|
| - return;
|
| -
|
| - addImageToResources(styleImage->cachedImage(), styleImage->cachedImage()->url());
|
| - } else if (cssValue->isFontFaceSrcValue()) {
|
| - CSSFontFaceSrcValue* fontFaceSrcValue = toCSSFontFaceSrcValue(cssValue);
|
| - if (fontFaceSrcValue->isLocal()) {
|
| - return;
|
| - }
|
| -
|
| - addFontToResources(fontFaceSrcValue->fetch(&document));
|
| - } else if (cssValue->isValueList()) {
|
| - CSSValueList* cssValueList = toCSSValueList(cssValue);
|
| - for (unsigned i = 0; i < cssValueList->length(); i++)
|
| - retrieveResourcesForCSSValue(cssValueList->item(i), document);
|
| - }
|
| -}
|
| -
|
| -// Returns MOTW (Mark of the Web) declaration before html tag which is in
|
| -// HTML comment, e.g. "<!-- saved from url=(%04d)%s -->"
|
| -// See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
|
| -String PageSerializer::markOfTheWebDeclaration(const KURL& url)
|
| -{
|
| - StringBuilder builder;
|
| - bool emitsMinus = false;
|
| - CString orignalUrl = url.string().ascii();
|
| - for (const char* string = orignalUrl.data(); *string; ++string) {
|
| - const char ch = *string;
|
| - if (ch == '-' && emitsMinus) {
|
| - builder.append("%2D");
|
| - emitsMinus = false;
|
| - continue;
|
| - }
|
| - emitsMinus = ch == '-';
|
| - builder.append(ch);
|
| - }
|
| - CString escapedUrl = builder.toString().ascii();
|
| - return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl.length()), escapedUrl.data());
|
| -}
|
| -
|
| -} // namespace blink
|
|
|