Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(705)

Unified Diff: third_party/WebKit/Source/core/page/PageSerializer.cpp

Issue 1541463002: Rename [Web]PageSerializer[Test|Client|Impl] to ...FrameSerializer... (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-deduplication-of-resources
Patch Set: Rebasing... Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/page/PageSerializer.cpp
diff --git a/third_party/WebKit/Source/core/page/PageSerializer.cpp b/third_party/WebKit/Source/core/page/PageSerializer.cpp
deleted file mode 100644
index a3fe2fea67075906a0059065ed74c24180d8f5a8..0000000000000000000000000000000000000000
--- a/third_party/WebKit/Source/core/page/PageSerializer.cpp
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * Copyright (C) 2011 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "core/page/PageSerializer.h"
-
-#include "core/HTMLNames.h"
-#include "core/InputTypeNames.h"
-#include "core/css/CSSFontFaceRule.h"
-#include "core/css/CSSFontFaceSrcValue.h"
-#include "core/css/CSSImageValue.h"
-#include "core/css/CSSImportRule.h"
-#include "core/css/CSSRuleList.h"
-#include "core/css/CSSStyleDeclaration.h"
-#include "core/css/CSSStyleRule.h"
-#include "core/css/CSSValueList.h"
-#include "core/css/StylePropertySet.h"
-#include "core/css/StyleRule.h"
-#include "core/css/StyleSheetContents.h"
-#include "core/dom/Document.h"
-#include "core/dom/Element.h"
-#include "core/dom/Text.h"
-#include "core/editing/serializers/MarkupAccumulator.h"
-#include "core/fetch/FontResource.h"
-#include "core/fetch/ImageResource.h"
-#include "core/frame/LocalFrame.h"
-#include "core/html/HTMLFrameElementBase.h"
-#include "core/html/HTMLImageElement.h"
-#include "core/html/HTMLInputElement.h"
-#include "core/html/HTMLLinkElement.h"
-#include "core/html/HTMLMetaElement.h"
-#include "core/html/HTMLStyleElement.h"
-#include "core/html/ImageDocument.h"
-#include "core/page/Page.h"
-#include "core/style/StyleFetchedImage.h"
-#include "core/style/StyleImage.h"
-#include "platform/SerializedResource.h"
-#include "platform/graphics/Image.h"
-#include "platform/heap/Handle.h"
-#include "wtf/HashSet.h"
-#include "wtf/OwnPtr.h"
-#include "wtf/text/CString.h"
-#include "wtf/text/StringBuilder.h"
-#include "wtf/text/TextEncoding.h"
-#include "wtf/text/WTFString.h"
-
-namespace blink {
-
-// Tells whether |element| must be left out of the serialized markup. This is
-// the case for <script>, <noscript>, and any <meta> element whose computed
-// encoding is valid.
-static bool shouldIgnoreElement(const Element& element)
-{
-    if (isHTMLScriptElement(element) || isHTMLNoScriptElement(element))
-        return true;
-    if (!isHTMLMetaElement(element))
-        return false;
-    return toHTMLMetaElement(element).computeEncoding().isValid();
-}
-
-// MarkupAccumulator used by PageSerializer. It skips ignored elements, asks
-// the delegate which attributes to drop or rewrite, and records every node
-// whose start tag it emits.
-class SerializerMarkupAccumulator : public MarkupAccumulator {
-    STACK_ALLOCATED();
-public:
-    SerializerMarkupAccumulator(PageSerializer::Delegate&, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes);
-    ~SerializerMarkupAccumulator() override;
-
-protected:
-    // MarkupAccumulator overrides.
-    void appendText(StringBuilder& out, Text&) override;
-    bool shouldIgnoreAttribute(const Attribute&) override;
-    void appendElement(StringBuilder& out, Element&, Namespaces*) override;
-    void appendAttribute(StringBuilder& out, const Element&, const Attribute&, Namespaces*) override;
-    void appendStartTag(Node&, Namespaces* = nullptr) override;
-    void appendEndTag(const Element&) override;
-
-private:
-    // Appends |attributeValue| escaped for the kind of document being serialized.
-    void appendAttributeValue(StringBuilder& out, const String& attributeValue);
-
-    // Appends ` attributeName="attributeValue"`, at most once per element.
-    void appendRewrittenAttribute(StringBuilder& out, const Element&, const String& attributeName, const String& attributeValue);
-
-    PageSerializer::Delegate& m_delegate;
-    RawPtrWillBeMember<const Document> m_document;
-
-    // FIXME: |PageSerializer| uses |m_nodes| to collect the nodes that were
-    // written into the serialized text, and afterwards extracts images,
-    // objects, etc. from them. This vector can get large for a large
-    // document; a callback-like mechanism would be better.
-    WillBeHeapVector<RawPtrWillBeMember<Node>>& m_nodes;
-
-    // Elements whose links were already rewritten via appendAttribute().
-    WillBeHeapHashSet<RawPtrWillBeMember<const Element>> m_elementsWithRewrittenLinks;
-};
-
-// Stores the delegate, the document being serialized and a reference to the
-// vector that receives the visited nodes. The base accumulator is configured
-// with ResolveAllURLs.
-SerializerMarkupAccumulator::SerializerMarkupAccumulator(
-    PageSerializer::Delegate& delegate,
-    const Document& document,
-    WillBeHeapVector<RawPtrWillBeMember<Node>>& nodes)
-    : MarkupAccumulator(ResolveAllURLs)
-    , m_delegate(delegate)
-    , m_document(&document)
-    , m_nodes(nodes)
-{
-}
-
-// Intentionally empty: the accumulator performs no explicit cleanup.
-SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
-{
-}
-
-// Emits |text| only when it has a parent element and that element is not one
-// of the ignored ones.
-void SerializerMarkupAccumulator::appendText(StringBuilder& result, Text& text)
-{
-    const Element* container = text.parentElement();
-    if (!container || shouldIgnoreElement(*container))
-        return;
-    MarkupAccumulator::appendText(result, text);
-}
-
-// The decision is left entirely to the PageSerializer delegate.
-bool SerializerMarkupAccumulator::shouldIgnoreAttribute(const Attribute& attribute)
-{
-    const bool ignoredByDelegate = m_delegate.shouldIgnoreAttribute(attribute);
-    return ignoredByDelegate;
-}
-
-// Appends |element| unless it is ignored. For a <head> element, also appends
-// a Content-Type <meta> carrying the document's suggested MIME type and
-// character set.
-void SerializerMarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
-{
-    if (!shouldIgnoreElement(element))
-        MarkupAccumulator::appendElement(result, element, namespaces);
-
-    // FIXME: For object (plugins) tags and video tag we could replace them by an image of their current contents.
-
-    if (!isHTMLHeadElement(element))
-        return;
-
-    // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an element like this, without special cases for XHTML
-    result.appendLiteral("<meta http-equiv=\"Content-Type\" content=\"");
-    appendAttributeValue(result, m_document->suggestedMIMEType());
-    result.appendLiteral("; charset=");
-    appendAttributeValue(result, m_document->characterSet());
-    // XHTML gets a self-closing tag.
-    if (!m_document->isXHTMLDocument())
-        result.appendLiteral("\">");
-    else
-        result.appendLiteral("\" />");
-}
-
-// Appends |attribute| of |element|. Link attributes, and srcdoc on frame
-// elements, are first offered to the delegate for rewriting; when the
-// delegate declines, the original attribute is written unchanged.
-void SerializerMarkupAccumulator::appendAttribute(
-    StringBuilder& out,
-    const Element& element,
-    const Attribute& attribute,
-    Namespaces* namespaces)
-{
-    // Only these two kinds of attribute can be affected by link rewriting.
-    const bool isLink = element.hasLegalLinkAttribute(attribute.name());
-    const bool isSrcDoc = isHTMLFrameElementBase(element)
-        && attribute.name() == HTMLNames::srcdocAttr;
-
-    String rewrittenLink;
-    if ((isLink || isSrcDoc) && m_delegate.rewriteLink(element, rewrittenLink)) {
-        if (isLink) {
-            // Keep the attribute name and substitute the delegate's link.
-            appendRewrittenAttribute(
-                out, element, attribute.name().toString(), rewrittenLink);
-        } else {
-            ASSERT(isSrcDoc);
-            // Write src in place of srcdoc for frame elements, so that the
-            // serialized subframe loads its html from the link supplied by
-            // Delegate::rewriteLink and not from the srcdoc contents.
-            appendRewrittenAttribute(
-                out, element, HTMLNames::srcAttr.localName(), rewrittenLink);
-        }
-        return;
-    }
-
-    // No rewriting took place: append the attribute as it is.
-    MarkupAccumulator::appendAttribute(out, element, attribute, namespaces);
-}
-
-// Emits the start tag and then records |node| in the node list that was
-// handed to the constructor.
-void SerializerMarkupAccumulator::appendStartTag(Node& node, Namespaces* namespaces)
-{
-    MarkupAccumulator::appendStartTag(node, namespaces);
-    Node* visited = &node;
-    m_nodes.append(visited);
-}
-
-// Ignored elements get no end tag.
-void SerializerMarkupAccumulator::appendEndTag(const Element& element)
-{
-    if (shouldIgnoreElement(element))
-        return;
-    MarkupAccumulator::appendEndTag(element);
-}
-
-// Forwards to MarkupFormatter::appendAttributeValue, telling it whether the
-// serialized document is an HTML document.
-void SerializerMarkupAccumulator::appendAttributeValue(
-    StringBuilder& out,
-    const String& attributeValue)
-{
-    const bool documentIsHTML = m_document->isHTMLDocument();
-    MarkupFormatter::appendAttributeValue(out, attributeValue, documentIsHTML);
-}
-
-// Writes ` attributeName="attributeValue"` for |element|. Only the first call
-// per element has any effect; later calls for the same element are no-ops.
-void SerializerMarkupAccumulator::appendRewrittenAttribute(
-    StringBuilder& out,
-    const Element& element,
-    const String& attributeName,
-    const String& attributeValue)
-{
-    const bool alreadyRewritten = m_elementsWithRewrittenLinks.contains(&element);
-    if (alreadyRewritten)
-        return;
-    m_elementsWithRewrittenLinks.add(&element);
-
-    // TODO(tiger): Refactor MarkupAccumulator so it is easier to append an attribute like this.
-    out.append(' ');
-    out.append(attributeName);
-    out.appendLiteral("=\"");
-    appendAttributeValue(out, attributeValue);
-    out.appendLiteral("\"");
-}
-
-// TODO(tiger): Right now there is no support for rewriting URLs inside CSS
-// documents which leads to bugs like <https://crbug.com/251898>. Not being
-// able to rewrite URLs inside CSS documents means that resources imported from
-// url(...) statements in CSS might not work when rewriting links for the
-// "Webpage, Complete" method of saving a page. It will take some work but it
-// needs to be done if we want to continue to support non-MHTML saved pages.
-
-// Keeps a pointer to |resources|, the vector that serialization appends to,
-// and a reference to |delegate|.
-PageSerializer::PageSerializer(Vector<SerializedResource>& resources, Delegate& delegate)
-    : m_resources(&resources)
-    , m_delegate(delegate)
-{
-}
-
-// Serializes the document of |frame| into |m_resources|: first the markup
-// itself, then the images and style sheets referenced by the nodes that were
-// written out. An image document contributes only its image.
-void PageSerializer::serializeFrame(const LocalFrame& frame)
-{
-    ASSERT(frame.document());
-    Document& document = *frame.document();
-    const KURL frameURL = document.url();
-
-    // An image document is represented by its image alone.
-    if (document.isImageDocument()) {
-        addImageToResources(toImageDocument(document).cachedImage(), frameURL);
-        return;
-    }
-
-    WillBeHeapVector<RawPtrWillBeMember<Node>> visitedNodes;
-    SerializerMarkupAccumulator accumulator(m_delegate, document, visitedNodes);
-    String markup = serializeNodes<EditingStrategy>(accumulator, document, IncludeNode);
-
-    CString encodedMarkup = document.encoding().encode(markup, WTF::EntitiesForUnencodables);
-    m_resources->append(SerializedResource(frameURL, document.suggestedMIMEType(), SharedBuffer::create(encodedMarkup.data(), encodedMarkup.length())));
-
-    for (Node* node : visitedNodes) {
-        ASSERT(node);
-        if (!node->isElementNode())
-            continue;
-
-        Element& element = toElement(*node);
-        // Inline style may reference resources too (typically background images).
-        if (element.isStyledElement()) {
-            retrieveResourcesForProperties(element.inlineStyle(), document);
-            retrieveResourcesForProperties(element.presentationAttributeStyle(), document);
-        }
-
-        if (isHTMLImageElement(element)) {
-            HTMLImageElement& image = toHTMLImageElement(element);
-            const KURL imageURL = document.completeURL(image.getAttribute(HTMLNames::srcAttr));
-            addImageToResources(image.cachedImage(), imageURL);
-            continue;
-        }
-
-        if (isHTMLInputElement(element)) {
-            HTMLInputElement& input = toHTMLInputElement(element);
-            // Only <input type=image> with an image loader carries an image.
-            if (input.type() != InputTypeNames::image || !input.imageLoader())
-                continue;
-            const KURL inputImageURL = input.src();
-            addImageToResources(input.imageLoader()->image(), inputImageURL);
-            continue;
-        }
-
-        if (isHTMLLinkElement(element)) {
-            HTMLLinkElement& link = toHTMLLinkElement(element);
-            if (CSSStyleSheet* sheet = link.sheet()) {
-                const KURL sheetURL = document.completeURL(link.getAttribute(HTMLNames::hrefAttr));
-                serializeCSSStyleSheet(*sheet, sheetURL);
-            }
-            continue;
-        }
-
-        if (isHTMLStyleElement(element)) {
-            // A <style> sheet has no URL of its own, so an empty KURL is passed.
-            if (CSSStyleSheet* sheet = toHTMLStyleElement(element).sheet())
-                serializeCSSStyleSheet(*sheet, KURL());
-        }
-    }
-}
-
-// Rebuilds the text of |styleSheet| rule by rule and collects the resources
-// its rules reference. The rebuilt text is stored as a "text/css" resource
-// under |url| only when shouldAddURL(url) accepts it.
-void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KURL& url)
-{
-    StringBuilder cssText;
-    cssText.appendLiteral("@charset \"");
-    cssText.append(styleSheet.contents()->charset().lower());
-    cssText.appendLiteral("\";\n\n");
-
-    for (unsigned index = 0; index < styleSheet.length(); ++index) {
-        CSSRule* rule = styleSheet.item(index);
-        const String ruleText = rule->cssText();
-        if (!ruleText.isEmpty()) {
-            cssText.append(ruleText);
-            // Rules are separated by a blank line; nothing follows the last one.
-            const bool isLastRule = index + 1 == styleSheet.length();
-            if (!isLastRule)
-                cssText.appendLiteral("\n\n");
-        }
-
-        // Some rules have resources associated with them that must be fetched.
-        serializeCSSRule(rule);
-    }
-
-    if (!shouldAddURL(url))
-        return;
-
-    WTF::TextEncoding sheetEncoding(styleSheet.contents()->charset());
-    ASSERT(sheetEncoding.isValid());
-    String sheetText = cssText.toString();
-    CString encodedSheet = sheetEncoding.encode(sheetText, WTF::EntitiesForUnencodables);
-    m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(encodedSheet.data(), encodedSheet.length())));
-    m_resourceURLs.add(url);
-}
-
-// Collects the resources referenced by |rule|: properties of style and
-// font-face rules, the style sheet of an @import rule, and, recursively, the
-// child rules of @media and @supports rules.
-void PageSerializer::serializeCSSRule(CSSRule* rule)
-{
-    CSSStyleSheet* parentSheet = rule->parentStyleSheet();
-    ASSERT(parentSheet->ownerDocument());
-    Document& document = *parentSheet->ownerDocument();
-
-    switch (rule->type()) {
-    case CSSRule::STYLE_RULE:
-        retrieveResourcesForProperties(&toCSSStyleRule(rule)->styleRule()->properties(), document);
-        break;
-
-    case CSSRule::FONT_FACE_RULE:
-        retrieveResourcesForProperties(&toCSSFontFaceRule(rule)->styleRule()->properties(), document);
-        break;
-
-    case CSSRule::IMPORT_RULE: {
-        CSSImportRule* importRule = toCSSImportRule(rule);
-        KURL baseURL = parentSheet->baseURL();
-        ASSERT(baseURL.isValid());
-        KURL importedURL = KURL(baseURL, importRule->href());
-        // Serialize the imported sheet only if its URL is not already
-        // recorded and the sheet is actually available.
-        if (!m_resourceURLs.contains(importedURL) && importRule->styleSheet())
-            serializeCSSStyleSheet(*importRule->styleSheet(), importedURL);
-        break;
-    }
-
-    // Rules inheriting CSSGroupingRule
-    case CSSRule::MEDIA_RULE:
-    case CSSRule::SUPPORTS_RULE: {
-        CSSRuleList* childRules = rule->cssRules();
-        for (unsigned index = 0; index < childRules->length(); ++index)
-            serializeCSSRule(childRules->item(index));
-        break;
-    }
-
-    // Rules in which no external resources can be referenced
-    case CSSRule::CHARSET_RULE:
-    case CSSRule::PAGE_RULE:
-    case CSSRule::KEYFRAMES_RULE:
-    case CSSRule::KEYFRAME_RULE:
-    case CSSRule::VIEWPORT_RULE:
-        break;
-
-    default:
-        ASSERT_NOT_REACHED();
-    }
-}
-
-// A URL is added only if it is valid, not yet recorded in |m_resourceURLs|,
-// not a data: URL, and not skipped by the delegate.
-bool PageSerializer::shouldAddURL(const KURL& url)
-{
-    if (!url.isValid() || m_resourceURLs.contains(url) || url.protocolIsData())
-        return false;
-    return !m_delegate.shouldSkipResource(url);
-}
-
-// Stores |data| as a serialized resource for |url|, using the MIME type from
-// the response of |resource|, and records |url| as handled. When there is no
-// data, an error is logged and nothing is stored.
-void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
-{
-    if (!data) {
-        WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
-        return;
-    }
-
-    const String responseMimeType = resource->response().mimeType();
-    m_resources->append(SerializedResource(url, responseMimeType, data));
-    m_resourceURLs.add(url);
-}
-
-// Adds the data of |image| under |url|, provided the URL qualifies and the
-// image exists, holds an image and did not fail to load.
-void PageSerializer::addImageToResources(ImageResource* image, const KURL& url)
-{
-    // The URL is checked before the image, as short-circuit evaluation ensures.
-    if (!shouldAddURL(url) || !image || !image->hasImage() || image->errorOccurred())
-        return;
-
-    RefPtr<SharedBuffer> imageData = image->image()->data();
-    addToResources(image, imageData, url);
-}
-
-// Adds the buffer of |font| under the font's own URL. Nothing is added when
-// the font is null, its URL does not qualify, it is not loaded, or it has no
-// resource buffer.
-void PageSerializer::addFontToResources(FontResource* font)
-{
-    if (!font)
-        return;
-    if (!shouldAddURL(font->url()))
-        return;
-    if (!font->isLoaded() || !font->resourceBuffer())
-        return;
-
-    RefPtr<SharedBuffer> fontData(font->resourceBuffer());
-    addToResources(font, fontData, font->url());
-}
-
-// Inspects the value of every property in |styleDeclaration| for resources.
-// A null declaration is accepted and ignored.
-void PageSerializer::retrieveResourcesForProperties(const StylePropertySet* styleDeclaration, Document& document)
-{
-    if (!styleDeclaration)
-        return;
-
-    // background-image and list-style-image (for ul or ol) are the usual CSS
-    // properties that refer to images, but all properties are visited so
-    // that no other image property is missed.
-    const unsigned total = styleDeclaration->propertyCount();
-    unsigned index = 0;
-    while (index < total) {
-        RefPtrWillBeRawPtr<CSSValue> value = styleDeclaration->propertyAt(index).value();
-        retrieveResourcesForCSSValue(value.get(), document);
-        ++index;
-    }
-}
-
-// Adds the resource behind |cssValue|: the image of an image value or the
-// font of a non-local font-face src value. Value lists are walked
-// recursively; every other kind of value is ignored.
-void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document& document)
-{
-    if (cssValue->isImageValue()) {
-        CSSImageValue* imageValue = toCSSImageValue(cssValue);
-        if (imageValue->isCachePending())
-            return;
-        StyleImage* styleImage = imageValue->cachedImage();
-        if (!styleImage || !styleImage->isImageResource())
-            return;
-
-        ImageResource* imageResource = styleImage->cachedImage();
-        addImageToResources(imageResource, imageResource->url());
-        return;
-    }
-
-    if (cssValue->isFontFaceSrcValue()) {
-        CSSFontFaceSrcValue* fontSrc = toCSSFontFaceSrcValue(cssValue);
-        // Local fonts are skipped; only non-local sources are fetched.
-        if (!fontSrc->isLocal())
-            addFontToResources(fontSrc->fetch(&document));
-        return;
-    }
-
-    if (cssValue->isValueList()) {
-        CSSValueList* valueList = toCSSValueList(cssValue);
-        for (unsigned index = 0; index < valueList->length(); ++index)
-            retrieveResourcesForCSSValue(valueList->item(index), document);
-    }
-}
-
-// Builds the text of the MOTW (Mark of the Web) declaration for |url|. The
-// declaration goes into an HTML comment placed before the html tag, e.g.
-// "<!-- saved from url=(%04d)%s -->"; this function returns only the text
-// between the comment delimiters.
-// See http://msdn2.microsoft.com/en-us/library/ms537628(VS.85).aspx.
-//
-// The second '-' of every "--" pair in the URL is written as "%2D", so the
-// returned text never contains "--".
-String PageSerializer::markOfTheWebDeclaration(const KURL& url)
-{
-    StringBuilder escaped;
-    bool previousWasPlainMinus = false;
-    const CString asciiURL = url.string().ascii();
-    for (const char* cursor = asciiURL.data(); *cursor; ++cursor) {
-        const char ch = *cursor;
-        const bool completesDoubleMinus = ch == '-' && previousWasPlainMinus;
-        if (completesDoubleMinus) {
-            escaped.append("%2D");
-            previousWasPlainMinus = false;
-        } else {
-            previousWasPlainMinus = ch == '-';
-            escaped.append(ch);
-        }
-    }
-    CString escapedUrl = escaped.toString().ascii();
-    return String::format("saved from url=(%04d)%s", static_cast<int>(escapedUrl.length()), escapedUrl.data());
-}
-
-} // namespace blink
« no previous file with comments | « third_party/WebKit/Source/core/page/PageSerializer.h ('k') | third_party/WebKit/Source/web/WebFrameSerializer.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698