Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1491)

Unified Diff: Source/web/WebPageSerializer.cpp

Issue 1177733003: Merge page serializers [12/12] (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Rebase Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | Source/web/WebPageSerializerImpl.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Source/web/WebPageSerializer.cpp
diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp
index a436d085b65119c05d0b70200d7a7500eec31a49..0d8b4a24351e9e18726f05c38c79e78b60c80681 100644
--- a/Source/web/WebPageSerializer.cpp
+++ b/Source/web/WebPageSerializer.cpp
@@ -29,14 +29,13 @@
*/
#include "config.h"
+
#include "public/web/WebPageSerializer.h"
-#include "core/HTMLNames.h"
#include "core/dom/Document.h"
#include "core/dom/Element.h"
-#include "core/frame/LocalFrame.h"
+#include "core/frame/Frame.h"
#include "core/html/HTMLAllCollection.h"
-#include "core/html/HTMLFrameElementBase.h"
#include "core/html/HTMLFrameOwnerElement.h"
#include "core/html/HTMLInputElement.h"
#include "core/html/HTMLTableElement.h"
@@ -50,11 +49,8 @@
#include "public/platform/WebString.h"
#include "public/platform/WebURL.h"
#include "public/platform/WebVector.h"
-#include "public/web/WebFrame.h"
+#include "public/web/WebLocalFrame.h"
#include "public/web/WebPageSerializerClient.h"
-#include "public/web/WebView.h"
-#include "web/WebLocalFrameImpl.h"
-#include "web/WebPageSerializerImpl.h"
#include "web/WebViewImpl.h"
#include "wtf/Vector.h"
#include "wtf/text/StringConcatenate.h"
@@ -63,99 +59,12 @@ namespace blink {
namespace {
-KURL getSubResourceURLFromElement(Element* element)
-{
- ASSERT(element);
- const QualifiedName& attributeName = element->subResourceAttributeName();
- if (attributeName == QualifiedName::null())
- return KURL();
-
- String value = element->getAttribute(attributeName);
- // Ignore javascript content.
- if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", TextCaseInsensitive))
- return KURL();
-
- return element->document().completeURL(value);
-}
-
-void retrieveResourcesForElement(Element* element,
- Vector<LocalFrame*>* visitedFrames,
- Vector<LocalFrame*>* framesToVisit,
- Vector<KURL>* frameURLs,
- Vector<KURL>* resourceURLs)
-{
- ASSERT(element);
- // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
- if (isHTMLFrameElementBase(*element) || isHTMLObjectElement(*element) || isHTMLEmbedElement(*element)) {
- Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame();
- if (frame && frame->isLocalFrame()) {
- if (!visitedFrames->contains(toLocalFrame(frame)))
- framesToVisit->append(toLocalFrame(frame));
- return;
- }
- }
-
- KURL url = getSubResourceURLFromElement(element);
- if (url.isEmpty() || !url.isValid())
- return; // No subresource for this node.
-
- // Ignore URLs that have a non-standard protocols. Since the FTP protocol
- // does no have a cache mechanism, we skip it as well.
- if (!url.protocolIsInHTTPFamily() && !url.isLocalFile())
- return;
-
- if (!resourceURLs->contains(url))
- resourceURLs->append(url);
-}
-
-void retrieveResourcesForFrame(LocalFrame* frame,
- const WebVector<WebCString>& supportedSchemes,
- Vector<LocalFrame*>* visitedFrames,
- Vector<LocalFrame*>* framesToVisit,
- Vector<KURL>* frameURLs,
- Vector<KURL>* resourceURLs)
-{
- KURL frameURL = frame->loader().documentLoader()->request().url();
-
- // If the frame's URL is invalid, ignore it, it is not retrievable.
- if (!frameURL.isValid())
- return;
-
- // Ignore frames from unsupported schemes.
- bool isValidScheme = false;
- for (size_t i = 0; i < supportedSchemes.size(); ++i) {
- if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
- isValidScheme = true;
- break;
- }
- }
- if (!isValidScheme)
- return;
-
- // If we have already seen that frame, ignore it.
- if (visitedFrames->contains(frame))
- return;
- visitedFrames->append(frame);
- if (!frameURLs->contains(frameURL))
- frameURLs->append(frameURL);
-
- // Now get the resources associated with each node of the document.
- RefPtrWillBeRawPtr<HTMLAllCollection> allElements = frame->document()->all();
- for (unsigned i = 0; i < allElements->length(); ++i) {
- Element* element = allElements->item(i);
- retrieveResourcesForElement(element,
- visitedFrames, framesToVisit,
- frameURLs, resourceURLs);
- }
-}
-
class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
public:
~MHTMLPageSerializerDelegate() override;
bool shouldIgnoreAttribute(const Attribute&) override;
};
-
MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate()
{
}
@@ -212,57 +121,28 @@ WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
return WebCString(mhtml->data(), mhtml->size());
}
-bool WebPageSerializer::serialize(WebLocalFrame* frame,
- bool recursive,
- WebPageSerializerClient* client,
- const WebVector<WebURL>& links,
- const WebVector<WebString>& localPaths,
- const WebString& localDirectoryName)
+bool WebPageSerializer::serialize(WebLocalFrame* frame, bool recursive, WebPageSerializerClient* client,
+ const WebVector<WebURL>& links, const WebVector<WebString>& localPaths, const WebString& localDirectoryName)
{
- WebPageSerializerImpl serializerImpl(
- frame, recursive, client, links, localPaths, localDirectoryName);
- return serializerImpl.serialize();
-}
-
-bool WebPageSerializer::retrieveAllResources(WebView* view,
- const WebVector<WebCString>& supportedSchemes,
- WebVector<WebURL>* resourceURLs,
- WebVector<WebURL>* frameURLs) {
- WebLocalFrameImpl* mainFrame = toWebLocalFrameImpl(view->mainFrame());
- if (!mainFrame)
- return false;
+ ASSERT(frame);
+ ASSERT(client);
+ ASSERT(links.size() == localPaths.size());
- Vector<LocalFrame*> framesToVisit;
- Vector<LocalFrame*> visitedFrames;
- Vector<KURL> frameKURLs;
- Vector<KURL> resourceKURLs;
+ Vector<SerializedResource> resources;
+ PageSerializer serializer(&resources, nullptr);
- // Let's retrieve the resources from every frame in this page.
- framesToVisit.append(mainFrame->frame());
- while (!framesToVisit.isEmpty()) {
- LocalFrame* frame = framesToVisit[0];
- framesToVisit.remove(0);
- retrieveResourcesForFrame(frame, supportedSchemes,
- &visitedFrames, &framesToVisit,
- &frameKURLs, &resourceKURLs);
+ serializer.setRewriteURLFolder(localDirectoryName);
+ for (size_t i = 0; i < links.size(); i++) {
+ KURL url = links[i];
+ serializer.registerRewriteURL(url.string(), localPaths[i]);
}
- // Converts the results to WebURLs.
- WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
- for (size_t i = 0; i < resourceKURLs.size(); ++i) {
- resultResourceURLs[i] = resourceKURLs[i];
- // A frame's src can point to the same URL as another resource, keep the
- // resource URL only in such cases.
- size_t index = frameKURLs.find(resourceKURLs[i]);
- if (index != kNotFound)
- frameKURLs.remove(index);
- }
- *resourceURLs = resultResourceURLs;
- WebVector<WebURL> resultFrameURLs(frameKURLs.size());
- for (size_t i = 0; i < frameKURLs.size(); ++i)
- resultFrameURLs[i] = frameKURLs[i];
- *frameURLs = resultFrameURLs;
+ serializer.serialize(toWebViewImpl(frame->view())->page());
+ for (SerializedResource& resource : resources) {
+ client->didSerializeDataForFrame(resource.url, WebCString(resource.data->data(), resource.data->size()), WebPageSerializerClient::CurrentFrameIsFinished);
+ }
+ client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
return true;
}
@@ -275,8 +155,7 @@ WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& cha
WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url)
{
return String::format("\n<!-- saved from url=(%04d)%s -->\n",
- static_cast<int>(url.spec().length()),
- url.spec().data());
+ static_cast<int>(url.spec().length()), url.spec().data());
}
WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
« no previous file with comments | « no previous file | Source/web/WebPageSerializerImpl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698