Index: Source/web/WebPageSerializer.cpp |
diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp |
index a436d085b65119c05d0b70200d7a7500eec31a49..0d8b4a24351e9e18726f05c38c79e78b60c80681 100644 |
--- a/Source/web/WebPageSerializer.cpp |
+++ b/Source/web/WebPageSerializer.cpp |
@@ -29,14 +29,13 @@ |
*/ |
#include "config.h" |
+ |
#include "public/web/WebPageSerializer.h" |
-#include "core/HTMLNames.h" |
#include "core/dom/Document.h" |
#include "core/dom/Element.h" |
-#include "core/frame/LocalFrame.h" |
+#include "core/frame/Frame.h" |
#include "core/html/HTMLAllCollection.h" |
-#include "core/html/HTMLFrameElementBase.h" |
#include "core/html/HTMLFrameOwnerElement.h" |
#include "core/html/HTMLInputElement.h" |
#include "core/html/HTMLTableElement.h" |
@@ -50,11 +49,8 @@ |
#include "public/platform/WebString.h" |
#include "public/platform/WebURL.h" |
#include "public/platform/WebVector.h" |
-#include "public/web/WebFrame.h" |
+#include "public/web/WebLocalFrame.h" |
#include "public/web/WebPageSerializerClient.h" |
-#include "public/web/WebView.h" |
-#include "web/WebLocalFrameImpl.h" |
-#include "web/WebPageSerializerImpl.h" |
#include "web/WebViewImpl.h" |
#include "wtf/Vector.h" |
#include "wtf/text/StringConcatenate.h" |
@@ -63,99 +59,12 @@ namespace blink { |
namespace { |
-KURL getSubResourceURLFromElement(Element* element) |
-{ |
- ASSERT(element); |
- const QualifiedName& attributeName = element->subResourceAttributeName(); |
- if (attributeName == QualifiedName::null()) |
- return KURL(); |
- |
- String value = element->getAttribute(attributeName); |
- // Ignore javascript content. |
- if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", TextCaseInsensitive)) |
- return KURL(); |
- |
- return element->document().completeURL(value); |
-} |
- |
-void retrieveResourcesForElement(Element* element, |
- Vector<LocalFrame*>* visitedFrames, |
- Vector<LocalFrame*>* framesToVisit, |
- Vector<KURL>* frameURLs, |
- Vector<KURL>* resourceURLs) |
-{ |
- ASSERT(element); |
- // If the node is a frame, we'll process it later in retrieveResourcesForFrame. |
- if (isHTMLFrameElementBase(*element) || isHTMLObjectElement(*element) || isHTMLEmbedElement(*element)) { |
- Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame(); |
- if (frame && frame->isLocalFrame()) { |
- if (!visitedFrames->contains(toLocalFrame(frame))) |
- framesToVisit->append(toLocalFrame(frame)); |
- return; |
- } |
- } |
- |
- KURL url = getSubResourceURLFromElement(element); |
- if (url.isEmpty() || !url.isValid()) |
- return; // No subresource for this node. |
- |
- // Ignore URLs that have a non-standard protocols. Since the FTP protocol |
- // does no have a cache mechanism, we skip it as well. |
- if (!url.protocolIsInHTTPFamily() && !url.isLocalFile()) |
- return; |
- |
- if (!resourceURLs->contains(url)) |
- resourceURLs->append(url); |
-} |
- |
-void retrieveResourcesForFrame(LocalFrame* frame, |
- const WebVector<WebCString>& supportedSchemes, |
- Vector<LocalFrame*>* visitedFrames, |
- Vector<LocalFrame*>* framesToVisit, |
- Vector<KURL>* frameURLs, |
- Vector<KURL>* resourceURLs) |
-{ |
- KURL frameURL = frame->loader().documentLoader()->request().url(); |
- |
- // If the frame's URL is invalid, ignore it, it is not retrievable. |
- if (!frameURL.isValid()) |
- return; |
- |
- // Ignore frames from unsupported schemes. |
- bool isValidScheme = false; |
- for (size_t i = 0; i < supportedSchemes.size(); ++i) { |
- if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) { |
- isValidScheme = true; |
- break; |
- } |
- } |
- if (!isValidScheme) |
- return; |
- |
- // If we have already seen that frame, ignore it. |
- if (visitedFrames->contains(frame)) |
- return; |
- visitedFrames->append(frame); |
- if (!frameURLs->contains(frameURL)) |
- frameURLs->append(frameURL); |
- |
- // Now get the resources associated with each node of the document. |
- RefPtrWillBeRawPtr<HTMLAllCollection> allElements = frame->document()->all(); |
- for (unsigned i = 0; i < allElements->length(); ++i) { |
- Element* element = allElements->item(i); |
- retrieveResourcesForElement(element, |
- visitedFrames, framesToVisit, |
- frameURLs, resourceURLs); |
- } |
-} |
- |
class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate { |
public: |
~MHTMLPageSerializerDelegate() override; |
bool shouldIgnoreAttribute(const Attribute&) override; |
}; |
- |
MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate() |
{ |
} |
@@ -212,57 +121,28 @@ WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view) |
return WebCString(mhtml->data(), mhtml->size()); |
} |
-bool WebPageSerializer::serialize(WebLocalFrame* frame, |
- bool recursive, |
- WebPageSerializerClient* client, |
- const WebVector<WebURL>& links, |
- const WebVector<WebString>& localPaths, |
- const WebString& localDirectoryName) |
+bool WebPageSerializer::serialize(WebLocalFrame* frame, bool recursive, WebPageSerializerClient* client, |
+ const WebVector<WebURL>& links, const WebVector<WebString>& localPaths, const WebString& localDirectoryName) |
{ |
- WebPageSerializerImpl serializerImpl( |
- frame, recursive, client, links, localPaths, localDirectoryName); |
- return serializerImpl.serialize(); |
-} |
- |
-bool WebPageSerializer::retrieveAllResources(WebView* view, |
- const WebVector<WebCString>& supportedSchemes, |
- WebVector<WebURL>* resourceURLs, |
- WebVector<WebURL>* frameURLs) { |
- WebLocalFrameImpl* mainFrame = toWebLocalFrameImpl(view->mainFrame()); |
- if (!mainFrame) |
- return false; |
+ ASSERT(frame); |
+ ASSERT(client); |
+ ASSERT(links.size() == localPaths.size()); |
- Vector<LocalFrame*> framesToVisit; |
- Vector<LocalFrame*> visitedFrames; |
- Vector<KURL> frameKURLs; |
- Vector<KURL> resourceKURLs; |
+ Vector<SerializedResource> resources; |
+ PageSerializer serializer(&resources, nullptr); |
- // Let's retrieve the resources from every frame in this page. |
- framesToVisit.append(mainFrame->frame()); |
- while (!framesToVisit.isEmpty()) { |
- LocalFrame* frame = framesToVisit[0]; |
- framesToVisit.remove(0); |
- retrieveResourcesForFrame(frame, supportedSchemes, |
- &visitedFrames, &framesToVisit, |
- &frameKURLs, &resourceKURLs); |
+ serializer.setRewriteURLFolder(localDirectoryName); |
+ for (size_t i = 0; i < links.size(); i++) { |
+ KURL url = links[i]; |
+ serializer.registerRewriteURL(url.string(), localPaths[i]); |
} |
- // Converts the results to WebURLs. |
- WebVector<WebURL> resultResourceURLs(resourceKURLs.size()); |
- for (size_t i = 0; i < resourceKURLs.size(); ++i) { |
- resultResourceURLs[i] = resourceKURLs[i]; |
- // A frame's src can point to the same URL as another resource, keep the |
- // resource URL only in such cases. |
- size_t index = frameKURLs.find(resourceKURLs[i]); |
- if (index != kNotFound) |
- frameKURLs.remove(index); |
- } |
- *resourceURLs = resultResourceURLs; |
- WebVector<WebURL> resultFrameURLs(frameKURLs.size()); |
- for (size_t i = 0; i < frameKURLs.size(); ++i) |
- resultFrameURLs[i] = frameKURLs[i]; |
- *frameURLs = resultFrameURLs; |
+ serializer.serialize(toWebViewImpl(frame->view())->page()); |
+ for (SerializedResource& resource : resources) { |
+ client->didSerializeDataForFrame(resource.url, WebCString(resource.data->data(), resource.data->size()), WebPageSerializerClient::CurrentFrameIsFinished); |
+ } |
+ client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished); |
return true; |
} |
@@ -275,8 +155,7 @@ WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& cha |
WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url) |
{ |
return String::format("\n<!-- saved from url=(%04d)%s -->\n", |
- static_cast<int>(url.spec().length()), |
- url.spec().data()); |
+ static_cast<int>(url.spec().length()), url.spec().data()); |
} |
WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) |