| Index: Source/web/WebPageSerializer.cpp
|
| diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp
|
| index 0d8b4a24351e9e18726f05c38c79e78b60c80681..a436d085b65119c05d0b70200d7a7500eec31a49 100644
|
| --- a/Source/web/WebPageSerializer.cpp
|
| +++ b/Source/web/WebPageSerializer.cpp
|
| @@ -29,13 +29,14 @@
|
| */
|
|
|
| #include "config.h"
|
| -
|
| #include "public/web/WebPageSerializer.h"
|
|
|
| +#include "core/HTMLNames.h"
|
| #include "core/dom/Document.h"
|
| #include "core/dom/Element.h"
|
| -#include "core/frame/Frame.h"
|
| +#include "core/frame/LocalFrame.h"
|
| #include "core/html/HTMLAllCollection.h"
|
| +#include "core/html/HTMLFrameElementBase.h"
|
| #include "core/html/HTMLFrameOwnerElement.h"
|
| #include "core/html/HTMLInputElement.h"
|
| #include "core/html/HTMLTableElement.h"
|
| @@ -49,8 +50,11 @@
|
| #include "public/platform/WebString.h"
|
| #include "public/platform/WebURL.h"
|
| #include "public/platform/WebVector.h"
|
| -#include "public/web/WebLocalFrame.h"
|
| +#include "public/web/WebFrame.h"
|
| #include "public/web/WebPageSerializerClient.h"
|
| +#include "public/web/WebView.h"
|
| +#include "web/WebLocalFrameImpl.h"
|
| +#include "web/WebPageSerializerImpl.h"
|
| #include "web/WebViewImpl.h"
|
| #include "wtf/Vector.h"
|
| #include "wtf/text/StringConcatenate.h"
|
| @@ -59,12 +63,99 @@ namespace blink {
|
|
|
| namespace {
|
|
|
| +KURL getSubResourceURLFromElement(Element* element)
|
| +{
|
| + ASSERT(element);
|
| + const QualifiedName& attributeName = element->subResourceAttributeName();
|
| + if (attributeName == QualifiedName::null())
|
| + return KURL();
|
| +
|
| + String value = element->getAttribute(attributeName);
|
| + // Ignore javascript content.
|
| + if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", TextCaseInsensitive))
|
| + return KURL();
|
| +
|
| + return element->document().completeURL(value);
|
| +}
|
| +
|
| +void retrieveResourcesForElement(Element* element,
|
| + Vector<LocalFrame*>* visitedFrames,
|
| + Vector<LocalFrame*>* framesToVisit,
|
| + Vector<KURL>* frameURLs,
|
| + Vector<KURL>* resourceURLs)
|
| +{
|
| + ASSERT(element);
|
| + // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
|
| + if (isHTMLFrameElementBase(*element) || isHTMLObjectElement(*element) || isHTMLEmbedElement(*element)) {
|
| + Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame();
|
| + if (frame && frame->isLocalFrame()) {
|
| + if (!visitedFrames->contains(toLocalFrame(frame)))
|
| + framesToVisit->append(toLocalFrame(frame));
|
| + return;
|
| + }
|
| + }
|
| +
|
| + KURL url = getSubResourceURLFromElement(element);
|
| + if (url.isEmpty() || !url.isValid())
|
| + return; // No subresource for this node.
|
| +
|
| + // Ignore URLs that have a non-standard protocols. Since the FTP protocol
|
| + // does no have a cache mechanism, we skip it as well.
|
| + if (!url.protocolIsInHTTPFamily() && !url.isLocalFile())
|
| + return;
|
| +
|
| + if (!resourceURLs->contains(url))
|
| + resourceURLs->append(url);
|
| +}
|
| +
|
| +void retrieveResourcesForFrame(LocalFrame* frame,
|
| + const WebVector<WebCString>& supportedSchemes,
|
| + Vector<LocalFrame*>* visitedFrames,
|
| + Vector<LocalFrame*>* framesToVisit,
|
| + Vector<KURL>* frameURLs,
|
| + Vector<KURL>* resourceURLs)
|
| +{
|
| + KURL frameURL = frame->loader().documentLoader()->request().url();
|
| +
|
| + // If the frame's URL is invalid, ignore it, it is not retrievable.
|
| + if (!frameURL.isValid())
|
| + return;
|
| +
|
| + // Ignore frames from unsupported schemes.
|
| + bool isValidScheme = false;
|
| + for (size_t i = 0; i < supportedSchemes.size(); ++i) {
|
| + if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
|
| + isValidScheme = true;
|
| + break;
|
| + }
|
| + }
|
| + if (!isValidScheme)
|
| + return;
|
| +
|
| + // If we have already seen that frame, ignore it.
|
| + if (visitedFrames->contains(frame))
|
| + return;
|
| + visitedFrames->append(frame);
|
| + if (!frameURLs->contains(frameURL))
|
| + frameURLs->append(frameURL);
|
| +
|
| + // Now get the resources associated with each node of the document.
|
| + RefPtrWillBeRawPtr<HTMLAllCollection> allElements = frame->document()->all();
|
| + for (unsigned i = 0; i < allElements->length(); ++i) {
|
| + Element* element = allElements->item(i);
|
| + retrieveResourcesForElement(element,
|
| + visitedFrames, framesToVisit,
|
| + frameURLs, resourceURLs);
|
| + }
|
| +}
|
| +
|
| class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
|
| public:
|
| ~MHTMLPageSerializerDelegate() override;
|
| bool shouldIgnoreAttribute(const Attribute&) override;
|
| };
|
|
|
| +
|
| MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate()
|
| {
|
| }
|
| @@ -121,28 +212,57 @@ WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
|
| return WebCString(mhtml->data(), mhtml->size());
|
| }
|
|
|
| -bool WebPageSerializer::serialize(WebLocalFrame* frame, bool recursive, WebPageSerializerClient* client,
|
| - const WebVector<WebURL>& links, const WebVector<WebString>& localPaths, const WebString& localDirectoryName)
|
| +bool WebPageSerializer::serialize(WebLocalFrame* frame,
|
| + bool recursive,
|
| + WebPageSerializerClient* client,
|
| + const WebVector<WebURL>& links,
|
| + const WebVector<WebString>& localPaths,
|
| + const WebString& localDirectoryName)
|
| {
|
| - ASSERT(frame);
|
| - ASSERT(client);
|
| - ASSERT(links.size() == localPaths.size());
|
| + WebPageSerializerImpl serializerImpl(
|
| + frame, recursive, client, links, localPaths, localDirectoryName);
|
| + return serializerImpl.serialize();
|
| +}
|
|
|
| - Vector<SerializedResource> resources;
|
| - PageSerializer serializer(&resources, nullptr);
|
| +bool WebPageSerializer::retrieveAllResources(WebView* view,
|
| + const WebVector<WebCString>& supportedSchemes,
|
| + WebVector<WebURL>* resourceURLs,
|
| + WebVector<WebURL>* frameURLs) {
|
| + WebLocalFrameImpl* mainFrame = toWebLocalFrameImpl(view->mainFrame());
|
| + if (!mainFrame)
|
| + return false;
|
|
|
| - serializer.setRewriteURLFolder(localDirectoryName);
|
| - for (size_t i = 0; i < links.size(); i++) {
|
| - KURL url = links[i];
|
| - serializer.registerRewriteURL(url.string(), localPaths[i]);
|
| - }
|
| + Vector<LocalFrame*> framesToVisit;
|
| + Vector<LocalFrame*> visitedFrames;
|
| + Vector<KURL> frameKURLs;
|
| + Vector<KURL> resourceKURLs;
|
|
|
| - serializer.serialize(toWebViewImpl(frame->view())->page());
|
| + // Let's retrieve the resources from every frame in this page.
|
| + framesToVisit.append(mainFrame->frame());
|
| + while (!framesToVisit.isEmpty()) {
|
| + LocalFrame* frame = framesToVisit[0];
|
| + framesToVisit.remove(0);
|
| + retrieveResourcesForFrame(frame, supportedSchemes,
|
| + &visitedFrames, &framesToVisit,
|
| + &frameKURLs, &resourceKURLs);
|
| + }
|
|
|
| - for (SerializedResource& resource : resources) {
|
| - client->didSerializeDataForFrame(resource.url, WebCString(resource.data->data(), resource.data->size()), WebPageSerializerClient::CurrentFrameIsFinished);
|
| + // Converts the results to WebURLs.
|
| + WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
|
| + for (size_t i = 0; i < resourceKURLs.size(); ++i) {
|
| + resultResourceURLs[i] = resourceKURLs[i];
|
| + // A frame's src can point to the same URL as another resource, keep the
|
| + // resource URL only in such cases.
|
| + size_t index = frameKURLs.find(resourceKURLs[i]);
|
| + if (index != kNotFound)
|
| + frameKURLs.remove(index);
|
| }
|
| - client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
|
| + *resourceURLs = resultResourceURLs;
|
| + WebVector<WebURL> resultFrameURLs(frameKURLs.size());
|
| + for (size_t i = 0; i < frameKURLs.size(); ++i)
|
| + resultFrameURLs[i] = frameKURLs[i];
|
| + *frameURLs = resultFrameURLs;
|
| +
|
| return true;
|
| }
|
|
|
| @@ -155,7 +275,8 @@ WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& cha
|
| WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url)
|
| {
|
| return String::format("\n<!-- saved from url=(%04d)%s -->\n",
|
| - static_cast<int>(url.spec().length()), url.spec().data());
|
| + static_cast<int>(url.spec().length()),
|
| + url.spec().data());
|
| }
|
|
|
| WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
|
|
|