Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Unified Diff: Source/web/WebPageSerializer.cpp

Issue 1295063002: Revert "Remove old serializer" It caused a regression, crbug.com/510422 (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | Source/web/WebPageSerializerImpl.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Source/web/WebPageSerializer.cpp
diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp
index 0d8b4a24351e9e18726f05c38c79e78b60c80681..a436d085b65119c05d0b70200d7a7500eec31a49 100644
--- a/Source/web/WebPageSerializer.cpp
+++ b/Source/web/WebPageSerializer.cpp
@@ -29,13 +29,14 @@
*/
#include "config.h"
-
#include "public/web/WebPageSerializer.h"
+#include "core/HTMLNames.h"
#include "core/dom/Document.h"
#include "core/dom/Element.h"
-#include "core/frame/Frame.h"
+#include "core/frame/LocalFrame.h"
#include "core/html/HTMLAllCollection.h"
+#include "core/html/HTMLFrameElementBase.h"
#include "core/html/HTMLFrameOwnerElement.h"
#include "core/html/HTMLInputElement.h"
#include "core/html/HTMLTableElement.h"
@@ -49,8 +50,11 @@
#include "public/platform/WebString.h"
#include "public/platform/WebURL.h"
#include "public/platform/WebVector.h"
-#include "public/web/WebLocalFrame.h"
+#include "public/web/WebFrame.h"
#include "public/web/WebPageSerializerClient.h"
+#include "public/web/WebView.h"
+#include "web/WebLocalFrameImpl.h"
+#include "web/WebPageSerializerImpl.h"
#include "web/WebViewImpl.h"
#include "wtf/Vector.h"
#include "wtf/text/StringConcatenate.h"
@@ -59,12 +63,99 @@ namespace blink {
namespace {
+KURL getSubResourceURLFromElement(Element* element) // Returns the URL held in the element's sub-resource attribute, or an empty KURL when there is nothing usable.
+{
+ ASSERT(element);
+ const QualifiedName& attributeName = element->subResourceAttributeName();
+ if (attributeName == QualifiedName::null())
+ return KURL(); // The element reports no sub-resource attribute.
+
+ String value = element->getAttribute(attributeName);
+ // Ignore javascript content (case-insensitive, after trimming whitespace); an empty value is ignored as well.
+ if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", TextCaseInsensitive))
+ return KURL();
+
+ return element->document().completeURL(value); // Completed through the element's own document.
+}
+
+void retrieveResourcesForElement(Element* element, // Element to inspect; must be non-null (ASSERTed below).
+ Vector<LocalFrame*>* visitedFrames, // Frames already processed; only read here.
+ Vector<LocalFrame*>* framesToVisit, // Receives the element's local content frame when it is not yet visited.
+ Vector<KURL>* frameURLs, // Not used by this function.
+ Vector<KURL>* resourceURLs) // Receives the element's sub-resource URL, without duplicates.
+{
+ ASSERT(element);
+ // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
+ if (isHTMLFrameElementBase(*element) || isHTMLObjectElement(*element) || isHTMLEmbedElement(*element)) {
+ Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame();
+ if (frame && frame->isLocalFrame()) {
+ if (!visitedFrames->contains(toLocalFrame(frame)))
+ framesToVisit->append(toLocalFrame(frame));
+ return; // Queued (or already visited); no sub-resource URL is recorded for it here.
+ }
+ } // No local content frame: fall through and treat the element like any other.
+
+ KURL url = getSubResourceURLFromElement(element);
+ if (url.isEmpty() || !url.isValid())
+ return; // No subresource for this node.
+
+ // Ignore URLs that have non-standard protocols. Since the FTP protocol
+ // does not have a cache mechanism, we skip it as well.
+ if (!url.protocolIsInHTTPFamily() && !url.isLocalFile())
+ return;
+
+ if (!resourceURLs->contains(url))
+ resourceURLs->append(url);
+}
+
+void retrieveResourcesForFrame(LocalFrame* frame, // Frame whose document is scanned.
+ const WebVector<WebCString>& supportedSchemes, // Schemes the frame URL is allowed to have.
+ Vector<LocalFrame*>* visitedFrames, // Frames already scanned; this frame is appended once it is accepted.
+ Vector<LocalFrame*>* framesToVisit, // Passed through to retrieveResourcesForElement.
+ Vector<KURL>* frameURLs, // Receives this frame's URL, without duplicates.
+ Vector<KURL>* resourceURLs) // Passed through to retrieveResourcesForElement.
+{
+ KURL frameURL = frame->loader().documentLoader()->request().url(); // URL of the document loader's request.
+
+ // If the frame's URL is invalid, ignore it, it is not retrievable.
+ if (!frameURL.isValid())
+ return; // Note: the frame is not recorded in visitedFrames on this path.
+
+ // Ignore frames from unsupported schemes.
+ bool isValidScheme = false;
+ for (size_t i = 0; i < supportedSchemes.size(); ++i) {
+ if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
+ isValidScheme = true;
+ break;
+ }
+ }
+ if (!isValidScheme)
+ return; // As above, the frame is not recorded in visitedFrames.
+
+ // If we have already seen that frame, ignore it.
+ if (visitedFrames->contains(frame))
+ return;
+ visitedFrames->append(frame);
+ if (!frameURLs->contains(frameURL))
+ frameURLs->append(frameURL);
+
+ // Now get the resources associated with each node of the document.
+ RefPtrWillBeRawPtr<HTMLAllCollection> allElements = frame->document()->all();
+ for (unsigned i = 0; i < allElements->length(); ++i) {
+ Element* element = allElements->item(i);
+ retrieveResourcesForElement(element,
+ visitedFrames, framesToVisit,
+ frameURLs, resourceURLs);
+ }
+}
+
class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate { // PageSerializer delegate; presumably the one used for MHTML output -- confirm at the use site.
public:
~MHTMLPageSerializerDelegate() override;
bool shouldIgnoreAttribute(const Attribute&) override; // Definition is outside this excerpt.
};
+
MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate() // Out-of-line destructor with an empty body.
{
}
@@ -121,28 +212,57 @@ WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
return WebCString(mhtml->data(), mhtml->size());
}
-bool WebPageSerializer::serialize(WebLocalFrame* frame, bool recursive, WebPageSerializerClient* client,
- const WebVector<WebURL>& links, const WebVector<WebString>& localPaths, const WebString& localDirectoryName)
+bool WebPageSerializer::serialize(WebLocalFrame* frame, // Every argument is forwarded unchanged to WebPageSerializerImpl.
+ bool recursive,
+ WebPageSerializerClient* client,
+ const WebVector<WebURL>& links,
+ const WebVector<WebString>& localPaths,
+ const WebString& localDirectoryName)
{
- ASSERT(frame);
- ASSERT(client);
- ASSERT(links.size() == localPaths.size());
+ WebPageSerializerImpl serializerImpl(
+ frame, recursive, client, links, localPaths, localDirectoryName);
+ return serializerImpl.serialize(); // The result comes straight from the implementation object.
+}
- Vector<SerializedResource> resources;
- PageSerializer serializer(&resources, nullptr);
+bool WebPageSerializer::retrieveAllResources(WebView* view, // View whose main frame is the starting point.
+ const WebVector<WebCString>& supportedSchemes, // Accepted frame URL schemes, forwarded to retrieveResourcesForFrame.
+ WebVector<WebURL>* resourceURLs, // Out: the sub-resource URLs that were collected.
+ WebVector<WebURL>* frameURLs) { // Out: the frame URLs that are not also resource URLs.
+ WebLocalFrameImpl* mainFrame = toWebLocalFrameImpl(view->mainFrame());
+ if (!mainFrame)
+ return false; // Both outputs are left untouched on this path.
- serializer.setRewriteURLFolder(localDirectoryName);
- for (size_t i = 0; i < links.size(); i++) {
- KURL url = links[i];
- serializer.registerRewriteURL(url.string(), localPaths[i]);
- }
+ Vector<LocalFrame*> framesToVisit; // Work list, consumed from the front.
+ Vector<LocalFrame*> visitedFrames;
+ Vector<KURL> frameKURLs;
+ Vector<KURL> resourceKURLs;
- serializer.serialize(toWebViewImpl(frame->view())->page());
+ // Let's retrieve the resources from every frame in this page.
+ framesToVisit.append(mainFrame->frame());
+ while (!framesToVisit.isEmpty()) {
+ LocalFrame* frame = framesToVisit[0];
+ framesToVisit.remove(0);
+ retrieveResourcesForFrame(frame, supportedSchemes, // May append further frames to framesToVisit.
+ &visitedFrames, &framesToVisit,
+ &frameKURLs, &resourceKURLs);
+ }
- for (SerializedResource& resource : resources) {
- client->didSerializeDataForFrame(resource.url, WebCString(resource.data->data(), resource.data->size()), WebPageSerializerClient::CurrentFrameIsFinished);
+ // Converts the results to WebURLs.
+ WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
+ for (size_t i = 0; i < resourceKURLs.size(); ++i) {
+ resultResourceURLs[i] = resourceKURLs[i];
+ // A frame's src can point to the same URL as another resource, keep the
+ // resource URL only in such cases.
+ size_t index = frameKURLs.find(resourceKURLs[i]);
+ if (index != kNotFound)
+ frameKURLs.remove(index);
}
- client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
+ *resourceURLs = resultResourceURLs;
+ WebVector<WebURL> resultFrameURLs(frameKURLs.size()); // Sized after the duplicates were removed above.
+ for (size_t i = 0; i < frameKURLs.size(); ++i)
+ resultFrameURLs[i] = frameKURLs[i];
+ *frameURLs = resultFrameURLs;
+
return true;
}
@@ -155,7 +275,8 @@ WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& cha
WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url)
{
return String::format("\n<!-- saved from url=(%04d)%s -->\n",
- static_cast<int>(url.spec().length()), url.spec().data());
+ static_cast<int>(url.spec().length()), // Fills %04d: the length of the URL spec.
+ url.spec().data()); // Fills %s: the URL spec text.
}
WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget)
« no previous file with comments | « no previous file | Source/web/WebPageSerializerImpl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698