Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(413)

Unified Diff: Source/web/WebPageSerializer.cpp

Issue 68613003: Merges the two different page serializers (Closed) Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Remove newline after XML decl (created 7 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « Source/core/page/PageSerializer.cpp ('k') | Source/web/WebPageSerializerImpl.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: Source/web/WebPageSerializer.cpp
diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp
index 6eeac90829d29c461c45e0bb34651e01eaab1fba..845949e88f543ff43ecbd43ee592dd9d4f7642e3 100644
--- a/Source/web/WebPageSerializer.cpp
+++ b/Source/web/WebPageSerializer.cpp
@@ -35,7 +35,6 @@
#include "WebFrame.h"
#include "WebFrameImpl.h"
#include "WebPageSerializerClient.h"
-#include "WebPageSerializerImpl.h"
#include "WebView.h"
#include "WebViewImpl.h"
#include "core/dom/Document.h"
@@ -59,126 +58,6 @@
using namespace WebCore;
-namespace {
-
-KURL getSubResourceURLFromElement(Element* element)
-{
- ASSERT(element);
- const QualifiedName* attributeName = 0;
- if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag))
- attributeName = &HTMLNames::srcAttr;
- else if (element->hasTagName(HTMLNames::inputTag)) {
- if (toHTMLInputElement(element)->isImageButton())
- attributeName = &HTMLNames::srcAttr;
- } else if (element->hasTagName(HTMLNames::bodyTag)
- || isHTMLTableElement(element)
- || element->hasTagName(HTMLNames::trTag)
- || element->hasTagName(HTMLNames::tdTag))
- attributeName = &HTMLNames::backgroundAttr;
- else if (element->hasTagName(HTMLNames::blockquoteTag)
- || element->hasTagName(HTMLNames::qTag)
- || element->hasTagName(HTMLNames::delTag)
- || element->hasTagName(HTMLNames::insTag))
- attributeName = &HTMLNames::citeAttr;
- else if (element->hasTagName(HTMLNames::linkTag)) {
- // If the link element is not css, ignore it.
- if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) {
- // FIXME: Add support for extracting links of sub-resources which
- // are inside style-sheet such as @import, @font-face, url(), etc.
- attributeName = &HTMLNames::hrefAttr;
- }
- } else if (element->hasTagName(HTMLNames::objectTag))
- attributeName = &HTMLNames::dataAttr;
- else if (element->hasTagName(HTMLNames::embedTag))
- attributeName = &HTMLNames::srcAttr;
-
- if (!attributeName)
- return KURL();
-
- String value = element->getAttribute(*attributeName);
- // Ignore javascript content.
- if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false))
- return KURL();
-
- return element->document().completeURL(value);
-}
-
-void retrieveResourcesForElement(Element* element,
- Vector<Frame*>* visitedFrames,
- Vector<Frame*>* framesToVisit,
- Vector<KURL>* frameURLs,
- Vector<KURL>* resourceURLs)
-{
- // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
- if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag)
- || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag))
- && element->isFrameOwnerElement()) {
- if (Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame()) {
- if (!visitedFrames->contains(frame))
- framesToVisit->append(frame);
- return;
- }
- }
-
- KURL url = getSubResourceURLFromElement(element);
- if (url.isEmpty() || !url.isValid())
- return; // No subresource for this node.
-
- // Ignore URLs that have a non-standard protocols. Since the FTP protocol
- // does no have a cache mechanism, we skip it as well.
- if (!url.protocolIsInHTTPFamily() && !url.isLocalFile())
- return;
-
- if (!resourceURLs->contains(url))
- resourceURLs->append(url);
-}
-
-void retrieveResourcesForFrame(Frame* frame,
- const blink::WebVector<blink::WebCString>& supportedSchemes,
- Vector<Frame*>* visitedFrames,
- Vector<Frame*>* framesToVisit,
- Vector<KURL>* frameURLs,
- Vector<KURL>* resourceURLs)
-{
- KURL frameURL = frame->loader().documentLoader()->request().url();
-
- // If the frame's URL is invalid, ignore it, it is not retrievable.
- if (!frameURL.isValid())
- return;
-
- // Ignore frames from unsupported schemes.
- bool isValidScheme = false;
- for (size_t i = 0; i < supportedSchemes.size(); ++i) {
- if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
- isValidScheme = true;
- break;
- }
- }
- if (!isValidScheme)
- return;
-
- // If we have already seen that frame, ignore it.
- if (visitedFrames->contains(frame))
- return;
- visitedFrames->append(frame);
- if (!frameURLs->contains(frameURL))
- frameURLs->append(frameURL);
-
- // Now get the resources associated with each node of the document.
- RefPtr<HTMLCollection> allNodes = frame->document()->all();
- for (unsigned i = 0; i < allNodes->length(); ++i) {
- Node* node = allNodes->item(i);
- // We are only interested in HTML resources.
- if (!node->isElementNode())
- continue;
- retrieveResourcesForElement(toElement(node),
- visitedFrames, framesToVisit,
- frameURLs, resourceURLs);
- }
-}
-
-} // namespace
-
namespace blink {
void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam)
@@ -230,50 +109,26 @@ bool WebPageSerializer::serialize(WebFrame* frame,
const WebVector<WebString>& localPaths,
const WebString& localDirectoryName)
{
- WebPageSerializerImpl serializerImpl(
- frame, recursive, client, links, localPaths, localDirectoryName);
- return serializerImpl.serialize();
-}
+ ASSERT(frame);
+ ASSERT(client);
+ ASSERT(links.size() == localPaths.size());
-bool WebPageSerializer::retrieveAllResources(WebView* view,
- const WebVector<WebCString>& supportedSchemes,
- WebVector<WebURL>* resourceURLs,
- WebVector<WebURL>* frameURLs) {
- WebFrameImpl* mainFrame = toWebFrameImpl(view->mainFrame());
- if (!mainFrame)
- return false;
+ LinkLocalPathMap m_localLinks;
- Vector<Frame*> framesToVisit;
- Vector<Frame*> visitedFrames;
- Vector<KURL> frameKURLs;
- Vector<KURL> resourceKURLs;
-
- // Let's retrieve the resources from every frame in this page.
- framesToVisit.append(mainFrame->frame());
- while (!framesToVisit.isEmpty()) {
- Frame* frame = framesToVisit[0];
- framesToVisit.remove(0);
- retrieveResourcesForFrame(frame, supportedSchemes,
- &visitedFrames, &framesToVisit,
- &frameKURLs, &resourceKURLs);
+ for (size_t i = 0; i < links.size(); i++) {
+ KURL url = links[i];
+ ASSERT(!m_localLinks.contains(url.string()));
+ m_localLinks.set(url.string(), localPaths[i]);
}
- // Converts the results to WebURLs.
- WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
- for (size_t i = 0; i < resourceKURLs.size(); ++i) {
- resultResourceURLs[i] = resourceKURLs[i];
- // A frame's src can point to the same URL as another resource, keep the
- // resource URL only in such cases.
- size_t index = frameKURLs.find(resourceKURLs[i]);
- if (index != kNotFound)
- frameKURLs.remove(index);
- }
- *resourceURLs = resultResourceURLs;
- WebVector<WebURL> resultFrameURLs(frameKURLs.size());
- for (size_t i = 0; i < frameKURLs.size(); ++i)
- resultFrameURLs[i] = frameKURLs[i];
- *frameURLs = resultFrameURLs;
+ Vector<SerializedResource> resources;
+ PageSerializer serializer(&resources, &m_localLinks, localDirectoryName);
+ serializer.serialize(toWebViewImpl(frame->view())->page());
+ for (Vector<SerializedResource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) {
+ client->didSerializeDataForFrame(iter->url, WebCString(iter->data->data(), iter->data->size()), WebPageSerializerClient::CurrentFrameIsFinished);
+ }
+ client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
return true;
}
« no previous file with comments | « Source/core/page/PageSerializer.cpp ('k') | Source/web/WebPageSerializerImpl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698