Index: Source/web/WebPageSerializer.cpp |
diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp |
index 6eeac90829d29c461c45e0bb34651e01eaab1fba..845949e88f543ff43ecbd43ee592dd9d4f7642e3 100644 |
--- a/Source/web/WebPageSerializer.cpp |
+++ b/Source/web/WebPageSerializer.cpp |
@@ -35,7 +35,6 @@ |
#include "WebFrame.h" |
#include "WebFrameImpl.h" |
#include "WebPageSerializerClient.h" |
-#include "WebPageSerializerImpl.h" |
#include "WebView.h" |
#include "WebViewImpl.h" |
#include "core/dom/Document.h" |
@@ -59,126 +58,6 @@ |
using namespace WebCore; |
-namespace { |
- |
-KURL getSubResourceURLFromElement(Element* element) |
-{ |
- ASSERT(element); |
- const QualifiedName* attributeName = 0; |
- if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag)) |
- attributeName = &HTMLNames::srcAttr; |
- else if (element->hasTagName(HTMLNames::inputTag)) { |
- if (toHTMLInputElement(element)->isImageButton()) |
- attributeName = &HTMLNames::srcAttr; |
- } else if (element->hasTagName(HTMLNames::bodyTag) |
- || isHTMLTableElement(element) |
- || element->hasTagName(HTMLNames::trTag) |
- || element->hasTagName(HTMLNames::tdTag)) |
- attributeName = &HTMLNames::backgroundAttr; |
- else if (element->hasTagName(HTMLNames::blockquoteTag) |
- || element->hasTagName(HTMLNames::qTag) |
- || element->hasTagName(HTMLNames::delTag) |
- || element->hasTagName(HTMLNames::insTag)) |
- attributeName = &HTMLNames::citeAttr; |
- else if (element->hasTagName(HTMLNames::linkTag)) { |
- // If the link element is not css, ignore it. |
- if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) { |
- // FIXME: Add support for extracting links of sub-resources which |
- // are inside style-sheet such as @import, @font-face, url(), etc. |
- attributeName = &HTMLNames::hrefAttr; |
- } |
- } else if (element->hasTagName(HTMLNames::objectTag)) |
- attributeName = &HTMLNames::dataAttr; |
- else if (element->hasTagName(HTMLNames::embedTag)) |
- attributeName = &HTMLNames::srcAttr; |
- |
- if (!attributeName) |
- return KURL(); |
- |
- String value = element->getAttribute(*attributeName); |
- // Ignore javascript content. |
- if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false)) |
- return KURL(); |
- |
- return element->document().completeURL(value); |
-} |
- |
-void retrieveResourcesForElement(Element* element, |
- Vector<Frame*>* visitedFrames, |
- Vector<Frame*>* framesToVisit, |
- Vector<KURL>* frameURLs, |
- Vector<KURL>* resourceURLs) |
-{ |
- // If the node is a frame, we'll process it later in retrieveResourcesForFrame. |
- if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag) |
- || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag)) |
- && element->isFrameOwnerElement()) { |
- if (Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame()) { |
- if (!visitedFrames->contains(frame)) |
- framesToVisit->append(frame); |
- return; |
- } |
- } |
- |
- KURL url = getSubResourceURLFromElement(element); |
- if (url.isEmpty() || !url.isValid()) |
- return; // No subresource for this node. |
- |
- // Ignore URLs that have a non-standard protocols. Since the FTP protocol |
- // does no have a cache mechanism, we skip it as well. |
- if (!url.protocolIsInHTTPFamily() && !url.isLocalFile()) |
- return; |
- |
- if (!resourceURLs->contains(url)) |
- resourceURLs->append(url); |
-} |
- |
-void retrieveResourcesForFrame(Frame* frame, |
- const blink::WebVector<blink::WebCString>& supportedSchemes, |
- Vector<Frame*>* visitedFrames, |
- Vector<Frame*>* framesToVisit, |
- Vector<KURL>* frameURLs, |
- Vector<KURL>* resourceURLs) |
-{ |
- KURL frameURL = frame->loader().documentLoader()->request().url(); |
- |
- // If the frame's URL is invalid, ignore it, it is not retrievable. |
- if (!frameURL.isValid()) |
- return; |
- |
- // Ignore frames from unsupported schemes. |
- bool isValidScheme = false; |
- for (size_t i = 0; i < supportedSchemes.size(); ++i) { |
- if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) { |
- isValidScheme = true; |
- break; |
- } |
- } |
- if (!isValidScheme) |
- return; |
- |
- // If we have already seen that frame, ignore it. |
- if (visitedFrames->contains(frame)) |
- return; |
- visitedFrames->append(frame); |
- if (!frameURLs->contains(frameURL)) |
- frameURLs->append(frameURL); |
- |
- // Now get the resources associated with each node of the document. |
- RefPtr<HTMLCollection> allNodes = frame->document()->all(); |
- for (unsigned i = 0; i < allNodes->length(); ++i) { |
- Node* node = allNodes->item(i); |
- // We are only interested in HTML resources. |
- if (!node->isElementNode()) |
- continue; |
- retrieveResourcesForElement(toElement(node), |
- visitedFrames, framesToVisit, |
- frameURLs, resourceURLs); |
- } |
-} |
- |
-} // namespace |
- |
namespace blink { |
void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam) |
@@ -230,50 +109,26 @@ bool WebPageSerializer::serialize(WebFrame* frame, |
const WebVector<WebString>& localPaths, |
const WebString& localDirectoryName) |
{ |
- WebPageSerializerImpl serializerImpl( |
- frame, recursive, client, links, localPaths, localDirectoryName); |
- return serializerImpl.serialize(); |
-} |
+ ASSERT(frame); |
+ ASSERT(client); |
+ ASSERT(links.size() == localPaths.size()); |
-bool WebPageSerializer::retrieveAllResources(WebView* view, |
- const WebVector<WebCString>& supportedSchemes, |
- WebVector<WebURL>* resourceURLs, |
- WebVector<WebURL>* frameURLs) { |
- WebFrameImpl* mainFrame = toWebFrameImpl(view->mainFrame()); |
- if (!mainFrame) |
- return false; |
+ LinkLocalPathMap m_localLinks; |
- Vector<Frame*> framesToVisit; |
- Vector<Frame*> visitedFrames; |
- Vector<KURL> frameKURLs; |
- Vector<KURL> resourceKURLs; |
- |
- // Let's retrieve the resources from every frame in this page. |
- framesToVisit.append(mainFrame->frame()); |
- while (!framesToVisit.isEmpty()) { |
- Frame* frame = framesToVisit[0]; |
- framesToVisit.remove(0); |
- retrieveResourcesForFrame(frame, supportedSchemes, |
- &visitedFrames, &framesToVisit, |
- &frameKURLs, &resourceKURLs); |
+ for (size_t i = 0; i < links.size(); i++) { |
+ KURL url = links[i]; |
+ ASSERT(!m_localLinks.contains(url.string())); |
+ m_localLinks.set(url.string(), localPaths[i]); |
} |
- // Converts the results to WebURLs. |
- WebVector<WebURL> resultResourceURLs(resourceKURLs.size()); |
- for (size_t i = 0; i < resourceKURLs.size(); ++i) { |
- resultResourceURLs[i] = resourceKURLs[i]; |
- // A frame's src can point to the same URL as another resource, keep the |
- // resource URL only in such cases. |
- size_t index = frameKURLs.find(resourceKURLs[i]); |
- if (index != kNotFound) |
- frameKURLs.remove(index); |
- } |
- *resourceURLs = resultResourceURLs; |
- WebVector<WebURL> resultFrameURLs(frameKURLs.size()); |
- for (size_t i = 0; i < frameKURLs.size(); ++i) |
- resultFrameURLs[i] = frameKURLs[i]; |
- *frameURLs = resultFrameURLs; |
+ Vector<SerializedResource> resources; |
+ PageSerializer serializer(&resources, &m_localLinks, localDirectoryName); |
+ serializer.serialize(toWebViewImpl(frame->view())->page()); |
+ for (Vector<SerializedResource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) { |
+ client->didSerializeDataForFrame(iter->url, WebCString(iter->data->data(), iter->data->size()), WebPageSerializerClient::CurrentFrameIsFinished); |
+ } |
+ client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished); |
return true; |
} |