Chromium Code Reviews| Index: Source/web/WebPageSerializer.cpp |
| diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp |
| index 03fba992cb597429ccda143feef59b7c00cf6716..f95ea664b525616e14ecf4e8d3b0c6c7571c78c6 100644 |
| --- a/Source/web/WebPageSerializer.cpp |
| +++ b/Source/web/WebPageSerializer.cpp |
| @@ -35,7 +35,6 @@ |
| #include "WebFrame.h" |
| #include "WebFrameImpl.h" |
| #include "WebPageSerializerClient.h" |
| -#include "WebPageSerializerImpl.h" |
| #include "WebView.h" |
| #include "WebViewImpl.h" |
| #include "core/dom/Document.h" |
| @@ -59,126 +58,6 @@ |
| using namespace WebCore; |
| -namespace { |
| - |
| -KURL getSubResourceURLFromElement(Element* element) |
| -{ |
| - ASSERT(element); |
| - const QualifiedName* attributeName = 0; |
| - if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag)) |
| - attributeName = &HTMLNames::srcAttr; |
| - else if (element->hasTagName(HTMLNames::inputTag)) { |
| - if (toHTMLInputElement(element)->isImageButton()) |
| - attributeName = &HTMLNames::srcAttr; |
| - } else if (element->hasTagName(HTMLNames::bodyTag) |
| - || isHTMLTableElement(element) |
| - || element->hasTagName(HTMLNames::trTag) |
| - || element->hasTagName(HTMLNames::tdTag)) |
| - attributeName = &HTMLNames::backgroundAttr; |
| - else if (element->hasTagName(HTMLNames::blockquoteTag) |
| - || element->hasTagName(HTMLNames::qTag) |
| - || element->hasTagName(HTMLNames::delTag) |
| - || element->hasTagName(HTMLNames::insTag)) |
| - attributeName = &HTMLNames::citeAttr; |
| - else if (element->hasTagName(HTMLNames::linkTag)) { |
| - // If the link element is not css, ignore it. |
| - if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) { |
| - // FIXME: Add support for extracting links of sub-resources which |
| - // are inside style-sheet such as @import, @font-face, url(), etc. |
| - attributeName = &HTMLNames::hrefAttr; |
| - } |
| - } else if (element->hasTagName(HTMLNames::objectTag)) |
| - attributeName = &HTMLNames::dataAttr; |
| - else if (element->hasTagName(HTMLNames::embedTag)) |
| - attributeName = &HTMLNames::srcAttr; |
| - |
| - if (!attributeName) |
| - return KURL(); |
| - |
| - String value = element->getAttribute(*attributeName); |
| - // Ignore javascript content. |
| - if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false)) |
| - return KURL(); |
| - |
| - return element->document().completeURL(value); |
| -} |
| - |
| -void retrieveResourcesForElement(Element* element, |
| - Vector<Frame*>* visitedFrames, |
| - Vector<Frame*>* framesToVisit, |
| - Vector<KURL>* frameURLs, |
| - Vector<KURL>* resourceURLs) |
| -{ |
| - // If the node is a frame, we'll process it later in retrieveResourcesForFrame. |
| - if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag) |
| - || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag)) |
| - && element->isFrameOwnerElement()) { |
| - if (Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame()) { |
| - if (!visitedFrames->contains(frame)) |
| - framesToVisit->append(frame); |
| - return; |
| - } |
| - } |
| - |
| - KURL url = getSubResourceURLFromElement(element); |
| - if (url.isEmpty() || !url.isValid()) |
| - return; // No subresource for this node. |
| - |
| - // Ignore URLs that have a non-standard protocols. Since the FTP protocol |
| - // does no have a cache mechanism, we skip it as well. |
| - if (!url.protocolIsInHTTPFamily() && !url.isLocalFile()) |
| - return; |
| - |
| - if (!resourceURLs->contains(url)) |
| - resourceURLs->append(url); |
| -} |
| - |
| -void retrieveResourcesForFrame(Frame* frame, |
| - const blink::WebVector<blink::WebCString>& supportedSchemes, |
| - Vector<Frame*>* visitedFrames, |
| - Vector<Frame*>* framesToVisit, |
| - Vector<KURL>* frameURLs, |
| - Vector<KURL>* resourceURLs) |
| -{ |
| - KURL frameURL = frame->loader().documentLoader()->request().url(); |
| - |
| - // If the frame's URL is invalid, ignore it, it is not retrievable. |
| - if (!frameURL.isValid()) |
| - return; |
| - |
| - // Ignore frames from unsupported schemes. |
| - bool isValidScheme = false; |
| - for (size_t i = 0; i < supportedSchemes.size(); ++i) { |
| - if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) { |
| - isValidScheme = true; |
| - break; |
| - } |
| - } |
| - if (!isValidScheme) |
| - return; |
| - |
| - // If we have already seen that frame, ignore it. |
| - if (visitedFrames->contains(frame)) |
| - return; |
| - visitedFrames->append(frame); |
| - if (!frameURLs->contains(frameURL)) |
| - frameURLs->append(frameURL); |
| - |
| - // Now get the resources associated with each node of the document. |
| - RefPtr<HTMLCollection> allNodes = frame->document()->all(); |
| - for (unsigned i = 0; i < allNodes->length(); ++i) { |
| - Node* node = allNodes->item(i); |
| - // We are only interested in HTML resources. |
| - if (!node->isElementNode()) |
| - continue; |
| - retrieveResourcesForElement(toElement(node), |
| - visitedFrames, framesToVisit, |
| - frameURLs, resourceURLs); |
| - } |
| -} |
| - |
| -} // namespace |
| - |
| namespace blink { |
| void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam) |
| @@ -223,79 +102,35 @@ WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view) |
| return WebCString(mhtml->data(), mhtml->size()); |
| } |
| -bool WebPageSerializer::serialize(WebFrame* frame, |
| - bool recursive, |
| - WebPageSerializerClient* client, |
| - const WebVector<WebURL>& links, |
| - const WebVector<WebString>& localPaths, |
| - const WebString& localDirectoryName) |
| +bool WebPageSerializer::serialize(WebView* view, |
| + WebPageSerializerClient* client, const WebVector<WebURL>& links, |
| + const WebVector<WebString>& localPaths, |
| + const WebString& localDirectoryName) |
| { |
| - WebPageSerializerImpl serializerImpl( |
| - frame, recursive, client, links, localPaths, localDirectoryName); |
| - return serializerImpl.serialize(); |
| -} |
| - |
| -bool WebPageSerializer::retrieveAllResources(WebView* view, |
| - const WebVector<WebCString>& supportedSchemes, |
| - WebVector<WebURL>* resourceURLs, |
| - WebVector<WebURL>* frameURLs) { |
| - WebFrameImpl* mainFrame = toWebFrameImpl(view->mainFrame()); |
| - if (!mainFrame) |
| - return false; |
| - |
| - Vector<Frame*> framesToVisit; |
| - Vector<Frame*> visitedFrames; |
| - Vector<KURL> frameKURLs; |
| - Vector<KURL> resourceKURLs; |
| - |
| - // Let's retrieve the resources from every frame in this page. |
| - framesToVisit.append(mainFrame->frame()); |
| - while (!framesToVisit.isEmpty()) { |
| - Frame* frame = framesToVisit[0]; |
| - framesToVisit.remove(0); |
| - retrieveResourcesForFrame(frame, supportedSchemes, |
| - &visitedFrames, &framesToVisit, |
| - &frameKURLs, &resourceKURLs); |
| + // Must specify available WebView. |
| + ASSERT(view); |
| + // Make sure we have non 0 client. |
| + ASSERT(client); |
| + // Build local resources map. |
|
abarth-chromium
2013/11/14 16:55:03
None of these comments are worth having.
|
| + ASSERT(links.size() == localPaths.size()); |
| + |
| + HashMap<WTF::String, WTF::String> m_localLinks; |
|
abarth-chromium
2013/11/14 16:55:03
Why not use the typedef you created for this purpose?
|
| + |
| + for (size_t i = 0; i < links.size(); i++) { |
| + KURL url = links[i]; |
| + ASSERT(!m_localLinks.contains(url.string())); |
| + m_localLinks.set(url.string(), localPaths[i]); |
| } |
| - // Converts the results to WebURLs. |
| - WebVector<WebURL> resultResourceURLs(resourceKURLs.size()); |
| - for (size_t i = 0; i < resourceKURLs.size(); ++i) { |
| - resultResourceURLs[i] = resourceKURLs[i]; |
| - // A frame's src can point to the same URL as another resource, keep the |
| - // resource URL only in such cases. |
| - size_t index = frameKURLs.find(resourceKURLs[i]); |
| - if (index != kNotFound) |
| - frameKURLs.remove(index); |
| - } |
| - *resourceURLs = resultResourceURLs; |
| - WebVector<WebURL> resultFrameURLs(frameKURLs.size()); |
| - for (size_t i = 0; i < frameKURLs.size(); ++i) |
| - resultFrameURLs[i] = frameKURLs[i]; |
| - *frameURLs = resultFrameURLs; |
| + Vector<SerializedResource> resources; |
| + PageSerializer serializer(&resources, &m_localLinks, localDirectoryName); |
| + serializer.serialize(toWebViewImpl(view)->page()); |
| + for (Vector<SerializedResource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) { |
| + client->didSerializeDataForFrame(iter->url, WebCString(iter->data->data(), iter->data->size()), WebPageSerializerClient::CurrentFrameIsFinished); |
| + } |
| + client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished); |
| return true; |
| } |
| -WebString WebPageSerializer::generateMetaCharsetDeclaration(const WebString& charset) |
| -{ |
| - String charsetString = "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=" + static_cast<const String&>(charset) + "\">"; |
| - return charsetString; |
| -} |
| - |
| -WebString WebPageSerializer::generateMarkOfTheWebDeclaration(const WebURL& url) |
| -{ |
| - return String::format("\n<!-- saved from url=(%04d)%s -->\n", |
| - static_cast<int>(url.spec().length()), |
| - url.spec().data()); |
| -} |
| - |
| -WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTarget) |
| -{ |
| - if (baseTarget.isEmpty()) |
| - return String("<base href=\".\">"); |
| - String baseString = "<base href=\".\" target=\"" + static_cast<const String&>(baseTarget) + "\">"; |
| - return baseString; |
| -} |
| - |
| } // namespace blink |