| Index: Source/web/WebPageSerializer.cpp
|
| diff --git a/Source/web/WebPageSerializer.cpp b/Source/web/WebPageSerializer.cpp
|
| index 6eeac90829d29c461c45e0bb34651e01eaab1fba..845949e88f543ff43ecbd43ee592dd9d4f7642e3 100644
|
| --- a/Source/web/WebPageSerializer.cpp
|
| +++ b/Source/web/WebPageSerializer.cpp
|
| @@ -35,7 +35,6 @@
|
| #include "WebFrame.h"
|
| #include "WebFrameImpl.h"
|
| #include "WebPageSerializerClient.h"
|
| -#include "WebPageSerializerImpl.h"
|
| #include "WebView.h"
|
| #include "WebViewImpl.h"
|
| #include "core/dom/Document.h"
|
| @@ -59,126 +58,6 @@
|
|
|
| using namespace WebCore;
|
|
|
| -namespace {
|
| -
|
| -KURL getSubResourceURLFromElement(Element* element)
|
| -{
|
| - ASSERT(element);
|
| - const QualifiedName* attributeName = 0;
|
| - if (element->hasTagName(HTMLNames::imgTag) || element->hasTagName(HTMLNames::scriptTag))
|
| - attributeName = &HTMLNames::srcAttr;
|
| - else if (element->hasTagName(HTMLNames::inputTag)) {
|
| - if (toHTMLInputElement(element)->isImageButton())
|
| - attributeName = &HTMLNames::srcAttr;
|
| - } else if (element->hasTagName(HTMLNames::bodyTag)
|
| - || isHTMLTableElement(element)
|
| - || element->hasTagName(HTMLNames::trTag)
|
| - || element->hasTagName(HTMLNames::tdTag))
|
| - attributeName = &HTMLNames::backgroundAttr;
|
| - else if (element->hasTagName(HTMLNames::blockquoteTag)
|
| - || element->hasTagName(HTMLNames::qTag)
|
| - || element->hasTagName(HTMLNames::delTag)
|
| - || element->hasTagName(HTMLNames::insTag))
|
| - attributeName = &HTMLNames::citeAttr;
|
| - else if (element->hasTagName(HTMLNames::linkTag)) {
|
| - // If the link element is not css, ignore it.
|
| - if (equalIgnoringCase(element->getAttribute(HTMLNames::typeAttr), "text/css")) {
|
| - // FIXME: Add support for extracting links of sub-resources which
|
| - // are inside style-sheet such as @import, @font-face, url(), etc.
|
| - attributeName = &HTMLNames::hrefAttr;
|
| - }
|
| - } else if (element->hasTagName(HTMLNames::objectTag))
|
| - attributeName = &HTMLNames::dataAttr;
|
| - else if (element->hasTagName(HTMLNames::embedTag))
|
| - attributeName = &HTMLNames::srcAttr;
|
| -
|
| - if (!attributeName)
|
| - return KURL();
|
| -
|
| - String value = element->getAttribute(*attributeName);
|
| - // Ignore javascript content.
|
| - if (value.isEmpty() || value.stripWhiteSpace().startsWith("javascript:", false))
|
| - return KURL();
|
| -
|
| - return element->document().completeURL(value);
|
| -}
|
| -
|
| -void retrieveResourcesForElement(Element* element,
|
| - Vector<Frame*>* visitedFrames,
|
| - Vector<Frame*>* framesToVisit,
|
| - Vector<KURL>* frameURLs,
|
| - Vector<KURL>* resourceURLs)
|
| -{
|
| - // If the node is a frame, we'll process it later in retrieveResourcesForFrame.
|
| - if ((element->hasTagName(HTMLNames::iframeTag) || element->hasTagName(HTMLNames::frameTag)
|
| - || element->hasTagName(HTMLNames::objectTag) || element->hasTagName(HTMLNames::embedTag))
|
| - && element->isFrameOwnerElement()) {
|
| - if (Frame* frame = toHTMLFrameOwnerElement(element)->contentFrame()) {
|
| - if (!visitedFrames->contains(frame))
|
| - framesToVisit->append(frame);
|
| - return;
|
| - }
|
| - }
|
| -
|
| - KURL url = getSubResourceURLFromElement(element);
|
| - if (url.isEmpty() || !url.isValid())
|
| - return; // No subresource for this node.
|
| -
|
| - // Ignore URLs that have a non-standard protocols. Since the FTP protocol
|
| - // does no have a cache mechanism, we skip it as well.
|
| - if (!url.protocolIsInHTTPFamily() && !url.isLocalFile())
|
| - return;
|
| -
|
| - if (!resourceURLs->contains(url))
|
| - resourceURLs->append(url);
|
| -}
|
| -
|
| -void retrieveResourcesForFrame(Frame* frame,
|
| - const blink::WebVector<blink::WebCString>& supportedSchemes,
|
| - Vector<Frame*>* visitedFrames,
|
| - Vector<Frame*>* framesToVisit,
|
| - Vector<KURL>* frameURLs,
|
| - Vector<KURL>* resourceURLs)
|
| -{
|
| - KURL frameURL = frame->loader().documentLoader()->request().url();
|
| -
|
| - // If the frame's URL is invalid, ignore it, it is not retrievable.
|
| - if (!frameURL.isValid())
|
| - return;
|
| -
|
| - // Ignore frames from unsupported schemes.
|
| - bool isValidScheme = false;
|
| - for (size_t i = 0; i < supportedSchemes.size(); ++i) {
|
| - if (frameURL.protocolIs(static_cast<CString>(supportedSchemes[i]).data())) {
|
| - isValidScheme = true;
|
| - break;
|
| - }
|
| - }
|
| - if (!isValidScheme)
|
| - return;
|
| -
|
| - // If we have already seen that frame, ignore it.
|
| - if (visitedFrames->contains(frame))
|
| - return;
|
| - visitedFrames->append(frame);
|
| - if (!frameURLs->contains(frameURL))
|
| - frameURLs->append(frameURL);
|
| -
|
| - // Now get the resources associated with each node of the document.
|
| - RefPtr<HTMLCollection> allNodes = frame->document()->all();
|
| - for (unsigned i = 0; i < allNodes->length(); ++i) {
|
| - Node* node = allNodes->item(i);
|
| - // We are only interested in HTML resources.
|
| - if (!node->isElementNode())
|
| - continue;
|
| - retrieveResourcesForElement(toElement(node),
|
| - visitedFrames, framesToVisit,
|
| - frameURLs, resourceURLs);
|
| - }
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| namespace blink {
|
|
|
| void WebPageSerializer::serialize(WebView* view, WebVector<WebPageSerializer::Resource>* resourcesParam)
|
| @@ -230,50 +109,26 @@ bool WebPageSerializer::serialize(WebFrame* frame,
|
| const WebVector<WebString>& localPaths,
|
| const WebString& localDirectoryName)
|
| {
|
| - WebPageSerializerImpl serializerImpl(
|
| - frame, recursive, client, links, localPaths, localDirectoryName);
|
| - return serializerImpl.serialize();
|
| -}
|
| + ASSERT(frame);
|
| + ASSERT(client);
|
| + ASSERT(links.size() == localPaths.size());
|
|
|
| -bool WebPageSerializer::retrieveAllResources(WebView* view,
|
| - const WebVector<WebCString>& supportedSchemes,
|
| - WebVector<WebURL>* resourceURLs,
|
| - WebVector<WebURL>* frameURLs) {
|
| - WebFrameImpl* mainFrame = toWebFrameImpl(view->mainFrame());
|
| - if (!mainFrame)
|
| - return false;
|
| + LinkLocalPathMap m_localLinks;
|
|
|
| - Vector<Frame*> framesToVisit;
|
| - Vector<Frame*> visitedFrames;
|
| - Vector<KURL> frameKURLs;
|
| - Vector<KURL> resourceKURLs;
|
| -
|
| - // Let's retrieve the resources from every frame in this page.
|
| - framesToVisit.append(mainFrame->frame());
|
| - while (!framesToVisit.isEmpty()) {
|
| - Frame* frame = framesToVisit[0];
|
| - framesToVisit.remove(0);
|
| - retrieveResourcesForFrame(frame, supportedSchemes,
|
| - &visitedFrames, &framesToVisit,
|
| - &frameKURLs, &resourceKURLs);
|
| + for (size_t i = 0; i < links.size(); i++) {
|
| + KURL url = links[i];
|
| + ASSERT(!m_localLinks.contains(url.string()));
|
| + m_localLinks.set(url.string(), localPaths[i]);
|
| }
|
|
|
| - // Converts the results to WebURLs.
|
| - WebVector<WebURL> resultResourceURLs(resourceKURLs.size());
|
| - for (size_t i = 0; i < resourceKURLs.size(); ++i) {
|
| - resultResourceURLs[i] = resourceKURLs[i];
|
| - // A frame's src can point to the same URL as another resource, keep the
|
| - // resource URL only in such cases.
|
| - size_t index = frameKURLs.find(resourceKURLs[i]);
|
| - if (index != kNotFound)
|
| - frameKURLs.remove(index);
|
| - }
|
| - *resourceURLs = resultResourceURLs;
|
| - WebVector<WebURL> resultFrameURLs(frameKURLs.size());
|
| - for (size_t i = 0; i < frameKURLs.size(); ++i)
|
| - resultFrameURLs[i] = frameKURLs[i];
|
| - *frameURLs = resultFrameURLs;
|
| + Vector<SerializedResource> resources;
|
| + PageSerializer serializer(&resources, &m_localLinks, localDirectoryName);
|
| + serializer.serialize(toWebViewImpl(frame->view())->page());
|
|
|
| + for (Vector<SerializedResource>::const_iterator iter = resources.begin(); iter != resources.end(); ++iter) {
|
| + client->didSerializeDataForFrame(iter->url, WebCString(iter->data->data(), iter->data->size()), WebPageSerializerClient::CurrentFrameIsFinished);
|
| + }
|
| + client->didSerializeDataForFrame(KURL(), WebCString("", 0), WebPageSerializerClient::AllFramesAreFinished);
|
| return true;
|
| }
|
|
|
|
|