Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(218)

Unified Diff: third_party/WebKit/Source/web/WebPageSerializer.cpp

Issue 1441553002: Generating CIDs in Blink during MHTML serialization. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-per-frame-page-serializer-only
Patch Set: Using references for out parameters in Blink. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/web/WebPageSerializer.cpp
diff --git a/third_party/WebKit/Source/web/WebPageSerializer.cpp b/third_party/WebKit/Source/web/WebPageSerializer.cpp
index c6954126ab227b4c3426e95364bc56011dc3c294..2629b11fb187739814af34109ebe6b9ddbcb0b47 100644
--- a/third_party/WebKit/Source/web/WebPageSerializer.cpp
+++ b/third_party/WebKit/Source/web/WebPageSerializer.cpp
@@ -45,6 +45,7 @@
#include "core/page/PageSerializer.h"
#include "platform/SerializedResource.h"
#include "platform/mhtml/MHTMLArchive.h"
+#include "platform/mhtml/MHTMLParser.h"
#include "platform/weborigin/KURL.h"
#include "public/platform/WebCString.h"
#include "public/platform/WebString.h"
@@ -56,6 +57,8 @@
#include "web/WebLocalFrameImpl.h"
#include "web/WebPageSerializerImpl.h"
#include "web/WebViewImpl.h"
+#include "wtf/Assertions.h"
+#include "wtf/HashMap.h"
#include "wtf/Vector.h"
#include "wtf/text/StringConcatenate.h"
@@ -65,12 +68,16 @@ namespace {
class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
public:
tkent 2015/12/02 00:59:59 Please add STACK_ALLOCATED(); for oilpan
Łukasz Anforowicz 2015/12/02 02:57:08 After doing this I got the following error: [blink
tkent 2015/12/02 06:33:54 Looks ok. It seems it's hard to make Delegate STA
- ~MHTMLPageSerializerDelegate() override;
+ MHTMLPageSerializerDelegate(HashMap<Frame*, String>* frameToContentID);
tkent 2015/12/02 00:59:59 The argument and m_frameToContentId should be |con
Łukasz Anforowicz 2015/12/02 02:57:08 I made the change you've suggested, but I wanted t
Łukasz Anforowicz 2015/12/02 03:11:47 Actually, I don't know how I managed to confuse my
bool shouldIgnoreAttribute(const Attribute&) override;
+ bool rewriteLink(const Element&, String& rewrittenLink) override;
+private:
tkent 2015/12/02 00:59:59 nit: add a blank line before |private:|
Łukasz Anforowicz 2015/12/02 02:57:08 Done.
+ HashMap<Frame*, String>* m_frameToContentID;
};
-
-MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate()
+// Stores the caller-supplied |frameToContentID| pointer as-is; the map itself
+// is not copied, so the delegate only refers to the caller's map.
+MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
+ HashMap<Frame*, String>* frameToContentID)
+ : m_frameToContentID(frameToContentID)
{
}
@@ -82,12 +89,64 @@ bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribu
return attribute.localName() == HTMLNames::srcsetAttr;
}
+// PageSerializer::Delegate override. For a frame-owner element that has a
+// content frame, replaces the element's link with the cid: URI derived from
+// the Content-ID registered for that frame in |m_frameToContentID|.
+//
+// Returns true and assigns |rewrittenLink| when the link was rewritten.
+// Returns false, leaving |rewrittenLink| untouched, when |element| is not a
+// frame owner, has no content frame, or is an <object> whose document is
+// absent or of a kind other than HTML / XHTML / image.
+bool MHTMLPageSerializerDelegate::rewriteLink(
+    const Element& element,
+    String& rewrittenLink)
+{
+    if (!element.isFrameOwnerElement())
+        return false;
+
+    auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
+    Frame* frame = frameOwnerElement->contentFrame();
+    if (!frame)
+        return false;
+
+    KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID->get(frame));
+    ASSERT(cidURI.isValid());
+
+    if (isHTMLFrameElementBase(&element)) {
+        rewrittenLink = cidURI.string();
+        return true;
+    }
+
+    if (isHTMLObjectElement(&element)) {
+        // A non-null contentFrame() does not by itself guarantee a non-null
+        // contentDocument(), so check |doc| before dereferencing it.
+        Document* doc = frameOwnerElement->contentDocument();
+        bool isHandledBySerializer = doc
+            && (doc->isHTMLDocument() || doc->isXHTMLDocument() || doc->isImageDocument());
+        if (isHandledBySerializer) {
+            rewrittenLink = cidURI.string();
+            return true;
+        }
+    }
+
+    return false;
+}
+
} // namespace
+// Builds a map from each frame of |page| to a generated Content-ID string.
+// Frames are visited starting at page->mainFrame() and following
+// frame->tree().traverseNext(); the N-th visited frame (counting from 0) gets
+// the ID "<frameN@mhtml.blink>".
+static HashMap<Frame*, String> generateFrameContentIDs(Page* page)
tkent 2015/12/02 00:59:59 The function should be in anonymous namespace abov
Łukasz Anforowicz 2015/12/02 02:57:08 Done (for this and the next function).  I am not s
+{
+ HashMap<Frame*, String> frameToContentID;
+ int frameID = 0;
+ for (Frame* frame = page->mainFrame(); frame; frame = frame->tree().traverseNext()) {
+ // TODO(lukasza): Move cid generation to the browser + use base/guid.h
+ // (see the draft at crrev.com/1386873003).
+ StringBuilder contentIDBuilder;
+ contentIDBuilder.appendLiteral("<frame");
+ contentIDBuilder.appendNumber(frameID++);
+ contentIDBuilder.appendLiteral("@mhtml.blink>");
+
+ // Each frame is inserted with its own sequential ID.
+ frameToContentID.add(frame, contentIDBuilder.toString());
+ }
+ return frameToContentID;
+}
+
static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::EncodingPolicy encodingPolicy)
{
Vector<SerializedResource> resources;
- PageSerializer serializer(&resources, adoptPtr(new MHTMLPageSerializerDelegate));
+ HashMap<Frame*, String> frameToContentID = generateFrameContentIDs(page);
+ MHTMLPageSerializerDelegate delegate(&frameToContentID);
+ PageSerializer serializer(resources, &delegate);
RefPtr<SharedBuffer> output = SharedBuffer::create();
String boundary = MHTMLArchive::generateMHTMLBoundary();
@@ -105,9 +164,16 @@ static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::E
resources.clear();
serializer.serializeFrame(*toLocalFrame(frame));
- for (const auto& resource : resources) {
+ bool isFirstResource = true;
+ for (const SerializedResource& resource : resources) {
+ // Frame is the 1st resource (see PageSerializer::serializeFrame doc
+ // comment). Frames need a Content-ID header.
+ String contentID = isFirstResource ? frameToContentID.get(frame) : String();
+
MHTMLArchive::generateMHTMLPart(
- boundary, encodingPolicy, resource, *output);
+ boundary, contentID, encodingPolicy, resource, *output);
+
+ isFirstResource = false;
}
}

Powered by Google App Engine
This is Rietveld 408576698