Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: third_party/WebKit/Source/web/WebPageSerializer.cpp

Issue 1441553002: Generating CIDs in Blink during MHTML serialization. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-per-frame-page-serializer-only
Patch Set: Rebasing... Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2009 Google Inc. All rights reserved. 2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 27 matching lines...) Expand all
38 #include "core/html/HTMLAllCollection.h" 38 #include "core/html/HTMLAllCollection.h"
39 #include "core/html/HTMLFrameElementBase.h" 39 #include "core/html/HTMLFrameElementBase.h"
40 #include "core/html/HTMLFrameOwnerElement.h" 40 #include "core/html/HTMLFrameOwnerElement.h"
41 #include "core/html/HTMLInputElement.h" 41 #include "core/html/HTMLInputElement.h"
42 #include "core/html/HTMLTableElement.h" 42 #include "core/html/HTMLTableElement.h"
43 #include "core/loader/DocumentLoader.h" 43 #include "core/loader/DocumentLoader.h"
44 #include "core/page/Page.h" 44 #include "core/page/Page.h"
45 #include "core/page/PageSerializer.h" 45 #include "core/page/PageSerializer.h"
46 #include "platform/SerializedResource.h" 46 #include "platform/SerializedResource.h"
47 #include "platform/mhtml/MHTMLArchive.h" 47 #include "platform/mhtml/MHTMLArchive.h"
48 #include "platform/mhtml/MHTMLParser.h"
48 #include "platform/weborigin/KURL.h" 49 #include "platform/weborigin/KURL.h"
49 #include "public/platform/WebCString.h" 50 #include "public/platform/WebCString.h"
50 #include "public/platform/WebString.h" 51 #include "public/platform/WebString.h"
51 #include "public/platform/WebURL.h" 52 #include "public/platform/WebURL.h"
52 #include "public/platform/WebVector.h" 53 #include "public/platform/WebVector.h"
53 #include "public/web/WebFrame.h" 54 #include "public/web/WebFrame.h"
54 #include "public/web/WebPageSerializerClient.h" 55 #include "public/web/WebPageSerializerClient.h"
55 #include "public/web/WebView.h" 56 #include "public/web/WebView.h"
56 #include "web/WebLocalFrameImpl.h" 57 #include "web/WebLocalFrameImpl.h"
57 #include "web/WebPageSerializerImpl.h" 58 #include "web/WebPageSerializerImpl.h"
58 #include "web/WebViewImpl.h" 59 #include "web/WebViewImpl.h"
60 #include "wtf/Assertions.h"
61 #include "wtf/HashMap.h"
62 #include "wtf/Noncopyable.h"
59 #include "wtf/Vector.h" 63 #include "wtf/Vector.h"
60 #include "wtf/text/StringConcatenate.h" 64 #include "wtf/text/StringConcatenate.h"
61 65
62 namespace blink { 66 namespace blink {
63 67
64 namespace { 68 namespace {
65 69
70 using ContentIDMap = WillBeHeapHashMap<RawPtrWillBeMember<Frame>, String>;
71
66 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate { 72 class MHTMLPageSerializerDelegate final : public PageSerializer::Delegate {
73 WTF_MAKE_NONCOPYABLE(MHTMLPageSerializerDelegate);
67 public: 74 public:
68 ~MHTMLPageSerializerDelegate() override; 75 MHTMLPageSerializerDelegate(const ContentIDMap& frameToContentID);
69 bool shouldIgnoreAttribute(const Attribute&) override; 76 bool shouldIgnoreAttribute(const Attribute&) override;
77 bool rewriteLink(const Element&, String& rewrittenLink) override;
78
79 private:
80 const ContentIDMap& m_frameToContentID;
70 }; 81 };
71 82
72 83 MHTMLPageSerializerDelegate::MHTMLPageSerializerDelegate(
73 MHTMLPageSerializerDelegate::~MHTMLPageSerializerDelegate() 84 const ContentIDMap& frameToContentID)
85 : m_frameToContentID(frameToContentID)
74 { 86 {
75 } 87 }
76 88
77 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute) 89 bool MHTMLPageSerializerDelegate::shouldIgnoreAttribute(const Attribute& attribute)
78 { 90 {
79 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src 91 // TODO(fgorski): Presence of srcset attribute causes MHTML to not display images, as only the value of src
80 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure MHTML 92 // is pulled into the archive. Discarding srcset prevents the problem. Long term we should make sure MHTML
81 // plays nicely with srcset. 93 // plays nicely with srcset.
82 return attribute.localName() == HTMLNames::srcsetAttr; 94 return attribute.localName() == HTMLNames::srcsetAttr;
83 } 95 }
84 96
85 } // namespace 97 bool MHTMLPageSerializerDelegate::rewriteLink(
98 const Element& element,
99 String& rewrittenLink)
100 {
101 if (!element.isFrameOwnerElement())
102 return false;
86 103
87 static PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::E ncodingPolicy encodingPolicy) 104 auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
105 Frame* frame = frameOwnerElement->contentFrame();
106 if (!frame)
107 return false;
108
109 KURL cidURI = MHTMLParser::convertContentIDToURI(m_frameToContentID.get(frame));
110 ASSERT(cidURI.isValid());
111
112 if (isHTMLFrameElementBase(&element)) {
113 rewrittenLink = cidURI.string();
114 return true;
115 }
116
117 if (isHTMLObjectElement(&element)) {
118 Document* doc = frameOwnerElement->contentDocument();
119 bool isHandledBySerializer = doc->isHTMLDocument()
120 || doc->isXHTMLDocument() || doc->isImageDocument();
121 if (isHandledBySerializer) {
122 rewrittenLink = cidURI.string();
123 return true;
124 }
125 }
126
127 return false;
128 }
129
130 ContentIDMap generateFrameContentIDs(Page* page)
131 {
132 ContentIDMap frameToContentID;
133 int frameID = 0;
134 for (Frame* frame = page->mainFrame(); frame; frame = frame->tree().traverseNext()) {
135 // TODO(lukasza): Move cid generation to the browser + use base/guid.h
136 // (see the draft at crrev.com/1386873003).
137 StringBuilder contentIDBuilder;
138 contentIDBuilder.appendLiteral("<frame");
139 contentIDBuilder.appendNumber(frameID++);
140 contentIDBuilder.appendLiteral("@mhtml.blink>");
141
142 frameToContentID.add(frame, contentIDBuilder.toString());
143 }
144 return frameToContentID;
145 }
146
147 PassRefPtr<SharedBuffer> serializePageToMHTML(Page* page, MHTMLArchive::Encoding Policy encodingPolicy)
88 { 148 {
89 Vector<SerializedResource> resources; 149 Vector<SerializedResource> resources;
90 PageSerializer serializer(&resources, adoptPtr(new MHTMLPageSerializerDelega te)); 150 ContentIDMap frameToContentID = generateFrameContentIDs(page);
151 MHTMLPageSerializerDelegate delegate(frameToContentID);
152 PageSerializer serializer(resources, &delegate);
91 153
92 RefPtr<SharedBuffer> output = SharedBuffer::create(); 154 RefPtr<SharedBuffer> output = SharedBuffer::create();
93 String boundary = MHTMLArchive::generateMHTMLBoundary(); 155 String boundary = MHTMLArchive::generateMHTMLBoundary();
94 156
95 Document* document = page->deprecatedLocalMainFrame()->document(); 157 Document* document = page->deprecatedLocalMainFrame()->document();
96 MHTMLArchive::generateMHTMLHeader( 158 MHTMLArchive::generateMHTMLHeader(
97 boundary, document->title(), document->suggestedMIMEType(), *output); 159 boundary, document->title(), document->suggestedMIMEType(), *output);
98 160
99 for (Frame* frame = page->deprecatedLocalMainFrame(); frame; frame = frame->tree().traverseNext()) { 161 for (Frame* frame = page->deprecatedLocalMainFrame(); frame; frame = frame->tree().traverseNext()) {
100 // TODO(lukasza): This causes incomplete MHTML for OOPIFs. 162 // TODO(lukasza): This causes incomplete MHTML for OOPIFs.
101 // (crbug.com/538766) 163 // (crbug.com/538766)
102 if (!frame->isLocalFrame()) 164 if (!frame->isLocalFrame())
103 continue; 165 continue;
104 166
105 resources.clear(); 167 resources.clear();
106 serializer.serializeFrame(*toLocalFrame(frame)); 168 serializer.serializeFrame(*toLocalFrame(frame));
107 169
108 for (const auto& resource : resources) { 170 bool isFirstResource = true;
171 for (const SerializedResource& resource : resources) {
172 // Frame is the 1st resource (see PageSerializer::serializeFrame doc
173 // comment). Frames need a Content-ID header.
174 String contentID = isFirstResource ? frameToContentID.get(frame) : String();
175
109 MHTMLArchive::generateMHTMLPart( 176 MHTMLArchive::generateMHTMLPart(
110 boundary, encodingPolicy, resource, *output); 177 boundary, contentID, encodingPolicy, resource, *output);
178
179 isFirstResource = false;
111 } 180 }
112 } 181 }
113 182
114 MHTMLArchive::generateMHTMLFooter(boundary, *output); 183 MHTMLArchive::generateMHTMLFooter(boundary, *output);
115 return output.release(); 184 return output.release();
116 } 185 }
117 186
187 } // namespace
188
118 WebCString WebPageSerializer::serializeToMHTML(WebView* view) 189 WebCString WebPageSerializer::serializeToMHTML(WebView* view)
119 { 190 {
120 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page( ), MHTMLArchive::UseDefaultEncoding); 191 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page( ), MHTMLArchive::UseDefaultEncoding);
121 // FIXME: we are copying all the data here. Ideally we would have a WebSharedData(). 192 // FIXME: we are copying all the data here. Ideally we would have a WebSharedData().
122 return WebCString(mhtml->data(), mhtml->size()); 193 return WebCString(mhtml->data(), mhtml->size());
123 } 194 }
124 195
125 WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view) 196 WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
126 { 197 {
127 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page( ), MHTMLArchive::UseBinaryEncoding); 198 RefPtr<SharedBuffer> mhtml = serializePageToMHTML(toWebViewImpl(view)->page( ), MHTMLArchive::UseBinaryEncoding);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
159 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTar get) 230 WebString WebPageSerializer::generateBaseTagDeclaration(const WebString& baseTar get)
160 { 231 {
161 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|. 232 // TODO(yosin) We should call |PageSerializer::baseTagDeclarationOf()|.
162 if (baseTarget.isEmpty()) 233 if (baseTarget.isEmpty())
163 return String("<base href=\".\">"); 234 return String("<base href=\".\">");
164 String baseString = "<base href=\".\" target=\"" + static_cast<const String& >(baseTarget) + "\">"; 235 String baseString = "<base href=\".\" target=\"" + static_cast<const String& >(baseTarget) + "\">";
165 return baseString; 236 return baseString;
166 } 237 }
167 238
168 } // namespace blink 239 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698