Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2118)

Side by Side Diff: content/browser/download/mhtml_generation_manager.cc

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Rebasing... Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/mhtml_generation_manager.h" 5 #include "content/browser/download/mhtml_generation_manager.h"
6 6
7 #include <map> 7 #include <map>
8 #include <queue> 8 #include <queue>
9 #include <utility> 9 #include <utility>
10 10
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/files/file.h" 12 #include "base/files/file.h"
13 #include "base/guid.h" 13 #include "base/guid.h"
14 #include "base/macros.h" 14 #include "base/macros.h"
15 #include "base/rand_util.h" 15 #include "base/rand_util.h"
16 #include "base/scoped_observer.h" 16 #include "base/scoped_observer.h"
17 #include "base/stl_util.h" 17 #include "base/stl_util.h"
18 #include "base/strings/string_number_conversions.h" 18 #include "base/strings/string_number_conversions.h"
19 #include "base/strings/stringprintf.h" 19 #include "base/strings/stringprintf.h"
20 #include "content/browser/frame_host/frame_tree_node.h" 20 #include "content/browser/frame_host/frame_tree_node.h"
21 #include "content/browser/frame_host/render_frame_host_impl.h"
21 #include "content/common/frame_messages.h" 22 #include "content/common/frame_messages.h"
22 #include "content/public/browser/browser_thread.h" 23 #include "content/public/browser/browser_thread.h"
23 #include "content/public/browser/render_frame_host.h" 24 #include "content/public/browser/render_frame_host.h"
24 #include "content/public/browser/render_process_host.h" 25 #include "content/public/browser/render_process_host.h"
25 #include "content/public/browser/render_process_host_observer.h" 26 #include "content/public/browser/render_process_host_observer.h"
26 #include "content/public/browser/web_contents.h" 27 #include "content/public/browser/web_contents.h"
28 #include "url/gurl.h"
27 29
28 namespace content { 30 namespace content {
29 31
30 // The class and all of its members live on the UI thread. Only static methods 32 // The class and all of its members live on the UI thread. Only static methods
31 // are executed on other threads. 33 // are executed on other threads.
32 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { 34 class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
33 public: 35 public:
34 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); 36 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback);
35 ~Job() override; 37 ~Job() override;
36 38
37 void set_browser_file(base::File file) { browser_file_ = std::move(file); } 39 void set_browser_file(base::File file) { browser_file_ = std::move(file); }
38 40
39 GenerateMHTMLCallback callback() const { return callback_; } 41 GenerateMHTMLCallback callback() const { return callback_; }
40 42
43 // Handler for FrameHostMsg_SerializeAsMHTMLResponse (a notification from the
44 // renderer that the MHTML generation for previous frame has finished).
45 // Returns |true| upon success; |false| otherwise.
46 bool OnSerializeAsMHTMLResponse(
47 RenderFrameHostImpl* sender,
48 const std::set<std::string>& digests_of_uris_of_serialized_resources);
49
41 // Sends IPC to the renderer, asking for MHTML generation of the next frame. 50 // Sends IPC to the renderer, asking for MHTML generation of the next frame.
42 // 51 //
43 // Returns true if the message was sent successfully; false otherwise. 52 // Returns true if the message was sent successfully; false otherwise.
44 bool SendToNextRenderFrame(); 53 bool SendToNextRenderFrame();
45 54
46 // Indicates if more calls to SendToNextRenderFrame are needed. 55 // Indicates if more calls to SendToNextRenderFrame are needed.
47 bool HasMoreFramesToProcess() const { 56 bool IsDone() const {
48 return !pending_frame_tree_node_ids_.empty(); 57 bool waiting_for_response_from_renderer =
58 frame_tree_node_id_of_busy_frame_ !=
59 FrameTreeNode::kFrameTreeNodeInvalidId;
60 bool no_more_requests_to_send = pending_frame_tree_node_ids_.empty();
61 return !waiting_for_response_from_renderer && no_more_requests_to_send;
49 } 62 }
50 63
51 // Close the file on the file thread and respond back on the UI thread with 64 // Close the file on the file thread and respond back on the UI thread with
52 // file size. 65 // file size.
53 void CloseFile(base::Callback<void(int64_t file_size)> callback); 66 void CloseFile(base::Callback<void(int64_t file_size)> callback);
54 67
55 // RenderProcessHostObserver: 68 // RenderProcessHostObserver:
56 void RenderProcessExited(RenderProcessHost* host, 69 void RenderProcessExited(RenderProcessHost* host,
57 base::TerminationStatus status, 70 base::TerminationStatus status,
58 int exit_code) override; 71 int exit_code) override;
59 void RenderProcessHostDestroyed(RenderProcessHost* host) override; 72 void RenderProcessHostDestroyed(RenderProcessHost* host) override;
60 73
61 private: 74 private:
62 static std::string GenerateMHTMLBoundaryMarker(); 75 static std::string GenerateMHTMLBoundaryMarker();
63 static int64_t CloseFileOnFileThread(base::File file); 76 static int64_t CloseFileOnFileThread(base::File file);
64 void AddFrame(RenderFrameHost* render_frame_host); 77 void AddFrame(RenderFrameHost* render_frame_host);
65 78
66 // Creates a new map with values (content ids) the same as in 79 // Creates a new map with values (content ids) the same as in
67 // |frame_tree_node_to_content_id_| map, but with the keys translated from 80 // |frame_tree_node_to_content_id_| map, but with the keys translated from
68 // frame_tree_node_id into a |site_instance|-specific routing_id. 81 // frame_tree_node_id into a |site_instance|-specific routing_id.
69 std::map<int, std::string> CreateFrameRoutingIdToContentId( 82 std::map<int, std::string> CreateFrameRoutingIdToContentId(
70 SiteInstance* site_instance); 83 SiteInstance* site_instance);
71 84
72 // Id used to map renderer responses to jobs. 85 // Id used to map renderer responses to jobs.
73 // See also MHTMLGenerationManager::id_to_job_ map. 86 // See also MHTMLGenerationManager::id_to_job_ map.
74 int job_id_; 87 int job_id_;
75 88
89 // The IDs of frames that still need to be processed.
90 std::queue<int> pending_frame_tree_node_ids_;
91
92 // Identifies a frame to which we've sent FrameMsg_SerializeAsMHTML but for
93 // which we didn't yet process FrameHostMsg_SerializeAsMHTMLResponse via
94 // OnSerializeAsMHTMLResponse.
95 int frame_tree_node_id_of_busy_frame_;
96
76 // The handle to the file the MHTML is saved to for the browser process. 97 // The handle to the file the MHTML is saved to for the browser process.
77 base::File browser_file_; 98 base::File browser_file_;
78 99
79 // The IDs of frames we still need to process.
80 std::queue<int> pending_frame_tree_node_ids_;
81
82 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts 100 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts
83 // for more details about what "content ids" are and how they are used). 101 // for more details about what "content ids" are and how they are used).
84 std::map<int, std::string> frame_tree_node_to_content_id_; 102 std::map<int, std::string> frame_tree_node_to_content_id_;
85 103
86 // MIME multipart boundary to use in the MHTML doc. 104 // MIME multipart boundary to use in the MHTML doc.
87 std::string mhtml_boundary_marker_; 105 std::string mhtml_boundary_marker_;
88 106
107 // Digests of URIs of already generated MHTML parts.
108 std::set<std::string> digests_of_already_serialized_uris_;
109 std::string salt_;
110
89 // The callback to call once generation is complete. 111 // The callback to call once generation is complete.
90 GenerateMHTMLCallback callback_; 112 GenerateMHTMLCallback callback_;
91 113
92 // RAII helper for registering this Job as a RenderProcessHost observer. 114 // RAII helper for registering this Job as a RenderProcessHost observer.
93 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> 115 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job>
94 observed_renderer_process_host_; 116 observed_renderer_process_host_;
95 117
96 DISALLOW_COPY_AND_ASSIGN(Job); 118 DISALLOW_COPY_AND_ASSIGN(Job);
97 }; 119 };
98 120
99 MHTMLGenerationManager::Job::Job(int job_id, 121 MHTMLGenerationManager::Job::Job(int job_id,
100 WebContents* web_contents, 122 WebContents* web_contents,
101 GenerateMHTMLCallback callback) 123 GenerateMHTMLCallback callback)
102 : job_id_(job_id), 124 : job_id_(job_id),
125 frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId),
103 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()), 126 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()),
127 salt_(base::GenerateGUID()),
104 callback_(callback), 128 callback_(callback),
105 observed_renderer_process_host_(this) { 129 observed_renderer_process_host_(this) {
106 DCHECK_CURRENTLY_ON(BrowserThread::UI); 130 DCHECK_CURRENTLY_ON(BrowserThread::UI);
107 web_contents->ForEachFrame(base::Bind( 131 web_contents->ForEachFrame(base::Bind(
108 &MHTMLGenerationManager::Job::AddFrame, 132 &MHTMLGenerationManager::Job::AddFrame,
109 base::Unretained(this))); // Safe because ForEachFrame is synchronous. 133 base::Unretained(this))); // Safe because ForEachFrame is synchronous.
110 134
111 // Main frame needs to be processed first. 135 // Main frame needs to be processed first.
112 DCHECK(!pending_frame_tree_node_ids_.empty()); 136 DCHECK(!pending_frame_tree_node_ids_.empty());
113 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) 137 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front())
(...skipping 23 matching lines...) Expand all
137 161
138 result[routing_id] = content_id; 162 result[routing_id] = content_id;
139 } 163 }
140 return result; 164 return result;
141 } 165 }
142 166
143 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { 167 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() {
144 DCHECK(browser_file_.IsValid()); 168 DCHECK(browser_file_.IsValid());
145 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); 169 DCHECK_LT(0u, pending_frame_tree_node_ids_.size());
146 170
171 FrameMsg_SerializeAsMHTML_Params ipc_params;
172 ipc_params.job_id = job_id_;
173 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker_;
174
147 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); 175 int frame_tree_node_id = pending_frame_tree_node_ids_.front();
148 pending_frame_tree_node_ids_.pop(); 176 pending_frame_tree_node_ids_.pop();
149 bool is_last_frame = pending_frame_tree_node_ids_.empty(); 177 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty();
150 178
151 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); 179 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id);
152 if (!ftn) // The contents went away. 180 if (!ftn) // The contents went away.
153 return false; 181 return false;
154 RenderFrameHost* rfh = ftn->current_frame_host(); 182 RenderFrameHost* rfh = ftn->current_frame_host();
155 183
156 // Get notified if the target of the IPC message dies between responding. 184 // Get notified if the target of the IPC message dies between responding.
157 observed_renderer_process_host_.RemoveAll(); 185 observed_renderer_process_host_.RemoveAll();
158 observed_renderer_process_host_.Add(rfh->GetProcess()); 186 observed_renderer_process_host_.Add(rfh->GetProcess());
159 187
160 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( 188 // Tell the renderer to skip (= deduplicate) already covered MHTML parts.
189 ipc_params.salt = salt_;
190 ipc_params.digests_of_uris_to_skip = digests_of_already_serialized_uris_;
191
192 ipc_params.destination_file = IPC::GetFileHandleForProcess(
161 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), 193 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(),
162 false); // |close_source_handle|. 194 false); // |close_source_handle|.
163 rfh->Send(new FrameMsg_SerializeAsMHTML( 195 ipc_params.frame_routing_id_to_content_id =
164 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, 196 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance());
165 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); 197
198 // Send the IPC asking the renderer to serialize the frame.
199 DCHECK_EQ(FrameTreeNode::kFrameTreeNodeInvalidId,
200 frame_tree_node_id_of_busy_frame_);
201 frame_tree_node_id_of_busy_frame_ = frame_tree_node_id;
202 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params));
166 return true; 203 return true;
167 } 204 }
168 205
169 void MHTMLGenerationManager::Job::RenderProcessExited( 206 void MHTMLGenerationManager::Job::RenderProcessExited(
170 RenderProcessHost* host, 207 RenderProcessHost* host,
171 base::TerminationStatus status, 208 base::TerminationStatus status,
172 int exit_code) { 209 int exit_code) {
173 DCHECK_CURRENTLY_ON(BrowserThread::UI); 210 DCHECK_CURRENTLY_ON(BrowserThread::UI);
174 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); 211 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this);
175 } 212 }
(...skipping 24 matching lines...) Expand all
200 return; 237 return;
201 } 238 }
202 239
203 BrowserThread::PostTaskAndReplyWithResult( 240 BrowserThread::PostTaskAndReplyWithResult(
204 BrowserThread::FILE, FROM_HERE, 241 BrowserThread::FILE, FROM_HERE,
205 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, 242 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread,
206 base::Passed(std::move(browser_file_))), 243 base::Passed(std::move(browser_file_))),
207 callback); 244 callback);
208 } 245 }
209 246
247 bool MHTMLGenerationManager::Job::OnSerializeAsMHTMLResponse(
248 RenderFrameHostImpl* sender,
249 const std::set<std::string>& digests_of_uris_of_serialized_resources) {
250 // Sanitize renderer input / reject unexpected messages.
251 int sender_id = sender->frame_tree_node()->frame_tree_node_id();
252 if (sender_id != frame_tree_node_id_of_busy_frame_) {
253 NOTREACHED();
254 return false; // Report failure.
255 }
256 frame_tree_node_id_of_busy_frame_ = FrameTreeNode::kFrameTreeNodeInvalidId;
257
258 // Renderer should be deduping resources with the same uris.
259 DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>(
260 digests_of_already_serialized_uris_,
261 digests_of_uris_of_serialized_resources).size());
262 digests_of_already_serialized_uris_.insert(
263 digests_of_uris_of_serialized_resources.begin(),
264 digests_of_uris_of_serialized_resources.end());
265
266 if (pending_frame_tree_node_ids_.empty())
267 return true; // Report success.
268
269 return SendToNextRenderFrame();
270 }
271
210 // static 272 // static
211 int64_t MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { 273 int64_t MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) {
212 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 274 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
213 DCHECK(file.IsValid()); 275 DCHECK(file.IsValid());
214 int64_t file_size = file.GetLength(); 276 int64_t file_size = file.GetLength();
215 file.Close(); 277 file.Close();
216 return file_size; 278 return file_size;
217 } 279 }
218 280
219 // static 281 // static
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
253 int job_id = NewJob(web_contents, callback); 315 int job_id = NewJob(web_contents, callback);
254 316
255 BrowserThread::PostTaskAndReplyWithResult( 317 BrowserThread::PostTaskAndReplyWithResult(
256 BrowserThread::FILE, FROM_HERE, 318 BrowserThread::FILE, FROM_HERE,
257 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), 319 base::Bind(&MHTMLGenerationManager::CreateFile, file_path),
258 base::Bind(&MHTMLGenerationManager::OnFileAvailable, 320 base::Bind(&MHTMLGenerationManager::OnFileAvailable,
259 base::Unretained(this), // Safe b/c |this| is a singleton. 321 base::Unretained(this), // Safe b/c |this| is a singleton.
260 job_id)); 322 job_id));
261 } 323 }
262 324
263 void MHTMLGenerationManager::OnSavedFrameAsMHTML( 325 void MHTMLGenerationManager::OnSerializeAsMHTMLResponse(
326 RenderFrameHostImpl* sender,
264 int job_id, 327 int job_id,
265 bool mhtml_generation_in_renderer_succeeded) { 328 bool mhtml_generation_in_renderer_succeeded,
329 const std::set<std::string>& digests_of_uris_of_serialized_resources) {
266 DCHECK_CURRENTLY_ON(BrowserThread::UI); 330 DCHECK_CURRENTLY_ON(BrowserThread::UI);
267 331
268 if (!mhtml_generation_in_renderer_succeeded) { 332 if (!mhtml_generation_in_renderer_succeeded) {
269 JobFinished(job_id, JobStatus::FAILURE); 333 JobFinished(job_id, JobStatus::FAILURE);
270 return; 334 return;
271 } 335 }
272 336
273 Job* job = FindJob(job_id); 337 Job* job = FindJob(job_id);
274 if (!job) 338 if (!job)
275 return; 339 return;
276 340
277 if (job->HasMoreFramesToProcess()) { 341 if (!job->OnSerializeAsMHTMLResponse(
278 if (!job->SendToNextRenderFrame()) { 342 sender, digests_of_uris_of_serialized_resources)) {
279 JobFinished(job_id, JobStatus::FAILURE); 343 JobFinished(job_id, JobStatus::FAILURE);
280 }
281 return; 344 return;
282 } 345 }
283 346
284 JobFinished(job_id, JobStatus::SUCCESS); 347 if (job->IsDone())
348 JobFinished(job_id, JobStatus::SUCCESS);
285 } 349 }
286 350
287 // static 351 // static
288 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { 352 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) {
289 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 353 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
290 354
291 // SECURITY NOTE: A file descriptor to the file created below will be passed 355 // SECURITY NOTE: A file descriptor to the file created below will be passed
292 // to multiple renderer processes which (in out-of-process iframes mode) can 356 // to multiple renderer processes which (in out-of-process iframes mode) can
293 // act on behalf of separate web principals. Therefore it is important to 357 // act on behalf of separate web principals. Therefore it is important to
294 // only allow writing to the file and forbid reading from the file (as this 358 // only allow writing to the file and forbid reading from the file (as this
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
380 ++it) { 444 ++it) {
381 if (it->second == job) { 445 if (it->second == job) {
382 JobFinished(it->first, JobStatus::FAILURE); 446 JobFinished(it->first, JobStatus::FAILURE);
383 return; 447 return;
384 } 448 }
385 } 449 }
386 NOTREACHED(); 450 NOTREACHED();
387 } 451 }
388 452
389 } // namespace content 453 } // namespace content
OLDNEW
« no previous file with comments | « content/browser/download/mhtml_generation_manager.h ('k') | content/browser/frame_host/render_frame_host_impl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698