Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(381)

Side by Side Diff: content/browser/download/mhtml_generation_manager.cc

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Rebasing... Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/mhtml_generation_manager.h" 5 #include "content/browser/download/mhtml_generation_manager.h"
6 6
7 #include <map> 7 #include <map>
8 #include <queue> 8 #include <queue>
9 #include <sstream> 9 #include <sstream>
10 10
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/files/file.h" 12 #include "base/files/file.h"
13 #include "base/guid.h" 13 #include "base/guid.h"
14 #include "base/scoped_observer.h" 14 #include "base/scoped_observer.h"
15 #include "base/stl_util.h" 15 #include "base/stl_util.h"
16 #include "content/browser/frame_host/frame_tree_node.h" 16 #include "content/browser/frame_host/frame_tree_node.h"
17 #include "content/common/frame_messages.h" 17 #include "content/common/frame_messages.h"
18 #include "content/public/browser/browser_thread.h" 18 #include "content/public/browser/browser_thread.h"
19 #include "content/public/browser/render_frame_host.h" 19 #include "content/public/browser/render_frame_host.h"
20 #include "content/public/browser/render_process_host.h" 20 #include "content/public/browser/render_process_host.h"
21 #include "content/public/browser/render_process_host_observer.h" 21 #include "content/public/browser/render_process_host_observer.h"
22 #include "content/public/browser/web_contents.h" 22 #include "content/public/browser/web_contents.h"
23 #include "crypto/sha2.h"
24 #include "url/gurl.h"
23 25
24 namespace content { 26 namespace content {
25 27
26 // The class and all of its members live on the UI thread. Only static methods 28 // The class and all of its members live on the UI thread. Only static methods
27 // are executed on other threads. 29 // are executed on other threads.
28 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { 30 class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
29 public: 31 public:
30 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); 32 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback);
31 ~Job() override; 33 ~Job() override;
32 34
33 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } 35 void set_browser_file(base::File file) { browser_file_ = file.Pass(); }
34 36
35 GenerateMHTMLCallback callback() const { return callback_; } 37 GenerateMHTMLCallback callback() const { return callback_; }
36 38
37 // Sends IPC to the renderer, asking for MHTML generation of the next frame. 39 // Sends IPC to the renderer of the first frame, asking it to start
38 // 40 // MHTML serialization.
39 // Returns true if the message was sent successfully; false otherwise. 41 // Returns |true| upon success; |false| otherwise.
40 // 42 bool SendToFirstFrame();
41 // See FrameMsg_SerializeAsMHTML IPC message for description of
42 // |mhtml_boundary_marker| parameter.
43 bool SendToNextRenderFrame(const std::string& mhtml_boundary_marker);
44 43
45 // Indicates if more calls to SendToNextRenderFrame are needed. 44 // Handler for ViewHostMsg_SavedPageAsMHTML (a notification from the renderer
45 // that the MHTML generation for the previous frame has finished).
46 // Returns |true| upon success; |false| otherwise.
47 bool OnSavedPageAsMHTML(std::string mhtml_boundary_marker,
48 const std::set<GURL>& uris_of_generated_mhtml_parts);
49
50 // Indicates if there are still frames left to process.
46 bool GotMoreFramesToProcess() { 51 bool GotMoreFramesToProcess() {
47 return !pending_frame_tree_node_ids_.empty(); 52 return !pending_frame_tree_node_ids_.empty();
48 } 53 }
49 54
50 // Close the file on the file thread and respond back on the UI thread with 55 // Close the file on the file thread and respond back on the UI thread with
51 // file size. 56 // file size.
52 void CloseFile(base::Callback<void(int64 file_size)> callback); 57 void CloseFile(base::Callback<void(int64 file_size)> callback);
53 58
54 // RenderProcessHostObserver: 59 // RenderProcessHostObserver:
55 void RenderProcessExited(RenderProcessHost* host, 60 void RenderProcessExited(RenderProcessHost* host,
56 base::TerminationStatus status, 61 base::TerminationStatus status,
57 int exit_code) override; 62 int exit_code) override;
58 void RenderProcessHostDestroyed(RenderProcessHost* host) override; 63 void RenderProcessHostDestroyed(RenderProcessHost* host) override;
59 64
60 private: 65 private:
61 static int64 CloseFileOnFileThread(base::File file); 66 static int64 CloseFileOnFileThread(base::File file);
62 void AddFrame(RenderFrameHost* render_frame_host); 67 void AddFrame(RenderFrameHost* render_frame_host);
63 68
64 // Translates |frame_tree_node_to_content_id_| into 69 // Translates |frame_tree_node_to_content_id_| into
65 // a |site_instance|-specific, routing-id-based map. 70 // a |site_instance|-specific, routing-id-based map.
66 std::map<int, std::string> CreateFrameRoutingIdToContentId( 71 std::map<int, std::string> CreateFrameRoutingIdToContentId(
67 SiteInstance* site_instance); 72 SiteInstance* site_instance);
68 73
74 // Sends IPC to the renderer, asking for MHTML generation of the next frame.
75 //
76 // Returns true if the message was sent successfully; false otherwise.
77 //
78 // See FrameMsg_SerializeAsMHTML IPC message for description of
79 // |mhtml_boundary_marker| parameter.
80 bool SendToNextRenderFrame(const std::string& mhtml_boundary_marker);
81
69 // Id used to map renderer responses to jobs. 82 // Id used to map renderer responses to jobs.
70 // See also MHTMLGenerationManager::id_to_job_ map. 83 // See also MHTMLGenerationManager::id_to_job_ map.
71 int job_id_; 84 int job_id_;
72 85
73 // The handle to the file the MHTML is saved to for the browser process. 86 // The handle to the file the MHTML is saved to for the browser process.
74 base::File browser_file_; 87 base::File browser_file_;
75 88
76 // The IDs of frames we still need to process. 89 // The IDs of frames we still need to process.
77 std::queue<int> pending_frame_tree_node_ids_; 90 std::queue<int> pending_frame_tree_node_ids_;
78 91
79 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts 92 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts
80 // for more details about what "content ids" are and how they are used). 93 // for more details about what "content ids" are and how they are used).
81 std::map<int, std::string> frame_tree_node_to_content_id_; 94 std::map<int, std::string> frame_tree_node_to_content_id_;
82 95
96 // URIs of already generated MHTML parts.
97 std::set<GURL> already_serialized_uris_;
98
83 // The callback to call once generation is complete. 99 // The callback to call once generation is complete.
84 GenerateMHTMLCallback callback_; 100 GenerateMHTMLCallback callback_;
85 101
86 // RAII helper for registering this Job as a RenderProcessHost observer. 102 // RAII helper for registering this Job as a RenderProcessHost observer.
87 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> 103 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job>
88 observed_renderer_process_host_; 104 observed_renderer_process_host_;
89 105
90 DISALLOW_COPY_AND_ASSIGN(Job); 106 DISALLOW_COPY_AND_ASSIGN(Job);
91 }; 107 };
92 108
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
131 result[routing_id] = content_id; 147 result[routing_id] = content_id;
132 } 148 }
133 return result; 149 return result;
134 } 150 }
135 151
136 bool MHTMLGenerationManager::Job::SendToNextRenderFrame( 152 bool MHTMLGenerationManager::Job::SendToNextRenderFrame(
137 const std::string& mhtml_boundary_marker) { 153 const std::string& mhtml_boundary_marker) {
138 DCHECK(browser_file_.IsValid()); 154 DCHECK(browser_file_.IsValid());
139 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); 155 DCHECK_LT(0u, pending_frame_tree_node_ids_.size());
140 156
157 FrameMsg_SerializeAsMHTML_Params ipc_params;
158 ipc_params.job_id = job_id_;
159 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker;
160
141 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); 161 int frame_tree_node_id = pending_frame_tree_node_ids_.front();
142 pending_frame_tree_node_ids_.pop(); 162 pending_frame_tree_node_ids_.pop();
143 bool is_last_frame = pending_frame_tree_node_ids_.empty(); 163 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty();
144 164
145 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); 165 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id);
146 if (!ftn) // The contents went away. 166 if (!ftn) // The contents went away.
147 return false; 167 return false;
148 RenderFrameHost* rfh = ftn->current_frame_host(); 168 RenderFrameHost* rfh = ftn->current_frame_host();
149 169
150 // Get notified if the target of the IPC message dies before responding. 170 // Get notified if the target of the IPC message dies before responding.
151 observed_renderer_process_host_.RemoveAll(); 171 observed_renderer_process_host_.RemoveAll();
152 observed_renderer_process_host_.Add(rfh->GetProcess()); 172 observed_renderer_process_host_.Add(rfh->GetProcess());
153 173
154 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( 174 // Tell the renderer to skip (= deduplicate) already covered MHTML parts.
175 std::set<std::string> digests_of_uris_to_skip;
ncarter (slow) 2015/12/04 21:52:39 This is unused.
Łukasz Anforowicz 2015/12/04 22:55:43 Thanks for catching this. I must have missed this
176 ipc_params.salt = base::GenerateGUID();
177 for (const GURL& uri : already_serialized_uris_) {
178 std::string digest = crypto::SHA256HashString(ipc_params.salt + uri.spec());
179 ipc_params.digests_of_uris_to_skip.insert(digest);
180 }
181
182 ipc_params.destination_file = IPC::GetFileHandleForProcess(
155 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), 183 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(),
156 false); // |close_source_handle|. 184 false); // |close_source_handle|.
157 rfh->Send(new FrameMsg_SerializeAsMHTML( 185 ipc_params.frame_routing_id_to_content_id =
158 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker, 186 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance());
159 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); 187 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params));
160 return true; 188 return true;
161 } 189 }
162 190
163 void MHTMLGenerationManager::Job::RenderProcessExited( 191 void MHTMLGenerationManager::Job::RenderProcessExited(
164 RenderProcessHost* host, 192 RenderProcessHost* host,
165 base::TerminationStatus status, 193 base::TerminationStatus status,
166 int exit_code) { 194 int exit_code) {
167 DCHECK_CURRENTLY_ON(BrowserThread::UI); 195 DCHECK_CURRENTLY_ON(BrowserThread::UI);
168 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); 196 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this);
169 } 197 }
(...skipping 25 matching lines...) Expand all
195 return; 223 return;
196 } 224 }
197 225
198 BrowserThread::PostTaskAndReplyWithResult( 226 BrowserThread::PostTaskAndReplyWithResult(
199 BrowserThread::FILE, FROM_HERE, 227 BrowserThread::FILE, FROM_HERE,
200 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, 228 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread,
201 base::Passed(browser_file_.Pass())), 229 base::Passed(browser_file_.Pass())),
202 callback); 230 callback);
203 } 231 }
204 232
233 bool MHTMLGenerationManager::Job::SendToFirstFrame() {
234 // We don't yet have an mhtml boundary marker when serializing the 1st frame.
235 std::string initial_mhtml_boundary_marker = "";
236
237 return SendToNextRenderFrame(initial_mhtml_boundary_marker);
238 }
239
240 bool MHTMLGenerationManager::Job::OnSavedPageAsMHTML(
241 std::string mhtml_boundary_marker,
242 const std::set<GURL>& uris_of_generated_mhtml_parts) {
243 bool success = true;
244
245 already_serialized_uris_.insert(uris_of_generated_mhtml_parts.begin(),
246 uris_of_generated_mhtml_parts.end());
247
248 if (GotMoreFramesToProcess())
249 success = SendToNextRenderFrame(mhtml_boundary_marker);
250
251 return success;
252 }
253
205 // static 254 // static
206 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { 255 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) {
207 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 256 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
208 DCHECK(file.IsValid()); 257 DCHECK(file.IsValid());
209 int64 file_size = file.GetLength(); 258 int64 file_size = file.GetLength();
210 file.Close(); 259 file.Close();
211 return file_size; 260 return file_size;
212 } 261 }
213 262
214 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { 263 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() {
(...skipping 17 matching lines...) Expand all
232 BrowserThread::FILE, FROM_HERE, 281 BrowserThread::FILE, FROM_HERE,
233 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), 282 base::Bind(&MHTMLGenerationManager::CreateFile, file_path),
234 base::Bind(&MHTMLGenerationManager::OnFileAvailable, 283 base::Bind(&MHTMLGenerationManager::OnFileAvailable,
235 base::Unretained(this), // Safe b/c |this| is a singleton. 284 base::Unretained(this), // Safe b/c |this| is a singleton.
236 job_id)); 285 job_id));
237 } 286 }
238 287
239 void MHTMLGenerationManager::OnSavedPageAsMHTML( 288 void MHTMLGenerationManager::OnSavedPageAsMHTML(
240 int job_id, 289 int job_id,
241 bool mhtml_generation_in_renderer_succeeded, 290 bool mhtml_generation_in_renderer_succeeded,
242 const std::string& mhtml_boundary_marker) { 291 const std::string& mhtml_boundary_marker,
292 const std::set<GURL>& uris_of_generated_mhtml_parts) {
243 DCHECK_CURRENTLY_ON(BrowserThread::UI); 293 DCHECK_CURRENTLY_ON(BrowserThread::UI);
244 294
245 if (!mhtml_generation_in_renderer_succeeded) { 295 if (!mhtml_generation_in_renderer_succeeded) {
246 JobFinished(job_id, JobStatus::FAILURE); 296 JobFinished(job_id, JobStatus::FAILURE);
247 return; 297 return;
248 } 298 }
249 299
250 Job* job = FindJob(job_id); 300 Job* job = FindJob(job_id);
251 if (!job) 301 if (!job)
252 return; 302 return;
253 303
254 if (job->GotMoreFramesToProcess()) { 304 if (!job->GotMoreFramesToProcess()) {
255 if (!job->SendToNextRenderFrame(mhtml_boundary_marker)) { 305 JobFinished(job_id, JobStatus::SUCCESS);
256 JobFinished(job_id, JobStatus::FAILURE);
257 }
258 return; 306 return;
259 } 307 }
260 308
261 JobFinished(job_id, JobStatus::SUCCESS); 309 if (!job->OnSavedPageAsMHTML(mhtml_boundary_marker,
310 uris_of_generated_mhtml_parts)) {
311 JobFinished(job_id, JobStatus::FAILURE);
312 }
262 } 313 }
263 314
264 // static 315 // static
265 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { 316 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) {
266 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 317 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
267 318
268 // SECURITY NOTE: A file descriptor to the file created below will be passed 319 // SECURITY NOTE: A file descriptor to the file created below will be passed
269 // to multiple renderer processes which (in out-of-process iframes mode) can 320 // to multiple renderer processes which (in out-of-process iframes mode) can
270 // act on behalf of separate web principals. Therefore it is important to 321 // act on behalf of separate web principals. Therefore it is important to
271 // only allow writing to the file and forbid reading from the file (as this 322 // only allow writing to the file and forbid reading from the file (as this
(...skipping 18 matching lines...) Expand all
290 JobFinished(job_id, JobStatus::FAILURE); 341 JobFinished(job_id, JobStatus::FAILURE);
291 return; 342 return;
292 } 343 }
293 344
294 Job* job = FindJob(job_id); 345 Job* job = FindJob(job_id);
295 if (!job) 346 if (!job)
296 return; 347 return;
297 348
298 job->set_browser_file(browser_file.Pass()); 349 job->set_browser_file(browser_file.Pass());
299 350
300 // We don't yet have an mhtml boundary marker when serializing the 1st frame. 351 if (!job->SendToFirstFrame()) {
301 std::string initial_mhtml_boundary_marker = "";
302 if (!job->SendToNextRenderFrame(initial_mhtml_boundary_marker)) {
303 JobFinished(job_id, JobStatus::FAILURE); 352 JobFinished(job_id, JobStatus::FAILURE);
304 } 353 }
305 } 354 }
306 355
307 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { 356 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) {
308 DCHECK_CURRENTLY_ON(BrowserThread::UI); 357 DCHECK_CURRENTLY_ON(BrowserThread::UI);
309 358
310 Job* job = FindJob(job_id); 359 Job* job = FindJob(job_id);
311 if (!job) 360 if (!job)
312 return; 361 return;
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
359 ++it) { 408 ++it) {
360 if (it->second == job) { 409 if (it->second == job) {
361 JobFinished(it->first, JobStatus::FAILURE); 410 JobFinished(it->first, JobStatus::FAILURE);
362 return; 411 return;
363 } 412 }
364 } 413 }
365 NOTREACHED(); 414 NOTREACHED();
366 } 415 }
367 416
368 } // namespace content 417 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698