Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1151)

Side by Side Diff: content/browser/download/mhtml_generation_manager.cc

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Rebasing... Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/mhtml_generation_manager.h" 5 #include "content/browser/download/mhtml_generation_manager.h"
6 6
7 #include <map> 7 #include <map>
8 #include <queue> 8 #include <queue>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
11 #include "base/files/file.h" 11 #include "base/files/file.h"
12 #include "base/guid.h" 12 #include "base/guid.h"
13 #include "base/rand_util.h" 13 #include "base/rand_util.h"
14 #include "base/scoped_observer.h" 14 #include "base/scoped_observer.h"
15 #include "base/stl_util.h" 15 #include "base/stl_util.h"
16 #include "base/strings/string_number_conversions.h" 16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/stringprintf.h" 17 #include "base/strings/stringprintf.h"
18 #include "content/browser/frame_host/frame_tree_node.h" 18 #include "content/browser/frame_host/frame_tree_node.h"
19 #include "content/common/frame_messages.h" 19 #include "content/common/frame_messages.h"
20 #include "content/public/browser/browser_thread.h" 20 #include "content/public/browser/browser_thread.h"
21 #include "content/public/browser/render_frame_host.h" 21 #include "content/public/browser/render_frame_host.h"
22 #include "content/public/browser/render_process_host.h" 22 #include "content/public/browser/render_process_host.h"
23 #include "content/public/browser/render_process_host_observer.h" 23 #include "content/public/browser/render_process_host_observer.h"
24 #include "content/public/browser/web_contents.h" 24 #include "content/public/browser/web_contents.h"
25 #include "crypto/sha2.h"
26 #include "url/gurl.h"
25 27
26 namespace content { 28 namespace content {
27 29
28 // The class and all of its members live on the UI thread. Only static methods 30 // The class and all of its members live on the UI thread. Only static methods
29 // are executed on other threads. 31 // are executed on other threads.
30 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { 32 class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
31 public: 33 public:
32 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); 34 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback);
33 ~Job() override; 35 ~Job() override;
34 36
35 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } 37 void set_browser_file(base::File file) { browser_file_ = file.Pass(); }
36 38
37 GenerateMHTMLCallback callback() const { return callback_; } 39 GenerateMHTMLCallback callback() const { return callback_; }
38 40
41 // Handler for ViewHostMsg_SavedPageAsMHTML (a notification from the renderer
42 // that the MHTML generation for previous frame has finished).
43 // Returns |true| upon success; |false| otherwise.
44 bool OnSavedPageAsMHTML(const std::set<GURL>& uris_of_generated_mhtml_parts);
45
39 // Sends IPC to the renderer, asking for MHTML generation of the next frame. 46 // Sends IPC to the renderer, asking for MHTML generation of the next frame.
40 // 47 //
41 // Returns true if the message was sent successfully; false otherwise. 48 // Returns true if the message was sent successfully; false otherwise.
42 bool SendToNextRenderFrame(); 49 bool SendToNextRenderFrame();
43 50
44 // Indicates if more calls to SendToNextRenderFrame are needed. 51 // Indicates if more calls to SendToNextRenderFrame are needed.
45 bool HasMoreFramesToProcess() const { 52 bool HasMoreFramesToProcess() const {
46 return !pending_frame_tree_node_ids_.empty(); 53 return !pending_frame_tree_node_ids_.empty();
47 } 54 }
48 55
(...skipping 27 matching lines...) Expand all
76 // The IDs of frames we still need to process. 83 // The IDs of frames we still need to process.
77 std::queue<int> pending_frame_tree_node_ids_; 84 std::queue<int> pending_frame_tree_node_ids_;
78 85
79 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts 86 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts
80 // for more details about what "content ids" are and how they are used). 87 // for more details about what "content ids" are and how they are used).
81 std::map<int, std::string> frame_tree_node_to_content_id_; 88 std::map<int, std::string> frame_tree_node_to_content_id_;
82 89
83 // MIME multipart boundary to use in the MHTML doc. 90 // MIME multipart boundary to use in the MHTML doc.
84 std::string mhtml_boundary_marker_; 91 std::string mhtml_boundary_marker_;
85 92
93 // URIs of already generated MHTML parts.
94 std::set<GURL> already_serialized_uris_;
95
86 // The callback to call once generation is complete. 96 // The callback to call once generation is complete.
87 GenerateMHTMLCallback callback_; 97 GenerateMHTMLCallback callback_;
88 98
89 // RAII helper for registering this Job as a RenderProcessHost observer. 99 // RAII helper for registering this Job as a RenderProcessHost observer.
90 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> 100 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job>
91 observed_renderer_process_host_; 101 observed_renderer_process_host_;
92 102
93 DISALLOW_COPY_AND_ASSIGN(Job); 103 DISALLOW_COPY_AND_ASSIGN(Job);
94 }; 104 };
95 105
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 144
135 result[routing_id] = content_id; 145 result[routing_id] = content_id;
136 } 146 }
137 return result; 147 return result;
138 } 148 }
139 149
140 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { 150 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() {
141 DCHECK(browser_file_.IsValid()); 151 DCHECK(browser_file_.IsValid());
142 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); 152 DCHECK_LT(0u, pending_frame_tree_node_ids_.size());
143 153
154 FrameMsg_SerializeAsMHTML_Params ipc_params;
155 ipc_params.job_id = job_id_;
156 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker_;
157
144 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); 158 int frame_tree_node_id = pending_frame_tree_node_ids_.front();
145 pending_frame_tree_node_ids_.pop(); 159 pending_frame_tree_node_ids_.pop();
146 bool is_last_frame = pending_frame_tree_node_ids_.empty(); 160 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty();
147 161
148 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); 162 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id);
149 if (!ftn) // The contents went away. 163 if (!ftn) // The contents went away.
150 return false; 164 return false;
151 RenderFrameHost* rfh = ftn->current_frame_host(); 165 RenderFrameHost* rfh = ftn->current_frame_host();
152 166
153 // Get notified if the target of the IPC message dies between responding. 167 // Get notified if the target of the IPC message dies between responding.
154 observed_renderer_process_host_.RemoveAll(); 168 observed_renderer_process_host_.RemoveAll();
155 observed_renderer_process_host_.Add(rfh->GetProcess()); 169 observed_renderer_process_host_.Add(rfh->GetProcess());
156 170
157 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( 171 // Tell the renderer to skip (= deduplicate) already covered MHTML parts.
172 ipc_params.salt = base::GenerateGUID();
173 for (const GURL& uri : already_serialized_uris_) {
174 std::string digest = crypto::SHA256HashString(ipc_params.salt + uri.spec());
dcheng 2015/12/11 07:36:05 Maybe already_serialized_uris should just be pre-h… [preview truncated]
Łukasz Anforowicz 2015/12/14 19:39:02 Yes, good point. Done.
175 ipc_params.digests_of_uris_to_skip.insert(digest);
176 }
177
178 ipc_params.destination_file = IPC::GetFileHandleForProcess(
158 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), 179 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(),
159 false); // |close_source_handle|. 180 false); // |close_source_handle|.
160 rfh->Send(new FrameMsg_SerializeAsMHTML( 181 ipc_params.frame_routing_id_to_content_id =
161 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, 182 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance());
162 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); 183 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params));
163 return true; 184 return true;
164 } 185 }
165 186
166 void MHTMLGenerationManager::Job::RenderProcessExited( 187 void MHTMLGenerationManager::Job::RenderProcessExited(
167 RenderProcessHost* host, 188 RenderProcessHost* host,
168 base::TerminationStatus status, 189 base::TerminationStatus status,
169 int exit_code) { 190 int exit_code) {
170 DCHECK_CURRENTLY_ON(BrowserThread::UI); 191 DCHECK_CURRENTLY_ON(BrowserThread::UI);
171 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); 192 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this);
172 } 193 }
(...skipping 24 matching lines...) Expand all
197 return; 218 return;
198 } 219 }
199 220
200 BrowserThread::PostTaskAndReplyWithResult( 221 BrowserThread::PostTaskAndReplyWithResult(
201 BrowserThread::FILE, FROM_HERE, 222 BrowserThread::FILE, FROM_HERE,
202 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, 223 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread,
203 base::Passed(browser_file_.Pass())), 224 base::Passed(browser_file_.Pass())),
204 callback); 225 callback);
205 } 226 }
206 227
228 bool MHTMLGenerationManager::Job::OnSavedPageAsMHTML(
229 const std::set<GURL>& uris_of_generated_mhtml_parts) {
230 bool success = true;
dcheng 2015/12/11 07:36:05 Remove this.
Łukasz Anforowicz 2015/12/14 19:39:02 Done.
231
232 already_serialized_uris_.insert(uris_of_generated_mhtml_parts.begin(),
233 uris_of_generated_mhtml_parts.end());
234
235 if (HasMoreFramesToProcess())
236 success = SendToNextRenderFrame();
dcheng 2015/12/11 07:36:05 And return this directly.
Łukasz Anforowicz 2015/12/14 19:39:02 Done.
237
238 return success;
dcheng 2015/12/11 07:36:05 And return true here. It might be useful in the fu… [preview truncated]
Łukasz Anforowicz 2015/12/14 19:39:02 Yes - this is fair feedback. It does read better
239 }
240
207 // static 241 // static
208 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { 242 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) {
209 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 243 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
210 DCHECK(file.IsValid()); 244 DCHECK(file.IsValid());
211 int64 file_size = file.GetLength(); 245 int64 file_size = file.GetLength();
212 file.Close(); 246 file.Close();
213 return file_size; 247 return file_size;
214 } 248 }
215 249
216 // static 250 // static
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
252 BrowserThread::PostTaskAndReplyWithResult( 286 BrowserThread::PostTaskAndReplyWithResult(
253 BrowserThread::FILE, FROM_HERE, 287 BrowserThread::FILE, FROM_HERE,
254 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), 288 base::Bind(&MHTMLGenerationManager::CreateFile, file_path),
255 base::Bind(&MHTMLGenerationManager::OnFileAvailable, 289 base::Bind(&MHTMLGenerationManager::OnFileAvailable,
256 base::Unretained(this), // Safe b/c |this| is a singleton. 290 base::Unretained(this), // Safe b/c |this| is a singleton.
257 job_id)); 291 job_id));
258 } 292 }
259 293
260 void MHTMLGenerationManager::OnSavedPageAsMHTML( 294 void MHTMLGenerationManager::OnSavedPageAsMHTML(
261 int job_id, 295 int job_id,
262 bool mhtml_generation_in_renderer_succeeded) { 296 bool mhtml_generation_in_renderer_succeeded,
297 const std::set<GURL>& uris_of_generated_mhtml_parts) {
263 DCHECK_CURRENTLY_ON(BrowserThread::UI); 298 DCHECK_CURRENTLY_ON(BrowserThread::UI);
264 299
265 if (!mhtml_generation_in_renderer_succeeded) { 300 if (!mhtml_generation_in_renderer_succeeded) {
266 JobFinished(job_id, JobStatus::FAILURE); 301 JobFinished(job_id, JobStatus::FAILURE);
267 return; 302 return;
268 } 303 }
269 304
270 Job* job = FindJob(job_id); 305 Job* job = FindJob(job_id);
271 if (!job) 306 if (!job)
272 return; 307 return;
273 308
274 if (job->HasMoreFramesToProcess()) { 309 if (!job->HasMoreFramesToProcess()) {
275 if (!job->SendToNextRenderFrame()) { 310 JobFinished(job_id, JobStatus::SUCCESS);
276 JobFinished(job_id, JobStatus::FAILURE);
277 }
278 return; 311 return;
279 } 312 }
280 313
281 JobFinished(job_id, JobStatus::SUCCESS); 314 if (!job->OnSavedPageAsMHTML(uris_of_generated_mhtml_parts)) {
315 JobFinished(job_id, JobStatus::FAILURE);
316 }
282 } 317 }
283 318
284 // static 319 // static
285 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { 320 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) {
286 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 321 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
287 322
288 // SECURITY NOTE: A file descriptor to the file created below will be passed 323 // SECURITY NOTE: A file descriptor to the file created below will be passed
289 // to multiple renderer processes which (in out-of-process iframes mode) can 324 // to multiple renderer processes which (in out-of-process iframes mode) can
290 // act on behalf of separate web principals. Therefore it is important to 325 // act on behalf of separate web principals. Therefore it is important to
291 // only allow writing to the file and forbid reading from the file (as this 326 // only allow writing to the file and forbid reading from the file (as this
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
377 ++it) { 412 ++it) {
378 if (it->second == job) { 413 if (it->second == job) {
379 JobFinished(it->first, JobStatus::FAILURE); 414 JobFinished(it->first, JobStatus::FAILURE);
380 return; 415 return;
381 } 416 }
382 } 417 }
383 NOTREACHED(); 418 NOTREACHED();
384 } 419 }
385 420
386 } // namespace content 421 } // namespace content
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698