Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Side by Side Diff: content/browser/download/mhtml_generation_manager.cc

Issue 1417323006: OOPIFs: Deduplicating MHTML parts across frames. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@mhtml-serialization-per-frame
Patch Set: Addressed CR feedback from rdsmith@. Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/mhtml_generation_manager.h" 5 #include "content/browser/download/mhtml_generation_manager.h"
6 6
7 #include <map> 7 #include <map>
8 #include <queue> 8 #include <queue>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
11 #include "base/files/file.h" 11 #include "base/files/file.h"
12 #include "base/guid.h" 12 #include "base/guid.h"
13 #include "base/rand_util.h" 13 #include "base/rand_util.h"
14 #include "base/scoped_observer.h" 14 #include "base/scoped_observer.h"
15 #include "base/stl_util.h" 15 #include "base/stl_util.h"
16 #include "base/strings/string_number_conversions.h" 16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/stringprintf.h" 17 #include "base/strings/stringprintf.h"
18 #include "content/browser/frame_host/frame_tree_node.h" 18 #include "content/browser/frame_host/frame_tree_node.h"
19 #include "content/common/frame_messages.h" 19 #include "content/common/frame_messages.h"
20 #include "content/public/browser/browser_thread.h" 20 #include "content/public/browser/browser_thread.h"
21 #include "content/public/browser/render_frame_host.h" 21 #include "content/public/browser/render_frame_host.h"
22 #include "content/public/browser/render_process_host.h" 22 #include "content/public/browser/render_process_host.h"
23 #include "content/public/browser/render_process_host_observer.h" 23 #include "content/public/browser/render_process_host_observer.h"
24 #include "content/public/browser/web_contents.h" 24 #include "content/public/browser/web_contents.h"
25 #include "url/gurl.h"
25 26
26 namespace content { 27 namespace content {
27 28
28 // The class and all of its members live on the UI thread. Only static methods 29 // The class and all of its members live on the UI thread. Only static methods
29 // are executed on other threads. 30 // are executed on other threads.
30 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { 31 class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
31 public: 32 public:
32 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); 33 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback);
33 ~Job() override; 34 ~Job() override;
34 35
35 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } 36 void set_browser_file(base::File file) { browser_file_ = file.Pass(); }
36 37
37 GenerateMHTMLCallback callback() const { return callback_; } 38 GenerateMHTMLCallback callback() const { return callback_; }
38 39
40 // Handler for FrameHostMsg_SerializeAsMHTMLResponse (a notification from the
41 // renderer that the MHTML generation for previous frame has finished).
42 // Returns |true| upon success; |false| otherwise.
43 bool OnSerializeAsMHTMLResponse(
44 int frame_tree_node_id_of_sending_frame,
45 const std::set<std::string>& digests_of_uris_of_serialized_resources);
46
39 // Sends IPC to the renderer, asking for MHTML generation of the next frame. 47 // Sends IPC to the renderer, asking for MHTML generation of the next frame.
40 // 48 //
41 // Returns true if the message was sent successfully; false otherwise. 49 // Returns true if the message was sent successfully; false otherwise.
42 bool SendToNextRenderFrame(); 50 bool SendToNextRenderFrame();
43 51
44 // Indicates if more calls to SendToNextRenderFrame are needed. 52 // Indicates if more calls to SendToNextRenderFrame are needed.
45 bool HasMoreFramesToProcess() const { 53 bool IsDone() const {
46 return !pending_frame_tree_node_ids_.empty(); 54 bool waiting_for_response_from_renderer =
55 frame_tree_node_id_of_busy_frame_ !=
56 FrameTreeNode::kFrameTreeNodeInvalidId;
57 bool no_more_requests_to_send = pending_frame_tree_node_ids_.empty();
58 bool done = !waiting_for_response_from_renderer && no_more_requests_to_send;
nasko 2015/12/30 19:22:26 Why not "return !b1 && b2;"?
Łukasz Anforowicz 2015/12/30 19:47:10 Done. RE: Why: Desire to spell "done" one more time… [comment preview truncated]
59 return done;
47 } 60 }
48 61
49 // Close the file on the file thread and respond back on the UI thread with 62 // Close the file on the file thread and respond back on the UI thread with
50 // file size. 63 // file size.
51 void CloseFile(base::Callback<void(int64 file_size)> callback); 64 void CloseFile(base::Callback<void(int64 file_size)> callback);
52 65
53 // RenderProcessHostObserver: 66 // RenderProcessHostObserver:
54 void RenderProcessExited(RenderProcessHost* host, 67 void RenderProcessExited(RenderProcessHost* host,
55 base::TerminationStatus status, 68 base::TerminationStatus status,
56 int exit_code) override; 69 int exit_code) override;
57 void RenderProcessHostDestroyed(RenderProcessHost* host) override; 70 void RenderProcessHostDestroyed(RenderProcessHost* host) override;
58 71
59 private: 72 private:
60 static std::string GenerateMHTMLBoundaryMarker(); 73 static std::string GenerateMHTMLBoundaryMarker();
61 static int64 CloseFileOnFileThread(base::File file); 74 static int64 CloseFileOnFileThread(base::File file);
62 void AddFrame(RenderFrameHost* render_frame_host); 75 void AddFrame(RenderFrameHost* render_frame_host);
63 76
64 // Creates a new map with values (content ids) the same as in 77 // Creates a new map with values (content ids) the same as in
65 // |frame_tree_node_to_content_id_| map, but with the keys translated from 78 // |frame_tree_node_to_content_id_| map, but with the keys translated from
66 // frame_tree_node_id into a |site_instance|-specific routing_id. 79 // frame_tree_node_id into a |site_instance|-specific routing_id.
67 std::map<int, std::string> CreateFrameRoutingIdToContentId( 80 std::map<int, std::string> CreateFrameRoutingIdToContentId(
68 SiteInstance* site_instance); 81 SiteInstance* site_instance);
69 82
70 // Id used to map renderer responses to jobs. 83 // Id used to map renderer responses to jobs.
71 // See also MHTMLGenerationManager::id_to_job_ map. 84 // See also MHTMLGenerationManager::id_to_job_ map.
72 int job_id_; 85 int job_id_;
73 86
87 // The IDs of frames we still need to process.
Łukasz Anforowicz 2015/12/30 19:47:10 I tackled one of the "we"-s in the comments, but didn't… [comment preview truncated]
88 std::queue<int> pending_frame_tree_node_ids_;
89
90 // Identifies a frame to which we've sent FrameMsg_SerializeAsMHTML but for
91 // which we didn't yet process FrameHostMsg_SerializeAsMHTMLResponse via
92 // OnSerializeAsMHTMLResponse.
93 int frame_tree_node_id_of_busy_frame_;
94
74 // The handle to the file the MHTML is saved to for the browser process. 95 // The handle to the file the MHTML is saved to for the browser process.
75 base::File browser_file_; 96 base::File browser_file_;
76 97
77 // The IDs of frames we still need to process.
78 std::queue<int> pending_frame_tree_node_ids_;
79
80 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts 98 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts
81 // for more details about what "content ids" are and how they are used). 99 // for more details about what "content ids" are and how they are used).
82 std::map<int, std::string> frame_tree_node_to_content_id_; 100 std::map<int, std::string> frame_tree_node_to_content_id_;
83 101
84 // MIME multipart boundary to use in the MHTML doc. 102 // MIME multipart boundary to use in the MHTML doc.
85 std::string mhtml_boundary_marker_; 103 std::string mhtml_boundary_marker_;
86 104
105 // Digests of URIs of already generated MHTML parts.
106 std::set<std::string> digests_of_already_serialized_uris_;
107 std::string salt_;
108
87 // The callback to call once generation is complete. 109 // The callback to call once generation is complete.
88 GenerateMHTMLCallback callback_; 110 GenerateMHTMLCallback callback_;
89 111
90 // RAII helper for registering this Job as a RenderProcessHost observer. 112 // RAII helper for registering this Job as a RenderProcessHost observer.
91 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> 113 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job>
92 observed_renderer_process_host_; 114 observed_renderer_process_host_;
93 115
94 DISALLOW_COPY_AND_ASSIGN(Job); 116 DISALLOW_COPY_AND_ASSIGN(Job);
95 }; 117 };
96 118
97 MHTMLGenerationManager::Job::Job(int job_id, 119 MHTMLGenerationManager::Job::Job(int job_id,
98 WebContents* web_contents, 120 WebContents* web_contents,
99 GenerateMHTMLCallback callback) 121 GenerateMHTMLCallback callback)
100 : job_id_(job_id), 122 : job_id_(job_id),
123 frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId),
101 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()), 124 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()),
125 salt_(base::GenerateGUID()),
102 callback_(callback), 126 callback_(callback),
103 observed_renderer_process_host_(this) { 127 observed_renderer_process_host_(this) {
104 DCHECK_CURRENTLY_ON(BrowserThread::UI); 128 DCHECK_CURRENTLY_ON(BrowserThread::UI);
105 web_contents->ForEachFrame(base::Bind( 129 web_contents->ForEachFrame(base::Bind(
106 &MHTMLGenerationManager::Job::AddFrame, 130 &MHTMLGenerationManager::Job::AddFrame,
107 base::Unretained(this))); // Safe because ForEachFrame is synchronous. 131 base::Unretained(this))); // Safe because ForEachFrame is synchronous.
108 132
109 // Main frame needs to be processed first. 133 // Main frame needs to be processed first.
110 DCHECK(!pending_frame_tree_node_ids_.empty()); 134 DCHECK(!pending_frame_tree_node_ids_.empty());
111 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) 135 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front())
(...skipping 23 matching lines...) Expand all
135 159
136 result[routing_id] = content_id; 160 result[routing_id] = content_id;
137 } 161 }
138 return result; 162 return result;
139 } 163 }
140 164
141 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { 165 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() {
142 DCHECK(browser_file_.IsValid()); 166 DCHECK(browser_file_.IsValid());
143 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); 167 DCHECK_LT(0u, pending_frame_tree_node_ids_.size());
144 168
169 FrameMsg_SerializeAsMHTML_Params ipc_params;
170 ipc_params.job_id = job_id_;
171 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker_;
172
145 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); 173 int frame_tree_node_id = pending_frame_tree_node_ids_.front();
146 pending_frame_tree_node_ids_.pop(); 174 pending_frame_tree_node_ids_.pop();
147 bool is_last_frame = pending_frame_tree_node_ids_.empty(); 175 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty();
148 176
149 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); 177 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id);
150 if (!ftn) // The contents went away. 178 if (!ftn) // The contents went away.
151 return false; 179 return false;
152 RenderFrameHost* rfh = ftn->current_frame_host(); 180 RenderFrameHost* rfh = ftn->current_frame_host();
153 181
154 // Get notified if the target of the IPC message dies between responding. 182 // Get notified if the target of the IPC message dies between responding.
155 observed_renderer_process_host_.RemoveAll(); 183 observed_renderer_process_host_.RemoveAll();
156 observed_renderer_process_host_.Add(rfh->GetProcess()); 184 observed_renderer_process_host_.Add(rfh->GetProcess());
157 185
158 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( 186 // Tell the renderer to skip (= deduplicate) already covered MHTML parts.
187 ipc_params.salt = salt_;
188 ipc_params.digests_of_uris_to_skip = digests_of_already_serialized_uris_;
189
190 ipc_params.destination_file = IPC::GetFileHandleForProcess(
159 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), 191 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(),
160 false); // |close_source_handle|. 192 false); // |close_source_handle|.
161 rfh->Send(new FrameMsg_SerializeAsMHTML( 193 ipc_params.frame_routing_id_to_content_id =
162 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, 194 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance());
163 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); 195
196 // Send the IPC asking the renderer to serialize the frame.
197 DCHECK_EQ(FrameTreeNode::kFrameTreeNodeInvalidId,
198 frame_tree_node_id_of_busy_frame_);
199 frame_tree_node_id_of_busy_frame_ = frame_tree_node_id;
200 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params));
164 return true; 201 return true;
165 } 202 }
166 203
167 void MHTMLGenerationManager::Job::RenderProcessExited( 204 void MHTMLGenerationManager::Job::RenderProcessExited(
168 RenderProcessHost* host, 205 RenderProcessHost* host,
169 base::TerminationStatus status, 206 base::TerminationStatus status,
170 int exit_code) { 207 int exit_code) {
171 DCHECK_CURRENTLY_ON(BrowserThread::UI); 208 DCHECK_CURRENTLY_ON(BrowserThread::UI);
172 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); 209 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this);
173 } 210 }
(...skipping 24 matching lines...) Expand all
198 return; 235 return;
199 } 236 }
200 237
201 BrowserThread::PostTaskAndReplyWithResult( 238 BrowserThread::PostTaskAndReplyWithResult(
202 BrowserThread::FILE, FROM_HERE, 239 BrowserThread::FILE, FROM_HERE,
203 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, 240 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread,
204 base::Passed(browser_file_.Pass())), 241 base::Passed(browser_file_.Pass())),
205 callback); 242 callback);
206 } 243 }
207 244
245 bool MHTMLGenerationManager::Job::OnSerializeAsMHTMLResponse(
246 int frame_tree_node_id_of_sending_frame,
247 const std::set<std::string>& digests_of_uris_of_serialized_resources) {
248 // Sanitize renderer input / reject unexpected messages.
249 if (frame_tree_node_id_of_sending_frame != frame_tree_node_id_of_busy_frame_)
250 return false; // Report failure.
251 frame_tree_node_id_of_busy_frame_ = FrameTreeNode::kFrameTreeNodeInvalidId;
252
253 // Renderer should be deduping resources with the same uris.
254 DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>(
255 digests_of_already_serialized_uris_,
256 digests_of_uris_of_serialized_resources).size());
257 digests_of_already_serialized_uris_.insert(
258 digests_of_uris_of_serialized_resources.begin(),
259 digests_of_uris_of_serialized_resources.end());
260
261 if (pending_frame_tree_node_ids_.empty())
262 return true; // Report success.
263
264 return SendToNextRenderFrame();
265 }
266
208 // static 267 // static
209 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { 268 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) {
210 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 269 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
211 DCHECK(file.IsValid()); 270 DCHECK(file.IsValid());
212 int64 file_size = file.GetLength(); 271 int64 file_size = file.GetLength();
213 file.Close(); 272 file.Close();
214 return file_size; 273 return file_size;
215 } 274 }
216 275
217 // static 276 // static
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
251 int job_id = NewJob(web_contents, callback); 310 int job_id = NewJob(web_contents, callback);
252 311
253 BrowserThread::PostTaskAndReplyWithResult( 312 BrowserThread::PostTaskAndReplyWithResult(
254 BrowserThread::FILE, FROM_HERE, 313 BrowserThread::FILE, FROM_HERE,
255 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), 314 base::Bind(&MHTMLGenerationManager::CreateFile, file_path),
256 base::Bind(&MHTMLGenerationManager::OnFileAvailable, 315 base::Bind(&MHTMLGenerationManager::OnFileAvailable,
257 base::Unretained(this), // Safe b/c |this| is a singleton. 316 base::Unretained(this), // Safe b/c |this| is a singleton.
258 job_id)); 317 job_id));
259 } 318 }
260 319
261 void MHTMLGenerationManager::OnSavedFrameAsMHTML( 320 void MHTMLGenerationManager::OnSerializeAsMHTMLResponse(
321 int frame_tree_node_id_of_sending_frame,
262 int job_id, 322 int job_id,
263 bool mhtml_generation_in_renderer_succeeded) { 323 bool mhtml_generation_in_renderer_succeeded,
324 const std::set<std::string>& digests_of_uris_of_serialized_resources) {
264 DCHECK_CURRENTLY_ON(BrowserThread::UI); 325 DCHECK_CURRENTLY_ON(BrowserThread::UI);
265 326
266 if (!mhtml_generation_in_renderer_succeeded) { 327 if (!mhtml_generation_in_renderer_succeeded) {
267 JobFinished(job_id, JobStatus::FAILURE); 328 JobFinished(job_id, JobStatus::FAILURE);
268 return; 329 return;
269 } 330 }
270 331
271 Job* job = FindJob(job_id); 332 Job* job = FindJob(job_id);
272 if (!job) 333 if (!job)
273 return; 334 return;
274 335
275 if (job->HasMoreFramesToProcess()) { 336 if (!job->OnSerializeAsMHTMLResponse(
276 if (!job->SendToNextRenderFrame()) { 337 frame_tree_node_id_of_sending_frame,
277 JobFinished(job_id, JobStatus::FAILURE); 338 digests_of_uris_of_serialized_resources)) {
278 } 339 JobFinished(job_id, JobStatus::FAILURE);
279 return; 340 return;
280 } 341 }
281 342
282 JobFinished(job_id, JobStatus::SUCCESS); 343 if (job->IsDone())
344 JobFinished(job_id, JobStatus::SUCCESS);
283 } 345 }
284 346
285 // static 347 // static
286 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { 348 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) {
287 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 349 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
288 350
289 // SECURITY NOTE: A file descriptor to the file created below will be passed 351 // SECURITY NOTE: A file descriptor to the file created below will be passed
290 // to multiple renderer processes which (in out-of-process iframes mode) can 352 // to multiple renderer processes which (in out-of-process iframes mode) can
291 // act on behalf of separate web principals. Therefore it is important to 353 // act on behalf of separate web principals. Therefore it is important to
292 // only allow writing to the file and forbid reading from the file (as this 354 // only allow writing to the file and forbid reading from the file (as this
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
378 ++it) { 440 ++it) {
379 if (it->second == job) { 441 if (it->second == job) {
380 JobFinished(it->first, JobStatus::FAILURE); 442 JobFinished(it->first, JobStatus::FAILURE);
381 return; 443 return;
382 } 444 }
383 } 445 }
384 NOTREACHED(); 446 NOTREACHED();
385 } 447 }
386 448
387 } // namespace content 449 } // namespace content
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698