OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/download/mhtml_generation_manager.h" | 5 #include "content/browser/download/mhtml_generation_manager.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 #include <queue> | 8 #include <queue> |
9 | 9 |
10 #include "base/bind.h" | 10 #include "base/bind.h" |
11 #include "base/files/file.h" | 11 #include "base/files/file.h" |
12 #include "base/guid.h" | 12 #include "base/guid.h" |
13 #include "base/rand_util.h" | 13 #include "base/rand_util.h" |
14 #include "base/scoped_observer.h" | 14 #include "base/scoped_observer.h" |
15 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
16 #include "base/strings/string_number_conversions.h" | 16 #include "base/strings/string_number_conversions.h" |
17 #include "base/strings/stringprintf.h" | 17 #include "base/strings/stringprintf.h" |
18 #include "content/browser/frame_host/frame_tree_node.h" | 18 #include "content/browser/frame_host/frame_tree_node.h" |
19 #include "content/common/frame_messages.h" | 19 #include "content/common/frame_messages.h" |
20 #include "content/public/browser/browser_thread.h" | 20 #include "content/public/browser/browser_thread.h" |
21 #include "content/public/browser/render_frame_host.h" | 21 #include "content/public/browser/render_frame_host.h" |
22 #include "content/public/browser/render_process_host.h" | 22 #include "content/public/browser/render_process_host.h" |
23 #include "content/public/browser/render_process_host_observer.h" | 23 #include "content/public/browser/render_process_host_observer.h" |
24 #include "content/public/browser/web_contents.h" | 24 #include "content/public/browser/web_contents.h" |
25 #include "crypto/sha2.h" | |
26 #include "url/gurl.h" | |
25 | 27 |
26 namespace content { | 28 namespace content { |
27 | 29 |
28 // The class and all of its members live on the UI thread. Only static methods | 30 // The class and all of its members live on the UI thread. Only static methods |
29 // are executed on other threads. | 31 // are executed on other threads. |
30 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { | 32 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
31 public: | 33 public: |
32 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); | 34 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); |
33 ~Job() override; | 35 ~Job() override; |
34 | 36 |
35 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } | 37 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } |
36 | 38 |
37 GenerateMHTMLCallback callback() const { return callback_; } | 39 GenerateMHTMLCallback callback() const { return callback_; } |
38 | 40 |
41 // Handler for ViewHostMsg_SavedPageAsMHTML (a notification from the renderer | |
42 // that the MHTML generation for previous frame has finished). | |
43 // Returns |true| upon success; |false| otherwise. | |
44 bool OnSavedPageAsMHTML(const std::set<GURL>& uris_of_generated_mhtml_parts); | |
45 | |
39 // Sends IPC to the renderer, asking for MHTML generation of the next frame. | 46 // Sends IPC to the renderer, asking for MHTML generation of the next frame. |
40 // | 47 // |
41 // Returns true if the message was sent successfully; false otherwise. | 48 // Returns true if the message was sent successfully; false otherwise. |
42 bool SendToNextRenderFrame(); | 49 bool SendToNextRenderFrame(); |
43 | 50 |
44 // Indicates if more calls to SendToNextRenderFrame are needed. | 51 // Indicates if more calls to SendToNextRenderFrame are needed. |
45 bool HasMoreFramesToProcess() const { | 52 bool HasMoreFramesToProcess() const { |
46 return !pending_frame_tree_node_ids_.empty(); | 53 return !pending_frame_tree_node_ids_.empty(); |
47 } | 54 } |
48 | 55 |
(...skipping 27 matching lines...) | |
76 // The IDs of frames we still need to process. | 83 // The IDs of frames we still need to process. |
77 std::queue<int> pending_frame_tree_node_ids_; | 84 std::queue<int> pending_frame_tree_node_ids_; |
78 | 85 |
79 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts | 86 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts |
80 // for more details about what "content ids" are and how they are used). | 87 // for more details about what "content ids" are and how they are used). |
81 std::map<int, std::string> frame_tree_node_to_content_id_; | 88 std::map<int, std::string> frame_tree_node_to_content_id_; |
82 | 89 |
83 // MIME multipart boundary to use in the MHTML doc. | 90 // MIME multipart boundary to use in the MHTML doc. |
84 std::string mhtml_boundary_marker_; | 91 std::string mhtml_boundary_marker_; |
85 | 92 |
93 // URIs of already generated MHTML parts. | |
94 std::set<GURL> already_serialized_uris_; | |
95 | |
86 // The callback to call once generation is complete. | 96 // The callback to call once generation is complete. |
87 GenerateMHTMLCallback callback_; | 97 GenerateMHTMLCallback callback_; |
88 | 98 |
89 // RAII helper for registering this Job as a RenderProcessHost observer. | 99 // RAII helper for registering this Job as a RenderProcessHost observer. |
90 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> | 100 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> |
91 observed_renderer_process_host_; | 101 observed_renderer_process_host_; |
92 | 102 |
93 DISALLOW_COPY_AND_ASSIGN(Job); | 103 DISALLOW_COPY_AND_ASSIGN(Job); |
94 }; | 104 }; |
95 | 105 |
(...skipping 38 matching lines...) | |
134 | 144 |
135 result[routing_id] = content_id; | 145 result[routing_id] = content_id; |
136 } | 146 } |
137 return result; | 147 return result; |
138 } | 148 } |
139 | 149 |
140 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { | 150 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { |
141 DCHECK(browser_file_.IsValid()); | 151 DCHECK(browser_file_.IsValid()); |
142 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); | 152 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); |
143 | 153 |
154 FrameMsg_SerializeAsMHTML_Params ipc_params; | |
155 ipc_params.job_id = job_id_; | |
156 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker_; | |
157 | |
144 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); | 158 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
145 pending_frame_tree_node_ids_.pop(); | 159 pending_frame_tree_node_ids_.pop(); |
146 bool is_last_frame = pending_frame_tree_node_ids_.empty(); | 160 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty(); |
147 | 161 |
148 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); | 162 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); |
149 if (!ftn) // The contents went away. | 163 if (!ftn) // The contents went away. |
150 return false; | 164 return false; |
151 RenderFrameHost* rfh = ftn->current_frame_host(); | 165 RenderFrameHost* rfh = ftn->current_frame_host(); |
152 | 166 |
153 // Get notified if the target of the IPC message dies between responding. | 167 // Get notified if the target of the IPC message dies between responding. |
154 observed_renderer_process_host_.RemoveAll(); | 168 observed_renderer_process_host_.RemoveAll(); |
155 observed_renderer_process_host_.Add(rfh->GetProcess()); | 169 observed_renderer_process_host_.Add(rfh->GetProcess()); |
156 | 170 |
157 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( | 171 // Tell the renderer to skip (= deduplicate) already covered MHTML parts. |
172 ipc_params.salt = base::GenerateGUID(); | |
173 for (const GURL& uri : already_serialized_uris_) { | |
174 std::string digest = crypto::SHA256HashString(ipc_params.salt + uri.spec()); | |
dcheng
2015/12/11 07:36:05
Maybe already_serialized_uris should just be pre-h
Łukasz Anforowicz
2015/12/14 19:39:02
Yes, good point. Done.
| |
175 ipc_params.digests_of_uris_to_skip.insert(digest); | |
176 } | |
177 | |
178 ipc_params.destination_file = IPC::GetFileHandleForProcess( | |
158 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), | 179 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), |
159 false); // |close_source_handle|. | 180 false); // |close_source_handle|. |
160 rfh->Send(new FrameMsg_SerializeAsMHTML( | 181 ipc_params.frame_routing_id_to_content_id = |
161 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, | 182 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()); |
162 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); | 183 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params)); |
163 return true; | 184 return true; |
164 } | 185 } |
165 | 186 |
166 void MHTMLGenerationManager::Job::RenderProcessExited( | 187 void MHTMLGenerationManager::Job::RenderProcessExited( |
167 RenderProcessHost* host, | 188 RenderProcessHost* host, |
168 base::TerminationStatus status, | 189 base::TerminationStatus status, |
169 int exit_code) { | 190 int exit_code) { |
170 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 191 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
171 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); | 192 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); |
172 } | 193 } |
(...skipping 24 matching lines...) | |
197 return; | 218 return; |
198 } | 219 } |
199 | 220 |
200 BrowserThread::PostTaskAndReplyWithResult( | 221 BrowserThread::PostTaskAndReplyWithResult( |
201 BrowserThread::FILE, FROM_HERE, | 222 BrowserThread::FILE, FROM_HERE, |
202 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, | 223 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, |
203 base::Passed(browser_file_.Pass())), | 224 base::Passed(browser_file_.Pass())), |
204 callback); | 225 callback); |
205 } | 226 } |
206 | 227 |
228 bool MHTMLGenerationManager::Job::OnSavedPageAsMHTML( | |
229 const std::set<GURL>& uris_of_generated_mhtml_parts) { | |
230 bool success = true; | |
dcheng
2015/12/11 07:36:05
Remove this.
Łukasz Anforowicz
2015/12/14 19:39:02
Done.
| |
231 | |
232 already_serialized_uris_.insert(uris_of_generated_mhtml_parts.begin(), | |
233 uris_of_generated_mhtml_parts.end()); | |
234 | |
235 if (HasMoreFramesToProcess()) | |
236 success = SendToNextRenderFrame(); | |
dcheng
2015/12/11 07:36:05
And return this directly.
Łukasz Anforowicz
2015/12/14 19:39:02
Done.
| |
237 | |
238 return success; | |
dcheng
2015/12/11 07:36:05
And return true here. It might be useful in the fu
Łukasz Anforowicz
2015/12/14 19:39:02
Yes - this is fair feedback. It does read better
| |
239 } | |
240 | |
207 // static | 241 // static |
208 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { | 242 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { |
209 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 243 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
210 DCHECK(file.IsValid()); | 244 DCHECK(file.IsValid()); |
211 int64 file_size = file.GetLength(); | 245 int64 file_size = file.GetLength(); |
212 file.Close(); | 246 file.Close(); |
213 return file_size; | 247 return file_size; |
214 } | 248 } |
215 | 249 |
216 // static | 250 // static |
(...skipping 35 matching lines...) | |
252 BrowserThread::PostTaskAndReplyWithResult( | 286 BrowserThread::PostTaskAndReplyWithResult( |
253 BrowserThread::FILE, FROM_HERE, | 287 BrowserThread::FILE, FROM_HERE, |
254 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), | 288 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), |
255 base::Bind(&MHTMLGenerationManager::OnFileAvailable, | 289 base::Bind(&MHTMLGenerationManager::OnFileAvailable, |
256 base::Unretained(this), // Safe b/c |this| is a singleton. | 290 base::Unretained(this), // Safe b/c |this| is a singleton. |
257 job_id)); | 291 job_id)); |
258 } | 292 } |
259 | 293 |
260 void MHTMLGenerationManager::OnSavedPageAsMHTML( | 294 void MHTMLGenerationManager::OnSavedPageAsMHTML( |
261 int job_id, | 295 int job_id, |
262 bool mhtml_generation_in_renderer_succeeded) { | 296 bool mhtml_generation_in_renderer_succeeded, |
297 const std::set<GURL>& uris_of_generated_mhtml_parts) { | |
263 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 298 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
264 | 299 |
265 if (!mhtml_generation_in_renderer_succeeded) { | 300 if (!mhtml_generation_in_renderer_succeeded) { |
266 JobFinished(job_id, JobStatus::FAILURE); | 301 JobFinished(job_id, JobStatus::FAILURE); |
267 return; | 302 return; |
268 } | 303 } |
269 | 304 |
270 Job* job = FindJob(job_id); | 305 Job* job = FindJob(job_id); |
271 if (!job) | 306 if (!job) |
272 return; | 307 return; |
273 | 308 |
274 if (job->HasMoreFramesToProcess()) { | 309 if (!job->HasMoreFramesToProcess()) { |
275 if (!job->SendToNextRenderFrame()) { | 310 JobFinished(job_id, JobStatus::SUCCESS); |
276 JobFinished(job_id, JobStatus::FAILURE); | |
277 } | |
278 return; | 311 return; |
279 } | 312 } |
280 | 313 |
281 JobFinished(job_id, JobStatus::SUCCESS); | 314 if (!job->OnSavedPageAsMHTML(uris_of_generated_mhtml_parts)) { |
315 JobFinished(job_id, JobStatus::FAILURE); | |
316 } | |
282 } | 317 } |
283 | 318 |
284 // static | 319 // static |
285 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { | 320 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { |
286 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 321 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
287 | 322 |
288 // SECURITY NOTE: A file descriptor to the file created below will be passed | 323 // SECURITY NOTE: A file descriptor to the file created below will be passed |
289 // to multiple renderer processes which (in out-of-process iframes mode) can | 324 // to multiple renderer processes which (in out-of-process iframes mode) can |
290 // act on behalf of separate web principals. Therefore it is important to | 325 // act on behalf of separate web principals. Therefore it is important to |
291 // only allow writing to the file and forbid reading from the file (as this | 326 // only allow writing to the file and forbid reading from the file (as this |
(...skipping 85 matching lines...) | |
377 ++it) { | 412 ++it) { |
378 if (it->second == job) { | 413 if (it->second == job) { |
379 JobFinished(it->first, JobStatus::FAILURE); | 414 JobFinished(it->first, JobStatus::FAILURE); |
380 return; | 415 return; |
381 } | 416 } |
382 } | 417 } |
383 NOTREACHED(); | 418 NOTREACHED(); |
384 } | 419 } |
385 | 420 |
386 } // namespace content | 421 } // namespace content |
OLD | NEW |