Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/download/mhtml_generation_manager.h" | 5 #include "content/browser/download/mhtml_generation_manager.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <queue> | 8 #include <queue> |
| 9 #include <sstream> | 9 #include <sstream> |
| 10 | 10 |
| 11 #include "base/bind.h" | 11 #include "base/bind.h" |
| 12 #include "base/files/file.h" | 12 #include "base/files/file.h" |
| 13 #include "base/guid.h" | 13 #include "base/guid.h" |
| 14 #include "base/scoped_observer.h" | 14 #include "base/scoped_observer.h" |
| 15 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
| 16 #include "content/browser/frame_host/frame_tree_node.h" | 16 #include "content/browser/frame_host/frame_tree_node.h" |
| 17 #include "content/common/frame_messages.h" | 17 #include "content/common/frame_messages.h" |
| 18 #include "content/public/browser/browser_thread.h" | 18 #include "content/public/browser/browser_thread.h" |
| 19 #include "content/public/browser/render_frame_host.h" | 19 #include "content/public/browser/render_frame_host.h" |
| 20 #include "content/public/browser/render_process_host.h" | 20 #include "content/public/browser/render_process_host.h" |
| 21 #include "content/public/browser/render_process_host_observer.h" | 21 #include "content/public/browser/render_process_host_observer.h" |
| 22 #include "content/public/browser/web_contents.h" | 22 #include "content/public/browser/web_contents.h" |
| 23 #include "crypto/sha2.h" | |
| 24 #include "url/gurl.h" | |
| 23 | 25 |
| 24 namespace content { | 26 namespace content { |
| 25 | 27 |
| 26 // The class and all of its members live on the UI thread. Only static methods | 28 // The class and all of its members live on the UI thread. Only static methods |
| 27 // are executed on other threads. | 29 // are executed on other threads. |
| 28 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { | 30 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
| 29 public: | 31 public: |
| 30 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); | 32 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); |
| 31 ~Job() override; | 33 ~Job() override; |
| 32 | 34 |
| 33 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } | 35 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } |
| 34 | 36 |
| 35 GenerateMHTMLCallback callback() const { return callback_; } | 37 GenerateMHTMLCallback callback() const { return callback_; } |
| 36 | 38 |
| 37 // Sends IPC to the renderer, asking for MHTML generation of the next frame. | 39 // Sends IPC to the renderer of the first frame, asking it to start |
| 38 // | 40 // MHTML serialization. |
| 39 // Returns true if the message was sent successfully; false otherwise. | 41 // Returns |true| upon success; |false| otherwise. |
| 40 // | 42 bool SendToFirstFrame(); |
| 41 // See FrameMsg_SerializeAsMHTML IPC message for description of | |
| 42 // |mhtml_boundary_marker| parameter. | |
| 43 bool SendToNextRenderFrame(const std::string& mhtml_boundary_marker); | |
| 44 | 43 |
| 45 // Indicates if more calls to SendToNextRenderFrame are needed. | 44 // Handler for ViewHostMsg_SavedPageAsMHTML (a notification from the renderer |
| 45 // that the MHTML generation for previous frame has finished). | |
| 46 // Returns |true| upon success; |false| otherwise. | |
| 47 bool OnSavedPageAsMHTML(std::string mhtml_boundary_marker, | |
| 48 const std::set<GURL>& uris_of_generated_mhtml_parts); | |
| 49 | |
| 50 // Indicates if there are still frames left to process. | |
| 46 bool GotMoreFramesToProcess() { | 51 bool GotMoreFramesToProcess() { |
| 47 return !pending_frame_tree_node_ids_.empty(); | 52 return !pending_frame_tree_node_ids_.empty(); |
| 48 } | 53 } |
| 49 | 54 |
| 50 // Close the file on the file thread and respond back on the UI thread with | 55 // Close the file on the file thread and respond back on the UI thread with |
| 51 // file size. | 56 // file size. |
| 52 void CloseFile(base::Callback<void(int64 file_size)> callback); | 57 void CloseFile(base::Callback<void(int64 file_size)> callback); |
| 53 | 58 |
| 54 // RenderProcessHostObserver: | 59 // RenderProcessHostObserver: |
| 55 void RenderProcessExited(RenderProcessHost* host, | 60 void RenderProcessExited(RenderProcessHost* host, |
| 56 base::TerminationStatus status, | 61 base::TerminationStatus status, |
| 57 int exit_code) override; | 62 int exit_code) override; |
| 58 void RenderProcessHostDestroyed(RenderProcessHost* host) override; | 63 void RenderProcessHostDestroyed(RenderProcessHost* host) override; |
| 59 | 64 |
| 60 private: | 65 private: |
| 61 static int64 CloseFileOnFileThread(base::File file); | 66 static int64 CloseFileOnFileThread(base::File file); |
| 62 void AddFrame(RenderFrameHost* render_frame_host); | 67 void AddFrame(RenderFrameHost* render_frame_host); |
| 63 | 68 |
| 64 // Translates |frame_tree_node_to_content_id_| into | 69 // Translates |frame_tree_node_to_content_id_| into |
| 65 // a |site_instance|-specific, routing-id-based map. | 70 // a |site_instance|-specific, routing-id-based map. |
| 66 std::map<int, std::string> CreateFrameRoutingIdToContentId( | 71 std::map<int, std::string> CreateFrameRoutingIdToContentId( |
| 67 SiteInstance* site_instance); | 72 SiteInstance* site_instance); |
| 68 | 73 |
| 74 // Sends IPC to the renderer, asking for MHTML generation of the next frame. | |
| 75 // | |
| 76 // Returns true if the message was sent successfully; false otherwise. | |
| 77 // | |
| 78 // See FrameMsg_SerializeAsMHTML IPC message for description of | |
| 79 // |mhtml_boundary_marker| parameter. | |
| 80 bool SendToNextRenderFrame(const std::string& mhtml_boundary_marker); | |
| 81 | |
| 69 // Id used to map renderer responses to jobs. | 82 // Id used to map renderer responses to jobs. |
| 70 // See also MHTMLGenerationManager::id_to_job_ map. | 83 // See also MHTMLGenerationManager::id_to_job_ map. |
| 71 int job_id_; | 84 int job_id_; |
| 72 | 85 |
| 73 // The handle to the file the MHTML is saved to for the browser process. | 86 // The handle to the file the MHTML is saved to for the browser process. |
| 74 base::File browser_file_; | 87 base::File browser_file_; |
| 75 | 88 |
| 76 // The IDs of frames we still need to process. | 89 // The IDs of frames we still need to process. |
| 77 std::queue<int> pending_frame_tree_node_ids_; | 90 std::queue<int> pending_frame_tree_node_ids_; |
| 78 | 91 |
| 79 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts | 92 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts |
| 80 // for more details about what "content ids" are and how they are used). | 93 // for more details about what "content ids" are and how they are used). |
| 81 std::map<int, std::string> frame_tree_node_to_content_id_; | 94 std::map<int, std::string> frame_tree_node_to_content_id_; |
| 82 | 95 |
| 96 // URIs of already generated MHTML parts. | |
| 97 std::set<GURL> already_serialized_uris_; | |
| 98 | |
| 83 // The callback to call once generation is complete. | 99 // The callback to call once generation is complete. |
| 84 GenerateMHTMLCallback callback_; | 100 GenerateMHTMLCallback callback_; |
| 85 | 101 |
| 86 // RAII helper for registering this Job as a RenderProcessHost observer. | 102 // RAII helper for registering this Job as a RenderProcessHost observer. |
| 87 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> | 103 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> |
| 88 observed_renderer_process_host_; | 104 observed_renderer_process_host_; |
| 89 | 105 |
| 90 DISALLOW_COPY_AND_ASSIGN(Job); | 106 DISALLOW_COPY_AND_ASSIGN(Job); |
| 91 }; | 107 }; |
| 92 | 108 |
| (...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 131 result[routing_id] = content_id; | 147 result[routing_id] = content_id; |
| 132 } | 148 } |
| 133 return result; | 149 return result; |
| 134 } | 150 } |
| 135 | 151 |
| 136 bool MHTMLGenerationManager::Job::SendToNextRenderFrame( | 152 bool MHTMLGenerationManager::Job::SendToNextRenderFrame( |
| 137 const std::string& mhtml_boundary_marker) { | 153 const std::string& mhtml_boundary_marker) { |
| 138 DCHECK(browser_file_.IsValid()); | 154 DCHECK(browser_file_.IsValid()); |
| 139 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); | 155 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); |
| 140 | 156 |
| 157 FrameMsg_SerializeAsMHTML_Params ipc_params; | |
| 158 ipc_params.job_id = job_id_; | |
| 159 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker; | |
| 160 | |
| 141 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); | 161 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
| 142 pending_frame_tree_node_ids_.pop(); | 162 pending_frame_tree_node_ids_.pop(); |
| 143 bool is_last_frame = pending_frame_tree_node_ids_.empty(); | 163 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty(); |
| 144 | 164 |
| 145 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); | 165 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); |
| 146 if (!ftn) // The contents went away. | 166 if (!ftn) // The contents went away. |
| 147 return false; | 167 return false; |
| 148 RenderFrameHost* rfh = ftn->current_frame_host(); | 168 RenderFrameHost* rfh = ftn->current_frame_host(); |
| 149 | 169 |
| 150 // Get notified if the target of the IPC message dies before responding. | 170 // Get notified if the target of the IPC message dies before responding. |
| 151 observed_renderer_process_host_.RemoveAll(); | 171 observed_renderer_process_host_.RemoveAll(); |
| 152 observed_renderer_process_host_.Add(rfh->GetProcess()); | 172 observed_renderer_process_host_.Add(rfh->GetProcess()); |
| 153 | 173 |
| 154 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( | 174 // Tell the renderer to skip (= deduplicate) already covered MHTML parts. |
| 175 std::set<std::string> digests_of_uris_to_skip; | **ncarter (slow)** (2015/12/04 21:52:39): This is unused.<br>**Łukasz Anforowicz** (2015/12/04 22:55:43): Thanks for catching this. I must have missed this |
| 176 ipc_params.salt = base::GenerateGUID(); | |
| 177 for (const GURL& uri : already_serialized_uris_) { | |
| 178 std::string digest = crypto::SHA256HashString(ipc_params.salt + uri.spec()); | |
| 179 ipc_params.digests_of_uris_to_skip.insert(digest); | |
| 180 } | |
| 181 | |
| 182 ipc_params.destination_file = IPC::GetFileHandleForProcess( | |
| 155 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), | 183 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), |
| 156 false); // |close_source_handle|. | 184 false); // |close_source_handle|. |
| 157 rfh->Send(new FrameMsg_SerializeAsMHTML( | 185 ipc_params.frame_routing_id_to_content_id = |
| 158 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker, | 186 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()); |
| 159 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); | 187 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params)); |
| 160 return true; | 188 return true; |
| 161 } | 189 } |
| 162 | 190 |
| 163 void MHTMLGenerationManager::Job::RenderProcessExited( | 191 void MHTMLGenerationManager::Job::RenderProcessExited( |
| 164 RenderProcessHost* host, | 192 RenderProcessHost* host, |
| 165 base::TerminationStatus status, | 193 base::TerminationStatus status, |
| 166 int exit_code) { | 194 int exit_code) { |
| 167 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 195 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 168 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); | 196 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); |
| 169 } | 197 } |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 195 return; | 223 return; |
| 196 } | 224 } |
| 197 | 225 |
| 198 BrowserThread::PostTaskAndReplyWithResult( | 226 BrowserThread::PostTaskAndReplyWithResult( |
| 199 BrowserThread::FILE, FROM_HERE, | 227 BrowserThread::FILE, FROM_HERE, |
| 200 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, | 228 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, |
| 201 base::Passed(browser_file_.Pass())), | 229 base::Passed(browser_file_.Pass())), |
| 202 callback); | 230 callback); |
| 203 } | 231 } |
| 204 | 232 |
| 233 bool MHTMLGenerationManager::Job::SendToFirstFrame() { | |
| 234 // We don't yet have an mhtml boundary marker when serializing the 1st frame. | |
| 235 std::string initial_mhtml_boundary_marker = ""; | |
| 236 | |
| 237 return SendToNextRenderFrame(initial_mhtml_boundary_marker); | |
| 238 } | |
| 239 | |
| 240 bool MHTMLGenerationManager::Job::OnSavedPageAsMHTML( | |
| 241 std::string mhtml_boundary_marker, | |
| 242 const std::set<GURL>& uris_of_generated_mhtml_parts) { | |
| 243 bool success = true; | |
| 244 | |
| 245 already_serialized_uris_.insert(uris_of_generated_mhtml_parts.begin(), | |
| 246 uris_of_generated_mhtml_parts.end()); | |
| 247 | |
| 248 if (GotMoreFramesToProcess()) | |
| 249 success = SendToNextRenderFrame(mhtml_boundary_marker); | |
| 250 | |
| 251 return success; | |
| 252 } | |
| 253 | |
| 205 // static | 254 // static |
| 206 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { | 255 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { |
| 207 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 256 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
| 208 DCHECK(file.IsValid()); | 257 DCHECK(file.IsValid()); |
| 209 int64 file_size = file.GetLength(); | 258 int64 file_size = file.GetLength(); |
| 210 file.Close(); | 259 file.Close(); |
| 211 return file_size; | 260 return file_size; |
| 212 } | 261 } |
| 213 | 262 |
| 214 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { | 263 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 232 BrowserThread::FILE, FROM_HERE, | 281 BrowserThread::FILE, FROM_HERE, |
| 233 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), | 282 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), |
| 234 base::Bind(&MHTMLGenerationManager::OnFileAvailable, | 283 base::Bind(&MHTMLGenerationManager::OnFileAvailable, |
| 235 base::Unretained(this), // Safe b/c |this| is a singleton. | 284 base::Unretained(this), // Safe b/c |this| is a singleton. |
| 236 job_id)); | 285 job_id)); |
| 237 } | 286 } |
| 238 | 287 |
| 239 void MHTMLGenerationManager::OnSavedPageAsMHTML( | 288 void MHTMLGenerationManager::OnSavedPageAsMHTML( |
| 240 int job_id, | 289 int job_id, |
| 241 bool mhtml_generation_in_renderer_succeeded, | 290 bool mhtml_generation_in_renderer_succeeded, |
| 242 const std::string& mhtml_boundary_marker) { | 291 const std::string& mhtml_boundary_marker, |
| 292 const std::set<GURL>& uris_of_generated_mhtml_parts) { | |
| 243 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 293 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 244 | 294 |
| 245 if (!mhtml_generation_in_renderer_succeeded) { | 295 if (!mhtml_generation_in_renderer_succeeded) { |
| 246 JobFinished(job_id, JobStatus::FAILURE); | 296 JobFinished(job_id, JobStatus::FAILURE); |
| 247 return; | 297 return; |
| 248 } | 298 } |
| 249 | 299 |
| 250 Job* job = FindJob(job_id); | 300 Job* job = FindJob(job_id); |
| 251 if (!job) | 301 if (!job) |
| 252 return; | 302 return; |
| 253 | 303 |
| 254 if (job->GotMoreFramesToProcess()) { | 304 if (!job->GotMoreFramesToProcess()) { |
| 255 if (!job->SendToNextRenderFrame(mhtml_boundary_marker)) { | 305 JobFinished(job_id, JobStatus::SUCCESS); |
| 256 JobFinished(job_id, JobStatus::FAILURE); | |
| 257 } | |
| 258 return; | 306 return; |
| 259 } | 307 } |
| 260 | 308 |
| 261 JobFinished(job_id, JobStatus::SUCCESS); | 309 if (!job->OnSavedPageAsMHTML(mhtml_boundary_marker, |
| 310 uris_of_generated_mhtml_parts)) { | |
| 311 JobFinished(job_id, JobStatus::FAILURE); | |
| 312 } | |
| 262 } | 313 } |
| 263 | 314 |
| 264 // static | 315 // static |
| 265 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { | 316 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { |
| 266 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 317 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
| 267 | 318 |
| 268 // SECURITY NOTE: A file descriptor to the file created below will be passed | 319 // SECURITY NOTE: A file descriptor to the file created below will be passed |
| 269 // to multiple renderer processes which (in out-of-process iframes mode) can | 320 // to multiple renderer processes which (in out-of-process iframes mode) can |
| 270 // act on behalf of separate web principals. Therefore it is important to | 321 // act on behalf of separate web principals. Therefore it is important to |
| 271 // only allow writing to the file and forbid reading from the file (as this | 322 // only allow writing to the file and forbid reading from the file (as this |
| (...skipping 18 matching lines...) Expand all Loading... | |
| 290 JobFinished(job_id, JobStatus::FAILURE); | 341 JobFinished(job_id, JobStatus::FAILURE); |
| 291 return; | 342 return; |
| 292 } | 343 } |
| 293 | 344 |
| 294 Job* job = FindJob(job_id); | 345 Job* job = FindJob(job_id); |
| 295 if (!job) | 346 if (!job) |
| 296 return; | 347 return; |
| 297 | 348 |
| 298 job->set_browser_file(browser_file.Pass()); | 349 job->set_browser_file(browser_file.Pass()); |
| 299 | 350 |
| 300 // We don't yet have an mhtml boundary marker when serializing the 1st frame. | 351 if (!job->SendToFirstFrame()) { |
| 301 std::string initial_mhtml_boundary_marker = ""; | |
| 302 if (!job->SendToNextRenderFrame(initial_mhtml_boundary_marker)) { | |
| 303 JobFinished(job_id, JobStatus::FAILURE); | 352 JobFinished(job_id, JobStatus::FAILURE); |
| 304 } | 353 } |
| 305 } | 354 } |
| 306 | 355 |
| 307 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { | 356 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { |
| 308 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 357 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 309 | 358 |
| 310 Job* job = FindJob(job_id); | 359 Job* job = FindJob(job_id); |
| 311 if (!job) | 360 if (!job) |
| 312 return; | 361 return; |
| (...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 359 ++it) { | 408 ++it) { |
| 360 if (it->second == job) { | 409 if (it->second == job) { |
| 361 JobFinished(it->first, JobStatus::FAILURE); | 410 JobFinished(it->first, JobStatus::FAILURE); |
| 362 return; | 411 return; |
| 363 } | 412 } |
| 364 } | 413 } |
| 365 NOTREACHED(); | 414 NOTREACHED(); |
| 366 } | 415 } |
| 367 | 416 |
| 368 } // namespace content | 417 } // namespace content |
| OLD | NEW |