OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/download/mhtml_generation_manager.h" | 5 #include "content/browser/download/mhtml_generation_manager.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 #include <queue> | 8 #include <queue> |
9 #include <utility> | 9 #include <utility> |
10 | 10 |
11 #include "base/bind.h" | 11 #include "base/bind.h" |
12 #include "base/files/file.h" | 12 #include "base/files/file.h" |
13 #include "base/guid.h" | 13 #include "base/guid.h" |
14 #include "base/macros.h" | 14 #include "base/macros.h" |
15 #include "base/rand_util.h" | 15 #include "base/rand_util.h" |
16 #include "base/scoped_observer.h" | 16 #include "base/scoped_observer.h" |
17 #include "base/stl_util.h" | 17 #include "base/stl_util.h" |
18 #include "base/strings/string_number_conversions.h" | 18 #include "base/strings/string_number_conversions.h" |
19 #include "base/strings/stringprintf.h" | 19 #include "base/strings/stringprintf.h" |
20 #include "content/browser/frame_host/frame_tree_node.h" | 20 #include "content/browser/frame_host/frame_tree_node.h" |
| 21 #include "content/browser/frame_host/render_frame_host_impl.h" |
21 #include "content/common/frame_messages.h" | 22 #include "content/common/frame_messages.h" |
22 #include "content/public/browser/browser_thread.h" | 23 #include "content/public/browser/browser_thread.h" |
23 #include "content/public/browser/render_frame_host.h" | 24 #include "content/public/browser/render_frame_host.h" |
24 #include "content/public/browser/render_process_host.h" | 25 #include "content/public/browser/render_process_host.h" |
25 #include "content/public/browser/render_process_host_observer.h" | 26 #include "content/public/browser/render_process_host_observer.h" |
26 #include "content/public/browser/web_contents.h" | 27 #include "content/public/browser/web_contents.h" |
| 28 #include "url/gurl.h" |
27 | 29 |
28 namespace content { | 30 namespace content { |
29 | 31 |
30 // The class and all of its members live on the UI thread. Only static methods | 32 // The class and all of its members live on the UI thread. Only static methods |
31 // are executed on other threads. | 33 // are executed on other threads. |
32 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { | 34 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
33 public: | 35 public: |
34 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); | 36 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); |
35 ~Job() override; | 37 ~Job() override; |
36 | 38 |
37 void set_browser_file(base::File file) { browser_file_ = std::move(file); } | 39 void set_browser_file(base::File file) { browser_file_ = std::move(file); } |
38 | 40 |
39 GenerateMHTMLCallback callback() const { return callback_; } | 41 GenerateMHTMLCallback callback() const { return callback_; } |
40 | 42 |
| 43 // Handler for FrameHostMsg_SerializeAsMHTMLResponse (a notification from the |
| 44 // renderer that the MHTML generation for the previous frame has finished). |
| 45 // Returns |true| upon success; |false| otherwise. |
| 46 bool OnSerializeAsMHTMLResponse( |
| 47 RenderFrameHostImpl* sender, |
| 48 const std::set<std::string>& digests_of_uris_of_serialized_resources); |
| 49 |
41 // Sends IPC to the renderer, asking for MHTML generation of the next frame. | 50 // Sends IPC to the renderer, asking for MHTML generation of the next frame. |
42 // | 51 // |
43 // Returns true if the message was sent successfully; false otherwise. | 52 // Returns true if the message was sent successfully; false otherwise. |
44 bool SendToNextRenderFrame(); | 53 bool SendToNextRenderFrame(); |
45 | 54 |
46 // Indicates if more calls to SendToNextRenderFrame are needed. | 55 // Indicates if the job is done: no response is awaited and no frames remain. |
47 bool HasMoreFramesToProcess() const { | 56 bool IsDone() const { |
48 return !pending_frame_tree_node_ids_.empty(); | 57 bool waiting_for_response_from_renderer = |
| 58 frame_tree_node_id_of_busy_frame_ != |
| 59 FrameTreeNode::kFrameTreeNodeInvalidId; |
| 60 bool no_more_requests_to_send = pending_frame_tree_node_ids_.empty(); |
| 61 return !waiting_for_response_from_renderer && no_more_requests_to_send; |
49 } | 62 } |
50 | 63 |
51 // Close the file on the file thread and respond back on the UI thread with | 64 // Close the file on the file thread and respond back on the UI thread with |
52 // file size. | 65 // file size. |
53 void CloseFile(base::Callback<void(int64_t file_size)> callback); | 66 void CloseFile(base::Callback<void(int64_t file_size)> callback); |
54 | 67 |
55 // RenderProcessHostObserver: | 68 // RenderProcessHostObserver: |
56 void RenderProcessExited(RenderProcessHost* host, | 69 void RenderProcessExited(RenderProcessHost* host, |
57 base::TerminationStatus status, | 70 base::TerminationStatus status, |
58 int exit_code) override; | 71 int exit_code) override; |
59 void RenderProcessHostDestroyed(RenderProcessHost* host) override; | 72 void RenderProcessHostDestroyed(RenderProcessHost* host) override; |
60 | 73 |
61 private: | 74 private: |
62 static std::string GenerateMHTMLBoundaryMarker(); | 75 static std::string GenerateMHTMLBoundaryMarker(); |
63 static int64_t CloseFileOnFileThread(base::File file); | 76 static int64_t CloseFileOnFileThread(base::File file); |
64 void AddFrame(RenderFrameHost* render_frame_host); | 77 void AddFrame(RenderFrameHost* render_frame_host); |
65 | 78 |
66 // Creates a new map with values (content ids) the same as in | 79 // Creates a new map with values (content ids) the same as in |
67 // |frame_tree_node_to_content_id_| map, but with the keys translated from | 80 // |frame_tree_node_to_content_id_| map, but with the keys translated from |
68 // frame_tree_node_id into a |site_instance|-specific routing_id. | 81 // frame_tree_node_id into a |site_instance|-specific routing_id. |
69 std::map<int, std::string> CreateFrameRoutingIdToContentId( | 82 std::map<int, std::string> CreateFrameRoutingIdToContentId( |
70 SiteInstance* site_instance); | 83 SiteInstance* site_instance); |
71 | 84 |
72 // Id used to map renderer responses to jobs. | 85 // Id used to map renderer responses to jobs. |
73 // See also MHTMLGenerationManager::id_to_job_ map. | 86 // See also MHTMLGenerationManager::id_to_job_ map. |
74 int job_id_; | 87 int job_id_; |
75 | 88 |
| 89 // The IDs of frames that still need to be processed. |
| 90 std::queue<int> pending_frame_tree_node_ids_; |
| 91 |
| 92 // Identifies a frame to which we've sent FrameMsg_SerializeAsMHTML but for |
| 93 // which we didn't yet process FrameHostMsg_SerializeAsMHTMLResponse via |
| 94 // OnSerializeAsMHTMLResponse. |
| 95 int frame_tree_node_id_of_busy_frame_; |
| 96 |
76 // The handle to the file the MHTML is saved to for the browser process. | 97 // The handle to the file the MHTML is saved to for the browser process. |
77 base::File browser_file_; | 98 base::File browser_file_; |
78 | 99 |
79 // The IDs of frames we still need to process. | |
80 std::queue<int> pending_frame_tree_node_ids_; | |
81 | |
82 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts | 100 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts |
83 // for more details about what "content ids" are and how they are used). | 101 // for more details about what "content ids" are and how they are used). |
84 std::map<int, std::string> frame_tree_node_to_content_id_; | 102 std::map<int, std::string> frame_tree_node_to_content_id_; |
85 | 103 |
86 // MIME multipart boundary to use in the MHTML doc. | 104 // MIME multipart boundary to use in the MHTML doc. |
87 std::string mhtml_boundary_marker_; | 105 std::string mhtml_boundary_marker_; |
88 | 106 |
| 107 // Digests of URIs of already generated MHTML parts. |
| 108 std::set<std::string> digests_of_already_serialized_uris_; |
| 109 std::string salt_; |
| 110 |
89 // The callback to call once generation is complete. | 111 // The callback to call once generation is complete. |
90 GenerateMHTMLCallback callback_; | 112 GenerateMHTMLCallback callback_; |
91 | 113 |
92 // RAII helper for registering this Job as a RenderProcessHost observer. | 114 // RAII helper for registering this Job as a RenderProcessHost observer. |
93 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> | 115 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> |
94 observed_renderer_process_host_; | 116 observed_renderer_process_host_; |
95 | 117 |
96 DISALLOW_COPY_AND_ASSIGN(Job); | 118 DISALLOW_COPY_AND_ASSIGN(Job); |
97 }; | 119 }; |
98 | 120 |
99 MHTMLGenerationManager::Job::Job(int job_id, | 121 MHTMLGenerationManager::Job::Job(int job_id, |
100 WebContents* web_contents, | 122 WebContents* web_contents, |
101 GenerateMHTMLCallback callback) | 123 GenerateMHTMLCallback callback) |
102 : job_id_(job_id), | 124 : job_id_(job_id), |
| 125 frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId), |
103 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()), | 126 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()), |
| 127 salt_(base::GenerateGUID()), |
104 callback_(callback), | 128 callback_(callback), |
105 observed_renderer_process_host_(this) { | 129 observed_renderer_process_host_(this) { |
106 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 130 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
107 web_contents->ForEachFrame(base::Bind( | 131 web_contents->ForEachFrame(base::Bind( |
108 &MHTMLGenerationManager::Job::AddFrame, | 132 &MHTMLGenerationManager::Job::AddFrame, |
109 base::Unretained(this))); // Safe because ForEachFrame is synchronous. | 133 base::Unretained(this))); // Safe because ForEachFrame is synchronous. |
110 | 134 |
111 // Main frame needs to be processed first. | 135 // Main frame needs to be processed first. |
112 DCHECK(!pending_frame_tree_node_ids_.empty()); | 136 DCHECK(!pending_frame_tree_node_ids_.empty()); |
113 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) | 137 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) |
(...skipping 23 matching lines...) Expand all Loading... |
137 | 161 |
138 result[routing_id] = content_id; | 162 result[routing_id] = content_id; |
139 } | 163 } |
140 return result; | 164 return result; |
141 } | 165 } |
142 | 166 |
143 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { | 167 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { |
144 DCHECK(browser_file_.IsValid()); | 168 DCHECK(browser_file_.IsValid()); |
145 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); | 169 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); |
146 | 170 |
| 171 FrameMsg_SerializeAsMHTML_Params ipc_params; |
| 172 ipc_params.job_id = job_id_; |
| 173 ipc_params.mhtml_boundary_marker = mhtml_boundary_marker_; |
| 174 |
147 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); | 175 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
148 pending_frame_tree_node_ids_.pop(); | 176 pending_frame_tree_node_ids_.pop(); |
149 bool is_last_frame = pending_frame_tree_node_ids_.empty(); | 177 ipc_params.is_last_frame = pending_frame_tree_node_ids_.empty(); |
150 | 178 |
151 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); | 179 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); |
152 if (!ftn) // The contents went away. | 180 if (!ftn) // The contents went away. |
153 return false; | 181 return false; |
154 RenderFrameHost* rfh = ftn->current_frame_host(); | 182 RenderFrameHost* rfh = ftn->current_frame_host(); |
155 | 183 |
156 // Get notified if the target of the IPC message dies before responding. | 184 // Get notified if the target of the IPC message dies before responding. |
157 observed_renderer_process_host_.RemoveAll(); | 185 observed_renderer_process_host_.RemoveAll(); |
158 observed_renderer_process_host_.Add(rfh->GetProcess()); | 186 observed_renderer_process_host_.Add(rfh->GetProcess()); |
159 | 187 |
160 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( | 188 // Tell the renderer to skip (= deduplicate) already covered MHTML parts. |
| 189 ipc_params.salt = salt_; |
| 190 ipc_params.digests_of_uris_to_skip = digests_of_already_serialized_uris_; |
| 191 |
| 192 ipc_params.destination_file = IPC::GetFileHandleForProcess( |
161 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), | 193 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), |
162 false); // |close_source_handle|. | 194 false); // |close_source_handle|. |
163 rfh->Send(new FrameMsg_SerializeAsMHTML( | 195 ipc_params.frame_routing_id_to_content_id = |
164 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, | 196 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()); |
165 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); | 197 |
| 198 // Send the IPC asking the renderer to serialize the frame. |
| 199 DCHECK_EQ(FrameTreeNode::kFrameTreeNodeInvalidId, |
| 200 frame_tree_node_id_of_busy_frame_); |
| 201 frame_tree_node_id_of_busy_frame_ = frame_tree_node_id; |
| 202 rfh->Send(new FrameMsg_SerializeAsMHTML(rfh->GetRoutingID(), ipc_params)); |
166 return true; | 203 return true; |
167 } | 204 } |
168 | 205 |
169 void MHTMLGenerationManager::Job::RenderProcessExited( | 206 void MHTMLGenerationManager::Job::RenderProcessExited( |
170 RenderProcessHost* host, | 207 RenderProcessHost* host, |
171 base::TerminationStatus status, | 208 base::TerminationStatus status, |
172 int exit_code) { | 209 int exit_code) { |
173 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 210 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
174 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); | 211 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); |
175 } | 212 } |
(...skipping 24 matching lines...) Expand all Loading... |
200 return; | 237 return; |
201 } | 238 } |
202 | 239 |
203 BrowserThread::PostTaskAndReplyWithResult( | 240 BrowserThread::PostTaskAndReplyWithResult( |
204 BrowserThread::FILE, FROM_HERE, | 241 BrowserThread::FILE, FROM_HERE, |
205 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, | 242 base::Bind(&MHTMLGenerationManager::Job::CloseFileOnFileThread, |
206 base::Passed(std::move(browser_file_))), | 243 base::Passed(std::move(browser_file_))), |
207 callback); | 244 callback); |
208 } | 245 } |
209 | 246 |
| 247 bool MHTMLGenerationManager::Job::OnSerializeAsMHTMLResponse( |
| 248 RenderFrameHostImpl* sender, |
| 249 const std::set<std::string>& digests_of_uris_of_serialized_resources) { |
| 250 // Sanitize renderer input / reject unexpected messages. |
| 251 int sender_id = sender->frame_tree_node()->frame_tree_node_id(); |
| 252 if (sender_id != frame_tree_node_id_of_busy_frame_) { |
| 253 NOTREACHED(); |
| 254 return false; // Report failure. |
| 255 } |
| 256 frame_tree_node_id_of_busy_frame_ = FrameTreeNode::kFrameTreeNodeInvalidId; |
| 257 |
| 258 // Renderer should be deduping resources with the same uris. |
| 259 DCHECK_EQ(0u, base::STLSetIntersection<std::set<std::string>>( |
| 260 digests_of_already_serialized_uris_, |
| 261 digests_of_uris_of_serialized_resources).size()); |
| 262 digests_of_already_serialized_uris_.insert( |
| 263 digests_of_uris_of_serialized_resources.begin(), |
| 264 digests_of_uris_of_serialized_resources.end()); |
| 265 |
| 266 if (pending_frame_tree_node_ids_.empty()) |
| 267 return true; // Report success. |
| 268 |
| 269 return SendToNextRenderFrame(); |
| 270 } |
| 271 |
210 // static | 272 // static |
211 int64_t MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { | 273 int64_t MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { |
212 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 274 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
213 DCHECK(file.IsValid()); | 275 DCHECK(file.IsValid()); |
214 int64_t file_size = file.GetLength(); | 276 int64_t file_size = file.GetLength(); |
215 file.Close(); | 277 file.Close(); |
216 return file_size; | 278 return file_size; |
217 } | 279 } |
218 | 280 |
219 // static | 281 // static |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
253 int job_id = NewJob(web_contents, callback); | 315 int job_id = NewJob(web_contents, callback); |
254 | 316 |
255 BrowserThread::PostTaskAndReplyWithResult( | 317 BrowserThread::PostTaskAndReplyWithResult( |
256 BrowserThread::FILE, FROM_HERE, | 318 BrowserThread::FILE, FROM_HERE, |
257 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), | 319 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), |
258 base::Bind(&MHTMLGenerationManager::OnFileAvailable, | 320 base::Bind(&MHTMLGenerationManager::OnFileAvailable, |
259 base::Unretained(this), // Safe b/c |this| is a singleton. | 321 base::Unretained(this), // Safe b/c |this| is a singleton. |
260 job_id)); | 322 job_id)); |
261 } | 323 } |
262 | 324 |
263 void MHTMLGenerationManager::OnSavedFrameAsMHTML( | 325 void MHTMLGenerationManager::OnSerializeAsMHTMLResponse( |
| 326 RenderFrameHostImpl* sender, |
264 int job_id, | 327 int job_id, |
265 bool mhtml_generation_in_renderer_succeeded) { | 328 bool mhtml_generation_in_renderer_succeeded, |
| 329 const std::set<std::string>& digests_of_uris_of_serialized_resources) { |
266 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 330 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
267 | 331 |
268 if (!mhtml_generation_in_renderer_succeeded) { | 332 if (!mhtml_generation_in_renderer_succeeded) { |
269 JobFinished(job_id, JobStatus::FAILURE); | 333 JobFinished(job_id, JobStatus::FAILURE); |
270 return; | 334 return; |
271 } | 335 } |
272 | 336 |
273 Job* job = FindJob(job_id); | 337 Job* job = FindJob(job_id); |
274 if (!job) | 338 if (!job) |
275 return; | 339 return; |
276 | 340 |
277 if (job->HasMoreFramesToProcess()) { | 341 if (!job->OnSerializeAsMHTMLResponse( |
278 if (!job->SendToNextRenderFrame()) { | 342 sender, digests_of_uris_of_serialized_resources)) { |
279 JobFinished(job_id, JobStatus::FAILURE); | 343 JobFinished(job_id, JobStatus::FAILURE); |
280 } | |
281 return; | 344 return; |
282 } | 345 } |
283 | 346 |
284 JobFinished(job_id, JobStatus::SUCCESS); | 347 if (job->IsDone()) |
| 348 JobFinished(job_id, JobStatus::SUCCESS); |
285 } | 349 } |
286 | 350 |
287 // static | 351 // static |
288 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { | 352 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { |
289 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 353 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
290 | 354 |
291 // SECURITY NOTE: A file descriptor to the file created below will be passed | 355 // SECURITY NOTE: A file descriptor to the file created below will be passed |
292 // to multiple renderer processes which (in out-of-process iframes mode) can | 356 // to multiple renderer processes which (in out-of-process iframes mode) can |
293 // act on behalf of separate web principals. Therefore it is important to | 357 // act on behalf of separate web principals. Therefore it is important to |
294 // only allow writing to the file and forbid reading from the file (as this | 358 // only allow writing to the file and forbid reading from the file (as this |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
380 ++it) { | 444 ++it) { |
381 if (it->second == job) { | 445 if (it->second == job) { |
382 JobFinished(it->first, JobStatus::FAILURE); | 446 JobFinished(it->first, JobStatus::FAILURE); |
383 return; | 447 return; |
384 } | 448 } |
385 } | 449 } |
386 NOTREACHED(); | 450 NOTREACHED(); |
387 } | 451 } |
388 | 452 |
389 } // namespace content | 453 } // namespace content |
OLD | NEW |