OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/download/mhtml_generation_manager.h" | 5 #include "content/browser/download/mhtml_generation_manager.h" |
6 | 6 |
| 7 #include <map> |
| 8 #include <queue> |
| 9 |
7 #include "base/bind.h" | 10 #include "base/bind.h" |
8 #include "base/files/file.h" | 11 #include "base/files/file.h" |
| 12 #include "base/guid.h" |
| 13 #include "base/rand_util.h" |
9 #include "base/scoped_observer.h" | 14 #include "base/scoped_observer.h" |
10 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
11 #include "content/browser/renderer_host/render_view_host_impl.h" | 16 #include "base/strings/string_number_conversions.h" |
| 17 #include "base/strings/stringprintf.h" |
| 18 #include "content/browser/frame_host/frame_tree_node.h" |
| 19 #include "content/common/frame_messages.h" |
12 #include "content/public/browser/browser_thread.h" | 20 #include "content/public/browser/browser_thread.h" |
13 #include "content/public/browser/render_frame_host.h" | 21 #include "content/public/browser/render_frame_host.h" |
14 #include "content/public/browser/render_process_host.h" | 22 #include "content/public/browser/render_process_host.h" |
15 #include "content/public/browser/render_process_host_observer.h" | 23 #include "content/public/browser/render_process_host_observer.h" |
16 #include "content/public/browser/web_contents.h" | 24 #include "content/public/browser/web_contents.h" |
17 #include "content/common/view_messages.h" | |
18 | 25 |
19 namespace content { | 26 namespace content { |
20 | 27 |
21 // The class and all of its members live on the UI thread. Only static methods | 28 // The class and all of its members live on the UI thread. Only static methods |
22 // are executed on other threads. | 29 // are executed on other threads. |
23 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { | 30 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
24 public: | 31 public: |
25 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); | 32 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); |
26 ~Job() override; | 33 ~Job() override; |
27 | 34 |
28 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } | 35 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } |
29 | 36 |
30 GenerateMHTMLCallback callback() const { return callback_; } | 37 GenerateMHTMLCallback callback() const { return callback_; } |
31 | 38 |
32 // Sends IPC to the renderer, asking for MHTML generation. | 39 // Sends IPC to the renderer, asking for MHTML generation of the next frame. |
| 40 // |
33 // Returns true if the message was sent successfully; false otherwise. | 41 // Returns true if the message was sent successfully; false otherwise. |
34 bool SendToRenderView(); | 42 bool SendToNextRenderFrame(); |
| 43 |
| 44 // Indicates if more calls to SendToNextRenderFrame are needed. |
| 45 bool HasMoreFramesToProcess() const { |
| 46 return !pending_frame_tree_node_ids_.empty(); |
| 47 } |
35 | 48 |
36 // Close the file on the file thread and respond back on the UI thread with | 49 // Close the file on the file thread and respond back on the UI thread with |
37 // file size. | 50 // file size. |
38 void CloseFile(base::Callback<void(int64 file_size)> callback); | 51 void CloseFile(base::Callback<void(int64 file_size)> callback); |
39 | 52 |
40 // RenderProcessHostObserver: | 53 // RenderProcessHostObserver: |
41 void RenderProcessExited(RenderProcessHost* host, | 54 void RenderProcessExited(RenderProcessHost* host, |
42 base::TerminationStatus status, | 55 base::TerminationStatus status, |
43 int exit_code) override; | 56 int exit_code) override; |
44 void RenderProcessHostDestroyed(RenderProcessHost* host) override; | 57 void RenderProcessHostDestroyed(RenderProcessHost* host) override; |
45 | 58 |
46 private: | 59 private: |
| 60 static std::string GenerateMHTMLBoundaryMarker(); |
47 static int64 CloseFileOnFileThread(base::File file); | 61 static int64 CloseFileOnFileThread(base::File file); |
| 62 void AddFrame(RenderFrameHost* render_frame_host); |
| 63 |
| 64 // Creates a new map with values (content ids) the same as in |
| 65 // |frame_tree_node_to_content_id_| map, but with the keys translated from |
| 66 // frame_tree_node_id into a |site_instance|-specific routing_id. |
| 67 std::map<int, std::string> CreateFrameRoutingIdToContentId( |
| 68 SiteInstance* site_instance); |
48 | 69 |
49 // Id used to map renderer responses to jobs. | 70 // Id used to map renderer responses to jobs. |
50 // See also MHTMLGenerationManager::id_to_job_ map. | 71 // See also MHTMLGenerationManager::id_to_job_ map. |
51 int job_id_; | 72 int job_id_; |
52 | 73 |
53 // The handle to the file the MHTML is saved to for the browser process. | 74 // The handle to the file the MHTML is saved to for the browser process. |
54 base::File browser_file_; | 75 base::File browser_file_; |
55 | 76 |
56 // The IDs mapping to a specific contents. | 77 // The IDs of frames we still need to process. |
57 int process_id_; | 78 std::queue<int> pending_frame_tree_node_ids_; |
58 int routing_id_; | 79 |
| 80 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts |
| 81 // for more details about what "content ids" are and how they are used). |
| 82 std::map<int, std::string> frame_tree_node_to_content_id_; |
| 83 |
| 84 // MIME multipart boundary to use in the MHTML doc. |
| 85 std::string mhtml_boundary_marker_; |
59 | 86 |
60 // The callback to call once generation is complete. | 87 // The callback to call once generation is complete. |
61 GenerateMHTMLCallback callback_; | 88 GenerateMHTMLCallback callback_; |
62 | 89 |
63 // RAII helper for registering this Job as a RenderProcessHost observer. | 90 // RAII helper for registering this Job as a RenderProcessHost observer. |
64 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> | 91 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> |
65 observed_renderer_process_host_; | 92 observed_renderer_process_host_; |
66 | 93 |
67 DISALLOW_COPY_AND_ASSIGN(Job); | 94 DISALLOW_COPY_AND_ASSIGN(Job); |
68 }; | 95 }; |
69 | 96 |
70 MHTMLGenerationManager::Job::Job(int job_id, | 97 MHTMLGenerationManager::Job::Job(int job_id, |
71 WebContents* web_contents, | 98 WebContents* web_contents, |
72 GenerateMHTMLCallback callback) | 99 GenerateMHTMLCallback callback) |
73 : job_id_(job_id), | 100 : job_id_(job_id), |
74 process_id_(web_contents->GetRenderProcessHost()->GetID()), | 101 mhtml_boundary_marker_(GenerateMHTMLBoundaryMarker()), |
75 routing_id_(web_contents->GetRenderViewHost()->GetRoutingID()), | |
76 callback_(callback), | 102 callback_(callback), |
77 observed_renderer_process_host_(this) { | 103 observed_renderer_process_host_(this) { |
78 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 104 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 105 web_contents->ForEachFrame(base::Bind( |
| 106 &MHTMLGenerationManager::Job::AddFrame, |
| 107 base::Unretained(this))); // Safe because ForEachFrame is synchronous. |
| 108 |
| 109 // Main frame needs to be processed first. |
| 110 DCHECK(!pending_frame_tree_node_ids_.empty()); |
| 111 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) |
| 112 ->parent() == nullptr); |
79 } | 113 } |
80 | 114 |
81 MHTMLGenerationManager::Job::~Job() { | 115 MHTMLGenerationManager::Job::~Job() { |
82 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 116 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
83 } | 117 } |
84 | 118 |
85 bool MHTMLGenerationManager::Job::SendToRenderView() { | 119 std::map<int, std::string> |
86 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 120 MHTMLGenerationManager::Job::CreateFrameRoutingIdToContentId( |
| 121 SiteInstance* site_instance) { |
| 122 std::map<int, std::string> result; |
| 123 for (const auto& it : frame_tree_node_to_content_id_) { |
| 124 int ftn_id = it.first; |
| 125 const std::string& content_id = it.second; |
| 126 |
| 127 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(ftn_id); |
| 128 if (!ftn) |
| 129 continue; |
| 130 |
| 131 int routing_id = |
| 132 ftn->render_manager()->GetRoutingIdForSiteInstance(site_instance); |
| 133 if (routing_id == MSG_ROUTING_NONE) |
| 134 continue; |
| 135 |
| 136 result[routing_id] = content_id; |
| 137 } |
| 138 return result; |
| 139 } |
| 140 |
| 141 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { |
87 DCHECK(browser_file_.IsValid()); | 142 DCHECK(browser_file_.IsValid()); |
| 143 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); |
88 | 144 |
89 RenderViewHost* rvh = RenderViewHost::FromID(process_id_, routing_id_); | 145 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
90 if (!rvh) { // The contents went away. | 146 pending_frame_tree_node_ids_.pop(); |
| 147 bool is_last_frame = pending_frame_tree_node_ids_.empty(); |
| 148 |
| 149 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); |
| 150 if (!ftn) // The contents went away. |
91 return false; | 151 return false; |
92 } | 152 RenderFrameHost* rfh = ftn->current_frame_host(); |
93 | 153 |
94 observed_renderer_process_host_.Add(rvh->GetMainFrame()->GetProcess()); | 154 // Get notified if the target of the IPC message dies between responding. |
| 155 observed_renderer_process_host_.RemoveAll(); |
| 156 observed_renderer_process_host_.Add(rfh->GetProcess()); |
95 | 157 |
96 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( | 158 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( |
97 browser_file_.GetPlatformFile(), rvh->GetProcess()->GetHandle(), | 159 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), |
98 false); // |close_source_handle|. | 160 false); // |close_source_handle|. |
99 rvh->Send( | 161 rfh->Send(new FrameMsg_SerializeAsMHTML( |
100 new ViewMsg_SavePageAsMHTML(rvh->GetRoutingID(), job_id_, renderer_file)); | 162 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, |
| 163 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); |
101 return true; | 164 return true; |
102 } | 165 } |
103 | 166 |
104 void MHTMLGenerationManager::Job::RenderProcessExited( | 167 void MHTMLGenerationManager::Job::RenderProcessExited( |
105 RenderProcessHost* host, | 168 RenderProcessHost* host, |
106 base::TerminationStatus status, | 169 base::TerminationStatus status, |
107 int exit_code) { | 170 int exit_code) { |
108 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 171 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
109 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); | 172 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); |
110 } | 173 } |
111 | 174 |
| 175 void MHTMLGenerationManager::Job::AddFrame(RenderFrameHost* render_frame_host) { |
| 176 auto* rfhi = static_cast<RenderFrameHostImpl*>(render_frame_host); |
| 177 int frame_tree_node_id = rfhi->frame_tree_node()->frame_tree_node_id(); |
| 178 pending_frame_tree_node_ids_.push(frame_tree_node_id); |
| 179 |
| 180 std::string guid = base::GenerateGUID(); |
| 181 std::string content_id = base::StringPrintf("<frame-%d-%s@mhtml.blink>", |
| 182 frame_tree_node_id, guid.c_str()); |
| 183 frame_tree_node_to_content_id_[frame_tree_node_id] = content_id; |
| 184 } |
| 185 |
112 void MHTMLGenerationManager::Job::RenderProcessHostDestroyed( | 186 void MHTMLGenerationManager::Job::RenderProcessHostDestroyed( |
113 RenderProcessHost* host) { | 187 RenderProcessHost* host) { |
114 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 188 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
115 observed_renderer_process_host_.Remove(host); | 189 observed_renderer_process_host_.Remove(host); |
116 } | 190 } |
117 | 191 |
118 void MHTMLGenerationManager::Job::CloseFile( | 192 void MHTMLGenerationManager::Job::CloseFile( |
119 base::Callback<void(int64)> callback) { | 193 base::Callback<void(int64)> callback) { |
120 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 194 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
121 | 195 |
(...skipping 11 matching lines...) Expand all Loading... |
133 | 207 |
134 // static | 208 // static |
135 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { | 209 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { |
136 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 210 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
137 DCHECK(file.IsValid()); | 211 DCHECK(file.IsValid()); |
138 int64 file_size = file.GetLength(); | 212 int64 file_size = file.GetLength(); |
139 file.Close(); | 213 file.Close(); |
140 return file_size; | 214 return file_size; |
141 } | 215 } |
142 | 216 |
| 217 // static |
| 218 std::string MHTMLGenerationManager::Job::GenerateMHTMLBoundaryMarker() { |
| 219 // TODO(lukasza): Introduce and use a shared helper function in |
| 220 // net/base/mime_util.h instead of having the ad-hoc code below. |
| 221 |
| 222 // Trying to generate random boundaries similar to IE/UnMHT |
| 223 // (ex: ----=_NextPart_000_001B_01CC157B.96F808A0). |
| 224 uint8_t random_values[10]; |
| 225 base::RandBytes(&random_values, sizeof(random_values)); |
| 226 |
| 227 std::string result("----=_NextPart_000_"); |
| 228 result += base::HexEncode(random_values + 0, 2); |
| 229 result += '_'; |
| 230 result += base::HexEncode(random_values + 2, 4); |
| 231 result += '.'; |
| 232 result += base::HexEncode(random_values + 6, 4); |
| 233 return result; |
| 234 } |
| 235 |
143 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { | 236 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { |
144 return base::Singleton<MHTMLGenerationManager>::get(); | 237 return base::Singleton<MHTMLGenerationManager>::get(); |
145 } | 238 } |
146 | 239 |
147 MHTMLGenerationManager::MHTMLGenerationManager() : next_job_id_(0) {} | 240 MHTMLGenerationManager::MHTMLGenerationManager() : next_job_id_(0) {} |
148 | 241 |
149 MHTMLGenerationManager::~MHTMLGenerationManager() { | 242 MHTMLGenerationManager::~MHTMLGenerationManager() { |
150 STLDeleteValues(&id_to_job_); | 243 STLDeleteValues(&id_to_job_); |
151 } | 244 } |
152 | 245 |
153 void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents, | 246 void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents, |
154 const base::FilePath& file_path, | 247 const base::FilePath& file_path, |
155 const GenerateMHTMLCallback& callback) { | 248 const GenerateMHTMLCallback& callback) { |
156 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 249 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
157 | 250 |
158 int job_id = NewJob(web_contents, callback); | 251 int job_id = NewJob(web_contents, callback); |
159 | 252 |
160 BrowserThread::PostTaskAndReplyWithResult( | 253 BrowserThread::PostTaskAndReplyWithResult( |
161 BrowserThread::FILE, FROM_HERE, | 254 BrowserThread::FILE, FROM_HERE, |
162 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), | 255 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), |
163 base::Bind(&MHTMLGenerationManager::OnFileAvailable, | 256 base::Bind(&MHTMLGenerationManager::OnFileAvailable, |
164 base::Unretained(this), // Safe b/c |this| is a singleton. | 257 base::Unretained(this), // Safe b/c |this| is a singleton. |
165 job_id)); | 258 job_id)); |
166 } | 259 } |
167 | 260 |
168 void MHTMLGenerationManager::OnSavedPageAsMHTML( | 261 void MHTMLGenerationManager::OnSavedFrameAsMHTML( |
169 int job_id, | 262 int job_id, |
170 bool mhtml_generation_in_renderer_succeeded) { | 263 bool mhtml_generation_in_renderer_succeeded) { |
171 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 264 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
172 JobStatus job_status = mhtml_generation_in_renderer_succeeded | 265 |
173 ? JobStatus::SUCCESS | 266 if (!mhtml_generation_in_renderer_succeeded) { |
174 : JobStatus::FAILURE; | 267 JobFinished(job_id, JobStatus::FAILURE); |
175 JobFinished(job_id, job_status); | 268 return; |
| 269 } |
| 270 |
| 271 Job* job = FindJob(job_id); |
| 272 if (!job) |
| 273 return; |
| 274 |
| 275 if (job->HasMoreFramesToProcess()) { |
| 276 if (!job->SendToNextRenderFrame()) { |
| 277 JobFinished(job_id, JobStatus::FAILURE); |
| 278 } |
| 279 return; |
| 280 } |
| 281 |
| 282 JobFinished(job_id, JobStatus::SUCCESS); |
176 } | 283 } |
177 | 284 |
178 // static | 285 // static |
179 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { | 286 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { |
180 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 287 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
181 base::File browser_file( | 288 |
182 file_path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE); | 289 // SECURITY NOTE: A file descriptor to the file created below will be passed |
| 290 // to multiple renderer processes which (in out-of-process iframes mode) can |
| 291 // act on behalf of separate web principals. Therefore it is important to |
| 292 // only allow writing to the file and forbid reading from the file (as this |
| 293 // would allow reading content generated by other renderers / other web |
| 294 // principals). |
| 295 uint32 file_flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE; |
| 296 |
| 297 base::File browser_file(file_path, file_flags); |
183 if (!browser_file.IsValid()) { | 298 if (!browser_file.IsValid()) { |
184 LOG(ERROR) << "Failed to create file to save MHTML at: " << | 299 LOG(ERROR) << "Failed to create file to save MHTML at: " << |
185 file_path.value(); | 300 file_path.value(); |
186 } | 301 } |
187 return browser_file.Pass(); | 302 return browser_file.Pass(); |
188 } | 303 } |
189 | 304 |
190 void MHTMLGenerationManager::OnFileAvailable(int job_id, | 305 void MHTMLGenerationManager::OnFileAvailable(int job_id, |
191 base::File browser_file) { | 306 base::File browser_file) { |
192 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 307 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
193 | 308 |
194 if (!browser_file.IsValid()) { | 309 if (!browser_file.IsValid()) { |
195 LOG(ERROR) << "Failed to create file"; | 310 LOG(ERROR) << "Failed to create file"; |
196 JobFinished(job_id, JobStatus::FAILURE); | 311 JobFinished(job_id, JobStatus::FAILURE); |
197 return; | 312 return; |
198 } | 313 } |
199 | 314 |
200 Job* job = FindJob(job_id); | 315 Job* job = FindJob(job_id); |
201 if (!job) | 316 if (!job) |
202 return; | 317 return; |
203 | 318 |
204 job->set_browser_file(browser_file.Pass()); | 319 job->set_browser_file(browser_file.Pass()); |
205 | 320 |
206 if (!job->SendToRenderView()) { | 321 if (!job->SendToNextRenderFrame()) { |
207 JobFinished(job_id, JobStatus::FAILURE); | 322 JobFinished(job_id, JobStatus::FAILURE); |
208 } | 323 } |
209 } | 324 } |
210 | 325 |
211 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { | 326 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { |
212 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 327 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
213 | 328 |
214 Job* job = FindJob(job_id); | 329 Job* job = FindJob(job_id); |
215 if (!job) | 330 if (!job) |
216 return; | 331 return; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
263 ++it) { | 378 ++it) { |
264 if (it->second == job) { | 379 if (it->second == job) { |
265 JobFinished(it->first, JobStatus::FAILURE); | 380 JobFinished(it->first, JobStatus::FAILURE); |
266 return; | 381 return; |
267 } | 382 } |
268 } | 383 } |
269 NOTREACHED(); | 384 NOTREACHED(); |
270 } | 385 } |
271 | 386 |
272 } // namespace content | 387 } // namespace content |
OLD | NEW |