OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/download/mhtml_generation_manager.h" | 5 #include "content/browser/download/mhtml_generation_manager.h" |
6 | 6 |
7 #include <map> | |
8 #include <queue> | |
9 #include <sstream> | |
10 | |
7 #include "base/bind.h" | 11 #include "base/bind.h" |
8 #include "base/files/file.h" | 12 #include "base/files/file.h" |
13 #include "base/guid.h" | |
14 #include "base/rand_util.h" | |
9 #include "base/scoped_observer.h" | 15 #include "base/scoped_observer.h" |
10 #include "base/stl_util.h" | 16 #include "base/stl_util.h" |
11 #include "content/browser/renderer_host/render_view_host_impl.h" | 17 #include "content/browser/frame_host/frame_tree_node.h" |
18 #include "content/common/frame_messages.h" | |
12 #include "content/public/browser/browser_thread.h" | 19 #include "content/public/browser/browser_thread.h" |
13 #include "content/public/browser/render_frame_host.h" | 20 #include "content/public/browser/render_frame_host.h" |
14 #include "content/public/browser/render_process_host.h" | 21 #include "content/public/browser/render_process_host.h" |
15 #include "content/public/browser/render_process_host_observer.h" | 22 #include "content/public/browser/render_process_host_observer.h" |
16 #include "content/public/browser/web_contents.h" | 23 #include "content/public/browser/web_contents.h" |
17 #include "content/common/view_messages.h" | |
18 | 24 |
19 namespace content { | 25 namespace content { |
20 | 26 |
21 // The class and all of its members live on the UI thread. Only static methods | 27 // The class and all of its members live on the UI thread. Only static methods |
22 // are executed on other threads. | 28 // are executed on other threads. |
23 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { | 29 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
24 public: | 30 public: |
25 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); | 31 Job(int job_id, WebContents* web_contents, GenerateMHTMLCallback callback); |
26 ~Job() override; | 32 ~Job() override; |
27 | 33 |
28 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } | 34 void set_browser_file(base::File file) { browser_file_ = file.Pass(); } |
29 | 35 |
30 GenerateMHTMLCallback callback() const { return callback_; } | 36 GenerateMHTMLCallback callback() const { return callback_; } |
31 | 37 |
32 // Sends IPC to the renderer, asking for MHTML generation. | 38 // Sends IPC to the renderer, asking for MHTML generation of the next frame. |
39 // | |
33 // Returns true if the message was sent successfully; false otherwise. | 40 // Returns true if the message was sent successfully; false otherwise. |
34 bool SendToRenderView(); | 41 bool SendToNextRenderFrame(); |
42 | |
43 // Indicates if more calls to SendToNextRenderFrame are needed. | |
44 bool GotMoreFramesToProcess() { | |
45 return !pending_frame_tree_node_ids_.empty(); | |
46 } | |
35 | 47 |
36 // Close the file on the file thread and respond back on the UI thread with | 48 // Close the file on the file thread and respond back on the UI thread with |
37 // file size. | 49 // file size. |
38 void CloseFile(base::Callback<void(int64 file_size)> callback); | 50 void CloseFile(base::Callback<void(int64 file_size)> callback); |
39 | 51 |
40 // RenderProcessHostObserver: | 52 // RenderProcessHostObserver: |
41 void RenderProcessExited(RenderProcessHost* host, | 53 void RenderProcessExited(RenderProcessHost* host, |
42 base::TerminationStatus status, | 54 base::TerminationStatus status, |
43 int exit_code) override; | 55 int exit_code) override; |
44 void RenderProcessHostDestroyed(RenderProcessHost* host) override; | 56 void RenderProcessHostDestroyed(RenderProcessHost* host) override; |
45 | 57 |
46 private: | 58 private: |
59 static std::string GenerateMhtmlBoundaryMarker(); | |
47 static int64 CloseFileOnFileThread(base::File file); | 60 static int64 CloseFileOnFileThread(base::File file); |
61 void AddFrame(RenderFrameHost* render_frame_host); | |
62 | |
63 // Translates |frame_tree_node_to_content_id_| into | |
64 // a |site_instance|-specific, routing-id-based map. | |
65 std::map<int, std::string> CreateFrameRoutingIdToContentId( | |
66 SiteInstance* site_instance); | |
48 | 67 |
49 // Id used to map renderer responses to jobs. | 68 // Id used to map renderer responses to jobs. |
50 // See also MHTMLGenerationManager::id_to_job_ map. | 69 // See also MHTMLGenerationManager::id_to_job_ map. |
51 int job_id_; | 70 int job_id_; |
52 | 71 |
53 // The handle to the file the MHTML is saved to for the browser process. | 72 // The handle to the file the MHTML is saved to for the browser process. |
54 base::File browser_file_; | 73 base::File browser_file_; |
55 | 74 |
56 // The IDs mapping to a specific contents. | 75 // The IDs of frames we still need to process. |
57 int process_id_; | 76 std::queue<int> pending_frame_tree_node_ids_; |
58 int routing_id_; | 77 |
78 // Map from frames into content ids (see WebPageSerializer::generateMHTMLParts | |
79 // for more details about what "content ids" are and how they are used). | |
80 std::map<int, std::string> frame_tree_node_to_content_id_; | |
81 | |
82 // MIME multipart boundary to use in the MHTML doc. | |
83 std::string mhtml_boundary_marker_; | |
59 | 84 |
60 // The callback to call once generation is complete. | 85 // The callback to call once generation is complete. |
61 GenerateMHTMLCallback callback_; | 86 GenerateMHTMLCallback callback_; |
62 | 87 |
63 // RAII helper for registering this Job as a RenderProcessHost observer. | 88 // RAII helper for registering this Job as a RenderProcessHost observer. |
64 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> | 89 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> |
65 observed_renderer_process_host_; | 90 observed_renderer_process_host_; |
66 | 91 |
67 DISALLOW_COPY_AND_ASSIGN(Job); | 92 DISALLOW_COPY_AND_ASSIGN(Job); |
68 }; | 93 }; |
69 | 94 |
70 MHTMLGenerationManager::Job::Job(int job_id, | 95 MHTMLGenerationManager::Job::Job(int job_id, |
71 WebContents* web_contents, | 96 WebContents* web_contents, |
72 GenerateMHTMLCallback callback) | 97 GenerateMHTMLCallback callback) |
73 : job_id_(job_id), | 98 : job_id_(job_id), |
74 process_id_(web_contents->GetRenderProcessHost()->GetID()), | 99 mhtml_boundary_marker_(GenerateMhtmlBoundaryMarker()), |
75 routing_id_(web_contents->GetRenderViewHost()->GetRoutingID()), | |
76 callback_(callback), | 100 callback_(callback), |
77 observed_renderer_process_host_(this) { | 101 observed_renderer_process_host_(this) { |
78 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 102 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
103 web_contents->ForEachFrame(base::Bind( | |
104 &MHTMLGenerationManager::Job::AddFrame, | |
105 base::Unretained(this))); // Safe because ForEachFrame is synchronous. | |
106 | |
107 // Main frame needs to be processed first. | |
108 DCHECK(!pending_frame_tree_node_ids_.empty()); | |
109 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) | |
dcheng
2015/12/08 06:22:27
DCHECK_EQ works here
Łukasz Anforowicz
2015/12/08 21:26:43
I've tried this:
109 DCHECK_EQ(nullptr, FrameTre
| |
110 ->parent() == nullptr); | |
79 } | 111 } |
80 | 112 |
81 MHTMLGenerationManager::Job::~Job() { | 113 MHTMLGenerationManager::Job::~Job() { |
82 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 114 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
83 } | 115 } |
84 | 116 |
85 bool MHTMLGenerationManager::Job::SendToRenderView() { | 117 std::map<int, std::string> |
86 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 118 MHTMLGenerationManager::Job::CreateFrameRoutingIdToContentId( |
119 SiteInstance* site_instance) { | |
120 std::map<int, std::string> result; | |
121 for (const auto& it : frame_tree_node_to_content_id_) { | |
122 int ftn_id = it.first; | |
123 const std::string& content_id = it.second; | |
124 | |
125 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(ftn_id); | |
126 if (!ftn) | |
127 continue; | |
128 | |
129 int routing_id = | |
130 ftn->render_manager()->GetRoutingIdForSiteInstance(site_instance); | |
131 if (routing_id == MSG_ROUTING_NONE) | |
132 continue; | |
133 | |
134 result[routing_id] = content_id; | |
135 } | |
136 return result; | |
137 } | |
138 | |
139 bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { | |
87 DCHECK(browser_file_.IsValid()); | 140 DCHECK(browser_file_.IsValid()); |
141 DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); | |
88 | 142 |
89 RenderViewHost* rvh = RenderViewHost::FromID(process_id_, routing_id_); | 143 int frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
90 if (!rvh) { // The contents went away. | 144 pending_frame_tree_node_ids_.pop(); |
145 bool is_last_frame = pending_frame_tree_node_ids_.empty(); | |
146 | |
147 FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); | |
148 if (!ftn) // The contents went away. | |
91 return false; | 149 return false; |
92 } | 150 RenderFrameHost* rfh = ftn->current_frame_host(); |
93 | 151 |
94 observed_renderer_process_host_.Add(rvh->GetMainFrame()->GetProcess()); | 152 // Get notified if the target of the IPC message dies before responding. |
153 observed_renderer_process_host_.RemoveAll(); | |
154 observed_renderer_process_host_.Add(rfh->GetProcess()); | |
95 | 155 |
96 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( | 156 IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( |
97 browser_file_.GetPlatformFile(), rvh->GetProcess()->GetHandle(), | 157 browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), |
98 false); // |close_source_handle|. | 158 false); // |close_source_handle|. |
99 rvh->Send( | 159 rfh->Send(new FrameMsg_SerializeAsMHTML( |
100 new ViewMsg_SavePageAsMHTML(rvh->GetRoutingID(), job_id_, renderer_file)); | 160 rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, |
161 CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); | |
101 return true; | 162 return true; |
102 } | 163 } |
103 | 164 |
104 void MHTMLGenerationManager::Job::RenderProcessExited( | 165 void MHTMLGenerationManager::Job::RenderProcessExited( |
105 RenderProcessHost* host, | 166 RenderProcessHost* host, |
106 base::TerminationStatus status, | 167 base::TerminationStatus status, |
107 int exit_code) { | 168 int exit_code) { |
108 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 169 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
109 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); | 170 MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); |
110 } | 171 } |
111 | 172 |
173 void MHTMLGenerationManager::Job::AddFrame(RenderFrameHost* render_frame_host) { | |
174 auto* rfhi = static_cast<RenderFrameHostImpl*>(render_frame_host); | |
175 int frame_tree_node_id = rfhi->frame_tree_node()->frame_tree_node_id(); | |
176 pending_frame_tree_node_ids_.push(frame_tree_node_id); | |
177 | |
178 std::ostringstream content_id_builder; | |
179 content_id_builder << "<frame" << frame_tree_node_id << "@save" | |
dcheng
2015/12/08 06:22:27
I was going to say that streams aren't permitted i
Łukasz Anforowicz
2015/12/08 21:26:43
Yeah, I guess StringPrintf will be clearer indeed.
| |
180 << base::GenerateGUID() << ".mhtml.blink>"; | |
181 std::string content_id = content_id_builder.str(); | |
182 frame_tree_node_to_content_id_[frame_tree_node_id] = content_id; | |
183 } | |
184 | |
112 void MHTMLGenerationManager::Job::RenderProcessHostDestroyed( | 185 void MHTMLGenerationManager::Job::RenderProcessHostDestroyed( |
113 RenderProcessHost* host) { | 186 RenderProcessHost* host) { |
114 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 187 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
115 observed_renderer_process_host_.Remove(host); | 188 observed_renderer_process_host_.Remove(host); |
116 } | 189 } |
117 | 190 |
118 void MHTMLGenerationManager::Job::CloseFile( | 191 void MHTMLGenerationManager::Job::CloseFile( |
119 base::Callback<void(int64)> callback) { | 192 base::Callback<void(int64)> callback) { |
120 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 193 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
121 | 194 |
(...skipping 11 matching lines...) Expand all Loading... | |
133 | 206 |
134 // static | 207 // static |
135 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { | 208 int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { |
136 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 209 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
137 DCHECK(file.IsValid()); | 210 DCHECK(file.IsValid()); |
138 int64 file_size = file.GetLength(); | 211 int64 file_size = file.GetLength(); |
139 file.Close(); | 212 file.Close(); |
140 return file_size; | 213 return file_size; |
141 } | 214 } |
142 | 215 |
216 // static | |
217 std::string MHTMLGenerationManager::Job::GenerateMhtmlBoundaryMarker() { | |
218 // TODO(lukasza): Introduce and use a shared helper function in | |
219 // net/base/mime_util.h instead of having the ad-hoc code below. | |
220 | |
221 // Trying to generate random boundaries similar to IE/UnMHT | |
222 // (ex: ----=_NextPart_000_001B_01CC157B.96F808A0). | |
223 const size_t kRandomValuesLength = 10; | |
224 uint8_t random_values[kRandomValuesLength]; | |
225 base::RandBytes(&random_values, kRandomValuesLength); | |
226 | |
227 std::ostringstream ss; | |
228 ss << "----=_NextPart_000_"; | |
229 for (size_t i = 0; i < kRandomValuesLength; ++i) { | |
230 if (i == 2) | |
231 ss << '_'; | |
232 else if (i == 6) | |
233 ss << '.'; | |
234 ss << std::hex << static_cast<uint32_t>(random_values[i]); | |
dcheng
2015/12/08 06:22:27
https://google.github.io/styleguide/cppguide.html#
ncarter (slow)
2015/12/08 19:31:01
Agree with Daniel here. I'd use StringPrintf (whic
Łukasz Anforowicz
2015/12/08 21:26:43
Done.
| |
235 } | |
236 return ss.str(); | |
237 } | |
238 | |
143 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { | 239 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { |
144 return base::Singleton<MHTMLGenerationManager>::get(); | 240 return base::Singleton<MHTMLGenerationManager>::get(); |
145 } | 241 } |
146 | 242 |
147 MHTMLGenerationManager::MHTMLGenerationManager() : next_job_id_(0) {} | 243 MHTMLGenerationManager::MHTMLGenerationManager() : next_job_id_(0) {} |
148 | 244 |
149 MHTMLGenerationManager::~MHTMLGenerationManager() { | 245 MHTMLGenerationManager::~MHTMLGenerationManager() { |
150 STLDeleteValues(&id_to_job_); | 246 STLDeleteValues(&id_to_job_); |
151 } | 247 } |
152 | 248 |
153 void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents, | 249 void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents, |
154 const base::FilePath& file_path, | 250 const base::FilePath& file_path, |
155 const GenerateMHTMLCallback& callback) { | 251 const GenerateMHTMLCallback& callback) { |
156 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 252 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
157 | 253 |
158 int job_id = NewJob(web_contents, callback); | 254 int job_id = NewJob(web_contents, callback); |
159 | 255 |
160 BrowserThread::PostTaskAndReplyWithResult( | 256 BrowserThread::PostTaskAndReplyWithResult( |
161 BrowserThread::FILE, FROM_HERE, | 257 BrowserThread::FILE, FROM_HERE, |
162 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), | 258 base::Bind(&MHTMLGenerationManager::CreateFile, file_path), |
163 base::Bind(&MHTMLGenerationManager::OnFileAvailable, | 259 base::Bind(&MHTMLGenerationManager::OnFileAvailable, |
164 base::Unretained(this), // Safe b/c |this| is a singleton. | 260 base::Unretained(this), // Safe b/c |this| is a singleton. |
165 job_id)); | 261 job_id)); |
166 } | 262 } |
167 | 263 |
168 void MHTMLGenerationManager::OnSavedPageAsMHTML( | 264 void MHTMLGenerationManager::OnSavedPageAsMHTML( |
169 int job_id, | 265 int job_id, |
170 bool mhtml_generation_in_renderer_succeeded) { | 266 bool mhtml_generation_in_renderer_succeeded) { |
171 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 267 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
172 JobStatus job_status = mhtml_generation_in_renderer_succeeded | 268 |
173 ? JobStatus::SUCCESS | 269 if (!mhtml_generation_in_renderer_succeeded) { |
174 : JobStatus::FAILURE; | 270 JobFinished(job_id, JobStatus::FAILURE); |
175 JobFinished(job_id, job_status); | 271 return; |
272 } | |
273 | |
274 Job* job = FindJob(job_id); | |
275 if (!job) | |
276 return; | |
277 | |
278 if (job->GotMoreFramesToProcess()) { | |
279 if (!job->SendToNextRenderFrame()) { | |
280 JobFinished(job_id, JobStatus::FAILURE); | |
281 } | |
282 return; | |
283 } | |
284 | |
285 JobFinished(job_id, JobStatus::SUCCESS); | |
176 } | 286 } |
177 | 287 |
178 // static | 288 // static |
179 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { | 289 base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { |
180 DCHECK_CURRENTLY_ON(BrowserThread::FILE); | 290 DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
181 base::File browser_file( | 291 |
182 file_path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE); | 292 // SECURITY NOTE: A file descriptor to the file created below will be passed |
293 // to multiple renderer processes which (in out-of-process iframes mode) can | |
294 // act on behalf of separate web principals. Therefore it is important to | |
295 // only allow writing to the file and forbid reading from the file (as this | |
296 // would allow reading content generated by other renderers / other web | |
297 // principals). | |
298 uint32 file_flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE; | |
299 | |
300 base::File browser_file(file_path, file_flags); | |
183 if (!browser_file.IsValid()) { | 301 if (!browser_file.IsValid()) { |
184 LOG(ERROR) << "Failed to create file to save MHTML at: " << | 302 LOG(ERROR) << "Failed to create file to save MHTML at: " << |
185 file_path.value(); | 303 file_path.value(); |
186 } | 304 } |
187 return browser_file.Pass(); | 305 return browser_file.Pass(); |
188 } | 306 } |
189 | 307 |
190 void MHTMLGenerationManager::OnFileAvailable(int job_id, | 308 void MHTMLGenerationManager::OnFileAvailable(int job_id, |
191 base::File browser_file) { | 309 base::File browser_file) { |
192 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 310 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
193 | 311 |
194 if (!browser_file.IsValid()) { | 312 if (!browser_file.IsValid()) { |
195 LOG(ERROR) << "Failed to create file"; | 313 LOG(ERROR) << "Failed to create file"; |
196 JobFinished(job_id, JobStatus::FAILURE); | 314 JobFinished(job_id, JobStatus::FAILURE); |
197 return; | 315 return; |
198 } | 316 } |
199 | 317 |
200 Job* job = FindJob(job_id); | 318 Job* job = FindJob(job_id); |
201 if (!job) | 319 if (!job) |
202 return; | 320 return; |
203 | 321 |
204 job->set_browser_file(browser_file.Pass()); | 322 job->set_browser_file(browser_file.Pass()); |
205 | 323 |
206 if (!job->SendToRenderView()) { | 324 if (!job->SendToNextRenderFrame()) { |
207 JobFinished(job_id, JobStatus::FAILURE); | 325 JobFinished(job_id, JobStatus::FAILURE); |
208 } | 326 } |
209 } | 327 } |
210 | 328 |
211 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { | 329 void MHTMLGenerationManager::JobFinished(int job_id, JobStatus job_status) { |
212 DCHECK_CURRENTLY_ON(BrowserThread::UI); | 330 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
213 | 331 |
214 Job* job = FindJob(job_id); | 332 Job* job = FindJob(job_id); |
215 if (!job) | 333 if (!job) |
216 return; | 334 return; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
263 ++it) { | 381 ++it) { |
264 if (it->second == job) { | 382 if (it->second == job) { |
265 JobFinished(it->first, JobStatus::FAILURE); | 383 JobFinished(it->first, JobStatus::FAILURE); |
266 return; | 384 return; |
267 } | 385 } |
268 } | 386 } |
269 NOTREACHED(); | 387 NOTREACHED(); |
270 } | 388 } |
271 | 389 |
272 } // namespace content | 390 } // namespace content |
OLD | NEW |