Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(45)

Side by Side Diff: content/browser/download/mhtml_generation_manager.cc

Issue 2683493002: Get signals working in the EXTRA_DATA section of MHTML (Closed)
Patch Set: Change header for our data to be X-Chrome- instead of Chromium- Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/mhtml_generation_manager.h" 5 #include "content/browser/download/mhtml_generation_manager.h"
6 6
7 #include <map> 7 #include <map>
8 #include <queue> 8 #include <queue>
9 #include <utility> 9 #include <utility>
10 10
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/files/file.h" 12 #include "base/files/file.h"
13 #include "base/guid.h" 13 #include "base/guid.h"
14 #include "base/macros.h" 14 #include "base/macros.h"
15 #include "base/memory/ptr_util.h" 15 #include "base/memory/ptr_util.h"
16 #include "base/metrics/histogram_macros.h" 16 #include "base/metrics/histogram_macros.h"
17 #include "base/scoped_observer.h" 17 #include "base/scoped_observer.h"
18 #include "base/stl_util.h" 18 #include "base/stl_util.h"
19 #include "base/strings/string_util.h" 19 #include "base/strings/string_util.h"
20 #include "base/strings/stringprintf.h" 20 #include "base/strings/stringprintf.h"
21 #include "base/time/time.h" 21 #include "base/time/time.h"
22 #include "base/trace_event/trace_event.h" 22 #include "base/trace_event/trace_event.h"
23 #include "content/browser/bad_message.h" 23 #include "content/browser/bad_message.h"
24 #include "content/browser/download/mhtml_extra_parts_impl.h"
24 #include "content/browser/frame_host/frame_tree_node.h" 25 #include "content/browser/frame_host/frame_tree_node.h"
25 #include "content/browser/frame_host/render_frame_host_impl.h" 26 #include "content/browser/frame_host/render_frame_host_impl.h"
26 #include "content/common/frame_messages.h" 27 #include "content/common/frame_messages.h"
27 #include "content/public/browser/browser_thread.h" 28 #include "content/public/browser/browser_thread.h"
29 #include "content/public/browser/mhtml_extra_parts.h"
28 #include "content/public/browser/render_frame_host.h" 30 #include "content/public/browser/render_frame_host.h"
29 #include "content/public/browser/render_process_host.h" 31 #include "content/public/browser/render_process_host.h"
30 #include "content/public/browser/render_process_host_observer.h" 32 #include "content/public/browser/render_process_host_observer.h"
31 #include "content/public/browser/web_contents.h" 33 #include "content/public/browser/web_contents.h"
32 #include "content/public/common/mhtml_generation_params.h" 34 #include "content/public/common/mhtml_generation_params.h"
33 #include "net/base/mime_util.h" 35 #include "net/base/mime_util.h"
34 36
37 namespace {
38 const char kContentLocation[] = "Content-Location: ";
39 const char kContentType[] = "Content-Type: ";
40 int kInvalidFileSize = -1;
41 } // namespace
42
35 namespace content { 43 namespace content {
36 44
37 // The class and all of its members live on the UI thread. Only static methods 45 // The class and all of its members live on the UI thread. Only static methods
38 // are executed on other threads. 46 // are executed on other threads.
39 class MHTMLGenerationManager::Job : public RenderProcessHostObserver { 47 class MHTMLGenerationManager::Job : public RenderProcessHostObserver {
40 public: 48 public:
41 Job(int job_id, 49 Job(int job_id,
42 WebContents* web_contents, 50 WebContents* web_contents,
43 const MHTMLGenerationParams& params, 51 const MHTMLGenerationParams& params,
44 const GenerateMHTMLCallback& callback); 52 const GenerateMHTMLCallback& callback);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 void MarkAsFinished(); 99 void MarkAsFinished();
92 100
93 void ReportRendererMainThreadTime(base::TimeDelta renderer_main_thread_time); 101 void ReportRendererMainThreadTime(base::TimeDelta renderer_main_thread_time);
94 102
95 private: 103 private:
96 // Writes the MHTML footer to the file and closes it. 104 // Writes the MHTML footer to the file and closes it.
97 // 105 //
98 // Note: The same |boundary| marker must be used for all "boundaries" -- in 106 // Note: The same |boundary| marker must be used for all "boundaries" -- in
99 // the header, parts and footer -- that belong to the same MHTML document (see 107 // the header, parts and footer -- that belong to the same MHTML document (see
100 // also rfc1341, section 7.2.1, "boundary" description). 108 // also rfc1341, section 7.2.1, "boundary" description).
101 static std::tuple<MhtmlSaveStatus, int64_t> CloseFileOnFileThread( 109 static std::tuple<MhtmlSaveStatus, int64_t> FinalizeAndCloseFileOnFileThread(
102 MhtmlSaveStatus save_status, 110 MhtmlSaveStatus save_status,
103 const std::string& boundary, 111 const std::string& boundary,
104 base::File file); 112 base::File file,
113 const MHTMLExtraPartsImpl* extra_parts);
105 void AddFrame(RenderFrameHost* render_frame_host); 114 void AddFrame(RenderFrameHost* render_frame_host);
106 115
116 // If we have any extra MHTML parts to write out, write them into the file
117 // while on the file thread. Returns true for success, or if there is no data
118 // to write.
119 static bool WriteExtraDataParts(const std::string& boundary,
120 base::File& file,
121 const MHTMLExtraPartsImpl* extra_parts);
122
123 // Writes the footer into the MHTML file. Returns false for failure.
124 static bool WriteFooter(const std::string& boundary, base::File& file);
125
126 // Close the MHTML file if it is valid, setting the size param. Returns
127 // MhtmlSaveStatus::FILE_CLOSING_ERROR if the file is not valid.
128 static MhtmlSaveStatus CloseFileIfValid(base::File& file, int64_t& file_size);
129
107 // Creates a new map with values (content ids) the same as in 130 // Creates a new map with values (content ids) the same as in
108 // |frame_tree_node_to_content_id_| map, but with the keys translated from 131 // |frame_tree_node_to_content_id_| map, but with the keys translated from
109 // frame_tree_node_id into a |site_instance|-specific routing_id. 132 // frame_tree_node_id into a |site_instance|-specific routing_id.
110 std::map<int, std::string> CreateFrameRoutingIdToContentId( 133 std::map<int, std::string> CreateFrameRoutingIdToContentId(
111 SiteInstance* site_instance); 134 SiteInstance* site_instance);
112 135
113 // Id used to map renderer responses to jobs. 136 // Id used to map renderer responses to jobs.
114 // See also MHTMLGenerationManager::id_to_job_ map. 137 // See also MHTMLGenerationManager::id_to_job_ map.
115 const int job_id_; 138 const int job_id_;
116 139
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 std::string salt_; 172 std::string salt_;
150 173
151 // The callback to call once generation is complete. 174 // The callback to call once generation is complete.
152 const GenerateMHTMLCallback callback_; 175 const GenerateMHTMLCallback callback_;
153 176
154 // Whether the job is finished (set to true only for the short duration of 177 // Whether the job is finished (set to true only for the short duration of
155 // time between MHTMLGenerationManager::JobFinished is called and the job is 178 // time between MHTMLGenerationManager::JobFinished is called and the job is
156 // destroyed by MHTMLGenerationManager::OnFileClosed). 179 // destroyed by MHTMLGenerationManager::OnFileClosed).
157 bool is_finished_; 180 bool is_finished_;
158 181
182 // Any extra data parts that should be emitted into the output MHTML.
183 MHTMLExtraPartsImpl* extra_parts_;
184
159 // RAII helper for registering this Job as a RenderProcessHost observer. 185 // RAII helper for registering this Job as a RenderProcessHost observer.
160 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job> 186 ScopedObserver<RenderProcessHost, MHTMLGenerationManager::Job>
161 observed_renderer_process_host_; 187 observed_renderer_process_host_;
162 188
163 DISALLOW_COPY_AND_ASSIGN(Job); 189 DISALLOW_COPY_AND_ASSIGN(Job);
164 }; 190 };
165 191
166 MHTMLGenerationManager::Job::Job(int job_id, 192 MHTMLGenerationManager::Job::Job(int job_id,
167 WebContents* web_contents, 193 WebContents* web_contents,
168 const MHTMLGenerationParams& params, 194 const MHTMLGenerationParams& params,
169 const GenerateMHTMLCallback& callback) 195 const GenerateMHTMLCallback& callback)
170 : job_id_(job_id), 196 : job_id_(job_id),
171 creation_time_(base::TimeTicks::Now()), 197 creation_time_(base::TimeTicks::Now()),
172 params_(params), 198 params_(params),
173 frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId), 199 frame_tree_node_id_of_busy_frame_(FrameTreeNode::kFrameTreeNodeInvalidId),
174 mhtml_boundary_marker_(net::GenerateMimeMultipartBoundary()), 200 mhtml_boundary_marker_(net::GenerateMimeMultipartBoundary()),
175 salt_(base::GenerateGUID()), 201 salt_(base::GenerateGUID()),
176 callback_(callback), 202 callback_(callback),
177 is_finished_(false), 203 is_finished_(false),
178 observed_renderer_process_host_(this) { 204 observed_renderer_process_host_(this) {
179 DCHECK_CURRENTLY_ON(BrowserThread::UI); 205 DCHECK_CURRENTLY_ON(BrowserThread::UI);
180 web_contents->ForEachFrame(base::Bind( 206 web_contents->ForEachFrame(base::Bind(
181 &MHTMLGenerationManager::Job::AddFrame, 207 &MHTMLGenerationManager::Job::AddFrame,
182 base::Unretained(this))); // Safe because ForEachFrame is synchronous. 208 base::Unretained(this))); // Safe because ForEachFrame is synchronous.
183 209
184 // Main frame needs to be processed first. 210 // Main frame needs to be processed first.
185 DCHECK(!pending_frame_tree_node_ids_.empty()); 211 DCHECK(!pending_frame_tree_node_ids_.empty());
186 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) 212 DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front())
187 ->parent() == nullptr); 213 ->parent() == nullptr);
214
215 // Save off any extra data.
216 extra_parts_ = static_cast<MHTMLExtraPartsImpl*>(
217 MHTMLExtraParts::FromWebContents(web_contents));
188 } 218 }
189 219
190 MHTMLGenerationManager::Job::~Job() { 220 MHTMLGenerationManager::Job::~Job() {
191 DCHECK_CURRENTLY_ON(BrowserThread::UI); 221 DCHECK_CURRENTLY_ON(BrowserThread::UI);
192 } 222 }
193 223
194 std::map<int, std::string> 224 std::map<int, std::string>
195 MHTMLGenerationManager::Job::CreateFrameRoutingIdToContentId( 225 MHTMLGenerationManager::Job::CreateFrameRoutingIdToContentId(
196 SiteInstance* site_instance) { 226 SiteInstance* site_instance) {
197 std::map<int, std::string> result; 227 std::map<int, std::string> result;
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
345 if (save_status == MhtmlSaveStatus::SUCCESS) 375 if (save_status == MhtmlSaveStatus::SUCCESS)
346 save_status = MhtmlSaveStatus::FILE_WRITTING_ERROR; 376 save_status = MhtmlSaveStatus::FILE_WRITTING_ERROR;
347 callback.Run(std::make_tuple(save_status, -1)); 377 callback.Run(std::make_tuple(save_status, -1));
348 return; 378 return;
349 } 379 }
350 380
351 // If no previous error occurred the boundary should be sent. 381 // If no previous error occurred the boundary should be sent.
352 BrowserThread::PostTaskAndReplyWithResult( 382 BrowserThread::PostTaskAndReplyWithResult(
353 BrowserThread::FILE, FROM_HERE, 383 BrowserThread::FILE, FROM_HERE,
354 base::Bind( 384 base::Bind(
355 &MHTMLGenerationManager::Job::CloseFileOnFileThread, save_status, 385 &MHTMLGenerationManager::Job::FinalizeAndCloseFileOnFileThread,
386 save_status,
356 (save_status == MhtmlSaveStatus::SUCCESS ? mhtml_boundary_marker_ 387 (save_status == MhtmlSaveStatus::SUCCESS ? mhtml_boundary_marker_
357 : std::string()), 388 : std::string()),
358 base::Passed(&browser_file_)), 389 base::Passed(&browser_file_), extra_parts_),
359 callback); 390 callback);
360 } 391 }
361 392
362 bool MHTMLGenerationManager::Job::IsMessageFromFrameExpected( 393 bool MHTMLGenerationManager::Job::IsMessageFromFrameExpected(
363 RenderFrameHostImpl* sender) { 394 RenderFrameHostImpl* sender) {
364 int sender_id = sender->frame_tree_node()->frame_tree_node_id(); 395 int sender_id = sender->frame_tree_node()->frame_tree_node_id();
365 if (sender_id != frame_tree_node_id_of_busy_frame_) 396 if (sender_id != frame_tree_node_id_of_busy_frame_)
366 return false; 397 return false;
367 398
368 // We only expect one message per frame - let's make sure subsequent messages 399 // We only expect one message per frame - let's make sure subsequent messages
(...skipping 25 matching lines...) Expand all
394 425
395 // Report success if all frames have been processed. 426 // Report success if all frames have been processed.
396 if (pending_frame_tree_node_ids_.empty()) 427 if (pending_frame_tree_node_ids_.empty())
397 return MhtmlSaveStatus::SUCCESS; 428 return MhtmlSaveStatus::SUCCESS;
398 429
399 return SendToNextRenderFrame(); 430 return SendToNextRenderFrame();
400 } 431 }
401 432
402 // static 433 // static
403 std::tuple<MhtmlSaveStatus, int64_t> 434 std::tuple<MhtmlSaveStatus, int64_t>
404 MHTMLGenerationManager::Job::CloseFileOnFileThread(MhtmlSaveStatus save_status, 435 MHTMLGenerationManager::Job::FinalizeAndCloseFileOnFileThread(
405 const std::string& boundary, 436 MhtmlSaveStatus save_status,
406 base::File file) { 437 const std::string& boundary,
438 base::File file,
439 const MHTMLExtraPartsImpl* extra_parts) {
407 DCHECK_CURRENTLY_ON(BrowserThread::FILE); 440 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
408 441
409 // If no previous error occurred the boundary should have been provided. 442 // If no previous error occurred the boundary should have been provided.
410 if (save_status == MhtmlSaveStatus::SUCCESS) { 443 if (save_status == MhtmlSaveStatus::SUCCESS) {
411 TRACE_EVENT0("page-serialization", 444 TRACE_EVENT0("page-serialization",
412 "MHTMLGenerationManager::Job MHTML footer writing"); 445 "MHTMLGenerationManager::Job MHTML footer writing");
413 DCHECK(!boundary.empty()); 446 DCHECK(!boundary.empty());
414 std::string footer = base::StringPrintf("--%s--\r\n", boundary.c_str()); 447
415 DCHECK(base::IsStringASCII(footer)); 448 // Write the extra data into a part of its own, if we have any.
416 if (file.WriteAtCurrentPos(footer.data(), footer.size()) < 0) 449 if (!WriteExtraDataParts(boundary, file, extra_parts)) {
417 save_status = MhtmlSaveStatus::FILE_WRITTING_ERROR; 450 save_status = MhtmlSaveStatus::FILE_WRITTING_ERROR;
451 return std::make_tuple(save_status, kInvalidFileSize);
fgorski 2017/04/05 18:59:13 I believe this return is wrong: This whole method
Pete Williamson 2017/04/05 22:11:30 Done. Thanks for catching this!
452 }
453
454 // Write out the footer at the bottom of the file.
455 if (!WriteFooter(boundary, file)) {
456 save_status = MhtmlSaveStatus::FILE_WRITTING_ERROR;
457 return std::make_tuple(save_status, kInvalidFileSize);
fgorski 2017/04/05 18:59:12 same here
Pete Williamson 2017/04/05 22:11:30 Done.
458 }
418 } 459 }
419 460
420 // If the file is still valid try to close it. Only update the status if that 461 // If the file is still valid try to close it. Only update the status if that
421 // won't hide an earlier error. 462 // won't hide an earlier error.
422 int64_t file_size = -1; 463 int64_t file_size = kInvalidFileSize;
423 if (file.IsValid()) { 464 MhtmlSaveStatus close_save_status = CloseFileIfValid(file, file_size);
fgorski 2017/04/05 18:59:13 Because there are only 2 outcomes of CloseFileIfVa
Pete Williamson 2017/04/05 22:11:30 Done, but it ended up a bit more complex than that
fgorski 2017/04/05 22:42:38 Still looks cleaner. I like it.
424 file_size = file.GetLength(); 465 if (save_status == MhtmlSaveStatus::SUCCESS &&
425 file.Close(); 466 close_save_status == MhtmlSaveStatus::FILE_WRITTING_ERROR) {
426 } else if (save_status == MhtmlSaveStatus::SUCCESS) { 467 save_status = close_save_status;
427 save_status = MhtmlSaveStatus::FILE_CLOSING_ERROR;
428 } 468 }
429 469
430 return std::make_tuple(save_status, file_size); 470 return std::make_tuple(save_status, file_size);
431 } 471 }
432 472
473 // static
474 bool MHTMLGenerationManager::Job::WriteExtraDataParts(
475 const std::string& boundary,
476 base::File& file,
477 const MHTMLExtraPartsImpl* extra_parts) {
478 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
479 // Don't write an extra data part if there is none.
480 if (extra_parts == nullptr)
481 return true;
482
483 const std::vector<MHTMLExtraDataPart>& extra_data_parts(extra_parts->parts());
484 if (extra_data_parts.empty())
485 return true;
486
487 std::string serialized_extra_data_parts;
488
489 // For each extra part, serialize that part and add to our accumulator
490 // string.
491 for (auto part : extra_data_parts) {
492 // Write a newline, then a boundary, another newline, then the content
493 // location, another newline, the content type, another newline, then another
494 // newline, the extra data string, and end with a newline.
495 std::string serialized_extra_data_part = base::StringPrintf(
496 "--%s\r\n%s%s\r\n%s%s\r\n%s\r\n", boundary.c_str(), kContentLocation,
497 part.content_location.c_str(), kContentType, part.content_type.c_str(),
498 part.body.c_str());
499 DCHECK(base::IsStringASCII(serialized_extra_data_part));
500
501 serialized_extra_data_parts += serialized_extra_data_part;
502 }
503
504 // Write the string into the file. Returns false if we failed the write.
505 return (file.WriteAtCurrentPos(serialized_extra_data_parts.data(),
506 serialized_extra_data_parts.size()) >= 0);
507 }
508
509 // static
510 bool MHTMLGenerationManager::Job::WriteFooter(const std::string& boundary,
511 base::File& file) {
512 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
513 std::string footer = base::StringPrintf("--%s--\r\n", boundary.c_str());
514 DCHECK(base::IsStringASCII(footer));
515 return (file.WriteAtCurrentPos(footer.data(), footer.size()) >= 0);
516 }
517
518 // static
519 MhtmlSaveStatus MHTMLGenerationManager::Job::CloseFileIfValid(
fgorski 2017/04/05 18:59:13 This should be a function in anonymous namespace (
Pete Williamson 2017/04/05 22:11:30 It is already part of the MHTMLGenerationManager::
fgorski 2017/04/05 22:42:38 Acknowledged.
520 base::File& file,
521 int64_t& file_size) {
fgorski 2017/04/05 18:59:13 This should be a pointer (and because of above it
Pete Williamson 2017/04/05 22:11:30 Ah, I had forgotten that. Fixed.
522 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
523 if (file.IsValid()) {
524 file_size = file.GetLength();
525 file.Close();
526 return MhtmlSaveStatus::SUCCESS;
527 }
528
529 return MhtmlSaveStatus::FILE_CLOSING_ERROR;
530 }
531
433 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { 532 MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() {
434 return base::Singleton<MHTMLGenerationManager>::get(); 533 return base::Singleton<MHTMLGenerationManager>::get();
435 } 534 }
436 535
437 MHTMLGenerationManager::MHTMLGenerationManager() : next_job_id_(0) {} 536 MHTMLGenerationManager::MHTMLGenerationManager() : next_job_id_(0) {}
438 537
439 MHTMLGenerationManager::~MHTMLGenerationManager() { 538 MHTMLGenerationManager::~MHTMLGenerationManager() {
440 } 539 }
441 540
442 void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents, 541 void MHTMLGenerationManager::SaveMHTML(WebContents* web_contents,
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after
592 return iter->second.get(); 691 return iter->second.get();
593 } 692 }
594 693
595 void MHTMLGenerationManager::RenderProcessExited(Job* job) { 694 void MHTMLGenerationManager::RenderProcessExited(Job* job) {
596 DCHECK_CURRENTLY_ON(BrowserThread::UI); 695 DCHECK_CURRENTLY_ON(BrowserThread::UI);
597 DCHECK(job); 696 DCHECK(job);
598 JobFinished(job, MhtmlSaveStatus::RENDER_PROCESS_EXITED); 697 JobFinished(job, MhtmlSaveStatus::RENDER_PROCESS_EXITED);
599 } 698 }
600 699
601 } // namespace content 700 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698