Chromium Code Reviews| Index: content/browser/download/mhtml_generation_manager.cc |
| diff --git a/content/browser/download/mhtml_generation_manager.cc b/content/browser/download/mhtml_generation_manager.cc |
| index bd432eefd4df03d74039f513002993c852d3f87b..b841a249f4dcfd162c43e762d4f4d14fd8a1b1b4 100644 |
| --- a/content/browser/download/mhtml_generation_manager.cc |
| +++ b/content/browser/download/mhtml_generation_manager.cc |
| @@ -4,17 +4,24 @@ |
| #include "content/browser/download/mhtml_generation_manager.h" |
| +#include <map> |
| +#include <queue> |
| + |
| #include "base/bind.h" |
| #include "base/files/file.h" |
| +#include "base/guid.h" |
| +#include "base/rand_util.h" |
| #include "base/scoped_observer.h" |
| #include "base/stl_util.h" |
| -#include "content/browser/renderer_host/render_view_host_impl.h" |
| +#include "base/strings/string_number_conversions.h" |
| +#include "base/strings/stringprintf.h" |
| +#include "content/browser/frame_host/frame_tree_node.h" |
| +#include "content/common/frame_messages.h" |
| #include "content/public/browser/browser_thread.h" |
| #include "content/public/browser/render_frame_host.h" |
| #include "content/public/browser/render_process_host.h" |
| #include "content/public/browser/render_process_host_observer.h" |
| #include "content/public/browser/web_contents.h" |
| -#include "content/common/view_messages.h" |
| namespace content { |
| @@ -29,9 +36,15 @@ class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
| GenerateMHTMLCallback callback() const { return callback_; } |
| - // Sends IPC to the renderer, asking for MHTML generation. |
| + // Sends IPC to the renderer, asking for MHTML generation of the next frame. |
| + // |
| // Returns true if the message was sent successfully; false otherwise. |
| - bool SendToRenderView(); |
| + bool SendToNextRenderFrame(); |
| + |
| + // Indicates if more calls to SendToNextRenderFrame are needed. |
| + bool GotMoreFramesToProcess() { |
|
dcheng
2015/12/09 08:02:37
Nit: HasMoreFramesToProcess() const?
Got sounds l
Łukasz Anforowicz
2015/12/09 17:33:57
Makes sense. Done.
|
| + return !pending_frame_tree_node_ids_.empty(); |
| + } |
| // Close the file on the file thread and respond back on the UI thread with |
| // file size. |
| @@ -44,7 +57,14 @@ class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
| void RenderProcessHostDestroyed(RenderProcessHost* host) override; |
| private: |
| + static std::string GenerateMhtmlBoundaryMarker(); |
| static int64 CloseFileOnFileThread(base::File file); |
| + void AddFrame(RenderFrameHost* render_frame_host); |
| + |
| + // Translates |frame_tree_node_to_content_id_| into |
| + // a |site_instance|-specific, routing-id-based map. |
| + std::map<int, std::string> CreateFrameRoutingIdToContentId( |
| + SiteInstance* site_instance); |
| // Id used to map renderer responses to jobs. |
| // See also MHTMLGenerationManager::id_to_job_ map. |
| @@ -53,9 +73,15 @@ class MHTMLGenerationManager::Job : public RenderProcessHostObserver { |
| // The handle to the file the MHTML is saved to for the browser process. |
| base::File browser_file_; |
| - // The IDs mapping to a specific contents. |
| - int process_id_; |
| - int routing_id_; |
| + // The IDs of frames we still need to process. |
| + std::queue<int> pending_frame_tree_node_ids_; |
| + |
| + // Frame tree node id -> content id (see WebPageSerializer::generateMHTMLParts |
| + // for more details about what "content ids" are and how they are used). |
| + std::map<int, std::string> frame_tree_node_to_content_id_; |
| + |
| + // MIME multipart boundary to use in the MHTML doc. |
| + std::string mhtml_boundary_marker_; |
| // The callback to call once generation is complete. |
| GenerateMHTMLCallback callback_; |
| @@ -71,33 +97,69 @@ MHTMLGenerationManager::Job::Job(int job_id, |
| WebContents* web_contents, |
| GenerateMHTMLCallback callback) |
| : job_id_(job_id), |
| - process_id_(web_contents->GetRenderProcessHost()->GetID()), |
| - routing_id_(web_contents->GetRenderViewHost()->GetRoutingID()), |
| + mhtml_boundary_marker_(GenerateMhtmlBoundaryMarker()), |
| callback_(callback), |
| observed_renderer_process_host_(this) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| + web_contents->ForEachFrame(base::Bind( |
| + &MHTMLGenerationManager::Job::AddFrame, |
| + base::Unretained(this))); // Safe because ForEachFrame is synchronous. |
| + |
| + // Main frame needs to be processed first. |
| + DCHECK(!pending_frame_tree_node_ids_.empty()); |
| + DCHECK(FrameTreeNode::GloballyFindByID(pending_frame_tree_node_ids_.front()) |
| + ->parent() == nullptr); |
| } |
| MHTMLGenerationManager::Job::~Job() { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| } |
| -bool MHTMLGenerationManager::Job::SendToRenderView() { |
| - DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| +std::map<int, std::string> |
| +MHTMLGenerationManager::Job::CreateFrameRoutingIdToContentId( |
| + SiteInstance* site_instance) { |
| + std::map<int, std::string> result; |
| + for (const auto& it : frame_tree_node_to_content_id_) { |
| + int ftn_id = it.first; |
| + const std::string& content_id = it.second; |
| + |
| + FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(ftn_id); |
| + if (!ftn) |
| + continue; |
| + |
| + int routing_id = |
| + ftn->render_manager()->GetRoutingIdForSiteInstance(site_instance); |
| + if (routing_id == MSG_ROUTING_NONE) |
| + continue; |
| + |
| + result[routing_id] = content_id; |
| + } |
| + return result; |
| +} |
| + |
| +bool MHTMLGenerationManager::Job::SendToNextRenderFrame() { |
| DCHECK(browser_file_.IsValid()); |
| + DCHECK_LT(0u, pending_frame_tree_node_ids_.size()); |
| + |
| + int frame_tree_node_id = pending_frame_tree_node_ids_.front(); |
| + pending_frame_tree_node_ids_.pop(); |
| + bool is_last_frame = pending_frame_tree_node_ids_.empty(); |
| - RenderViewHost* rvh = RenderViewHost::FromID(process_id_, routing_id_); |
| - if (!rvh) { // The contents went away. |
| + FrameTreeNode* ftn = FrameTreeNode::GloballyFindByID(frame_tree_node_id); |
| + if (!ftn) // The contents went away. |
| return false; |
| - } |
| + RenderFrameHost* rfh = ftn->current_frame_host(); |
| - observed_renderer_process_host_.Add(rvh->GetMainFrame()->GetProcess()); |
| + // Get notified if the target of the IPC message dies before responding. |
| + observed_renderer_process_host_.RemoveAll(); |
| + observed_renderer_process_host_.Add(rfh->GetProcess()); |
| IPC::PlatformFileForTransit renderer_file = IPC::GetFileHandleForProcess( |
| - browser_file_.GetPlatformFile(), rvh->GetProcess()->GetHandle(), |
| + browser_file_.GetPlatformFile(), rfh->GetProcess()->GetHandle(), |
| false); // |close_source_handle|. |
| - rvh->Send( |
| - new ViewMsg_SavePageAsMHTML(rvh->GetRoutingID(), job_id_, renderer_file)); |
| + rfh->Send(new FrameMsg_SerializeAsMHTML( |
| + rfh->GetRoutingID(), job_id_, renderer_file, mhtml_boundary_marker_, |
| + CreateFrameRoutingIdToContentId(rfh->GetSiteInstance()), is_last_frame)); |
| return true; |
| } |
| @@ -109,6 +171,17 @@ void MHTMLGenerationManager::Job::RenderProcessExited( |
| MHTMLGenerationManager::GetInstance()->RenderProcessExited(this); |
| } |
| +void MHTMLGenerationManager::Job::AddFrame(RenderFrameHost* render_frame_host) { |
| + auto* rfhi = static_cast<RenderFrameHostImpl*>(render_frame_host); |
| + int frame_tree_node_id = rfhi->frame_tree_node()->frame_tree_node_id(); |
| + pending_frame_tree_node_ids_.push(frame_tree_node_id); |
| + |
| + std::string guid = base::GenerateGUID(); |
| + std::string content_id = base::StringPrintf("<frame-%d-%s@mhtml.blink>", |
| + frame_tree_node_id, guid.c_str()); |
| + frame_tree_node_to_content_id_[frame_tree_node_id] = content_id; |
| +} |
| + |
| void MHTMLGenerationManager::Job::RenderProcessHostDestroyed( |
| RenderProcessHost* host) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| @@ -140,6 +213,26 @@ int64 MHTMLGenerationManager::Job::CloseFileOnFileThread(base::File file) { |
| return file_size; |
| } |
| +// static |
| +std::string MHTMLGenerationManager::Job::GenerateMhtmlBoundaryMarker() { |
|
dcheng
2015/12/09 08:02:37
I think you should stick either with MHTML or Mhtm
Łukasz Anforowicz
2015/12/09 17:33:57
Done.
|
| + // TODO(lukasza): Introduce and use a shared helper function in |
| + // net/base/mime_util.h instead of having the ad-hoc code below. |
| + |
| + // Trying to generate random boundaries similar to IE/UnMHT |
| + // (ex: ----=_NextPart_000_001B_01CC157B.96F808A0). |
| + const size_t kRandomValuesLength = 10; |
| + uint8_t random_values[kRandomValuesLength]; |
|
dcheng
2015/12/09 08:02:37
Just put 10 in here.
Łukasz Anforowicz
2015/12/09 17:33:57
Done.
|
| + base::RandBytes(&random_values, kRandomValuesLength); |
|
dcheng
2015/12/09 08:02:37
And then use sizeof() here.
Łukasz Anforowicz
2015/12/09 17:33:57
Done.
|
| + |
| + std::string result("----=_NextPart_000_"); |
| + result += base::HexEncode(random_values + 0, 2); |
| + result += '_'; |
| + result += base::HexEncode(random_values + 2, 4); |
| + result += '.'; |
| + result += base::HexEncode(random_values + 6, 4); |
| + return result; |
| +} |
| + |
| MHTMLGenerationManager* MHTMLGenerationManager::GetInstance() { |
| return base::Singleton<MHTMLGenerationManager>::get(); |
| } |
| @@ -169,17 +262,39 @@ void MHTMLGenerationManager::OnSavedPageAsMHTML( |
| int job_id, |
| bool mhtml_generation_in_renderer_succeeded) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| - JobStatus job_status = mhtml_generation_in_renderer_succeeded |
| - ? JobStatus::SUCCESS |
| - : JobStatus::FAILURE; |
| - JobFinished(job_id, job_status); |
| + |
| + if (!mhtml_generation_in_renderer_succeeded) { |
| + JobFinished(job_id, JobStatus::FAILURE); |
| + return; |
| + } |
| + |
| + Job* job = FindJob(job_id); |
| + if (!job) |
| + return; |
| + |
| + if (job->GotMoreFramesToProcess()) { |
| + if (!job->SendToNextRenderFrame()) { |
| + JobFinished(job_id, JobStatus::FAILURE); |
| + } |
| + return; |
| + } |
| + |
| + JobFinished(job_id, JobStatus::SUCCESS); |
| } |
| // static |
| base::File MHTMLGenerationManager::CreateFile(const base::FilePath& file_path) { |
| DCHECK_CURRENTLY_ON(BrowserThread::FILE); |
| - base::File browser_file( |
| - file_path, base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE); |
| + |
| + // SECURITY NOTE: A file descriptor to the file created below will be passed |
| + // to multiple renderer processes which (in out-of-process iframes mode) can |
| + // act on behalf of separate web principals. Therefore it is important to |
| + // only allow writing to the file and forbid reading from the file (as this |
| + // would allow reading content generated by other renderers / other web |
| + // principals). |
| + uint32 file_flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE; |
| + |
| + base::File browser_file(file_path, file_flags); |
| if (!browser_file.IsValid()) { |
| LOG(ERROR) << "Failed to create file to save MHTML at: " << |
| file_path.value(); |
| @@ -203,7 +318,7 @@ void MHTMLGenerationManager::OnFileAvailable(int job_id, |
| job->set_browser_file(browser_file.Pass()); |
| - if (!job->SendToRenderView()) { |
| + if (!job->SendToNextRenderFrame()) { |
| JobFinished(job_id, JobStatus::FAILURE); |
| } |
| } |