Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(118)

Side by Side Diff: content/browser/download/save_package.cc

Issue 1373573002: ABANDONED: OOPIFs: Moving stitching of local paths from renderer to browser process. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@page-serialization-recursive-begone
Patch Set: Removed no longer needed WebKit dependency. Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/download/save_package.h" 5 #include "content/browser/download/save_package.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/bind.h" 9 #include "base/bind.h"
10 #include "base/files/file_path.h" 10 #include "base/files/file_path.h"
11 #include "base/files/file_util.h" 11 #include "base/files/file_util.h"
12 #include "base/i18n/file_util_icu.h" 12 #include "base/i18n/file_util_icu.h"
13 #include "base/logging.h" 13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h" 14 #include "base/message_loop/message_loop.h"
15 #include "base/stl_util.h" 15 #include "base/stl_util.h"
16 #include "base/strings/string_piece.h" 16 #include "base/strings/string_piece.h"
17 #include "base/strings/string_split.h" 17 #include "base/strings/string_split.h"
18 #include "base/strings/sys_string_conversions.h" 18 #include "base/strings/sys_string_conversions.h"
19 #include "base/strings/utf_string_conversions.h" 19 #include "base/strings/utf_string_conversions.h"
20 #include "base/threading/thread.h" 20 #include "base/threading/thread.h"
21 #include "components/url_formatter/url_formatter.h" 21 #include "components/url_formatter/url_formatter.h"
22 #include "content/browser/download/download_item_impl.h" 22 #include "content/browser/download/download_item_impl.h"
23 #include "content/browser/download/download_manager_impl.h" 23 #include "content/browser/download/download_manager_impl.h"
24 #include "content/browser/download/download_stats.h" 24 #include "content/browser/download/download_stats.h"
25 #include "content/browser/download/save_file.h" 25 #include "content/browser/download/save_file.h"
26 #include "content/browser/download/save_file_manager.h" 26 #include "content/browser/download/save_file_manager.h"
27 #include "content/browser/download/save_item.h" 27 #include "content/browser/download/save_item.h"
28 #include "content/browser/frame_host/frame_tree.h"
29 #include "content/browser/frame_host/frame_tree_node.h"
28 #include "content/browser/loader/resource_dispatcher_host_impl.h" 30 #include "content/browser/loader/resource_dispatcher_host_impl.h"
29 #include "content/browser/renderer_host/render_process_host_impl.h" 31 #include "content/browser/renderer_host/render_process_host_impl.h"
30 #include "content/browser/renderer_host/render_view_host_delegate.h" 32 #include "content/browser/renderer_host/render_view_host_delegate.h"
31 #include "content/browser/renderer_host/render_view_host_impl.h" 33 #include "content/browser/renderer_host/render_view_host_impl.h"
34 #include "content/browser/web_contents/web_contents_impl.h"
32 #include "content/common/frame_messages.h" 35 #include "content/common/frame_messages.h"
33 #include "content/common/view_messages.h"
34 #include "content/public/browser/browser_context.h" 36 #include "content/public/browser/browser_context.h"
35 #include "content/public/browser/browser_thread.h" 37 #include "content/public/browser/browser_thread.h"
36 #include "content/public/browser/content_browser_client.h" 38 #include "content/public/browser/content_browser_client.h"
37 #include "content/public/browser/download_manager_delegate.h" 39 #include "content/public/browser/download_manager_delegate.h"
38 #include "content/public/browser/navigation_entry.h" 40 #include "content/public/browser/navigation_entry.h"
39 #include "content/public/browser/notification_service.h" 41 #include "content/public/browser/notification_service.h"
40 #include "content/public/browser/notification_types.h" 42 #include "content/public/browser/notification_types.h"
41 #include "content/public/browser/render_frame_host.h" 43 #include "content/public/browser/render_frame_host.h"
42 #include "content/public/browser/resource_context.h" 44 #include "content/public/browser/resource_context.h"
43 #include "content/public/browser/web_contents.h" 45 #include "content/public/browser/web_contents.h"
44 #include "net/base/filename_util.h" 46 #include "net/base/filename_util.h"
45 #include "net/base/io_buffer.h" 47 #include "net/base/io_buffer.h"
46 #include "net/base/mime_util.h" 48 #include "net/base/mime_util.h"
47 #include "net/url_request/url_request_context.h" 49 #include "net/url_request/url_request_context.h"
48 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
49 #include "url/url_constants.h" 50 #include "url/url_constants.h"
50 51
51 using base::Time; 52 using base::Time;
52 using blink::WebPageSerializerClient;
53 53
54 namespace content { 54 namespace content {
55 namespace { 55 namespace {
56 56
57 // A counter for uniquely identifying each save package. 57 // A counter for uniquely identifying each save package.
58 int g_save_package_id = 0; 58 int g_save_package_id = 0;
59 59
60 // Default name which will be used when we cannot get a proper name from 60 // Default name which will be used when we cannot get a proper name from
61 // resource URL. 61 // resource URL.
62 const char kDefaultSaveName[] = "saved_resource"; 62 const char kDefaultSaveName[] = "saved_resource";
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
124 void ResumeRequest() const override {} 124 void ResumeRequest() const override {}
125 void CancelRequest() const override {} 125 void CancelRequest() const override {}
126 std::string DebugString() const override { 126 std::string DebugString() const override {
127 return "SavePackage DownloadRequestHandle"; 127 return "SavePackage DownloadRequestHandle";
128 } 128 }
129 129
130 private: 130 private:
131 base::WeakPtr<SavePackage> save_package_; 131 base::WeakPtr<SavePackage> save_package_;
132 }; 132 };
133 133
134 int GetFrameTreeNodeId(RenderFrameHost* render_frame_host) {
135 auto* rfhi = static_cast<RenderFrameHostImpl*>(render_frame_host);
136 return rfhi->frame_tree_node()->frame_tree_node_id();
137 }
138
134 } // namespace 139 } // namespace
135 140
136 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] = 141 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
137 FILE_PATH_LITERAL("html"); 142 FILE_PATH_LITERAL("html");
138 143
139 SavePackage::SavePackage(WebContents* web_contents, 144 SavePackage::SavePackage(WebContents* web_contents,
140 SavePageType save_type, 145 SavePageType save_type,
141 const base::FilePath& file_full_path, 146 const base::FilePath& file_full_path,
142 const base::FilePath& directory_full_path) 147 const base::FilePath& directory_full_path)
143 : WebContentsObserver(web_contents), 148 : WebContentsObserver(web_contents),
144 number_of_frames_pending_response_(0), 149 number_of_frames_pending_response_(0),
145 file_manager_(NULL), 150 file_manager_(NULL),
146 download_manager_(NULL), 151 download_manager_(NULL),
147 download_(NULL), 152 download_(NULL),
148 page_url_(GetUrlToBeSaved()), 153 page_url_(GetUrlToBeSaved()),
149 saved_main_file_path_(file_full_path), 154 saved_main_file_path_(file_full_path),
150 saved_main_directory_path_(directory_full_path), 155 saved_main_directory_path_(directory_full_path),
151 title_(web_contents->GetTitle()), 156 title_(web_contents->GetTitle()),
152 start_tick_(base::TimeTicks::Now()), 157 start_tick_(base::TimeTicks::Now()),
153 finished_(false), 158 finished_(false),
154 mhtml_finishing_(false), 159 mhtml_finishing_(false),
155 user_canceled_(false), 160 user_canceled_(false),
156 disk_error_occurred_(false), 161 disk_error_occurred_(false),
157 save_type_(save_type), 162 save_type_(save_type),
158 all_save_items_count_(0), 163 all_save_items_count_(0),
159 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 164 file_name_set_(&base::FilePath::CompareLessIgnoreCase),
160 wait_state_(INITIALIZE), 165 wait_state_(INITIALIZE),
161 contents_id_(web_contents->GetRenderProcessHost()->GetID()), 166 contents_id_(web_contents->GetRenderProcessHost()->GetID()),
ncarter (slow) 2015/10/08 20:57:54 Not introduced by your cl, but just wanted to put
Łukasz Anforowicz 2015/10/09 16:54:08 Sure - I'll put that on my TODO list. OTOH, I see
ncarter (slow) 2015/10/12 23:31:47 Looks like it's used as a key in a map here, so th
162 unique_id_(g_save_package_id++), 167 unique_id_(g_save_package_id++),
163 wrote_to_completed_file_(false), 168 wrote_to_completed_file_(false),
164 wrote_to_failed_file_(false) { 169 wrote_to_failed_file_(false) {
165 DCHECK(page_url_.is_valid()); 170 DCHECK(page_url_.is_valid());
166 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 171 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
167 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || 172 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
168 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)); 173 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
169 DCHECK(!saved_main_file_path_.empty() && 174 DCHECK(!saved_main_file_path_.empty() &&
170 saved_main_file_path_.value().length() <= kMaxFilePathLength); 175 saved_main_file_path_.value().length() <= kMaxFilePathLength);
171 DCHECK(!saved_main_directory_path_.empty() && 176 DCHECK(!saved_main_directory_path_.empty() &&
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 Cancel(true); 237 Cancel(true);
233 } 238 }
234 239
235 // We should no longer be observing the DownloadItem at this point. 240 // We should no longer be observing the DownloadItem at this point.
236 CHECK(!download_); 241 CHECK(!download_);
237 242
238 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() + 243 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
239 completed_count() + 244 completed_count() +
240 in_process_count())); 245 in_process_count()));
241 // Free all SaveItems. 246 // Free all SaveItems.
242 while (!waiting_item_queue_.empty()) { 247 STLDeleteElements(&waiting_item_queue_);
243 // We still have some items which are waiting for start to save. 248 STLDeleteValues(&in_progress_items_);
244 SaveItem* save_item = waiting_item_queue_.front();
245 waiting_item_queue_.pop();
246 delete save_item;
247 }
248
249 STLDeleteValues(&saved_success_items_); 249 STLDeleteValues(&saved_success_items_);
250 STLDeleteValues(&in_progress_items_);
251 STLDeleteValues(&saved_failed_items_); 250 STLDeleteValues(&saved_failed_items_);
252 251
253 file_manager_ = NULL; 252 file_manager_ = NULL;
254 } 253 }
255 254
256 GURL SavePackage::GetUrlToBeSaved() { 255 GURL SavePackage::GetUrlToBeSaved() {
257 // Instead of using web_contents_.GetURL here, we use url() (which is the 256 // Instead of using web_contents_.GetURL here, we use url() (which is the
258 // "real" url of the page) from the NavigationEntry because it reflects its 257 // "real" url of the page) from the NavigationEntry because it reflects its
259 // origin rather than the displayed one (returned by GetURL) which may be 258 // origin rather than the displayed one (returned by GetURL) which may be
260 // different (like having "view-source:" on the front). 259 // different (like having "view-source:" on the front).
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 GetSavableResourceLinks(); 341 GetSavableResourceLinks();
343 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) { 342 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
344 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind( 343 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
345 &SavePackage::OnMHTMLGenerated, this)); 344 &SavePackage::OnMHTMLGenerated, this));
346 } else { 345 } else {
347 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_; 346 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
348 wait_state_ = NET_FILES; 347 wait_state_ = NET_FILES;
349 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ? 348 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
350 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : 349 SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
351 SaveFileCreateInfo::SAVE_FILE_FROM_NET; 350 SaveFileCreateInfo::SAVE_FILE_FROM_NET;
352 SaveItem* save_item = new SaveItem(page_url_, 351 SaveItem* save_item = new SaveItem(page_url_, this, save_source);
353 Referrer(),
354 this,
355 save_source);
356 // Add this item to waiting list. 352 // Add this item to waiting list.
357 waiting_item_queue_.push(save_item); 353 waiting_item_queue_.push_back(save_item);
358 all_save_items_count_ = 1; 354 all_save_items_count_ = 1;
359 download_->SetTotalBytes(1); 355 download_->SetTotalBytes(1);
360 356
361 DoSavingProcess(); 357 DoSavingProcess();
362 } 358 }
363 } 359 }
364 360
365 void SavePackage::OnMHTMLGenerated(int64 size) { 361 void SavePackage::OnMHTMLGenerated(int64 size) {
366 if (size <= 0) { 362 if (size <= 0) {
367 Cancel(false); 363 Cancel(false);
(...skipping 550 matching lines...) Expand 10 before | Expand all | Expand 10 after
918 // SaveFileManager, which will dispatch it to different approach according to 914 // SaveFileManager, which will dispatch it to different approach according to
919 // the save source. Parameter process_all_remaining_items indicates whether 915 // the save source. Parameter process_all_remaining_items indicates whether
920 // we need to save all remaining items. 916 // we need to save all remaining items.
921 void SavePackage::SaveNextFile(bool process_all_remaining_items) { 917 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
922 DCHECK(web_contents()); 918 DCHECK(web_contents());
923 DCHECK(waiting_item_queue_.size()); 919 DCHECK(waiting_item_queue_.size());
924 920
925 do { 921 do {
926 // Pop SaveItem from waiting list. 922 // Pop SaveItem from waiting list.
927 SaveItem* save_item = waiting_item_queue_.front(); 923 SaveItem* save_item = waiting_item_queue_.front();
928 waiting_item_queue_.pop(); 924 waiting_item_queue_.pop_front();
929 925
930 // Add the item to in_progress_items_. 926 // Add the item to in_progress_items_.
931 SaveUrlItemMap::iterator it = in_progress_items_.find( 927 SaveUrlItemMap::iterator it = in_progress_items_.find(
932 save_item->url().spec()); 928 save_item->url().spec());
933 DCHECK(it == in_progress_items_.end()); 929 DCHECK(it == in_progress_items_.end());
934 in_progress_items_[save_item->url().spec()] = save_item; 930 in_progress_items_[save_item->url().spec()] = save_item;
935 save_item->Start(); 931 save_item->Start();
936 file_manager_->SaveURL(save_item->url(), 932 file_manager_->SaveURL(save_item->url(),
937 save_item->referrer(), 933 save_item->referrer(),
938 web_contents()->GetRenderProcessHost()->GetID(), 934 web_contents()->GetRenderProcessHost()->GetID(),
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
998 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML)); 994 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
999 if (waiting_item_queue_.size()) { 995 if (waiting_item_queue_.size()) {
1000 DCHECK(all_save_items_count_ == waiting_item_queue_.size()); 996 DCHECK(all_save_items_count_ == waiting_item_queue_.size());
1001 SaveNextFile(false); 997 SaveNextFile(false);
1002 } 998 }
1003 } 999 }
1004 } 1000 }
1005 1001
1006 bool SavePackage::OnMessageReceived(const IPC::Message& message, 1002 bool SavePackage::OnMessageReceived(const IPC::Message& message,
1007 RenderFrameHost* render_frame_host) { 1003 RenderFrameHost* render_frame_host) {
1008 bool handled = true; 1004 bool handled = true;
ncarter (slow) 2015/10/08 20:57:55 Consider adding here: RenderFrameHostImpl* sender
Łukasz Anforowicz 2015/10/09 16:54:07 Done. Thanks for the suggestion.
1009 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(SavePackage, message, render_frame_host) 1005 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(SavePackage, message, render_frame_host)
1010 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksResponse, 1006 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksResponse,
1011 OnSavableResourceLinksResponse) 1007 OnSavableResourceLinksResponse)
1012 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksError, 1008 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksError,
1013 OnSavableResourceLinksError) 1009 OnSavableResourceLinksError)
1014 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedHtmlWithLocalLinksResponse, 1010 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedHtmlFragment,
1015 OnSerializedHtmlWithLocalLinksResponse) 1011 OnSerializedHtmlFragment)
1012 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedLocalPathForSubFrame,
1013 OnSerializedLocalPathForSubFrame)
1014 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedLocalPathForSavableResource,
1015 OnSerializedLocalPathForSavableResource)
1016 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedEndOfFrame,
1017 OnSerializedEndOfFrame)
1016 IPC_MESSAGE_UNHANDLED(handled = false) 1018 IPC_MESSAGE_UNHANDLED(handled = false)
1017 IPC_END_MESSAGE_MAP() 1019 IPC_END_MESSAGE_MAP()
1018 return handled; 1020 return handled;
1019 } 1021 }
1020 1022
1021 // After finishing all SaveItems which need to get data from net. 1023 // After finishing all SaveItems which need to get data from net.
1022 // We collect all URLs which have local storage and send the 1024 // We collect all URLs which have local storage and send the
1023 // map:(originalURL:currentLocalPath) to render process (backend). 1025 // map:(originalURL:currentLocalPath) to render process (backend).
1024 // Then render process will serialize DOM and send data to us. 1026 // Then render process will serialize DOM and send data to us.
1025 void SavePackage::GetSerializedHtmlWithLocalLinks() { 1027 void SavePackage::GetSerializedHtmlWithLocalLinks() {
1026 if (wait_state_ != HTML_DATA) 1028 if (wait_state_ != HTML_DATA)
1027 return; 1029 return;
1028 std::vector<GURL> saved_links;
1029 std::vector<base::FilePath> saved_file_paths;
1030 int successful_started_items_count = 0; 1030 int successful_started_items_count = 0;
1031 1031
1032 // Collect all saved items which have local storage. 1032 // Count all saved items which have local storage.
1033 // First collect the status of all the resource files and check whether they 1033 for (const auto& it : in_progress_items_) {
1034 // have created local files although they have not been completely saved. 1034 DCHECK_EQ(SaveFileCreateInfo::SAVE_FILE_FROM_DOM, it.second->save_source());
1035 // If yes, the file can be saved. Otherwise, there is a disk error, so we 1035 if (it.second->has_final_name())
1036 // need to cancel the page saving job.
1037 for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1038 it != in_progress_items_.end(); ++it) {
1039 DCHECK(it->second->save_source() ==
1040 SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1041 if (it->second->has_final_name())
1042 successful_started_items_count++; 1036 successful_started_items_count++;
1043 saved_links.push_back(it->second->url());
1044 saved_file_paths.push_back(it->second->file_name());
1045 } 1037 }
1046 1038
1047 // If not all files of HTML resources have been started, then wait. 1039 // If not all files of HTML resources have been started, then wait.
1048 if (successful_started_items_count != in_process_count()) 1040 if (successful_started_items_count != in_process_count())
1049 return; 1041 return;
1050 1042
1051 // Collect all saved success items.
1052 for (SavedItemMap::iterator it = saved_success_items_.begin();
1053 it != saved_success_items_.end(); ++it) {
1054 DCHECK(it->second->has_final_name());
1055 saved_links.push_back(it->second->url());
1056 saved_file_paths.push_back(it->second->file_name());
1057 }
1058
1059 // Get the relative directory name.
1060 base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1061
1062 // Ask all frames for their serialized data. 1043 // Ask all frames for their serialized data.
1063 DCHECK_EQ(0, number_of_frames_pending_response_); 1044 DCHECK_EQ(0, number_of_frames_pending_response_);
1064 web_contents()->ForEachFrame(base::Bind( 1045 web_contents()->ForEachFrame(base::Bind(
1065 &SavePackage::GetSerializedHtmlWithLocalLinksForFrame, 1046 &SavePackage::GetSerializedHtmlWithLocalLinksForFrame,
1066 base::Unretained(this), // Safe, because ForEachFrame is synchronous. 1047 base::Unretained(this))); // Safe, because ForEachFrame is synchronous.
1067 saved_links, saved_file_paths, relative_dir_name));
1068 DCHECK_LT(0, number_of_frames_pending_response_); 1048 DCHECK_LT(0, number_of_frames_pending_response_);
ncarter (slow) 2015/10/08 20:57:54 Another "fyi, no action needed, just want to put t
Łukasz Anforowicz 2015/10/09 16:54:07 Disclaimer: I've introduced this DCHECK in an earl
ncarter (slow) 2015/10/12 23:31:47 I agree with everything in your reply. I don't thi
1069 } 1049 }
1070 1050
1071 void SavePackage::GetSerializedHtmlWithLocalLinksForFrame( 1051 void SavePackage::GetSerializedHtmlWithLocalLinksForFrame(
1072 const std::vector<GURL>& saved_links,
1073 const std::vector<base::FilePath>& saved_file_paths,
1074 const base::FilePath& relative_dir_name,
1075 RenderFrameHost* target) { 1052 RenderFrameHost* target) {
1076 number_of_frames_pending_response_++; 1053 number_of_frames_pending_response_++;
1077 target->Send(new FrameMsg_GetSerializedHtmlWithLocalLinks( 1054 target->Send(
1078 target->GetRoutingID(), saved_links, saved_file_paths, 1055 new FrameMsg_GetSerializedHtmlWithLocalLinks(target->GetRoutingID()));
1079 relative_dir_name));
1080 } 1056 }
1081 1057
1082 // Process the serialized HTML content data of a specified frame 1058 void SavePackage::OnSerializedHtmlFragment(RenderFrameHost* sender,
1083 // retrieved from the renderer process. 1059 const std::string& data) {
1084 void SavePackage::OnSerializedHtmlWithLocalLinksResponse(
1085 RenderFrameHost* sender,
1086 const GURL& frame_url,
1087 const std::string& data,
1088 int32 status) {
1089 WebPageSerializerClient::PageSerializationStatus flag =
1090 static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1091
1092 // When calling WebPageSerializer::serialize in non-recursive mode, the
1093 // AllFramesAreFinished is redundant - it is sent by each frame right after
1094 // CurrentFrameIsFinished. Therefore we ignore AllFramesAreFinished and
1095 // instead track pending frames in |number_of_frames_pending_response_|.
1096 if (flag == WebPageSerializerClient::AllFramesAreFinished)
1097 return;
1098
1099 // Check current state.
1100 if (wait_state_ != HTML_DATA) 1060 if (wait_state_ != HTML_DATA)
1101 return; 1061 return;
1102 1062
1103 int id = contents_id(); 1063 int frame_tree_node_id = GetFrameTreeNodeId(sender);
1104 1064 SaveItem* save_item = frame_tree_node_id_to_save_item_[frame_tree_node_id];
ncarter (slow) 2015/10/08 20:57:55 What if it's not in the map?
Łukasz Anforowicz 2015/10/09 16:54:07 Good point. Thanks for catching this. When think
1105 SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec()); 1065 if (save_item->state() != SaveItem::IN_PROGRESS) {
1106 if (it == in_progress_items_.end()) { 1066 LogWriteToAlreadyCompletedOrFailedSaveItem(save_item);
1107 for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1108 saved_it != saved_success_items_.end(); ++saved_it) {
1109 if (saved_it->second->url() == frame_url) {
1110 wrote_to_completed_file_ = true;
1111 break;
1112 }
1113 }
1114
1115 it = saved_failed_items_.find(frame_url.spec());
1116 if (it != saved_failed_items_.end())
1117 wrote_to_failed_file_ = true;
1118
1119 return; 1067 return;
1120 } 1068 }
1121 1069
1122 SaveItem* save_item = it->second;
1123 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1070 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1124
1125 if (!data.empty()) { 1071 if (!data.empty()) {
1126 // Prepare buffer for saving HTML data. 1072 // Prepare buffer for saving HTML data.
1127 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size())); 1073 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1128 memcpy(new_data->data(), data.data(), data.size()); 1074 memcpy(new_data->data(), data.data(), data.size());
1129 1075
1130 // Call write file functionality in file thread. 1076 // Call write file functionality in file thread.
1131 BrowserThread::PostTask( 1077 BrowserThread::PostTask(
1132 BrowserThread::FILE, FROM_HERE, 1078 BrowserThread::FILE, FROM_HERE,
1133 base::Bind(&SaveFileManager::UpdateSaveProgress, 1079 base::Bind(&SaveFileManager::UpdateSaveProgress, file_manager_,
1134 file_manager_, 1080 save_item->save_id(), new_data,
1135 save_item->save_id(),
1136 new_data,
1137 static_cast<int>(data.size()))); 1081 static_cast<int>(data.size())));
1138 } 1082 }
1083 }
1084
1085 void SavePackage::OnSerializedLocalPathForSubFrame(
1086 RenderFrameHost* sender,
1087 int render_frame_or_proxy_routing_id) {
1088 if (wait_state_ != HTML_DATA)
1089 return;
1090
1091 FrameTreeNode* frame_tree_node =
1092 static_cast<WebContentsImpl*>(web_contents())
1093 ->GetFrameTree()
ncarter (slow) 2015/10/08 20:57:54 If you passed in an RFHImpl here, you could altern
Łukasz Anforowicz 2015/10/09 16:54:07 Done (to have a safer code in case somebody wants
1094 ->FindByRoutingID(sender->GetProcess()->GetID(),
1095 render_frame_or_proxy_routing_id);
1096 if (!frame_tree_node)
1097 return;
ncarter (slow) 2015/10/08 20:57:55 Would it be worthwhile here to validate that frame
Łukasz Anforowicz 2015/10/09 16:54:08 Done.
1098
1099 SaveItem* save_item =
1100 frame_tree_node_id_to_save_item_[frame_tree_node->frame_tree_node_id()];
ncarter (slow) 2015/10/08 20:57:55 What if it's not in the map?
Łukasz Anforowicz 2015/10/09 16:54:08 It's ok in this case - this will be handled by Ser
1101 SerializeLocalPathForSaveItem(sender, save_item,
1102 frame_tree_node->current_url());
1103 }
1104
1105 void SavePackage::OnSerializedLocalPathForSavableResource(
1106 RenderFrameHost* sender,
1107 const GURL& savable_resource) {
1108 if (wait_state_ != HTML_DATA)
1109 return;
1110 if (!savable_resource.is_valid())
1111 return;
1112
1113 SaveItem* save_item = url_to_save_item_[savable_resource];
1114 SerializeLocalPathForSaveItem(sender, save_item, savable_resource);
1115 }
1116
1117 void SavePackage::OnSerializedEndOfFrame(RenderFrameHost* sender) {
1118 if (wait_state_ != HTML_DATA)
1119 return;
1120
1121 int frame_tree_node_id = GetFrameTreeNodeId(sender);
1122 SaveItem* save_item = frame_tree_node_id_to_save_item_[frame_tree_node_id];
1123 if (save_item->state() != SaveItem::IN_PROGRESS) {
1124 LogWriteToAlreadyCompletedOrFailedSaveItem(save_item);
1125 return;
1126 }
1127
1128 int id = contents_id();
1139 1129
1140 // Current frame is completed saving, call finish in file thread. 1130 // Current frame is completed saving, call finish in file thread.
1141 if (flag == WebPageSerializerClient::CurrentFrameIsFinished) { 1131 DVLOG(20) << " " << __FUNCTION__ << "()"
1142 DVLOG(20) << " " << __FUNCTION__ << "()" 1132 << " save_id = " << save_item->save_id() << " url = \""
1143 << " save_id = " << save_item->save_id() 1133 << save_item->url().spec() << "\"";
1144 << " url = \"" << save_item->url().spec() << "\""; 1134 BrowserThread::PostTask(
1145 BrowserThread::PostTask( 1135 BrowserThread::FILE, FROM_HERE,
1146 BrowserThread::FILE, FROM_HERE, 1136 base::Bind(&SaveFileManager::SaveFinished, file_manager_,
1147 base::Bind(&SaveFileManager::SaveFinished, 1137 save_item->save_id(), save_item->url(), id, true));
1148 file_manager_, 1138 number_of_frames_pending_response_--;
ncarter (slow) 2015/10/08 20:57:55 Shouldn't save_item()->state() be transitioned to
Łukasz Anforowicz 2015/10/09 16:54:08 I've spent 10-15 minutes thinking that SaveItem::s
ncarter (slow) 2015/10/12 23:31:47 Gosh, I hope not. I'm glad you added the UI thread
1149 save_item->save_id(), 1139 DCHECK_LE(0, number_of_frames_pending_response_);
1150 save_item->url(),
1151 id,
1152 true));
1153 number_of_frames_pending_response_--;
1154 DCHECK_LE(0, number_of_frames_pending_response_);
1155 }
1156 1140
1157 // If all frames are finished saving, we need to close the remaining 1141 // If all the frames are finished saving, we need to close the
1158 // SaveItems. 1142 // remaining SaveItems.
1159 if (number_of_frames_pending_response_ == 0) { 1143 if (number_of_frames_pending_response_ == 0) {
1160 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 1144 for (const auto& pair : in_progress_items_) {
1161 it != in_progress_items_.end(); ++it) {
1162 DVLOG(20) << " " << __FUNCTION__ << "()" 1145 DVLOG(20) << " " << __FUNCTION__ << "()"
1163 << " save_id = " << it->second->save_id() << " url = \"" 1146 << " save_id = " << pair.second->save_id() << " url = \""
1164 << it->second->url().spec() << "\""; 1147 << pair.second->url().spec() << "\"";
1165 BrowserThread::PostTask( 1148 BrowserThread::PostTask(
1166 BrowserThread::FILE, FROM_HERE, 1149 BrowserThread::FILE, FROM_HERE,
1167 base::Bind(&SaveFileManager::SaveFinished, file_manager_, 1150 base::Bind(&SaveFileManager::SaveFinished, file_manager_,
1168 it->second->save_id(), it->second->url(), id, true)); 1151 pair.second->save_id(), pair.second->url(), id, true));
1169 } 1152 }
1170 } 1153 }
1171 } 1154 }
1172 1155
1156 void SavePackage::SerializeLocalPathForSaveItem(RenderFrameHost* sender,
1157 SaveItem* save_item,
1158 const GURL& fallback_url) {
1159 std::string attr_value;
1160 if (save_item) {
1161 base::FilePath relative_path =
1162 base::FilePath(FILE_PATH_LITERAL("."))
1163 .Append(saved_main_directory_path_.BaseName())
1164 .Append(save_item->file_name())
1165 .NormalizePathSeparatorsTo(FILE_PATH_LITERAL('/'));
1166 attr_value = relative_path.AsUTF8Unsafe();
1167 } else {
1168 attr_value = fallback_url.spec();
ncarter (slow) 2015/10/08 21:04:14 When does this happen? Could you add a comment exp
Łukasz Anforowicz 2015/10/09 16:54:08 Done.
1169 }
1170
1171 // TODO(lukasza): Escape attr_value (&quot; + &amp;).
ncarter (slow) 2015/10/08 20:57:55 My understanding is that there are no security con
Łukasz Anforowicz 2015/10/09 16:54:08 Hmmm... thinking about it some more, I've realized
1172 OnSerializedHtmlFragment(sender, attr_value);
1173 }
1174
1175 void SavePackage::LogWriteToAlreadyCompletedOrFailedSaveItem(
1176 SaveItem* save_item) {
1177 auto already_completed_it =
1178 std::find_if(saved_success_items_.begin(), saved_success_items_.end(),
1179 [save_item](const SavedItemMap::value_type& v) {
1180 return v.second->url() == save_item->url();
1181 });
1182 if (already_completed_it != saved_success_items_.end())
1183 wrote_to_completed_file_ = true;
1184
1185 auto failed_it =
1186 std::find_if(saved_failed_items_.begin(), saved_failed_items_.end(),
1187 [save_item](const SaveUrlItemMap::value_type& v) {
1188 DCHECK(v.second->has_final_name());
1189 return v.second->url() == save_item->url();
1190 });
1191 if (failed_it != saved_failed_items_.end())
1192 wrote_to_failed_file_ = true;
1193 }
1194
1173 // Ask for all savable resource links from backend, including main frame and 1195 // Ask for all savable resource links from backend, including main frame and
1174 // sub-frames. 1196 // sub-frames.
1175 void SavePackage::GetSavableResourceLinks() { 1197 void SavePackage::GetSavableResourceLinks() {
1176 if (wait_state_ != START_PROCESS) 1198 if (wait_state_ != START_PROCESS)
1177 return; 1199 return;
1178 1200
1179 wait_state_ = RESOURCES_LIST; 1201 wait_state_ = RESOURCES_LIST;
1180 1202
1181 DCHECK_EQ(0, number_of_frames_pending_response_); 1203 DCHECK_EQ(0, number_of_frames_pending_response_);
1182 web_contents()->ForEachFrame(base::Bind( 1204 web_contents()->ForEachFrame(base::Bind(
(...skipping 16 matching lines...) Expand all
1199 return; 1221 return;
1200 1222
1201 if (resources_list.size() != referrers_list.size()) 1223 if (resources_list.size() != referrers_list.size())
1202 return; 1224 return;
1203 1225
1204 // Add all sub-resources to wait list. 1226 // Add all sub-resources to wait list.
1205 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) { 1227 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1206 const GURL& u = resources_list[i]; 1228 const GURL& u = resources_list[i];
1207 if (!u.is_valid()) 1229 if (!u.is_valid())
1208 continue; 1230 continue;
1209 if (unique_urls_to_save_.count(u))
1210 continue;
1211 unique_urls_to_save_.insert(u);
1212 1231
1213 SaveFileCreateInfo::SaveFileSource save_source = 1232 SaveItem* save_item = url_to_save_item_[u];
1214 u.SchemeIsFile() ? SaveFileCreateInfo::SAVE_FILE_FROM_FILE 1233 if (save_item == nullptr) {
ncarter (slow) 2015/10/08 20:57:55 You seem to be preferring operator[] throughout in
Łukasz Anforowicz 2015/10/09 16:54:07 I've changed to find in some places, but when I tr
ncarter (slow) 2015/10/12 23:31:47 I'm OK with operator[] in these cases, since it's
1215 : SaveFileCreateInfo::SAVE_FILE_FROM_NET; 1234 SaveFileCreateInfo::SaveFileSource save_source =
1216 SaveItem* save_item = new SaveItem(u, referrers_list[i], this, save_source); 1235 u.SchemeIsFile() ? SaveFileCreateInfo::SAVE_FILE_FROM_FILE
1217 waiting_item_queue_.push(save_item); 1236 : SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1237 url_to_save_item_[u] = save_item = new SaveItem(u, this, save_source);
1238 waiting_item_queue_.push_back(save_item);
1239 }
1240 save_item->set_referrer(referrers_list[i]);
1218 } 1241 }
1219 1242
1220 // Store savable frame_url for later processing. 1243 // Add the frame to wait list.
1221 if (frame_url.is_valid()) 1244 // TODO(lukasza): Do not dedupe subframes based on url (see crbug.com/538188).
1222 frame_urls_to_save_.push_back(frame_url); 1245 if (frame_url.is_valid()) {
1246 SaveItem* save_item = url_to_save_item_[frame_url];
1247 if (save_item == nullptr) {
1248 url_to_save_item_[frame_url] = save_item =
1249 new SaveItem(frame_url, this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1250 waiting_item_queue_.push_back(save_item);
1251 }
1252 frame_tree_node_id_to_save_item_[GetFrameTreeNodeId(sender)] = save_item;
1253 }
1223 1254
1224 CompleteSavableResourceLinksResponse(); 1255 CompleteSavableResourceLinksResponse();
1225 } 1256 }
1226 1257
1227 void SavePackage::OnSavableResourceLinksError(RenderFrameHost* sender) { 1258 void SavePackage::OnSavableResourceLinksError(RenderFrameHost* sender) {
1228 CompleteSavableResourceLinksResponse(); 1259 CompleteSavableResourceLinksResponse();
1229 } 1260 }
1230 1261
1231 void SavePackage::CompleteSavableResourceLinksResponse() { 1262 void SavePackage::CompleteSavableResourceLinksResponse() {
1232 --number_of_frames_pending_response_; 1263 --number_of_frames_pending_response_;
1233 DCHECK_LE(0, number_of_frames_pending_response_); 1264 DCHECK_LE(0, number_of_frames_pending_response_);
1234 if (number_of_frames_pending_response_ != 0) 1265 if (number_of_frames_pending_response_ != 0)
1235 return; // Need to wait for more responses from RenderFrames. 1266 return; // Need to wait for more responses from RenderFrames.
1236 1267
1237 // Add frame urls to the waiting_item_queue_. This is done *after* processing 1268 // Sort |waiting_item_queue_| so that frames go last.
1238 // all savable resource links (i.e. in OnSavableResourceLinksResponse), to 1269 std::sort(
ncarter (slow) 2015/10/08 20:57:55 I wonder if we ought to use stable_sort here to av
Łukasz Anforowicz 2015/10/09 16:54:07 There is a partial order here (things from the sam
1239 // prefer their referrers in cases where the frame url has already been 1270 waiting_item_queue_.begin(), waiting_item_queue_.end(),
1240 // covered by savable resource links. 1271 [](SaveItem* x, SaveItem* y) {
1241 for (auto& frame_url : frame_urls_to_save_) { 1272 DCHECK(x);
1242 DCHECK(frame_url.is_valid()); 1273 DCHECK(y);
1243 if (0 == unique_urls_to_save_.count(frame_url)) { 1274 return (x->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) &&
1244 unique_urls_to_save_.insert(frame_url); 1275 (y->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
ncarter (slow) 2015/10/08 20:57:55 Would this be easier to comprehend if we wrote it
Łukasz Anforowicz 2015/10/09 16:54:08 I think your comparison would be broken as it woul
1245 SaveItem* save_item = new SaveItem( 1276 });
1246 frame_url, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1247 waiting_item_queue_.push(save_item);
1248 }
1249 }
1250
1251 all_save_items_count_ = static_cast<int>(waiting_item_queue_.size()); 1277 all_save_items_count_ = static_cast<int>(waiting_item_queue_.size());
1252 1278
1253 // We use total bytes as the total number of files we want to save. 1279 // We use total bytes as the total number of files we want to save.
1254 // Hack to avoid touching download_ after user cancel. 1280 // Hack to avoid touching download_ after user cancel.
1255 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 1281 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1256 // with SavePackage flow. 1282 // with SavePackage flow.
1257 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) 1283 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1258 download_->SetTotalBytes(all_save_items_count_); 1284 download_->SetTotalBytes(all_save_items_count_);
1259 1285
1260 if (all_save_items_count_) { 1286 if (all_save_items_count_) {
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
1505 1531
1506 void SavePackage::FinalizeDownloadEntry() { 1532 void SavePackage::FinalizeDownloadEntry() {
1507 DCHECK(download_); 1533 DCHECK(download_);
1508 DCHECK(download_manager_); 1534 DCHECK(download_manager_);
1509 1535
1510 download_manager_->OnSavePackageSuccessfullyFinished(download_); 1536 download_manager_->OnSavePackageSuccessfullyFinished(download_);
1511 StopObservation(); 1537 StopObservation();
1512 } 1538 }
1513 1539
1514 } // namespace content 1540 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698