OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/download/save_package.h" | 5 #include "content/browser/download/save_package.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/files/file_path.h" | 10 #include "base/files/file_path.h" |
11 #include "base/files/file_util.h" | 11 #include "base/files/file_util.h" |
12 #include "base/i18n/file_util_icu.h" | 12 #include "base/i18n/file_util_icu.h" |
13 #include "base/logging.h" | 13 #include "base/logging.h" |
14 #include "base/message_loop/message_loop.h" | 14 #include "base/message_loop/message_loop.h" |
15 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
16 #include "base/strings/string_piece.h" | 16 #include "base/strings/string_piece.h" |
17 #include "base/strings/string_split.h" | 17 #include "base/strings/string_split.h" |
18 #include "base/strings/sys_string_conversions.h" | 18 #include "base/strings/sys_string_conversions.h" |
19 #include "base/strings/utf_string_conversions.h" | 19 #include "base/strings/utf_string_conversions.h" |
20 #include "base/threading/thread.h" | 20 #include "base/threading/thread.h" |
21 #include "components/url_formatter/url_formatter.h" | 21 #include "components/url_formatter/url_formatter.h" |
22 #include "content/browser/download/download_item_impl.h" | 22 #include "content/browser/download/download_item_impl.h" |
23 #include "content/browser/download/download_manager_impl.h" | 23 #include "content/browser/download/download_manager_impl.h" |
24 #include "content/browser/download/download_stats.h" | 24 #include "content/browser/download/download_stats.h" |
25 #include "content/browser/download/save_file.h" | 25 #include "content/browser/download/save_file.h" |
26 #include "content/browser/download/save_file_manager.h" | 26 #include "content/browser/download/save_file_manager.h" |
27 #include "content/browser/download/save_item.h" | 27 #include "content/browser/download/save_item.h" |
28 #include "content/browser/frame_host/frame_tree.h" | |
29 #include "content/browser/frame_host/frame_tree_node.h" | |
28 #include "content/browser/loader/resource_dispatcher_host_impl.h" | 30 #include "content/browser/loader/resource_dispatcher_host_impl.h" |
29 #include "content/browser/renderer_host/render_process_host_impl.h" | 31 #include "content/browser/renderer_host/render_process_host_impl.h" |
30 #include "content/browser/renderer_host/render_view_host_delegate.h" | 32 #include "content/browser/renderer_host/render_view_host_delegate.h" |
31 #include "content/browser/renderer_host/render_view_host_impl.h" | 33 #include "content/browser/renderer_host/render_view_host_impl.h" |
34 #include "content/browser/web_contents/web_contents_impl.h" | |
32 #include "content/common/frame_messages.h" | 35 #include "content/common/frame_messages.h" |
33 #include "content/common/view_messages.h" | |
34 #include "content/public/browser/browser_context.h" | 36 #include "content/public/browser/browser_context.h" |
35 #include "content/public/browser/browser_thread.h" | 37 #include "content/public/browser/browser_thread.h" |
36 #include "content/public/browser/content_browser_client.h" | 38 #include "content/public/browser/content_browser_client.h" |
37 #include "content/public/browser/download_manager_delegate.h" | 39 #include "content/public/browser/download_manager_delegate.h" |
38 #include "content/public/browser/navigation_entry.h" | 40 #include "content/public/browser/navigation_entry.h" |
39 #include "content/public/browser/notification_service.h" | 41 #include "content/public/browser/notification_service.h" |
40 #include "content/public/browser/notification_types.h" | 42 #include "content/public/browser/notification_types.h" |
41 #include "content/public/browser/render_frame_host.h" | 43 #include "content/public/browser/render_frame_host.h" |
42 #include "content/public/browser/resource_context.h" | 44 #include "content/public/browser/resource_context.h" |
43 #include "content/public/browser/web_contents.h" | 45 #include "content/public/browser/web_contents.h" |
44 #include "net/base/filename_util.h" | 46 #include "net/base/filename_util.h" |
45 #include "net/base/io_buffer.h" | 47 #include "net/base/io_buffer.h" |
46 #include "net/base/mime_util.h" | 48 #include "net/base/mime_util.h" |
47 #include "net/url_request/url_request_context.h" | 49 #include "net/url_request/url_request_context.h" |
48 #include "third_party/WebKit/public/web/WebPageSerializerClient.h" | |
49 #include "url/url_constants.h" | 50 #include "url/url_constants.h" |
50 | 51 |
51 using base::Time; | 52 using base::Time; |
52 using blink::WebPageSerializerClient; | |
53 | 53 |
54 namespace content { | 54 namespace content { |
55 namespace { | 55 namespace { |
56 | 56 |
57 // A counter for uniquely identifying each save package. | 57 // A counter for uniquely identifying each save package. |
58 int g_save_package_id = 0; | 58 int g_save_package_id = 0; |
59 | 59 |
60 // Default name which will be used when we can not get proper name from | 60 // Default name which will be used when we can not get proper name from |
61 // resource URL. | 61 // resource URL. |
62 const char kDefaultSaveName[] = "saved_resource"; | 62 const char kDefaultSaveName[] = "saved_resource"; |
(...skipping 61 matching lines...) | (...skipping 61 matching lines...) |
124 void ResumeRequest() const override {} | 124 void ResumeRequest() const override {} |
125 void CancelRequest() const override {} | 125 void CancelRequest() const override {} |
126 std::string DebugString() const override { | 126 std::string DebugString() const override { |
127 return "SavePackage DownloadRequestHandle"; | 127 return "SavePackage DownloadRequestHandle"; |
128 } | 128 } |
129 | 129 |
130 private: | 130 private: |
131 base::WeakPtr<SavePackage> save_package_; | 131 base::WeakPtr<SavePackage> save_package_; |
132 }; | 132 }; |
133 | 133 |
134 int GetFrameTreeNodeId(RenderFrameHost* render_frame_host) { | |
135 auto* rfhi = static_cast<RenderFrameHostImpl*>(render_frame_host); | |
136 return rfhi->frame_tree_node()->frame_tree_node_id(); | |
137 } | |
138 | |
134 } // namespace | 139 } // namespace |
135 | 140 |
136 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] = | 141 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] = |
137 FILE_PATH_LITERAL("html"); | 142 FILE_PATH_LITERAL("html"); |
138 | 143 |
139 SavePackage::SavePackage(WebContents* web_contents, | 144 SavePackage::SavePackage(WebContents* web_contents, |
140 SavePageType save_type, | 145 SavePageType save_type, |
141 const base::FilePath& file_full_path, | 146 const base::FilePath& file_full_path, |
142 const base::FilePath& directory_full_path) | 147 const base::FilePath& directory_full_path) |
143 : WebContentsObserver(web_contents), | 148 : WebContentsObserver(web_contents), |
144 number_of_frames_pending_response_(0), | 149 number_of_frames_pending_response_(0), |
145 file_manager_(NULL), | 150 file_manager_(NULL), |
146 download_manager_(NULL), | 151 download_manager_(NULL), |
147 download_(NULL), | 152 download_(NULL), |
148 page_url_(GetUrlToBeSaved()), | 153 page_url_(GetUrlToBeSaved()), |
149 saved_main_file_path_(file_full_path), | 154 saved_main_file_path_(file_full_path), |
150 saved_main_directory_path_(directory_full_path), | 155 saved_main_directory_path_(directory_full_path), |
151 title_(web_contents->GetTitle()), | 156 title_(web_contents->GetTitle()), |
152 start_tick_(base::TimeTicks::Now()), | 157 start_tick_(base::TimeTicks::Now()), |
153 finished_(false), | 158 finished_(false), |
154 mhtml_finishing_(false), | 159 mhtml_finishing_(false), |
155 user_canceled_(false), | 160 user_canceled_(false), |
156 disk_error_occurred_(false), | 161 disk_error_occurred_(false), |
157 save_type_(save_type), | 162 save_type_(save_type), |
158 all_save_items_count_(0), | 163 all_save_items_count_(0), |
159 file_name_set_(&base::FilePath::CompareLessIgnoreCase), | 164 file_name_set_(&base::FilePath::CompareLessIgnoreCase), |
160 wait_state_(INITIALIZE), | 165 wait_state_(INITIALIZE), |
161 contents_id_(web_contents->GetRenderProcessHost()->GetID()), | 166 contents_id_(web_contents->GetRenderProcessHost()->GetID()), |
ncarter (slow)
2015/10/08 20:57:54
Not introduced by your cl, but just wanted to put
Łukasz Anforowicz
2015/10/09 16:54:08
Sure - I'll put that on my TODO list. OTOH, I see
ncarter (slow)
2015/10/12 23:31:47
Looks like it's used as a key in a map here, so th
| |
162 unique_id_(g_save_package_id++), | 167 unique_id_(g_save_package_id++), |
163 wrote_to_completed_file_(false), | 168 wrote_to_completed_file_(false), |
164 wrote_to_failed_file_(false) { | 169 wrote_to_failed_file_(false) { |
165 DCHECK(page_url_.is_valid()); | 170 DCHECK(page_url_.is_valid()); |
166 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || | 171 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || |
167 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || | 172 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || |
168 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)); | 173 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)); |
169 DCHECK(!saved_main_file_path_.empty() && | 174 DCHECK(!saved_main_file_path_.empty() && |
170 saved_main_file_path_.value().length() <= kMaxFilePathLength); | 175 saved_main_file_path_.value().length() <= kMaxFilePathLength); |
171 DCHECK(!saved_main_directory_path_.empty() && | 176 DCHECK(!saved_main_directory_path_.empty() && |
(...skipping 60 matching lines...) | (...skipping 60 matching lines...) |
232 Cancel(true); | 237 Cancel(true); |
233 } | 238 } |
234 | 239 |
235 // We should no longer be observing the DownloadItem at this point. | 240 // We should no longer be observing the DownloadItem at this point. |
236 CHECK(!download_); | 241 CHECK(!download_); |
237 | 242 |
238 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() + | 243 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() + |
239 completed_count() + | 244 completed_count() + |
240 in_process_count())); | 245 in_process_count())); |
241 // Free all SaveItems. | 246 // Free all SaveItems. |
242 while (!waiting_item_queue_.empty()) { | 247 STLDeleteElements(&waiting_item_queue_); |
243 // We still have some items which are waiting for start to save. | 248 STLDeleteValues(&in_progress_items_); |
244 SaveItem* save_item = waiting_item_queue_.front(); | |
245 waiting_item_queue_.pop(); | |
246 delete save_item; | |
247 } | |
248 | |
249 STLDeleteValues(&saved_success_items_); | 249 STLDeleteValues(&saved_success_items_); |
250 STLDeleteValues(&in_progress_items_); | |
251 STLDeleteValues(&saved_failed_items_); | 250 STLDeleteValues(&saved_failed_items_); |
252 | 251 |
253 file_manager_ = NULL; | 252 file_manager_ = NULL; |
254 } | 253 } |
255 | 254 |
256 GURL SavePackage::GetUrlToBeSaved() { | 255 GURL SavePackage::GetUrlToBeSaved() { |
257 // Instead of using web_contents_.GetURL here, we use url() (which is the | 256 // Instead of using web_contents_.GetURL here, we use url() (which is the |
258 // "real" url of the page) from the NavigationEntry because it reflects its | 257 // "real" url of the page) from the NavigationEntry because it reflects its |
259 // origin rather than the displayed one (returned by GetURL) which may be | 258 // origin rather than the displayed one (returned by GetURL) which may be |
260 // different (like having "view-source:" on the front). | 259 // different (like having "view-source:" on the front). |
(...skipping 81 matching lines...) | (...skipping 81 matching lines...) |
342 GetSavableResourceLinks(); | 341 GetSavableResourceLinks(); |
343 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) { | 342 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) { |
344 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind( | 343 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind( |
345 &SavePackage::OnMHTMLGenerated, this)); | 344 &SavePackage::OnMHTMLGenerated, this)); |
346 } else { | 345 } else { |
347 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_; | 346 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_; |
348 wait_state_ = NET_FILES; | 347 wait_state_ = NET_FILES; |
349 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ? | 348 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ? |
350 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : | 349 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : |
351 SaveFileCreateInfo::SAVE_FILE_FROM_NET; | 350 SaveFileCreateInfo::SAVE_FILE_FROM_NET; |
352 SaveItem* save_item = new SaveItem(page_url_, | 351 SaveItem* save_item = new SaveItem(page_url_, this, save_source); |
353 Referrer(), | |
354 this, | |
355 save_source); | |
356 // Add this item to waiting list. | 352 // Add this item to waiting list. |
357 waiting_item_queue_.push(save_item); | 353 waiting_item_queue_.push_back(save_item); |
358 all_save_items_count_ = 1; | 354 all_save_items_count_ = 1; |
359 download_->SetTotalBytes(1); | 355 download_->SetTotalBytes(1); |
360 | 356 |
361 DoSavingProcess(); | 357 DoSavingProcess(); |
362 } | 358 } |
363 } | 359 } |
364 | 360 |
365 void SavePackage::OnMHTMLGenerated(int64 size) { | 361 void SavePackage::OnMHTMLGenerated(int64 size) { |
366 if (size <= 0) { | 362 if (size <= 0) { |
367 Cancel(false); | 363 Cancel(false); |
(...skipping 550 matching lines...) | (...skipping 550 matching lines...) |
918 // SaveFileManager, which will dispatch it to different approach according to | 914 // SaveFileManager, which will dispatch it to different approach according to |
919 // the save source. Parameter process_all_remaining_items indicates whether | 915 // the save source. Parameter process_all_remaining_items indicates whether |
920 // we need to save all remaining items. | 916 // we need to save all remaining items. |
921 void SavePackage::SaveNextFile(bool process_all_remaining_items) { | 917 void SavePackage::SaveNextFile(bool process_all_remaining_items) { |
922 DCHECK(web_contents()); | 918 DCHECK(web_contents()); |
923 DCHECK(waiting_item_queue_.size()); | 919 DCHECK(waiting_item_queue_.size()); |
924 | 920 |
925 do { | 921 do { |
926 // Pop SaveItem from waiting list. | 922 // Pop SaveItem from waiting list. |
927 SaveItem* save_item = waiting_item_queue_.front(); | 923 SaveItem* save_item = waiting_item_queue_.front(); |
928 waiting_item_queue_.pop(); | 924 waiting_item_queue_.pop_front(); |
929 | 925 |
930 // Add the item to in_progress_items_. | 926 // Add the item to in_progress_items_. |
931 SaveUrlItemMap::iterator it = in_progress_items_.find( | 927 SaveUrlItemMap::iterator it = in_progress_items_.find( |
932 save_item->url().spec()); | 928 save_item->url().spec()); |
933 DCHECK(it == in_progress_items_.end()); | 929 DCHECK(it == in_progress_items_.end()); |
934 in_progress_items_[save_item->url().spec()] = save_item; | 930 in_progress_items_[save_item->url().spec()] = save_item; |
935 save_item->Start(); | 931 save_item->Start(); |
936 file_manager_->SaveURL(save_item->url(), | 932 file_manager_->SaveURL(save_item->url(), |
937 save_item->referrer(), | 933 save_item->referrer(), |
938 web_contents()->GetRenderProcessHost()->GetID(), | 934 web_contents()->GetRenderProcessHost()->GetID(), |
(...skipping 59 matching lines...) | (...skipping 59 matching lines...) |
998 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML)); | 994 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML)); |
999 if (waiting_item_queue_.size()) { | 995 if (waiting_item_queue_.size()) { |
1000 DCHECK(all_save_items_count_ == waiting_item_queue_.size()); | 996 DCHECK(all_save_items_count_ == waiting_item_queue_.size()); |
1001 SaveNextFile(false); | 997 SaveNextFile(false); |
1002 } | 998 } |
1003 } | 999 } |
1004 } | 1000 } |
1005 | 1001 |
1006 bool SavePackage::OnMessageReceived(const IPC::Message& message, | 1002 bool SavePackage::OnMessageReceived(const IPC::Message& message, |
1007 RenderFrameHost* render_frame_host) { | 1003 RenderFrameHost* render_frame_host) { |
1008 bool handled = true; | 1004 bool handled = true; |
ncarter (slow)
2015/10/08 20:57:55
Consider adding here:
RenderFrameHostImpl* sender
Łukasz Anforowicz
2015/10/09 16:54:07
Done. Thanks for the suggestion.
| |
1009 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(SavePackage, message, render_frame_host) | 1005 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(SavePackage, message, render_frame_host) |
1010 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksResponse, | 1006 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksResponse, |
1011 OnSavableResourceLinksResponse) | 1007 OnSavableResourceLinksResponse) |
1012 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksError, | 1008 IPC_MESSAGE_HANDLER(FrameHostMsg_SavableResourceLinksError, |
1013 OnSavableResourceLinksError) | 1009 OnSavableResourceLinksError) |
1014 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedHtmlWithLocalLinksResponse, | 1010 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedHtmlFragment, |
1015 OnSerializedHtmlWithLocalLinksResponse) | 1011 OnSerializedHtmlFragment) |
1012 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedLocalPathForSubFrame, | |
1013 OnSerializedLocalPathForSubFrame) | |
1014 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedLocalPathForSavableResource, | |
1015 OnSerializedLocalPathForSavableResource) | |
1016 IPC_MESSAGE_HANDLER(FrameHostMsg_SerializedEndOfFrame, | |
1017 OnSerializedEndOfFrame) | |
1016 IPC_MESSAGE_UNHANDLED(handled = false) | 1018 IPC_MESSAGE_UNHANDLED(handled = false) |
1017 IPC_END_MESSAGE_MAP() | 1019 IPC_END_MESSAGE_MAP() |
1018 return handled; | 1020 return handled; |
1019 } | 1021 } |
1020 | 1022 |
1021 // After finishing all SaveItems which need to get data from net. | 1023 // After finishing all SaveItems which need to get data from net. |
1022 // We collect all URLs which have local storage and send the | 1024 // We collect all URLs which have local storage and send the |
1023 // map:(originalURL:currentLocalPath) to render process (backend). | 1025 // map:(originalURL:currentLocalPath) to render process (backend). |
1024 // Then render process will serialize DOM and send data to us. | 1026 // Then render process will serialize DOM and send data to us. |
1025 void SavePackage::GetSerializedHtmlWithLocalLinks() { | 1027 void SavePackage::GetSerializedHtmlWithLocalLinks() { |
1026 if (wait_state_ != HTML_DATA) | 1028 if (wait_state_ != HTML_DATA) |
1027 return; | 1029 return; |
1028 std::vector<GURL> saved_links; | |
1029 std::vector<base::FilePath> saved_file_paths; | |
1030 int successful_started_items_count = 0; | 1030 int successful_started_items_count = 0; |
1031 | 1031 |
1032 // Collect all saved items which have local storage. | 1032 // Count all saved items which have local storage. |
1033 // First collect the status of all the resource files and check whether they | 1033 for (const auto& it : in_progress_items_) { |
1034 // have created local files although they have not been completely saved. | 1034 DCHECK_EQ(SaveFileCreateInfo::SAVE_FILE_FROM_DOM, it.second->save_source()); |
1035 // If yes, the file can be saved. Otherwise, there is a disk error, so we | 1035 if (it.second->has_final_name()) |
1036 // need to cancel the page saving job. | |
1037 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); | |
1038 it != in_progress_items_.end(); ++it) { | |
1039 DCHECK(it->second->save_source() == | |
1040 SaveFileCreateInfo::SAVE_FILE_FROM_DOM); | |
1041 if (it->second->has_final_name()) | |
1042 successful_started_items_count++; | 1036 successful_started_items_count++; |
1043 saved_links.push_back(it->second->url()); | |
1044 saved_file_paths.push_back(it->second->file_name()); | |
1045 } | 1037 } |
1046 | 1038 |
1047 // If not all file of HTML resource have been started, then wait. | 1039 // If not all file of HTML resource have been started, then wait. |
1048 if (successful_started_items_count != in_process_count()) | 1040 if (successful_started_items_count != in_process_count()) |
1049 return; | 1041 return; |
1050 | 1042 |
1051 // Collect all saved success items. | |
1052 for (SavedItemMap::iterator it = saved_success_items_.begin(); | |
1053 it != saved_success_items_.end(); ++it) { | |
1054 DCHECK(it->second->has_final_name()); | |
1055 saved_links.push_back(it->second->url()); | |
1056 saved_file_paths.push_back(it->second->file_name()); | |
1057 } | |
1058 | |
1059 // Get the relative directory name. | |
1060 base::FilePath relative_dir_name = saved_main_directory_path_.BaseName(); | |
1061 | |
1062 // Ask all frames for their serialized data. | 1043 // Ask all frames for their serialized data. |
1063 DCHECK_EQ(0, number_of_frames_pending_response_); | 1044 DCHECK_EQ(0, number_of_frames_pending_response_); |
1064 web_contents()->ForEachFrame(base::Bind( | 1045 web_contents()->ForEachFrame(base::Bind( |
1065 &SavePackage::GetSerializedHtmlWithLocalLinksForFrame, | 1046 &SavePackage::GetSerializedHtmlWithLocalLinksForFrame, |
1066 base::Unretained(this), // Safe, because ForEachFrame is synchronous. | 1047 base::Unretained(this))); // Safe, because ForEachFrame is synchronous. |
1067 saved_links, saved_file_paths, relative_dir_name)); | |
1068 DCHECK_LT(0, number_of_frames_pending_response_); | 1048 DCHECK_LT(0, number_of_frames_pending_response_); |
ncarter (slow)
2015/10/08 20:57:54
Another "fyi, no action needed, just want to put t
Łukasz Anforowicz
2015/10/09 16:54:07
Disclaimer: I've introduced this DCHECK in an earl
ncarter (slow)
2015/10/12 23:31:47
I agree with everything in your reply. I don't thi
| |
1069 } | 1049 } |
1070 | 1050 |
1071 void SavePackage::GetSerializedHtmlWithLocalLinksForFrame( | 1051 void SavePackage::GetSerializedHtmlWithLocalLinksForFrame( |
1072 const std::vector<GURL>& saved_links, | |
1073 const std::vector<base::FilePath>& saved_file_paths, | |
1074 const base::FilePath& relative_dir_name, | |
1075 RenderFrameHost* target) { | 1052 RenderFrameHost* target) { |
1076 number_of_frames_pending_response_++; | 1053 number_of_frames_pending_response_++; |
1077 target->Send(new FrameMsg_GetSerializedHtmlWithLocalLinks( | 1054 target->Send( |
1078 target->GetRoutingID(), saved_links, saved_file_paths, | 1055 new FrameMsg_GetSerializedHtmlWithLocalLinks(target->GetRoutingID())); |
1079 relative_dir_name)); | |
1080 } | 1056 } |
1081 | 1057 |
1082 // Process the serialized HTML content data of a specified frame | 1058 void SavePackage::OnSerializedHtmlFragment(RenderFrameHost* sender, |
1083 // retrieved from the renderer process. | 1059 const std::string& data) { |
1084 void SavePackage::OnSerializedHtmlWithLocalLinksResponse( | |
1085 RenderFrameHost* sender, | |
1086 const GURL& frame_url, | |
1087 const std::string& data, | |
1088 int32 status) { | |
1089 WebPageSerializerClient::PageSerializationStatus flag = | |
1090 static_cast<WebPageSerializerClient::PageSerializationStatus>(status); | |
1091 | |
1092 // When calling WebPageSerializer::serialize in non-recursive mode, the | |
1093 // AllFramesAreFinished is redundant - it is sent by each frame right after | |
1094 // CurrentFrameIsFinished. Therefore we ignore AllFramesAreFinished and | |
1095 // instead track pending frames in |number_of_frames_pending_response_|. | |
1096 if (flag == WebPageSerializerClient::AllFramesAreFinished) | |
1097 return; | |
1098 | |
1099 // Check current state. | |
1100 if (wait_state_ != HTML_DATA) | 1060 if (wait_state_ != HTML_DATA) |
1101 return; | 1061 return; |
1102 | 1062 |
1103 int id = contents_id(); | 1063 int frame_tree_node_id = GetFrameTreeNodeId(sender); |
1104 | 1064 SaveItem* save_item = frame_tree_node_id_to_save_item_[frame_tree_node_id]; |
ncarter (slow)
2015/10/08 20:57:55
What if it's not in the map?
Łukasz Anforowicz
2015/10/09 16:54:07
Good point. Thanks for catching this.
When think
| |
1105 SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec()); | 1065 if (save_item->state() != SaveItem::IN_PROGRESS) { |
1106 if (it == in_progress_items_.end()) { | 1066 LogWriteToAlreadyCompletedOrFailedSaveItem(save_item); |
1107 for (SavedItemMap::iterator saved_it = saved_success_items_.begin(); | |
1108 saved_it != saved_success_items_.end(); ++saved_it) { | |
1109 if (saved_it->second->url() == frame_url) { | |
1110 wrote_to_completed_file_ = true; | |
1111 break; | |
1112 } | |
1113 } | |
1114 | |
1115 it = saved_failed_items_.find(frame_url.spec()); | |
1116 if (it != saved_failed_items_.end()) | |
1117 wrote_to_failed_file_ = true; | |
1118 | |
1119 return; | 1067 return; |
1120 } | 1068 } |
1121 | 1069 |
1122 SaveItem* save_item = it->second; | |
1123 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM); | 1070 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM); |
1124 | |
1125 if (!data.empty()) { | 1071 if (!data.empty()) { |
1126 // Prepare buffer for saving HTML data. | 1072 // Prepare buffer for saving HTML data. |
1127 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size())); | 1073 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size())); |
1128 memcpy(new_data->data(), data.data(), data.size()); | 1074 memcpy(new_data->data(), data.data(), data.size()); |
1129 | 1075 |
1130 // Call write file functionality in file thread. | 1076 // Call write file functionality in file thread. |
1131 BrowserThread::PostTask( | 1077 BrowserThread::PostTask( |
1132 BrowserThread::FILE, FROM_HERE, | 1078 BrowserThread::FILE, FROM_HERE, |
1133 base::Bind(&SaveFileManager::UpdateSaveProgress, | 1079 base::Bind(&SaveFileManager::UpdateSaveProgress, file_manager_, |
1134 file_manager_, | 1080 save_item->save_id(), new_data, |
1135 save_item->save_id(), | |
1136 new_data, | |
1137 static_cast<int>(data.size()))); | 1081 static_cast<int>(data.size()))); |
1138 } | 1082 } |
1083 } | |
1084 | |
1085 void SavePackage::OnSerializedLocalPathForSubFrame( | |
1086 RenderFrameHost* sender, | |
1087 int render_frame_or_proxy_routing_id) { | |
1088 if (wait_state_ != HTML_DATA) | |
1089 return; | |
1090 | |
1091 FrameTreeNode* frame_tree_node = | |
1092 static_cast<WebContentsImpl*>(web_contents()) | |
1093 ->GetFrameTree() | |
ncarter (slow)
2015/10/08 20:57:54
If you passed in an RFHImpl here, you could altern
Łukasz Anforowicz
2015/10/09 16:54:07
Done (to have a safer code in case somebody wants
| |
1094 ->FindByRoutingID(sender->GetProcess()->GetID(), | |
1095 render_frame_or_proxy_routing_id); | |
1096 if (!frame_tree_node) | |
1097 return; | |
ncarter (slow)
2015/10/08 20:57:55
Would it be worthwhile here to validate that frame
Łukasz Anforowicz
2015/10/09 16:54:08
Done.
| |
1098 | |
1099 SaveItem* save_item = | |
1100 frame_tree_node_id_to_save_item_[frame_tree_node->frame_tree_node_id()]; | |
ncarter (slow)
2015/10/08 20:57:55
What if it's not in the map?
Łukasz Anforowicz
2015/10/09 16:54:08
It's ok in this case - this will be handled by Ser
| |
1101 SerializeLocalPathForSaveItem(sender, save_item, | |
1102 frame_tree_node->current_url()); | |
1103 } | |
1104 | |
1105 void SavePackage::OnSerializedLocalPathForSavableResource( | |
1106 RenderFrameHost* sender, | |
1107 const GURL& savable_resource) { | |
1108 if (wait_state_ != HTML_DATA) | |
1109 return; | |
1110 if (!savable_resource.is_valid()) | |
1111 return; | |
1112 | |
1113 SaveItem* save_item = url_to_save_item_[savable_resource]; | |
1114 SerializeLocalPathForSaveItem(sender, save_item, savable_resource); | |
1115 } | |
1116 | |
1117 void SavePackage::OnSerializedEndOfFrame(RenderFrameHost* sender) { | |
1118 if (wait_state_ != HTML_DATA) | |
1119 return; | |
1120 | |
1121 int frame_tree_node_id = GetFrameTreeNodeId(sender); | |
1122 SaveItem* save_item = frame_tree_node_id_to_save_item_[frame_tree_node_id]; | |
1123 if (save_item->state() != SaveItem::IN_PROGRESS) { | |
1124 LogWriteToAlreadyCompletedOrFailedSaveItem(save_item); | |
1125 return; | |
1126 } | |
1127 | |
1128 int id = contents_id(); | |
1139 | 1129 |
1140 // Current frame is completed saving, call finish in file thread. | 1130 // Current frame is completed saving, call finish in file thread. |
1141 if (flag == WebPageSerializerClient::CurrentFrameIsFinished) { | 1131 DVLOG(20) << " " << __FUNCTION__ << "()" |
1142 DVLOG(20) << " " << __FUNCTION__ << "()" | 1132 << " save_id = " << save_item->save_id() << " url = \"" |
1143 << " save_id = " << save_item->save_id() | 1133 << save_item->url().spec() << "\""; |
1144 << " url = \"" << save_item->url().spec() << "\""; | 1134 BrowserThread::PostTask( |
1145 BrowserThread::PostTask( | 1135 BrowserThread::FILE, FROM_HERE, |
1146 BrowserThread::FILE, FROM_HERE, | 1136 base::Bind(&SaveFileManager::SaveFinished, file_manager_, |
1147 base::Bind(&SaveFileManager::SaveFinished, | 1137 save_item->save_id(), save_item->url(), id, true)); |
1148 file_manager_, | 1138 number_of_frames_pending_response_--; |
ncarter (slow)
2015/10/08 20:57:55
Shouldn't save_item()->state() be transitioned to
Łukasz Anforowicz
2015/10/09 16:54:08
I've spent 10-15 minutes thinking that SaveItem::s
ncarter (slow)
2015/10/12 23:31:47
Gosh, I hope not. I'm glad you added the UI thread
| |
1149 save_item->save_id(), | 1139 DCHECK_LE(0, number_of_frames_pending_response_); |
1150 save_item->url(), | |
1151 id, | |
1152 true)); | |
1153 number_of_frames_pending_response_--; | |
1154 DCHECK_LE(0, number_of_frames_pending_response_); | |
1155 } | |
1156 | 1140 |
1157 // If all frames are finished saving, we need to close the remaining | 1141 // If the all frames are finished saving, we need to close the |
1158 // SaveItems. | 1142 // remaining SaveItems. |
1159 if (number_of_frames_pending_response_ == 0) { | 1143 if (number_of_frames_pending_response_ == 0) { |
1160 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); | 1144 for (const auto& pair : in_progress_items_) { |
1161 it != in_progress_items_.end(); ++it) { | |
1162 DVLOG(20) << " " << __FUNCTION__ << "()" | 1145 DVLOG(20) << " " << __FUNCTION__ << "()" |
1163 << " save_id = " << it->second->save_id() << " url = \"" | 1146 << " save_id = " << pair.second->save_id() << " url = \"" |
1164 << it->second->url().spec() << "\""; | 1147 << pair.second->url().spec() << "\""; |
1165 BrowserThread::PostTask( | 1148 BrowserThread::PostTask( |
1166 BrowserThread::FILE, FROM_HERE, | 1149 BrowserThread::FILE, FROM_HERE, |
1167 base::Bind(&SaveFileManager::SaveFinished, file_manager_, | 1150 base::Bind(&SaveFileManager::SaveFinished, file_manager_, |
1168 it->second->save_id(), it->second->url(), id, true)); | 1151 pair.second->save_id(), pair.second->url(), id, true)); |
1169 } | 1152 } |
1170 } | 1153 } |
1171 } | 1154 } |
1172 | 1155 |
1156 void SavePackage::SerializeLocalPathForSaveItem(RenderFrameHost* sender, | |
1157 SaveItem* save_item, | |
1158 const GURL& fallback_url) { | |
1159 std::string attr_value; | |
1160 if (save_item) { | |
1161 base::FilePath relative_path = | |
1162 base::FilePath(FILE_PATH_LITERAL(".")) | |
1163 .Append(saved_main_directory_path_.BaseName()) | |
1164 .Append(save_item->file_name()) | |
1165 .NormalizePathSeparatorsTo(FILE_PATH_LITERAL('/')); | |
1166 attr_value = relative_path.AsUTF8Unsafe(); | |
1167 } else { | |
1168 attr_value = fallback_url.spec(); | |
ncarter (slow)
2015/10/08 21:04:14
When does this happen? Could you add a comment exp
Łukasz Anforowicz
2015/10/09 16:54:08
Done.
| |
1169 } | |
1170 | |
1171 // TODO(lukasza): Escape attr_value (" + &). | |
ncarter (slow)
2015/10/08 20:57:55
My understanding is that there are no security con
Łukasz Anforowicz
2015/10/09 16:54:08
Hmmm... thinking about it some more, I've realized
| |
1172 OnSerializedHtmlFragment(sender, attr_value); | |
1173 } | |
1174 | |
1175 void SavePackage::LogWriteToAlreadyCompletedOrFailedSaveItem( | |
1176 SaveItem* save_item) { | |
1177 auto already_completed_it = | |
1178 std::find_if(saved_success_items_.begin(), saved_success_items_.end(), | |
1179 [save_item](const SavedItemMap::value_type& v) { | |
1180 return v.second->url() == save_item->url(); | |
1181 }); | |
1182 if (already_completed_it != saved_success_items_.end()) | |
1183 wrote_to_completed_file_ = true; | |
1184 | |
1185 auto failed_it = | |
1186 std::find_if(saved_failed_items_.begin(), saved_failed_items_.end(), | |
1187 [save_item](const SaveUrlItemMap::value_type& v) { | |
1188 DCHECK(v.second->has_final_name()); | |
1189 return v.second->url() == save_item->url(); | |
1190 }); | |
1191 if (failed_it != saved_failed_items_.end()) | |
1192 wrote_to_failed_file_ = true; | |
1193 } | |
1194 | |
1173 // Ask for all savable resource links from backend, including main frame and | 1195 // Ask for all savable resource links from backend, including main frame and |
1174 // sub-frames. | 1196 // sub-frames. |
1175 void SavePackage::GetSavableResourceLinks() { | 1197 void SavePackage::GetSavableResourceLinks() { |
1176 if (wait_state_ != START_PROCESS) | 1198 if (wait_state_ != START_PROCESS) |
1177 return; | 1199 return; |
1178 | 1200 |
1179 wait_state_ = RESOURCES_LIST; | 1201 wait_state_ = RESOURCES_LIST; |
1180 | 1202 |
1181 DCHECK_EQ(0, number_of_frames_pending_response_); | 1203 DCHECK_EQ(0, number_of_frames_pending_response_); |
1182 web_contents()->ForEachFrame(base::Bind( | 1204 web_contents()->ForEachFrame(base::Bind( |
(...skipping 16 matching lines...) Expand all Loading... | |
1199 return; | 1221 return; |
1200 | 1222 |
1201 if (resources_list.size() != referrers_list.size()) | 1223 if (resources_list.size() != referrers_list.size()) |
1202 return; | 1224 return; |
1203 | 1225 |
1204 // Add all sub-resources to wait list. | 1226 // Add all sub-resources to wait list. |
1205 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) { | 1227 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) { |
1206 const GURL& u = resources_list[i]; | 1228 const GURL& u = resources_list[i]; |
1207 if (!u.is_valid()) | 1229 if (!u.is_valid()) |
1208 continue; | 1230 continue; |
1209 if (unique_urls_to_save_.count(u)) | |
1210 continue; | |
1211 unique_urls_to_save_.insert(u); | |
1212 | 1231 |
1213 SaveFileCreateInfo::SaveFileSource save_source = | 1232 SaveItem* save_item = url_to_save_item_[u]; |
1214 u.SchemeIsFile() ? SaveFileCreateInfo::SAVE_FILE_FROM_FILE | 1233 if (save_item == nullptr) { |
ncarter (slow)
2015/10/08 20:57:55
You seem to be preferring operator[] throughout in
Łukasz Anforowicz
2015/10/09 16:54:07
I've changed to find in some places, but when I tr
ncarter (slow)
2015/10/12 23:31:47
I'm OK with operator[] in these cases, since it's
| |
1215 : SaveFileCreateInfo::SAVE_FILE_FROM_NET; | 1234 SaveFileCreateInfo::SaveFileSource save_source = |
1216 SaveItem* save_item = new SaveItem(u, referrers_list[i], this, save_source); | 1235 u.SchemeIsFile() ? SaveFileCreateInfo::SAVE_FILE_FROM_FILE |
1217 waiting_item_queue_.push(save_item); | 1236 : SaveFileCreateInfo::SAVE_FILE_FROM_NET; |
1237 url_to_save_item_[u] = save_item = new SaveItem(u, this, save_source); | |
1238 waiting_item_queue_.push_back(save_item); | |
1239 } | |
1240 save_item->set_referrer(referrers_list[i]); | |
1218 } | 1241 } |
1219 | 1242 |
1220 // Store savable frame_url for later processing. | 1243 // Add the frame to wait list. |
1221 if (frame_url.is_valid()) | 1244 // TODO(lukasza): Do not dedupe subframes based on url (see crbug.com/538188). |
1222 frame_urls_to_save_.push_back(frame_url); | 1245 if (frame_url.is_valid()) { |
1246 SaveItem* save_item = url_to_save_item_[frame_url]; | |
1247 if (save_item == nullptr) { | |
1248 url_to_save_item_[frame_url] = save_item = | |
1249 new SaveItem(frame_url, this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM); | |
1250 waiting_item_queue_.push_back(save_item); | |
1251 } | |
1252 frame_tree_node_id_to_save_item_[GetFrameTreeNodeId(sender)] = save_item; | |
1253 } | |
1223 | 1254 |
1224 CompleteSavableResourceLinksResponse(); | 1255 CompleteSavableResourceLinksResponse(); |
1225 } | 1256 } |
1226 | 1257 |
1227 void SavePackage::OnSavableResourceLinksError(RenderFrameHost* sender) { | 1258 void SavePackage::OnSavableResourceLinksError(RenderFrameHost* sender) { |
1228 CompleteSavableResourceLinksResponse(); | 1259 CompleteSavableResourceLinksResponse(); |
1229 } | 1260 } |
1230 | 1261 |
1231 void SavePackage::CompleteSavableResourceLinksResponse() { | 1262 void SavePackage::CompleteSavableResourceLinksResponse() { |
1232 --number_of_frames_pending_response_; | 1263 --number_of_frames_pending_response_; |
1233 DCHECK_LE(0, number_of_frames_pending_response_); | 1264 DCHECK_LE(0, number_of_frames_pending_response_); |
1234 if (number_of_frames_pending_response_ != 0) | 1265 if (number_of_frames_pending_response_ != 0) |
1235 return; // Need to wait for more responses from RenderFrames. | 1266 return; // Need to wait for more responses from RenderFrames. |
1236 | 1267 |
1237 // Add frame urls to the waiting_item_queue_. This is done *after* processing | 1268 // Sort |waiting_item_queue_| so that frames go last. |
1238 // all savable resource links (i.e. in OnSavableResourceLinksResponse), to | 1269 std::sort( |
ncarter (slow)
2015/10/08 20:57:55
I wonder if we ought to use stable_sort here to av
Łukasz Anforowicz
2015/10/09 16:54:07
There is a partial order here (things from the sam
| |
1239 // prefer their referrers in cases where the frame url has already been | 1270 waiting_item_queue_.begin(), waiting_item_queue_.end(), |
1240 // covered by savable resource links. | 1271 [](SaveItem* x, SaveItem* y) { |
1241 for (auto& frame_url : frame_urls_to_save_) { | 1272 DCHECK(x); |
1242 DCHECK(frame_url.is_valid()); | 1273 DCHECK(y); |
1243 if (0 == unique_urls_to_save_.count(frame_url)) { | 1274 return (x->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) && |
1244 unique_urls_to_save_.insert(frame_url); | 1275 (y->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM); |
ncarter (slow)
2015/10/08 20:57:55
Would this be easier to comprehend if we wrote it
Łukasz Anforowicz
2015/10/09 16:54:08
I think your comparison would be broken as it woul
| |
1245 SaveItem* save_item = new SaveItem( | 1276 }); |
1246 frame_url, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM); | |
1247 waiting_item_queue_.push(save_item); | |
1248 } | |
1249 } | |
1250 | |
1251 all_save_items_count_ = static_cast<int>(waiting_item_queue_.size()); | 1277 all_save_items_count_ = static_cast<int>(waiting_item_queue_.size()); |
1252 | 1278 |
1253 // We use total bytes as the total number of files we want to save. | 1279 // We use total bytes as the total number of files we want to save. |
1254 // Hack to avoid touching download_ after user cancel. | 1280 // Hack to avoid touching download_ after user cancel. |
1255 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem | 1281 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem |
1256 // with SavePackage flow. | 1282 // with SavePackage flow. |
1257 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) | 1283 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) |
1258 download_->SetTotalBytes(all_save_items_count_); | 1284 download_->SetTotalBytes(all_save_items_count_); |
1259 | 1285 |
1260 if (all_save_items_count_) { | 1286 if (all_save_items_count_) { |
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1505 | 1531 |
1506 void SavePackage::FinalizeDownloadEntry() { | 1532 void SavePackage::FinalizeDownloadEntry() { |
1507 DCHECK(download_); | 1533 DCHECK(download_); |
1508 DCHECK(download_manager_); | 1534 DCHECK(download_manager_); |
1509 | 1535 |
1510 download_manager_->OnSavePackageSuccessfullyFinished(download_); | 1536 download_manager_->OnSavePackageSuccessfullyFinished(download_); |
1511 StopObservation(); | 1537 StopObservation(); |
1512 } | 1538 } |
1513 | 1539 |
1514 } // namespace content | 1540 } // namespace content |
OLD | NEW |