| OLD | NEW |
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/renderer/savable_resources.h" | 5 #include "content/renderer/savable_resources.h" |
| 6 | 6 |
| 7 #include <set> | 7 #include <set> |
| 8 | 8 |
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 17 matching lines...) Expand all Loading... |
| 28 using blink::WebLocalFrame; | 28 using blink::WebLocalFrame; |
| 29 using blink::WebNode; | 29 using blink::WebNode; |
| 30 using blink::WebNodeList; | 30 using blink::WebNodeList; |
| 31 using blink::WebString; | 31 using blink::WebString; |
| 32 using blink::WebVector; | 32 using blink::WebVector; |
| 33 using blink::WebView; | 33 using blink::WebView; |
| 34 | 34 |
| 35 namespace content { | 35 namespace content { |
| 36 namespace { | 36 namespace { |
| 37 | 37 |
| 38 // Structure for storing the unique set of all savable resource links, | |
| 39 // making sure there is no duplicated resource link in the final result. The consumer | |
| 40 // of the SavableResourcesUniqueCheck is responsible for keeping these pointers | |
| 41 // valid for the lifetime of the SavableResourcesUniqueCheck instance. | |
| 42 struct SavableResourcesUniqueCheck { | |
| 43 // Unique set of all sub resource links. | |
| 44 std::set<GURL>* resources_set; | |
| 45 // Unique set of all frame links. | |
| 46 std::set<GURL>* frames_set; | |
| 47 // Collection of all frames we go through when getting all savable resource | |
| 48 // links. | |
| 49 std::vector<WebFrame*>* frames; | |
| 50 | |
| 51 SavableResourcesUniqueCheck() | |
| 52 : resources_set(NULL), | |
| 53 frames_set(NULL), | |
| 54 frames(NULL) {} | |
| 55 | |
| 56 SavableResourcesUniqueCheck(std::set<GURL>* resources_set, | |
| 57 std::set<GURL>* frames_set, std::vector<WebFrame*>* frames) | |
| 58 : resources_set(resources_set), | |
| 59 frames_set(frames_set), | |
| 60 frames(frames) {} | |
| 61 }; | |
| 62 | |
| 63 // Get all savable resource links from current element. One element might | 38 // Get all savable resource links from current element. One element might |
| 64 // have more than one resource link. It is possible to have some links | 39 // have more than one resource link. It is possible to have some links |
| 65 // in one CSS stylesheet. | 40 // in one CSS stylesheet. |
| 66 void GetSavableResourceLinkForElement( | 41 void GetSavableResourceLinkForElement( |
| 67 const WebElement& element, | 42 const WebElement& element, |
| 68 const WebDocument& current_doc, | 43 const WebDocument& current_doc, |
| 69 SavableResourcesUniqueCheck* unique_check, | |
| 70 SavableResourcesResult* result) { | 44 SavableResourcesResult* result) { |
| 71 | 45 // Skipping frame and iframe tag. |
| 72 // Handle frame and iframe tag. | 46 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) { |
| 73 if (element.hasHTMLTagName("iframe") || | |
| 74 element.hasHTMLTagName("frame")) { | |
| 75 WebFrame* sub_frame = WebLocalFrame::fromFrameOwnerElement(element); | |
| 76 if (sub_frame) | |
| 77 unique_check->frames->push_back(sub_frame); | |
| 78 return; | 47 return; |
| 79 } | 48 } |
| 80 | 49 |
| 81 // Check whether the node has sub resource URL or not. | 50 // Check whether the node has sub resource URL or not. |
| 82 WebString value = GetSubResourceLinkFromElement(element); | 51 WebString value = GetSubResourceLinkFromElement(element); |
| 83 if (value.isNull()) | 52 if (value.isNull()) |
| 84 return; | 53 return; |
| 85 // Get absolute URL. | 54 // Get absolute URL. |
| 86 GURL u = current_doc.completeURL(value); | 55 GURL u = current_doc.completeURL(value); |
| 87 // ignore invalid URL | 56 // ignore invalid URL |
| 88 if (!u.is_valid()) | 57 if (!u.is_valid()) |
| 89 return; | 58 return; |
| 90 // Ignore those URLs which are not standard protocols. Because FTP | 59 // Ignore those URLs which are not standard protocols. Because FTP |
| 91 // protocol does not have a cache mechanism, we will skip all | 60 // protocol does not have a cache mechanism, we will skip all |
| 92 // sub-resources if they use FTP protocol. | 61 // sub-resources if they use FTP protocol. |
| 93 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) | 62 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) |
| 94 return; | 63 return; |
| 95 // Ignore duplicated resource link. | 64 // Ignore duplicated resource link. |
| 96 if (!unique_check->resources_set->insert(u).second) | |
| 97 return; | |
| 98 result->resources_list->push_back(u); | 65 result->resources_list->push_back(u); |
| 99 // Insert referrer for above new resource link. | 66 // Insert referrer for above new resource link. |
| 100 result->referrer_urls_list->push_back(GURL()); | 67 result->referrer_urls_list->push_back(GURL()); |
| 101 result->referrer_policies_list->push_back(blink::WebReferrerPolicyDefault); | 68 result->referrer_policies_list->push_back(blink::WebReferrerPolicyDefault); |
| 102 } | 69 } |
| 103 | 70 |
| 104 // Get all savable resource links from current WebFrameImpl object pointer. | 71 } // namespace |
| 105 void GetAllSavableResourceLinksForFrame(WebFrame* current_frame, | 72 |
| 106 SavableResourcesUniqueCheck* unique_check, | 73 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, |
| 107 SavableResourcesResult* result, | 74 SavableResourcesResult* result, |
| 108 const char** savable_schemes) { | 75 const char** savable_schemes) { |
| 109 // Get current frame's URL. | 76 // Get current frame's URL. |
| 110 GURL current_frame_url = current_frame->document().url(); | 77 GURL current_frame_url = current_frame->document().url(); |
| 111 | 78 |
| 112 // If url of current frame is invalid, ignore it. | 79 // If url of current frame is invalid, ignore it. |
| 113 if (!current_frame_url.is_valid()) | 80 if (!current_frame_url.is_valid()) |
| 114 return; | 81 return false; |
| 115 | 82 |
| 116 // If url of current frame is not a savable protocol, ignore it. | 83 // If url of current frame is not a savable protocol, ignore it. |
| 117 bool is_valid_protocol = false; | 84 bool is_valid_protocol = false; |
| 118 for (int i = 0; savable_schemes[i] != NULL; ++i) { | 85 for (int i = 0; savable_schemes[i] != NULL; ++i) { |
| 119 if (current_frame_url.SchemeIs(savable_schemes[i])) { | 86 if (current_frame_url.SchemeIs(savable_schemes[i])) { |
| 120 is_valid_protocol = true; | 87 is_valid_protocol = true; |
| 121 break; | 88 break; |
| 122 } | 89 } |
| 123 } | 90 } |
| 124 if (!is_valid_protocol) | 91 if (!is_valid_protocol) |
| 125 return; | 92 return false; |
| 126 | |
| 127 // If we find a frame we have already recorded, ignore it. | |
| 128 if (!unique_check->frames_set->insert(current_frame_url).second) | |
| 129 return; | |
| 130 | 93 |
| 131 // Get the document currently in use. | 94 // Get the document currently in use. |
| 132 WebDocument current_doc = current_frame->document(); | 95 WebDocument current_doc = current_frame->document(); |
| 133 // Go through all descendant nodes. | 96 // Go through all descendant nodes. |
| 134 WebElementCollection all = current_doc.all(); | 97 WebElementCollection all = current_doc.all(); |
| 135 // Go through all elements in this frame. | 98 // Go through all elements in this frame. |
| 136 for (WebElement element = all.firstItem(); !element.isNull(); | 99 for (WebElement element = all.firstItem(); !element.isNull(); |
| 137 element = all.nextItem()) { | 100 element = all.nextItem()) { |
| 138 GetSavableResourceLinkForElement(element, | 101 GetSavableResourceLinkForElement(element, |
| 139 current_doc, | 102 current_doc, |
| 140 unique_check, | |
| 141 result); | 103 result); |
| 142 } | 104 } |
| 105 |
| 106 return true; |
| 143 } | 107 } |
| 144 | 108 |
| 145 } // namespace | |
| 146 | |
| 147 WebString GetSubResourceLinkFromElement(const WebElement& element) { | 109 WebString GetSubResourceLinkFromElement(const WebElement& element) { |
| 148 const char* attribute_name = NULL; | 110 const char* attribute_name = NULL; |
| 149 if (element.hasHTMLTagName("img") || | 111 if (element.hasHTMLTagName("img") || |
| 150 element.hasHTMLTagName("script")) { | 112 element.hasHTMLTagName("script")) { |
| 151 attribute_name = "src"; | 113 attribute_name = "src"; |
| 152 } else if (element.hasHTMLTagName("input")) { | 114 } else if (element.hasHTMLTagName("input")) { |
| 153 const WebInputElement input = element.toConst<WebInputElement>(); | 115 const WebInputElement input = element.toConst<WebInputElement>(); |
| 154 if (input.isImageButton()) { | 116 if (input.isImageButton()) { |
| 155 attribute_name = "src"; | 117 attribute_name = "src"; |
| 156 } | 118 } |
| (...skipping 25 matching lines...) Expand all Loading... |
| 182 // If value has content and does not start with "javascript:" then return it, | 144 // If value has content and does not start with "javascript:" then return it, |
| 183 // otherwise return NULL. | 145 // otherwise return NULL. |
| 184 if (!value.isNull() && !value.isEmpty() && | 146 if (!value.isNull() && !value.isEmpty() && |
| 185 !base::StartsWith(value.utf8(), "javascript:", | 147 !base::StartsWith(value.utf8(), "javascript:", |
| 186 base::CompareCase::INSENSITIVE_ASCII)) | 148 base::CompareCase::INSENSITIVE_ASCII)) |
| 187 return value; | 149 return value; |
| 188 | 150 |
| 189 return WebString(); | 151 return WebString(); |
| 190 } | 152 } |
| 191 | 153 |
| 192 // Get all savable resource links from the current webview, including the main | |
| 193 // frame and sub-frames. | |
| 194 bool GetAllSavableResourceLinksForCurrentPage(WebView* view, | |
| 195 const GURL& page_url, SavableResourcesResult* result, | |
| 196 const char** savable_schemes) { | |
| 197 WebFrame* main_frame = view->mainFrame(); | |
| 198 if (!main_frame) | |
| 199 return false; | |
| 200 | |
| 201 std::set<GURL> resources_set; | |
| 202 std::set<GURL> frames_set; | |
| 203 std::vector<WebFrame*> frames; | |
| 204 SavableResourcesUniqueCheck unique_check(&resources_set, | |
| 205 &frames_set, | |
| 206 &frames); | |
| 207 | |
| 208 GURL main_page_gurl(main_frame->document().url()); | |
| 209 | |
| 210 // Make sure we are saving same page between embedder and webkit. | |
| 211 // If the page has been navigated, the embedder will get three empty vectors, | |
| 212 // which will end the save-page job. | |
| 213 if (page_url != main_page_gurl) | |
| 214 return true; | |
| 215 | |
| 216 // First, process main frame. | |
| 217 frames.push_back(main_frame); | |
| 218 | |
| 219 // Check all resource in this page, include sub-frame. | |
| 220 for (int i = 0; i < static_cast<int>(frames.size()); ++i) { | |
| 221 // Get current frame's all savable resource links. | |
| 222 GetAllSavableResourceLinksForFrame(frames[i], &unique_check, result, | |
| 223 savable_schemes); | |
| 224 } | |
| 225 | |
| 226 // Since a frame's src can also point to a sub-resource link, it is possible | |
| 227 // that some URLs in frames_list are also in resources_list. For those | |
| 228 // URLs, we will remove them from frames_list and only keep them in resources_list. | |
| 229 for (std::set<GURL>::iterator it = frames_set.begin(); | |
| 230 it != frames_set.end(); ++it) { | |
| 231 // Append unique frame source to savable frame list. | |
| 232 if (resources_set.find(*it) == resources_set.end()) | |
| 233 result->frames_list->push_back(*it); | |
| 234 } | |
| 235 | |
| 236 return true; | |
| 237 } | |
| 238 | |
| 239 } // namespace content | 154 } // namespace content |
| OLD | NEW |