OLD | NEW |
---|---|
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/renderer/savable_resources.h" | 5 #include "content/renderer/savable_resources.h" |
6 | 6 |
7 #include <set> | 7 #include <set> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 17 matching lines...) Expand all Loading... | |
28 using blink::WebLocalFrame; | 28 using blink::WebLocalFrame; |
29 using blink::WebNode; | 29 using blink::WebNode; |
30 using blink::WebNodeList; | 30 using blink::WebNodeList; |
31 using blink::WebString; | 31 using blink::WebString; |
32 using blink::WebVector; | 32 using blink::WebVector; |
33 using blink::WebView; | 33 using blink::WebView; |
34 | 34 |
35 namespace content { | 35 namespace content { |
36 namespace { | 36 namespace { |
37 | 37 |
38 // Structure for storage the unique set of all savable resource links for | |
39 // making sure that no duplicated resource link in final result. The consumer | |
40 // of the SavableResourcesUniqueCheck is responsible for keeping these pointers | |
41 // valid for the lifetime of the SavableResourcesUniqueCheck instance. | |
42 struct SavableResourcesUniqueCheck { | |
43 // Unique set of all sub resource links. | |
44 std::set<GURL>* resources_set; | |
45 // Unique set of all frame links. | |
46 std::set<GURL>* frames_set; | |
47 // Collection of all frames we go through when getting all savable resource | |
48 // links. | |
49 std::vector<WebFrame*>* frames; | |
50 | |
51 SavableResourcesUniqueCheck() | |
52 : resources_set(NULL), | |
53 frames_set(NULL), | |
54 frames(NULL) {} | |
55 | |
56 SavableResourcesUniqueCheck(std::set<GURL>* resources_set, | |
57 std::set<GURL>* frames_set, std::vector<WebFrame*>* frames) | |
58 : resources_set(resources_set), | |
59 frames_set(frames_set), | |
60 frames(frames) {} | |
61 }; | |
62 | |
63 // Get all savable resource links from current element. One element might | 38 // Get all savable resource links from current element. One element might |
64 // have more than one resource link. It is possible to have some links | 39 // have more than one resource link. It is possible to have some links |
65 // in one CSS stylesheet. | 40 // in one CSS stylesheet. |
66 void GetSavableResourceLinkForElement( | 41 void GetSavableResourceLinkForElement( |
67 const WebElement& element, | 42 const WebElement& element, |
68 const WebDocument& current_doc, | 43 const WebDocument& current_doc, |
69 SavableResourcesUniqueCheck* unique_check, | |
70 SavableResourcesResult* result) { | 44 SavableResourcesResult* result) { |
71 | 45 |
72 // Handle frame and iframe tag. | 46 // Handle frame and iframe tag. |
73 if (element.hasHTMLTagName("iframe") || | 47 if (element.hasHTMLTagName("iframe") || |
74 element.hasHTMLTagName("frame")) { | 48 element.hasHTMLTagName("frame")) { |
nasko
2015/09/02 23:45:55
nit: The above fits on one line. Also might be goo
Łukasz Anforowicz
2015/09/03 16:59:57
Good point. Done.
| |
75 WebFrame* sub_frame = WebLocalFrame::fromFrameOwnerElement(element); | |
76 if (sub_frame) | |
77 unique_check->frames->push_back(sub_frame); | |
78 return; | 49 return; |
79 } | 50 } |
80 | 51 |
81 // Check whether the node has sub resource URL or not. | 52 // Check whether the node has sub resource URL or not. |
82 WebString value = GetSubResourceLinkFromElement(element); | 53 WebString value = GetSubResourceLinkFromElement(element); |
83 if (value.isNull()) | 54 if (value.isNull()) |
84 return; | 55 return; |
85 // Get absolute URL. | 56 // Get absolute URL. |
86 GURL u = current_doc.completeURL(value); | 57 GURL u = current_doc.completeURL(value); |
87 // ignore invalid URL | 58 // ignore invalid URL |
88 if (!u.is_valid()) | 59 if (!u.is_valid()) |
89 return; | 60 return; |
90 // Ignore those URLs which are not standard protocols. Because FTP | 61 // Ignore those URLs which are not standard protocols. Because FTP |
91 // protocol does not have cache mechanism, we will skip all | 63 // protocol does not have cache mechanism, we will skip all |
92 // sub-resources if they use FTP protocol. | 63 // sub-resources if they use FTP protocol. |
93 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) | 64 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) |
94 return; | 65 return; |
95 // Ignore duplicated resource link. | 66 // Ignore duplicated resource link. |
96 if (!unique_check->resources_set->insert(u).second) | |
97 return; | |
98 result->resources_list->push_back(u); | 67 result->resources_list->push_back(u); |
99 // Insert referrer for above new resource link. | 68 // Insert referrer for above new resource link. |
100 result->referrer_urls_list->push_back(GURL()); | 69 result->referrer_urls_list->push_back(GURL()); |
101 result->referrer_policies_list->push_back(blink::WebReferrerPolicyDefault); | 70 result->referrer_policies_list->push_back(blink::WebReferrerPolicyDefault); |
102 } | 71 } |
103 | 72 |
104 // Get all savable resource links from current WebFrameImpl object pointer. | 73 } // namespace |
105 void GetAllSavableResourceLinksForFrame(WebFrame* current_frame, | 74 |
106 SavableResourcesUniqueCheck* unique_check, | 75 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, |
107 SavableResourcesResult* result, | 76 SavableResourcesResult* result, |
108 const char** savable_schemes) { | 77 const char** savable_schemes) { |
109 // Get current frame's URL. | 78 // Get current frame's URL. |
110 GURL current_frame_url = current_frame->document().url(); | 79 GURL current_frame_url = current_frame->document().url(); |
111 | 80 |
112 // If url of current frame is invalid, ignore it. | 81 // If url of current frame is invalid, ignore it. |
113 if (!current_frame_url.is_valid()) | 82 if (!current_frame_url.is_valid()) |
114 return; | 83 return false; |
115 | 84 |
116 // If url of current frame is not a savable protocol, ignore it. | 85 // If url of current frame is not a savable protocol, ignore it. |
117 bool is_valid_protocol = false; | 86 bool is_valid_protocol = false; |
118 for (int i = 0; savable_schemes[i] != NULL; ++i) { | 87 for (int i = 0; savable_schemes[i] != NULL; ++i) { |
119 if (current_frame_url.SchemeIs(savable_schemes[i])) { | 88 if (current_frame_url.SchemeIs(savable_schemes[i])) { |
120 is_valid_protocol = true; | 89 is_valid_protocol = true; |
121 break; | 90 break; |
122 } | 91 } |
123 } | 92 } |
124 if (!is_valid_protocol) | 93 if (!is_valid_protocol) |
125 return; | 94 return false; |
126 | |
127 // If find same frame we have recorded, ignore it. | |
128 if (!unique_check->frames_set->insert(current_frame_url).second) | |
129 return; | |
130 | 95 |
131 // Get current using document. | 96 // Get current using document. |
132 WebDocument current_doc = current_frame->document(); | 97 WebDocument current_doc = current_frame->document(); |
133 // Go through all descendant nodes. | 98 // Go through all descendant nodes. |
134 WebElementCollection all = current_doc.all(); | 99 WebElementCollection all = current_doc.all(); |
135 // Go through all elements in this frame. | 100 // Go through all elements in this frame. |
136 for (WebElement element = all.firstItem(); !element.isNull(); | 101 for (WebElement element = all.firstItem(); !element.isNull(); |
137 element = all.nextItem()) { | 102 element = all.nextItem()) { |
138 GetSavableResourceLinkForElement(element, | 103 GetSavableResourceLinkForElement(element, |
139 current_doc, | 104 current_doc, |
140 unique_check, | |
141 result); | 105 result); |
142 } | 106 } |
107 | |
108 return true; | |
143 } | 109 } |
144 | 110 |
145 } // namespace | |
146 | |
147 WebString GetSubResourceLinkFromElement(const WebElement& element) { | 111 WebString GetSubResourceLinkFromElement(const WebElement& element) { |
148 const char* attribute_name = NULL; | 112 const char* attribute_name = NULL; |
149 if (element.hasHTMLTagName("img") || | 113 if (element.hasHTMLTagName("img") || |
150 element.hasHTMLTagName("script")) { | 114 element.hasHTMLTagName("script")) { |
151 attribute_name = "src"; | 115 attribute_name = "src"; |
152 } else if (element.hasHTMLTagName("input")) { | 116 } else if (element.hasHTMLTagName("input")) { |
153 const WebInputElement input = element.toConst<WebInputElement>(); | 117 const WebInputElement input = element.toConst<WebInputElement>(); |
154 if (input.isImageButton()) { | 118 if (input.isImageButton()) { |
155 attribute_name = "src"; | 119 attribute_name = "src"; |
156 } | 120 } |
(...skipping 25 matching lines...) Expand all Loading... | |
182 // If value has content and does not start with "javascript:" then return it, | 146 // If value has content and does not start with "javascript:" then return it, |
183 // otherwise return NULL. | 147 // otherwise return NULL. |
184 if (!value.isNull() && !value.isEmpty() && | 148 if (!value.isNull() && !value.isEmpty() && |
185 !base::StartsWith(value.utf8(), "javascript:", | 149 !base::StartsWith(value.utf8(), "javascript:", |
186 base::CompareCase::INSENSITIVE_ASCII)) | 150 base::CompareCase::INSENSITIVE_ASCII)) |
187 return value; | 151 return value; |
188 | 152 |
189 return WebString(); | 153 return WebString(); |
190 } | 154 } |
191 | 155 |
192 // Get all savable resource links from current webview, include main | |
193 // frame and sub-frame | |
194 bool GetAllSavableResourceLinksForCurrentPage(WebView* view, | |
195 const GURL& page_url, SavableResourcesResult* result, | |
196 const char** savable_schemes) { | |
197 WebFrame* main_frame = view->mainFrame(); | |
198 if (!main_frame) | |
199 return false; | |
200 | |
201 std::set<GURL> resources_set; | |
202 std::set<GURL> frames_set; | |
203 std::vector<WebFrame*> frames; | |
204 SavableResourcesUniqueCheck unique_check(&resources_set, | |
205 &frames_set, | |
206 &frames); | |
207 | |
208 GURL main_page_gurl(main_frame->document().url()); | |
209 | |
210 // Make sure we are saving same page between embedder and webkit. | |
211 // If page has being navigated, embedder will get three empty vector, | |
212 // which will make the saving page job ended. | |
213 if (page_url != main_page_gurl) | |
214 return true; | |
215 | |
216 // First, process main frame. | |
217 frames.push_back(main_frame); | |
218 | |
219 // Check all resource in this page, include sub-frame. | |
220 for (int i = 0; i < static_cast<int>(frames.size()); ++i) { | |
221 // Get current frame's all savable resource links. | |
222 GetAllSavableResourceLinksForFrame(frames[i], &unique_check, result, | |
223 savable_schemes); | |
224 } | |
225 | |
226 // Since frame's src can also point to sub-resources link, so it is possible | |
227 // that some URLs in frames_list are also in resources_list. For those | |
228 // URLs, we will remove it from frame_list, only keep them in resources_list. | |
229 for (std::set<GURL>::iterator it = frames_set.begin(); | |
230 it != frames_set.end(); ++it) { | |
231 // Append unique frame source to savable frame list. | |
232 if (resources_set.find(*it) == resources_set.end()) | |
233 result->frames_list->push_back(*it); | |
234 } | |
235 | |
236 return true; | |
237 } | |
238 | |
239 } // namespace content | 156 } // namespace content |
OLD | NEW |