OLD | NEW |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/renderer/savable_resources.h" | 5 #include "content/renderer/savable_resources.h" |
6 | 6 |
7 #include <set> | 7 #include <set> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 16 matching lines...) Expand all Loading... |
27 using blink::WebInputElement; | 27 using blink::WebInputElement; |
28 using blink::WebLocalFrame; | 28 using blink::WebLocalFrame; |
29 using blink::WebNode; | 29 using blink::WebNode; |
30 using blink::WebString; | 30 using blink::WebString; |
31 using blink::WebVector; | 31 using blink::WebVector; |
32 using blink::WebView; | 32 using blink::WebView; |
33 | 33 |
34 namespace content { | 34 namespace content { |
35 namespace { | 35 namespace { |
36 | 36 |
37 // Get all savable resource links from current element. One element might | 37 // Returns |true| if |web_frame| contains (or should be assumed to contain) |
38 // have more than one resource link. It is possible to have some links | 38 // an HTML document. |
39 // in one CSS stylesheet. | 39 bool DoesFrameContainHtmlDocument(const WebFrame& web_frame, |
| 40 const WebElement& element) { |
| 41 if (web_frame.isWebLocalFrame()) { |
| 42 WebDocument doc = web_frame.document(); |
| 43 return doc.isHTMLDocument() || doc.isXHTMLDocument(); |
| 44 } |
| 45 |
| 46 // Cannot inspect contents of a remote frame, so we use a heuristic: |
| 47 // Assume that <iframe> and <frame> elements contain an HTML document, |
| 48 // and other elements (i.e. <object>) contain plugins or other resources. |
| 49 // If the heuristic is wrong (i.e. the remote frame in <object> does |
| 50 // contain an HTML document), then things will still work, but with the |
| 51 // following caveats: 1) original frame content will be saved and 2) links |
| 52 // in frame's HTML doc will not be rewritten to point to locally saved |
| 53 // files. |
| 54 return element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame"); |
| 55 } |
| 56 |
| 57 // If present and valid, then push the link associated with |element| |
| 58 // into either SavableResourcesResult::subframes or |
| 59 // SavableResourcesResult::resources_list. |
40 void GetSavableResourceLinkForElement( | 60 void GetSavableResourceLinkForElement( |
41 const WebElement& element, | 61 const WebElement& element, |
42 const WebDocument& current_doc, | 62 const WebDocument& current_doc, |
43 SavableResourcesResult* result) { | 63 SavableResourcesResult* result) { |
44 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) { | 64 // Check whether the node has sub resource URL or not. |
45 GURL complete_url = current_doc.completeURL(element.getAttribute("src")); | 65 WebString value = GetSubResourceLinkFromElement(element); |
46 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element); | 66 if (value.isNull()) |
| 67 return; |
47 | 68 |
| 69 // Get absolute URL. |
| 70 GURL element_url = current_doc.completeURL(value); |
| 71 |
| 72 // See whether to report this element as a subframe. |
| 73 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element); |
| 74 if (web_frame && DoesFrameContainHtmlDocument(*web_frame, element)) { |
48 SavableSubframe subframe; | 75 SavableSubframe subframe; |
49 subframe.original_url = complete_url; | 76 subframe.original_url = element_url; |
50 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame); | 77 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame); |
51 | |
52 result->subframes->push_back(subframe); | 78 result->subframes->push_back(subframe); |
53 return; | 79 return; |
54 } | 80 } |
55 | 81 |
56 // Check whether the node has sub resource URL or not. | 82 // Ignore invalid URL. |
57 WebString value = GetSubResourceLinkFromElement(element); | 83 if (!element_url.is_valid()) |
58 if (value.isNull()) | |
59 return; | 84 return; |
60 // Get absolute URL. | 85 |
61 GURL u = current_doc.completeURL(value); | |
62 // ignore invalid URL | |
63 if (!u.is_valid()) | |
64 return; | |
65 // Ignore those URLs which are not standard protocols. Because FTP | 86 // Ignore those URLs which are not standard protocols. Because FTP |
66 // protocol does not have a cache mechanism, we will skip all | 87 // protocol does not have a cache mechanism, we will skip all |
67 // sub-resources if they use FTP protocol. | 88 // sub-resources if they use FTP protocol. |
68 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) | 89 if (!element_url.SchemeIsHTTPOrHTTPS() && |
| 90 !element_url.SchemeIs(url::kFileScheme)) |
69 return; | 91 return; |
70 | 92 |
71 result->resources_list->push_back(u); | 93 result->resources_list->push_back(element_url); |
72 } | 94 } |
73 | 95 |
74 } // namespace | 96 } // namespace |
75 | 97 |
76 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, | 98 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, |
77 SavableResourcesResult* result, | 99 SavableResourcesResult* result, |
78 const char** savable_schemes) { | 100 const char** savable_schemes) { |
79 // Get current frame's URL. | 101 // Get current frame's URL. |
80 GURL current_frame_url = current_frame->document().url(); | 102 GURL current_frame_url = current_frame->document().url(); |
81 | 103 |
(...skipping 23 matching lines...) Expand all Loading... |
105 current_doc, | 127 current_doc, |
106 result); | 128 result); |
107 } | 129 } |
108 | 130 |
109 return true; | 131 return true; |
110 } | 132 } |
111 | 133 |
112 WebString GetSubResourceLinkFromElement(const WebElement& element) { | 134 WebString GetSubResourceLinkFromElement(const WebElement& element) { |
113 const char* attribute_name = NULL; | 135 const char* attribute_name = NULL; |
114 if (element.hasHTMLTagName("img") || | 136 if (element.hasHTMLTagName("img") || |
| 137 element.hasHTMLTagName("frame") || |
| 138 element.hasHTMLTagName("iframe") || |
115 element.hasHTMLTagName("script")) { | 139 element.hasHTMLTagName("script")) { |
116 attribute_name = "src"; | 140 attribute_name = "src"; |
117 } else if (element.hasHTMLTagName("input")) { | 141 } else if (element.hasHTMLTagName("input")) { |
118 const WebInputElement input = element.toConst<WebInputElement>(); | 142 const WebInputElement input = element.toConst<WebInputElement>(); |
119 if (input.isImageButton()) { | 143 if (input.isImageButton()) { |
120 attribute_name = "src"; | 144 attribute_name = "src"; |
121 } | 145 } |
122 } else if (element.hasHTMLTagName("body") || | 146 } else if (element.hasHTMLTagName("body") || |
123 element.hasHTMLTagName("table") || | 147 element.hasHTMLTagName("table") || |
124 element.hasHTMLTagName("tr") || | 148 element.hasHTMLTagName("tr") || |
125 element.hasHTMLTagName("td")) { | 149 element.hasHTMLTagName("td")) { |
126 attribute_name = "background"; | 150 attribute_name = "background"; |
127 } else if (element.hasHTMLTagName("blockquote") || | 151 } else if (element.hasHTMLTagName("blockquote") || |
128 element.hasHTMLTagName("q") || | 152 element.hasHTMLTagName("q") || |
129 element.hasHTMLTagName("del") || | 153 element.hasHTMLTagName("del") || |
130 element.hasHTMLTagName("ins")) { | 154 element.hasHTMLTagName("ins")) { |
131 attribute_name = "cite"; | 155 attribute_name = "cite"; |
132 } else if (element.hasHTMLTagName("object")) { | 156 } else if (element.hasHTMLTagName("object")) { |
133 // TODO(lukasza): When <object> contains a html document, it should be | |
134 // reported as a subframe, not as a savable resource (reporting as a | |
135 // savable resource works, but will save original html contents, not | |
136 // current html contents of the frame). | |
137 attribute_name = "data"; | 157 attribute_name = "data"; |
138 } else if (element.hasHTMLTagName("link")) { | 158 } else if (element.hasHTMLTagName("link")) { |
139 // If the link element is not linked to css, ignore it. | 159 // If the link element is not linked to css, ignore it. |
140 if (base::LowerCaseEqualsASCII( | 160 if (base::LowerCaseEqualsASCII( |
141 base::StringPiece16(element.getAttribute("type")), "text/css") || | 161 base::StringPiece16(element.getAttribute("type")), "text/css") || |
142 base::LowerCaseEqualsASCII( | 162 base::LowerCaseEqualsASCII( |
143 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) { | 163 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) { |
144 // TODO(jnd): Add support for extracting links of sub-resources which | 164 // TODO(jnd): Add support for extracting links of sub-resources which |
145 // are inside style-sheet such as @import, url(), etc. | 165 // are inside style-sheet such as @import, url(), etc. |
146 // See bug: http://b/issue?id=1111667. | 166 // See bug: http://b/issue?id=1111667. |
147 attribute_name = "href"; | 167 attribute_name = "href"; |
148 } | 168 } |
149 } | 169 } |
150 if (!attribute_name) | 170 if (!attribute_name) |
151 return WebString(); | 171 return WebString(); |
152 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name)); | 172 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name)); |
153 // If value is non-empty and does not start with "javascript:", return it; | 173 // If value is non-empty and does not start with "javascript:", return it; |
154 // otherwise return a null WebString. | 174 // otherwise return a null WebString. |
155 if (!value.isNull() && !value.isEmpty() && | 175 if (!value.isNull() && !value.isEmpty() && |
156 !base::StartsWith(value.utf8(), "javascript:", | 176 !base::StartsWith(value.utf8(), "javascript:", |
157 base::CompareCase::INSENSITIVE_ASCII)) | 177 base::CompareCase::INSENSITIVE_ASCII)) |
158 return value; | 178 return value; |
159 | 179 |
160 return WebString(); | 180 return WebString(); |
161 } | 181 } |
162 | 182 |
163 } // namespace content | 183 } // namespace content |
OLD | NEW |