Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(99)

Side by Side Diff: content/renderer/savable_resources.cc

Issue 1442463002: Save-Page-As-Complete-HTML: Even better handling of <object> elements. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@objects-fix-for-complete-html
Patch Set: s/\<u\>/element_url/g in savable_resources.cc Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/savable_resources.h" 5 #include "content/renderer/savable_resources.h"
6 6
7 #include <set> 7 #include <set>
8 8
9 #include "base/compiler_specific.h" 9 #include "base/compiler_specific.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
(...skipping 16 matching lines...) Expand all
27 using blink::WebInputElement; 27 using blink::WebInputElement;
28 using blink::WebLocalFrame; 28 using blink::WebLocalFrame;
29 using blink::WebNode; 29 using blink::WebNode;
30 using blink::WebString; 30 using blink::WebString;
31 using blink::WebVector; 31 using blink::WebVector;
32 using blink::WebView; 32 using blink::WebView;
33 33
34 namespace content { 34 namespace content {
35 namespace { 35 namespace {
36 36
37 // Get all savable resource links from current element. One element might 37 // If present and valid, then push the link associated with |element|
38 // have more than one resource link. It is possible to have some links 38 // into either SavableResourcesResult::subframes or
39 // in one CSS stylesheet. 39 // SavableResourcesResult::resources_list.
40 void GetSavableResourceLinkForElement( 40 void GetSavableResourceLinkForElement(
41 const WebElement& element, 41 const WebElement& element,
42 const WebDocument& current_doc, 42 const WebDocument& current_doc,
43 SavableResourcesResult* result) { 43 SavableResourcesResult* result) {
44 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) {
45 GURL complete_url = current_doc.completeURL(element.getAttribute("src"));
46 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
47
48 SavableSubframe subframe;
49 subframe.original_url = complete_url;
50 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
51
52 result->subframes->push_back(subframe);
53 return;
54 }
55
56 // Check whether the node has sub resource URL or not. 44 // Check whether the node has sub resource URL or not.
57 WebString value = GetSubResourceLinkFromElement(element); 45 WebString value = GetSubResourceLinkFromElement(element);
58 if (value.isNull()) 46 if (value.isNull())
59 return; 47 return;
48
60 // Get absolute URL. 49 // Get absolute URL.
61 GURL u = current_doc.completeURL(value); 50 GURL element_url = current_doc.completeURL(value);
51
52 // See whether to report this element as a subframe.
53 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
54 if (web_frame) {
55 bool frameContainsHtmlDoc = false;
yosin_UTC9 2015/11/24 01:32:41 nit: Can we move out this if-statement, L55-L70 as
Łukasz Anforowicz 2015/11/24 17:57:11 Done. For a moment I wondered if we should have G
56 if (web_frame->isWebLocalFrame()) {
57 WebDocument doc = web_frame->document();
58 frameContainsHtmlDoc = doc.isHTMLDocument() || doc.isXHTMLDocument();
59 } else {
60 // Cannot inspect contents of a remote frame, so we use a heuristic:
61 // Assume that <iframe> and <frame> elements contain a html document,
62 // and other elements (i.e. <object>) contain plugins or other resources.
63 // If the heuristic is wrong (i.e. the remote frame in <object> does
64 // contain an html document), then things will still work, but with the
65 // following caveats: 1) original frame content will be saved and 2) links
66 // in frame's html doc will not be rewritten to point to locally saved
67 // files.
68 frameContainsHtmlDoc = element.hasHTMLTagName("iframe") ||
69 element.hasHTMLTagName("frame");
70 }
71
72 if (frameContainsHtmlDoc) {
73 SavableSubframe subframe;
74 subframe.original_url = element_url;
75 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
76 result->subframes->push_back(subframe);
77 return;
78 }
79 }
80
62 // ignore invalid URL 81 // ignore invalid URL
63 if (!u.is_valid()) 82 if (!element_url.is_valid())
64 return; 83 return;
84
65 // Ignore those URLs which are not standard protocols. Because FTP 85 // Ignore those URLs which are not standard protocols. Because FTP
66 // protocol does not have a cache mechanism, we will skip all 86 // protocol does not have a cache mechanism, we will skip all
67 // sub-resources if they use FTP protocol. 87 // sub-resources if they use FTP protocol.
68 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) 88 if (!element_url.SchemeIsHTTPOrHTTPS() &&
89 !element_url.SchemeIs(url::kFileScheme))
69 return; 90 return;
70 91
71 result->resources_list->push_back(u); 92 result->resources_list->push_back(element_url);
72 } 93 }
73 94
74 } // namespace 95 } // namespace
75 96
76 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, 97 bool GetSavableResourceLinksForFrame(WebFrame* current_frame,
77 SavableResourcesResult* result, 98 SavableResourcesResult* result,
78 const char** savable_schemes) { 99 const char** savable_schemes) {
79 // Get current frame's URL. 100 // Get current frame's URL.
80 GURL current_frame_url = current_frame->document().url(); 101 GURL current_frame_url = current_frame->document().url();
81 102
(...skipping 23 matching lines...) Expand all
105 current_doc, 126 current_doc,
106 result); 127 result);
107 } 128 }
108 129
109 return true; 130 return true;
110 } 131 }
111 132
112 WebString GetSubResourceLinkFromElement(const WebElement& element) { 133 WebString GetSubResourceLinkFromElement(const WebElement& element) {
113 const char* attribute_name = NULL; 134 const char* attribute_name = NULL;
114 if (element.hasHTMLTagName("img") || 135 if (element.hasHTMLTagName("img") ||
136 element.hasHTMLTagName("frame") ||
137 element.hasHTMLTagName("iframe") ||
115 element.hasHTMLTagName("script")) { 138 element.hasHTMLTagName("script")) {
116 attribute_name = "src"; 139 attribute_name = "src";
117 } else if (element.hasHTMLTagName("input")) { 140 } else if (element.hasHTMLTagName("input")) {
118 const WebInputElement input = element.toConst<WebInputElement>(); 141 const WebInputElement input = element.toConst<WebInputElement>();
119 if (input.isImageButton()) { 142 if (input.isImageButton()) {
120 attribute_name = "src"; 143 attribute_name = "src";
121 } 144 }
122 } else if (element.hasHTMLTagName("body") || 145 } else if (element.hasHTMLTagName("body") ||
123 element.hasHTMLTagName("table") || 146 element.hasHTMLTagName("table") ||
124 element.hasHTMLTagName("tr") || 147 element.hasHTMLTagName("tr") ||
125 element.hasHTMLTagName("td")) { 148 element.hasHTMLTagName("td")) {
126 attribute_name = "background"; 149 attribute_name = "background";
127 } else if (element.hasHTMLTagName("blockquote") || 150 } else if (element.hasHTMLTagName("blockquote") ||
128 element.hasHTMLTagName("q") || 151 element.hasHTMLTagName("q") ||
129 element.hasHTMLTagName("del") || 152 element.hasHTMLTagName("del") ||
130 element.hasHTMLTagName("ins")) { 153 element.hasHTMLTagName("ins")) {
131 attribute_name = "cite"; 154 attribute_name = "cite";
132 } else if (element.hasHTMLTagName("object")) { 155 } else if (element.hasHTMLTagName("object")) {
133 // TODO(lukasza): When <object> contains a html document, it should be
134 // reported as a subframe, not as a savable resource (reporting as a
135 // savable resource works, but will save original html contents, not
136 // current html contents of the frame).
137 attribute_name = "data"; 156 attribute_name = "data";
138 } else if (element.hasHTMLTagName("link")) { 157 } else if (element.hasHTMLTagName("link")) {
139 // If the link element is not linked to css, ignore it. 158 // If the link element is not linked to css, ignore it.
140 if (base::LowerCaseEqualsASCII( 159 if (base::LowerCaseEqualsASCII(
141 base::StringPiece16(element.getAttribute("type")), "text/css") || 160 base::StringPiece16(element.getAttribute("type")), "text/css") ||
142 base::LowerCaseEqualsASCII( 161 base::LowerCaseEqualsASCII(
143 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) { 162 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) {
144 // TODO(jnd): Add support for extracting links of sub-resources which 163 // TODO(jnd): Add support for extracting links of sub-resources which
145 // are inside style-sheet such as @import, url(), etc. 164 // are inside style-sheet such as @import, url(), etc.
146 // See bug: http://b/issue?id=1111667. 165 // See bug: http://b/issue?id=1111667.
147 attribute_name = "href"; 166 attribute_name = "href";
148 } 167 }
149 } 168 }
150 if (!attribute_name) 169 if (!attribute_name)
151 return WebString(); 170 return WebString();
152 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name)); 171 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name));
153 // If value has content and does not start with "javascript:" then return it, 172 // If value has content and does not start with "javascript:" then return it,
154 // otherwise return NULL. 173 // otherwise return NULL.
155 if (!value.isNull() && !value.isEmpty() && 174 if (!value.isNull() && !value.isEmpty() &&
156 !base::StartsWith(value.utf8(), "javascript:", 175 !base::StartsWith(value.utf8(), "javascript:",
157 base::CompareCase::INSENSITIVE_ASCII)) 176 base::CompareCase::INSENSITIVE_ASCII))
158 return value; 177 return value;
159 178
160 return WebString(); 179 return WebString();
161 } 180 }
162 181
163 } // namespace content 182 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698