Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: content/renderer/savable_resources.cc

Issue 1442463002: Save-Page-As-Complete-HTML: Even better handling of <object> elements. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@objects-fix-for-complete-html
Patch Set: Rebasing... Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/savable_resources.h" 5 #include "content/renderer/savable_resources.h"
6 6
7 #include <set> 7 #include <set>
8 8
9 #include "base/compiler_specific.h" 9 #include "base/compiler_specific.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
(...skipping 16 matching lines...) Expand all
27 using blink::WebInputElement; 27 using blink::WebInputElement;
28 using blink::WebLocalFrame; 28 using blink::WebLocalFrame;
29 using blink::WebNode; 29 using blink::WebNode;
30 using blink::WebString; 30 using blink::WebString;
31 using blink::WebVector; 31 using blink::WebVector;
32 using blink::WebView; 32 using blink::WebView;
33 33
34 namespace content { 34 namespace content {
35 namespace { 35 namespace {
36 36
37 // Get all savable resource links from current element. One element might 37 // If present and valid, then push the link associated with |element|
38 // have more than one resource link. It is possible to have some links 38 // into either SavableResourcesResult::subframes or
39 // in one CSS stylesheet. 39 // SavableResourcesResult::resources_list.
40 void GetSavableResourceLinkForElement( 40 void GetSavableResourceLinkForElement(
41 const WebElement& element, 41 const WebElement& element,
42 const WebDocument& current_doc, 42 const WebDocument& current_doc,
43 SavableResourcesResult* result) { 43 SavableResourcesResult* result) {
44 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) {
45 GURL complete_url = current_doc.completeURL(element.getAttribute("src"));
46 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
47
48 SavableSubframe subframe;
49 subframe.original_url = complete_url;
50 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
51
52 result->subframes->push_back(subframe);
53 return;
54 }
55
56 // Check whether the node has sub resource URL or not. 44 // Check whether the node has sub resource URL or not.
57 WebString value = GetSubResourceLinkFromElement(element); 45 WebString value = GetSubResourceLinkFromElement(element);
58 if (value.isNull()) 46 if (value.isNull())
59 return; 47 return;
48
60 // Get absolute URL. 49 // Get absolute URL.
61 GURL u = current_doc.completeURL(value); 50 GURL u = current_doc.completeURL(value);
nasko 2015/11/23 23:38:19 nit: I find single letter variables to be hard to
Łukasz Anforowicz 2015/11/24 00:02:06 Done. (using "element_url" as the variable name -
51
52 // See whether to report this element as a subframe.
53 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
54 if (web_frame) {
55 bool frameContainsHtmlDoc = false;
56 if (web_frame->isWebLocalFrame()) {
57 WebDocument doc = web_frame->document();
58 frameContainsHtmlDoc = doc.isHTMLDocument() || doc.isXHTMLDocument();
59 } else {
60 // Cannot inspect contents of a remote frame, so we use a heuristic:
61 // Assume that <iframe> and <frame> elements contain a html document,
62 // and other elements (i.e. <object>) contain plugins or other resources.
63 // If the heuristic is wrong (i.e. the remote frame in <object> does
64 // contain an html document), then things will still work, but with the
65 // following caveats: 1) original frame content will be saved and 2) links
66 // in frame's html doc will not be rewritten to point to locally saved
67 // files.
68 frameContainsHtmlDoc = element.hasHTMLTagName("iframe") ||
69 element.hasHTMLTagName("frame");
70 }
71
72 if (frameContainsHtmlDoc) {
73 SavableSubframe subframe;
74 subframe.original_url = u;
75 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
76 result->subframes->push_back(subframe);
77 return;
78 }
79 }
80
62 // ignore invalid URL 81 // ignore invalid URL
63 if (!u.is_valid()) 82 if (!u.is_valid())
64 return; 83 return;
84
65 // Ignore those URLs which are not standard protocols. Because FTP 85 // Ignore those URLs which are not standard protocols. Because FTP
66 // protocol does not have a cache mechanism, we will skip all 86 // protocol does not have a cache mechanism, we will skip all
67 // sub-resources if they use FTP protocol. 87 // sub-resources if they use FTP protocol.
68 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) 88 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme))
69 return; 89 return;
70 90
71 result->resources_list->push_back(u); 91 result->resources_list->push_back(u);
72 } 92 }
73 93
74 } // namespace 94 } // namespace
(...skipping 30 matching lines...) Expand all
105 current_doc, 125 current_doc,
106 result); 126 result);
107 } 127 }
108 128
109 return true; 129 return true;
110 } 130 }
111 131
112 WebString GetSubResourceLinkFromElement(const WebElement& element) { 132 WebString GetSubResourceLinkFromElement(const WebElement& element) {
113 const char* attribute_name = NULL; 133 const char* attribute_name = NULL;
114 if (element.hasHTMLTagName("img") || 134 if (element.hasHTMLTagName("img") ||
135 element.hasHTMLTagName("frame") ||
136 element.hasHTMLTagName("iframe") ||
115 element.hasHTMLTagName("script")) { 137 element.hasHTMLTagName("script")) {
116 attribute_name = "src"; 138 attribute_name = "src";
117 } else if (element.hasHTMLTagName("input")) { 139 } else if (element.hasHTMLTagName("input")) {
118 const WebInputElement input = element.toConst<WebInputElement>(); 140 const WebInputElement input = element.toConst<WebInputElement>();
119 if (input.isImageButton()) { 141 if (input.isImageButton()) {
120 attribute_name = "src"; 142 attribute_name = "src";
121 } 143 }
122 } else if (element.hasHTMLTagName("body") || 144 } else if (element.hasHTMLTagName("body") ||
123 element.hasHTMLTagName("table") || 145 element.hasHTMLTagName("table") ||
124 element.hasHTMLTagName("tr") || 146 element.hasHTMLTagName("tr") ||
125 element.hasHTMLTagName("td")) { 147 element.hasHTMLTagName("td")) {
126 attribute_name = "background"; 148 attribute_name = "background";
127 } else if (element.hasHTMLTagName("blockquote") || 149 } else if (element.hasHTMLTagName("blockquote") ||
128 element.hasHTMLTagName("q") || 150 element.hasHTMLTagName("q") ||
129 element.hasHTMLTagName("del") || 151 element.hasHTMLTagName("del") ||
130 element.hasHTMLTagName("ins")) { 152 element.hasHTMLTagName("ins")) {
131 attribute_name = "cite"; 153 attribute_name = "cite";
132 } else if (element.hasHTMLTagName("object")) { 154 } else if (element.hasHTMLTagName("object")) {
133 // TODO(lukasza): When <object> contains a html document, it should be
134 // reported as a subframe, not as a savable resource (reporting as a
135 // savable resource works, but will save original html contents, not
136 // current html contents of the frame).
137 attribute_name = "data"; 155 attribute_name = "data";
138 } else if (element.hasHTMLTagName("link")) { 156 } else if (element.hasHTMLTagName("link")) {
139 // If the link element is not linked to css, ignore it. 157 // If the link element is not linked to css, ignore it.
140 if (base::LowerCaseEqualsASCII( 158 if (base::LowerCaseEqualsASCII(
141 base::StringPiece16(element.getAttribute("type")), "text/css") || 159 base::StringPiece16(element.getAttribute("type")), "text/css") ||
142 base::LowerCaseEqualsASCII( 160 base::LowerCaseEqualsASCII(
143 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) { 161 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) {
144 // TODO(jnd): Add support for extracting links of sub-resources which 162 // TODO(jnd): Add support for extracting links of sub-resources which
145 // are inside style-sheet such as @import, url(), etc. 163 // are inside style-sheet such as @import, url(), etc.
146 // See bug: http://b/issue?id=1111667. 164 // See bug: http://b/issue?id=1111667.
147 attribute_name = "href"; 165 attribute_name = "href";
148 } 166 }
149 } 167 }
150 if (!attribute_name) 168 if (!attribute_name)
151 return WebString(); 169 return WebString();
152 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name)); 170 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name));
153 // If value has content and does not start with "javascript:" then return it, 171 // If value has content and does not start with "javascript:" then return it,
154 // otherwise return NULL. 172 // otherwise return NULL.
155 if (!value.isNull() && !value.isEmpty() && 173 if (!value.isNull() && !value.isEmpty() &&
156 !base::StartsWith(value.utf8(), "javascript:", 174 !base::StartsWith(value.utf8(), "javascript:",
157 base::CompareCase::INSENSITIVE_ASCII)) 175 base::CompareCase::INSENSITIVE_ASCII))
158 return value; 176 return value;
159 177
160 return WebString(); 178 return WebString();
161 } 179 }
162 180
163 } // namespace content 181 } // namespace content
OLDNEW
« no previous file with comments | « chrome/test/data/save_page/frames-objects.htm ('k') | third_party/WebKit/Source/web/WebFrame.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698