Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3028)

Side by Side Diff: content/renderer/savable_resources.cc

Issue 1442463002: Save-Page-As-Complete-HTML: Even better handling of <object> elements. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@objects-fix-for-complete-html
Patch Set: Fixing a silly build issue... :-/ Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/renderer/savable_resources.h" 5 #include "content/renderer/savable_resources.h"
6 6
7 #include <set> 7 #include <set>
8 8
9 #include "base/compiler_specific.h" 9 #include "base/compiler_specific.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
(...skipping 16 matching lines...) Expand all
27 using blink::WebInputElement; 27 using blink::WebInputElement;
28 using blink::WebLocalFrame; 28 using blink::WebLocalFrame;
29 using blink::WebNode; 29 using blink::WebNode;
30 using blink::WebString; 30 using blink::WebString;
31 using blink::WebVector; 31 using blink::WebVector;
32 using blink::WebView; 32 using blink::WebView;
33 33
34 namespace content { 34 namespace content {
35 namespace { 35 namespace {
36 36
37 // Get all savable resource links from current element. One element might 37 // Returns |true| if |web_frame| contains (or should be assumed to contain)
38 // have more than one resource link. It is possible to have some links 38 // an HTML document.
39 // in one CSS stylesheet. 39 bool DoesFrameContainHtmlDocument(const WebFrame& web_frame,
40 const WebElement& element) {
41 if (web_frame.isWebLocalFrame()) {
42 WebDocument doc = web_frame.document();
43 return doc.isHTMLDocument() || doc.isXHTMLDocument();
44 }
45
46 // Cannot inspect contents of a remote frame, so we use a heuristic:
47 // Assume that <iframe> and <frame> elements contain a html document,
48 // and other elements (i.e. <object>) contain plugins or other resources.
49 // If the heuristic is wrong (i.e. the remote frame in <object> does
50 // contain an html document), then things will still work, but with the
51 // following caveats: 1) original frame content will be saved and 2) links
52 // in frame's html doc will not be rewritten to point to locally saved
53 // files.
54 return element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame");
55 }
56
57 // If present and valid, then push the link associated with |element|
58 // into either SavableResourcesResult::subframes or
59 // SavableResourcesResult::resources_list.
40 void GetSavableResourceLinkForElement( 60 void GetSavableResourceLinkForElement(
41 const WebElement& element, 61 const WebElement& element,
42 const WebDocument& current_doc, 62 const WebDocument& current_doc,
43 SavableResourcesResult* result) { 63 SavableResourcesResult* result) {
44 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) { 64 // Check whether the node has sub resource URL or not.
45 GURL complete_url = current_doc.completeURL(element.getAttribute("src")); 65 WebString value = GetSubResourceLinkFromElement(element);
46 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element); 66 if (value.isNull())
67 return;
47 68
69 // Get absolute URL.
70 GURL element_url = current_doc.completeURL(value);
71
72 // See whether to report this element as a subframe.
73 WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
74 if (web_frame && DoesFrameContainHtmlDocument(*web_frame, element)) {
48 SavableSubframe subframe; 75 SavableSubframe subframe;
49 subframe.original_url = complete_url; 76 subframe.original_url = element_url;
50 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame); 77 subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
51
52 result->subframes->push_back(subframe); 78 result->subframes->push_back(subframe);
53 return; 79 return;
54 } 80 }
55 81
56 // Check whether the node has sub resource URL or not. 82 // Ignore invalid URL.
57 WebString value = GetSubResourceLinkFromElement(element); 83 if (!element_url.is_valid())
58 if (value.isNull())
59 return; 84 return;
60 // Get absolute URL. 85
61 GURL u = current_doc.completeURL(value);
62 // ignore invalid URL
63 if (!u.is_valid())
64 return;
65 // Ignore those URLs which are not standard protocols. Because FTP 86 // Ignore those URLs which are not standard protocols. Because FTP
66 // protocol does no have cache mechanism, we will skip all 87 // protocol does no have cache mechanism, we will skip all
67 // sub-resources if they use FTP protocol. 88 // sub-resources if they use FTP protocol.
68 if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme)) 89 if (!element_url.SchemeIsHTTPOrHTTPS() &&
90 !element_url.SchemeIs(url::kFileScheme))
69 return; 91 return;
70 92
71 result->resources_list->push_back(u); 93 result->resources_list->push_back(element_url);
72 } 94 }
73 95
74 } // namespace 96 } // namespace
75 97
76 bool GetSavableResourceLinksForFrame(WebFrame* current_frame, 98 bool GetSavableResourceLinksForFrame(WebFrame* current_frame,
77 SavableResourcesResult* result, 99 SavableResourcesResult* result,
78 const char** savable_schemes) { 100 const char** savable_schemes) {
79 // Get current frame's URL. 101 // Get current frame's URL.
80 GURL current_frame_url = current_frame->document().url(); 102 GURL current_frame_url = current_frame->document().url();
81 103
(...skipping 23 matching lines...) Expand all
105 current_doc, 127 current_doc,
106 result); 128 result);
107 } 129 }
108 130
109 return true; 131 return true;
110 } 132 }
111 133
112 WebString GetSubResourceLinkFromElement(const WebElement& element) { 134 WebString GetSubResourceLinkFromElement(const WebElement& element) {
113 const char* attribute_name = NULL; 135 const char* attribute_name = NULL;
114 if (element.hasHTMLTagName("img") || 136 if (element.hasHTMLTagName("img") ||
137 element.hasHTMLTagName("frame") ||
138 element.hasHTMLTagName("iframe") ||
115 element.hasHTMLTagName("script")) { 139 element.hasHTMLTagName("script")) {
116 attribute_name = "src"; 140 attribute_name = "src";
117 } else if (element.hasHTMLTagName("input")) { 141 } else if (element.hasHTMLTagName("input")) {
118 const WebInputElement input = element.toConst<WebInputElement>(); 142 const WebInputElement input = element.toConst<WebInputElement>();
119 if (input.isImageButton()) { 143 if (input.isImageButton()) {
120 attribute_name = "src"; 144 attribute_name = "src";
121 } 145 }
122 } else if (element.hasHTMLTagName("body") || 146 } else if (element.hasHTMLTagName("body") ||
123 element.hasHTMLTagName("table") || 147 element.hasHTMLTagName("table") ||
124 element.hasHTMLTagName("tr") || 148 element.hasHTMLTagName("tr") ||
125 element.hasHTMLTagName("td")) { 149 element.hasHTMLTagName("td")) {
126 attribute_name = "background"; 150 attribute_name = "background";
127 } else if (element.hasHTMLTagName("blockquote") || 151 } else if (element.hasHTMLTagName("blockquote") ||
128 element.hasHTMLTagName("q") || 152 element.hasHTMLTagName("q") ||
129 element.hasHTMLTagName("del") || 153 element.hasHTMLTagName("del") ||
130 element.hasHTMLTagName("ins")) { 154 element.hasHTMLTagName("ins")) {
131 attribute_name = "cite"; 155 attribute_name = "cite";
132 } else if (element.hasHTMLTagName("object")) { 156 } else if (element.hasHTMLTagName("object")) {
133 // TODO(lukasza): When <object> contains a html document, it should be
134 // reported as a subframe, not as a savable resource (reporting as a
135 // savable resource works, but will save original html contents, not
136 // current html contents of the frame).
137 attribute_name = "data"; 157 attribute_name = "data";
138 } else if (element.hasHTMLTagName("link")) { 158 } else if (element.hasHTMLTagName("link")) {
139 // If the link element is not linked to css, ignore it. 159 // If the link element is not linked to css, ignore it.
140 if (base::LowerCaseEqualsASCII( 160 if (base::LowerCaseEqualsASCII(
141 base::StringPiece16(element.getAttribute("type")), "text/css") || 161 base::StringPiece16(element.getAttribute("type")), "text/css") ||
142 base::LowerCaseEqualsASCII( 162 base::LowerCaseEqualsASCII(
143 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) { 163 base::StringPiece16(element.getAttribute("rel")), "stylesheet")) {
144 // TODO(jnd): Add support for extracting links of sub-resources which 164 // TODO(jnd): Add support for extracting links of sub-resources which
145 // are inside style-sheet such as @import, url(), etc. 165 // are inside style-sheet such as @import, url(), etc.
146 // See bug: http://b/issue?id=1111667. 166 // See bug: http://b/issue?id=1111667.
147 attribute_name = "href"; 167 attribute_name = "href";
148 } 168 }
149 } 169 }
150 if (!attribute_name) 170 if (!attribute_name)
151 return WebString(); 171 return WebString();
152 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name)); 172 WebString value = element.getAttribute(WebString::fromUTF8(attribute_name));
153 // If value has content and does not start with "javascript:" then return it, 173 // If value has content and does not start with "javascript:" then return it,
154 // otherwise return a null WebString. 174 // otherwise return a null WebString.
155 if (!value.isNull() && !value.isEmpty() && 175 if (!value.isNull() && !value.isEmpty() &&
156 !base::StartsWith(value.utf8(), "javascript:", 176 !base::StartsWith(value.utf8(), "javascript:",
157 base::CompareCase::INSENSITIVE_ASCII)) 177 base::CompareCase::INSENSITIVE_ASCII))
158 return value; 178 return value;
159 179
160 return WebString(); 180 return WebString();
161 } 181 }
162 182
163 } // namespace content 183 } // namespace content
OLDNEW
« no previous file with comments | « chrome/test/data/save_page/frames-objects.htm ('k') | third_party/WebKit/Source/web/WebFrame.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698