Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2633)

Unified Diff: content/renderer/savable_resources.cc

Issue 1442463002: Save-Page-As-Complete-HTML: Even better handling of <object> elements. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@objects-fix-for-complete-html
Patch Set: s/\<u\>/element_url/g in savable_resources.cc Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/renderer/savable_resources.cc
diff --git a/content/renderer/savable_resources.cc b/content/renderer/savable_resources.cc
index 4c62c7cda77b5cc0a57532860e66d5116a03b5c5..79ad23dcf5bc47323ddf97f8a53c0e4bf76bfe9d 100644
--- a/content/renderer/savable_resources.cc
+++ b/content/renderer/savable_resources.cc
@@ -34,41 +34,62 @@ using blink::WebView;
namespace content {
namespace {
-// Get all savable resource links from current element. One element might
-// have more than one resource link. It is possible to have some links
-// in one CSS stylesheet.
+// If present and valid, then push the link associated with |element|
+// into either SavableResourcesResult::subframes or
+// SavableResourcesResult::resources_list.
void GetSavableResourceLinkForElement(
const WebElement& element,
const WebDocument& current_doc,
SavableResourcesResult* result) {
- if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame")) {
- GURL complete_url = current_doc.completeURL(element.getAttribute("src"));
- WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
-
- SavableSubframe subframe;
- subframe.original_url = complete_url;
- subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
-
- result->subframes->push_back(subframe);
- return;
- }
-
// Check whether the node has sub resource URL or not.
WebString value = GetSubResourceLinkFromElement(element);
if (value.isNull())
return;
+
// Get absolute URL.
- GURL u = current_doc.completeURL(value);
+ GURL element_url = current_doc.completeURL(value);
+
+ // See whether to report this element as a subframe.
+ WebFrame* web_frame = WebFrame::fromFrameOwnerElement(element);
+ if (web_frame) {
+ bool frameContainsHtmlDoc = false;
yosin_UTC9 2015/11/24 01:32:41 nit: Can we move out this if-statement, L55-L70 as
Łukasz Anforowicz 2015/11/24 17:57:11 Done. For a moment I wondered if we should have G
+ if (web_frame->isWebLocalFrame()) {
+ WebDocument doc = web_frame->document();
+ frameContainsHtmlDoc = doc.isHTMLDocument() || doc.isXHTMLDocument();
+ } else {
+ // Cannot inspect contents of a remote frame, so we use a heuristic:
+ // Assume that <iframe> and <frame> elements contain a html document,
+ // and other elements (i.e. <object>) contain plugins or other resources.
+ // If the heuristic is wrong (i.e. the remote frame in <object> does
+ // contain an html document), then things will still work, but with the
+ // following caveats: 1) original frame content will be saved and 2) links
+ // in frame's html doc will not be rewritten to point to locally saved
+ // files.
+ frameContainsHtmlDoc = element.hasHTMLTagName("iframe") ||
+ element.hasHTMLTagName("frame");
+ }
+
+ if (frameContainsHtmlDoc) {
+ SavableSubframe subframe;
+ subframe.original_url = element_url;
+ subframe.routing_id = GetRoutingIdForFrameOrProxy(web_frame);
+ result->subframes->push_back(subframe);
+ return;
+ }
+ }
+
// ignore invalid URL
- if (!u.is_valid())
+ if (!element_url.is_valid())
return;
+
// Ignore those URLs which are not standard protocols. Because FTP
// protocol does no have cache mechanism, we will skip all
// sub-resources if they use FTP protocol.
- if (!u.SchemeIsHTTPOrHTTPS() && !u.SchemeIs(url::kFileScheme))
+ if (!element_url.SchemeIsHTTPOrHTTPS() &&
+ !element_url.SchemeIs(url::kFileScheme))
return;
- result->resources_list->push_back(u);
+ result->resources_list->push_back(element_url);
}
} // namespace
@@ -112,6 +133,8 @@ bool GetSavableResourceLinksForFrame(WebFrame* current_frame,
WebString GetSubResourceLinkFromElement(const WebElement& element) {
const char* attribute_name = NULL;
if (element.hasHTMLTagName("img") ||
+ element.hasHTMLTagName("frame") ||
+ element.hasHTMLTagName("iframe") ||
element.hasHTMLTagName("script")) {
attribute_name = "src";
} else if (element.hasHTMLTagName("input")) {
@@ -130,10 +153,6 @@ WebString GetSubResourceLinkFromElement(const WebElement& element) {
element.hasHTMLTagName("ins")) {
attribute_name = "cite";
} else if (element.hasHTMLTagName("object")) {
- // TODO(lukasza): When <object> contains a html document, it should be
- // reported as a subframe, not as a savable resource (reporting as a
- // savable resource works, but will save original html contents, not
- // current html contents of the frame).
attribute_name = "data";
} else if (element.hasHTMLTagName("link")) {
// If the link element is not linked to css, ignore it.

Powered by Google App Engine
This is Rietveld 408576698