Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Unified Diff: chrome/browser/safe_browsing/threat_details.cc

Issue 2837603002: Content API changes to improve DOM stitching in ThreatDetails code. (Closed)
Patch Set: Set output pointers correctly Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/threat_details.cc
diff --git a/chrome/browser/safe_browsing/threat_details.cc b/chrome/browser/safe_browsing/threat_details.cc
index 891651613248dd4c6fbdeb9902b93093edb83cfc..8d27d2e87c3af84852cc08bd523f39350acf25fc 100644
--- a/chrome/browser/safe_browsing/threat_details.cc
+++ b/chrome/browser/safe_browsing/threat_details.cc
@@ -22,6 +22,7 @@
#include "content/public/browser/navigation_controller.h"
#include "content/public/browser/navigation_entry.h"
#include "content/public/browser/render_frame_host.h"
+#include "content/public/browser/render_process_host.h"
#include "content/public/browser/web_contents.h"
#include "net/url_request/url_request_context_getter.h"
@@ -286,8 +287,9 @@ ClientSafeBrowsingReportRequest::Resource* ThreatDetails::AddUrl(
}
void ThreatDetails::AddDomElement(
+ const int process_id,
const int frame_tree_node_id,
- const std::string& frame_url,
+ const int other_frame_routing_id,
const int element_node_id,
const std::string& tagname,
const int parent_element_node_id,
@@ -314,12 +316,15 @@ void ThreatDetails::AddDomElement(
if (resource) {
cur_element->set_resource_id(resource->id());
- // For iframes, remember that this HTML Element represents an iframe with a
- // specific URL. Elements from a frame with this URL are children of this
- // element.
- if (is_frame &&
- !base::ContainsKey(iframe_src_to_element_map_, resource->url())) {
- iframe_src_to_element_map_[resource->url()] = cur_element;
+ // For iframes, lookup the frame tree node id of the render frame that
Charlie Reis 2017/05/05 21:03:06 nit: FrameTreeNode ID of the frame that
lpz 2017/05/10 14:21:08 Done.
+ // handled that iframe's content. This must be done on the UI thread, and
+ // will update a map of |element_key| to |frame_tree_node_id|. A second pass
+ // is done to update the |elements_| list using this mapping.
+ if (is_frame) {
+ BrowserThread::PostTask(
+ BrowserThread::UI, FROM_HERE,
+ base::Bind(&ThreatDetails::LookupOtherFrameId, this, element_key,
+ process_id, other_frame_routing_id));
Charlie Reis 2017/05/05 21:03:06 There's a lot of posting back and forth here. Is
lpz 2017/05/10 14:21:08 Applied suggestion
}
}
@@ -328,23 +333,11 @@ void ThreatDetails::AddDomElement(
HTMLElement* parent_element = nullptr;
if (parent_element_node_id == 0) {
// No parent indicates that this element is at the top of the current frame.
- // This frame could be a child of an iframe in another frame, or it could be
- // at the root of the whole page. If we have a frame URL then we can try to
- // map this element to its parent.
- if (!frame_url.empty()) {
- // First, remember that this element is at the top-level of a frame with
- // our frame URL.
- document_url_to_children_map_[frame_url].insert(cur_element->id());
-
- // Now check if the frame URL matches the src URL of an iframe elsewhere.
- // This means that we processed the parent iframe element earlier, so we
- // can add ourselves as a child of that iframe.
- // If no such iframe exists, it could be processed later, or this element
- // is in the top-level frame and truly has no parent.
- if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) {
- parent_element = iframe_src_to_element_map_[frame_url];
- }
- }
+ // Remember that this is a top-level element of the frame with the
+ // current |frame_tree_node_id|. If this element is inside an iframe, a
+ // second pass will insert this element as a child of its parent iframe.
+ frame_tree_id_to_children_map_[frame_tree_node_id].insert(
+ cur_element->id());
} else {
// We have a parent ID, so this element is just a child of something inside
// of our current frame. We can easily lookup our parent.
@@ -369,20 +362,6 @@ void ThreatDetails::AddDomElement(
parent_element->add_child_ids(cur_element->id());
}
}
-
- // Finally, we need to check if the current element is the parent of some
- // other elements that came in from another frame earlier. This only happens
- // if we are an iframe, and our src URL exists in
- // document_url_to_children_map_. If there is a match, then all of the
- // children in that map belong to us.
- if (is_frame && resource &&
- base::ContainsKey(document_url_to_children_map_, resource->url())) {
- const std::unordered_set<int>& child_ids =
- document_url_to_children_map_[resource->url()];
- for (const int child_id : child_ids) {
- cur_element->add_child_ids(child_id);
- }
- }
}
void ThreatDetails::StartCollection() {
@@ -456,12 +435,13 @@ void ThreatDetails::OnReceivedThreatDOMDetails(
// of our data structures (eg GetSerializedReport).
BrowserThread::PostTask(
BrowserThread::IO, FROM_HERE,
- base::BindOnce(&ThreatDetails::AddDOMDetails, this,
- sender->GetFrameTreeNodeId(),
- sender->GetLastCommittedURL(), params));
+ base::Bind(&ThreatDetails::AddDOMDetails, this,
+ sender->GetProcess()->GetID(), sender->GetFrameTreeNodeId(),
+ sender->GetLastCommittedURL(), params));
Charlie Reis 2017/05/05 21:03:06 From AddDOMDetails, it looks like params has all t
lpz 2017/05/10 14:21:08 Nice thanks for this. It seems to take care of the
}
void ThreatDetails::AddDOMDetails(
+ const int process_id,
const int frame_tree_node_id,
const GURL& frame_last_committed_url,
const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) {
@@ -482,22 +462,6 @@ void ThreatDetails::AddDOMDetails(
if (params.empty())
return;
- // Try to deduce the URL that the render frame was handling. First check if
- // the summary node from the renderer has a document URL. If not, try looking
- // at the last committed URL of the frame.
- GURL frame_url;
- if (IsReportableUrl(params.back().url)) {
- frame_url = params.back().url;
- } else if (IsReportableUrl(frame_last_committed_url)) {
- frame_url = frame_last_committed_url;
- }
-
- // If we can't figure out which URL the frame was rendering then we don't know
- // where these elements belong in the hierarchy. The DOM will be ambiguous.
- if (frame_url.is_empty()) {
- ambiguous_dom_ = true;
- }
-
// Add the urls from the DOM to |resources_|. The renderer could be sending
// bogus messages, so limit the number of nodes we accept.
// Also update |elements_| with the DOM structure.
@@ -510,13 +474,25 @@ void ThreatDetails::AddDOMDetails(
}
// Check for a tag_name to avoid adding the summary node to the DOM.
if (!node.tag_name.empty()) {
- AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id,
- node.tag_name, node.parent_node_id, node.attributes,
- resource);
+ AddDomElement(process_id, frame_tree_node_id, node.other_frame_routing_id,
+ node.node_id, node.tag_name, node.parent_node_id,
+ node.attributes, resource);
}
}
}
+void ThreatDetails::LookupOtherFrameId(const std::string& element_key,
+ const int process_id,
+ const int other_frame_routing_id) {
+ DCHECK_CURRENTLY_ON(BrowserThread::UI);
+ int other_frame_tree_node_id =
+ content::RenderFrameHost::GetFrameTreeNodeIdForRoutingId(
+ process_id, other_frame_routing_id);
+ if (other_frame_tree_node_id == content::RenderFrameHost::kNoFrameTreeNodeId)
+ ambiguous_dom_ = true;
+ iframe_key_to_frame_tree_id_map_[element_key] = other_frame_tree_node_id;
+}
+
// Called from the SB Service on the IO thread, after the user has
// closed the tab, or clicked proceed or goback. Since the user needs
// to take an action, we expect this to be called after
@@ -525,6 +501,22 @@ void ThreatDetails::AddDOMDetails(
void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
+ // Do a second pass over the elements and update iframe elements to have
+ // references to their children. Children will have been received from a
Charlie Reis 2017/05/05 21:03:07 s/will/may/? (Or does this not apply to same-proc
lpz 2017/05/10 14:21:08 Done - this code doesn't do anything special for s
+ // different renderer than the iframe element.
+ for (auto& element_pair : elements_) {
+ const std::string& element_key = element_pair.first;
+ HTMLElement* element = element_pair.second.get();
+ if (element->tag() == "IFRAME" || element->tag() == "FRAME") {
+ int frame_tree_id_of_iframe_renderer =
+ iframe_key_to_frame_tree_id_map_[element_key];
Charlie Reis 2017/05/05 21:03:07 This doesn't look safe. We're reading it from the
lpz 2017/05/10 14:21:08 Your suggestion should cover this. In general, tho
+ const std::unordered_set<int>& child_ids =
+ frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer];
+ for (const int child_id : child_ids) {
+ element->add_child_ids(child_id);
+ }
+ }
+ }
did_proceed_ = did_proceed;
num_visits_ = num_visit;
std::vector<GURL> urls;

Powered by Google App Engine
This is Rietveld 408576698