Index: chrome/renderer/safe_browsing/threat_dom_details.cc |
diff --git a/chrome/renderer/safe_browsing/threat_dom_details.cc b/chrome/renderer/safe_browsing/threat_dom_details.cc |
index 45e314523497be9fce453322cae6fb9fc47d7dc5..671e2bbdc91523cdfa6e7e9902204fe0e3042cfc 100644 |
--- a/chrome/renderer/safe_browsing/threat_dom_details.cc |
+++ b/chrome/renderer/safe_browsing/threat_dom_details.cc |
@@ -4,9 +4,14 @@ |
#include "chrome/renderer/safe_browsing/threat_dom_details.h" |
+#include <algorithm> |
#include <map> |
+#include <unordered_set> |
#include "base/compiler_specific.h" |
+#include "base/metrics/field_trial_params.h" |
+#include "base/strings/string_piece.h" |
+#include "base/strings/string_split.h" |
#include "base/strings/stringprintf.h" |
#include "components/safe_browsing/common/safebrowsing_messages.h" |
#include "content/public/renderer/render_frame.h" |
@@ -24,56 +29,174 @@ namespace safe_browsing { |
// maintain proper parent/child relationships. |
// The key is a WebNode from the DOM, which is basically a pointer so can be |
// copied into the map when inserting new elements. |
-// The values are pointers to IPC messages generated by ThreatDOMDetails. They |
-// are not owned by the map - ownership remains with the vector of resources |
-// collected by this class. |
-typedef std::map<blink::WebNode, SafeBrowsingHostMsg_ThreatDOMDetails_Node*> |
- ElementToNodeMap; |
+// The values are indices into the vector of collected resources, and are used |
+// to retrieve the IPC message nodes generated by ThreatDOMDetails. |
+using ElementToNodeMap = std::map<blink::WebNode, int>; |
+ |
+// This Feature specifies which non-resource HTML Elements to collect based on |
+// their tag and attributes. It has a single param containing a flat, |
+// comma-separated list of tag,attribute pairs. For example: |
+// "tag1,id,tag1,height,tag2,foo" collects elements with tag "tag1" that have |
+// attribute "id" or "height" set, and elements with tag "tag2" that have |
+// attribute "foo" set. All tag names and attributes should be lower case. |
+const base::Feature kThreatDomDetailsTagAndAttributeFeature{ |
+ "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT}; |
+ |
+// The name of the Feature param holding the tag,attribute CSV list. |
+const char kTagAndAttributeParamName[] = "tag_attribute_csv"; |
namespace { |
+// Predicate used to search |tag_and_attributes_list_| by tag_name: returns |
+// true for items whose |tag_name| equals the tag given at construction. |
+class TagNameIs { |
+ public: |
+  explicit TagNameIs(const std::string& tag) : tag_(tag) {} |
+ |
+  // Const-qualified because matching never mutates |tag_|; this also lets the |
+  // predicate be invoked through a const object or reference. |
+  bool operator()(const TagAndAttributesItem& tag_and_attribute) const { |
+    return tag_ == tag_and_attribute.tag_name; |
+  } |
+ |
+ private: |
+  // The tag name that candidate items are compared against. |
+  std::string tag_; |
+}; |
+ |
+// Fills |tag_and_attributes_list| from the |kTagAndAttributeParamName| param |
+// of |kThreatDomDetailsTagAndAttributeFeature|. The param is a flat CSV of |
+// alternating tag names and attribute names; whitespace is trimmed and empty |
+// entries are dropped before pairing. Attributes that share a tag are merged |
+// into a single item, and the resulting items are sorted by tag name. |
+// The list is left empty when the feature is disabled, the param is empty, or |
+// the CSV does not contain an even number of entries. |
+void ParseTagAndAttributeParams( |
+    std::vector<TagAndAttributesItem>* tag_and_attributes_list) { |
+  DCHECK(tag_and_attributes_list); |
+  // Reset the output before any early return so that stale entries can never |
+  // survive when the feature is disabled. |
+  tag_and_attributes_list->clear(); |
+  if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) { |
+    return; |
+  } |
+  // Held by value: the getter returns a temporary string. |
+  const std::string tag_attribute_csv_param = |
+      base::GetFieldTrialParamValueByFeature( |
+          kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName); |
+  if (tag_attribute_csv_param.empty()) { |
+    return; |
+  } |
+ |
+  const std::vector<std::string> split = |
+      base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE, |
+                        base::SPLIT_WANT_NONEMPTY); |
+  // If we don't have the right number of pairs in the csv then don't bother |
+  // parsing further. |
+  if (split.size() % 2 != 0) { |
+    return; |
+  } |
+  for (size_t i = 0; i < split.size(); i += 2) { |
+    const std::string& tag_name = split[i]; |
+    const std::string& attribute = split[i + 1]; |
+    auto item_iter = |
+        std::find_if(tag_and_attributes_list->begin(), |
+                     tag_and_attributes_list->end(), TagNameIs(tag_name)); |
+    if (item_iter == tag_and_attributes_list->end()) { |
+      // First occurrence of this tag: append an empty item and fill it in |
+      // place instead of copying a temporary into the vector. |
+      tag_and_attributes_list->emplace_back(); |
+      item_iter = tag_and_attributes_list->end() - 1; |
+      item_iter->tag_name = tag_name; |
+    } |
+    item_iter->attributes.push_back(attribute); |
+  } |
+ |
+  // Keep the items ordered by tag name. |
+  std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(), |
+            [](const TagAndAttributesItem& a, const TagAndAttributesItem& b) { |
+              return a.tag_name < b.tag_name; |
+            }); |
+} |
+ |
+// Returns a pointer to the entry of |resources| that was recorded for |
+// |element| in |element_to_node_map|. |element| must already be in the map. |
+SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement( |
+    const blink::WebNode& element, |
+    const safe_browsing::ElementToNodeMap& element_to_node_map, |
+    std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) { |
+  DCHECK(element_to_node_map.count(element) > 0); |
+  // The map stores the node's position within |resources|. |
+  const int index_in_resources = element_to_node_map.at(element); |
+  SafeBrowsingHostMsg_ThreatDOMDetails_Node& node = |
+      resources->at(index_in_resources); |
+  return &node; |
+} |
+ |
// Handler for the various HTML elements that we extract URLs from. |
void HandleElement( |
const blink::WebElement& element, |
- SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node, |
+ SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node, |
std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, |
safe_browsing::ElementToNodeMap* element_to_node_map) { |
- if (!element.hasAttribute("src")) |
- return; |
- |
// Retrieve the link and resolve the link in case it's relative. |
blink::WebURL full_url = |
element.document().completeURL(element.getAttribute("src")); |
const GURL& child_url = GURL(full_url); |
+ if (!child_url.is_empty() && child_url.is_valid()) { |
+ summary_node->children.push_back(child_url); |
+ } |
- // Add to the parent node. |
- parent_node->children.push_back(child_url); |
- |
- // Create the child node. |
- resources->push_back(SafeBrowsingHostMsg_ThreatDOMDetails_Node()); |
- SafeBrowsingHostMsg_ThreatDOMDetails_Node* child_node = &resources->back(); |
- child_node->url = child_url; |
- child_node->tag_name = element.tagName().utf8(); |
- child_node->parent = parent_node->url; |
+ SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node; |
+ child_node.url = child_url; |
+ child_node.tag_name = element.tagName().utf8(); |
+ child_node.parent = summary_node->url; |
// Update the ID mapping. First generate the ID for the current node. |
// Then, if its parent is available, set the current node's parent ID, and |
// also update the parent's children with the current node's ID. |
const int child_id = element_to_node_map->size() + 1; |
- child_node->node_id = child_id; |
- if (!element.parentNode().isNull()) { |
- auto parent_node_iter = element_to_node_map->find(element.parentNode()); |
- if (parent_node_iter != element_to_node_map->end()) { |
- child_node->parent_node_id = parent_node->node_id; |
+ child_node.node_id = child_id; |
+ blink::WebNode cur_parent_element = element.parentNode(); |
+ while (!cur_parent_element.isNull()) { |
+ if (element_to_node_map->count(cur_parent_element) > 0) { |
+ SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node = |
+ GetNodeForElement(cur_parent_element, *element_to_node_map, |
+ resources); |
+ child_node.parent_node_id = parent_node->node_id; |
parent_node->child_node_ids.push_back(child_id); |
+ |
+ // TODO(lpz): Consider also updating the URL-level parent/child mapping |
+ // here. Eg: child_node.parent=parent_node.url, and |
+ // parent_node.children.push_back(child_url). |
+ break; |
+ } else { |
+ // It's possible that the direct parent of this node wasn't handled, so it |
+ // isn't represented in |element_to_node_map|. Keep walking up the |
+ // hierarchy until an ancestor that was handled is found (if any). |
+ cur_parent_element = cur_parent_element.parentNode(); |
} |
} |
- (*element_to_node_map)[element] = child_node; |
+ // Add the child node to the list of resources (may reallocate the vector) |
+ resources->push_back(child_node); |
+ // ... and map the element to its index, which stays valid on reallocation. |
+ (*element_to_node_map)[element] = resources->size() - 1; |
} |
+// Decides whether |element| should be collected. iframe, frame, embed and |
+// script elements are collected whenever they have a "src" attribute. |
+// Otherwise the element is collected only if |tag_and_attributes_list| has an |
+// item for its lower-cased tag name and the element has at least one of that |
+// item's attributes. |
+bool ShouldHandleElement( |
+    const blink::WebElement& element, |
+    const std::vector<TagAndAttributesItem>& tag_and_attributes_list) { |
+  // Resources with a SRC are always handled. |
+  const bool is_resource_tag = |
+      element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || |
+      element.hasHTMLTagName("embed") || element.hasHTMLTagName("script"); |
+  if (is_resource_tag && element.hasAttribute("src")) |
+    return true; |
+ |
+  // Look for the item whose tag matches this element's lower-cased tag name. |
+  const std::string lowered_tag = |
+      base::ToLowerASCII(element.tagName().ascii()); |
+  const auto match = std::find_if( |
+      tag_and_attributes_list.begin(), tag_and_attributes_list.end(), |
+      [&lowered_tag](const TagAndAttributesItem& item) { |
+        return lowered_tag == item.tag_name; |
+      }); |
+  if (match == tag_and_attributes_list.end()) |
+    return false; |
+ |
+  // The element qualifies as soon as it carries any one listed attribute. |
+  return std::any_of(match->attributes.begin(), match->attributes.end(), |
+                     [&element](const std::string& attribute) { |
+                       return element.hasAttribute( |
+                           blink::WebString::fromASCII(attribute)); |
+                     }); |
+} |
} // namespace |
+// Out-of-line special member functions for TagAndAttributesItem. |
+TagAndAttributesItem::TagAndAttributesItem() = default; |
+ |
+// Copies the tag name together with its list of attributes. |
+TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item) |
+    : tag_name(item.tag_name), attributes(item.attributes) {} |
+ |
+TagAndAttributesItem::~TagAndAttributesItem() = default; |
+ |
// An upper limit on the number of nodes we collect. |
uint32_t ThreatDOMDetails::kMaxNodes = 500; |
@@ -85,7 +208,9 @@ ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { |
} |
ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame) |
- : content::RenderFrameObserver(render_frame) {} |
+ : content::RenderFrameObserver(render_frame) { |
+ ParseTagAndAttributeParams(&tag_and_attributes_list_); |
+} |
ThreatDOMDetails::~ThreatDOMDetails() {} |
@@ -124,8 +249,7 @@ void ThreatDOMDetails::ExtractResources( |
blink::WebElementCollection elements = document.all(); |
blink::WebElement element = elements.firstItem(); |
for (; !element.isNull(); element = elements.nextItem()) { |
- if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || |
- element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) { |
+ if (ShouldHandleElement(element, tag_and_attributes_list_)) { |
HandleElement(element, &details_node, resources, &element_to_node_map); |
if (resources->size() >= kMaxNodes) { |
// We have reached kMaxNodes, exit early. |