Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1498)

Side by Side Diff: chrome/renderer/safe_browsing/threat_dom_details.cc

Issue 2713233002: Update ThreatDOMDetails to be able to collect non-resource HTML Elements based on their attributes. (Closed)
Patch Set: Sync Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" 5 #include "chrome/renderer/safe_browsing/threat_dom_details.h"
6 6
7 #include <algorithm>
7 #include <map> 8 #include <map>
9 #include <unordered_set>
8 10
9 #include "base/compiler_specific.h" 11 #include "base/compiler_specific.h"
12 #include "base/metrics/field_trial_params.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_split.h"
10 #include "base/strings/stringprintf.h" 15 #include "base/strings/stringprintf.h"
11 #include "components/safe_browsing/common/safebrowsing_messages.h" 16 #include "components/safe_browsing/common/safebrowsing_messages.h"
12 #include "content/public/renderer/render_frame.h" 17 #include "content/public/renderer/render_frame.h"
13 #include "third_party/WebKit/public/platform/WebString.h" 18 #include "third_party/WebKit/public/platform/WebString.h"
14 #include "third_party/WebKit/public/web/WebDocument.h" 19 #include "third_party/WebKit/public/web/WebDocument.h"
15 #include "third_party/WebKit/public/web/WebElement.h" 20 #include "third_party/WebKit/public/web/WebElement.h"
16 #include "third_party/WebKit/public/web/WebElementCollection.h" 21 #include "third_party/WebKit/public/web/WebElementCollection.h"
17 #include "third_party/WebKit/public/web/WebFrame.h" 22 #include "third_party/WebKit/public/web/WebFrame.h"
18 #include "third_party/WebKit/public/web/WebLocalFrame.h" 23 #include "third_party/WebKit/public/web/WebLocalFrame.h"
19 24
20 namespace safe_browsing { 25 namespace safe_browsing {
21 26
22 // A map for keeping track of the identity of DOM Elements, used to generate 27 // A map for keeping track of the identity of DOM Elements, used to generate
23 // unique IDs for each element and look up element IDs by parent Element, to 28 // unique IDs for each element and look up element IDs by parent Element, to
24 // maintain proper parent/child relationships. 29 // maintain proper parent/child relationships.
25 // The key is a WebNode from the DOM, which is basically a pointer so can be 30 // The key is a WebNode from the DOM, which is basically a pointer so can be
26 // copied into the map when inserting new elements. 31 // copied into the map when inserting new elements.
27 // The values are pointers to IPC messages generated by ThreatDOMDetails. They 32 // The values are indices into the resource vector, and are used to retrieve IPC
28 // are not owned by the map - ownership remains with the vector of resources 33 // messages generated by ThreatDOMDetails.
29 // collected by this class. 34 using ElementToNodeMap = std::map<blink::WebNode, int>;
30 typedef std::map<blink::WebNode, SafeBrowsingHostMsg_ThreatDOMDetails_Node*> 35
31 ElementToNodeMap; 36 // This Feature specifies which non-resource HTML Elements to collect based on
37 // their tag and attributes. It's a single param containing a comma-separated
38 // list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will
39 // collect elements with tag "tag1" that have attribute "id" or "height" set,
40 // and elements of tag "tag2" if they have attribute "foo" set. All tag names
41 // and attributes should be lower case.
42 const base::Feature kThreatDomDetailsTagAndAttributeFeature{
43 "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT};
44
45 // The name of the param containing the tags and attributes list.
46 const char kTagAndAttributeParamName[] = "tag_attribute_csv";
32 47
33 namespace { 48 namespace {
34 49
50 // Predicate used to search |tag_and_attributes_list_| by tag_name.
51 class TagNameIs {
52 public:
53 explicit TagNameIs(const std::string& tag) : tag_(tag) {}
54 bool operator()(const TagAndAttributesItem& tag_and_attribute) {
55 return tag_ == tag_and_attribute.tag_name;
56 }
57
58 private:
59 std::string tag_;
60 };
61
62 void ParseTagAndAttributeParams(
63 std::vector<TagAndAttributesItem>* tag_and_attributes_list) {
64 DCHECK(tag_and_attributes_list);
65 if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) {
66 return;
67 }
68 tag_and_attributes_list->clear();
69 const std::string& tag_attribute_csv_param =
70 base::GetFieldTrialParamValueByFeature(
71 kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName);
72 if (tag_attribute_csv_param.empty()) {
73 return;
74 }
75
76 std::vector<std::string> split =
77 base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE,
78 base::SPLIT_WANT_NONEMPTY);
79 // If we don't have the right number of pairs in the csv then don't bother
80 // parsing further.
81 if (split.size() % 2 != 0) {
82 return;
83 }
84 for (size_t i = 0; i < split.size(); i += 2) {
85 const std::string& tag_name = split[i];
86 const std::string& attribute = split[i + 1];
87 auto item_iter =
88 std::find_if(tag_and_attributes_list->begin(),
89 tag_and_attributes_list->end(), TagNameIs(tag_name));
90 if (item_iter == tag_and_attributes_list->end()) {
91 TagAndAttributesItem item;
92 item.tag_name = tag_name;
93 item.attributes.push_back(attribute);
94 tag_and_attributes_list->push_back(item);
95 } else {
96 item_iter->attributes.push_back(attribute);
97 }
98 }
99
100 std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(),
101 [](const TagAndAttributesItem& a, const TagAndAttributesItem& b) {
102 return a.tag_name < b.tag_name;
103 });
104 }
105
106 SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement(
107 const blink::WebNode& element,
108 const safe_browsing::ElementToNodeMap& element_to_node_map,
109 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
110 DCHECK(element_to_node_map.count(element) > 0);
111 int resource_index = element_to_node_map.at(element);
112 return &(resources->at(resource_index));
113 }
114
35 // Handler for the various HTML elements that we extract URLs from. 115 // Handler for the various HTML elements that we extract URLs from.
36 void HandleElement( 116 void HandleElement(
37 const blink::WebElement& element, 117 const blink::WebElement& element,
38 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node, 118 SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node,
39 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, 119 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources,
40 safe_browsing::ElementToNodeMap* element_to_node_map) { 120 safe_browsing::ElementToNodeMap* element_to_node_map) {
41 if (!element.hasAttribute("src"))
42 return;
43
44 // Retrieve the link and resolve the link in case it's relative. 121 // Retrieve the link and resolve the link in case it's relative.
45 blink::WebURL full_url = 122 blink::WebURL full_url =
46 element.document().completeURL(element.getAttribute("src")); 123 element.document().completeURL(element.getAttribute("src"));
47 124
48 const GURL& child_url = GURL(full_url); 125 const GURL& child_url = GURL(full_url);
126 if (!child_url.is_empty() && child_url.is_valid()) {
127 summary_node->children.push_back(child_url);
128 }
49 129
50 // Add to the parent node. 130 SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node;
51 parent_node->children.push_back(child_url); 131 child_node.url = child_url;
52 132 child_node.tag_name = element.tagName().utf8();
53 // Create the child node. 133 child_node.parent = summary_node->url;
54 resources->push_back(SafeBrowsingHostMsg_ThreatDOMDetails_Node());
55 SafeBrowsingHostMsg_ThreatDOMDetails_Node* child_node = &resources->back();
56 child_node->url = child_url;
57 child_node->tag_name = element.tagName().utf8();
58 child_node->parent = parent_node->url;
59 134
60 // Update the ID mapping. First generate the ID for the current node. 135 // Update the ID mapping. First generate the ID for the current node.
61 // Then, if its parent is available, set the current node's parent ID, and 136 // Then, if its parent is available, set the current node's parent ID, and
62 // also update the parent's children with the current node's ID. 137 // also update the parent's children with the current node's ID.
63 const int child_id = element_to_node_map->size() + 1; 138 const int child_id = element_to_node_map->size() + 1;
64 child_node->node_id = child_id; 139 child_node.node_id = child_id;
65 if (!element.parentNode().isNull()) { 140 blink::WebNode cur_parent_element = element.parentNode();
66 auto parent_node_iter = element_to_node_map->find(element.parentNode()); 141 while (!cur_parent_element.isNull()) {
67 if (parent_node_iter != element_to_node_map->end()) { 142 if (element_to_node_map->count(cur_parent_element) > 0) {
68 child_node->parent_node_id = parent_node->node_id; 143 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node =
144 GetNodeForElement(cur_parent_element, *element_to_node_map,
145 resources);
146 child_node.parent_node_id = parent_node->node_id;
69 parent_node->child_node_ids.push_back(child_id); 147 parent_node->child_node_ids.push_back(child_id);
148
149 // TODO(lpz): Consider also updating the URL-level parent/child mapping
150 // here. Eg: child_node.parent=parent_node.url, and
151 // parent_node.children.push_back(child_url).
152 break;
153 } else {
154 // It's possible that the direct parent of this node wasn't handled, so it
155 // isn't represented in |element_to_node_map|. Try walking up the
156 // hierarchy to see if a parent further up was handled.
157 cur_parent_element = cur_parent_element.parentNode();
70 } 158 }
71 } 159 }
72 (*element_to_node_map)[element] = child_node; 160 // Add the child node to the list of resources.
161 resources->push_back(child_node);
162 // .. and remember which index it was inserted at so we can look it up later.
163 (*element_to_node_map)[element] = resources->size() - 1;
73 } 164 }
74 165
166 bool ShouldHandleElement(
167 const blink::WebElement& element,
168 const std::vector<TagAndAttributesItem>& tag_and_attributes_list) {
169 // Resources with a SRC are always handled.
170 if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") ||
171 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) &&
172 element.hasAttribute("src")) {
173 return true;
174 }
175
176 std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii());
177 const auto& tag_attribute_iter =
178 std::find_if(tag_and_attributes_list.begin(),
179 tag_and_attributes_list.end(), TagNameIs(tag_name_lower));
180 if (tag_attribute_iter == tag_and_attributes_list.end()) {
181 return false;
182 }
183
184 const std::vector<std::string>& valid_attributes =
185 tag_attribute_iter->attributes;
186 for (const std::string& attribute : valid_attributes) {
187 if (element.hasAttribute(blink::WebString::fromASCII(attribute))) {
188 return true;
189 }
190 }
191 return false;
192 }
75 } // namespace 193 } // namespace
76 194
195 TagAndAttributesItem::TagAndAttributesItem() {}
196 TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item)
197 : tag_name(item.tag_name), attributes(item.attributes) {}
198 TagAndAttributesItem::~TagAndAttributesItem() {}
199
77 // An upper limit on the number of nodes we collect. 200 // An upper limit on the number of nodes we collect.
78 uint32_t ThreatDOMDetails::kMaxNodes = 500; 201 uint32_t ThreatDOMDetails::kMaxNodes = 500;
79 202
80 // static 203 // static
81 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { 204 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) {
82 // Private constructor and public static Create() method to facilitate 205 // Private constructor and public static Create() method to facilitate
83 // stubbing out this class for binary-size reduction purposes. 206 // stubbing out this class for binary-size reduction purposes.
84 return new ThreatDOMDetails(render_frame); 207 return new ThreatDOMDetails(render_frame);
85 } 208 }
86 209
87 ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame) 210 ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame)
88 : content::RenderFrameObserver(render_frame) {} 211 : content::RenderFrameObserver(render_frame) {
212 ParseTagAndAttributeParams(&tag_and_attributes_list_);
213 }
89 214
90 ThreatDOMDetails::~ThreatDOMDetails() {} 215 ThreatDOMDetails::~ThreatDOMDetails() {}
91 216
92 bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) { 217 bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) {
93 bool handled = true; 218 bool handled = true;
94 IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message) 219 IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message)
95 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails, 220 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails,
96 OnGetThreatDOMDetails) 221 OnGetThreatDOMDetails)
97 IPC_MESSAGE_UNHANDLED(handled = false) 222 IPC_MESSAGE_UNHANDLED(handled = false)
98 IPC_END_MESSAGE_MAP() 223 IPC_END_MESSAGE_MAP()
(...skipping 18 matching lines...) Expand all
117 if (document.isNull()) { 242 if (document.isNull()) {
118 // Nothing in this frame. Just report its URL. 243 // Nothing in this frame. Just report its URL.
119 resources->push_back(details_node); 244 resources->push_back(details_node);
120 return; 245 return;
121 } 246 }
122 247
123 ElementToNodeMap element_to_node_map; 248 ElementToNodeMap element_to_node_map;
124 blink::WebElementCollection elements = document.all(); 249 blink::WebElementCollection elements = document.all();
125 blink::WebElement element = elements.firstItem(); 250 blink::WebElement element = elements.firstItem();
126 for (; !element.isNull(); element = elements.nextItem()) { 251 for (; !element.isNull(); element = elements.nextItem()) {
127 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || 252 if (ShouldHandleElement(element, tag_and_attributes_list_)) {
128 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) {
129 HandleElement(element, &details_node, resources, &element_to_node_map); 253 HandleElement(element, &details_node, resources, &element_to_node_map);
130 if (resources->size() >= kMaxNodes) { 254 if (resources->size() >= kMaxNodes) {
131 // We have reached kMaxNodes, exit early. 255 // We have reached kMaxNodes, exit early.
132 resources->push_back(details_node); 256 resources->push_back(details_node);
133 return; 257 return;
134 } 258 }
135 } 259 }
136 } 260 }
137 resources->push_back(details_node); 261 resources->push_back(details_node);
138 } 262 }
139 263
140 void ThreatDOMDetails::OnDestruct() { 264 void ThreatDOMDetails::OnDestruct() {
141 delete this; 265 delete this;
142 } 266 }
143 267
144 } // namespace safe_browsing 268 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/threat_dom_details.h ('k') | chrome/renderer/safe_browsing/threat_dom_details_browsertest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698