OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" | 5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" |
6 | 6 |
7 #include <map> | 7 #include <map> |
8 #include <unordered_set> | |
8 | 9 |
9 #include "base/compiler_specific.h" | 10 #include "base/compiler_specific.h" |
11 #include "base/metrics/field_trial_params.h" | |
12 #include "base/strings/string_piece.h" | |
13 #include "base/strings/string_split.h" | |
10 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" |
11 #include "components/safe_browsing/common/safebrowsing_messages.h" | 15 #include "components/safe_browsing/common/safebrowsing_messages.h" |
12 #include "content/public/renderer/render_frame.h" | 16 #include "content/public/renderer/render_frame.h" |
13 #include "third_party/WebKit/public/platform/WebString.h" | 17 #include "third_party/WebKit/public/platform/WebString.h" |
14 #include "third_party/WebKit/public/web/WebDocument.h" | 18 #include "third_party/WebKit/public/web/WebDocument.h" |
15 #include "third_party/WebKit/public/web/WebElement.h" | 19 #include "third_party/WebKit/public/web/WebElement.h" |
16 #include "third_party/WebKit/public/web/WebElementCollection.h" | 20 #include "third_party/WebKit/public/web/WebElementCollection.h" |
17 #include "third_party/WebKit/public/web/WebFrame.h" | 21 #include "third_party/WebKit/public/web/WebFrame.h" |
18 #include "third_party/WebKit/public/web/WebLocalFrame.h" | 22 #include "third_party/WebKit/public/web/WebLocalFrame.h" |
19 | 23 |
20 namespace safe_browsing { | 24 namespace safe_browsing { |
21 | 25 |
22 // A map for keeping track of the identity of DOM Elements, used to generate | 26 // A map for keeping track of the identity of DOM Elements, used to generate |
23 // unique IDs for each element and lookup elements IDs by parent Element, to | 27 // unique IDs for each element and lookup elements IDs by parent Element, to |
24 // maintain proper parent/child relationships. | 28 // maintain proper parent/child relationships. |
25 // The key is a WebNode from the DOM, which is basically a pointer so can be | 29 // The key is a WebNode from the DOM, which is basically a pointer so can be |
26 // copied into the map when inserting new elements. | 30 // copied into the map when inserting new elements. |
27 // The values are pointers to IPC messages generated by ThreatDOMDetails. They | 31 // The values are indices into the resource vector, and are used to retrieve IPC |
28 // are not owned by the map - ownership remains with the vector of resources | 32 // messages generated by ThreatDOMDetails. |
29 // collected by this class. | 33 using ElementToNodeMap = std::map<blink::WebNode, int>; |
30 typedef std::map<blink::WebNode, SafeBrowsingHostMsg_ThreatDOMDetails_Node*> | 34 |
31 ElementToNodeMap; | 35 // This Feature specifies which non-resource HTML Elements to collect based on |
36 // their tag and attributes. It's a single param containing a comma-separated | |
37 // list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will | |
Nathan Parker
2017/02/27 23:00:36
Should they be lowercased?
lpz
2017/02/28 22:53:28
Yes - the code and the comment both mention that.
Nathan Parker
2017/02/28 23:27:10
SGTM
| |
38 // collect elements with tag "tag1" that have attribute "id" or "height" set, | |
39 // and elements of tag "tag2" if they have attribute "foo" set. All tag names | |
40 // and attributes should be lower case. | |
41 const base::Feature kThreatDomDetailsTagAndAttributeFeature{ | |
42 "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT}; | |
43 | |
44 // The name of the param containing the tags and attributes list. | |
45 const char kTagAndAttributeParamName[] = "tag_attribute_csv"; | |
46 | |
47 // A map containing the attributes of interest for some tag. The key is a tag | |
48 // name and the value is a collection of attribute names. If a tag-attribute | |
49 // pair exists in this map, then it should be collected by ThreatDOMDetails. | |
50 using TagToAttributesMap = std::map<std::string, std::vector<std::string>>; | |
32 | 51 |
33 namespace { | 52 namespace { |
34 | 53 |
54 void ParseTagAndAttributeParams(TagToAttributesMap* tag_to_attributes_map) { | |
55 DCHECK(tag_to_attributes_map); | |
vakh (use Gerrit instead)
2017/02/27 17:24:35
optional and nit: this will fail on line 59 specta
Nathan Parker
2017/02/27 23:00:36
...It does add a little value since it wouldn't
lpz
2017/02/28 22:53:28
Ack, leaving the dcheck for doc'ing purposes
| |
56 if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) { | |
57 return; | |
58 } | |
59 tag_to_attributes_map->clear(); | |
60 const std::string& tag_attribute_csv_param = | |
61 base::GetFieldTrialParamValueByFeature( | |
62 kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName); | |
63 if (tag_attribute_csv_param.empty()) { | |
64 return; | |
65 } | |
66 | |
67 std::vector<std::string> split = | |
68 base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE, | |
69 base::SPLIT_WANT_NONEMPTY); | |
70 // If we don't have the right number of pairs in the csv then don't bother | |
71 // parsing further. | |
72 if (split.size() % 2 != 0) { | |
73 return; | |
74 } | |
75 for (size_t i = 0; i < split.size(); i += 2) { | |
76 (*tag_to_attributes_map)[split[i]].push_back(split[i + 1]); | |
77 } | |
78 } | |
79 | |
80 SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement( | |
81 const blink::WebNode& element, | |
82 const safe_browsing::ElementToNodeMap& element_to_node_map, | |
83 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) { | |
84 DCHECK(element_to_node_map.count(element) > 0); | |
85 int resource_index = element_to_node_map.at(element); | |
86 return &(resources->at(resource_index)); | |
87 } | |
88 | |
35 // Handler for the various HTML elements that we extract URLs from. | 89 // Handler for the various HTML elements that we extract URLs from. |
36 void HandleElement( | 90 void HandleElement( |
37 const blink::WebElement& element, | 91 const blink::WebElement& element, |
38 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node, | 92 SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node, |
39 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, | 93 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, |
40 safe_browsing::ElementToNodeMap* element_to_node_map) { | 94 safe_browsing::ElementToNodeMap* element_to_node_map) { |
41 if (!element.hasAttribute("src")) | |
42 return; | |
43 | |
44 // Retrieve the link and resolve the link in case it's relative. | 95 // Retrieve the link and resolve the link in case it's relative. |
45 blink::WebURL full_url = | 96 blink::WebURL full_url = |
46 element.document().completeURL(element.getAttribute("src")); | 97 element.document().completeURL(element.getAttribute("src")); |
Nathan Parker
2017/02/27 23:00:36
What does getAttribute("src") do if there is no
lpz
2017/02/28 22:53:28
It behaves nicely - returns an empty object (url o
| |
47 | 98 |
48 const GURL& child_url = GURL(full_url); | 99 const GURL& child_url = GURL(full_url); |
100 if (!child_url.is_empty() && child_url.is_valid()) { | |
101 summary_node->children.push_back(child_url); | |
102 } | |
49 | 103 |
50 // Add to the parent node. | 104 SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node; |
51 parent_node->children.push_back(child_url); | 105 child_node.url = child_url; |
52 | 106 child_node.tag_name = element.tagName().utf8(); |
53 // Create the child node. | 107 child_node.parent = summary_node->url; |
54 resources->push_back(SafeBrowsingHostMsg_ThreatDOMDetails_Node()); | |
55 SafeBrowsingHostMsg_ThreatDOMDetails_Node* child_node = &resources->back(); | |
56 child_node->url = child_url; | |
57 child_node->tag_name = element.tagName().utf8(); | |
58 child_node->parent = parent_node->url; | |
59 | 108 |
60 // Update the ID mapping. First generate the ID for the current node. | 109 // Update the ID mapping. First generate the ID for the current node. |
61 // Then, if its parent is available, set the current node's parent ID, and | 110 // Then, if its parent is available, set the current node's parent ID, and |
62 // also update the parent's children with the current node's ID. | 111 // also update the parent's children with the current node's ID. |
63 const int child_id = element_to_node_map->size() + 1; | 112 const int child_id = element_to_node_map->size() + 1; |
64 child_node->node_id = child_id; | 113 child_node.node_id = child_id; |
65 if (!element.parentNode().isNull()) { | 114 blink::WebNode cur_parent_element = element.parentNode(); |
66 auto parent_node_iter = element_to_node_map->find(element.parentNode()); | 115 while (!cur_parent_element.isNull()) { |
67 if (parent_node_iter != element_to_node_map->end()) { | 116 if (element_to_node_map->count(cur_parent_element) > 0) { |
68 child_node->parent_node_id = parent_node->node_id; | 117 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node = |
118 GetNodeForElement(cur_parent_element, *element_to_node_map, | |
119 resources); | |
120 child_node.parent_node_id = parent_node->node_id; | |
69 parent_node->child_node_ids.push_back(child_id); | 121 parent_node->child_node_ids.push_back(child_id); |
122 | |
123 // TODO(lpz): Consider also updating the URL-level parent/child mapping | |
124 // here. Eg: child_node.parent=parent_node.url, and | |
125 // parent_node.children.push_back(child_url). | |
126 break; | |
127 } else { | |
128 // It's possible that the direct parent of this node wasn't handled, so it | |
Nathan Parker
2017/02/27 23:00:36
What's the use case for this? Will this link a sub
lpz
2017/02/28 22:53:28
imagine something like:
<div foo>
<div style-stu
| |
129 // isn't represented in |element_to_node_map|. Try walking up the | |
130 // hierarchy to see if a parent further up was handled. | |
131 cur_parent_element = cur_parent_element.parentNode(); | |
70 } | 132 } |
71 } | 133 } |
72 (*element_to_node_map)[element] = child_node; | 134 // Add the child node to the list of resources. |
135 resources->push_back(child_node); | |
136 // .. and remember which index it was inserted at so we can look it up later. | |
137 (*element_to_node_map)[element] = resources->size() - 1; | |
73 } | 138 } |
74 | 139 |
140 bool ShouldHandleElement(const blink::WebElement& element, | |
141 const TagToAttributesMap& tag_to_attributes_map) { | |
142 // Resources with a SRC are always handled. | |
143 if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || | |
Nathan Parker
2017/02/27 23:00:36
An aside: Are there any other tags we might want b
lpz
2017/02/28 22:53:28
Not sure of the answer but, at a glance, there are
| |
144 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) && | |
145 element.hasAttribute("src")) { | |
146 return true; | |
147 } | |
148 | |
149 std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii()); | |
150 const auto& tag_attribute_iter = tag_to_attributes_map.find(tag_name_lower); | |
151 if (tag_attribute_iter == tag_to_attributes_map.end()) { | |
152 return false; | |
153 } | |
154 | |
155 const std::vector<std::string>& valid_attributes = tag_attribute_iter->second; | |
156 for (const std::string& attribute : valid_attributes) { | |
157 if (element.hasAttribute(blink::WebString::fromASCII(attribute))) { | |
158 return true; | |
159 } | |
160 } | |
161 return false; | |
162 } | |
75 } // namespace | 163 } // namespace |
76 | 164 |
77 // An upper limit on the number of nodes we collect. | 165 // An upper limit on the number of nodes we collect. |
78 uint32_t ThreatDOMDetails::kMaxNodes = 500; | 166 uint32_t ThreatDOMDetails::kMaxNodes = 500; |
79 | 167 |
80 // static | 168 // static |
81 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { | 169 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { |
82 // Private constructor and public static Create() method to facilitate | 170 // Private constructor and public static Create() method to facilitate |
83 // stubbing out this class for binary-size reduction purposes. | 171 // stubbing out this class for binary-size reduction purposes. |
84 return new ThreatDOMDetails(render_frame); | 172 return new ThreatDOMDetails(render_frame); |
(...skipping 28 matching lines...) Expand all Loading... | |
113 return; | 201 return; |
114 SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node; | 202 SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node; |
115 blink::WebDocument document = frame->document(); | 203 blink::WebDocument document = frame->document(); |
116 details_node.url = GURL(document.url()); | 204 details_node.url = GURL(document.url()); |
117 if (document.isNull()) { | 205 if (document.isNull()) { |
118 // Nothing in this frame. Just report its URL. | 206 // Nothing in this frame. Just report its URL. |
119 resources->push_back(details_node); | 207 resources->push_back(details_node); |
120 return; | 208 return; |
121 } | 209 } |
122 | 210 |
211 TagToAttributesMap tag_to_attributes_map; | |
212 ParseTagAndAttributeParams(&tag_to_attributes_map); | |
213 | |
123 ElementToNodeMap element_to_node_map; | 214 ElementToNodeMap element_to_node_map; |
124 blink::WebElementCollection elements = document.all(); | 215 blink::WebElementCollection elements = document.all(); |
125 blink::WebElement element = elements.firstItem(); | 216 blink::WebElement element = elements.firstItem(); |
126 for (; !element.isNull(); element = elements.nextItem()) { | 217 for (; !element.isNull(); element = elements.nextItem()) { |
127 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || | 218 if (ShouldHandleElement(element, tag_to_attributes_map)) { |
128 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) { | |
129 HandleElement(element, &details_node, resources, &element_to_node_map); | 219 HandleElement(element, &details_node, resources, &element_to_node_map); |
130 if (resources->size() >= kMaxNodes) { | 220 if (resources->size() >= kMaxNodes) { |
131 // We have reached kMaxNodes, exit early. | 221 // We have reached kMaxNodes, exit early. |
132 resources->push_back(details_node); | 222 resources->push_back(details_node); |
133 return; | 223 return; |
134 } | 224 } |
135 } | 225 } |
136 } | 226 } |
137 resources->push_back(details_node); | 227 resources->push_back(details_node); |
138 } | 228 } |
139 | 229 |
140 void ThreatDOMDetails::OnDestruct() { | 230 void ThreatDOMDetails::OnDestruct() { |
141 delete this; | 231 delete this; |
142 } | 232 } |
143 | 233 |
144 } // namespace safe_browsing | 234 } // namespace safe_browsing |
OLD | NEW |