OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" | 5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" |
6 | 6 |
| 7 #include <algorithm> |
7 #include <map> | 8 #include <map> |
| 9 #include <unordered_set> |
8 | 10 |
9 #include "base/compiler_specific.h" | 11 #include "base/compiler_specific.h" |
| 12 #include "base/metrics/field_trial_params.h" |
| 13 #include "base/strings/string_piece.h" |
| 14 #include "base/strings/string_split.h" |
10 #include "base/strings/stringprintf.h" | 15 #include "base/strings/stringprintf.h" |
11 #include "components/safe_browsing/common/safebrowsing_messages.h" | 16 #include "components/safe_browsing/common/safebrowsing_messages.h" |
12 #include "content/public/renderer/render_frame.h" | 17 #include "content/public/renderer/render_frame.h" |
13 #include "third_party/WebKit/public/platform/WebString.h" | 18 #include "third_party/WebKit/public/platform/WebString.h" |
14 #include "third_party/WebKit/public/web/WebDocument.h" | 19 #include "third_party/WebKit/public/web/WebDocument.h" |
15 #include "third_party/WebKit/public/web/WebElement.h" | 20 #include "third_party/WebKit/public/web/WebElement.h" |
16 #include "third_party/WebKit/public/web/WebElementCollection.h" | 21 #include "third_party/WebKit/public/web/WebElementCollection.h" |
17 #include "third_party/WebKit/public/web/WebFrame.h" | 22 #include "third_party/WebKit/public/web/WebFrame.h" |
18 #include "third_party/WebKit/public/web/WebLocalFrame.h" | 23 #include "third_party/WebKit/public/web/WebLocalFrame.h" |
19 | 24 |
20 namespace safe_browsing { | 25 namespace safe_browsing { |
21 | 26 |
22 // A map for keeping track of the identity of DOM Elements, used to generate | 27 // A map for keeping track of the identity of DOM Elements, used to generate |
23 // unique IDs for each element and lookup elements IDs by parent Element, to | 28 // unique IDs for each element and lookup elements IDs by parent Element, to |
24 // maintain proper parent/child relationships. | 29 // maintain proper parent/child relationships. |
25 // They key is a WebNode from the DOM, which is basically a pointer so can be | 30 // They key is a WebNode from the DOM, which is basically a pointer so can be |
26 // copied into the map when inserting new elements. | 31 // copied into the map when inserting new elements. |
27 // The values are pointers to IPC messages generated by ThreatDOMDetails. They | 32 // The values are indices into the resource vector, and are used to retrieve IPC |
28 // are not owned by the map - ownership remains with the vector of resources | 33 // messages generated by ThreatDOMDetails. |
29 // collected by this class. | 34 using ElementToNodeMap = std::map<blink::WebNode, int>; |
30 typedef std::map<blink::WebNode, SafeBrowsingHostMsg_ThreatDOMDetails_Node*> | 35 |
31 ElementToNodeMap; | 36 // This Feature specifies which non-resource HTML Elements to collect based on |
| 37 // their tag and attributes. It's a single param containing a comma-separated |
| 38 // list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will |
| 39 // collect elements with tag "tag1" that have attribute "id" or "height" set, |
| 40 // and elements of tag "tag2" if they have attribute "foo" set. All tag names |
| 41 // and attributes should be lower case. |
| 42 const base::Feature kThreatDomDetailsTagAndAttributeFeature{ |
| 43 "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT}; |
| 44 |
| 45 // The name of the param containing the tags and attributes list. |
| 46 const char kTagAndAttributeParamName[] = "tag_attribute_csv"; |
32 | 47 |
33 namespace { | 48 namespace { |
34 | 49 |
| 50 // Predicate used to search |tag_and_attributes_list_| by tag_name. |
| 51 class TagNameIs { |
| 52 public: |
| 53 explicit TagNameIs(const std::string& tag) : tag_(tag) {} |
| 54 bool operator()(const TagAndAttributesItem& tag_and_attribute) { |
| 55 return tag_ == tag_and_attribute.tag_name; |
| 56 } |
| 57 |
| 58 private: |
| 59 std::string tag_; |
| 60 }; |
| 61 |
| 62 void ParseTagAndAttributeParams( |
| 63 std::vector<TagAndAttributesItem>* tag_and_attributes_list) { |
| 64 DCHECK(tag_and_attributes_list); |
| 65 if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) { |
| 66 return; |
| 67 } |
| 68 tag_and_attributes_list->clear(); |
| 69 const std::string& tag_attribute_csv_param = |
| 70 base::GetFieldTrialParamValueByFeature( |
| 71 kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName); |
| 72 if (tag_attribute_csv_param.empty()) { |
| 73 return; |
| 74 } |
| 75 |
| 76 std::vector<std::string> split = |
| 77 base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE, |
| 78 base::SPLIT_WANT_NONEMPTY); |
| 79 // If we don't have the right number of pairs in the csv then don't bother |
| 80 // parsing further. |
| 81 if (split.size() % 2 != 0) { |
| 82 return; |
| 83 } |
| 84 for (size_t i = 0; i < split.size(); i += 2) { |
| 85 const std::string& tag_name = split[i]; |
| 86 const std::string& attribute = split[i + 1]; |
| 87 auto item_iter = |
| 88 std::find_if(tag_and_attributes_list->begin(), |
| 89 tag_and_attributes_list->end(), TagNameIs(tag_name)); |
| 90 if (item_iter == tag_and_attributes_list->end()) { |
| 91 TagAndAttributesItem item; |
| 92 item.tag_name = tag_name; |
| 93 item.attributes.push_back(attribute); |
| 94 tag_and_attributes_list->push_back(item); |
| 95 } else { |
| 96 item_iter->attributes.push_back(attribute); |
| 97 } |
| 98 } |
| 99 |
| 100 std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(), |
| 101 [](const TagAndAttributesItem& a, const TagAndAttributesItem& b) { |
| 102 return a.tag_name < b.tag_name; |
| 103 }); |
| 104 } |
| 105 |
| 106 SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement( |
| 107 const blink::WebNode& element, |
| 108 const safe_browsing::ElementToNodeMap& element_to_node_map, |
| 109 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) { |
| 110 DCHECK(element_to_node_map.count(element) > 0); |
| 111 int resource_index = element_to_node_map.at(element); |
| 112 return &(resources->at(resource_index)); |
| 113 } |
| 114 |
35 // Handler for the various HTML elements that we extract URLs from. | 115 // Handler for the various HTML elements that we extract URLs from. |
36 void HandleElement( | 116 void HandleElement( |
37 const blink::WebElement& element, | 117 const blink::WebElement& element, |
38 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node, | 118 SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node, |
39 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, | 119 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, |
40 safe_browsing::ElementToNodeMap* element_to_node_map) { | 120 safe_browsing::ElementToNodeMap* element_to_node_map) { |
41 if (!element.hasAttribute("src")) | |
42 return; | |
43 | |
44 // Retrieve the link and resolve the link in case it's relative. | 121 // Retrieve the link and resolve the link in case it's relative. |
45 blink::WebURL full_url = | 122 blink::WebURL full_url = |
46 element.document().completeURL(element.getAttribute("src")); | 123 element.document().completeURL(element.getAttribute("src")); |
47 | 124 |
48 const GURL& child_url = GURL(full_url); | 125 const GURL& child_url = GURL(full_url); |
| 126 if (!child_url.is_empty() && child_url.is_valid()) { |
| 127 summary_node->children.push_back(child_url); |
| 128 } |
49 | 129 |
50 // Add to the parent node. | 130 SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node; |
51 parent_node->children.push_back(child_url); | 131 child_node.url = child_url; |
52 | 132 child_node.tag_name = element.tagName().utf8(); |
53 // Create the child node. | 133 child_node.parent = summary_node->url; |
54 resources->push_back(SafeBrowsingHostMsg_ThreatDOMDetails_Node()); | |
55 SafeBrowsingHostMsg_ThreatDOMDetails_Node* child_node = &resources->back(); | |
56 child_node->url = child_url; | |
57 child_node->tag_name = element.tagName().utf8(); | |
58 child_node->parent = parent_node->url; | |
59 | 134 |
60 // Update the ID mapping. First generate the ID for the current node. | 135 // Update the ID mapping. First generate the ID for the current node. |
61 // Then, if its parent is available, set the current node's parent ID, and | 136 // Then, if its parent is available, set the current node's parent ID, and |
62 // also update the parent's children with the current node's ID. | 137 // also update the parent's children with the current node's ID. |
63 const int child_id = element_to_node_map->size() + 1; | 138 const int child_id = element_to_node_map->size() + 1; |
64 child_node->node_id = child_id; | 139 child_node.node_id = child_id; |
65 if (!element.parentNode().isNull()) { | 140 blink::WebNode cur_parent_element = element.parentNode(); |
66 auto parent_node_iter = element_to_node_map->find(element.parentNode()); | 141 while (!cur_parent_element.isNull()) { |
67 if (parent_node_iter != element_to_node_map->end()) { | 142 if (element_to_node_map->count(cur_parent_element) > 0) { |
68 child_node->parent_node_id = parent_node->node_id; | 143 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node = |
| 144 GetNodeForElement(cur_parent_element, *element_to_node_map, |
| 145 resources); |
| 146 child_node.parent_node_id = parent_node->node_id; |
69 parent_node->child_node_ids.push_back(child_id); | 147 parent_node->child_node_ids.push_back(child_id); |
| 148 |
| 149 // TODO(lpz): Consider also updating the URL-level parent/child mapping |
| 150 // here. Eg: child_node.parent=parent_node.url, and |
| 151 // parent_node.children.push_back(child_url). |
| 152 break; |
| 153 } else { |
| 154 // It's possible that the direct parent of this node wasn't handled, so it |
| 155 // isn't represented in |element_to_node_map|. Try walking up the |
| 156 // hierarchy to see if a parent further up was handled. |
| 157 cur_parent_element = cur_parent_element.parentNode(); |
70 } | 158 } |
71 } | 159 } |
72 (*element_to_node_map)[element] = child_node; | 160 // Add the child node to the list of resources. |
| 161 resources->push_back(child_node); |
| 162 // .. and remember which index it was inserted at so we can look it up later. |
| 163 (*element_to_node_map)[element] = resources->size() - 1; |
73 } | 164 } |
74 | 165 |
| 166 bool ShouldHandleElement( |
| 167 const blink::WebElement& element, |
| 168 const std::vector<TagAndAttributesItem>& tag_and_attributes_list) { |
| 169 // Resources with a SRC are always handled. |
| 170 if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || |
| 171 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) && |
| 172 element.hasAttribute("src")) { |
| 173 return true; |
| 174 } |
| 175 |
| 176 std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii()); |
| 177 const auto& tag_attribute_iter = |
| 178 std::find_if(tag_and_attributes_list.begin(), |
| 179 tag_and_attributes_list.end(), TagNameIs(tag_name_lower)); |
| 180 if (tag_attribute_iter == tag_and_attributes_list.end()) { |
| 181 return false; |
| 182 } |
| 183 |
| 184 const std::vector<std::string>& valid_attributes = |
| 185 tag_attribute_iter->attributes; |
| 186 for (const std::string& attribute : valid_attributes) { |
| 187 if (element.hasAttribute(blink::WebString::fromASCII(attribute))) { |
| 188 return true; |
| 189 } |
| 190 } |
| 191 return false; |
| 192 } |
75 } // namespace | 193 } // namespace |
76 | 194 |
| 195 TagAndAttributesItem::TagAndAttributesItem() {} |
| 196 TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item) |
| 197 : tag_name(item.tag_name), attributes(item.attributes) {} |
| 198 TagAndAttributesItem::~TagAndAttributesItem() {} |
| 199 |
77 // An upper limit on the number of nodes we collect. | 200 // An upper limit on the number of nodes we collect. |
78 uint32_t ThreatDOMDetails::kMaxNodes = 500; | 201 uint32_t ThreatDOMDetails::kMaxNodes = 500; |
79 | 202 |
80 // static | 203 // static |
81 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { | 204 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { |
82 // Private constructor and public static Create() method to facilitate | 205 // Private constructor and public static Create() method to facilitate |
83 // stubbing out this class for binary-size reduction purposes. | 206 // stubbing out this class for binary-size reduction purposes. |
84 return new ThreatDOMDetails(render_frame); | 207 return new ThreatDOMDetails(render_frame); |
85 } | 208 } |
86 | 209 |
87 ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame) | 210 ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame) |
88 : content::RenderFrameObserver(render_frame) {} | 211 : content::RenderFrameObserver(render_frame) { |
| 212 ParseTagAndAttributeParams(&tag_and_attributes_list_); |
| 213 } |
89 | 214 |
90 ThreatDOMDetails::~ThreatDOMDetails() {} | 215 ThreatDOMDetails::~ThreatDOMDetails() {} |
91 | 216 |
92 bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) { | 217 bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) { |
93 bool handled = true; | 218 bool handled = true; |
94 IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message) | 219 IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message) |
95 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails, | 220 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails, |
96 OnGetThreatDOMDetails) | 221 OnGetThreatDOMDetails) |
97 IPC_MESSAGE_UNHANDLED(handled = false) | 222 IPC_MESSAGE_UNHANDLED(handled = false) |
98 IPC_END_MESSAGE_MAP() | 223 IPC_END_MESSAGE_MAP() |
(...skipping 18 matching lines...) Expand all Loading... |
117 if (document.isNull()) { | 242 if (document.isNull()) { |
118 // Nothing in this frame. Just report its URL. | 243 // Nothing in this frame. Just report its URL. |
119 resources->push_back(details_node); | 244 resources->push_back(details_node); |
120 return; | 245 return; |
121 } | 246 } |
122 | 247 |
123 ElementToNodeMap element_to_node_map; | 248 ElementToNodeMap element_to_node_map; |
124 blink::WebElementCollection elements = document.all(); | 249 blink::WebElementCollection elements = document.all(); |
125 blink::WebElement element = elements.firstItem(); | 250 blink::WebElement element = elements.firstItem(); |
126 for (; !element.isNull(); element = elements.nextItem()) { | 251 for (; !element.isNull(); element = elements.nextItem()) { |
127 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || | 252 if (ShouldHandleElement(element, tag_and_attributes_list_)) { |
128 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) { | |
129 HandleElement(element, &details_node, resources, &element_to_node_map); | 253 HandleElement(element, &details_node, resources, &element_to_node_map); |
130 if (resources->size() >= kMaxNodes) { | 254 if (resources->size() >= kMaxNodes) { |
131 // We have reached kMaxNodes, exit early. | 255 // We have reached kMaxNodes, exit early. |
132 resources->push_back(details_node); | 256 resources->push_back(details_node); |
133 return; | 257 return; |
134 } | 258 } |
135 } | 259 } |
136 } | 260 } |
137 resources->push_back(details_node); | 261 resources->push_back(details_node); |
138 } | 262 } |
139 | 263 |
140 void ThreatDOMDetails::OnDestruct() { | 264 void ThreatDOMDetails::OnDestruct() { |
141 delete this; | 265 delete this; |
142 } | 266 } |
143 | 267 |
144 } // namespace safe_browsing | 268 } // namespace safe_browsing |
OLD | NEW |