Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" | 5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" |
| 6 | 6 |
| 7 #include <map> | 7 #include <map> |
| 8 #include <unordered_set> | |
| 8 | 9 |
| 9 #include "base/compiler_specific.h" | 10 #include "base/compiler_specific.h" |
| 11 #include "base/metrics/field_trial_params.h" | |
| 12 #include "base/strings/string_piece.h" | |
| 13 #include "base/strings/string_split.h" | |
| 10 #include "base/strings/stringprintf.h" | 14 #include "base/strings/stringprintf.h" |
| 11 #include "components/safe_browsing/common/safebrowsing_messages.h" | 15 #include "components/safe_browsing/common/safebrowsing_messages.h" |
| 12 #include "content/public/renderer/render_frame.h" | 16 #include "content/public/renderer/render_frame.h" |
| 13 #include "third_party/WebKit/public/platform/WebString.h" | 17 #include "third_party/WebKit/public/platform/WebString.h" |
| 14 #include "third_party/WebKit/public/web/WebDocument.h" | 18 #include "third_party/WebKit/public/web/WebDocument.h" |
| 15 #include "third_party/WebKit/public/web/WebElement.h" | 19 #include "third_party/WebKit/public/web/WebElement.h" |
| 16 #include "third_party/WebKit/public/web/WebElementCollection.h" | 20 #include "third_party/WebKit/public/web/WebElementCollection.h" |
| 17 #include "third_party/WebKit/public/web/WebFrame.h" | 21 #include "third_party/WebKit/public/web/WebFrame.h" |
| 18 #include "third_party/WebKit/public/web/WebLocalFrame.h" | 22 #include "third_party/WebKit/public/web/WebLocalFrame.h" |
| 19 | 23 |
| 20 namespace safe_browsing { | 24 namespace safe_browsing { |
| 21 | 25 |
| 22 // A map for keeping track of the identity of DOM Elements, used to generate | 26 // A map for keeping track of the identity of DOM Elements, used to generate |
| 23 // unique IDs for each element and look up element IDs by parent Element, to | 27 // unique IDs for each element and look up element IDs by parent Element, to |
| 24 // maintain proper parent/child relationships. | 28 // maintain proper parent/child relationships. |
| 25 // The key is a WebNode from the DOM, which is basically a pointer so can be | 29 // The key is a WebNode from the DOM, which is basically a pointer so can be |
| 26 // copied into the map when inserting new elements. | 30 // copied into the map when inserting new elements. |
| 27 // The values are pointers to IPC messages generated by ThreatDOMDetails. They | 31 // The values are indices into the resource vector, and are used to retrieve IPC |
| 28 // are not owned by the map - ownership remains with the vector of resources | 32 // messages generated by ThreatDOMDetails. |
| 29 // collected by this class. | 33 using ElementToNodeMap = std::map<blink::WebNode, int>; |
| 30 typedef std::map<blink::WebNode, SafeBrowsingHostMsg_ThreatDOMDetails_Node*> | 34 |
| 31 ElementToNodeMap; | 35 // This Feature specifies which HTML Elements to collect based on their tag and |
|
Jialiu Lin
2017/02/25 00:46:18
In addition to "iframe", "frame", "embed" and "scr
lpz
2017/02/27 15:46:43
clarified that it's only for "non-resource" elemen
| |
| 36 // attributes. It's a single param containing a comma-separated list of pairs. | |
| 37 // For example: "tag1,id,tag1,height,tag2,foo" - this will collect elements with | |
| 38 // tag "tag1" that have attribute "id" or "height" set, and elements of tag | |
| 39 // "tag2" if they have attribute "foo" set. | |
| 40 // All tag names and attributes should be lower case. | |
| 41 const base::Feature kThreatDomDetailsTagAndAttributeFeature{ | |
| 42 "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT}; | |
| 43 | |
| 44 // The name of the param containing the tags and attributes list. | |
| 45 const char kTagAndAttributeParamName[] = "tag_attribute_csv"; | |
| 46 | |
| 47 // A map containing the attributes of interest for some tag. The key is a tag | |
| 48 // name and the value is a collection of attribute names. If a tag-attribute | |
| 49 // pair exists in this map, then it should be collected by ThreatDOMDetails. | |
| 50 using TagToAttributesMap = | |
| 51 std::map<std::string, std::unordered_set<std::string>>; | |
|
vakh (use Gerrit instead)
2017/02/24 19:08:38
You might want to consider using:
std::map<std::st
lpz
2017/02/27 15:46:43
Thanks - good point about using a vector, I don't
vakh (use Gerrit instead)
2017/02/27 17:24:35
The advantage is the same: cache locality (big win
lpz
2017/02/28 22:53:28
Thanks Varun, PTAL.
Re: lifetime, yes I believe t
vakh (use Gerrit instead)
2017/02/28 23:00:04
Acknowledged. Thanks for evaluating both approache
| |
| 32 | 52 |
| 33 namespace { | 53 namespace { |
| 34 | 54 |
| 55 void ParseTagAndAttributeParams(TagToAttributesMap* tag_to_attributes_map) { | |
| 56 if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) { | |
| 57 return; | |
| 58 } | |
| 59 if (!tag_to_attributes_map) { | |
|
vakh (use Gerrit instead)
2017/02/24 19:08:38
Would this condition mask a real problem?
It can o
lpz
2017/02/27 15:46:43
Yes, switched to a dcheck
| |
| 60 return; | |
| 61 } | |
| 62 tag_to_attributes_map->clear(); | |
| 63 const std::string& tag_attribute_csv_param = | |
| 64 base::GetFieldTrialParamValueByFeature( | |
| 65 kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName); | |
| 66 if (tag_attribute_csv_param.empty()) { | |
| 67 return; | |
| 68 } | |
| 69 | |
| 70 std::vector<std::string> split = | |
| 71 base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE, | |
| 72 base::SPLIT_WANT_NONEMPTY); | |
| 73 for (size_t i = 0; i < split.size(); i += 2) { | |
|
Jialiu Lin
2017/02/25 00:46:18
Maybe do a DCHECK if split.size() is an even numbe
vakh (use Gerrit instead)
2017/02/25 00:48:07
Good idea, but if you do this, might as well do th
lpz
2017/02/27 15:46:43
Done.
| |
| 74 if (i == split.size() - 1) { | |
| 75 // We're at the end of the vector but don't have enough elements for | |
| 76 // a pair, exit now and ignore this last tag. | |
| 77 break; | |
| 78 } | |
| 79 (*tag_to_attributes_map)[split[i]].insert(split[i + 1]); | |
| 80 } | |
| 81 } | |
| 82 | |
| 83 SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement( | |
| 84 const blink::WebNode& element, | |
| 85 const safe_browsing::ElementToNodeMap& element_to_node_map, | |
| 86 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) { | |
| 87 DCHECK(element_to_node_map.count(element) > 0); | |
| 88 int resource_index = element_to_node_map.at(element); | |
| 89 return &(resources->at(resource_index)); | |
| 90 } | |
| 91 | |
| 35 // Handler for the various HTML elements that we extract URLs from. | 92 // Handler for the various HTML elements that we extract URLs from. |
| 36 void HandleElement( | 93 void HandleElement( |
| 37 const blink::WebElement& element, | 94 const blink::WebElement& element, |
| 38 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node, | 95 SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node, |
| 39 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, | 96 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, |
| 40 safe_browsing::ElementToNodeMap* element_to_node_map) { | 97 safe_browsing::ElementToNodeMap* element_to_node_map) { |
| 41 if (!element.hasAttribute("src")) | |
| 42 return; | |
| 43 | |
| 44 // Retrieve the link and resolve the link in case it's relative. | 98 // Retrieve the link and resolve the link in case it's relative. |
| 45 blink::WebURL full_url = | 99 blink::WebURL full_url = |
| 46 element.document().completeURL(element.getAttribute("src")); | 100 element.document().completeURL(element.getAttribute("src")); |
| 47 | 101 |
| 48 const GURL& child_url = GURL(full_url); | 102 const GURL& child_url = GURL(full_url); |
| 49 | 103 |
| 50 // Add to the parent node. | 104 // Update summary node with the URL if this element has one. |
|
vakh (use Gerrit instead)
2017/02/24 19:08:38
Code is clear enough so the comment is not particu
lpz
2017/02/27 15:46:43
Done.
| |
| 51 parent_node->children.push_back(child_url); | 105 if (!child_url.is_empty() && child_url.is_valid()) { |
| 106 summary_node->children.push_back(child_url); | |
| 107 } | |
| 52 | 108 |
| 53 // Create the child node. | 109 // Create the child node. |
|
vakh (use Gerrit instead)
2017/02/24 19:08:38
Same here
lpz
2017/02/27 15:46:43
Done.
| |
| 54 resources->push_back(SafeBrowsingHostMsg_ThreatDOMDetails_Node()); | 110 SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node; |
| 55 SafeBrowsingHostMsg_ThreatDOMDetails_Node* child_node = &resources->back(); | 111 child_node.url = child_url; |
| 56 child_node->url = child_url; | 112 child_node.tag_name = element.tagName().utf8(); |
| 57 child_node->tag_name = element.tagName().utf8(); | 113 child_node.parent = summary_node->url; |
| 58 child_node->parent = parent_node->url; | |
| 59 | 114 |
| 60 // Update the ID mapping. First generate the ID for the current node. | 115 // Update the ID mapping. First generate the ID for the current node. |
| 61 // Then, if its parent is available, set the current node's parent ID, and | 116 // Then, if its parent is available, set the current node's parent ID, and |
| 62 // also update the parent's children with the current node's ID. | 117 // also update the parent's children with the current node's ID. |
| 63 const int child_id = element_to_node_map->size() + 1; | 118 const int child_id = element_to_node_map->size() + 1; |
| 64 child_node->node_id = child_id; | 119 child_node.node_id = child_id; |
| 65 if (!element.parentNode().isNull()) { | 120 blink::WebNode cur_parent_element = element.parentNode(); |
| 66 auto parent_node_iter = element_to_node_map->find(element.parentNode()); | 121 while (!cur_parent_element.isNull()) { |
| 67 if (parent_node_iter != element_to_node_map->end()) { | 122 if (element_to_node_map->count(cur_parent_element) > 0) { |
| 68 child_node->parent_node_id = parent_node->node_id; | 123 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node = |
| 124 GetNodeForElement(cur_parent_element, *element_to_node_map, | |
| 125 resources); | |
| 126 child_node.parent_node_id = parent_node->node_id; | |
| 69 parent_node->child_node_ids.push_back(child_id); | 127 parent_node->child_node_ids.push_back(child_id); |
| 128 | |
| 129 // TODO(lpz): Consider also updating the URL-level parent/child mapping | |
| 130 // here. Eg: child_node.parent=parent_node.url, and | |
| 131 // parent_node.children.push_back(child_url). | |
| 132 break; | |
| 133 } else { | |
| 134 // It's possible that the direct parent of this node wasn't handled, so it | |
| 135 // isn't represented in |element_to_node_map|. Try walking up the | |
| 136 // hierarchy to see if a parent further up was handled. | |
| 137 cur_parent_element = cur_parent_element.parentNode(); | |
| 70 } | 138 } |
| 71 } | 139 } |
| 72 (*element_to_node_map)[element] = child_node; | 140 // Add the child node to the list of resources. |
| 141 resources->push_back(child_node); | |
| 142 // .. and remember which index it was inserted at so we can look it up later. | |
| 143 (*element_to_node_map)[element] = resources->size() - 1; | |
| 73 } | 144 } |
| 74 | 145 |
| 146 bool ShouldHandleElement(const blink::WebElement& element, | |
| 147 const TagToAttributesMap& tag_to_attribute_map) { | |
|
vakh (use Gerrit instead)
2017/02/24 19:08:38
nit: s/tag_to_attribute_map/tag_to_attributes_map
lpz
2017/02/27 15:46:43
Done.
| |
| 148 // Resources with a SRC are always handled. | |
| 149 if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || | |
| 150 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) && | |
| 151 element.hasAttribute("src")) { | |
| 152 return true; | |
| 153 } | |
| 154 | |
| 155 std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii()); | |
| 156 const auto& tag_attribute_iter = tag_to_attribute_map.find(tag_name_lower); | |
| 157 if (tag_attribute_iter == tag_to_attribute_map.end()) { | |
| 158 return false; | |
| 159 } | |
| 160 | |
| 161 const std::unordered_set<std::string>& valid_attributes = | |
| 162 tag_attribute_iter->second; | |
| 163 for (const std::string& attribute : valid_attributes) { | |
| 164 if (element.hasAttribute(blink::WebString::fromASCII(attribute))) { | |
| 165 return true; | |
| 166 } | |
| 167 } | |
| 168 return false; | |
| 169 } | |
| 75 } // namespace | 170 } // namespace |
| 76 | 171 |
| 77 // An upper limit on the number of nodes we collect. | 172 // An upper limit on the number of nodes we collect. |
| 78 uint32_t ThreatDOMDetails::kMaxNodes = 500; | 173 uint32_t ThreatDOMDetails::kMaxNodes = 500; |
| 79 | 174 |
| 80 // static | 175 // static |
| 81 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { | 176 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { |
| 82 // Private constructor and public static Create() method to facilitate | 177 // Private constructor and public static Create() method to facilitate |
| 83 // stubbing out this class for binary-size reduction purposes. | 178 // stubbing out this class for binary-size reduction purposes. |
| 84 return new ThreatDOMDetails(render_frame); | 179 return new ThreatDOMDetails(render_frame); |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 113 return; | 208 return; |
| 114 SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node; | 209 SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node; |
| 115 blink::WebDocument document = frame->document(); | 210 blink::WebDocument document = frame->document(); |
| 116 details_node.url = GURL(document.url()); | 211 details_node.url = GURL(document.url()); |
| 117 if (document.isNull()) { | 212 if (document.isNull()) { |
| 118 // Nothing in this frame. Just report its URL. | 213 // Nothing in this frame. Just report its URL. |
| 119 resources->push_back(details_node); | 214 resources->push_back(details_node); |
| 120 return; | 215 return; |
| 121 } | 216 } |
| 122 | 217 |
| 218 TagToAttributesMap tag_to_attributes_map; | |
| 219 ParseTagAndAttributeParams(&tag_to_attributes_map); | |
|
vakh (use Gerrit instead)
2017/02/27 17:24:35
Can't this be done just once, during init/start-up
lpz
2017/02/28 22:53:28
Done.
| |
| 220 | |
| 123 ElementToNodeMap element_to_node_map; | 221 ElementToNodeMap element_to_node_map; |
| 124 blink::WebElementCollection elements = document.all(); | 222 blink::WebElementCollection elements = document.all(); |
| 125 blink::WebElement element = elements.firstItem(); | 223 blink::WebElement element = elements.firstItem(); |
| 126 for (; !element.isNull(); element = elements.nextItem()) { | 224 for (; !element.isNull(); element = elements.nextItem()) { |
| 127 if (element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || | 225 if (ShouldHandleElement(element, tag_to_attributes_map)) { |
| 128 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) { | |
| 129 HandleElement(element, &details_node, resources, &element_to_node_map); | 226 HandleElement(element, &details_node, resources, &element_to_node_map); |
| 130 if (resources->size() >= kMaxNodes) { | 227 if (resources->size() >= kMaxNodes) { |
| 131 // We have reached kMaxNodes, exit early. | 228 // We have reached kMaxNodes, exit early. |
| 132 resources->push_back(details_node); | 229 resources->push_back(details_node); |
| 133 return; | 230 return; |
| 134 } | 231 } |
| 135 } | 232 } |
| 136 } | 233 } |
| 137 resources->push_back(details_node); | 234 resources->push_back(details_node); |
| 138 } | 235 } |
| 139 | 236 |
| 140 void ThreatDOMDetails::OnDestruct() { | 237 void ThreatDOMDetails::OnDestruct() { |
| 141 delete this; | 238 delete this; |
| 142 } | 239 } |
| 143 | 240 |
| 144 } // namespace safe_browsing | 241 } // namespace safe_browsing |
| OLD | NEW |