| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/renderer/safe_browsing/threat_dom_details.h" | |
| 6 | |
| 7 #include <algorithm> | |
| 8 #include <map> | |
| 9 #include <unordered_set> | |
| 10 | |
| 11 #include "base/compiler_specific.h" | |
| 12 #include "base/metrics/field_trial_params.h" | |
| 13 #include "base/strings/string_piece.h" | |
| 14 #include "base/strings/string_split.h" | |
| 15 #include "base/strings/stringprintf.h" | |
| 16 #include "components/safe_browsing/common/safebrowsing_messages.h" | |
| 17 #include "components/safe_browsing/common/safebrowsing_types.h" | |
| 18 #include "content/public/renderer/render_frame.h" | |
| 19 #include "third_party/WebKit/public/platform/WebString.h" | |
| 20 #include "third_party/WebKit/public/web/WebDocument.h" | |
| 21 #include "third_party/WebKit/public/web/WebElement.h" | |
| 22 #include "third_party/WebKit/public/web/WebElementCollection.h" | |
| 23 #include "third_party/WebKit/public/web/WebFrame.h" | |
| 24 #include "third_party/WebKit/public/web/WebLocalFrame.h" | |
| 25 | |
| 26 namespace safe_browsing { | |
| 27 | |
| 28 // A map for keeping track of the identity of DOM Elements, used to generate | |
| 29 // unique IDs for each element and lookup elements IDs by parent Element, to | |
| 30 // maintain proper parent/child relationships. | |
| 31 // They key is a WebNode from the DOM, which is basically a pointer so can be | |
| 32 // copied into the map when inserting new elements. | |
| 33 // The values are indices into the resource vector, and are used to retrieve IPC | |
| 34 // messages generated by ThreatDOMDetails. | |
| 35 using ElementToNodeMap = std::map<blink::WebNode, int>; | |
| 36 | |
| 37 // This Feature specifies which non-resource HTML Elements to collect based on | |
| 38 // their tag and attributes. It's a single param containing a comma-separated | |
| 39 // list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will | |
| 40 // collect elements with tag "tag1" that have attribute "id" or "height" set, | |
| 41 // and elements of tag "tag2" if they have attribute "foo" set. All tag names | |
| 42 // and attributes should be lower case. | |
| 43 const base::Feature kThreatDomDetailsTagAndAttributeFeature{ | |
| 44 "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT}; | |
| 45 | |
// Name of the feature param that carries the comma-separated list of tags and
// attributes to collect.
const char kTagAndAttributeParamName[] = "tag_attribute_csv";
| 48 | |
| 49 namespace { | |
| 50 | |
| 51 // Predicate used to search |tag_and_attributes_list_| by tag_name. | |
| 52 class TagNameIs { | |
| 53 public: | |
| 54 explicit TagNameIs(const std::string& tag) : tag_(tag) {} | |
| 55 bool operator()(const TagAndAttributesItem& tag_and_attribute) { | |
| 56 return tag_ == tag_and_attribute.tag_name; | |
| 57 } | |
| 58 | |
| 59 private: | |
| 60 std::string tag_; | |
| 61 }; | |
| 62 | |
| 63 void ParseTagAndAttributeParams( | |
| 64 std::vector<TagAndAttributesItem>* tag_and_attributes_list) { | |
| 65 DCHECK(tag_and_attributes_list); | |
| 66 if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) { | |
| 67 return; | |
| 68 } | |
| 69 tag_and_attributes_list->clear(); | |
| 70 const std::string& tag_attribute_csv_param = | |
| 71 base::GetFieldTrialParamValueByFeature( | |
| 72 kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName); | |
| 73 if (tag_attribute_csv_param.empty()) { | |
| 74 return; | |
| 75 } | |
| 76 | |
| 77 std::vector<std::string> split = | |
| 78 base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE, | |
| 79 base::SPLIT_WANT_NONEMPTY); | |
| 80 // If we don't have the right number of pairs in the csv then don't bother | |
| 81 // parsing further. | |
| 82 if (split.size() % 2 != 0) { | |
| 83 return; | |
| 84 } | |
| 85 for (size_t i = 0; i < split.size(); i += 2) { | |
| 86 const std::string& tag_name = split[i]; | |
| 87 const std::string& attribute = split[i + 1]; | |
| 88 auto item_iter = | |
| 89 std::find_if(tag_and_attributes_list->begin(), | |
| 90 tag_and_attributes_list->end(), TagNameIs(tag_name)); | |
| 91 if (item_iter == tag_and_attributes_list->end()) { | |
| 92 TagAndAttributesItem item; | |
| 93 item.tag_name = tag_name; | |
| 94 item.attributes.push_back(attribute); | |
| 95 tag_and_attributes_list->push_back(item); | |
| 96 } else { | |
| 97 item_iter->attributes.push_back(attribute); | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(), | |
| 102 [](const TagAndAttributesItem& a, const TagAndAttributesItem& b) { | |
| 103 return a.tag_name < b.tag_name; | |
| 104 }); | |
| 105 } | |
| 106 | |
| 107 SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement( | |
| 108 const blink::WebNode& element, | |
| 109 const safe_browsing::ElementToNodeMap& element_to_node_map, | |
| 110 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) { | |
| 111 DCHECK(element_to_node_map.count(element) > 0); | |
| 112 int resource_index = element_to_node_map.at(element); | |
| 113 return &(resources->at(resource_index)); | |
| 114 } | |
| 115 | |
| 116 std::string TruncateAttributeString(const std::string& input) { | |
| 117 if (input.length() <= ThreatDOMDetails::kMaxAttributeStringLength) { | |
| 118 return input; | |
| 119 } | |
| 120 | |
| 121 std::string truncated; | |
| 122 base::TruncateUTF8ToByteSize( | |
| 123 input, ThreatDOMDetails::kMaxAttributeStringLength - 3, &truncated); | |
| 124 truncated.append("..."); | |
| 125 return truncated; | |
| 126 } | |
| 127 | |
| 128 // Handler for the various HTML elements that we extract URLs from. | |
| 129 void HandleElement( | |
| 130 const blink::WebElement& element, | |
| 131 const std::vector<TagAndAttributesItem>& tag_and_attributes_list, | |
| 132 SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node, | |
| 133 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources, | |
| 134 safe_browsing::ElementToNodeMap* element_to_node_map) { | |
| 135 // Retrieve the link and resolve the link in case it's relative. | |
| 136 blink::WebURL full_url = | |
| 137 element.document().completeURL(element.getAttribute("src")); | |
| 138 | |
| 139 const GURL& child_url = GURL(full_url); | |
| 140 if (!child_url.is_empty() && child_url.is_valid()) { | |
| 141 summary_node->children.push_back(child_url); | |
| 142 } | |
| 143 | |
| 144 SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node; | |
| 145 child_node.url = child_url; | |
| 146 child_node.tag_name = element.tagName().utf8(); | |
| 147 child_node.parent = summary_node->url; | |
| 148 | |
| 149 // Populate the element's attributes, but only collect the ones that are | |
| 150 // configured in the finch study. | |
| 151 const auto& tag_attribute_iter = std::find_if( | |
| 152 tag_and_attributes_list.begin(), tag_and_attributes_list.end(), | |
| 153 TagNameIs(base::ToLowerASCII(child_node.tag_name))); | |
| 154 if (tag_attribute_iter != tag_and_attributes_list.end()) { | |
| 155 const std::vector<std::string> attributes_to_collect = | |
| 156 tag_attribute_iter->attributes; | |
| 157 for (const std::string& attribute : attributes_to_collect) { | |
| 158 blink::WebString attr_webstring = blink::WebString::fromASCII(attribute); | |
| 159 if (!element.hasAttribute(attr_webstring)) { | |
| 160 continue; | |
| 161 } | |
| 162 child_node.attributes.push_back(std::make_pair( | |
| 163 attribute, TruncateAttributeString( | |
| 164 element.getAttribute(attr_webstring).ascii()))); | |
| 165 if (child_node.attributes.size() == ThreatDOMDetails::kMaxAttributes) { | |
| 166 break; | |
| 167 } | |
| 168 } | |
| 169 } | |
| 170 | |
| 171 // Update the ID mapping. First generate the ID for the current node. | |
| 172 // Then, if its parent is available, set the current node's parent ID, and | |
| 173 // also update the parent's children with the current node's ID. | |
| 174 const int child_id = element_to_node_map->size() + 1; | |
| 175 child_node.node_id = child_id; | |
| 176 blink::WebNode cur_parent_element = element.parentNode(); | |
| 177 while (!cur_parent_element.isNull()) { | |
| 178 if (element_to_node_map->count(cur_parent_element) > 0) { | |
| 179 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node = | |
| 180 GetNodeForElement(cur_parent_element, *element_to_node_map, | |
| 181 resources); | |
| 182 child_node.parent_node_id = parent_node->node_id; | |
| 183 parent_node->child_node_ids.push_back(child_id); | |
| 184 | |
| 185 // TODO(lpz): Consider also updating the URL-level parent/child mapping | |
| 186 // here. Eg: child_node.parent=parent_node.url, and | |
| 187 // parent_node.children.push_back(child_url). | |
| 188 break; | |
| 189 } else { | |
| 190 // It's possible that the direct parent of this node wasn't handled, so it | |
| 191 // isn't represented in |element_to_node_map|. Try walking up the | |
| 192 // hierarchy to see if a parent further up was handled. | |
| 193 cur_parent_element = cur_parent_element.parentNode(); | |
| 194 } | |
| 195 } | |
| 196 // Add the child node to the list of resources. | |
| 197 resources->push_back(child_node); | |
| 198 // .. and remember which index it was inserted at so we can look it up later. | |
| 199 (*element_to_node_map)[element] = resources->size() - 1; | |
| 200 } | |
| 201 | |
| 202 bool ShouldHandleElement( | |
| 203 const blink::WebElement& element, | |
| 204 const std::vector<TagAndAttributesItem>& tag_and_attributes_list) { | |
| 205 // Resources with a SRC are always handled. | |
| 206 if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") || | |
| 207 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) && | |
| 208 element.hasAttribute("src")) { | |
| 209 return true; | |
| 210 } | |
| 211 | |
| 212 std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii()); | |
| 213 const auto& tag_attribute_iter = | |
| 214 std::find_if(tag_and_attributes_list.begin(), | |
| 215 tag_and_attributes_list.end(), TagNameIs(tag_name_lower)); | |
| 216 if (tag_attribute_iter == tag_and_attributes_list.end()) { | |
| 217 return false; | |
| 218 } | |
| 219 | |
| 220 const std::vector<std::string>& valid_attributes = | |
| 221 tag_attribute_iter->attributes; | |
| 222 for (const std::string& attribute : valid_attributes) { | |
| 223 if (element.hasAttribute(blink::WebString::fromASCII(attribute))) { | |
| 224 return true; | |
| 225 } | |
| 226 } | |
| 227 return false; | |
| 228 } | |
| 229 | |
| 230 } // namespace | |
| 231 | |
| 232 TagAndAttributesItem::TagAndAttributesItem() {} | |
| 233 TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item) | |
| 234 : tag_name(item.tag_name), attributes(item.attributes) {} | |
| 235 TagAndAttributesItem::~TagAndAttributesItem() {} | |
| 236 | |
| 237 uint32_t ThreatDOMDetails::kMaxNodes = 500; | |
| 238 uint32_t ThreatDOMDetails::kMaxAttributes = 100; | |
| 239 uint32_t ThreatDOMDetails::kMaxAttributeStringLength = 100; | |
| 240 | |
| 241 // static | |
| 242 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) { | |
| 243 // Private constructor and public static Create() method to facilitate | |
| 244 // stubbing out this class for binary-size reduction purposes. | |
| 245 return new ThreatDOMDetails(render_frame); | |
| 246 } | |
| 247 | |
| 248 ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame) | |
| 249 : content::RenderFrameObserver(render_frame) { | |
| 250 ParseTagAndAttributeParams(&tag_and_attributes_list_); | |
| 251 } | |
| 252 | |
| 253 ThreatDOMDetails::~ThreatDOMDetails() {} | |
| 254 | |
| 255 bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) { | |
| 256 bool handled = true; | |
| 257 IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message) | |
| 258 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails, | |
| 259 OnGetThreatDOMDetails) | |
| 260 IPC_MESSAGE_UNHANDLED(handled = false) | |
| 261 IPC_END_MESSAGE_MAP() | |
| 262 return handled; | |
| 263 } | |
| 264 | |
| 265 void ThreatDOMDetails::OnGetThreatDOMDetails() { | |
| 266 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node> resources; | |
| 267 ExtractResources(&resources); | |
| 268 // Notify the browser. | |
| 269 Send(new SafeBrowsingHostMsg_ThreatDOMDetails(routing_id(), resources)); | |
| 270 } | |
| 271 | |
| 272 void ThreatDOMDetails::ExtractResources( | |
| 273 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) { | |
| 274 blink::WebFrame* frame = render_frame()->GetWebFrame(); | |
| 275 if (!frame) | |
| 276 return; | |
| 277 SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node; | |
| 278 blink::WebDocument document = frame->document(); | |
| 279 details_node.url = GURL(document.url()); | |
| 280 if (document.isNull()) { | |
| 281 // Nothing in this frame. Just report its URL. | |
| 282 resources->push_back(details_node); | |
| 283 return; | |
| 284 } | |
| 285 | |
| 286 ElementToNodeMap element_to_node_map; | |
| 287 blink::WebElementCollection elements = document.all(); | |
| 288 blink::WebElement element = elements.firstItem(); | |
| 289 for (; !element.isNull(); element = elements.nextItem()) { | |
| 290 if (ShouldHandleElement(element, tag_and_attributes_list_)) { | |
| 291 HandleElement(element, tag_and_attributes_list_, &details_node, resources, | |
| 292 &element_to_node_map); | |
| 293 if (resources->size() >= kMaxNodes) { | |
| 294 // We have reached kMaxNodes, exit early. | |
| 295 resources->push_back(details_node); | |
| 296 return; | |
| 297 } | |
| 298 } | |
| 299 } | |
| 300 resources->push_back(details_node); | |
| 301 } | |
| 302 | |
| 303 void ThreatDOMDetails::OnDestruct() { | |
| 304 delete this; | |
| 305 } | |
| 306 | |
| 307 } // namespace safe_browsing | |
| OLD | NEW |