| Index: chrome/renderer/safe_browsing/threat_dom_details.cc
|
| diff --git a/chrome/renderer/safe_browsing/threat_dom_details.cc b/chrome/renderer/safe_browsing/threat_dom_details.cc
|
| deleted file mode 100644
|
| index 7d715f15acd6d0be4fe7e7a3e0d2ac8030669f2b..0000000000000000000000000000000000000000
|
| --- a/chrome/renderer/safe_browsing/threat_dom_details.cc
|
| +++ /dev/null
|
| @@ -1,307 +0,0 @@
|
| -// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
| -// Use of this source code is governed by a BSD-style license that can be
|
| -// found in the LICENSE file.
|
| -
|
| -#include "chrome/renderer/safe_browsing/threat_dom_details.h"
|
| -
|
| -#include <algorithm>
|
| -#include <map>
|
| -#include <unordered_set>
|
| -
|
| -#include "base/compiler_specific.h"
|
| -#include "base/metrics/field_trial_params.h"
|
| -#include "base/strings/string_piece.h"
|
| -#include "base/strings/string_split.h"
|
| -#include "base/strings/stringprintf.h"
|
| -#include "components/safe_browsing/common/safebrowsing_messages.h"
|
| -#include "components/safe_browsing/common/safebrowsing_types.h"
|
| -#include "content/public/renderer/render_frame.h"
|
| -#include "third_party/WebKit/public/platform/WebString.h"
|
| -#include "third_party/WebKit/public/web/WebDocument.h"
|
| -#include "third_party/WebKit/public/web/WebElement.h"
|
| -#include "third_party/WebKit/public/web/WebElementCollection.h"
|
| -#include "third_party/WebKit/public/web/WebFrame.h"
|
| -#include "third_party/WebKit/public/web/WebLocalFrame.h"
|
| -
|
| -namespace safe_browsing {
|
| -
|
| -// A map for keeping track of the identity of DOM Elements, used to generate
|
| -// unique IDs for each element and lookup elements IDs by parent Element, to
|
| -// maintain proper parent/child relationships.
|
| -// They key is a WebNode from the DOM, which is basically a pointer so can be
|
| -// copied into the map when inserting new elements.
|
| -// The values are indices into the resource vector, and are used to retrieve IPC
|
| -// messages generated by ThreatDOMDetails.
|
| -using ElementToNodeMap = std::map<blink::WebNode, int>;
|
| -
|
| -// This Feature specifies which non-resource HTML Elements to collect based on
|
| -// their tag and attributes. It's a single param containing a comma-separated
|
| -// list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will
|
| -// collect elements with tag "tag1" that have attribute "id" or "height" set,
|
| -// and elements of tag "tag2" if they have attribute "foo" set. All tag names
|
| -// and attributes should be lower case.
|
| -const base::Feature kThreatDomDetailsTagAndAttributeFeature{
|
| - "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT};
|
| -
|
| -// The name of the param containing the tags and attributes list.
|
| -const char kTagAndAttributeParamName[] = "tag_attribute_csv";
|
| -
|
| -namespace {
|
| -
|
| -// Predicate used to search |tag_and_attributes_list_| by tag_name.
|
| -class TagNameIs {
|
| - public:
|
| - explicit TagNameIs(const std::string& tag) : tag_(tag) {}
|
| - bool operator()(const TagAndAttributesItem& tag_and_attribute) {
|
| - return tag_ == tag_and_attribute.tag_name;
|
| - }
|
| -
|
| - private:
|
| - std::string tag_;
|
| -};
|
| -
|
| -void ParseTagAndAttributeParams(
|
| - std::vector<TagAndAttributesItem>* tag_and_attributes_list) {
|
| - DCHECK(tag_and_attributes_list);
|
| - if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) {
|
| - return;
|
| - }
|
| - tag_and_attributes_list->clear();
|
| - const std::string& tag_attribute_csv_param =
|
| - base::GetFieldTrialParamValueByFeature(
|
| - kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName);
|
| - if (tag_attribute_csv_param.empty()) {
|
| - return;
|
| - }
|
| -
|
| - std::vector<std::string> split =
|
| - base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE,
|
| - base::SPLIT_WANT_NONEMPTY);
|
| - // If we don't have the right number of pairs in the csv then don't bother
|
| - // parsing further.
|
| - if (split.size() % 2 != 0) {
|
| - return;
|
| - }
|
| - for (size_t i = 0; i < split.size(); i += 2) {
|
| - const std::string& tag_name = split[i];
|
| - const std::string& attribute = split[i + 1];
|
| - auto item_iter =
|
| - std::find_if(tag_and_attributes_list->begin(),
|
| - tag_and_attributes_list->end(), TagNameIs(tag_name));
|
| - if (item_iter == tag_and_attributes_list->end()) {
|
| - TagAndAttributesItem item;
|
| - item.tag_name = tag_name;
|
| - item.attributes.push_back(attribute);
|
| - tag_and_attributes_list->push_back(item);
|
| - } else {
|
| - item_iter->attributes.push_back(attribute);
|
| - }
|
| - }
|
| -
|
| - std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(),
|
| - [](const TagAndAttributesItem& a, const TagAndAttributesItem& b) {
|
| - return a.tag_name < b.tag_name;
|
| - });
|
| -}
|
| -
|
| -SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement(
|
| - const blink::WebNode& element,
|
| - const safe_browsing::ElementToNodeMap& element_to_node_map,
|
| - std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
|
| - DCHECK(element_to_node_map.count(element) > 0);
|
| - int resource_index = element_to_node_map.at(element);
|
| - return &(resources->at(resource_index));
|
| -}
|
| -
|
| -std::string TruncateAttributeString(const std::string& input) {
|
| - if (input.length() <= ThreatDOMDetails::kMaxAttributeStringLength) {
|
| - return input;
|
| - }
|
| -
|
| - std::string truncated;
|
| - base::TruncateUTF8ToByteSize(
|
| - input, ThreatDOMDetails::kMaxAttributeStringLength - 3, &truncated);
|
| - truncated.append("...");
|
| - return truncated;
|
| -}
|
| -
|
| -// Handler for the various HTML elements that we extract URLs from.
|
| -void HandleElement(
|
| - const blink::WebElement& element,
|
| - const std::vector<TagAndAttributesItem>& tag_and_attributes_list,
|
| - SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node,
|
| - std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources,
|
| - safe_browsing::ElementToNodeMap* element_to_node_map) {
|
| - // Retrieve the link and resolve the link in case it's relative.
|
| - blink::WebURL full_url =
|
| - element.document().completeURL(element.getAttribute("src"));
|
| -
|
| - const GURL& child_url = GURL(full_url);
|
| - if (!child_url.is_empty() && child_url.is_valid()) {
|
| - summary_node->children.push_back(child_url);
|
| - }
|
| -
|
| - SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node;
|
| - child_node.url = child_url;
|
| - child_node.tag_name = element.tagName().utf8();
|
| - child_node.parent = summary_node->url;
|
| -
|
| - // Populate the element's attributes, but only collect the ones that are
|
| - // configured in the finch study.
|
| - const auto& tag_attribute_iter = std::find_if(
|
| - tag_and_attributes_list.begin(), tag_and_attributes_list.end(),
|
| - TagNameIs(base::ToLowerASCII(child_node.tag_name)));
|
| - if (tag_attribute_iter != tag_and_attributes_list.end()) {
|
| - const std::vector<std::string> attributes_to_collect =
|
| - tag_attribute_iter->attributes;
|
| - for (const std::string& attribute : attributes_to_collect) {
|
| - blink::WebString attr_webstring = blink::WebString::fromASCII(attribute);
|
| - if (!element.hasAttribute(attr_webstring)) {
|
| - continue;
|
| - }
|
| - child_node.attributes.push_back(std::make_pair(
|
| - attribute, TruncateAttributeString(
|
| - element.getAttribute(attr_webstring).ascii())));
|
| - if (child_node.attributes.size() == ThreatDOMDetails::kMaxAttributes) {
|
| - break;
|
| - }
|
| - }
|
| - }
|
| -
|
| - // Update the ID mapping. First generate the ID for the current node.
|
| - // Then, if its parent is available, set the current node's parent ID, and
|
| - // also update the parent's children with the current node's ID.
|
| - const int child_id = element_to_node_map->size() + 1;
|
| - child_node.node_id = child_id;
|
| - blink::WebNode cur_parent_element = element.parentNode();
|
| - while (!cur_parent_element.isNull()) {
|
| - if (element_to_node_map->count(cur_parent_element) > 0) {
|
| - SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node =
|
| - GetNodeForElement(cur_parent_element, *element_to_node_map,
|
| - resources);
|
| - child_node.parent_node_id = parent_node->node_id;
|
| - parent_node->child_node_ids.push_back(child_id);
|
| -
|
| - // TODO(lpz): Consider also updating the URL-level parent/child mapping
|
| - // here. Eg: child_node.parent=parent_node.url, and
|
| - // parent_node.children.push_back(child_url).
|
| - break;
|
| - } else {
|
| - // It's possible that the direct parent of this node wasn't handled, so it
|
| - // isn't represented in |element_to_node_map|. Try walking up the
|
| - // hierarchy to see if a parent further up was handled.
|
| - cur_parent_element = cur_parent_element.parentNode();
|
| - }
|
| - }
|
| - // Add the child node to the list of resources.
|
| - resources->push_back(child_node);
|
| - // .. and remember which index it was inserted at so we can look it up later.
|
| - (*element_to_node_map)[element] = resources->size() - 1;
|
| -}
|
| -
|
| -bool ShouldHandleElement(
|
| - const blink::WebElement& element,
|
| - const std::vector<TagAndAttributesItem>& tag_and_attributes_list) {
|
| - // Resources with a SRC are always handled.
|
| - if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") ||
|
| - element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) &&
|
| - element.hasAttribute("src")) {
|
| - return true;
|
| - }
|
| -
|
| - std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii());
|
| - const auto& tag_attribute_iter =
|
| - std::find_if(tag_and_attributes_list.begin(),
|
| - tag_and_attributes_list.end(), TagNameIs(tag_name_lower));
|
| - if (tag_attribute_iter == tag_and_attributes_list.end()) {
|
| - return false;
|
| - }
|
| -
|
| - const std::vector<std::string>& valid_attributes =
|
| - tag_attribute_iter->attributes;
|
| - for (const std::string& attribute : valid_attributes) {
|
| - if (element.hasAttribute(blink::WebString::fromASCII(attribute))) {
|
| - return true;
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -} // namespace
|
| -
|
| -TagAndAttributesItem::TagAndAttributesItem() {}
|
| -TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item)
|
| - : tag_name(item.tag_name), attributes(item.attributes) {}
|
| -TagAndAttributesItem::~TagAndAttributesItem() {}
|
| -
|
| -uint32_t ThreatDOMDetails::kMaxNodes = 500;
|
| -uint32_t ThreatDOMDetails::kMaxAttributes = 100;
|
| -uint32_t ThreatDOMDetails::kMaxAttributeStringLength = 100;
|
| -
|
| -// static
|
| -ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) {
|
| - // Private constructor and public static Create() method to facilitate
|
| - // stubbing out this class for binary-size reduction purposes.
|
| - return new ThreatDOMDetails(render_frame);
|
| -}
|
| -
|
| -ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame)
|
| - : content::RenderFrameObserver(render_frame) {
|
| - ParseTagAndAttributeParams(&tag_and_attributes_list_);
|
| -}
|
| -
|
| -ThreatDOMDetails::~ThreatDOMDetails() {}
|
| -
|
| -bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) {
|
| - bool handled = true;
|
| - IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message)
|
| - IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails,
|
| - OnGetThreatDOMDetails)
|
| - IPC_MESSAGE_UNHANDLED(handled = false)
|
| - IPC_END_MESSAGE_MAP()
|
| - return handled;
|
| -}
|
| -
|
| -void ThreatDOMDetails::OnGetThreatDOMDetails() {
|
| - std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node> resources;
|
| - ExtractResources(&resources);
|
| - // Notify the browser.
|
| - Send(new SafeBrowsingHostMsg_ThreatDOMDetails(routing_id(), resources));
|
| -}
|
| -
|
| -void ThreatDOMDetails::ExtractResources(
|
| - std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
|
| - blink::WebFrame* frame = render_frame()->GetWebFrame();
|
| - if (!frame)
|
| - return;
|
| - SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node;
|
| - blink::WebDocument document = frame->document();
|
| - details_node.url = GURL(document.url());
|
| - if (document.isNull()) {
|
| - // Nothing in this frame. Just report its URL.
|
| - resources->push_back(details_node);
|
| - return;
|
| - }
|
| -
|
| - ElementToNodeMap element_to_node_map;
|
| - blink::WebElementCollection elements = document.all();
|
| - blink::WebElement element = elements.firstItem();
|
| - for (; !element.isNull(); element = elements.nextItem()) {
|
| - if (ShouldHandleElement(element, tag_and_attributes_list_)) {
|
| - HandleElement(element, tag_and_attributes_list_, &details_node, resources,
|
| - &element_to_node_map);
|
| - if (resources->size() >= kMaxNodes) {
|
| - // We have reached kMaxNodes, exit early.
|
| - resources->push_back(details_node);
|
| - return;
|
| - }
|
| - }
|
| - }
|
| - resources->push_back(details_node);
|
| -}
|
| -
|
| -void ThreatDOMDetails::OnDestruct() {
|
| - delete this;
|
| -}
|
| -
|
| -} // namespace safe_browsing
|
|
|