Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(169)

Side by Side Diff: chrome/renderer/safe_browsing/threat_dom_details.cc

Issue 2756933003: Componentize safe_browsing: move renderer/ reporting part for WebView. (Closed)
Patch Set: fix comments Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/threat_dom_details.h"
6
7 #include <algorithm>
8 #include <map>
9 #include <unordered_set>
10
11 #include "base/compiler_specific.h"
12 #include "base/metrics/field_trial_params.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/stringprintf.h"
16 #include "components/safe_browsing/common/safebrowsing_messages.h"
17 #include "components/safe_browsing/common/safebrowsing_types.h"
18 #include "content/public/renderer/render_frame.h"
19 #include "third_party/WebKit/public/platform/WebString.h"
20 #include "third_party/WebKit/public/web/WebDocument.h"
21 #include "third_party/WebKit/public/web/WebElement.h"
22 #include "third_party/WebKit/public/web/WebElementCollection.h"
23 #include "third_party/WebKit/public/web/WebFrame.h"
24 #include "third_party/WebKit/public/web/WebLocalFrame.h"
25
26 namespace safe_browsing {
27
28 // A map for keeping track of the identity of DOM Elements, used to generate
29 // unique IDs for each element and look up element IDs by parent Element, to
30 // maintain proper parent/child relationships.
31 // The key is a WebNode from the DOM, which is basically a pointer so can be
32 // copied into the map when inserting new elements.
33 // The values are indices into the resource vector, and are used to retrieve IPC
34 // messages generated by ThreatDOMDetails.
35 using ElementToNodeMap = std::map<blink::WebNode, int>;
36
37 // This Feature specifies which non-resource HTML Elements to collect based on
38 // their tag and attributes. It's a single param containing a comma-separated
39 // list of pairs. For example: "tag1,id,tag1,height,tag2,foo" - this will
40 // collect elements with tag "tag1" that have attribute "id" or "height" set,
41 // and elements of tag "tag2" if they have attribute "foo" set. All tag names
42 // and attributes should be lower case.
43 const base::Feature kThreatDomDetailsTagAndAttributeFeature{
44 "ThreatDomDetailsTagAttributes", base::FEATURE_DISABLED_BY_DEFAULT};
45
46 // The name of the param containing the tags and attributes list.
47 const char kTagAndAttributeParamName[] = "tag_attribute_csv";
48
49 namespace {
50
51 // Predicate used to search |tag_and_attributes_list_| by tag_name.
52 class TagNameIs {
53 public:
54 explicit TagNameIs(const std::string& tag) : tag_(tag) {}
55 bool operator()(const TagAndAttributesItem& tag_and_attribute) {
56 return tag_ == tag_and_attribute.tag_name;
57 }
58
59 private:
60 std::string tag_;
61 };
62
63 void ParseTagAndAttributeParams(
64 std::vector<TagAndAttributesItem>* tag_and_attributes_list) {
65 DCHECK(tag_and_attributes_list);
66 if (!base::FeatureList::IsEnabled(kThreatDomDetailsTagAndAttributeFeature)) {
67 return;
68 }
69 tag_and_attributes_list->clear();
70 const std::string& tag_attribute_csv_param =
71 base::GetFieldTrialParamValueByFeature(
72 kThreatDomDetailsTagAndAttributeFeature, kTagAndAttributeParamName);
73 if (tag_attribute_csv_param.empty()) {
74 return;
75 }
76
77 std::vector<std::string> split =
78 base::SplitString(tag_attribute_csv_param, ",", base::TRIM_WHITESPACE,
79 base::SPLIT_WANT_NONEMPTY);
80 // If we don't have the right number of pairs in the csv then don't bother
81 // parsing further.
82 if (split.size() % 2 != 0) {
83 return;
84 }
85 for (size_t i = 0; i < split.size(); i += 2) {
86 const std::string& tag_name = split[i];
87 const std::string& attribute = split[i + 1];
88 auto item_iter =
89 std::find_if(tag_and_attributes_list->begin(),
90 tag_and_attributes_list->end(), TagNameIs(tag_name));
91 if (item_iter == tag_and_attributes_list->end()) {
92 TagAndAttributesItem item;
93 item.tag_name = tag_name;
94 item.attributes.push_back(attribute);
95 tag_and_attributes_list->push_back(item);
96 } else {
97 item_iter->attributes.push_back(attribute);
98 }
99 }
100
101 std::sort(tag_and_attributes_list->begin(), tag_and_attributes_list->end(),
102 [](const TagAndAttributesItem& a, const TagAndAttributesItem& b) {
103 return a.tag_name < b.tag_name;
104 });
105 }
106
107 SafeBrowsingHostMsg_ThreatDOMDetails_Node* GetNodeForElement(
108 const blink::WebNode& element,
109 const safe_browsing::ElementToNodeMap& element_to_node_map,
110 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
111 DCHECK(element_to_node_map.count(element) > 0);
112 int resource_index = element_to_node_map.at(element);
113 return &(resources->at(resource_index));
114 }
115
116 std::string TruncateAttributeString(const std::string& input) {
117 if (input.length() <= ThreatDOMDetails::kMaxAttributeStringLength) {
118 return input;
119 }
120
121 std::string truncated;
122 base::TruncateUTF8ToByteSize(
123 input, ThreatDOMDetails::kMaxAttributeStringLength - 3, &truncated);
124 truncated.append("...");
125 return truncated;
126 }
127
128 // Handler for the various HTML elements that we extract URLs from.
129 void HandleElement(
130 const blink::WebElement& element,
131 const std::vector<TagAndAttributesItem>& tag_and_attributes_list,
132 SafeBrowsingHostMsg_ThreatDOMDetails_Node* summary_node,
133 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources,
134 safe_browsing::ElementToNodeMap* element_to_node_map) {
135 // Retrieve the link and resolve the link in case it's relative.
136 blink::WebURL full_url =
137 element.document().completeURL(element.getAttribute("src"));
138
139 const GURL& child_url = GURL(full_url);
140 if (!child_url.is_empty() && child_url.is_valid()) {
141 summary_node->children.push_back(child_url);
142 }
143
144 SafeBrowsingHostMsg_ThreatDOMDetails_Node child_node;
145 child_node.url = child_url;
146 child_node.tag_name = element.tagName().utf8();
147 child_node.parent = summary_node->url;
148
149 // Populate the element's attributes, but only collect the ones that are
150 // configured in the finch study.
151 const auto& tag_attribute_iter = std::find_if(
152 tag_and_attributes_list.begin(), tag_and_attributes_list.end(),
153 TagNameIs(base::ToLowerASCII(child_node.tag_name)));
154 if (tag_attribute_iter != tag_and_attributes_list.end()) {
155 const std::vector<std::string> attributes_to_collect =
156 tag_attribute_iter->attributes;
157 for (const std::string& attribute : attributes_to_collect) {
158 blink::WebString attr_webstring = blink::WebString::fromASCII(attribute);
159 if (!element.hasAttribute(attr_webstring)) {
160 continue;
161 }
162 child_node.attributes.push_back(std::make_pair(
163 attribute, TruncateAttributeString(
164 element.getAttribute(attr_webstring).ascii())));
165 if (child_node.attributes.size() == ThreatDOMDetails::kMaxAttributes) {
166 break;
167 }
168 }
169 }
170
171 // Update the ID mapping. First generate the ID for the current node.
172 // Then, if its parent is available, set the current node's parent ID, and
173 // also update the parent's children with the current node's ID.
174 const int child_id = element_to_node_map->size() + 1;
175 child_node.node_id = child_id;
176 blink::WebNode cur_parent_element = element.parentNode();
177 while (!cur_parent_element.isNull()) {
178 if (element_to_node_map->count(cur_parent_element) > 0) {
179 SafeBrowsingHostMsg_ThreatDOMDetails_Node* parent_node =
180 GetNodeForElement(cur_parent_element, *element_to_node_map,
181 resources);
182 child_node.parent_node_id = parent_node->node_id;
183 parent_node->child_node_ids.push_back(child_id);
184
185 // TODO(lpz): Consider also updating the URL-level parent/child mapping
186 // here. Eg: child_node.parent=parent_node.url, and
187 // parent_node.children.push_back(child_url).
188 break;
189 } else {
190 // It's possible that the direct parent of this node wasn't handled, so it
191 // isn't represented in |element_to_node_map|. Try walking up the
192 // hierarchy to see if a parent further up was handled.
193 cur_parent_element = cur_parent_element.parentNode();
194 }
195 }
196 // Add the child node to the list of resources.
197 resources->push_back(child_node);
198 // .. and remember which index it was inserted at so we can look it up later.
199 (*element_to_node_map)[element] = resources->size() - 1;
200 }
201
202 bool ShouldHandleElement(
203 const blink::WebElement& element,
204 const std::vector<TagAndAttributesItem>& tag_and_attributes_list) {
205 // Resources with a SRC are always handled.
206 if ((element.hasHTMLTagName("iframe") || element.hasHTMLTagName("frame") ||
207 element.hasHTMLTagName("embed") || element.hasHTMLTagName("script")) &&
208 element.hasAttribute("src")) {
209 return true;
210 }
211
212 std::string tag_name_lower = base::ToLowerASCII(element.tagName().ascii());
213 const auto& tag_attribute_iter =
214 std::find_if(tag_and_attributes_list.begin(),
215 tag_and_attributes_list.end(), TagNameIs(tag_name_lower));
216 if (tag_attribute_iter == tag_and_attributes_list.end()) {
217 return false;
218 }
219
220 const std::vector<std::string>& valid_attributes =
221 tag_attribute_iter->attributes;
222 for (const std::string& attribute : valid_attributes) {
223 if (element.hasAttribute(blink::WebString::fromASCII(attribute))) {
224 return true;
225 }
226 }
227 return false;
228 }
229
230 } // namespace
231
232 TagAndAttributesItem::TagAndAttributesItem() {}
233 TagAndAttributesItem::TagAndAttributesItem(const TagAndAttributesItem& item)
234 : tag_name(item.tag_name), attributes(item.attributes) {}
235 TagAndAttributesItem::~TagAndAttributesItem() {}
236
// Limits applied while collecting DOM details. They are mutable statics rather
// than constants - presumably so they can be overridden, e.g. by tests; confirm
// against the header.

// ExtractResources() stops handling further elements once the number of
// collected nodes reaches this value.
uint32_t ThreatDOMDetails::kMaxNodes = 500;
// HandleElement() stops collecting attributes for a node once it holds this
// many.
uint32_t ThreatDOMDetails::kMaxAttributes = 100;
// TruncateAttributeString() shortens attribute values longer than this.
uint32_t ThreatDOMDetails::kMaxAttributeStringLength = 100;
240
241 // static
242 ThreatDOMDetails* ThreatDOMDetails::Create(content::RenderFrame* render_frame) {
243 // Private constructor and public static Create() method to facilitate
244 // stubbing out this class for binary-size reduction purposes.
245 return new ThreatDOMDetails(render_frame);
246 }
247
248 ThreatDOMDetails::ThreatDOMDetails(content::RenderFrame* render_frame)
249 : content::RenderFrameObserver(render_frame) {
250 ParseTagAndAttributeParams(&tag_and_attributes_list_);
251 }
252
253 ThreatDOMDetails::~ThreatDOMDetails() {}
254
// Dispatches incoming IPC messages. SafeBrowsingMsg_GetThreatDOMDetails is
// routed to OnGetThreatDOMDetails(); any other message is reported as
// unhandled. Returns whether |message| was handled here.
bool ThreatDOMDetails::OnMessageReceived(const IPC::Message& message) {
  // Assume handled; the UNHANDLED entry of the map below resets this.
  bool handled = true;
  IPC_BEGIN_MESSAGE_MAP(ThreatDOMDetails, message)
    IPC_MESSAGE_HANDLER(SafeBrowsingMsg_GetThreatDOMDetails,
                        OnGetThreatDOMDetails)
    IPC_MESSAGE_UNHANDLED(handled = false)
  IPC_END_MESSAGE_MAP()
  return handled;
}
264
265 void ThreatDOMDetails::OnGetThreatDOMDetails() {
266 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node> resources;
267 ExtractResources(&resources);
268 // Notify the browser.
269 Send(new SafeBrowsingHostMsg_ThreatDOMDetails(routing_id(), resources));
270 }
271
272 void ThreatDOMDetails::ExtractResources(
273 std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>* resources) {
274 blink::WebFrame* frame = render_frame()->GetWebFrame();
275 if (!frame)
276 return;
277 SafeBrowsingHostMsg_ThreatDOMDetails_Node details_node;
278 blink::WebDocument document = frame->document();
279 details_node.url = GURL(document.url());
280 if (document.isNull()) {
281 // Nothing in this frame. Just report its URL.
282 resources->push_back(details_node);
283 return;
284 }
285
286 ElementToNodeMap element_to_node_map;
287 blink::WebElementCollection elements = document.all();
288 blink::WebElement element = elements.firstItem();
289 for (; !element.isNull(); element = elements.nextItem()) {
290 if (ShouldHandleElement(element, tag_and_attributes_list_)) {
291 HandleElement(element, tag_and_attributes_list_, &details_node, resources,
292 &element_to_node_map);
293 if (resources->size() >= kMaxNodes) {
294 // We have reached kMaxNodes, exit early.
295 resources->push_back(details_node);
296 return;
297 }
298 }
299 }
300 resources->push_back(details_node);
301 }
302
// Deletes this object. Instances are heap-allocated in Create(), and this is
// where they are released.
void ThreatDOMDetails::OnDestruct() {
  delete this;
}
306
307 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/threat_dom_details.h ('k') | chrome/renderer/safe_browsing/threat_dom_details_browsertest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698