Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(203)

Side by Side Diff: components/safe_browsing/browser/threat_details.cc

Issue 2837603002: Content API changes to improve DOM stitching in ThreatDetails code. (Closed)
Patch Set: Use explicitly-sized int types in IPC definition Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // Implementation of the ThreatDetails class. 5 // Implementation of the ThreatDetails class.
6 6
7 #include "components/safe_browsing/browser/threat_details.h" 7 #include "components/safe_browsing/browser/threat_details.h"
8 8
9 #include <stddef.h> 9 #include <stddef.h>
10 #include <stdint.h> 10 #include <stdint.h>
11 11
12 #include "base/bind.h" 12 #include "base/bind.h"
13 #include "base/lazy_instance.h" 13 #include "base/lazy_instance.h"
14 #include "base/metrics/histogram_macros.h" 14 #include "base/metrics/histogram_macros.h"
15 #include "base/strings/string_util.h" 15 #include "base/strings/string_util.h"
16 #include "components/history/core/browser/history_service.h" 16 #include "components/history/core/browser/history_service.h"
17 #include "components/safe_browsing/base_ui_manager.h" 17 #include "components/safe_browsing/base_ui_manager.h"
18 #include "components/safe_browsing/browser/threat_details_cache.h" 18 #include "components/safe_browsing/browser/threat_details_cache.h"
19 #include "components/safe_browsing/browser/threat_details_history.h" 19 #include "components/safe_browsing/browser/threat_details_history.h"
20 #include "components/safe_browsing/common/safebrowsing_messages.h" 20 #include "components/safe_browsing/common/safebrowsing_messages.h"
21 #include "content/public/browser/browser_thread.h" 21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/browser/navigation_controller.h" 22 #include "content/public/browser/navigation_controller.h"
23 #include "content/public/browser/navigation_entry.h" 23 #include "content/public/browser/navigation_entry.h"
24 #include "content/public/browser/render_frame_host.h" 24 #include "content/public/browser/render_frame_host.h"
25 #include "content/public/browser/render_process_host.h"
25 #include "content/public/browser/web_contents.h" 26 #include "content/public/browser/web_contents.h"
26 #include "net/url_request/url_request_context_getter.h" 27 #include "net/url_request/url_request_context_getter.h"
27 28
28 using content::BrowserThread; 29 using content::BrowserThread;
29 using content::NavigationEntry; 30 using content::NavigationEntry;
30 using content::RenderFrameHost; 31 using content::RenderFrameHost;
31 using content::WebContents; 32 using content::WebContents;
32 33
33 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ 34 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/
34 // threat_dom_details.cc 35 // threat_dom_details.cc
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after
280 if (!duplicate_child) 281 if (!duplicate_child)
281 url_resource->add_child_ids(child_resource->id()); 282 url_resource->add_child_ids(child_resource->id());
282 } 283 }
283 } 284 }
284 285
285 return url_resource; 286 return url_resource;
286 } 287 }
287 288
288 void ThreatDetails::AddDomElement( 289 void ThreatDetails::AddDomElement(
289 const int frame_tree_node_id, 290 const int frame_tree_node_id,
290 const std::string& frame_url,
291 const int element_node_id, 291 const int element_node_id,
292 const std::string& tagname, 292 const std::string& tagname,
293 const int parent_element_node_id, 293 const int parent_element_node_id,
294 const std::vector<AttributeNameValue>& attributes, 294 const std::vector<AttributeNameValue>& attributes,
295 const ClientSafeBrowsingReportRequest::Resource* resource) { 295 const ClientSafeBrowsingReportRequest::Resource* resource) {
296 // Create the element. It should not exist already since this function should 296 // Create the element. It should not exist already since this function should
297 // only be called once for each element. 297 // only be called once for each element.
298 const std::string element_key = 298 const std::string element_key =
299 GetElementKey(frame_tree_node_id, element_node_id); 299 GetElementKey(frame_tree_node_id, element_node_id);
300 HTMLElement* cur_element = FindOrCreateElement(element_key); 300 HTMLElement* cur_element = FindOrCreateElement(element_key);
301 301
302 // Set some basic metadata about the element. 302 // Set some basic metadata about the element.
303 const std::string tag_name_upper = base::ToUpperASCII(tagname); 303 const std::string tag_name_upper = base::ToUpperASCII(tagname);
304 if (!tag_name_upper.empty()) { 304 if (!tag_name_upper.empty()) {
305 cur_element->set_tag(tag_name_upper); 305 cur_element->set_tag(tag_name_upper);
306 } 306 }
307 for (const AttributeNameValue& attribute : attributes) { 307 for (const AttributeNameValue& attribute : attributes) {
308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); 308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute();
309 attribute_pb->set_name(attribute.first); 309 attribute_pb->set_name(attribute.first);
310 attribute_pb->set_value(attribute.second); 310 attribute_pb->set_value(attribute.second);
311 } 311 }
312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME";
313 312
314 if (resource) { 313 if (resource) {
315 cur_element->set_resource_id(resource->id()); 314 cur_element->set_resource_id(resource->id());
316
317 // For iframes, remember that this HTML Element represents an iframe with a
318 // specific URL. Elements from a frame with this URL are children of this
319 // element.
320 if (is_frame &&
321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) {
322 iframe_src_to_element_map_[resource->url()] = cur_element;
323 }
324 } 315 }
325 316
326 // Next we try to lookup the parent of the current element and add ourselves 317 // Next we try to lookup the parent of the current element and add ourselves
327 // as a child of it. 318 // as a child of it.
328 HTMLElement* parent_element = nullptr; 319 HTMLElement* parent_element = nullptr;
329 if (parent_element_node_id == 0) { 320 if (parent_element_node_id == 0) {
330 // No parent indicates that this element is at the top of the current frame. 321 // No parent indicates that this element is at the top of the current frame.
331 // This frame could be a child of an iframe in another frame, or it could be 322 // Remember that this is a top-level element of the frame with the
332 // at the root of the whole page. If we have a frame URL then we can try to 323 // current |frame_tree_node_id|. If this element is inside an iframe, a
333 // map this element to its parent. 324 // second pass will insert this element as a child of its parent iframe.
334 if (!frame_url.empty()) { 325 frame_tree_id_to_children_map_[frame_tree_node_id].insert(
335 // First, remember that this element is at the top-level of a frame with 326 cur_element->id());
336 // our frame URL.
337 document_url_to_children_map_[frame_url].insert(cur_element->id());
338
339 // Now check if the frame URL matches the src URL of an iframe elsewhere.
340 // This means that we processed the parent iframe element earlier, so we
341 // can add ourselves as a child of that iframe.
342 // If no such iframe exists, it could be processed later, or this element
343 // is in the top-level frame and truly has no parent.
344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) {
345 parent_element = iframe_src_to_element_map_[frame_url];
346 }
347 }
348 } else { 327 } else {
349 // We have a parent ID, so this element is just a child of something inside 328 // We have a parent ID, so this element is just a child of something inside
350 // of our current frame. We can easily lookup our parent. 329 // of our current frame. We can easily lookup our parent.
351 const std::string& parent_key = 330 const std::string& parent_key =
352 GetElementKey(frame_tree_node_id, parent_element_node_id); 331 GetElementKey(frame_tree_node_id, parent_element_node_id);
353 if (base::ContainsKey(elements_, parent_key)) { 332 if (base::ContainsKey(elements_, parent_key)) {
354 parent_element = elements_[parent_key].get(); 333 parent_element = elements_[parent_key].get();
355 } 334 }
356 } 335 }
357 336
358 // If a parent element was found, add ourselves as a child, ensuring not to 337 // If a parent element was found, add ourselves as a child, ensuring not to
359 // duplicate child IDs. 338 // duplicate child IDs.
360 if (parent_element) { 339 if (parent_element) {
361 bool duplicate_child = false; 340 bool duplicate_child = false;
362 for (const int child_id : parent_element->child_ids()) { 341 for (const int child_id : parent_element->child_ids()) {
363 if (child_id == cur_element->id()) { 342 if (child_id == cur_element->id()) {
364 duplicate_child = true; 343 duplicate_child = true;
365 break; 344 break;
366 } 345 }
367 } 346 }
368 if (!duplicate_child) { 347 if (!duplicate_child) {
369 parent_element->add_child_ids(cur_element->id()); 348 parent_element->add_child_ids(cur_element->id());
370 } 349 }
371 } 350 }
372
373 // Finally, we need to check if the current element is the parent of some
374 // other elements that came in from another frame earlier. This only happens
375 // if we are an iframe, and our src URL exists in
376 // document_url_to_children_map_. If there is a match, then all of the
377 // children in that map belong to us.
378 if (is_frame && resource &&
379 base::ContainsKey(document_url_to_children_map_, resource->url())) {
380 const std::unordered_set<int>& child_ids =
381 document_url_to_children_map_[resource->url()];
382 for (const int child_id : child_ids) {
383 cur_element->add_child_ids(child_id);
384 }
385 }
386 } 351 }
387 352
388 void ThreatDetails::StartCollection() { 353 void ThreatDetails::StartCollection() {
389 DVLOG(1) << "Starting to compute threat details."; 354 DVLOG(1) << "Starting to compute threat details.";
390 report_.reset(new ClientSafeBrowsingReportRequest()); 355 report_.reset(new ClientSafeBrowsingReportRequest());
391 356
392 if (IsReportableUrl(resource_.url)) { 357 if (IsReportableUrl(resource_.url)) {
393 report_->set_url(resource_.url.spec()); 358 report_->set_url(resource_.url.spec());
394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); 359 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type));
395 } 360 }
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
445 // detail collection could be started once the page loads. 410 // detail collection could be started once the page loads.
446 web_contents()->SendToAllFrames( 411 web_contents()->SendToAllFrames(
447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); 412 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE));
448 } 413 }
449 } 414 }
450 415
451 // When the renderer is done, this is called. 416 // When the renderer is done, this is called.
452 void ThreatDetails::OnReceivedThreatDOMDetails( 417 void ThreatDetails::OnReceivedThreatDOMDetails(
453 content::RenderFrameHost* sender, 418 content::RenderFrameHost* sender,
454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { 419 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) {
420 // Lookup the FrameTreeNode ID of any child frames in the list of DOM nodes.
421 const int sender_process_id = sender->GetProcess()->GetID();
422 const int sender_frame_tree_node_id = sender->GetFrameTreeNodeId();
423 KeyToFrameTreeIdMap child_frame_tree_map;
424 for (const SafeBrowsingHostMsg_ThreatDOMDetails_Node& node : params) {
425 if (node.child_frame_routing_id == 0)
426 continue;
427
428 const std::string cur_element_key =
429 GetElementKey(sender_frame_tree_node_id, node.node_id);
430 int child_frame_tree_node_id =
431 content::RenderFrameHost::GetFrameTreeNodeIdForRoutingId(
432 sender_process_id, node.child_frame_routing_id);
433 if (child_frame_tree_node_id ==
434 content::RenderFrameHost::kNoFrameTreeNodeId) {
435 ambiguous_dom_ = true;
436 } else {
437 child_frame_tree_map[cur_element_key] = child_frame_tree_node_id;
438 }
439 }
440
455 // Schedule this in IO thread, so it doesn't conflict with future users 441 // Schedule this in IO thread, so it doesn't conflict with future users
456 // of our data structures (eg GetSerializedReport). 442 // of our data structures (eg GetSerializedReport).
457 BrowserThread::PostTask( 443 BrowserThread::PostTask(
458 BrowserThread::IO, FROM_HERE, 444 BrowserThread::IO, FROM_HERE,
459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, 445 base::Bind(&ThreatDetails::AddDOMDetails, this, sender_frame_tree_node_id,
460 sender->GetFrameTreeNodeId(), 446 params, child_frame_tree_map));
461 sender->GetLastCommittedURL(), params));
462 } 447 }
463 448
464 void ThreatDetails::AddDOMDetails( 449 void ThreatDetails::AddDOMDetails(
465 const int frame_tree_node_id, 450 const int frame_tree_node_id,
466 const GURL& frame_last_committed_url, 451 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params,
467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { 452 const KeyToFrameTreeIdMap& child_frame_tree_map) {
468 DCHECK_CURRENTLY_ON(BrowserThread::IO); 453 DCHECK_CURRENTLY_ON(BrowserThread::IO);
469 DVLOG(1) << "Nodes from the DOM: " << params.size(); 454 DVLOG(1) << "Nodes from the DOM: " << params.size();
470 455
471 // If we have already started getting redirects from history service, 456 // If we have already started getting redirects from history service,
472 // don't modify state, otherwise will invalidate the iterators. 457 // don't modify state, otherwise will invalidate the iterators.
473 if (redirects_collector_->HasStarted()) 458 if (redirects_collector_->HasStarted())
474 return; 459 return;
475 460
476 // If we have already started collecting data from the HTTP cache, don't 461 // If we have already started collecting data from the HTTP cache, don't
477 // modify our state. 462 // modify our state.
478 if (cache_collector_->HasStarted()) 463 if (cache_collector_->HasStarted())
479 return; 464 return;
480 465
481 // Exit early if there are no nodes to process. 466 // Exit early if there are no nodes to process.
482 if (params.empty()) 467 if (params.empty())
483 return; 468 return;
484 469
485 // Try to deduce the URL that the render frame was handling. First check if 470 // Copy FrameTreeNode IDs for the child frame into the combined mapping.
486 // the summary node from the renderer has a document URL. If not, try looking 471 iframe_key_to_frame_tree_id_map_.insert(child_frame_tree_map.begin(),
487 // at the last committed URL of the frame. 472 child_frame_tree_map.end());
488 GURL frame_url;
489 if (IsReportableUrl(params.back().url)) {
490 frame_url = params.back().url;
491 } else if (IsReportableUrl(frame_last_committed_url)) {
492 frame_url = frame_last_committed_url;
493 }
494
495 // If we can't figure out which URL the frame was rendering then we don't know
496 // where these elements belong in the hierarchy. The DOM will be ambiguous.
497 if (frame_url.is_empty()) {
498 ambiguous_dom_ = true;
499 }
500 473
501 // Add the urls from the DOM to |resources_|. The renderer could be sending 474 // Add the urls from the DOM to |resources_|. The renderer could be sending
502 // bogus messages, so limit the number of nodes we accept. 475 // bogus messages, so limit the number of nodes we accept.
503 // Also update |elements_| with the DOM structure. 476 // Also update |elements_| with the DOM structure.
504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { 477 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; 478 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i];
506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; 479 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; 480 ClientSafeBrowsingReportRequest::Resource* resource = nullptr;
508 if (!node.url.is_empty()) { 481 if (!node.url.is_empty()) {
509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); 482 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children));
510 } 483 }
511 // Check for a tag_name to avoid adding the summary node to the DOM. 484 // Check for a tag_name to avoid adding the summary node to the DOM.
512 if (!node.tag_name.empty()) { 485 if (!node.tag_name.empty()) {
513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, 486 AddDomElement(frame_tree_node_id, node.node_id, node.tag_name,
514 node.tag_name, node.parent_node_id, node.attributes, 487 node.parent_node_id, node.attributes, resource);
515 resource);
516 } 488 }
517 } 489 }
518 } 490 }
519 491
520 // Called from the SB Service on the IO thread, after the user has 492 // Called from the SB Service on the IO thread, after the user has
521 // closed the tab, or clicked proceed or goback. Since the user needs 493 // closed the tab, or clicked proceed or goback. Since the user needs
522 // to take an action, we expect this to be called after 494 // to take an action, we expect this to be called after
523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include 495 // OnReceivedThreatDOMDetails in most cases. If not, we don't include
524 // the DOM data in our report. 496 // the DOM data in our report.
525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { 497 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) {
526 DCHECK_CURRENTLY_ON(BrowserThread::IO); 498 DCHECK_CURRENTLY_ON(BrowserThread::IO);
527 499
500 // Do a second pass over the elements and update iframe elements to have
501 // references to their children. Children may have been received from a
502 // different renderer than the iframe element.
503 for (auto& element_pair : elements_) {
504 const std::string& element_key = element_pair.first;
505 HTMLElement* element = element_pair.second.get();
506 if (base::ContainsKey(iframe_key_to_frame_tree_id_map_, element_key)) {
507 int frame_tree_id_of_iframe_renderer =
508 iframe_key_to_frame_tree_id_map_[element_key];
509 const std::unordered_set<int>& child_ids =
510 frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer];
511 for (const int child_id : child_ids) {
512 element->add_child_ids(child_id);
513 }
514 }
515 }
528 did_proceed_ = did_proceed; 516 did_proceed_ = did_proceed;
529 num_visits_ = num_visit; 517 num_visits_ = num_visit;
530 std::vector<GURL> urls; 518 std::vector<GURL> urls;
531 for (ResourceMap::const_iterator it = resources_.begin(); 519 for (ResourceMap::const_iterator it = resources_.begin();
532 it != resources_.end(); ++it) { 520 it != resources_.end(); ++it) {
533 urls.push_back(GURL(it->first)); 521 urls.push_back(GURL(it->first));
534 } 522 }
535 redirects_collector_->StartHistoryCollection( 523 redirects_collector_->StartHistoryCollection(
536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); 524 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this));
537 } 525 }
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
593 // Send the report, using the SafeBrowsingService. 581 // Send the report, using the SafeBrowsingService.
594 std::string serialized; 582 std::string serialized;
595 if (!report_->SerializeToString(&serialized)) { 583 if (!report_->SerializeToString(&serialized)) {
596 DLOG(ERROR) << "Unable to serialize the threat report."; 584 DLOG(ERROR) << "Unable to serialize the threat report.";
597 return; 585 return;
598 } 586 }
599 ui_manager_->SendSerializedThreatDetails(serialized); 587 ui_manager_->SendSerializedThreatDetails(serialized);
600 } 588 }
601 589
602 } // namespace safe_browsing 590 } // namespace safe_browsing
OLD | NEW
« no previous file with comments | « components/safe_browsing/browser/threat_details.h ('k') | components/safe_browsing/common/safebrowsing_messages.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698