| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // Implementation of the ThreatDetails class. | 5 // Implementation of the ThreatDetails class. |
| 6 | 6 |
| 7 #include "components/safe_browsing/browser/threat_details.h" | 7 #include "components/safe_browsing/browser/threat_details.h" |
| 8 | 8 |
| 9 #include <stddef.h> | 9 #include <stddef.h> |
| 10 #include <stdint.h> | 10 #include <stdint.h> |
| 11 | 11 |
| 12 #include "base/bind.h" | 12 #include "base/bind.h" |
| 13 #include "base/lazy_instance.h" | 13 #include "base/lazy_instance.h" |
| 14 #include "base/metrics/histogram_macros.h" | 14 #include "base/metrics/histogram_macros.h" |
| 15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
| 16 #include "components/history/core/browser/history_service.h" | 16 #include "components/history/core/browser/history_service.h" |
| 17 #include "components/safe_browsing/base_ui_manager.h" | 17 #include "components/safe_browsing/base_ui_manager.h" |
| 18 #include "components/safe_browsing/browser/threat_details_cache.h" | 18 #include "components/safe_browsing/browser/threat_details_cache.h" |
| 19 #include "components/safe_browsing/browser/threat_details_history.h" | 19 #include "components/safe_browsing/browser/threat_details_history.h" |
| 20 #include "components/safe_browsing/common/safebrowsing_messages.h" | 20 #include "components/safe_browsing/common/safebrowsing_messages.h" |
| 21 #include "content/public/browser/browser_thread.h" | 21 #include "content/public/browser/browser_thread.h" |
| 22 #include "content/public/browser/navigation_controller.h" | 22 #include "content/public/browser/navigation_controller.h" |
| 23 #include "content/public/browser/navigation_entry.h" | 23 #include "content/public/browser/navigation_entry.h" |
| 24 #include "content/public/browser/render_frame_host.h" | 24 #include "content/public/browser/render_frame_host.h" |
| 25 #include "content/public/browser/render_process_host.h" |
| 25 #include "content/public/browser/web_contents.h" | 26 #include "content/public/browser/web_contents.h" |
| 26 #include "net/url_request/url_request_context_getter.h" | 27 #include "net/url_request/url_request_context_getter.h" |
| 27 | 28 |
| 28 using content::BrowserThread; | 29 using content::BrowserThread; |
| 29 using content::NavigationEntry; | 30 using content::NavigationEntry; |
| 30 using content::RenderFrameHost; | 31 using content::RenderFrameHost; |
| 31 using content::WebContents; | 32 using content::WebContents; |
| 32 | 33 |
| 33 // Keep in sync with kMaxNodes in components/safe_browsing/renderer/ | 34 // Keep in sync with kMaxNodes in components/safe_browsing/renderer/ |
| 34 // threat_dom_details.cc | 35 // threat_dom_details.cc |
| (...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 280 if (!duplicate_child) | 281 if (!duplicate_child) |
| 281 url_resource->add_child_ids(child_resource->id()); | 282 url_resource->add_child_ids(child_resource->id()); |
| 282 } | 283 } |
| 283 } | 284 } |
| 284 | 285 |
| 285 return url_resource; | 286 return url_resource; |
| 286 } | 287 } |
| 287 | 288 |
| 288 void ThreatDetails::AddDomElement( | 289 void ThreatDetails::AddDomElement( |
| 289 const int frame_tree_node_id, | 290 const int frame_tree_node_id, |
| 290 const std::string& frame_url, | |
| 291 const int element_node_id, | 291 const int element_node_id, |
| 292 const std::string& tagname, | 292 const std::string& tagname, |
| 293 const int parent_element_node_id, | 293 const int parent_element_node_id, |
| 294 const std::vector<AttributeNameValue>& attributes, | 294 const std::vector<AttributeNameValue>& attributes, |
| 295 const ClientSafeBrowsingReportRequest::Resource* resource) { | 295 const ClientSafeBrowsingReportRequest::Resource* resource) { |
| 296 // Create the element. It should not exist already since this function should | 296 // Create the element. It should not exist already since this function should |
| 297 // only be called once for each element. | 297 // only be called once for each element. |
| 298 const std::string element_key = | 298 const std::string element_key = |
| 299 GetElementKey(frame_tree_node_id, element_node_id); | 299 GetElementKey(frame_tree_node_id, element_node_id); |
| 300 HTMLElement* cur_element = FindOrCreateElement(element_key); | 300 HTMLElement* cur_element = FindOrCreateElement(element_key); |
| 301 | 301 |
| 302 // Set some basic metadata about the element. | 302 // Set some basic metadata about the element. |
| 303 const std::string tag_name_upper = base::ToUpperASCII(tagname); | 303 const std::string tag_name_upper = base::ToUpperASCII(tagname); |
| 304 if (!tag_name_upper.empty()) { | 304 if (!tag_name_upper.empty()) { |
| 305 cur_element->set_tag(tag_name_upper); | 305 cur_element->set_tag(tag_name_upper); |
| 306 } | 306 } |
| 307 for (const AttributeNameValue& attribute : attributes) { | 307 for (const AttributeNameValue& attribute : attributes) { |
| 308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); | 308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); |
| 309 attribute_pb->set_name(attribute.first); | 309 attribute_pb->set_name(attribute.first); |
| 310 attribute_pb->set_value(attribute.second); | 310 attribute_pb->set_value(attribute.second); |
| 311 } | 311 } |
| 312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; | |
| 313 | 312 |
| 314 if (resource) { | 313 if (resource) { |
| 315 cur_element->set_resource_id(resource->id()); | 314 cur_element->set_resource_id(resource->id()); |
| 316 | |
| 317 // For iframes, remember that this HTML Element represents an iframe with a | |
| 318 // specific URL. Elements from a frame with this URL are children of this | |
| 319 // element. | |
| 320 if (is_frame && | |
| 321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { | |
| 322 iframe_src_to_element_map_[resource->url()] = cur_element; | |
| 323 } | |
| 324 } | 315 } |
| 325 | 316 |
| 326 // Next we try to look up the parent of the current element and add ourselves | 317 // Next we try to look up the parent of the current element and add ourselves |
| 327 // as a child of it. | 318 // as a child of it. |
| 328 HTMLElement* parent_element = nullptr; | 319 HTMLElement* parent_element = nullptr; |
| 329 if (parent_element_node_id == 0) { | 320 if (parent_element_node_id == 0) { |
| 330 // No parent indicates that this element is at the top of the current frame. | 321 // No parent indicates that this element is at the top of the current frame. |
| 331 // This frame could be a child of an iframe in another frame, or it could be | 322 // Remember that this is a top-level element of the frame with the |
| 332 // at the root of the whole page. If we have a frame URL then we can try to | 323 // current |frame_tree_node_id|. If this element is inside an iframe, a |
| 333 // map this element to its parent. | 324 // second pass will insert this element as a child of its parent iframe. |
| 334 if (!frame_url.empty()) { | 325 frame_tree_id_to_children_map_[frame_tree_node_id].insert( |
| 335 // First, remember that this element is at the top-level of a frame with | 326 cur_element->id()); |
| 336 // our frame URL. | |
| 337 document_url_to_children_map_[frame_url].insert(cur_element->id()); | |
| 338 | |
| 339 // Now check if the frame URL matches the src URL of an iframe elsewhere. | |
| 340 // This means that we processed the parent iframe element earlier, so we | |
| 341 // can add ourselves as a child of that iframe. | |
| 342 // If no such iframe exists, it could be processed later, or this element | |
| 343 // is in the top-level frame and truly has no parent. | |
| 344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) { | |
| 345 parent_element = iframe_src_to_element_map_[frame_url]; | |
| 346 } | |
| 347 } | |
| 348 } else { | 327 } else { |
| 349 // We have a parent ID, so this element is just a child of something inside | 328 // We have a parent ID, so this element is just a child of something inside |
| 350 // of our current frame. We can easily look up our parent. | 329 // of our current frame. We can easily look up our parent. |
| 351 const std::string& parent_key = | 330 const std::string& parent_key = |
| 352 GetElementKey(frame_tree_node_id, parent_element_node_id); | 331 GetElementKey(frame_tree_node_id, parent_element_node_id); |
| 353 if (base::ContainsKey(elements_, parent_key)) { | 332 if (base::ContainsKey(elements_, parent_key)) { |
| 354 parent_element = elements_[parent_key].get(); | 333 parent_element = elements_[parent_key].get(); |
| 355 } | 334 } |
| 356 } | 335 } |
| 357 | 336 |
| 358 // If a parent element was found, add ourselves as a child, ensuring not to | 337 // If a parent element was found, add ourselves as a child, ensuring not to |
| 359 // duplicate child IDs. | 338 // duplicate child IDs. |
| 360 if (parent_element) { | 339 if (parent_element) { |
| 361 bool duplicate_child = false; | 340 bool duplicate_child = false; |
| 362 for (const int child_id : parent_element->child_ids()) { | 341 for (const int child_id : parent_element->child_ids()) { |
| 363 if (child_id == cur_element->id()) { | 342 if (child_id == cur_element->id()) { |
| 364 duplicate_child = true; | 343 duplicate_child = true; |
| 365 break; | 344 break; |
| 366 } | 345 } |
| 367 } | 346 } |
| 368 if (!duplicate_child) { | 347 if (!duplicate_child) { |
| 369 parent_element->add_child_ids(cur_element->id()); | 348 parent_element->add_child_ids(cur_element->id()); |
| 370 } | 349 } |
| 371 } | 350 } |
| 372 | |
| 373 // Finally, we need to check if the current element is the parent of some | |
| 374 // other elements that came in from another frame earlier. This only happens | |
| 375 // if we are an iframe, and our src URL exists in | |
| 376 // document_url_to_children_map_. If there is a match, then all of the | |
| 377 // children in that map belong to us. | |
| 378 if (is_frame && resource && | |
| 379 base::ContainsKey(document_url_to_children_map_, resource->url())) { | |
| 380 const std::unordered_set<int>& child_ids = | |
| 381 document_url_to_children_map_[resource->url()]; | |
| 382 for (const int child_id : child_ids) { | |
| 383 cur_element->add_child_ids(child_id); | |
| 384 } | |
| 385 } | |
| 386 } | 351 } |
| 387 | 352 |
| 388 void ThreatDetails::StartCollection() { | 353 void ThreatDetails::StartCollection() { |
| 389 DVLOG(1) << "Starting to compute threat details."; | 354 DVLOG(1) << "Starting to compute threat details."; |
| 390 report_.reset(new ClientSafeBrowsingReportRequest()); | 355 report_.reset(new ClientSafeBrowsingReportRequest()); |
| 391 | 356 |
| 392 if (IsReportableUrl(resource_.url)) { | 357 if (IsReportableUrl(resource_.url)) { |
| 393 report_->set_url(resource_.url.spec()); | 358 report_->set_url(resource_.url.spec()); |
| 394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); | 359 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); |
| 395 } | 360 } |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 445 // detail collection could be started once the page loads. | 410 // detail collection could be started once the page loads. |
| 446 web_contents()->SendToAllFrames( | 411 web_contents()->SendToAllFrames( |
| 447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); | 412 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); |
| 448 } | 413 } |
| 449 } | 414 } |
| 450 | 415 |
| 451 // When the renderer is done, this is called. | 416 // When the renderer is done, this is called. |
| 452 void ThreatDetails::OnReceivedThreatDOMDetails( | 417 void ThreatDetails::OnReceivedThreatDOMDetails( |
| 453 content::RenderFrameHost* sender, | 418 content::RenderFrameHost* sender, |
| 454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 419 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { |
| 420 // Look up the FrameTreeNode ID of any child frames in the list of DOM nodes. |
| 421 const int sender_process_id = sender->GetProcess()->GetID(); |
| 422 const int sender_frame_tree_node_id = sender->GetFrameTreeNodeId(); |
| 423 KeyToFrameTreeIdMap child_frame_tree_map; |
| 424 for (const SafeBrowsingHostMsg_ThreatDOMDetails_Node& node : params) { |
| 425 if (node.child_frame_routing_id == 0) |
| 426 continue; |
| 427 |
| 428 const std::string cur_element_key = |
| 429 GetElementKey(sender_frame_tree_node_id, node.node_id); |
| 430 int child_frame_tree_node_id = |
| 431 content::RenderFrameHost::GetFrameTreeNodeIdForRoutingId( |
| 432 sender_process_id, node.child_frame_routing_id); |
| 433 if (child_frame_tree_node_id == |
| 434 content::RenderFrameHost::kNoFrameTreeNodeId) { |
| 435 ambiguous_dom_ = true; |
| 436 } else { |
| 437 child_frame_tree_map[cur_element_key] = child_frame_tree_node_id; |
| 438 } |
| 439 } |
| 440 |
| 455 // Schedule this on the IO thread, so it doesn't conflict with future users | 441 // Schedule this on the IO thread, so it doesn't conflict with future users |
| 456 // of our data structures (e.g. GetSerializedReport). | 442 // of our data structures (e.g. GetSerializedReport). |
| 457 BrowserThread::PostTask( | 443 BrowserThread::PostTask( |
| 458 BrowserThread::IO, FROM_HERE, | 444 BrowserThread::IO, FROM_HERE, |
| 459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, | 445 base::Bind(&ThreatDetails::AddDOMDetails, this, sender_frame_tree_node_id, |
| 460 sender->GetFrameTreeNodeId(), | 446 params, child_frame_tree_map)); |
| 461 sender->GetLastCommittedURL(), params)); | |
| 462 } | 447 } |
| 463 | 448 |
| 464 void ThreatDetails::AddDOMDetails( | 449 void ThreatDetails::AddDOMDetails( |
| 465 const int frame_tree_node_id, | 450 const int frame_tree_node_id, |
| 466 const GURL& frame_last_committed_url, | 451 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params, |
| 467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 452 const KeyToFrameTreeIdMap& child_frame_tree_map) { |
| 468 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 453 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| 469 DVLOG(1) << "Nodes from the DOM: " << params.size(); | 454 DVLOG(1) << "Nodes from the DOM: " << params.size(); |
| 470 | 455 |
| 471 // If we have already started getting redirects from history service, | 456 // If we have already started getting redirects from history service, |
| 472 // don't modify state, otherwise we will invalidate the iterators. | 457 // don't modify state, otherwise we will invalidate the iterators. |
| 473 if (redirects_collector_->HasStarted()) | 458 if (redirects_collector_->HasStarted()) |
| 474 return; | 459 return; |
| 475 | 460 |
| 476 // If we have already started collecting data from the HTTP cache, don't | 461 // If we have already started collecting data from the HTTP cache, don't |
| 477 // modify our state. | 462 // modify our state. |
| 478 if (cache_collector_->HasStarted()) | 463 if (cache_collector_->HasStarted()) |
| 479 return; | 464 return; |
| 480 | 465 |
| 481 // Exit early if there are no nodes to process. | 466 // Exit early if there are no nodes to process. |
| 482 if (params.empty()) | 467 if (params.empty()) |
| 483 return; | 468 return; |
| 484 | 469 |
| 485 // Try to deduce the URL that the render frame was handling. First check if | 470 // Copy FrameTreeNode IDs for the child frame into the combined mapping. |
| 486 // the summary node from the renderer has a document URL. If not, try looking | 471 iframe_key_to_frame_tree_id_map_.insert(child_frame_tree_map.begin(), |
| 487 // at the last committed URL of the frame. | 472 child_frame_tree_map.end()); |
| 488 GURL frame_url; | |
| 489 if (IsReportableUrl(params.back().url)) { | |
| 490 frame_url = params.back().url; | |
| 491 } else if (IsReportableUrl(frame_last_committed_url)) { | |
| 492 frame_url = frame_last_committed_url; | |
| 493 } | |
| 494 | |
| 495 // If we can't figure out which URL the frame was rendering then we don't know | |
| 496 // where these elements belong in the hierarchy. The DOM will be ambiguous. | |
| 497 if (frame_url.is_empty()) { | |
| 498 ambiguous_dom_ = true; | |
| 499 } | |
| 500 | 473 |
| 501 // Add the urls from the DOM to |resources_|. The renderer could be sending | 474 // Add the urls from the DOM to |resources_|. The renderer could be sending |
| 502 // bogus messages, so limit the number of nodes we accept. | 475 // bogus messages, so limit the number of nodes we accept. |
| 503 // Also update |elements_| with the DOM structure. | 476 // Also update |elements_| with the DOM structure. |
| 504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { | 477 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { |
| 505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; | 478 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; |
| 506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; | 479 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; |
| 507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; | 480 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; |
| 508 if (!node.url.is_empty()) { | 481 if (!node.url.is_empty()) { |
| 509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); | 482 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); |
| 510 } | 483 } |
| 511 // Check for a tag_name to avoid adding the summary node to the DOM. | 484 // Check for a tag_name to avoid adding the summary node to the DOM. |
| 512 if (!node.tag_name.empty()) { | 485 if (!node.tag_name.empty()) { |
| 513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, | 486 AddDomElement(frame_tree_node_id, node.node_id, node.tag_name, |
| 514 node.tag_name, node.parent_node_id, node.attributes, | 487 node.parent_node_id, node.attributes, resource); |
| 515 resource); | |
| 516 } | 488 } |
| 517 } | 489 } |
| 518 } | 490 } |
| 519 | 491 |
| 520 // Called from the SB Service on the IO thread, after the user has | 492 // Called from the SB Service on the IO thread, after the user has |
| 521 // closed the tab, or clicked proceed or go back. Since the user needs | 493 // closed the tab, or clicked proceed or go back. Since the user needs |
| 522 // to take an action, we expect this to be called after | 494 // to take an action, we expect this to be called after |
| 523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include | 495 // OnReceivedThreatDOMDetails in most cases. If not, we don't include |
| 524 // the DOM data in our report. | 496 // the DOM data in our report. |
| 525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { | 497 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { |
| 526 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 498 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| 527 | 499 |
| 500 // Do a second pass over the elements and update iframe elements to have |
| 501 // references to their children. Children may have been received from a |
| 502 // different renderer than the iframe element. |
| 503 for (auto& element_pair : elements_) { |
| 504 const std::string& element_key = element_pair.first; |
| 505 HTMLElement* element = element_pair.second.get(); |
| 506 if (base::ContainsKey(iframe_key_to_frame_tree_id_map_, element_key)) { |
| 507 int frame_tree_id_of_iframe_renderer = |
| 508 iframe_key_to_frame_tree_id_map_[element_key]; |
| 509 const std::unordered_set<int>& child_ids = |
| 510 frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer]; |
| 511 for (const int child_id : child_ids) { |
| 512 element->add_child_ids(child_id); |
| 513 } |
| 514 } |
| 515 } |
| 528 did_proceed_ = did_proceed; | 516 did_proceed_ = did_proceed; |
| 529 num_visits_ = num_visit; | 517 num_visits_ = num_visit; |
| 530 std::vector<GURL> urls; | 518 std::vector<GURL> urls; |
| 531 for (ResourceMap::const_iterator it = resources_.begin(); | 519 for (ResourceMap::const_iterator it = resources_.begin(); |
| 532 it != resources_.end(); ++it) { | 520 it != resources_.end(); ++it) { |
| 533 urls.push_back(GURL(it->first)); | 521 urls.push_back(GURL(it->first)); |
| 534 } | 522 } |
| 535 redirects_collector_->StartHistoryCollection( | 523 redirects_collector_->StartHistoryCollection( |
| 536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); | 524 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); |
| 537 } | 525 } |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 593 // Send the report, using the SafeBrowsingService. | 581 // Send the report, using the SafeBrowsingService. |
| 594 std::string serialized; | 582 std::string serialized; |
| 595 if (!report_->SerializeToString(&serialized)) { | 583 if (!report_->SerializeToString(&serialized)) { |
| 596 DLOG(ERROR) << "Unable to serialize the threat report."; | 584 DLOG(ERROR) << "Unable to serialize the threat report."; |
| 597 return; | 585 return; |
| 598 } | 586 } |
| 599 ui_manager_->SendSerializedThreatDetails(serialized); | 587 ui_manager_->SendSerializedThreatDetails(serialized); |
| 600 } | 588 } |
| 601 | 589 |
| 602 } // namespace safe_browsing | 590 } // namespace safe_browsing |
| OLD | NEW |