OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Implementation of the ThreatDetails class. | 5 // Implementation of the ThreatDetails class. |
6 | 6 |
7 #include "components/safe_browsing/browser/threat_details.h" | 7 #include "components/safe_browsing/browser/threat_details.h" |
8 | 8 |
9 #include <stddef.h> | 9 #include <stddef.h> |
10 #include <stdint.h> | 10 #include <stdint.h> |
11 | 11 |
12 #include "base/bind.h" | 12 #include "base/bind.h" |
13 #include "base/lazy_instance.h" | 13 #include "base/lazy_instance.h" |
14 #include "base/metrics/histogram_macros.h" | 14 #include "base/metrics/histogram_macros.h" |
15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
16 #include "components/history/core/browser/history_service.h" | 16 #include "components/history/core/browser/history_service.h" |
17 #include "components/safe_browsing/base_ui_manager.h" | 17 #include "components/safe_browsing/base_ui_manager.h" |
18 #include "components/safe_browsing/browser/threat_details_cache.h" | 18 #include "components/safe_browsing/browser/threat_details_cache.h" |
19 #include "components/safe_browsing/browser/threat_details_history.h" | 19 #include "components/safe_browsing/browser/threat_details_history.h" |
20 #include "components/safe_browsing/common/safebrowsing_messages.h" | 20 #include "components/safe_browsing/common/safebrowsing_messages.h" |
21 #include "content/public/browser/browser_thread.h" | 21 #include "content/public/browser/browser_thread.h" |
22 #include "content/public/browser/navigation_controller.h" | 22 #include "content/public/browser/navigation_controller.h" |
23 #include "content/public/browser/navigation_entry.h" | 23 #include "content/public/browser/navigation_entry.h" |
24 #include "content/public/browser/render_frame_host.h" | 24 #include "content/public/browser/render_frame_host.h" |
| 25 #include "content/public/browser/render_process_host.h" |
25 #include "content/public/browser/web_contents.h" | 26 #include "content/public/browser/web_contents.h" |
26 #include "net/url_request/url_request_context_getter.h" | 27 #include "net/url_request/url_request_context_getter.h" |
27 | 28 |
28 using content::BrowserThread; | 29 using content::BrowserThread; |
29 using content::NavigationEntry; | 30 using content::NavigationEntry; |
30 using content::RenderFrameHost; | 31 using content::RenderFrameHost; |
31 using content::WebContents; | 32 using content::WebContents; |
32 | 33 |
33 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ | 34 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ |
34 // threat_dom_details.cc | 35 // threat_dom_details.cc |
(...skipping 245 matching lines...) |
280 if (!duplicate_child) | 281 if (!duplicate_child) |
281 url_resource->add_child_ids(child_resource->id()); | 282 url_resource->add_child_ids(child_resource->id()); |
282 } | 283 } |
283 } | 284 } |
284 | 285 |
285 return url_resource; | 286 return url_resource; |
286 } | 287 } |
287 | 288 |
288 void ThreatDetails::AddDomElement( | 289 void ThreatDetails::AddDomElement( |
289 const int frame_tree_node_id, | 290 const int frame_tree_node_id, |
290 const std::string& frame_url, | |
291 const int element_node_id, | 291 const int element_node_id, |
292 const std::string& tagname, | 292 const std::string& tagname, |
293 const int parent_element_node_id, | 293 const int parent_element_node_id, |
294 const std::vector<AttributeNameValue>& attributes, | 294 const std::vector<AttributeNameValue>& attributes, |
295 const ClientSafeBrowsingReportRequest::Resource* resource) { | 295 const ClientSafeBrowsingReportRequest::Resource* resource) { |
296 // Create the element. It should not exist already since this function should | 296 // Create the element. It should not exist already since this function should |
297 // only be called once for each element. | 297 // only be called once for each element. |
298 const std::string element_key = | 298 const std::string element_key = |
299 GetElementKey(frame_tree_node_id, element_node_id); | 299 GetElementKey(frame_tree_node_id, element_node_id); |
300 HTMLElement* cur_element = FindOrCreateElement(element_key); | 300 HTMLElement* cur_element = FindOrCreateElement(element_key); |
301 | 301 |
302 // Set some basic metadata about the element. | 302 // Set some basic metadata about the element. |
303 const std::string tag_name_upper = base::ToUpperASCII(tagname); | 303 const std::string tag_name_upper = base::ToUpperASCII(tagname); |
304 if (!tag_name_upper.empty()) { | 304 if (!tag_name_upper.empty()) { |
305 cur_element->set_tag(tag_name_upper); | 305 cur_element->set_tag(tag_name_upper); |
306 } | 306 } |
307 for (const AttributeNameValue& attribute : attributes) { | 307 for (const AttributeNameValue& attribute : attributes) { |
308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); | 308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); |
309 attribute_pb->set_name(attribute.first); | 309 attribute_pb->set_name(attribute.first); |
310 attribute_pb->set_value(attribute.second); | 310 attribute_pb->set_value(attribute.second); |
311 } | 311 } |
312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; | |
313 | 312 |
314 if (resource) { | 313 if (resource) { |
315 cur_element->set_resource_id(resource->id()); | 314 cur_element->set_resource_id(resource->id()); |
316 | |
317 // For iframes, remember that this HTML Element represents an iframe with a | |
318 // specific URL. Elements from a frame with this URL are children of this | |
319 // element. | |
320 if (is_frame && | |
321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { | |
322 iframe_src_to_element_map_[resource->url()] = cur_element; | |
323 } | |
324 } | 315 } |
325 | 316 |
326 // Next we try to lookup the parent of the current element and add ourselves | 317 // Next we try to lookup the parent of the current element and add ourselves |
327 // as a child of it. | 318 // as a child of it. |
328 HTMLElement* parent_element = nullptr; | 319 HTMLElement* parent_element = nullptr; |
329 if (parent_element_node_id == 0) { | 320 if (parent_element_node_id == 0) { |
330 // No parent indicates that this element is at the top of the current frame. | 321 // No parent indicates that this element is at the top of the current frame. |
331 // This frame could be a child of an iframe in another frame, or it could be | 322 // Remember that this is a top-level element of the frame with the |
332 // at the root of the whole page. If we have a frame URL then we can try to | 323 // current |frame_tree_node_id|. If this element is inside an iframe, a |
333 // map this element to its parent. | 324 // second pass will insert this element as a child of its parent iframe. |
334 if (!frame_url.empty()) { | 325 frame_tree_id_to_children_map_[frame_tree_node_id].insert( |
335 // First, remember that this element is at the top-level of a frame with | 326 cur_element->id()); |
336 // our frame URL. | |
337 document_url_to_children_map_[frame_url].insert(cur_element->id()); | |
338 | |
339 // Now check if the frame URL matches the src URL of an iframe elsewhere. | |
340 // This means that we processed the parent iframe element earlier, so we | |
341 // can add ourselves as a child of that iframe. | |
342 // If no such iframe exists, it could be processed later, or this element | |
343 // is in the top-level frame and truly has no parent. | |
344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) { | |
345 parent_element = iframe_src_to_element_map_[frame_url]; | |
346 } | |
347 } | |
348 } else { | 327 } else { |
349 // We have a parent ID, so this element is just a child of something inside | 328 // We have a parent ID, so this element is just a child of something inside |
350 // of our current frame. We can easily lookup our parent. | 329 // of our current frame. We can easily lookup our parent. |
351 const std::string& parent_key = | 330 const std::string& parent_key = |
352 GetElementKey(frame_tree_node_id, parent_element_node_id); | 331 GetElementKey(frame_tree_node_id, parent_element_node_id); |
353 if (base::ContainsKey(elements_, parent_key)) { | 332 if (base::ContainsKey(elements_, parent_key)) { |
354 parent_element = elements_[parent_key].get(); | 333 parent_element = elements_[parent_key].get(); |
355 } | 334 } |
356 } | 335 } |
357 | 336 |
358 // If a parent element was found, add ourselves as a child, ensuring not to | 337 // If a parent element was found, add ourselves as a child, ensuring not to |
359 // duplicate child IDs. | 338 // duplicate child IDs. |
360 if (parent_element) { | 339 if (parent_element) { |
361 bool duplicate_child = false; | 340 bool duplicate_child = false; |
362 for (const int child_id : parent_element->child_ids()) { | 341 for (const int child_id : parent_element->child_ids()) { |
363 if (child_id == cur_element->id()) { | 342 if (child_id == cur_element->id()) { |
364 duplicate_child = true; | 343 duplicate_child = true; |
365 break; | 344 break; |
366 } | 345 } |
367 } | 346 } |
368 if (!duplicate_child) { | 347 if (!duplicate_child) { |
369 parent_element->add_child_ids(cur_element->id()); | 348 parent_element->add_child_ids(cur_element->id()); |
370 } | 349 } |
371 } | 350 } |
372 | |
373 // Finally, we need to check if the current element is the parent of some | |
374 // other elements that came in from another frame earlier. This only happens | |
375 // if we are an iframe, and our src URL exists in | |
376 // document_url_to_children_map_. If there is a match, then all of the | |
377 // children in that map belong to us. | |
378 if (is_frame && resource && | |
379 base::ContainsKey(document_url_to_children_map_, resource->url())) { | |
380 const std::unordered_set<int>& child_ids = | |
381 document_url_to_children_map_[resource->url()]; | |
382 for (const int child_id : child_ids) { | |
383 cur_element->add_child_ids(child_id); | |
384 } | |
385 } | |
386 } | 351 } |
387 | 352 |
388 void ThreatDetails::StartCollection() { | 353 void ThreatDetails::StartCollection() { |
389 DVLOG(1) << "Starting to compute threat details."; | 354 DVLOG(1) << "Starting to compute threat details."; |
390 report_.reset(new ClientSafeBrowsingReportRequest()); | 355 report_.reset(new ClientSafeBrowsingReportRequest()); |
391 | 356 |
392 if (IsReportableUrl(resource_.url)) { | 357 if (IsReportableUrl(resource_.url)) { |
393 report_->set_url(resource_.url.spec()); | 358 report_->set_url(resource_.url.spec()); |
394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); | 359 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); |
395 } | 360 } |
(...skipping 49 matching lines...) |
445 // detail collection could be started once the page loads. | 410 // detail collection could be started once the page loads. |
446 web_contents()->SendToAllFrames( | 411 web_contents()->SendToAllFrames( |
447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); | 412 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); |
448 } | 413 } |
449 } | 414 } |
450 | 415 |
451 // When the renderer is done, this is called. | 416 // When the renderer is done, this is called. |
452 void ThreatDetails::OnReceivedThreatDOMDetails( | 417 void ThreatDetails::OnReceivedThreatDOMDetails( |
453 content::RenderFrameHost* sender, | 418 content::RenderFrameHost* sender, |
454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 419 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { |
| 420 // Lookup the FrameTreeNode ID of any child frames in the list of DOM nodes. |
| 421 const int sender_process_id = sender->GetProcess()->GetID(); |
| 422 const int sender_frame_tree_node_id = sender->GetFrameTreeNodeId(); |
| 423 KeyToFrameTreeIdMap child_frame_tree_map; |
| 424 for (const SafeBrowsingHostMsg_ThreatDOMDetails_Node& node : params) { |
| 425 if (node.child_frame_routing_id == 0) |
| 426 continue; |
| 427 |
| 428 const std::string cur_element_key = |
| 429 GetElementKey(sender_frame_tree_node_id, node.node_id); |
| 430 int child_frame_tree_node_id = |
| 431 content::RenderFrameHost::GetFrameTreeNodeIdForRoutingId( |
| 432 sender_process_id, node.child_frame_routing_id); |
| 433 if (child_frame_tree_node_id == |
| 434 content::RenderFrameHost::kNoFrameTreeNodeId) { |
| 435 ambiguous_dom_ = true; |
| 436 } else { |
| 437 child_frame_tree_map[cur_element_key] = child_frame_tree_node_id; |
| 438 } |
| 439 } |
| 440 |
455 // Schedule this in IO thread, so it doesn't conflict with future users | 441 // Schedule this in IO thread, so it doesn't conflict with future users |
456 // of our data structures (eg GetSerializedReport). | 442 // of our data structures (eg GetSerializedReport). |
457 BrowserThread::PostTask( | 443 BrowserThread::PostTask( |
458 BrowserThread::IO, FROM_HERE, | 444 BrowserThread::IO, FROM_HERE, |
459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, | 445 base::Bind(&ThreatDetails::AddDOMDetails, this, sender_frame_tree_node_id, |
460 sender->GetFrameTreeNodeId(), | 446 params, child_frame_tree_map)); |
461 sender->GetLastCommittedURL(), params)); | |
462 } | 447 } |
463 | 448 |
464 void ThreatDetails::AddDOMDetails( | 449 void ThreatDetails::AddDOMDetails( |
465 const int frame_tree_node_id, | 450 const int frame_tree_node_id, |
466 const GURL& frame_last_committed_url, | 451 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params, |
467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 452 const KeyToFrameTreeIdMap& child_frame_tree_map) { |
468 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 453 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
469 DVLOG(1) << "Nodes from the DOM: " << params.size(); | 454 DVLOG(1) << "Nodes from the DOM: " << params.size(); |
470 | 455 |
471 // If we have already started getting redirects from history service, | 456 // If we have already started getting redirects from history service, |
472 // don't modify state, otherwise will invalidate the iterators. | 457 // don't modify state, otherwise will invalidate the iterators. |
473 if (redirects_collector_->HasStarted()) | 458 if (redirects_collector_->HasStarted()) |
474 return; | 459 return; |
475 | 460 |
476 // If we have already started collecting data from the HTTP cache, don't | 461 // If we have already started collecting data from the HTTP cache, don't |
477 // modify our state. | 462 // modify our state. |
478 if (cache_collector_->HasStarted()) | 463 if (cache_collector_->HasStarted()) |
479 return; | 464 return; |
480 | 465 |
481 // Exit early if there are no nodes to process. | 466 // Exit early if there are no nodes to process. |
482 if (params.empty()) | 467 if (params.empty()) |
483 return; | 468 return; |
484 | 469 |
485 // Try to deduce the URL that the render frame was handling. First check if | 470 // Copy FrameTreeNode IDs for the child frame into the combined mapping. |
486 // the summary node from the renderer has a document URL. If not, try looking | 471 iframe_key_to_frame_tree_id_map_.insert(child_frame_tree_map.begin(), |
487 // at the last committed URL of the frame. | 472 child_frame_tree_map.end()); |
488 GURL frame_url; | |
489 if (IsReportableUrl(params.back().url)) { | |
490 frame_url = params.back().url; | |
491 } else if (IsReportableUrl(frame_last_committed_url)) { | |
492 frame_url = frame_last_committed_url; | |
493 } | |
494 | |
495 // If we can't figure out which URL the frame was rendering then we don't know | |
496 // where these elements belong in the hierarchy. The DOM will be ambiguous. | |
497 if (frame_url.is_empty()) { | |
498 ambiguous_dom_ = true; | |
499 } | |
500 | 473 |
501 // Add the urls from the DOM to |resources_|. The renderer could be sending | 474 // Add the urls from the DOM to |resources_|. The renderer could be sending |
502 // bogus messages, so limit the number of nodes we accept. | 475 // bogus messages, so limit the number of nodes we accept. |
503 // Also update |elements_| with the DOM structure. | 476 // Also update |elements_| with the DOM structure. |
504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { | 477 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { |
505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; | 478 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; |
506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; | 479 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; |
507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; | 480 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; |
508 if (!node.url.is_empty()) { | 481 if (!node.url.is_empty()) { |
509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); | 482 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); |
510 } | 483 } |
511 // Check for a tag_name to avoid adding the summary node to the DOM. | 484 // Check for a tag_name to avoid adding the summary node to the DOM. |
512 if (!node.tag_name.empty()) { | 485 if (!node.tag_name.empty()) { |
513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, | 486 AddDomElement(frame_tree_node_id, node.node_id, node.tag_name, |
514 node.tag_name, node.parent_node_id, node.attributes, | 487 node.parent_node_id, node.attributes, resource); |
515 resource); | |
516 } | 488 } |
517 } | 489 } |
518 } | 490 } |
519 | 491 |
520 // Called from the SB Service on the IO thread, after the user has | 492 // Called from the SB Service on the IO thread, after the user has |
521 // closed the tab, or clicked proceed or goback. Since the user needs | 493 // closed the tab, or clicked proceed or goback. Since the user needs |
522 // to take an action, we expect this to be called after | 494 // to take an action, we expect this to be called after |
523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include | 495 // OnReceivedThreatDOMDetails in most cases. If not, we don't include |
524 // the DOM data in our report. | 496 // the DOM data in our report. |
525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { | 497 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { |
526 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 498 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
527 | 499 |
| 500 // Do a second pass over the elements and update iframe elements to have |
| 501 // references to their children. Children may have been received from a |
| 502 // different renderer than the iframe element. |
| 503 for (auto& element_pair : elements_) { |
| 504 const std::string& element_key = element_pair.first; |
| 505 HTMLElement* element = element_pair.second.get(); |
| 506 if (base::ContainsKey(iframe_key_to_frame_tree_id_map_, element_key)) { |
| 507 int frame_tree_id_of_iframe_renderer = |
| 508 iframe_key_to_frame_tree_id_map_[element_key]; |
| 509 const std::unordered_set<int>& child_ids = |
| 510 frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer]; |
| 511 for (const int child_id : child_ids) { |
| 512 element->add_child_ids(child_id); |
| 513 } |
| 514 } |
| 515 } |
528 did_proceed_ = did_proceed; | 516 did_proceed_ = did_proceed; |
529 num_visits_ = num_visit; | 517 num_visits_ = num_visit; |
530 std::vector<GURL> urls; | 518 std::vector<GURL> urls; |
531 for (ResourceMap::const_iterator it = resources_.begin(); | 519 for (ResourceMap::const_iterator it = resources_.begin(); |
532 it != resources_.end(); ++it) { | 520 it != resources_.end(); ++it) { |
533 urls.push_back(GURL(it->first)); | 521 urls.push_back(GURL(it->first)); |
534 } | 522 } |
535 redirects_collector_->StartHistoryCollection( | 523 redirects_collector_->StartHistoryCollection( |
536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); | 524 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); |
537 } | 525 } |
(...skipping 55 matching lines...) |
593 // Send the report, using the SafeBrowsingService. | 581 // Send the report, using the SafeBrowsingService. |
594 std::string serialized; | 582 std::string serialized; |
595 if (!report_->SerializeToString(&serialized)) { | 583 if (!report_->SerializeToString(&serialized)) { |
596 DLOG(ERROR) << "Unable to serialize the threat report."; | 584 DLOG(ERROR) << "Unable to serialize the threat report."; |
597 return; | 585 return; |
598 } | 586 } |
599 ui_manager_->SendSerializedThreatDetails(serialized); | 587 ui_manager_->SendSerializedThreatDetails(serialized); |
600 } | 588 } |
601 | 589 |
602 } // namespace safe_browsing | 590 } // namespace safe_browsing |
OLD | NEW |