OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Implementation of the ThreatDetails class. | 5 // Implementation of the ThreatDetails class. |
6 | 6 |
7 #include "components/safe_browsing/browser/threat_details.h" | 7 #include "components/safe_browsing/browser/threat_details.h" |
8 | 8 |
9 #include <stddef.h> | 9 #include <stddef.h> |
10 #include <stdint.h> | 10 #include <stdint.h> |
11 | 11 |
12 #include "base/bind.h" | 12 #include "base/bind.h" |
13 #include "base/lazy_instance.h" | 13 #include "base/lazy_instance.h" |
14 #include "base/metrics/histogram_macros.h" | 14 #include "base/metrics/histogram_macros.h" |
15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
16 #include "components/history/core/browser/history_service.h" | 16 #include "components/history/core/browser/history_service.h" |
17 #include "components/safe_browsing/base_ui_manager.h" | 17 #include "components/safe_browsing/base_ui_manager.h" |
18 #include "components/safe_browsing/browser/threat_details_cache.h" | 18 #include "components/safe_browsing/browser/threat_details_cache.h" |
19 #include "components/safe_browsing/browser/threat_details_history.h" | 19 #include "components/safe_browsing/browser/threat_details_history.h" |
20 #include "components/safe_browsing/common/safebrowsing_messages.h" | 20 #include "components/safe_browsing/common/safebrowsing_messages.h" |
21 #include "content/public/browser/browser_thread.h" | 21 #include "content/public/browser/browser_thread.h" |
22 #include "content/public/browser/navigation_controller.h" | 22 #include "content/public/browser/navigation_controller.h" |
23 #include "content/public/browser/navigation_entry.h" | 23 #include "content/public/browser/navigation_entry.h" |
24 #include "content/public/browser/render_frame_host.h" | 24 #include "content/public/browser/render_frame_host.h" |
25 #include "content/public/browser/render_process_host.h" | |
25 #include "content/public/browser/web_contents.h" | 26 #include "content/public/browser/web_contents.h" |
26 #include "net/url_request/url_request_context_getter.h" | 27 #include "net/url_request/url_request_context_getter.h" |
27 | 28 |
28 using content::BrowserThread; | 29 using content::BrowserThread; |
29 using content::NavigationEntry; | 30 using content::NavigationEntry; |
30 using content::RenderFrameHost; | 31 using content::RenderFrameHost; |
31 using content::WebContents; | 32 using content::WebContents; |
32 | 33 |
33 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ | 34 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ |
34 // threat_dom_details.cc | 35 // threat_dom_details.cc |
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
279 } | 280 } |
280 if (!duplicate_child) | 281 if (!duplicate_child) |
281 url_resource->add_child_ids(child_resource->id()); | 282 url_resource->add_child_ids(child_resource->id()); |
282 } | 283 } |
283 } | 284 } |
284 | 285 |
285 return url_resource; | 286 return url_resource; |
286 } | 287 } |
287 | 288 |
288 void ThreatDetails::AddDomElement( | 289 void ThreatDetails::AddDomElement( |
290 const int process_id, | |
289 const int frame_tree_node_id, | 291 const int frame_tree_node_id, |
290 const std::string& frame_url, | 292 const int child_frame_routing_id, |
Charlie Reis
2017/05/10 22:17:49
Both process_id and child_frame_routing_id look unused in this function. [...]
lpz
2017/05/12 13:53:16
Yep, and some cascading param cleanup from this in
| |
291 const int element_node_id, | 293 const int element_node_id, |
292 const std::string& tagname, | 294 const std::string& tagname, |
293 const int parent_element_node_id, | 295 const int parent_element_node_id, |
294 const std::vector<AttributeNameValue>& attributes, | 296 const std::vector<AttributeNameValue>& attributes, |
295 const ClientSafeBrowsingReportRequest::Resource* resource) { | 297 const ClientSafeBrowsingReportRequest::Resource* resource) { |
296 // Create the element. It should not exist already since this function should | 298 // Create the element. It should not exist already since this function should |
297 // only be called once for each element. | 299 // only be called once for each element. |
298 const std::string element_key = | 300 const std::string element_key = |
299 GetElementKey(frame_tree_node_id, element_node_id); | 301 GetElementKey(frame_tree_node_id, element_node_id); |
300 HTMLElement* cur_element = FindOrCreateElement(element_key); | 302 HTMLElement* cur_element = FindOrCreateElement(element_key); |
301 | 303 |
302 // Set some basic metadata about the element. | 304 // Set some basic metadata about the element. |
303 const std::string tag_name_upper = base::ToUpperASCII(tagname); | 305 const std::string tag_name_upper = base::ToUpperASCII(tagname); |
304 if (!tag_name_upper.empty()) { | 306 if (!tag_name_upper.empty()) { |
305 cur_element->set_tag(tag_name_upper); | 307 cur_element->set_tag(tag_name_upper); |
306 } | 308 } |
307 for (const AttributeNameValue& attribute : attributes) { | 309 for (const AttributeNameValue& attribute : attributes) { |
308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); | 310 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); |
309 attribute_pb->set_name(attribute.first); | 311 attribute_pb->set_name(attribute.first); |
310 attribute_pb->set_value(attribute.second); | 312 attribute_pb->set_value(attribute.second); |
311 } | 313 } |
312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; | |
313 | 314 |
314 if (resource) { | 315 if (resource) { |
315 cur_element->set_resource_id(resource->id()); | 316 cur_element->set_resource_id(resource->id()); |
316 | |
317 // For iframes, remember that this HTML Element represents an iframe with a | |
318 // specific URL. Elements from a frame with this URL are children of this | |
319 // element. | |
320 if (is_frame && | |
321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { | |
322 iframe_src_to_element_map_[resource->url()] = cur_element; | |
323 } | |
324 } | 317 } |
325 | 318 |
326 // Next we try to lookup the parent of the current element and add ourselves | 319 // Next we try to lookup the parent of the current element and add ourselves |
327 // as a child of it. | 320 // as a child of it. |
328 HTMLElement* parent_element = nullptr; | 321 HTMLElement* parent_element = nullptr; |
329 if (parent_element_node_id == 0) { | 322 if (parent_element_node_id == 0) { |
330 // No parent indicates that this element is at the top of the current frame. | 323 // No parent indicates that this element is at the top of the current frame. |
331 // This frame could be a child of an iframe in another frame, or it could be | 324 // Remember that this is a top-level element of the frame with the |
332 // at the root of the whole page. If we have a frame URL then we can try to | 325 // current |frame_tree_node_id|. If this element is inside an iframe, a |
333 // map this element to its parent. | 326 // second pass will insert this element as a child of its parent iframe. |
334 if (!frame_url.empty()) { | 327 frame_tree_id_to_children_map_[frame_tree_node_id].insert( |
335 // First, remember that this element is at the top-level of a frame with | 328 cur_element->id()); |
336 // our frame URL. | |
337 document_url_to_children_map_[frame_url].insert(cur_element->id()); | |
338 | |
339 // Now check if the frame URL matches the src URL of an iframe elsewhere. | |
340 // This means that we processed the parent iframe element earlier, so we | |
341 // can add ourselves as a child of that iframe. | |
342 // If no such iframe exists, it could be processed later, or this element | |
343 // is in the top-level frame and truly has no parent. | |
344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) { | |
345 parent_element = iframe_src_to_element_map_[frame_url]; | |
346 } | |
347 } | |
348 } else { | 329 } else { |
349 // We have a parent ID, so this element is just a child of something inside | 330 // We have a parent ID, so this element is just a child of something inside |
350 // of our current frame. We can easily lookup our parent. | 331 // of our current frame. We can easily lookup our parent. |
351 const std::string& parent_key = | 332 const std::string& parent_key = |
352 GetElementKey(frame_tree_node_id, parent_element_node_id); | 333 GetElementKey(frame_tree_node_id, parent_element_node_id); |
353 if (base::ContainsKey(elements_, parent_key)) { | 334 if (base::ContainsKey(elements_, parent_key)) { |
354 parent_element = elements_[parent_key].get(); | 335 parent_element = elements_[parent_key].get(); |
355 } | 336 } |
356 } | 337 } |
357 | 338 |
358 // If a parent element was found, add ourselves as a child, ensuring not to | 339 // If a parent element was found, add ourselves as a child, ensuring not to |
359 // duplicate child IDs. | 340 // duplicate child IDs. |
360 if (parent_element) { | 341 if (parent_element) { |
361 bool duplicate_child = false; | 342 bool duplicate_child = false; |
362 for (const int child_id : parent_element->child_ids()) { | 343 for (const int child_id : parent_element->child_ids()) { |
363 if (child_id == cur_element->id()) { | 344 if (child_id == cur_element->id()) { |
364 duplicate_child = true; | 345 duplicate_child = true; |
365 break; | 346 break; |
366 } | 347 } |
367 } | 348 } |
368 if (!duplicate_child) { | 349 if (!duplicate_child) { |
369 parent_element->add_child_ids(cur_element->id()); | 350 parent_element->add_child_ids(cur_element->id()); |
370 } | 351 } |
371 } | 352 } |
372 | |
373 // Finally, we need to check if the current element is the parent of some | |
374 // other elements that came in from another frame earlier. This only happens | |
375 // if we are an iframe, and our src URL exists in | |
376 // document_url_to_children_map_. If there is a match, then all of the | |
377 // children in that map belong to us. | |
378 if (is_frame && resource && | |
379 base::ContainsKey(document_url_to_children_map_, resource->url())) { | |
380 const std::unordered_set<int>& child_ids = | |
381 document_url_to_children_map_[resource->url()]; | |
382 for (const int child_id : child_ids) { | |
383 cur_element->add_child_ids(child_id); | |
384 } | |
385 } | |
386 } | 353 } |
387 | 354 |
388 void ThreatDetails::StartCollection() { | 355 void ThreatDetails::StartCollection() { |
389 DVLOG(1) << "Starting to compute threat details."; | 356 DVLOG(1) << "Starting to compute threat details."; |
390 report_.reset(new ClientSafeBrowsingReportRequest()); | 357 report_.reset(new ClientSafeBrowsingReportRequest()); |
391 | 358 |
392 if (IsReportableUrl(resource_.url)) { | 359 if (IsReportableUrl(resource_.url)) { |
393 report_->set_url(resource_.url.spec()); | 360 report_->set_url(resource_.url.spec()); |
394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); | 361 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); |
395 } | 362 } |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
445 // detail collection could be started once the page loads. | 412 // detail collection could be started once the page loads. |
446 web_contents()->SendToAllFrames( | 413 web_contents()->SendToAllFrames( |
447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); | 414 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); |
448 } | 415 } |
449 } | 416 } |
450 | 417 |
451 // When the renderer is done, this is called. | 418 // When the renderer is done, this is called. |
452 void ThreatDetails::OnReceivedThreatDOMDetails( | 419 void ThreatDetails::OnReceivedThreatDOMDetails( |
453 content::RenderFrameHost* sender, | 420 content::RenderFrameHost* sender, |
454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 421 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { |
422 // Lookup the FrameTreeNodeId of any child frames in the list of DOM nodes. | |
Charlie Reis
2017/05/10 22:17:49
nit: FrameTreeNode ID
lpz
2017/05/12 13:53:16
Done.
| |
423 const int sender_process_id = sender->GetProcess()->GetID(); | |
424 const int sender_frame_tree_node_id = sender->GetFrameTreeNodeId(); | |
425 KeyToFrameTreeIdMap child_frame_tree_map; | |
426 for (const SafeBrowsingHostMsg_ThreatDOMDetails_Node& node : params) { | |
427 if (node.child_frame_routing_id == 0) | |
428 continue; | |
429 | |
430 const std::string cur_element_key = | |
431 GetElementKey(sender_frame_tree_node_id, node.node_id); | |
432 RenderFrameHost* rfh = | |
433 content::RenderFrameHost::GetRenderFrameHostForRoutingId( | |
434 sender_process_id, node.child_frame_routing_id); | |
435 if (!rfh) { | |
436 ambiguous_dom_ = true; | |
437 } else { | |
438 child_frame_tree_map[cur_element_key] = rfh->GetFrameTreeNodeId(); | |
439 } | |
440 } | |
441 | |
455 // Schedule this in IO thread, so it doesn't conflict with future users | 442 // Schedule this in IO thread, so it doesn't conflict with future users |
456 // of our data structures (eg GetSerializedReport). | 443 // of our data structures (eg GetSerializedReport). |
457 BrowserThread::PostTask( | 444 BrowserThread::PostTask( |
458 BrowserThread::IO, FROM_HERE, | 445 BrowserThread::IO, FROM_HERE, |
459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, | 446 base::Bind(&ThreatDetails::AddDOMDetails, this, |
460 sender->GetFrameTreeNodeId(), | 447 sender->GetProcess()->GetID(), sender->GetFrameTreeNodeId(), |
461 sender->GetLastCommittedURL(), params)); | 448 sender->GetLastCommittedURL(), params, child_frame_tree_map)); |
462 } | 449 } |
463 | 450 |
464 void ThreatDetails::AddDOMDetails( | 451 void ThreatDetails::AddDOMDetails( |
452 const int process_id, | |
465 const int frame_tree_node_id, | 453 const int frame_tree_node_id, |
466 const GURL& frame_last_committed_url, | 454 const GURL& frame_last_committed_url, |
467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 455 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params, |
456 const KeyToFrameTreeIdMap& child_frame_tree_map) { | |
468 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 457 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
469 DVLOG(1) << "Nodes from the DOM: " << params.size(); | 458 DVLOG(1) << "Nodes from the DOM: " << params.size(); |
470 | 459 |
471 // If we have already started getting redirects from history service, | 460 // If we have already started getting redirects from history service, |
472 // don't modify state, otherwise will invalidate the iterators. | 461 // don't modify state, otherwise will invalidate the iterators. |
473 if (redirects_collector_->HasStarted()) | 462 if (redirects_collector_->HasStarted()) |
474 return; | 463 return; |
475 | 464 |
476 // If we have already started collecting data from the HTTP cache, don't | 465 // If we have already started collecting data from the HTTP cache, don't |
477 // modify our state. | 466 // modify our state. |
478 if (cache_collector_->HasStarted()) | 467 if (cache_collector_->HasStarted()) |
479 return; | 468 return; |
480 | 469 |
481 // Exit early if there are no nodes to process. | 470 // Exit early if there are no nodes to process. |
482 if (params.empty()) | 471 if (params.empty()) |
483 return; | 472 return; |
484 | 473 |
485 // Try to deduce the URL that the render frame was handling. First check if | 474 // Copy FrameTreeNode IDs for the child frame into the combined mapping. |
486 // the summary node from the renderer has a document URL. If not, try looking | 475 iframe_key_to_frame_tree_id_map_.insert(child_frame_tree_map.begin(), |
487 // at the last committed URL of the frame. | 476 child_frame_tree_map.end()); |
488 GURL frame_url; | |
489 if (IsReportableUrl(params.back().url)) { | |
490 frame_url = params.back().url; | |
491 } else if (IsReportableUrl(frame_last_committed_url)) { | |
492 frame_url = frame_last_committed_url; | |
493 } | |
494 | |
495 // If we can't figure out which URL the frame was rendering then we don't know | |
496 // where these elements belong in the hierarchy. The DOM will be ambiguous. | |
497 if (frame_url.is_empty()) { | |
498 ambiguous_dom_ = true; | |
499 } | |
500 | 477 |
501 // Add the urls from the DOM to |resources_|. The renderer could be sending | 478 // Add the urls from the DOM to |resources_|. The renderer could be sending |
502 // bogus messages, so limit the number of nodes we accept. | 479 // bogus messages, so limit the number of nodes we accept. |
503 // Also update |elements_| with the DOM structure. | 480 // Also update |elements_| with the DOM structure. |
504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { | 481 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { |
505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; | 482 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; |
506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; | 483 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; |
507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; | 484 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; |
508 if (!node.url.is_empty()) { | 485 if (!node.url.is_empty()) { |
509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); | 486 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); |
510 } | 487 } |
511 // Check for a tag_name to avoid adding the summary node to the DOM. | 488 // Check for a tag_name to avoid adding the summary node to the DOM. |
512 if (!node.tag_name.empty()) { | 489 if (!node.tag_name.empty()) { |
513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, | 490 AddDomElement(process_id, frame_tree_node_id, node.child_frame_routing_id, |
514 node.tag_name, node.parent_node_id, node.attributes, | 491 node.node_id, node.tag_name, node.parent_node_id, |
515 resource); | 492 node.attributes, resource); |
516 } | 493 } |
517 } | 494 } |
518 } | 495 } |
519 | 496 |
520 // Called from the SB Service on the IO thread, after the user has | 497 // Called from the SB Service on the IO thread, after the user has |
521 // closed the tab, or clicked proceed or goback. Since the user needs | 498 // closed the tab, or clicked proceed or goback. Since the user needs |
522 // to take an action, we expect this to be called after | 499 // to take an action, we expect this to be called after |
523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include | 500 // OnReceivedThreatDOMDetails in most cases. If not, we don't include |
524 // the DOM data in our report. | 501 // the DOM data in our report. |
525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { | 502 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { |
526 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 503 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
527 | 504 |
505 // Do a second pass over the elements and update iframe elements to have | |
Charlie Reis
2017/05/10 22:17:49
Side note: I don't fully understand this second pass. [...]
lpz
2017/05/12 13:53:16
Yes, this is tested by ThreatDetailsTest.ThreatDOM
Charlie Reis
2017/05/12 21:40:50
Acknowledged.
| |
506 // references to their children. Children may have been received from a | |
507 // different renderer than the iframe element. | |
508 for (auto& element_pair : elements_) { | |
509 const std::string& element_key = element_pair.first; | |
510 HTMLElement* element = element_pair.second.get(); | |
511 if (base::ContainsKey(iframe_key_to_frame_tree_id_map_, element_key)) { | |
512 int frame_tree_id_of_iframe_renderer = | |
513 iframe_key_to_frame_tree_id_map_[element_key]; | |
514 const std::unordered_set<int>& child_ids = | |
515 frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer]; | |
516 for (const int child_id : child_ids) { | |
517 element->add_child_ids(child_id); | |
518 } | |
519 } | |
520 } | |
528 did_proceed_ = did_proceed; | 521 did_proceed_ = did_proceed; |
529 num_visits_ = num_visit; | 522 num_visits_ = num_visit; |
530 std::vector<GURL> urls; | 523 std::vector<GURL> urls; |
531 for (ResourceMap::const_iterator it = resources_.begin(); | 524 for (ResourceMap::const_iterator it = resources_.begin(); |
532 it != resources_.end(); ++it) { | 525 it != resources_.end(); ++it) { |
533 urls.push_back(GURL(it->first)); | 526 urls.push_back(GURL(it->first)); |
534 } | 527 } |
535 redirects_collector_->StartHistoryCollection( | 528 redirects_collector_->StartHistoryCollection( |
536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); | 529 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); |
537 } | 530 } |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
593 // Send the report, using the SafeBrowsingService. | 586 // Send the report, using the SafeBrowsingService. |
594 std::string serialized; | 587 std::string serialized; |
595 if (!report_->SerializeToString(&serialized)) { | 588 if (!report_->SerializeToString(&serialized)) { |
596 DLOG(ERROR) << "Unable to serialize the threat report."; | 589 DLOG(ERROR) << "Unable to serialize the threat report."; |
597 return; | 590 return; |
598 } | 591 } |
599 ui_manager_->SendSerializedThreatDetails(serialized); | 592 ui_manager_->SendSerializedThreatDetails(serialized); |
600 } | 593 } |
601 | 594 |
602 } // namespace safe_browsing | 595 } // namespace safe_browsing |
OLD | NEW |