OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // Implementation of the ThreatDetails class. | 5 // Implementation of the ThreatDetails class. |
6 | 6 |
7 #include "chrome/browser/safe_browsing/threat_details.h" | 7 #include "chrome/browser/safe_browsing/threat_details.h" |
8 | 8 |
9 #include <stddef.h> | 9 #include <stddef.h> |
10 #include <stdint.h> | 10 #include <stdint.h> |
11 | 11 |
12 #include "base/bind.h" | 12 #include "base/bind.h" |
13 #include "base/lazy_instance.h" | 13 #include "base/lazy_instance.h" |
14 #include "base/metrics/histogram_macros.h" | 14 #include "base/metrics/histogram_macros.h" |
15 #include "base/strings/string_util.h" | 15 #include "base/strings/string_util.h" |
16 #include "chrome/browser/safe_browsing/threat_details_cache.h" | 16 #include "chrome/browser/safe_browsing/threat_details_cache.h" |
17 #include "chrome/browser/safe_browsing/threat_details_history.h" | 17 #include "chrome/browser/safe_browsing/threat_details_history.h" |
18 #include "components/history/core/browser/history_service.h" | 18 #include "components/history/core/browser/history_service.h" |
19 #include "components/safe_browsing/base_ui_manager.h" | 19 #include "components/safe_browsing/base_ui_manager.h" |
20 #include "components/safe_browsing/common/safebrowsing_messages.h" | 20 #include "components/safe_browsing/common/safebrowsing_messages.h" |
21 #include "content/public/browser/browser_thread.h" | 21 #include "content/public/browser/browser_thread.h" |
22 #include "content/public/browser/navigation_controller.h" | 22 #include "content/public/browser/navigation_controller.h" |
23 #include "content/public/browser/navigation_entry.h" | 23 #include "content/public/browser/navigation_entry.h" |
24 #include "content/public/browser/render_frame_host.h" | 24 #include "content/public/browser/render_frame_host.h" |
25 #include "content/public/browser/render_process_host.h" | |
25 #include "content/public/browser/web_contents.h" | 26 #include "content/public/browser/web_contents.h" |
26 #include "net/url_request/url_request_context_getter.h" | 27 #include "net/url_request/url_request_context_getter.h" |
27 | 28 |
28 using content::BrowserThread; | 29 using content::BrowserThread; |
29 using content::NavigationEntry; | 30 using content::NavigationEntry; |
30 using content::RenderFrameHost; | 31 using content::RenderFrameHost; |
31 using content::WebContents; | 32 using content::WebContents; |
32 | 33 |
33 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ | 34 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ |
34 // threat_dom_details.cc | 35 // threat_dom_details.cc |
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
279 } | 280 } |
280 if (!duplicate_child) | 281 if (!duplicate_child) |
281 url_resource->add_child_ids(child_resource->id()); | 282 url_resource->add_child_ids(child_resource->id()); |
282 } | 283 } |
283 } | 284 } |
284 | 285 |
285 return url_resource; | 286 return url_resource; |
286 } | 287 } |
287 | 288 |
288 void ThreatDetails::AddDomElement( | 289 void ThreatDetails::AddDomElement( |
290 const int process_id, | |
289 const int frame_tree_node_id, | 291 const int frame_tree_node_id, |
290 const std::string& frame_url, | 292 const int other_frame_routing_id, |
291 const int element_node_id, | 293 const int element_node_id, |
292 const std::string& tagname, | 294 const std::string& tagname, |
293 const int parent_element_node_id, | 295 const int parent_element_node_id, |
294 const std::vector<AttributeNameValue>& attributes, | 296 const std::vector<AttributeNameValue>& attributes, |
295 const ClientSafeBrowsingReportRequest::Resource* resource) { | 297 const ClientSafeBrowsingReportRequest::Resource* resource) { |
296 // Create the element. It should not exist already since this function should | 298 // Create the element. It should not exist already since this function should |
297 // only be called once for each element. | 299 // only be called once for each element. |
298 const std::string element_key = | 300 const std::string element_key = |
299 GetElementKey(frame_tree_node_id, element_node_id); | 301 GetElementKey(frame_tree_node_id, element_node_id); |
300 HTMLElement* cur_element = FindOrCreateElement(element_key); | 302 HTMLElement* cur_element = FindOrCreateElement(element_key); |
301 | 303 |
302 // Set some basic metadata about the element. | 304 // Set some basic metadata about the element. |
303 const std::string tag_name_upper = base::ToUpperASCII(tagname); | 305 const std::string tag_name_upper = base::ToUpperASCII(tagname); |
304 if (!tag_name_upper.empty()) { | 306 if (!tag_name_upper.empty()) { |
305 cur_element->set_tag(tag_name_upper); | 307 cur_element->set_tag(tag_name_upper); |
306 } | 308 } |
307 for (const AttributeNameValue& attribute : attributes) { | 309 for (const AttributeNameValue& attribute : attributes) { |
308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); | 310 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); |
309 attribute_pb->set_name(attribute.first); | 311 attribute_pb->set_name(attribute.first); |
310 attribute_pb->set_value(attribute.second); | 312 attribute_pb->set_value(attribute.second); |
311 } | 313 } |
312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; | 314 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; |
313 | 315 |
314 if (resource) { | 316 if (resource) { |
315 cur_element->set_resource_id(resource->id()); | 317 cur_element->set_resource_id(resource->id()); |
316 | 318 |
317 // For iframes, remember that this HTML Element represents an iframe with a | 319 // For iframes, lookup the frame tree node id of the render frame that |
Charlie Reis
2017/05/05 21:03:06
nit: FrameTreeNode ID of the frame that
lpz
2017/05/10 14:21:08
Done.
| |
318 // specific URL. Elements from a frame with this URL are children of this | 320 // handled that iframe's content. This must be done on the UI thread, and |
319 // element. | 321 // will update a map of |element_key| to |frame_tree_node_id|. A second pass |
320 if (is_frame && | 322 // is done to update the |elements_| list using this mapping. |
321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { | 323 if (is_frame) { |
322 iframe_src_to_element_map_[resource->url()] = cur_element; | 324 BrowserThread::PostTask( |
325 BrowserThread::UI, FROM_HERE, | |
326 base::Bind(&ThreatDetails::LookupOtherFrameId, this, element_key, | |
327 process_id, other_frame_routing_id)); | |
Charlie Reis
2017/05/05 21:03:06
There's a lot of posting back and forth here. Is
lpz
2017/05/10 14:21:08
Applied suggestion
| |
323 } | 328 } |
324 } | 329 } |
325 | 330 |
326 // Next we try to lookup the parent of the current element and add ourselves | 331 // Next we try to lookup the parent of the current element and add ourselves |
327 // as a child of it. | 332 // as a child of it. |
328 HTMLElement* parent_element = nullptr; | 333 HTMLElement* parent_element = nullptr; |
329 if (parent_element_node_id == 0) { | 334 if (parent_element_node_id == 0) { |
330 // No parent indicates that this element is at the top of the current frame. | 335 // No parent indicates that this element is at the top of the current frame. |
331 // This frame could be a child of an iframe in another frame, or it could be | 336 // Remember that this is a top-level element of the frame with the |
332 // at the root of the whole page. If we have a frame URL then we can try to | 337 // current |frame_tree_node_id|. If this element is inside an iframe, a |
333 // map this element to its parent. | 338 // second pass will insert this element as a child of its parent iframe. |
334 if (!frame_url.empty()) { | 339 frame_tree_id_to_children_map_[frame_tree_node_id].insert( |
335 // First, remember that this element is at the top-level of a frame with | 340 cur_element->id()); |
336 // our frame URL. | |
337 document_url_to_children_map_[frame_url].insert(cur_element->id()); | |
338 | |
339 // Now check if the frame URL matches the src URL of an iframe elsewhere. | |
340 // This means that we processed the parent iframe element earlier, so we | |
341 // can add ourselves as a child of that iframe. | |
342 // If no such iframe exists, it could be processed later, or this element | |
343 // is in the top-level frame and truly has no parent. | |
344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) { | |
345 parent_element = iframe_src_to_element_map_[frame_url]; | |
346 } | |
347 } | |
348 } else { | 341 } else { |
349 // We have a parent ID, so this element is just a child of something inside | 342 // We have a parent ID, so this element is just a child of something inside |
350 // of our current frame. We can easily lookup our parent. | 343 // of our current frame. We can easily lookup our parent. |
351 const std::string& parent_key = | 344 const std::string& parent_key = |
352 GetElementKey(frame_tree_node_id, parent_element_node_id); | 345 GetElementKey(frame_tree_node_id, parent_element_node_id); |
353 if (base::ContainsKey(elements_, parent_key)) { | 346 if (base::ContainsKey(elements_, parent_key)) { |
354 parent_element = elements_[parent_key].get(); | 347 parent_element = elements_[parent_key].get(); |
355 } | 348 } |
356 } | 349 } |
357 | 350 |
358 // If a parent element was found, add ourselves as a child, ensuring not to | 351 // If a parent element was found, add ourselves as a child, ensuring not to |
359 // duplicate child IDs. | 352 // duplicate child IDs. |
360 if (parent_element) { | 353 if (parent_element) { |
361 bool duplicate_child = false; | 354 bool duplicate_child = false; |
362 for (const int child_id : parent_element->child_ids()) { | 355 for (const int child_id : parent_element->child_ids()) { |
363 if (child_id == cur_element->id()) { | 356 if (child_id == cur_element->id()) { |
364 duplicate_child = true; | 357 duplicate_child = true; |
365 break; | 358 break; |
366 } | 359 } |
367 } | 360 } |
368 if (!duplicate_child) { | 361 if (!duplicate_child) { |
369 parent_element->add_child_ids(cur_element->id()); | 362 parent_element->add_child_ids(cur_element->id()); |
370 } | 363 } |
371 } | 364 } |
372 | |
373 // Finally, we need to check if the current element is the parent of some | |
374 // other elements that came in from another frame earlier. This only happens | |
375 // if we are an iframe, and our src URL exists in | |
376 // document_url_to_children_map_. If there is a match, then all of the | |
377 // children in that map belong to us. | |
378 if (is_frame && resource && | |
379 base::ContainsKey(document_url_to_children_map_, resource->url())) { | |
380 const std::unordered_set<int>& child_ids = | |
381 document_url_to_children_map_[resource->url()]; | |
382 for (const int child_id : child_ids) { | |
383 cur_element->add_child_ids(child_id); | |
384 } | |
385 } | |
386 } | 365 } |
387 | 366 |
388 void ThreatDetails::StartCollection() { | 367 void ThreatDetails::StartCollection() { |
389 DVLOG(1) << "Starting to compute threat details."; | 368 DVLOG(1) << "Starting to compute threat details."; |
390 report_.reset(new ClientSafeBrowsingReportRequest()); | 369 report_.reset(new ClientSafeBrowsingReportRequest()); |
391 | 370 |
392 if (IsReportableUrl(resource_.url)) { | 371 if (IsReportableUrl(resource_.url)) { |
393 report_->set_url(resource_.url.spec()); | 372 report_->set_url(resource_.url.spec()); |
394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); | 373 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); |
395 } | 374 } |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
442 // Get URLs of frames, scripts etc from the DOM. | 421 // Get URLs of frames, scripts etc from the DOM. |
443 // OnReceivedThreatDOMDetails will be called when the renderer replies. | 422 // OnReceivedThreatDOMDetails will be called when the renderer replies. |
444 // TODO(mattm): In theory, if the user proceeds through the warning DOM | 423 // TODO(mattm): In theory, if the user proceeds through the warning DOM |
445 // detail collection could be started once the page loads. | 424 // detail collection could be started once the page loads. |
446 web_contents()->SendToAllFrames( | 425 web_contents()->SendToAllFrames( |
447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); | 426 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); |
448 } | 427 } |
449 } | 428 } |
450 | 429 |
451 // When the renderer is done, this is called. | 430 // When the renderer is done, this is called. |
452 void ThreatDetails::OnReceivedThreatDOMDetails( | 431 void ThreatDetails::OnReceivedThreatDOMDetails( |
Charlie Reis
2017/05/05 21:03:07
Is this called on the UI thread? That seems unfor
lpz
2017/05/10 14:21:08
Done by looking up the child ftnids up front here,
| |
453 content::RenderFrameHost* sender, | 432 content::RenderFrameHost* sender, |
454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 433 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { |
455 // Schedule this in IO thread, so it doesn't conflict with future users | 434 // Schedule this in IO thread, so it doesn't conflict with future users |
456 // of our data structures (eg GetSerializedReport). | 435 // of our data structures (eg GetSerializedReport). |
457 BrowserThread::PostTask( | 436 BrowserThread::PostTask( |
458 BrowserThread::IO, FROM_HERE, | 437 BrowserThread::IO, FROM_HERE, |
459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, | 438 base::Bind(&ThreatDetails::AddDOMDetails, this, |
460 sender->GetFrameTreeNodeId(), | 439 sender->GetProcess()->GetID(), sender->GetFrameTreeNodeId(), |
461 sender->GetLastCommittedURL(), params)); | 440 sender->GetLastCommittedURL(), params)); |
Charlie Reis
2017/05/05 21:03:06
From AddDOMDetails, it looks like params has all t
lpz
2017/05/10 14:21:08
Nice thanks for this. It seems to take care of the
| |
462 } | 441 } |
463 | 442 |
464 void ThreatDetails::AddDOMDetails( | 443 void ThreatDetails::AddDOMDetails( |
444 const int process_id, | |
465 const int frame_tree_node_id, | 445 const int frame_tree_node_id, |
466 const GURL& frame_last_committed_url, | 446 const GURL& frame_last_committed_url, |
467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | 447 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { |
468 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 448 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
469 DVLOG(1) << "Nodes from the DOM: " << params.size(); | 449 DVLOG(1) << "Nodes from the DOM: " << params.size(); |
470 | 450 |
471 // If we have already started getting redirects from history service, | 451 // If we have already started getting redirects from history service, |
472 // don't modify state, otherwise will invalidate the iterators. | 452 // don't modify state, otherwise will invalidate the iterators. |
473 if (redirects_collector_->HasStarted()) | 453 if (redirects_collector_->HasStarted()) |
474 return; | 454 return; |
475 | 455 |
476 // If we have already started collecting data from the HTTP cache, don't | 456 // If we have already started collecting data from the HTTP cache, don't |
477 // modify our state. | 457 // modify our state. |
478 if (cache_collector_->HasStarted()) | 458 if (cache_collector_->HasStarted()) |
479 return; | 459 return; |
480 | 460 |
481 // Exit early if there are no nodes to process. | 461 // Exit early if there are no nodes to process. |
482 if (params.empty()) | 462 if (params.empty()) |
483 return; | 463 return; |
484 | 464 |
485 // Try to deduce the URL that the render frame was handling. First check if | |
486 // the summary node from the renderer has a document URL. If not, try looking | |
487 // at the last committed URL of the frame. | |
488 GURL frame_url; | |
489 if (IsReportableUrl(params.back().url)) { | |
490 frame_url = params.back().url; | |
491 } else if (IsReportableUrl(frame_last_committed_url)) { | |
492 frame_url = frame_last_committed_url; | |
493 } | |
494 | |
495 // If we can't figure out which URL the frame was rendering then we don't know | |
496 // where these elements belong in the hierarchy. The DOM will be ambiguous. | |
497 if (frame_url.is_empty()) { | |
498 ambiguous_dom_ = true; | |
499 } | |
500 | |
501 // Add the urls from the DOM to |resources_|. The renderer could be sending | 465 // Add the urls from the DOM to |resources_|. The renderer could be sending |
502 // bogus messages, so limit the number of nodes we accept. | 466 // bogus messages, so limit the number of nodes we accept. |
503 // Also update |elements_| with the DOM structure. | 467 // Also update |elements_| with the DOM structure. |
504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { | 468 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { |
505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; | 469 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; |
506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; | 470 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; |
507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; | 471 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; |
508 if (!node.url.is_empty()) { | 472 if (!node.url.is_empty()) { |
509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); | 473 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); |
510 } | 474 } |
511 // Check for a tag_name to avoid adding the summary node to the DOM. | 475 // Check for a tag_name to avoid adding the summary node to the DOM. |
512 if (!node.tag_name.empty()) { | 476 if (!node.tag_name.empty()) { |
513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, | 477 AddDomElement(process_id, frame_tree_node_id, node.other_frame_routing_id, |
514 node.tag_name, node.parent_node_id, node.attributes, | 478 node.node_id, node.tag_name, node.parent_node_id, |
515 resource); | 479 node.attributes, resource); |
516 } | 480 } |
517 } | 481 } |
518 } | 482 } |
519 | 483 |
484 void ThreatDetails::LookupOtherFrameId(const std::string& element_key, | |
485 const int process_id, | |
486 const int other_frame_routing_id) { | |
487 DCHECK_CURRENTLY_ON(BrowserThread::UI); | |
488 int other_frame_tree_node_id = | |
489 content::RenderFrameHost::GetFrameTreeNodeIdForRoutingId( | |
490 process_id, other_frame_routing_id); | |
491 if (other_frame_tree_node_id == content::RenderFrameHost::kNoFrameTreeNodeId) | |
492 ambiguous_dom_ = true; | |
493 iframe_key_to_frame_tree_id_map_[element_key] = other_frame_tree_node_id; | |
494 } | |
495 | |
520 // Called from the SB Service on the IO thread, after the user has | 496 // Called from the SB Service on the IO thread, after the user has |
521 // closed the tab, or clicked proceed or goback. Since the user needs | 497 // closed the tab, or clicked proceed or goback. Since the user needs |
522 // to take an action, we expect this to be called after | 498 // to take an action, we expect this to be called after |
523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include | 499 // OnReceivedThreatDOMDetails in most cases. If not, we don't include |
524 // the DOM data in our report. | 500 // the DOM data in our report. |
525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { | 501 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { |
526 DCHECK_CURRENTLY_ON(BrowserThread::IO); | 502 DCHECK_CURRENTLY_ON(BrowserThread::IO); |
527 | 503 |
504 // Do a second pass over the elements and update iframe elements to have | |
505 // references to their children. Children will have been received from a | |
Charlie Reis
2017/05/05 21:03:07
s/will/may/?
(Or does this not apply to same-proc
lpz
2017/05/10 14:21:08
Done - this code doesn't do anything special for s
| |
506 // different renderer than the iframe element. | |
507 for (auto& element_pair : elements_) { | |
508 const std::string& element_key = element_pair.first; | |
509 HTMLElement* element = element_pair.second.get(); | |
510 if (element->tag() == "IFRAME" || element->tag() == "FRAME") { | |
511 int frame_tree_id_of_iframe_renderer = | |
512 iframe_key_to_frame_tree_id_map_[element_key]; | |
Charlie Reis
2017/05/05 21:03:07
This doesn't look safe. We're reading it from the
lpz
2017/05/10 14:21:08
Your suggestion should cover this. In general, tho
| |
513 const std::unordered_set<int>& child_ids = | |
514 frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer]; | |
515 for (const int child_id : child_ids) { | |
516 element->add_child_ids(child_id); | |
517 } | |
518 } | |
519 } | |
528 did_proceed_ = did_proceed; | 520 did_proceed_ = did_proceed; |
529 num_visits_ = num_visit; | 521 num_visits_ = num_visit; |
530 std::vector<GURL> urls; | 522 std::vector<GURL> urls; |
531 for (ResourceMap::const_iterator it = resources_.begin(); | 523 for (ResourceMap::const_iterator it = resources_.begin(); |
532 it != resources_.end(); ++it) { | 524 it != resources_.end(); ++it) { |
533 urls.push_back(GURL(it->first)); | 525 urls.push_back(GURL(it->first)); |
534 } | 526 } |
535 redirects_collector_->StartHistoryCollection( | 527 redirects_collector_->StartHistoryCollection( |
536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); | 528 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); |
537 } | 529 } |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
593 // Send the report, using the SafeBrowsingService. | 585 // Send the report, using the SafeBrowsingService. |
594 std::string serialized; | 586 std::string serialized; |
595 if (!report_->SerializeToString(&serialized)) { | 587 if (!report_->SerializeToString(&serialized)) { |
596 DLOG(ERROR) << "Unable to serialize the threat report."; | 588 DLOG(ERROR) << "Unable to serialize the threat report."; |
597 return; | 589 return; |
598 } | 590 } |
599 ui_manager_->SendSerializedThreatDetails(serialized); | 591 ui_manager_->SendSerializedThreatDetails(serialized); |
600 } | 592 } |
601 | 593 |
602 } // namespace safe_browsing | 594 } // namespace safe_browsing |
OLD | NEW |