Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(87)

Side by Side Diff: chrome/browser/safe_browsing/threat_details.cc

Issue 2837603002: Content API changes to improve DOM stitching in ThreatDetails code. (Closed)
Patch Set: Set output pointers correctly Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // Implementation of the ThreatDetails class. 5 // Implementation of the ThreatDetails class.
6 6
7 #include "chrome/browser/safe_browsing/threat_details.h" 7 #include "chrome/browser/safe_browsing/threat_details.h"
8 8
9 #include <stddef.h> 9 #include <stddef.h>
10 #include <stdint.h> 10 #include <stdint.h>
11 11
12 #include "base/bind.h" 12 #include "base/bind.h"
13 #include "base/lazy_instance.h" 13 #include "base/lazy_instance.h"
14 #include "base/metrics/histogram_macros.h" 14 #include "base/metrics/histogram_macros.h"
15 #include "base/strings/string_util.h" 15 #include "base/strings/string_util.h"
16 #include "chrome/browser/safe_browsing/threat_details_cache.h" 16 #include "chrome/browser/safe_browsing/threat_details_cache.h"
17 #include "chrome/browser/safe_browsing/threat_details_history.h" 17 #include "chrome/browser/safe_browsing/threat_details_history.h"
18 #include "components/history/core/browser/history_service.h" 18 #include "components/history/core/browser/history_service.h"
19 #include "components/safe_browsing/base_ui_manager.h" 19 #include "components/safe_browsing/base_ui_manager.h"
20 #include "components/safe_browsing/common/safebrowsing_messages.h" 20 #include "components/safe_browsing/common/safebrowsing_messages.h"
21 #include "content/public/browser/browser_thread.h" 21 #include "content/public/browser/browser_thread.h"
22 #include "content/public/browser/navigation_controller.h" 22 #include "content/public/browser/navigation_controller.h"
23 #include "content/public/browser/navigation_entry.h" 23 #include "content/public/browser/navigation_entry.h"
24 #include "content/public/browser/render_frame_host.h" 24 #include "content/public/browser/render_frame_host.h"
25 #include "content/public/browser/render_process_host.h"
25 #include "content/public/browser/web_contents.h" 26 #include "content/public/browser/web_contents.h"
26 #include "net/url_request/url_request_context_getter.h" 27 #include "net/url_request/url_request_context_getter.h"
27 28
28 using content::BrowserThread; 29 using content::BrowserThread;
29 using content::NavigationEntry; 30 using content::NavigationEntry;
30 using content::RenderFrameHost; 31 using content::RenderFrameHost;
31 using content::WebContents; 32 using content::WebContents;
32 33
33 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/ 34 // Keep in sync with KMaxNodes in components/safe_browsing/renderer/
34 // threat_dom_details.cc 35 // threat_dom_details.cc
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
279 } 280 }
280 if (!duplicate_child) 281 if (!duplicate_child)
281 url_resource->add_child_ids(child_resource->id()); 282 url_resource->add_child_ids(child_resource->id());
282 } 283 }
283 } 284 }
284 285
285 return url_resource; 286 return url_resource;
286 } 287 }
287 288
288 void ThreatDetails::AddDomElement( 289 void ThreatDetails::AddDomElement(
290 const int process_id,
289 const int frame_tree_node_id, 291 const int frame_tree_node_id,
290 const std::string& frame_url, 292 const int other_frame_routing_id,
291 const int element_node_id, 293 const int element_node_id,
292 const std::string& tagname, 294 const std::string& tagname,
293 const int parent_element_node_id, 295 const int parent_element_node_id,
294 const std::vector<AttributeNameValue>& attributes, 296 const std::vector<AttributeNameValue>& attributes,
295 const ClientSafeBrowsingReportRequest::Resource* resource) { 297 const ClientSafeBrowsingReportRequest::Resource* resource) {
296 // Create the element. It should not exist already since this function should 298 // Create the element. It should not exist already since this function should
297 // only be called once for each element. 299 // only be called once for each element.
298 const std::string element_key = 300 const std::string element_key =
299 GetElementKey(frame_tree_node_id, element_node_id); 301 GetElementKey(frame_tree_node_id, element_node_id);
300 HTMLElement* cur_element = FindOrCreateElement(element_key); 302 HTMLElement* cur_element = FindOrCreateElement(element_key);
301 303
302 // Set some basic metadata about the element. 304 // Set some basic metadata about the element.
303 const std::string tag_name_upper = base::ToUpperASCII(tagname); 305 const std::string tag_name_upper = base::ToUpperASCII(tagname);
304 if (!tag_name_upper.empty()) { 306 if (!tag_name_upper.empty()) {
305 cur_element->set_tag(tag_name_upper); 307 cur_element->set_tag(tag_name_upper);
306 } 308 }
307 for (const AttributeNameValue& attribute : attributes) { 309 for (const AttributeNameValue& attribute : attributes) {
308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); 310 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute();
309 attribute_pb->set_name(attribute.first); 311 attribute_pb->set_name(attribute.first);
310 attribute_pb->set_value(attribute.second); 312 attribute_pb->set_value(attribute.second);
311 } 313 }
312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; 314 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME";
313 315
314 if (resource) { 316 if (resource) {
315 cur_element->set_resource_id(resource->id()); 317 cur_element->set_resource_id(resource->id());
316 318
317 // For iframes, remember that this HTML Element represents an iframe with a 319 // For iframes, lookup the frame tree node id of the render frame that
Charlie Reis 2017/05/05 21:03:06 nit: FrameTreeNode ID of the frame that
lpz 2017/05/10 14:21:08 Done.
318 // specific URL. Elements from a frame with this URL are children of this 320 // handled that iframe's content. This must be done on the UI thread, and
319 // element. 321 // will update a map of |element_key| to |frame_tree_node_id|. A second pass
320 if (is_frame && 322 // is done to update the |elements_| list using this mapping.
321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { 323 if (is_frame) {
322 iframe_src_to_element_map_[resource->url()] = cur_element; 324 BrowserThread::PostTask(
325 BrowserThread::UI, FROM_HERE,
326 base::Bind(&ThreatDetails::LookupOtherFrameId, this, element_key,
327 process_id, other_frame_routing_id));
Charlie Reis 2017/05/05 21:03:06 There's a lot of posting back and forth here. Is
lpz 2017/05/10 14:21:08 Applied suggestion
323 } 328 }
324 } 329 }
325 330
326 // Next we try to lookup the parent of the current element and add ourselves 331 // Next we try to lookup the parent of the current element and add ourselves
327 // as a child of it. 332 // as a child of it.
328 HTMLElement* parent_element = nullptr; 333 HTMLElement* parent_element = nullptr;
329 if (parent_element_node_id == 0) { 334 if (parent_element_node_id == 0) {
330 // No parent indicates that this element is at the top of the current frame. 335 // No parent indicates that this element is at the top of the current frame.
331 // This frame could be a child of an iframe in another frame, or it could be 336 // Remember that this is a top-level element of the frame with the
332 // at the root of the whole page. If we have a frame URL then we can try to 337 // current |frame_tree_node_id|. If this element is inside an iframe, a
333 // map this element to its parent. 338 // second pass will insert this element as a child of its parent iframe.
334 if (!frame_url.empty()) { 339 frame_tree_id_to_children_map_[frame_tree_node_id].insert(
335 // First, remember that this element is at the top-level of a frame with 340 cur_element->id());
336 // our frame URL.
337 document_url_to_children_map_[frame_url].insert(cur_element->id());
338
339 // Now check if the frame URL matches the src URL of an iframe elsewhere.
340 // This means that we processed the parent iframe element earlier, so we
341 // can add ourselves as a child of that iframe.
342 // If no such iframe exists, it could be processed later, or this element
343 // is in the top-level frame and truly has no parent.
344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) {
345 parent_element = iframe_src_to_element_map_[frame_url];
346 }
347 }
348 } else { 341 } else {
349 // We have a parent ID, so this element is just a child of something inside 342 // We have a parent ID, so this element is just a child of something inside
350 // of our current frame. We can easily lookup our parent. 343 // of our current frame. We can easily lookup our parent.
351 const std::string& parent_key = 344 const std::string& parent_key =
352 GetElementKey(frame_tree_node_id, parent_element_node_id); 345 GetElementKey(frame_tree_node_id, parent_element_node_id);
353 if (base::ContainsKey(elements_, parent_key)) { 346 if (base::ContainsKey(elements_, parent_key)) {
354 parent_element = elements_[parent_key].get(); 347 parent_element = elements_[parent_key].get();
355 } 348 }
356 } 349 }
357 350
358 // If a parent element was found, add ourselves as a child, ensuring not to 351 // If a parent element was found, add ourselves as a child, ensuring not to
359 // duplicate child IDs. 352 // duplicate child IDs.
360 if (parent_element) { 353 if (parent_element) {
361 bool duplicate_child = false; 354 bool duplicate_child = false;
362 for (const int child_id : parent_element->child_ids()) { 355 for (const int child_id : parent_element->child_ids()) {
363 if (child_id == cur_element->id()) { 356 if (child_id == cur_element->id()) {
364 duplicate_child = true; 357 duplicate_child = true;
365 break; 358 break;
366 } 359 }
367 } 360 }
368 if (!duplicate_child) { 361 if (!duplicate_child) {
369 parent_element->add_child_ids(cur_element->id()); 362 parent_element->add_child_ids(cur_element->id());
370 } 363 }
371 } 364 }
372
373 // Finally, we need to check if the current element is the parent of some
374 // other elements that came in from another frame earlier. This only happens
375 // if we are an iframe, and our src URL exists in
376 // document_url_to_children_map_. If there is a match, then all of the
377 // children in that map belong to us.
378 if (is_frame && resource &&
379 base::ContainsKey(document_url_to_children_map_, resource->url())) {
380 const std::unordered_set<int>& child_ids =
381 document_url_to_children_map_[resource->url()];
382 for (const int child_id : child_ids) {
383 cur_element->add_child_ids(child_id);
384 }
385 }
386 } 365 }
387 366
388 void ThreatDetails::StartCollection() { 367 void ThreatDetails::StartCollection() {
389 DVLOG(1) << "Starting to compute threat details."; 368 DVLOG(1) << "Starting to compute threat details.";
390 report_.reset(new ClientSafeBrowsingReportRequest()); 369 report_.reset(new ClientSafeBrowsingReportRequest());
391 370
392 if (IsReportableUrl(resource_.url)) { 371 if (IsReportableUrl(resource_.url)) {
393 report_->set_url(resource_.url.spec()); 372 report_->set_url(resource_.url.spec());
394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); 373 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type));
395 } 374 }
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
442 // Get URLs of frames, scripts etc from the DOM. 421 // Get URLs of frames, scripts etc from the DOM.
443 // OnReceivedThreatDOMDetails will be called when the renderer replies. 422 // OnReceivedThreatDOMDetails will be called when the renderer replies.
444 // TODO(mattm): In theory, if the user proceeds through the warning DOM 423 // TODO(mattm): In theory, if the user proceeds through the warning DOM
445 // detail collection could be started once the page loads. 424 // detail collection could be started once the page loads.
446 web_contents()->SendToAllFrames( 425 web_contents()->SendToAllFrames(
447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); 426 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE));
448 } 427 }
449 } 428 }
450 429
451 // When the renderer is done, this is called. 430 // When the renderer is done, this is called.
452 void ThreatDetails::OnReceivedThreatDOMDetails( 431 void ThreatDetails::OnReceivedThreatDOMDetails(
Charlie Reis 2017/05/05 21:03:07 Is this called on the UI thread? That seems unfor
lpz 2017/05/10 14:21:08 Done by looking up the child ftnids up front here,
453 content::RenderFrameHost* sender, 432 content::RenderFrameHost* sender,
454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { 433 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) {
455 // Schedule this in IO thread, so it doesn't conflict with future users 434 // Schedule this in IO thread, so it doesn't conflict with future users
456 // of our data structures (eg GetSerializedReport). 435 // of our data structures (eg GetSerializedReport).
457 BrowserThread::PostTask( 436 BrowserThread::PostTask(
458 BrowserThread::IO, FROM_HERE, 437 BrowserThread::IO, FROM_HERE,
459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, 438 base::Bind(&ThreatDetails::AddDOMDetails, this,
460 sender->GetFrameTreeNodeId(), 439 sender->GetProcess()->GetID(), sender->GetFrameTreeNodeId(),
461 sender->GetLastCommittedURL(), params)); 440 sender->GetLastCommittedURL(), params));
Charlie Reis 2017/05/05 21:03:06 From AddDOMDetails, it looks like params has all t
lpz 2017/05/10 14:21:08 Nice thanks for this. It seems to take care of the
462 } 441 }
463 442
464 void ThreatDetails::AddDOMDetails( 443 void ThreatDetails::AddDOMDetails(
444 const int process_id,
465 const int frame_tree_node_id, 445 const int frame_tree_node_id,
466 const GURL& frame_last_committed_url, 446 const GURL& frame_last_committed_url,
467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { 447 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) {
468 DCHECK_CURRENTLY_ON(BrowserThread::IO); 448 DCHECK_CURRENTLY_ON(BrowserThread::IO);
469 DVLOG(1) << "Nodes from the DOM: " << params.size(); 449 DVLOG(1) << "Nodes from the DOM: " << params.size();
470 450
471 // If we have already started getting redirects from history service, 451 // If we have already started getting redirects from history service,
472 // don't modify state, otherwise will invalidate the iterators. 452 // don't modify state, otherwise will invalidate the iterators.
473 if (redirects_collector_->HasStarted()) 453 if (redirects_collector_->HasStarted())
474 return; 454 return;
475 455
476 // If we have already started collecting data from the HTTP cache, don't 456 // If we have already started collecting data from the HTTP cache, don't
477 // modify our state. 457 // modify our state.
478 if (cache_collector_->HasStarted()) 458 if (cache_collector_->HasStarted())
479 return; 459 return;
480 460
481 // Exit early if there are no nodes to process. 461 // Exit early if there are no nodes to process.
482 if (params.empty()) 462 if (params.empty())
483 return; 463 return;
484 464
485 // Try to deduce the URL that the render frame was handling. First check if
486 // the summary node from the renderer has a document URL. If not, try looking
487 // at the last committed URL of the frame.
488 GURL frame_url;
489 if (IsReportableUrl(params.back().url)) {
490 frame_url = params.back().url;
491 } else if (IsReportableUrl(frame_last_committed_url)) {
492 frame_url = frame_last_committed_url;
493 }
494
495 // If we can't figure out which URL the frame was rendering then we don't know
496 // where these elements belong in the hierarchy. The DOM will be ambiguous.
497 if (frame_url.is_empty()) {
498 ambiguous_dom_ = true;
499 }
500
501 // Add the urls from the DOM to |resources_|. The renderer could be sending 465 // Add the urls from the DOM to |resources_|. The renderer could be sending
502 // bogus messages, so limit the number of nodes we accept. 466 // bogus messages, so limit the number of nodes we accept.
503 // Also update |elements_| with the DOM structure. 467 // Also update |elements_| with the DOM structure.
504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { 468 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; 469 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i];
506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; 470 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; 471 ClientSafeBrowsingReportRequest::Resource* resource = nullptr;
508 if (!node.url.is_empty()) { 472 if (!node.url.is_empty()) {
509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); 473 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children));
510 } 474 }
511 // Check for a tag_name to avoid adding the summary node to the DOM. 475 // Check for a tag_name to avoid adding the summary node to the DOM.
512 if (!node.tag_name.empty()) { 476 if (!node.tag_name.empty()) {
513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, 477 AddDomElement(process_id, frame_tree_node_id, node.other_frame_routing_id,
514 node.tag_name, node.parent_node_id, node.attributes, 478 node.node_id, node.tag_name, node.parent_node_id,
515 resource); 479 node.attributes, resource);
516 } 480 }
517 } 481 }
518 } 482 }
519 483
484 void ThreatDetails::LookupOtherFrameId(const std::string& element_key,
485 const int process_id,
486 const int other_frame_routing_id) {
487 DCHECK_CURRENTLY_ON(BrowserThread::UI);
488 int other_frame_tree_node_id =
489 content::RenderFrameHost::GetFrameTreeNodeIdForRoutingId(
490 process_id, other_frame_routing_id);
491 if (other_frame_tree_node_id == content::RenderFrameHost::kNoFrameTreeNodeId)
492 ambiguous_dom_ = true;
493 iframe_key_to_frame_tree_id_map_[element_key] = other_frame_tree_node_id;
494 }
495
520 // Called from the SB Service on the IO thread, after the user has 496 // Called from the SB Service on the IO thread, after the user has
521 // closed the tab, or clicked proceed or goback. Since the user needs 497 // closed the tab, or clicked proceed or goback. Since the user needs
522 // to take an action, we expect this to be called after 498 // to take an action, we expect this to be called after
523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include 499 // OnReceivedThreatDOMDetails in most cases. If not, we don't include
524 // the DOM data in our report. 500 // the DOM data in our report.
525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { 501 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) {
526 DCHECK_CURRENTLY_ON(BrowserThread::IO); 502 DCHECK_CURRENTLY_ON(BrowserThread::IO);
527 503
504 // Do a second pass over the elements and update iframe elements to have
505 // references to their children. Children will have been received from a
Charlie Reis 2017/05/05 21:03:07 s/will/may/? (Or does this not apply to same-proc
lpz 2017/05/10 14:21:08 Done - this code doesn't do anything special for s
506 // different renderer than the iframe element.
507 for (auto& element_pair : elements_) {
508 const std::string& element_key = element_pair.first;
509 HTMLElement* element = element_pair.second.get();
510 if (element->tag() == "IFRAME" || element->tag() == "FRAME") {
511 int frame_tree_id_of_iframe_renderer =
512 iframe_key_to_frame_tree_id_map_[element_key];
Charlie Reis 2017/05/05 21:03:07 This doesn't look safe. We're reading it from the
lpz 2017/05/10 14:21:08 Your suggestion should cover this. In general, tho
513 const std::unordered_set<int>& child_ids =
514 frame_tree_id_to_children_map_[frame_tree_id_of_iframe_renderer];
515 for (const int child_id : child_ids) {
516 element->add_child_ids(child_id);
517 }
518 }
519 }
528 did_proceed_ = did_proceed; 520 did_proceed_ = did_proceed;
529 num_visits_ = num_visit; 521 num_visits_ = num_visit;
530 std::vector<GURL> urls; 522 std::vector<GURL> urls;
531 for (ResourceMap::const_iterator it = resources_.begin(); 523 for (ResourceMap::const_iterator it = resources_.begin();
532 it != resources_.end(); ++it) { 524 it != resources_.end(); ++it) {
533 urls.push_back(GURL(it->first)); 525 urls.push_back(GURL(it->first));
534 } 526 }
535 redirects_collector_->StartHistoryCollection( 527 redirects_collector_->StartHistoryCollection(
536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); 528 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this));
537 } 529 }
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
593 // Send the report, using the SafeBrowsingService. 585 // Send the report, using the SafeBrowsingService.
594 std::string serialized; 586 std::string serialized;
595 if (!report_->SerializeToString(&serialized)) { 587 if (!report_->SerializeToString(&serialized)) {
596 DLOG(ERROR) << "Unable to serialize the threat report."; 588 DLOG(ERROR) << "Unable to serialize the threat report.";
597 return; 589 return;
598 } 590 }
599 ui_manager_->SendSerializedThreatDetails(serialized); 591 ui_manager_->SendSerializedThreatDetails(serialized);
600 } 592 }
601 593
602 } // namespace safe_browsing 594 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698