| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 // | |
| 5 // Implementation of the ThreatDetails class. | |
| 6 | |
| 7 #include "chrome/browser/safe_browsing/threat_details.h" | |
| 8 | |
| 9 #include <stddef.h> | |
| 10 #include <stdint.h> | |
| 11 | |
| 12 #include "base/bind.h" | |
| 13 #include "base/lazy_instance.h" | |
| 14 #include "base/metrics/histogram_macros.h" | |
| 15 #include "base/strings/string_util.h" | |
| 16 #include "chrome/browser/safe_browsing/threat_details_cache.h" | |
| 17 #include "chrome/browser/safe_browsing/threat_details_history.h" | |
| 18 #include "components/history/core/browser/history_service.h" | |
| 19 #include "components/safe_browsing/base_ui_manager.h" | |
| 20 #include "components/safe_browsing/common/safebrowsing_messages.h" | |
| 21 #include "content/public/browser/browser_thread.h" | |
| 22 #include "content/public/browser/navigation_controller.h" | |
| 23 #include "content/public/browser/navigation_entry.h" | |
| 24 #include "content/public/browser/render_frame_host.h" | |
| 25 #include "content/public/browser/web_contents.h" | |
| 26 #include "net/url_request/url_request_context_getter.h" | |
| 27 | |
| 28 using content::BrowserThread; | |
| 29 using content::NavigationEntry; | |
| 30 using content::RenderFrameHost; | |
| 31 using content::WebContents; | |
| 32 | |
// Upper bound on the number of DOM nodes accepted from one renderer message.
// Keep in sync with kMaxNodes in
// components/safe_browsing/renderer/threat_dom_details.cc
static constexpr uint32_t kMaxDomNodes = 500;
| 36 | |
| 37 namespace safe_browsing { | |
| 38 | |
| 39 // static | |
| 40 ThreatDetailsFactory* ThreatDetails::factory_ = NULL; | |
| 41 | |
| 42 namespace { | |
| 43 | |
| 44 typedef std::unordered_set<std::string> StringSet; | |
| 45 // A set of HTTPS headers that are allowed to be collected. Contains both | |
| 46 // request and response headers. All entries in this list should be lower-case | |
| 47 // to support case-insensitive comparison. | |
| 48 struct WhitelistedHttpsHeadersTraits | |
| 49 : base::internal::DestructorAtExitLazyInstanceTraits<StringSet> { | |
| 50 static StringSet* New(void* instance) { | |
| 51 StringSet* headers = | |
| 52 base::internal::DestructorAtExitLazyInstanceTraits<StringSet>::New( | |
| 53 instance); | |
| 54 headers->insert({"google-creative-id", "google-lineitem-id", "referer", | |
| 55 "content-type", "content-length", "date", "server", "cache-control", | |
| 56 "pragma", "expires"}); | |
| 57 return headers; | |
| 58 } | |
| 59 }; | |
| 60 base::LazyInstance<StringSet, WhitelistedHttpsHeadersTraits> | |
| 61 g_https_headers_whitelist = LAZY_INSTANCE_INITIALIZER; | |
| 62 | |
| 63 // Helper function that converts SBThreatType to | |
| 64 // ClientSafeBrowsingReportRequest::ReportType. | |
| 65 ClientSafeBrowsingReportRequest::ReportType GetReportTypeFromSBThreatType( | |
| 66 SBThreatType threat_type) { | |
| 67 switch (threat_type) { | |
| 68 case SB_THREAT_TYPE_URL_PHISHING: | |
| 69 return ClientSafeBrowsingReportRequest::URL_PHISHING; | |
| 70 case SB_THREAT_TYPE_URL_MALWARE: | |
| 71 return ClientSafeBrowsingReportRequest::URL_MALWARE; | |
| 72 case SB_THREAT_TYPE_URL_UNWANTED: | |
| 73 return ClientSafeBrowsingReportRequest::URL_UNWANTED; | |
| 74 case SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL: | |
| 75 return ClientSafeBrowsingReportRequest::CLIENT_SIDE_PHISHING_URL; | |
| 76 case SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL: | |
| 77 return ClientSafeBrowsingReportRequest::CLIENT_SIDE_MALWARE_URL; | |
| 78 default: // Gated by SafeBrowsingBlockingPage::ShouldReportThreatDetails. | |
| 79 NOTREACHED() << "We should not send report for threat type " | |
| 80 << threat_type; | |
| 81 return ClientSafeBrowsingReportRequest::UNKNOWN; | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 // Clears the specified HTTPS resource of any sensitive data, only retaining | |
| 86 // data that is whitelisted for collection. | |
| 87 void ClearHttpsResource(ClientSafeBrowsingReportRequest::Resource* resource) { | |
| 88 // Make a copy of the original resource to retain all data. | |
| 89 ClientSafeBrowsingReportRequest::Resource orig_resource(*resource); | |
| 90 | |
| 91 // Clear the request headers and copy over any whitelisted ones. | |
| 92 resource->clear_request(); | |
| 93 for (int i = 0; i < orig_resource.request().headers_size(); ++i) { | |
| 94 ClientSafeBrowsingReportRequest::HTTPHeader* orig_header = orig_resource | |
| 95 .mutable_request()->mutable_headers(i); | |
| 96 if (g_https_headers_whitelist.Get().count( | |
| 97 base::ToLowerASCII(orig_header->name())) > 0) { | |
| 98 resource->mutable_request()->add_headers()->Swap(orig_header); | |
| 99 } | |
| 100 } | |
| 101 // Also copy some other request fields. | |
| 102 resource->mutable_request()->mutable_bodydigest()->swap( | |
| 103 *orig_resource.mutable_request()->mutable_bodydigest()); | |
| 104 resource->mutable_request()->set_bodylength( | |
| 105 orig_resource.request().bodylength()); | |
| 106 | |
| 107 // ...repeat for response headers. | |
| 108 resource->clear_response(); | |
| 109 for (int i = 0; i < orig_resource.response().headers_size(); ++i) { | |
| 110 ClientSafeBrowsingReportRequest::HTTPHeader* orig_header = orig_resource | |
| 111 .mutable_response()->mutable_headers(i); | |
| 112 if (g_https_headers_whitelist.Get().count( | |
| 113 base::ToLowerASCII(orig_header->name())) > 0) { | |
| 114 resource->mutable_response()->add_headers()->Swap(orig_header); | |
| 115 } | |
| 116 } | |
| 117 // Also copy some other response fields. | |
| 118 resource->mutable_response()->mutable_bodydigest()->swap( | |
| 119 *orig_resource.mutable_response()->mutable_bodydigest()); | |
| 120 resource->mutable_response()->set_bodylength( | |
| 121 orig_resource.response().bodylength()); | |
| 122 resource->mutable_response()->mutable_remote_ip()->swap( | |
| 123 *orig_resource.mutable_response()->mutable_remote_ip()); | |
| 124 } | |
| 125 | |
| 126 std::string GetElementKey(const int frame_tree_node_id, | |
| 127 const int element_node_id) { | |
| 128 return base::StringPrintf("%d-%d", frame_tree_node_id, element_node_id); | |
| 129 } | |
| 130 | |
| 131 } // namespace | |
| 132 | |
| 133 // The default ThreatDetailsFactory. Global, made a singleton so we | |
| 134 // don't leak it. | |
| 135 class ThreatDetailsFactoryImpl : public ThreatDetailsFactory { | |
| 136 public: | |
| 137 ThreatDetails* CreateThreatDetails( | |
| 138 BaseUIManager* ui_manager, | |
| 139 WebContents* web_contents, | |
| 140 const security_interstitials::UnsafeResource& unsafe_resource, | |
| 141 net::URLRequestContextGetter* request_context_getter, | |
| 142 history::HistoryService* history_service) override { | |
| 143 return new ThreatDetails(ui_manager, web_contents, unsafe_resource, | |
| 144 request_context_getter, history_service); | |
| 145 } | |
| 146 | |
| 147 private: | |
| 148 friend struct base::LazyInstanceTraitsBase<ThreatDetailsFactoryImpl>; | |
| 149 | |
| 150 ThreatDetailsFactoryImpl() {} | |
| 151 | |
| 152 DISALLOW_COPY_AND_ASSIGN(ThreatDetailsFactoryImpl); | |
| 153 }; | |
| 154 | |
| 155 static base::LazyInstance<ThreatDetailsFactoryImpl>::DestructorAtExit | |
| 156 g_threat_details_factory_impl = LAZY_INSTANCE_INITIALIZER; | |
| 157 | |
| 158 // Create a ThreatDetails for the given tab. | |
| 159 /* static */ | |
| 160 ThreatDetails* ThreatDetails::NewThreatDetails( | |
| 161 BaseUIManager* ui_manager, | |
| 162 WebContents* web_contents, | |
| 163 const UnsafeResource& resource, | |
| 164 net::URLRequestContextGetter* request_context_getter, | |
| 165 history::HistoryService* history_service) { | |
| 166 // Set up the factory if this has not been done already (tests do that | |
| 167 // before this method is called). | |
| 168 if (!factory_) | |
| 169 factory_ = g_threat_details_factory_impl.Pointer(); | |
| 170 return factory_->CreateThreatDetails(ui_manager, web_contents, resource, | |
| 171 request_context_getter, history_service); | |
| 172 } | |
| 173 | |
| 174 // Create a ThreatDetails for the given tab. Runs in the UI thread. | |
| 175 ThreatDetails::ThreatDetails( | |
| 176 BaseUIManager* ui_manager, | |
| 177 content::WebContents* web_contents, | |
| 178 const UnsafeResource& resource, | |
| 179 net::URLRequestContextGetter* request_context_getter, | |
| 180 history::HistoryService* history_service) | |
| 181 : content::WebContentsObserver(web_contents), | |
| 182 request_context_getter_(request_context_getter), | |
| 183 ui_manager_(ui_manager), | |
| 184 resource_(resource), | |
| 185 cache_result_(false), | |
| 186 did_proceed_(false), | |
| 187 num_visits_(0), | |
| 188 ambiguous_dom_(false), | |
| 189 cache_collector_(new ThreatDetailsCacheCollector) { | |
| 190 redirects_collector_ = new ThreatDetailsRedirectsCollector( | |
| 191 history_service ? history_service->AsWeakPtr() | |
| 192 : base::WeakPtr<history::HistoryService>()); | |
| 193 StartCollection(); | |
| 194 } | |
| 195 | |
| 196 ThreatDetails::~ThreatDetails() {} | |
| 197 | |
| 198 bool ThreatDetails::OnMessageReceived(const IPC::Message& message, | |
| 199 RenderFrameHost* render_frame_host) { | |
| 200 bool handled = true; | |
| 201 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(ThreatDetails, message, render_frame_host) | |
| 202 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_ThreatDOMDetails, | |
| 203 OnReceivedThreatDOMDetails) | |
| 204 IPC_MESSAGE_UNHANDLED(handled = false) | |
| 205 IPC_END_MESSAGE_MAP() | |
| 206 return handled; | |
| 207 } | |
| 208 | |
| 209 bool ThreatDetails::IsReportableUrl(const GURL& url) const { | |
| 210 // TODO(panayiotis): also skip internal urls. | |
| 211 return url.SchemeIs("http") || url.SchemeIs("https"); | |
| 212 } | |
| 213 | |
| 214 // Looks for a Resource for the given url in resources_. If found, it | |
| 215 // updates |resource|. Otherwise, it creates a new message, adds it to | |
| 216 // resources_ and updates |resource| to point to it. | |
| 217 // | |
| 218 ClientSafeBrowsingReportRequest::Resource* ThreatDetails::FindOrCreateResource( | |
| 219 const GURL& url) { | |
| 220 auto& resource = resources_[url.spec()]; | |
| 221 if (!resource) { | |
| 222 // Create the resource for |url|. | |
| 223 int id = resources_.size() - 1; | |
| 224 std::unique_ptr<ClientSafeBrowsingReportRequest::Resource> new_resource( | |
| 225 new ClientSafeBrowsingReportRequest::Resource()); | |
| 226 new_resource->set_url(url.spec()); | |
| 227 new_resource->set_id(id); | |
| 228 resource = std::move(new_resource); | |
| 229 } | |
| 230 return resource.get(); | |
| 231 } | |
| 232 | |
| 233 HTMLElement* ThreatDetails::FindOrCreateElement( | |
| 234 const std::string& element_key) { | |
| 235 auto& element = elements_[element_key]; | |
| 236 if (!element) { | |
| 237 // Create an entry for this element. | |
| 238 int element_dom_id = elements_.size() - 1; | |
| 239 std::unique_ptr<HTMLElement> new_element(new HTMLElement()); | |
| 240 new_element->set_id(element_dom_id); | |
| 241 element = std::move(new_element); | |
| 242 } | |
| 243 return element.get(); | |
| 244 } | |
| 245 | |
| 246 ClientSafeBrowsingReportRequest::Resource* ThreatDetails::AddUrl( | |
| 247 const GURL& url, | |
| 248 const GURL& parent, | |
| 249 const std::string& tagname, | |
| 250 const std::vector<GURL>* children) { | |
| 251 if (!url.is_valid() || !IsReportableUrl(url)) | |
| 252 return nullptr; | |
| 253 | |
| 254 // Find (or create) the resource for the url. | |
| 255 ClientSafeBrowsingReportRequest::Resource* url_resource = | |
| 256 FindOrCreateResource(url); | |
| 257 if (!tagname.empty()) | |
| 258 url_resource->set_tag_name(tagname); | |
| 259 if (!parent.is_empty() && IsReportableUrl(parent)) { | |
| 260 // Add the resource for the parent. | |
| 261 ClientSafeBrowsingReportRequest::Resource* parent_resource = | |
| 262 FindOrCreateResource(parent); | |
| 263 // Update the parent-child relation | |
| 264 url_resource->set_parent_id(parent_resource->id()); | |
| 265 } | |
| 266 if (children) { | |
| 267 for (std::vector<GURL>::const_iterator it = children->begin(); | |
| 268 it != children->end(); ++it) { | |
| 269 // TODO(lpz): Should this first check if the child URL is reportable | |
| 270 // before creating the resource? | |
| 271 ClientSafeBrowsingReportRequest::Resource* child_resource = | |
| 272 FindOrCreateResource(*it); | |
| 273 bool duplicate_child = false; | |
| 274 for (auto child_id : url_resource->child_ids()) { | |
| 275 if (child_id == child_resource->id()) { | |
| 276 duplicate_child = true; | |
| 277 break; | |
| 278 } | |
| 279 } | |
| 280 if (!duplicate_child) | |
| 281 url_resource->add_child_ids(child_resource->id()); | |
| 282 } | |
| 283 } | |
| 284 | |
| 285 return url_resource; | |
| 286 } | |
| 287 | |
| 288 void ThreatDetails::AddDomElement( | |
| 289 const int frame_tree_node_id, | |
| 290 const std::string& frame_url, | |
| 291 const int element_node_id, | |
| 292 const std::string& tagname, | |
| 293 const int parent_element_node_id, | |
| 294 const std::vector<AttributeNameValue>& attributes, | |
| 295 const ClientSafeBrowsingReportRequest::Resource* resource) { | |
| 296 // Create the element. It should not exist already since this function should | |
| 297 // only be called once for each element. | |
| 298 const std::string element_key = | |
| 299 GetElementKey(frame_tree_node_id, element_node_id); | |
| 300 HTMLElement* cur_element = FindOrCreateElement(element_key); | |
| 301 | |
| 302 // Set some basic metadata about the element. | |
| 303 const std::string tag_name_upper = base::ToUpperASCII(tagname); | |
| 304 if (!tag_name_upper.empty()) { | |
| 305 cur_element->set_tag(tag_name_upper); | |
| 306 } | |
| 307 for (const AttributeNameValue& attribute : attributes) { | |
| 308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); | |
| 309 attribute_pb->set_name(attribute.first); | |
| 310 attribute_pb->set_value(attribute.second); | |
| 311 } | |
| 312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; | |
| 313 | |
| 314 if (resource) { | |
| 315 cur_element->set_resource_id(resource->id()); | |
| 316 | |
| 317 // For iframes, remember that this HTML Element represents an iframe with a | |
| 318 // specific URL. Elements from a frame with this URL are children of this | |
| 319 // element. | |
| 320 if (is_frame && | |
| 321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { | |
| 322 iframe_src_to_element_map_[resource->url()] = cur_element; | |
| 323 } | |
| 324 } | |
| 325 | |
| 326 // Next we try to lookup the parent of the current element and add ourselves | |
| 327 // as a child of it. | |
| 328 HTMLElement* parent_element = nullptr; | |
| 329 if (parent_element_node_id == 0) { | |
| 330 // No parent indicates that this element is at the top of the current frame. | |
| 331 // This frame could be a child of an iframe in another frame, or it could be | |
| 332 // at the root of the whole page. If we have a frame URL then we can try to | |
| 333 // map this element to its parent. | |
| 334 if (!frame_url.empty()) { | |
| 335 // First, remember that this element is at the top-level of a frame with | |
| 336 // our frame URL. | |
| 337 document_url_to_children_map_[frame_url].insert(cur_element->id()); | |
| 338 | |
| 339 // Now check if the frame URL matches the src URL of an iframe elsewhere. | |
| 340 // This means that we processed the parent iframe element earlier, so we | |
| 341 // can add ourselves as a child of that iframe. | |
| 342 // If no such iframe exists, it could be processed later, or this element | |
| 343 // is in the top-level frame and truly has no parent. | |
| 344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) { | |
| 345 parent_element = iframe_src_to_element_map_[frame_url]; | |
| 346 } | |
| 347 } | |
| 348 } else { | |
| 349 // We have a parent ID, so this element is just a child of something inside | |
| 350 // of our current frame. We can easily lookup our parent. | |
| 351 const std::string& parent_key = | |
| 352 GetElementKey(frame_tree_node_id, parent_element_node_id); | |
| 353 if (base::ContainsKey(elements_, parent_key)) { | |
| 354 parent_element = elements_[parent_key].get(); | |
| 355 } | |
| 356 } | |
| 357 | |
| 358 // If a parent element was found, add ourselves as a child, ensuring not to | |
| 359 // duplicate child IDs. | |
| 360 if (parent_element) { | |
| 361 bool duplicate_child = false; | |
| 362 for (const int child_id : parent_element->child_ids()) { | |
| 363 if (child_id == cur_element->id()) { | |
| 364 duplicate_child = true; | |
| 365 break; | |
| 366 } | |
| 367 } | |
| 368 if (!duplicate_child) { | |
| 369 parent_element->add_child_ids(cur_element->id()); | |
| 370 } | |
| 371 } | |
| 372 | |
| 373 // Finally, we need to check if the current element is the parent of some | |
| 374 // other elements that came in from another frame earlier. This only happens | |
| 375 // if we are an iframe, and our src URL exists in | |
| 376 // document_url_to_children_map_. If there is a match, then all of the | |
| 377 // children in that map belong to us. | |
| 378 if (is_frame && resource && | |
| 379 base::ContainsKey(document_url_to_children_map_, resource->url())) { | |
| 380 const std::unordered_set<int>& child_ids = | |
| 381 document_url_to_children_map_[resource->url()]; | |
| 382 for (const int child_id : child_ids) { | |
| 383 cur_element->add_child_ids(child_id); | |
| 384 } | |
| 385 } | |
| 386 } | |
| 387 | |
| 388 void ThreatDetails::StartCollection() { | |
| 389 DVLOG(1) << "Starting to compute threat details."; | |
| 390 report_.reset(new ClientSafeBrowsingReportRequest()); | |
| 391 | |
| 392 if (IsReportableUrl(resource_.url)) { | |
| 393 report_->set_url(resource_.url.spec()); | |
| 394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); | |
| 395 } | |
| 396 | |
| 397 GURL referrer_url; | |
| 398 NavigationEntry* nav_entry = resource_.GetNavigationEntryForResource(); | |
| 399 if (nav_entry) { | |
| 400 GURL page_url = nav_entry->GetURL(); | |
| 401 if (IsReportableUrl(page_url)) | |
| 402 report_->set_page_url(page_url.spec()); | |
| 403 | |
| 404 referrer_url = nav_entry->GetReferrer().url; | |
| 405 if (IsReportableUrl(referrer_url)) | |
| 406 report_->set_referrer_url(referrer_url.spec()); | |
| 407 | |
| 408 // Add the nodes, starting from the page url. | |
| 409 AddUrl(page_url, GURL(), std::string(), NULL); | |
| 410 } | |
| 411 | |
| 412 // Add the resource_url and its original url, if non-empty and different. | |
| 413 if (!resource_.original_url.is_empty() && | |
| 414 resource_.url != resource_.original_url) { | |
| 415 // Add original_url, as the parent of resource_url. | |
| 416 AddUrl(resource_.original_url, GURL(), std::string(), NULL); | |
| 417 AddUrl(resource_.url, resource_.original_url, std::string(), NULL); | |
| 418 } else { | |
| 419 AddUrl(resource_.url, GURL(), std::string(), NULL); | |
| 420 } | |
| 421 | |
| 422 // Add the redirect urls, if non-empty. The redirect urls do not include the | |
| 423 // original url, but include the unsafe url which is the last one of the | |
| 424 // redirect urls chain | |
| 425 GURL parent_url; | |
| 426 // Set the original url as the parent of the first redirect url if it's not | |
| 427 // empty. | |
| 428 if (!resource_.original_url.is_empty()) | |
| 429 parent_url = resource_.original_url; | |
| 430 | |
| 431 // Set the previous redirect url as the parent of the next one | |
| 432 for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) { | |
| 433 AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL); | |
| 434 parent_url = resource_.redirect_urls[i]; | |
| 435 } | |
| 436 | |
| 437 // Add the referrer url. | |
| 438 if (!referrer_url.is_empty()) | |
| 439 AddUrl(referrer_url, GURL(), std::string(), NULL); | |
| 440 | |
| 441 if (!resource_.IsMainPageLoadBlocked()) { | |
| 442 // Get URLs of frames, scripts etc from the DOM. | |
| 443 // OnReceivedThreatDOMDetails will be called when the renderer replies. | |
| 444 // TODO(mattm): In theory, if the user proceeds through the warning DOM | |
| 445 // detail collection could be started once the page loads. | |
| 446 web_contents()->SendToAllFrames( | |
| 447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); | |
| 448 } | |
| 449 } | |
| 450 | |
| 451 // When the renderer is done, this is called. | |
| 452 void ThreatDetails::OnReceivedThreatDOMDetails( | |
| 453 content::RenderFrameHost* sender, | |
| 454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | |
| 455 // Schedule this in IO thread, so it doesn't conflict with future users | |
| 456 // of our data structures (eg GetSerializedReport). | |
| 457 BrowserThread::PostTask( | |
| 458 BrowserThread::IO, FROM_HERE, | |
| 459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, | |
| 460 sender->GetFrameTreeNodeId(), | |
| 461 sender->GetLastCommittedURL(), params)); | |
| 462 } | |
| 463 | |
| 464 void ThreatDetails::AddDOMDetails( | |
| 465 const int frame_tree_node_id, | |
| 466 const GURL& frame_last_committed_url, | |
| 467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | |
| 468 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
| 469 DVLOG(1) << "Nodes from the DOM: " << params.size(); | |
| 470 | |
| 471 // If we have already started getting redirects from history service, | |
| 472 // don't modify state, otherwise will invalidate the iterators. | |
| 473 if (redirects_collector_->HasStarted()) | |
| 474 return; | |
| 475 | |
| 476 // If we have already started collecting data from the HTTP cache, don't | |
| 477 // modify our state. | |
| 478 if (cache_collector_->HasStarted()) | |
| 479 return; | |
| 480 | |
| 481 // Exit early if there are no nodes to process. | |
| 482 if (params.empty()) | |
| 483 return; | |
| 484 | |
| 485 // Try to deduce the URL that the render frame was handling. First check if | |
| 486 // the summary node from the renderer has a document URL. If not, try looking | |
| 487 // at the last committed URL of the frame. | |
| 488 GURL frame_url; | |
| 489 if (IsReportableUrl(params.back().url)) { | |
| 490 frame_url = params.back().url; | |
| 491 } else if (IsReportableUrl(frame_last_committed_url)) { | |
| 492 frame_url = frame_last_committed_url; | |
| 493 } | |
| 494 | |
| 495 // If we can't figure out which URL the frame was rendering then we don't know | |
| 496 // where these elements belong in the hierarchy. The DOM will be ambiguous. | |
| 497 if (frame_url.is_empty()) { | |
| 498 ambiguous_dom_ = true; | |
| 499 } | |
| 500 | |
| 501 // Add the urls from the DOM to |resources_|. The renderer could be sending | |
| 502 // bogus messages, so limit the number of nodes we accept. | |
| 503 // Also update |elements_| with the DOM structure. | |
| 504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { | |
| 505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; | |
| 506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; | |
| 507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; | |
| 508 if (!node.url.is_empty()) { | |
| 509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); | |
| 510 } | |
| 511 // Check for a tag_name to avoid adding the summary node to the DOM. | |
| 512 if (!node.tag_name.empty()) { | |
| 513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, | |
| 514 node.tag_name, node.parent_node_id, node.attributes, | |
| 515 resource); | |
| 516 } | |
| 517 } | |
| 518 } | |
| 519 | |
| 520 // Called from the SB Service on the IO thread, after the user has | |
| 521 // closed the tab, or clicked proceed or goback. Since the user needs | |
| 522 // to take an action, we expect this to be called after | |
| 523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include | |
| 524 // the DOM data in our report. | |
| 525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { | |
| 526 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
| 527 | |
| 528 did_proceed_ = did_proceed; | |
| 529 num_visits_ = num_visit; | |
| 530 std::vector<GURL> urls; | |
| 531 for (ResourceMap::const_iterator it = resources_.begin(); | |
| 532 it != resources_.end(); ++it) { | |
| 533 urls.push_back(GURL(it->first)); | |
| 534 } | |
| 535 redirects_collector_->StartHistoryCollection( | |
| 536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); | |
| 537 } | |
| 538 | |
| 539 void ThreatDetails::OnRedirectionCollectionReady() { | |
| 540 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
| 541 const std::vector<RedirectChain>& redirects = | |
| 542 redirects_collector_->GetCollectedUrls(); | |
| 543 | |
| 544 for (size_t i = 0; i < redirects.size(); ++i) | |
| 545 AddRedirectUrlList(redirects[i]); | |
| 546 | |
| 547 // Call the cache collector | |
| 548 cache_collector_->StartCacheCollection( | |
| 549 request_context_getter_.get(), &resources_, &cache_result_, | |
| 550 base::Bind(&ThreatDetails::OnCacheCollectionReady, this)); | |
| 551 } | |
| 552 | |
| 553 void ThreatDetails::AddRedirectUrlList(const std::vector<GURL>& urls) { | |
| 554 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
| 555 for (size_t i = 0; i < urls.size() - 1; ++i) { | |
| 556 AddUrl(urls[i], urls[i + 1], std::string(), NULL); | |
| 557 } | |
| 558 } | |
| 559 | |
| 560 void ThreatDetails::OnCacheCollectionReady() { | |
| 561 DVLOG(1) << "OnCacheCollectionReady."; | |
| 562 // Add all the urls in our |resources_| maps to the |report_| protocol buffer. | |
| 563 for (auto& resource_pair : resources_) { | |
| 564 ClientSafeBrowsingReportRequest::Resource* pb_resource = | |
| 565 report_->add_resources(); | |
| 566 pb_resource->Swap(resource_pair.second.get()); | |
| 567 const GURL url(pb_resource->url()); | |
| 568 if (url.SchemeIs("https")) { | |
| 569 // Sanitize the HTTPS resource by clearing out private data (like cookie | |
| 570 // headers). | |
| 571 DVLOG(1) << "Clearing out HTTPS resource: " << pb_resource->url(); | |
| 572 ClearHttpsResource(pb_resource); | |
| 573 // Keep id, parent_id, child_ids, and tag_name. | |
| 574 } | |
| 575 } | |
| 576 for (auto& element_pair : elements_) { | |
| 577 report_->add_dom()->Swap(element_pair.second.get()); | |
| 578 } | |
| 579 if (!elements_.empty()) { | |
| 580 // TODO(lpz): Consider including the ambiguous_dom_ bit in the report | |
| 581 // itself. | |
| 582 UMA_HISTOGRAM_BOOLEAN("SafeBrowsing.ThreatReport.DomIsAmbiguous", | |
| 583 ambiguous_dom_); | |
| 584 } | |
| 585 | |
| 586 report_->set_did_proceed(did_proceed_); | |
| 587 // Only sets repeat_visit if num_visits_ >= 0. | |
| 588 if (num_visits_ >= 0) { | |
| 589 report_->set_repeat_visit(num_visits_ > 0); | |
| 590 } | |
| 591 report_->set_complete(cache_result_); | |
| 592 | |
| 593 // Send the report, using the SafeBrowsingService. | |
| 594 std::string serialized; | |
| 595 if (!report_->SerializeToString(&serialized)) { | |
| 596 DLOG(ERROR) << "Unable to serialize the threat report."; | |
| 597 return; | |
| 598 } | |
| 599 ui_manager_->SendSerializedThreatDetails(serialized); | |
| 600 } | |
| 601 | |
| 602 } // namespace safe_browsing | |
| OLD | NEW |