OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // Implementation of the ThreatDetails class. | |
6 | |
7 #include "chrome/browser/safe_browsing/threat_details.h" | |
8 | |
9 #include <stddef.h> | |
10 #include <stdint.h> | |
11 | |
12 #include "base/bind.h" | |
13 #include "base/lazy_instance.h" | |
14 #include "base/metrics/histogram_macros.h" | |
15 #include "base/strings/string_util.h" | |
16 #include "chrome/browser/safe_browsing/threat_details_cache.h" | |
17 #include "chrome/browser/safe_browsing/threat_details_history.h" | |
18 #include "components/history/core/browser/history_service.h" | |
19 #include "components/safe_browsing/base_ui_manager.h" | |
20 #include "components/safe_browsing/common/safebrowsing_messages.h" | |
21 #include "content/public/browser/browser_thread.h" | |
22 #include "content/public/browser/navigation_controller.h" | |
23 #include "content/public/browser/navigation_entry.h" | |
24 #include "content/public/browser/render_frame_host.h" | |
25 #include "content/public/browser/web_contents.h" | |
26 #include "net/url_request/url_request_context_getter.h" | |
27 | |
28 using content::BrowserThread; | |
29 using content::NavigationEntry; | |
30 using content::RenderFrameHost; | |
31 using content::WebContents; | |
32 | |
// Maximum number of DOM nodes accepted from a single renderer message.
// Keep in sync with KMaxNodes in components/safe_browsing/renderer/
// threat_dom_details.cc
static constexpr uint32_t kMaxDomNodes = 500;
36 | |
37 namespace safe_browsing { | |
38 | |
39 // static | |
40 ThreatDetailsFactory* ThreatDetails::factory_ = NULL; | |
41 | |
42 namespace { | |
43 | |
44 typedef std::unordered_set<std::string> StringSet; | |
45 // A set of HTTPS headers that are allowed to be collected. Contains both | |
46 // request and response headers. All entries in this list should be lower-case | |
47 // to support case-insensitive comparison. | |
48 struct WhitelistedHttpsHeadersTraits | |
49 : base::internal::DestructorAtExitLazyInstanceTraits<StringSet> { | |
50 static StringSet* New(void* instance) { | |
51 StringSet* headers = | |
52 base::internal::DestructorAtExitLazyInstanceTraits<StringSet>::New( | |
53 instance); | |
54 headers->insert({"google-creative-id", "google-lineitem-id", "referer", | |
55 "content-type", "content-length", "date", "server", "cache-control", | |
56 "pragma", "expires"}); | |
57 return headers; | |
58 } | |
59 }; | |
60 base::LazyInstance<StringSet, WhitelistedHttpsHeadersTraits> | |
61 g_https_headers_whitelist = LAZY_INSTANCE_INITIALIZER; | |
62 | |
63 // Helper function that converts SBThreatType to | |
64 // ClientSafeBrowsingReportRequest::ReportType. | |
65 ClientSafeBrowsingReportRequest::ReportType GetReportTypeFromSBThreatType( | |
66 SBThreatType threat_type) { | |
67 switch (threat_type) { | |
68 case SB_THREAT_TYPE_URL_PHISHING: | |
69 return ClientSafeBrowsingReportRequest::URL_PHISHING; | |
70 case SB_THREAT_TYPE_URL_MALWARE: | |
71 return ClientSafeBrowsingReportRequest::URL_MALWARE; | |
72 case SB_THREAT_TYPE_URL_UNWANTED: | |
73 return ClientSafeBrowsingReportRequest::URL_UNWANTED; | |
74 case SB_THREAT_TYPE_CLIENT_SIDE_PHISHING_URL: | |
75 return ClientSafeBrowsingReportRequest::CLIENT_SIDE_PHISHING_URL; | |
76 case SB_THREAT_TYPE_CLIENT_SIDE_MALWARE_URL: | |
77 return ClientSafeBrowsingReportRequest::CLIENT_SIDE_MALWARE_URL; | |
78 default: // Gated by SafeBrowsingBlockingPage::ShouldReportThreatDetails. | |
79 NOTREACHED() << "We should not send report for threat type " | |
80 << threat_type; | |
81 return ClientSafeBrowsingReportRequest::UNKNOWN; | |
82 } | |
83 } | |
84 | |
85 // Clears the specified HTTPS resource of any sensitive data, only retaining | |
86 // data that is whitelisted for collection. | |
87 void ClearHttpsResource(ClientSafeBrowsingReportRequest::Resource* resource) { | |
88 // Make a copy of the original resource to retain all data. | |
89 ClientSafeBrowsingReportRequest::Resource orig_resource(*resource); | |
90 | |
91 // Clear the request headers and copy over any whitelisted ones. | |
92 resource->clear_request(); | |
93 for (int i = 0; i < orig_resource.request().headers_size(); ++i) { | |
94 ClientSafeBrowsingReportRequest::HTTPHeader* orig_header = orig_resource | |
95 .mutable_request()->mutable_headers(i); | |
96 if (g_https_headers_whitelist.Get().count( | |
97 base::ToLowerASCII(orig_header->name())) > 0) { | |
98 resource->mutable_request()->add_headers()->Swap(orig_header); | |
99 } | |
100 } | |
101 // Also copy some other request fields. | |
102 resource->mutable_request()->mutable_bodydigest()->swap( | |
103 *orig_resource.mutable_request()->mutable_bodydigest()); | |
104 resource->mutable_request()->set_bodylength( | |
105 orig_resource.request().bodylength()); | |
106 | |
107 // ...repeat for response headers. | |
108 resource->clear_response(); | |
109 for (int i = 0; i < orig_resource.response().headers_size(); ++i) { | |
110 ClientSafeBrowsingReportRequest::HTTPHeader* orig_header = orig_resource | |
111 .mutable_response()->mutable_headers(i); | |
112 if (g_https_headers_whitelist.Get().count( | |
113 base::ToLowerASCII(orig_header->name())) > 0) { | |
114 resource->mutable_response()->add_headers()->Swap(orig_header); | |
115 } | |
116 } | |
117 // Also copy some other response fields. | |
118 resource->mutable_response()->mutable_bodydigest()->swap( | |
119 *orig_resource.mutable_response()->mutable_bodydigest()); | |
120 resource->mutable_response()->set_bodylength( | |
121 orig_resource.response().bodylength()); | |
122 resource->mutable_response()->mutable_remote_ip()->swap( | |
123 *orig_resource.mutable_response()->mutable_remote_ip()); | |
124 } | |
125 | |
126 std::string GetElementKey(const int frame_tree_node_id, | |
127 const int element_node_id) { | |
128 return base::StringPrintf("%d-%d", frame_tree_node_id, element_node_id); | |
129 } | |
130 | |
131 } // namespace | |
132 | |
133 // The default ThreatDetailsFactory. Global, made a singleton so we | |
134 // don't leak it. | |
135 class ThreatDetailsFactoryImpl : public ThreatDetailsFactory { | |
136 public: | |
137 ThreatDetails* CreateThreatDetails( | |
138 BaseUIManager* ui_manager, | |
139 WebContents* web_contents, | |
140 const security_interstitials::UnsafeResource& unsafe_resource, | |
141 net::URLRequestContextGetter* request_context_getter, | |
142 history::HistoryService* history_service) override { | |
143 return new ThreatDetails(ui_manager, web_contents, unsafe_resource, | |
144 request_context_getter, history_service); | |
145 } | |
146 | |
147 private: | |
148 friend struct base::LazyInstanceTraitsBase<ThreatDetailsFactoryImpl>; | |
149 | |
150 ThreatDetailsFactoryImpl() {} | |
151 | |
152 DISALLOW_COPY_AND_ASSIGN(ThreatDetailsFactoryImpl); | |
153 }; | |
154 | |
155 static base::LazyInstance<ThreatDetailsFactoryImpl>::DestructorAtExit | |
156 g_threat_details_factory_impl = LAZY_INSTANCE_INITIALIZER; | |
157 | |
158 // Create a ThreatDetails for the given tab. | |
159 /* static */ | |
160 ThreatDetails* ThreatDetails::NewThreatDetails( | |
161 BaseUIManager* ui_manager, | |
162 WebContents* web_contents, | |
163 const UnsafeResource& resource, | |
164 net::URLRequestContextGetter* request_context_getter, | |
165 history::HistoryService* history_service) { | |
166 // Set up the factory if this has not been done already (tests do that | |
167 // before this method is called). | |
168 if (!factory_) | |
169 factory_ = g_threat_details_factory_impl.Pointer(); | |
170 return factory_->CreateThreatDetails(ui_manager, web_contents, resource, | |
171 request_context_getter, history_service); | |
172 } | |
173 | |
174 // Create a ThreatDetails for the given tab. Runs in the UI thread. | |
175 ThreatDetails::ThreatDetails( | |
176 BaseUIManager* ui_manager, | |
177 content::WebContents* web_contents, | |
178 const UnsafeResource& resource, | |
179 net::URLRequestContextGetter* request_context_getter, | |
180 history::HistoryService* history_service) | |
181 : content::WebContentsObserver(web_contents), | |
182 request_context_getter_(request_context_getter), | |
183 ui_manager_(ui_manager), | |
184 resource_(resource), | |
185 cache_result_(false), | |
186 did_proceed_(false), | |
187 num_visits_(0), | |
188 ambiguous_dom_(false), | |
189 cache_collector_(new ThreatDetailsCacheCollector) { | |
190 redirects_collector_ = new ThreatDetailsRedirectsCollector( | |
191 history_service ? history_service->AsWeakPtr() | |
192 : base::WeakPtr<history::HistoryService>()); | |
193 StartCollection(); | |
194 } | |
195 | |
196 ThreatDetails::~ThreatDetails() {} | |
197 | |
198 bool ThreatDetails::OnMessageReceived(const IPC::Message& message, | |
199 RenderFrameHost* render_frame_host) { | |
200 bool handled = true; | |
201 IPC_BEGIN_MESSAGE_MAP_WITH_PARAM(ThreatDetails, message, render_frame_host) | |
202 IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_ThreatDOMDetails, | |
203 OnReceivedThreatDOMDetails) | |
204 IPC_MESSAGE_UNHANDLED(handled = false) | |
205 IPC_END_MESSAGE_MAP() | |
206 return handled; | |
207 } | |
208 | |
209 bool ThreatDetails::IsReportableUrl(const GURL& url) const { | |
210 // TODO(panayiotis): also skip internal urls. | |
211 return url.SchemeIs("http") || url.SchemeIs("https"); | |
212 } | |
213 | |
214 // Looks for a Resource for the given url in resources_. If found, it | |
215 // updates |resource|. Otherwise, it creates a new message, adds it to | |
216 // resources_ and updates |resource| to point to it. | |
217 // | |
218 ClientSafeBrowsingReportRequest::Resource* ThreatDetails::FindOrCreateResource( | |
219 const GURL& url) { | |
220 auto& resource = resources_[url.spec()]; | |
221 if (!resource) { | |
222 // Create the resource for |url|. | |
223 int id = resources_.size() - 1; | |
224 std::unique_ptr<ClientSafeBrowsingReportRequest::Resource> new_resource( | |
225 new ClientSafeBrowsingReportRequest::Resource()); | |
226 new_resource->set_url(url.spec()); | |
227 new_resource->set_id(id); | |
228 resource = std::move(new_resource); | |
229 } | |
230 return resource.get(); | |
231 } | |
232 | |
233 HTMLElement* ThreatDetails::FindOrCreateElement( | |
234 const std::string& element_key) { | |
235 auto& element = elements_[element_key]; | |
236 if (!element) { | |
237 // Create an entry for this element. | |
238 int element_dom_id = elements_.size() - 1; | |
239 std::unique_ptr<HTMLElement> new_element(new HTMLElement()); | |
240 new_element->set_id(element_dom_id); | |
241 element = std::move(new_element); | |
242 } | |
243 return element.get(); | |
244 } | |
245 | |
246 ClientSafeBrowsingReportRequest::Resource* ThreatDetails::AddUrl( | |
247 const GURL& url, | |
248 const GURL& parent, | |
249 const std::string& tagname, | |
250 const std::vector<GURL>* children) { | |
251 if (!url.is_valid() || !IsReportableUrl(url)) | |
252 return nullptr; | |
253 | |
254 // Find (or create) the resource for the url. | |
255 ClientSafeBrowsingReportRequest::Resource* url_resource = | |
256 FindOrCreateResource(url); | |
257 if (!tagname.empty()) | |
258 url_resource->set_tag_name(tagname); | |
259 if (!parent.is_empty() && IsReportableUrl(parent)) { | |
260 // Add the resource for the parent. | |
261 ClientSafeBrowsingReportRequest::Resource* parent_resource = | |
262 FindOrCreateResource(parent); | |
263 // Update the parent-child relation | |
264 url_resource->set_parent_id(parent_resource->id()); | |
265 } | |
266 if (children) { | |
267 for (std::vector<GURL>::const_iterator it = children->begin(); | |
268 it != children->end(); ++it) { | |
269 // TODO(lpz): Should this first check if the child URL is reportable | |
270 // before creating the resource? | |
271 ClientSafeBrowsingReportRequest::Resource* child_resource = | |
272 FindOrCreateResource(*it); | |
273 bool duplicate_child = false; | |
274 for (auto child_id : url_resource->child_ids()) { | |
275 if (child_id == child_resource->id()) { | |
276 duplicate_child = true; | |
277 break; | |
278 } | |
279 } | |
280 if (!duplicate_child) | |
281 url_resource->add_child_ids(child_resource->id()); | |
282 } | |
283 } | |
284 | |
285 return url_resource; | |
286 } | |
287 | |
288 void ThreatDetails::AddDomElement( | |
289 const int frame_tree_node_id, | |
290 const std::string& frame_url, | |
291 const int element_node_id, | |
292 const std::string& tagname, | |
293 const int parent_element_node_id, | |
294 const std::vector<AttributeNameValue>& attributes, | |
295 const ClientSafeBrowsingReportRequest::Resource* resource) { | |
296 // Create the element. It should not exist already since this function should | |
297 // only be called once for each element. | |
298 const std::string element_key = | |
299 GetElementKey(frame_tree_node_id, element_node_id); | |
300 HTMLElement* cur_element = FindOrCreateElement(element_key); | |
301 | |
302 // Set some basic metadata about the element. | |
303 const std::string tag_name_upper = base::ToUpperASCII(tagname); | |
304 if (!tag_name_upper.empty()) { | |
305 cur_element->set_tag(tag_name_upper); | |
306 } | |
307 for (const AttributeNameValue& attribute : attributes) { | |
308 HTMLElement::Attribute* attribute_pb = cur_element->add_attribute(); | |
309 attribute_pb->set_name(attribute.first); | |
310 attribute_pb->set_value(attribute.second); | |
311 } | |
312 bool is_frame = tag_name_upper == "IFRAME" || tag_name_upper == "FRAME"; | |
313 | |
314 if (resource) { | |
315 cur_element->set_resource_id(resource->id()); | |
316 | |
317 // For iframes, remember that this HTML Element represents an iframe with a | |
318 // specific URL. Elements from a frame with this URL are children of this | |
319 // element. | |
320 if (is_frame && | |
321 !base::ContainsKey(iframe_src_to_element_map_, resource->url())) { | |
322 iframe_src_to_element_map_[resource->url()] = cur_element; | |
323 } | |
324 } | |
325 | |
326 // Next we try to lookup the parent of the current element and add ourselves | |
327 // as a child of it. | |
328 HTMLElement* parent_element = nullptr; | |
329 if (parent_element_node_id == 0) { | |
330 // No parent indicates that this element is at the top of the current frame. | |
331 // This frame could be a child of an iframe in another frame, or it could be | |
332 // at the root of the whole page. If we have a frame URL then we can try to | |
333 // map this element to its parent. | |
334 if (!frame_url.empty()) { | |
335 // First, remember that this element is at the top-level of a frame with | |
336 // our frame URL. | |
337 document_url_to_children_map_[frame_url].insert(cur_element->id()); | |
338 | |
339 // Now check if the frame URL matches the src URL of an iframe elsewhere. | |
340 // This means that we processed the parent iframe element earlier, so we | |
341 // can add ourselves as a child of that iframe. | |
342 // If no such iframe exists, it could be processed later, or this element | |
343 // is in the top-level frame and truly has no parent. | |
344 if (base::ContainsKey(iframe_src_to_element_map_, frame_url)) { | |
345 parent_element = iframe_src_to_element_map_[frame_url]; | |
346 } | |
347 } | |
348 } else { | |
349 // We have a parent ID, so this element is just a child of something inside | |
350 // of our current frame. We can easily lookup our parent. | |
351 const std::string& parent_key = | |
352 GetElementKey(frame_tree_node_id, parent_element_node_id); | |
353 if (base::ContainsKey(elements_, parent_key)) { | |
354 parent_element = elements_[parent_key].get(); | |
355 } | |
356 } | |
357 | |
358 // If a parent element was found, add ourselves as a child, ensuring not to | |
359 // duplicate child IDs. | |
360 if (parent_element) { | |
361 bool duplicate_child = false; | |
362 for (const int child_id : parent_element->child_ids()) { | |
363 if (child_id == cur_element->id()) { | |
364 duplicate_child = true; | |
365 break; | |
366 } | |
367 } | |
368 if (!duplicate_child) { | |
369 parent_element->add_child_ids(cur_element->id()); | |
370 } | |
371 } | |
372 | |
373 // Finally, we need to check if the current element is the parent of some | |
374 // other elements that came in from another frame earlier. This only happens | |
375 // if we are an iframe, and our src URL exists in | |
376 // document_url_to_children_map_. If there is a match, then all of the | |
377 // children in that map belong to us. | |
378 if (is_frame && resource && | |
379 base::ContainsKey(document_url_to_children_map_, resource->url())) { | |
380 const std::unordered_set<int>& child_ids = | |
381 document_url_to_children_map_[resource->url()]; | |
382 for (const int child_id : child_ids) { | |
383 cur_element->add_child_ids(child_id); | |
384 } | |
385 } | |
386 } | |
387 | |
388 void ThreatDetails::StartCollection() { | |
389 DVLOG(1) << "Starting to compute threat details."; | |
390 report_.reset(new ClientSafeBrowsingReportRequest()); | |
391 | |
392 if (IsReportableUrl(resource_.url)) { | |
393 report_->set_url(resource_.url.spec()); | |
394 report_->set_type(GetReportTypeFromSBThreatType(resource_.threat_type)); | |
395 } | |
396 | |
397 GURL referrer_url; | |
398 NavigationEntry* nav_entry = resource_.GetNavigationEntryForResource(); | |
399 if (nav_entry) { | |
400 GURL page_url = nav_entry->GetURL(); | |
401 if (IsReportableUrl(page_url)) | |
402 report_->set_page_url(page_url.spec()); | |
403 | |
404 referrer_url = nav_entry->GetReferrer().url; | |
405 if (IsReportableUrl(referrer_url)) | |
406 report_->set_referrer_url(referrer_url.spec()); | |
407 | |
408 // Add the nodes, starting from the page url. | |
409 AddUrl(page_url, GURL(), std::string(), NULL); | |
410 } | |
411 | |
412 // Add the resource_url and its original url, if non-empty and different. | |
413 if (!resource_.original_url.is_empty() && | |
414 resource_.url != resource_.original_url) { | |
415 // Add original_url, as the parent of resource_url. | |
416 AddUrl(resource_.original_url, GURL(), std::string(), NULL); | |
417 AddUrl(resource_.url, resource_.original_url, std::string(), NULL); | |
418 } else { | |
419 AddUrl(resource_.url, GURL(), std::string(), NULL); | |
420 } | |
421 | |
422 // Add the redirect urls, if non-empty. The redirect urls do not include the | |
423 // original url, but include the unsafe url which is the last one of the | |
424 // redirect urls chain | |
425 GURL parent_url; | |
426 // Set the original url as the parent of the first redirect url if it's not | |
427 // empty. | |
428 if (!resource_.original_url.is_empty()) | |
429 parent_url = resource_.original_url; | |
430 | |
431 // Set the previous redirect url as the parent of the next one | |
432 for (size_t i = 0; i < resource_.redirect_urls.size(); ++i) { | |
433 AddUrl(resource_.redirect_urls[i], parent_url, std::string(), NULL); | |
434 parent_url = resource_.redirect_urls[i]; | |
435 } | |
436 | |
437 // Add the referrer url. | |
438 if (!referrer_url.is_empty()) | |
439 AddUrl(referrer_url, GURL(), std::string(), NULL); | |
440 | |
441 if (!resource_.IsMainPageLoadBlocked()) { | |
442 // Get URLs of frames, scripts etc from the DOM. | |
443 // OnReceivedThreatDOMDetails will be called when the renderer replies. | |
444 // TODO(mattm): In theory, if the user proceeds through the warning DOM | |
445 // detail collection could be started once the page loads. | |
446 web_contents()->SendToAllFrames( | |
447 new SafeBrowsingMsg_GetThreatDOMDetails(MSG_ROUTING_NONE)); | |
448 } | |
449 } | |
450 | |
451 // When the renderer is done, this is called. | |
452 void ThreatDetails::OnReceivedThreatDOMDetails( | |
453 content::RenderFrameHost* sender, | |
454 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | |
455 // Schedule this in IO thread, so it doesn't conflict with future users | |
456 // of our data structures (eg GetSerializedReport). | |
457 BrowserThread::PostTask( | |
458 BrowserThread::IO, FROM_HERE, | |
459 base::BindOnce(&ThreatDetails::AddDOMDetails, this, | |
460 sender->GetFrameTreeNodeId(), | |
461 sender->GetLastCommittedURL(), params)); | |
462 } | |
463 | |
464 void ThreatDetails::AddDOMDetails( | |
465 const int frame_tree_node_id, | |
466 const GURL& frame_last_committed_url, | |
467 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params) { | |
468 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
469 DVLOG(1) << "Nodes from the DOM: " << params.size(); | |
470 | |
471 // If we have already started getting redirects from history service, | |
472 // don't modify state, otherwise will invalidate the iterators. | |
473 if (redirects_collector_->HasStarted()) | |
474 return; | |
475 | |
476 // If we have already started collecting data from the HTTP cache, don't | |
477 // modify our state. | |
478 if (cache_collector_->HasStarted()) | |
479 return; | |
480 | |
481 // Exit early if there are no nodes to process. | |
482 if (params.empty()) | |
483 return; | |
484 | |
485 // Try to deduce the URL that the render frame was handling. First check if | |
486 // the summary node from the renderer has a document URL. If not, try looking | |
487 // at the last committed URL of the frame. | |
488 GURL frame_url; | |
489 if (IsReportableUrl(params.back().url)) { | |
490 frame_url = params.back().url; | |
491 } else if (IsReportableUrl(frame_last_committed_url)) { | |
492 frame_url = frame_last_committed_url; | |
493 } | |
494 | |
495 // If we can't figure out which URL the frame was rendering then we don't know | |
496 // where these elements belong in the hierarchy. The DOM will be ambiguous. | |
497 if (frame_url.is_empty()) { | |
498 ambiguous_dom_ = true; | |
499 } | |
500 | |
501 // Add the urls from the DOM to |resources_|. The renderer could be sending | |
502 // bogus messages, so limit the number of nodes we accept. | |
503 // Also update |elements_| with the DOM structure. | |
504 for (size_t i = 0; i < params.size() && i < kMaxDomNodes; ++i) { | |
505 SafeBrowsingHostMsg_ThreatDOMDetails_Node node = params[i]; | |
506 DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent; | |
507 ClientSafeBrowsingReportRequest::Resource* resource = nullptr; | |
508 if (!node.url.is_empty()) { | |
509 resource = AddUrl(node.url, node.parent, node.tag_name, &(node.children)); | |
510 } | |
511 // Check for a tag_name to avoid adding the summary node to the DOM. | |
512 if (!node.tag_name.empty()) { | |
513 AddDomElement(frame_tree_node_id, frame_url.spec(), node.node_id, | |
514 node.tag_name, node.parent_node_id, node.attributes, | |
515 resource); | |
516 } | |
517 } | |
518 } | |
519 | |
520 // Called from the SB Service on the IO thread, after the user has | |
521 // closed the tab, or clicked proceed or goback. Since the user needs | |
522 // to take an action, we expect this to be called after | |
523 // OnReceivedThreatDOMDetails in most cases. If not, we don't include | |
524 // the DOM data in our report. | |
525 void ThreatDetails::FinishCollection(bool did_proceed, int num_visit) { | |
526 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
527 | |
528 did_proceed_ = did_proceed; | |
529 num_visits_ = num_visit; | |
530 std::vector<GURL> urls; | |
531 for (ResourceMap::const_iterator it = resources_.begin(); | |
532 it != resources_.end(); ++it) { | |
533 urls.push_back(GURL(it->first)); | |
534 } | |
535 redirects_collector_->StartHistoryCollection( | |
536 urls, base::Bind(&ThreatDetails::OnRedirectionCollectionReady, this)); | |
537 } | |
538 | |
539 void ThreatDetails::OnRedirectionCollectionReady() { | |
540 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
541 const std::vector<RedirectChain>& redirects = | |
542 redirects_collector_->GetCollectedUrls(); | |
543 | |
544 for (size_t i = 0; i < redirects.size(); ++i) | |
545 AddRedirectUrlList(redirects[i]); | |
546 | |
547 // Call the cache collector | |
548 cache_collector_->StartCacheCollection( | |
549 request_context_getter_.get(), &resources_, &cache_result_, | |
550 base::Bind(&ThreatDetails::OnCacheCollectionReady, this)); | |
551 } | |
552 | |
553 void ThreatDetails::AddRedirectUrlList(const std::vector<GURL>& urls) { | |
554 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
555 for (size_t i = 0; i < urls.size() - 1; ++i) { | |
556 AddUrl(urls[i], urls[i + 1], std::string(), NULL); | |
557 } | |
558 } | |
559 | |
560 void ThreatDetails::OnCacheCollectionReady() { | |
561 DVLOG(1) << "OnCacheCollectionReady."; | |
562 // Add all the urls in our |resources_| maps to the |report_| protocol buffer. | |
563 for (auto& resource_pair : resources_) { | |
564 ClientSafeBrowsingReportRequest::Resource* pb_resource = | |
565 report_->add_resources(); | |
566 pb_resource->Swap(resource_pair.second.get()); | |
567 const GURL url(pb_resource->url()); | |
568 if (url.SchemeIs("https")) { | |
569 // Sanitize the HTTPS resource by clearing out private data (like cookie | |
570 // headers). | |
571 DVLOG(1) << "Clearing out HTTPS resource: " << pb_resource->url(); | |
572 ClearHttpsResource(pb_resource); | |
573 // Keep id, parent_id, child_ids, and tag_name. | |
574 } | |
575 } | |
576 for (auto& element_pair : elements_) { | |
577 report_->add_dom()->Swap(element_pair.second.get()); | |
578 } | |
579 if (!elements_.empty()) { | |
580 // TODO(lpz): Consider including the ambiguous_dom_ bit in the report | |
581 // itself. | |
582 UMA_HISTOGRAM_BOOLEAN("SafeBrowsing.ThreatReport.DomIsAmbiguous", | |
583 ambiguous_dom_); | |
584 } | |
585 | |
586 report_->set_did_proceed(did_proceed_); | |
587 // Only sets repeat_visit if num_visits_ >= 0. | |
588 if (num_visits_ >= 0) { | |
589 report_->set_repeat_visit(num_visits_ > 0); | |
590 } | |
591 report_->set_complete(cache_result_); | |
592 | |
593 // Send the report, using the SafeBrowsingService. | |
594 std::string serialized; | |
595 if (!report_->SerializeToString(&serialized)) { | |
596 DLOG(ERROR) << "Unable to serialize the threat report."; | |
597 return; | |
598 } | |
599 ui_manager_->SendSerializedThreatDetails(serialized); | |
600 } | |
601 | |
602 } // namespace safe_browsing | |
OLD | NEW |