Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Side by Side Diff: chrome/browser/safe_browsing/browser_feature_extractor.cc

Issue 351553004: Port HistoryService::GetVisibleVisitCountToHost to CancelableTaskTracker (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix Linux ASAN tests Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h" 5 #include "chrome/browser/safe_browsing/browser_feature_extractor.h"
6 6
7 #include <map> 7 #include <map>
8 #include <utility> 8 #include <utility>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
11 #include "base/bind_helpers.h" 11 #include "base/bind_helpers.h"
12 #include "base/format_macros.h" 12 #include "base/format_macros.h"
13 #include "base/stl_util.h" 13 #include "base/stl_util.h"
14 #include "base/strings/stringprintf.h" 14 #include "base/strings/stringprintf.h"
15 #include "base/time/time.h" 15 #include "base/time/time.h"
16 #include "chrome/browser/common/cancelable_request.h"
17 #include "chrome/browser/history/history_service.h" 16 #include "chrome/browser/history/history_service.h"
18 #include "chrome/browser/history/history_service_factory.h" 17 #include "chrome/browser/history/history_service_factory.h"
19 #include "chrome/browser/history/history_types.h" 18 #include "chrome/browser/history/history_types.h"
20 #include "chrome/browser/profiles/profile.h" 19 #include "chrome/browser/profiles/profile.h"
21 #include "chrome/browser/safe_browsing/browser_features.h" 20 #include "chrome/browser/safe_browsing/browser_features.h"
22 #include "chrome/browser/safe_browsing/client_side_detection_host.h" 21 #include "chrome/browser/safe_browsing/client_side_detection_host.h"
23 #include "chrome/browser/safe_browsing/database_manager.h" 22 #include "chrome/browser/safe_browsing/database_manager.h"
24 #include "chrome/common/safe_browsing/csd.pb.h" 23 #include "chrome/common/safe_browsing/csd.pb.h"
25 #include "content/public/browser/browser_thread.h" 24 #include "content/public/browser/browser_thread.h"
26 #include "content/public/browser/navigation_controller.h" 25 #include "content/public/browser/navigation_controller.h"
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
167 WebContents* tab, 166 WebContents* tab,
168 ClientSideDetectionHost* host) 167 ClientSideDetectionHost* host)
169 : tab_(tab), 168 : tab_(tab),
170 host_(host), 169 host_(host),
171 weak_factory_(this) { 170 weak_factory_(this) {
172 DCHECK(tab); 171 DCHECK(tab);
173 } 172 }
174 173
175 BrowserFeatureExtractor::~BrowserFeatureExtractor() { 174 BrowserFeatureExtractor::~BrowserFeatureExtractor() {
176 weak_factory_.InvalidateWeakPtrs(); 175 weak_factory_.InvalidateWeakPtrs();
177 // Delete all the pending extractions (delete callback and request objects).
178 STLDeleteContainerPairFirstPointers(pending_extractions_.begin(),
179 pending_extractions_.end());
180
181 // Also cancel all the pending history service queries.
182 HistoryService* history;
183 bool success = GetHistoryService(&history);
184 DCHECK(success || pending_queries_.size() == 0);
185 // Cancel all the pending history lookups and clean up the memory.
186 for (PendingQueriesMap::iterator it = pending_queries_.begin();
187 it != pending_queries_.end(); ++it) {
188 if (history) {
189 history->CancelRequest(it->first);
190 }
191 ExtractionData& extraction = it->second;
192 delete extraction.first; // delete request
193 }
194 pending_queries_.clear();
195 } 176 }
196 177
197 void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info, 178 void BrowserFeatureExtractor::ExtractFeatures(const BrowseInfo* info,
198 ClientPhishingRequest* request, 179 ClientPhishingRequest* request,
199 const DoneCallback& callback) { 180 const DoneCallback& callback) {
200 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 181 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
201 DCHECK(request); 182 DCHECK(request);
202 DCHECK(info); 183 DCHECK(info);
203 DCHECK_EQ(0U, request->url().find("http:")); 184 DCHECK_EQ(0U, request->url().find("http:"));
204 DCHECK(!callback.is_null()); 185 DCHECK(!callback.is_null());
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
242 std::string(), controller, url_index, info->url_redirects, request); 223 std::string(), controller, url_index, info->url_redirects, request);
243 } 224 }
244 if (first_host_index != -1) { 225 if (first_host_index != -1) {
245 AddNavigationFeatures(features::kHostPrefix, 226 AddNavigationFeatures(features::kHostPrefix,
246 controller, 227 controller,
247 first_host_index, 228 first_host_index,
248 info->host_redirects, 229 info->host_redirects,
249 request); 230 request);
250 } 231 }
251 232
233 // The API doesn't take a scoped_ptr because the API gets mocked and we
234 // cannot mock an API that takes scoped_ptr as arguments.
235 scoped_ptr<ClientPhishingRequest> req(request);
236
252 ExtractBrowseInfoFeatures(*info, request); 237 ExtractBrowseInfoFeatures(*info, request);
253 pending_extractions_[request] = callback;
254 base::MessageLoop::current()->PostTask( 238 base::MessageLoop::current()->PostTask(
255 FROM_HERE, 239 FROM_HERE,
256 base::Bind(&BrowserFeatureExtractor::StartExtractFeatures, 240 base::Bind(&BrowserFeatureExtractor::StartExtractFeatures,
257 weak_factory_.GetWeakPtr(), request, callback)); 241 weak_factory_.GetWeakPtr(),
242 base::Passed(&req),
243 callback));
258 } 244 }
259 245
260 void BrowserFeatureExtractor::ExtractMalwareFeatures( 246 void BrowserFeatureExtractor::ExtractMalwareFeatures(
261 BrowseInfo* info, 247 BrowseInfo* info,
262 ClientMalwareRequest* request, 248 ClientMalwareRequest* request,
263 const MalwareDoneCallback& callback) { 249 const MalwareDoneCallback& callback) {
264 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 250 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
265 DCHECK(!callback.is_null()); 251 DCHECK(!callback.is_null());
266 252
267 // Grab the IPs because they might go away before we're done 253 // Grab the IPs because they might go away before we're done
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
306 AddFeature(features::kSafeBrowsingThreatType, 292 AddFeature(features::kSafeBrowsingThreatType,
307 static_cast<double>(info.unsafe_resource->threat_type), 293 static_cast<double>(info.unsafe_resource->threat_type),
308 request); 294 request);
309 } 295 }
310 if (info.http_status_code != 0) { 296 if (info.http_status_code != 0) {
311 AddFeature(features::kHttpStatusCode, info.http_status_code, request); 297 AddFeature(features::kHttpStatusCode, info.http_status_code, request);
312 } 298 }
313 } 299 }
314 300
315 void BrowserFeatureExtractor::StartExtractFeatures( 301 void BrowserFeatureExtractor::StartExtractFeatures(
316 ClientPhishingRequest* request, 302 scoped_ptr<ClientPhishingRequest> request,
317 const DoneCallback& callback) { 303 const DoneCallback& callback) {
318 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 304 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
319 size_t removed = pending_extractions_.erase(request);
320 DCHECK_EQ(1U, removed);
321 HistoryService* history; 305 HistoryService* history;
322 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) { 306 if (!request || !request->IsInitialized() || !GetHistoryService(&history)) {
323 callback.Run(false, request); 307 callback.Run(false, request.Pass());
324 return; 308 return;
325 } 309 }
326 // HistoryService::QueryURL migrated from CancelableRequestConsumer to 310 GURL request_url(request->url());
327 // CancelableTaskTracker and there is no Handle to associate to the 311 history->QueryURL(request_url,
328 // request. Instead manage the request object lifetime by using a scoped_ptr
329 // and using base::Passed(). So if the asynchronous call is canceled, the
330 // request is deleted, otherwise the callback becomes the owner.
331 scoped_ptr<ClientPhishingRequest> owned_request(request);
332 history->QueryURL(GURL(request->url()),
333 true /* wants_visits */, 312 true /* wants_visits */,
334 base::Bind(&BrowserFeatureExtractor::QueryUrlHistoryDone, 313 base::Bind(&BrowserFeatureExtractor::QueryUrlHistoryDone,
335 base::Unretained(this), 314 base::Unretained(this),
336 base::Passed(&owned_request), 315 base::Passed(&request),
337 callback), 316 callback),
338 &cancelable_task_tracker_); 317 &cancelable_task_tracker_);
339 } 318 }
340 319
341 void BrowserFeatureExtractor::QueryUrlHistoryDone( 320 void BrowserFeatureExtractor::QueryUrlHistoryDone(
342 scoped_ptr<ClientPhishingRequest> owned_request, 321 scoped_ptr<ClientPhishingRequest> request,
343 const DoneCallback& callback, 322 const DoneCallback& callback,
344 bool success, 323 bool success,
345 const history::URLRow& row, 324 const history::URLRow& row,
346 const history::VisitVector& visits) { 325 const history::VisitVector& visits) {
347 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 326 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
348 DCHECK(owned_request); 327 DCHECK(request);
349 DCHECK(!callback.is_null()); 328 DCHECK(!callback.is_null());
350 ClientPhishingRequest* request = owned_request.release();
351 if (!success) { 329 if (!success) {
352 // URL is not found in the history. In practice this should not 330 // URL is not found in the history. In practice this should not
353 // happen (unless there is a real error) because we just visited 331 // happen (unless there is a real error) because we just visited
354 // that URL. 332 // that URL.
355 callback.Run(false, request); 333 callback.Run(false, request.Pass());
356 return; 334 return;
357 } 335 }
358 AddFeature(features::kUrlHistoryVisitCount, 336 AddFeature(features::kUrlHistoryVisitCount,
359 static_cast<double>(row.visit_count()), 337 static_cast<double>(row.visit_count()),
360 request); 338 request.get());
361 339
362 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1); 340 base::Time threshold = base::Time::Now() - base::TimeDelta::FromDays(1);
363 int num_visits_24h_ago = 0; 341 int num_visits_24h_ago = 0;
364 int num_visits_typed = 0; 342 int num_visits_typed = 0;
365 int num_visits_link = 0; 343 int num_visits_link = 0;
366 for (history::VisitVector::const_iterator it = visits.begin(); 344 for (history::VisitVector::const_iterator it = visits.begin();
367 it != visits.end(); 345 it != visits.end();
368 ++it) { 346 ++it) {
369 if (!content::PageTransitionIsMainFrame(it->transition)) { 347 if (!content::PageTransitionIsMainFrame(it->transition)) {
370 continue; 348 continue;
371 } 349 }
372 if (it->visit_time < threshold) { 350 if (it->visit_time < threshold) {
373 ++num_visits_24h_ago; 351 ++num_visits_24h_ago;
374 } 352 }
375 content::PageTransition transition = content::PageTransitionStripQualifier( 353 content::PageTransition transition = content::PageTransitionStripQualifier(
376 it->transition); 354 it->transition);
377 if (transition == content::PAGE_TRANSITION_TYPED) { 355 if (transition == content::PAGE_TRANSITION_TYPED) {
378 ++num_visits_typed; 356 ++num_visits_typed;
379 } else if (transition == content::PAGE_TRANSITION_LINK) { 357 } else if (transition == content::PAGE_TRANSITION_LINK) {
380 ++num_visits_link; 358 ++num_visits_link;
381 } 359 }
382 } 360 }
383 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo, 361 AddFeature(features::kUrlHistoryVisitCountMoreThan24hAgo,
384 static_cast<double>(num_visits_24h_ago), 362 static_cast<double>(num_visits_24h_ago),
385 request); 363 request.get());
386 AddFeature(features::kUrlHistoryTypedCount, 364 AddFeature(features::kUrlHistoryTypedCount,
387 static_cast<double>(num_visits_typed), 365 static_cast<double>(num_visits_typed),
388 request); 366 request.get());
389 AddFeature(features::kUrlHistoryLinkCount, 367 AddFeature(features::kUrlHistoryLinkCount,
390 static_cast<double>(num_visits_link), 368 static_cast<double>(num_visits_link),
391 request); 369 request.get());
392 370
393 // Issue next history lookup for host visits. 371 // Issue next history lookup for host visits.
394 HistoryService* history; 372 HistoryService* history;
395 if (!GetHistoryService(&history)) { 373 if (!GetHistoryService(&history)) {
396 callback.Run(false, request); 374 callback.Run(false, request.Pass());
397 return; 375 return;
398 } 376 }
399 CancelableRequestProvider::Handle next_handle = 377 GURL request_url(request->url());
400 history->GetVisibleVisitCountToHost( 378 history->GetVisibleVisitCountToHost(
401 GURL(request->url()), 379 request_url,
402 &request_consumer_, 380 base::Bind(&BrowserFeatureExtractor::QueryHttpHostVisitsDone,
403 base::Bind(&BrowserFeatureExtractor::QueryHttpHostVisitsDone, 381 base::Unretained(this),
404 base::Unretained(this))); 382 base::Passed(&request),
405 StorePendingQuery(next_handle, request, callback); 383 callback),
384 &cancelable_task_tracker_);
406 } 385 }
407 386
408 void BrowserFeatureExtractor::QueryHttpHostVisitsDone( 387 void BrowserFeatureExtractor::QueryHttpHostVisitsDone(
409 CancelableRequestProvider::Handle handle, 388 scoped_ptr<ClientPhishingRequest> request,
389 const DoneCallback& callback,
410 bool success, 390 bool success,
411 int num_visits, 391 int num_visits,
412 base::Time first_visit) { 392 base::Time first_visit) {
413 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 393 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
414 ClientPhishingRequest* request;
415 DoneCallback callback;
416 if (!GetPendingQuery(handle, &request, &callback)) {
417 DLOG(FATAL) << "No pending history query found";
418 return;
419 }
420 DCHECK(request); 394 DCHECK(request);
421 DCHECK(!callback.is_null()); 395 DCHECK(!callback.is_null());
422 if (!success) { 396 if (!success) {
423 callback.Run(false, request); 397 callback.Run(false, request.Pass());
424 return; 398 return;
425 } 399 }
426 SetHostVisitsFeatures(num_visits, first_visit, true, request); 400 SetHostVisitsFeatures(num_visits, first_visit, true, request.get());
427 401
428 // Same lookup but for the HTTPS URL. 402 // Same lookup but for the HTTPS URL.
429 HistoryService* history; 403 HistoryService* history;
430 if (!GetHistoryService(&history)) { 404 if (!GetHistoryService(&history)) {
431 callback.Run(false, request); 405 callback.Run(false, request.Pass());
432 return; 406 return;
433 } 407 }
434 std::string https_url = request->url(); 408 std::string https_url = request->url();
435 CancelableRequestProvider::Handle next_handle = 409 history->GetVisibleVisitCountToHost(
436 history->GetVisibleVisitCountToHost( 410 GURL(https_url.replace(0, 5, "https:")),
437 GURL(https_url.replace(0, 5, "https:")), 411 base::Bind(&BrowserFeatureExtractor::QueryHttpsHostVisitsDone,
438 &request_consumer_, 412 base::Unretained(this),
439 base::Bind(&BrowserFeatureExtractor::QueryHttpsHostVisitsDone, 413 base::Passed(&request),
440 base::Unretained(this))); 414 callback),
441 StorePendingQuery(next_handle, request, callback); 415 &cancelable_task_tracker_);
442 } 416 }
443 417
444 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone( 418 void BrowserFeatureExtractor::QueryHttpsHostVisitsDone(
445 CancelableRequestProvider::Handle handle, 419 scoped_ptr<ClientPhishingRequest> request,
420 const DoneCallback& callback,
446 bool success, 421 bool success,
447 int num_visits, 422 int num_visits,
448 base::Time first_visit) { 423 base::Time first_visit) {
449 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 424 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
450 ClientPhishingRequest* request;
451 DoneCallback callback;
452 if (!GetPendingQuery(handle, &request, &callback)) {
453 DLOG(FATAL) << "No pending history query found";
454 return;
455 }
456 DCHECK(request); 425 DCHECK(request);
457 DCHECK(!callback.is_null()); 426 DCHECK(!callback.is_null());
458 if (!success) { 427 if (!success) {
459 callback.Run(false, request); 428 callback.Run(false, request.Pass());
460 return; 429 return;
461 } 430 }
462 SetHostVisitsFeatures(num_visits, first_visit, false, request); 431 SetHostVisitsFeatures(num_visits, first_visit, false, request.get());
463 callback.Run(true, request); // We're done with all the history lookups. 432 callback.Run(true, request.Pass());
464 } 433 }
465 434
466 void BrowserFeatureExtractor::SetHostVisitsFeatures( 435 void BrowserFeatureExtractor::SetHostVisitsFeatures(
467 int num_visits, 436 int num_visits,
468 base::Time first_visit, 437 base::Time first_visit,
469 bool is_http_query, 438 bool is_http_query,
470 ClientPhishingRequest* request) { 439 ClientPhishingRequest* request) {
471 DCHECK(request); 440 DCHECK(request);
472 AddFeature(is_http_query ? 441 AddFeature(is_http_query ?
473 features::kHttpHostVisitCount : features::kHttpsHostVisitCount, 442 features::kHttpHostVisitCount : features::kHttpsHostVisitCount,
474 static_cast<double>(num_visits), 443 static_cast<double>(num_visits),
475 request); 444 request);
476 if (num_visits > 0) { 445 if (num_visits > 0) {
477 AddFeature( 446 AddFeature(
478 is_http_query ? 447 is_http_query ?
479 features::kFirstHttpHostVisitMoreThan24hAgo : 448 features::kFirstHttpHostVisitMoreThan24hAgo :
480 features::kFirstHttpsHostVisitMoreThan24hAgo, 449 features::kFirstHttpsHostVisitMoreThan24hAgo,
481 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ? 450 (first_visit < (base::Time::Now() - base::TimeDelta::FromDays(1))) ?
482 1.0 : 0.0, 451 1.0 : 0.0,
483 request); 452 request);
484 } 453 }
485 } 454 }
486 455
487 void BrowserFeatureExtractor::StorePendingQuery(
488 CancelableRequestProvider::Handle handle,
489 ClientPhishingRequest* request,
490 const DoneCallback& callback) {
491 DCHECK_EQ(0U, pending_queries_.count(handle));
492 pending_queries_[handle] = std::make_pair(request, callback);
493 }
494
495 bool BrowserFeatureExtractor::GetPendingQuery(
496 CancelableRequestProvider::Handle handle,
497 ClientPhishingRequest** request,
498 DoneCallback* callback) {
499 PendingQueriesMap::iterator it = pending_queries_.find(handle);
500 DCHECK(it != pending_queries_.end());
501 if (it != pending_queries_.end()) {
502 *request = it->second.first;
503 *callback = it->second.second;
504 pending_queries_.erase(it);
505 return true;
506 }
507 return false;
508 }
509
510 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) { 456 bool BrowserFeatureExtractor::GetHistoryService(HistoryService** history) {
511 *history = NULL; 457 *history = NULL;
512 if (tab_ && tab_->GetBrowserContext()) { 458 if (tab_ && tab_->GetBrowserContext()) {
513 Profile* profile = Profile::FromBrowserContext(tab_->GetBrowserContext()); 459 Profile* profile = Profile::FromBrowserContext(tab_->GetBrowserContext());
514 *history = HistoryServiceFactory::GetForProfile(profile, 460 *history = HistoryServiceFactory::GetForProfile(profile,
515 Profile::EXPLICIT_ACCESS); 461 Profile::EXPLICIT_ACCESS);
516 if (*history) { 462 if (*history) {
517 return true; 463 return true;
518 } 464 }
519 } 465 }
(...skipping 14 matching lines...) Expand all
534 // Limit the number of matched bad IPs in one request to control 480 // Limit the number of matched bad IPs in one request to control
535 // the request's size 481 // the request's size
536 if (matched_bad_ips >= kMaxMalwareIPPerRequest) { 482 if (matched_bad_ips >= kMaxMalwareIPPerRequest) {
537 break; 483 break;
538 } 484 }
539 } 485 }
540 callback.Run(true, request.Pass()); 486 callback.Run(true, request.Pass());
541 } 487 }
542 488
543 } // namespace safe_browsing 489 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698