Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_service.cc

Issue 6374017: Add caching to phishing client side detection. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Add UMA stat for not being able to serialize request. Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
6 6
7 #include "base/command_line.h" 7 #include "base/command_line.h"
8 #include "base/file_path.h" 8 #include "base/file_path.h"
9 #include "base/file_util_proxy.h" 9 #include "base/file_util_proxy.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/message_loop.h" 11 #include "base/message_loop.h"
12 #include "base/metrics/histogram.h" 12 #include "base/metrics/histogram.h"
13 #include "base/platform_file.h" 13 #include "base/platform_file.h"
14 #include "base/scoped_ptr.h" 14 #include "base/scoped_ptr.h"
15 #include "base/stl_util-inl.h" 15 #include "base/stl_util-inl.h"
16 #include "base/task.h" 16 #include "base/task.h"
17 #include "base/time.h" 17 #include "base/time.h"
18 #include "chrome/browser/browser_thread.h" 18 #include "chrome/browser/browser_thread.h"
19 #include "chrome/browser/safe_browsing/csd.pb.h" 19 #include "chrome/browser/safe_browsing/csd.pb.h"
20 #include "chrome/common/net/http_return.h" 20 #include "chrome/common/net/http_return.h"
21 #include "chrome/common/net/url_fetcher.h" 21 #include "chrome/common/net/url_fetcher.h"
22 #include "chrome/common/net/url_request_context_getter.h" 22 #include "chrome/common/net/url_request_context_getter.h"
23 #include "googleurl/src/gurl.h" 23 #include "googleurl/src/gurl.h"
24 #include "net/base/load_flags.h" 24 #include "net/base/load_flags.h"
25 #include "net/url_request/url_request_status.h" 25 #include "net/url_request/url_request_status.h"
26 26
27 namespace safe_browsing { 27 namespace safe_browsing {
28 28
29 const int ClientSideDetectionService::kMaxReportsPerDay = 3; 29 const int ClientSideDetectionService::kMaxReportsPerInterval = 3;
30
31 const base::TimeDelta ClientSideDetectionService::kReportsInterval =
32 base::TimeDelta::FromDays(1);
33 const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
34 base::TimeDelta::FromDays(1);
35 const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
36 base::TimeDelta::FromMinutes(30);
30 37
31 const char ClientSideDetectionService::kClientReportPhishingUrl[] = 38 const char ClientSideDetectionService::kClientReportPhishingUrl[] =
32 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
33 const char ClientSideDetectionService::kClientModelUrl[] = 40 const char ClientSideDetectionService::kClientModelUrl[] =
34 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; 41 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb";
35 42
36 struct ClientSideDetectionService::ClientReportInfo { 43 struct ClientSideDetectionService::ClientReportInfo {
37 scoped_ptr<ClientReportPhishingRequestCallback> callback; 44 scoped_ptr<ClientReportPhishingRequestCallback> callback;
38 GURL phishing_url; 45 GURL phishing_url;
39 }; 46 };
40 47
48 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
49 : is_phishing(phish),
50 timestamp(time) {}
51
41 ClientSideDetectionService::ClientSideDetectionService( 52 ClientSideDetectionService::ClientSideDetectionService(
42 const FilePath& model_path, 53 const FilePath& model_path,
43 URLRequestContextGetter* request_context_getter) 54 URLRequestContextGetter* request_context_getter)
44 : model_path_(model_path), 55 : model_path_(model_path),
45 model_status_(UNKNOWN_STATUS), 56 model_status_(UNKNOWN_STATUS),
46 model_file_(base::kInvalidPlatformFileValue), 57 model_file_(base::kInvalidPlatformFileValue),
47 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), 58 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
48 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), 59 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)),
49 request_context_getter_(request_context_getter) { 60 request_context_getter_(request_context_getter) {
50 } 61 }
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 } 230 }
220 } 231 }
221 232
222 void ClientSideDetectionService::StartClientReportPhishingRequest( 233 void ClientSideDetectionService::StartClientReportPhishingRequest(
223 const GURL& phishing_url, 234 const GURL& phishing_url,
224 double score, 235 double score,
225 ClientReportPhishingRequestCallback* callback) { 236 ClientReportPhishingRequestCallback* callback) {
226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
227 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); 238 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
228 239
229 if (GetNumReportsPerDay() > kMaxReportsPerDay) { 240 bool is_phishing;
230 LOG(WARNING) << "Too many report phishing requests sent in the last day, " 241 if (GetCachedResult(phishing_url, &is_phishing)) {
231 << "not checking " << phishing_url; 242 VLOG(1) << "Satisfying request for " << phishing_url << " from cache";
243 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
244 cb->Run(phishing_url, is_phishing);
245 return;
246 }
247
248 // We limit the number of distinct pings to kMaxReportsPerInterval, but we
249 // don't count urls already in the cache against this number. We don't want
250 // to start classifying too many pages as phishing, but for those that we
251 // already think are phishing we want to give ourselves a chance to fix
252 // false positives.
253 if (cache_.find(phishing_url) != cache_.end()) {
254 VLOG(1) << "Refreshing cache for " << phishing_url;
255 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
256 } else if (GetNumReports() > kMaxReportsPerInterval) {
257 VLOG(1) << "Too many report phishing requests sent in the last "
258 << kReportsInterval.InHours() << " hours, not checking "
259 << phishing_url;
232 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); 260 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
233 cb->Run(phishing_url, false); 261 cb->Run(phishing_url, false);
234 return; 262 return;
235 } 263 }
236 264
237 ClientPhishingRequest request; 265 ClientPhishingRequest request;
238 request.set_url(phishing_url.spec()); 266 request.set_url(phishing_url.spec());
239 request.set_client_score(static_cast<float>(score)); 267 request.set_client_score(static_cast<float>(score));
240 std::string request_data; 268 std::string request_data;
241 if (!request.SerializeToString(&request_data)) { 269 if (!request.SerializeToString(&request_data)) {
242 // For consistency, we always call the callback asynchronously, rather than 270 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
243 // directly from this method. 271 VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
244 LOG(ERROR) << "Unable to serialize the CSD request. Proto file changed?";
245 cb->Run(phishing_url, false); 272 cb->Run(phishing_url, false);
246 return; 273 return;
247 } 274 }
248 275
249 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, 276 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */,
250 GURL(kClientReportPhishingUrl), 277 GURL(kClientReportPhishingUrl),
251 URLFetcher::POST, 278 URLFetcher::POST,
252 this); 279 this);
253 280
254 // Remember which callback and URL correspond to the current fetcher object. 281 // Remember which callback and URL correspond to the current fetcher object.
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 326
300 void ClientSideDetectionService::HandlePhishingVerdict( 327 void ClientSideDetectionService::HandlePhishingVerdict(
301 const URLFetcher* source, 328 const URLFetcher* source,
302 const GURL& url, 329 const GURL& url,
303 const net::URLRequestStatus& status, 330 const net::URLRequestStatus& status,
304 int response_code, 331 int response_code,
305 const ResponseCookies& cookies, 332 const ResponseCookies& cookies,
306 const std::string& data) { 333 const std::string& data) {
307 ClientPhishingResponse response; 334 ClientPhishingResponse response;
308 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); 335 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
309 if (status.is_success() && RC_REQUEST_OK == response_code && 336 if (status.is_success() && RC_REQUEST_OK == response_code &&
310 response.ParseFromString(data)) { 337 response.ParseFromString(data)) {
338 // Cache response, possibly flushing an old one.
339 cache_[info->phishing_url] =
340 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
311 info->callback->Run(info->phishing_url, response.phishy()); 341 info->callback->Run(info->phishing_url, response.phishy());
312 } else { 342 } else {
313 DLOG(ERROR) << "Unable to get the server verdict for URL: " 343 DLOG(ERROR) << "Unable to get the server verdict for URL: "
314 << info->phishing_url; 344 << info->phishing_url;
315 info->callback->Run(info->phishing_url, false); 345 info->callback->Run(info->phishing_url, false);
316 } 346 }
317 client_phishing_reports_.erase(source); 347 client_phishing_reports_.erase(source);
318 delete source; 348 delete source;
319 } 349 }
320 350
321 int ClientSideDetectionService::GetNumReportsPerDay() { 351 bool ClientSideDetectionService::GetCachedResult(const GURL& url,
322 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1); 352 bool* is_phishing) {
353 UpdateCache();
323 354
324 // Erase elements older than a day because we will never care about them 355 PhishingCache::iterator it = cache_.find(url);
325 // again. 356 if (it == cache_.end()) {
357 return false;
358 }
359
360 // We still need to check if the result is valid.
361 const CacheState& cache_state = *it->second;
362 if (cache_state.is_phishing ?
363 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
364 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
365 *is_phishing = cache_state.is_phishing;
366 return true;
367 }
368 return false;
369 }
370
371 void ClientSideDetectionService::UpdateCache() {
372 // Since we limit the number of requests but allow pass-through for cache
373 // refreshes, we don't want to remove elements from the cache if they
374 // could be used for this purpose, even if we will not use the entry to
375 // satisfy the request from the cache.
376 base::TimeDelta positive_cache_interval =
377 std::max(kPositiveCacheInterval, kReportsInterval);
378 base::TimeDelta negative_cache_interval =
379 std::max(kNegativeCacheInterval, kReportsInterval);
380
381 // Remove elements from the cache that will no longer be used.
382 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
383 const CacheState& cache_state = *it->second;
384 if (cache_state.is_phishing ?
385 cache_state.timestamp > base::Time::Now() - positive_cache_interval :
386 cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
387 ++it;
388 } else {
389 cache_.erase(it++);
390 }
391 }
392 }
393
394 int ClientSideDetectionService::GetNumReports() {
395 base::Time cutoff = base::Time::Now() - kReportsInterval;
396
397 // Erase items older than cutoff because we will never care about them again.
326 while (!phishing_report_times_.empty() && 398 while (!phishing_report_times_.empty() &&
327 phishing_report_times_.front() < cutoff) { 399 phishing_report_times_.front() < cutoff) {
328 phishing_report_times_.pop(); 400 phishing_report_times_.pop();
329 } 401 }
330 402
331 // Return the number of elements that are above the cutoff. 403 // Return the number of elements that are above the cutoff.
332 return phishing_report_times_.size(); 404 return phishing_report_times_.size();
333 } 405 }
334 406
335 } // namespace safe_browsing 407 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698