Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(165)

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_service.cc

Issue 6374017: Add caching to phishing client side detection. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Add testing for cache refresh. Also make interval that we keep track of requests configurable. Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
6 6
7 #include "base/command_line.h" 7 #include "base/command_line.h"
8 #include "base/file_path.h" 8 #include "base/file_path.h"
9 #include "base/file_util_proxy.h" 9 #include "base/file_util_proxy.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
11 #include "base/message_loop.h" 11 #include "base/message_loop.h"
12 #include "base/metrics/histogram.h" 12 #include "base/metrics/histogram.h"
13 #include "base/platform_file.h" 13 #include "base/platform_file.h"
14 #include "base/scoped_ptr.h" 14 #include "base/scoped_ptr.h"
15 #include "base/stl_util-inl.h" 15 #include "base/stl_util-inl.h"
16 #include "base/task.h" 16 #include "base/task.h"
17 #include "base/time.h" 17 #include "base/time.h"
18 #include "chrome/browser/browser_thread.h" 18 #include "chrome/browser/browser_thread.h"
19 #include "chrome/browser/safe_browsing/csd.pb.h" 19 #include "chrome/browser/safe_browsing/csd.pb.h"
20 #include "chrome/common/net/http_return.h" 20 #include "chrome/common/net/http_return.h"
21 #include "chrome/common/net/url_fetcher.h" 21 #include "chrome/common/net/url_fetcher.h"
22 #include "chrome/common/net/url_request_context_getter.h" 22 #include "chrome/common/net/url_request_context_getter.h"
23 #include "googleurl/src/gurl.h" 23 #include "googleurl/src/gurl.h"
24 #include "net/base/load_flags.h" 24 #include "net/base/load_flags.h"
25 #include "net/url_request/url_request_status.h" 25 #include "net/url_request/url_request_status.h"
26 26
27 namespace safe_browsing { 27 namespace safe_browsing {
28 28
29 const int ClientSideDetectionService::kMaxReportsPerDay = 3; 29 const int ClientSideDetectionService::kMaxReports = 3;
30
31 const base::TimeDelta ClientSideDetectionService::kReportsInterval =
32 base::TimeDelta::FromDays(1);
33 const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
34 base::TimeDelta::FromDays(1);
35 const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
36 base::TimeDelta::FromMinutes(30);
30 37
31 const char ClientSideDetectionService::kClientReportPhishingUrl[] = 38 const char ClientSideDetectionService::kClientReportPhishingUrl[] =
32 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
33 const char ClientSideDetectionService::kClientModelUrl[] = 40 const char ClientSideDetectionService::kClientModelUrl[] =
34 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; 41 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb";
35 42
36 struct ClientSideDetectionService::ClientReportInfo { 43 struct ClientSideDetectionService::ClientReportInfo {
37 scoped_ptr<ClientReportPhishingRequestCallback> callback; 44 scoped_ptr<ClientReportPhishingRequestCallback> callback;
38 GURL phishing_url; 45 GURL phishing_url;
39 }; 46 };
40 47
48 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
49 : is_phishing(phish),
50 timestamp(time) {}
51
41 ClientSideDetectionService::ClientSideDetectionService( 52 ClientSideDetectionService::ClientSideDetectionService(
42 const FilePath& model_path, 53 const FilePath& model_path,
43 URLRequestContextGetter* request_context_getter) 54 URLRequestContextGetter* request_context_getter)
44 : model_path_(model_path), 55 : model_path_(model_path),
45 model_status_(UNKNOWN_STATUS), 56 model_status_(UNKNOWN_STATUS),
46 model_file_(base::kInvalidPlatformFileValue), 57 model_file_(base::kInvalidPlatformFileValue),
47 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), 58 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)),
48 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), 59 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)),
49 request_context_getter_(request_context_getter) { 60 request_context_getter_(request_context_getter) {
50 } 61 }
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 } 230 }
220 } 231 }
221 232
222 void ClientSideDetectionService::StartClientReportPhishingRequest( 233 void ClientSideDetectionService::StartClientReportPhishingRequest(
223 const GURL& phishing_url, 234 const GURL& phishing_url,
224 double score, 235 double score,
225 ClientReportPhishingRequestCallback* callback) { 236 ClientReportPhishingRequestCallback* callback) {
226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
227 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); 238 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
228 239
229 if (GetNumReportsPerDay() > kMaxReportsPerDay) { 240 bool is_phishing;
230 LOG(WARNING) << "Too many report phishing requests sent in the last day, " 241 if (GetCachedResult(phishing_url, &is_phishing)) {
231 << "not checking " << phishing_url; 242 VLOG(1) << "Satisfying request for " << phishing_url << " from cache";
243 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
244 cb->Run(phishing_url, is_phishing);
245 return;
246 }
247
248 // We only limit the number of distinct urls to kMaxReports, but we
249 // currently don't count urls already in the cache against this number. We
250 // don't want to start classifying too many pages as phishing, but for those
251 // that we already think are phishing we want to give ourselves a chance to
252 // fix false positives.
253 if (cache_.find(phishing_url) != cache_.end()) {
254 VLOG(1) << "Refreshing cache for " << phishing_url;
255 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
256 } else if (GetNumReports() > kMaxReports) {
257 VLOG(1) << "Too many report phishing requests sent in the last "
258 << kReportsInterval.InHours() << " hours, not checking "
259 << phishing_url;
232 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); 260 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
233 cb->Run(phishing_url, false); 261 cb->Run(phishing_url, false);
234 return; 262 return;
235 } 263 }
236 264
237 ClientPhishingRequest request; 265 ClientPhishingRequest request;
238 request.set_url(phishing_url.spec()); 266 request.set_url(phishing_url.spec());
239 request.set_client_score(static_cast<float>(score)); 267 request.set_client_score(static_cast<float>(score));
240 std::string request_data; 268 std::string request_data;
241 if (!request.SerializeToString(&request_data)) { 269 if (!request.SerializeToString(&request_data)) {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 327
300 void ClientSideDetectionService::HandlePhishingVerdict( 328 void ClientSideDetectionService::HandlePhishingVerdict(
301 const URLFetcher* source, 329 const URLFetcher* source,
302 const GURL& url, 330 const GURL& url,
303 const net::URLRequestStatus& status, 331 const net::URLRequestStatus& status,
304 int response_code, 332 int response_code,
305 const ResponseCookies& cookies, 333 const ResponseCookies& cookies,
306 const std::string& data) { 334 const std::string& data) {
307 ClientPhishingResponse response; 335 ClientPhishingResponse response;
308 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); 336 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
309 if (status.is_success() && RC_REQUEST_OK == response_code && 337 if (status.is_success() && RC_REQUEST_OK == response_code &&
310 response.ParseFromString(data)) { 338 response.ParseFromString(data)) {
339 // Cache response, possibly flushing an old one.
340 cache_[info->phishing_url] =
341 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
311 info->callback->Run(info->phishing_url, response.phishy()); 342 info->callback->Run(info->phishing_url, response.phishy());
312 } else { 343 } else {
313 DLOG(ERROR) << "Unable to get the server verdict for URL: " 344 DLOG(ERROR) << "Unable to get the server verdict for URL: "
314 << info->phishing_url; 345 << info->phishing_url;
315 info->callback->Run(info->phishing_url, false); 346 info->callback->Run(info->phishing_url, false);
316 } 347 }
317 client_phishing_reports_.erase(source); 348 client_phishing_reports_.erase(source);
318 delete source; 349 delete source;
319 } 350 }
320 351
321 int ClientSideDetectionService::GetNumReportsPerDay() { 352 bool ClientSideDetectionService::GetCachedResult(const GURL& url,
322 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1); 353 bool* is_phishing) {
354 UpdateCache();
Brian Ryner 2011/02/09 23:46:42 If the cache was going to be very large, the linea
323 355
324 // Erase elements older than a day because we will never care about them 356 PhishingCache::iterator it = cache_.find(url);
325 // again. 357 if (it == cache_.end()) {
358 return false;
359 }
360
361 // We still need to check if the result is valid.
362 const CacheState& cache_state = *it->second;
363 if (cache_state.is_phishing ?
364 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
365 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
366 *is_phishing = cache_state.is_phishing;
367 return true;
368 }
369 return false;
370 }
371
372 void ClientSideDetectionService::UpdateCache() {
373 // Since we limit the number of requests but allow pass-through for cache
374 // refreshes, we don't want to remove elements from the cache if they
375 // could be used for this purpose even if we will not use the entry to
376 // satisfy the request from the cache.
377 base::TimeDelta positive_cache_interval =
378 std::max(kPositiveCacheInterval, kReportsInterval);
379 base::TimeDelta negative_cache_interval =
380 std::max(kNegativeCacheInterval, kReportsInterval);
381
382 // Remove elements from the cache that will no longer be used.
383 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
384 const CacheState& cache_state = *it->second;
385 if (cache_state.is_phishing ?
386 cache_state.timestamp > base::Time::Now() - positive_cache_interval :
387 cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
388 ++it;
389 } else {
390 cache_.erase(it++);
391 }
392 }
393 }
394
395 int ClientSideDetectionService::GetNumReports() {
396 base::Time cutoff = base::Time::Now() - kReportsInterval;
397
398 // Erase items older than cutoff because we will never care about them again.
326 while (!phishing_report_times_.empty() && 399 while (!phishing_report_times_.empty() &&
327 phishing_report_times_.front() < cutoff) { 400 phishing_report_times_.front() < cutoff) {
328 phishing_report_times_.pop(); 401 phishing_report_times_.pop();
329 } 402 }
330 403
331 // Return the number of elements that are above the cutoff. 404 // Return the number of elements that are above the cutoff.
332 return phishing_report_times_.size(); 405 return phishing_report_times_.size();
333 } 406 }
334 407
335 } // namespace safe_browsing 408 } // namespace safe_browsing
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698