Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(381)

Side by Side Diff: chrome/browser/safe_browsing/client_side_detection_service.cc

Issue 6374017: Add caching to phishing client side detection. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Updated header comments Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h"
6 6
7 #include "base/command_line.h" 7 #include "base/command_line.h"
8 #include "base/file_path.h" 8 #include "base/file_path.h"
9 #include "base/file_util_proxy.h" 9 #include "base/file_util_proxy.h"
10 #include "base/logging.h" 10 #include "base/logging.h"
(...skipping 10 matching lines...) Expand all
21 #include "chrome/common/net/url_fetcher.h" 21 #include "chrome/common/net/url_fetcher.h"
22 #include "chrome/common/net/url_request_context_getter.h" 22 #include "chrome/common/net/url_request_context_getter.h"
23 #include "googleurl/src/gurl.h" 23 #include "googleurl/src/gurl.h"
24 #include "net/base/load_flags.h" 24 #include "net/base/load_flags.h"
25 #include "net/url_request/url_request_status.h" 25 #include "net/url_request/url_request_status.h"
26 26
27 namespace safe_browsing { 27 namespace safe_browsing {
28 28
29 const int ClientSideDetectionService::kMaxReportsPerDay = 3; 29 const int ClientSideDetectionService::kMaxReportsPerDay = 3;
30 30
31 const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
32 base::TimeDelta::FromDays(1);
33 const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
34 base::TimeDelta::FromMinutes(30);
35
31 const char ClientSideDetectionService::kClientReportPhishingUrl[] = 36 const char ClientSideDetectionService::kClientReportPhishingUrl[] =
32 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; 37 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
33 const char ClientSideDetectionService::kClientModelUrl[] = 38 const char ClientSideDetectionService::kClientModelUrl[] =
34 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; 39 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb";
35 40
36 struct ClientSideDetectionService::ClientReportInfo { 41 struct ClientSideDetectionService::ClientReportInfo {
37 scoped_ptr<ClientReportPhishingRequestCallback> callback; 42 scoped_ptr<ClientReportPhishingRequestCallback> callback;
38 GURL phishing_url; 43 GURL phishing_url;
39 }; 44 };
40 45
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 } 224 }
220 } 225 }
221 226
222 void ClientSideDetectionService::StartClientReportPhishingRequest( 227 void ClientSideDetectionService::StartClientReportPhishingRequest(
223 const GURL& phishing_url, 228 const GURL& phishing_url,
224 double score, 229 double score,
225 ClientReportPhishingRequestCallback* callback) { 230 ClientReportPhishingRequestCallback* callback) {
226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
227 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); 232 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
228 233
229 if (GetNumReportsPerDay() > kMaxReportsPerDay) { 234 bool is_phishing;
235 if (GetCachedResult(phishing_url, &is_phishing)) {
236 LOG(INFO) << "Satisfying request for " << phishing_url << " from cache";
Brian Ryner 2011/02/08 20:16:48 This is probably not something that should always
gcasto (DO NOT USE) 2011/02/09 00:10:52 Done.
237 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
238 cb->Run(phishing_url, is_phishing);
239 return;
240 }
241
242 // We only limit the number of distinct urls to kMaxReportsPerDay, but we
243 // currently don't count urls already in the cache against this number. We
244 // don't want to start classifying too many pages as phishing, but for those
245 // that we already think are phishing we want to give ourselves a chance to
246 // fix false positives.
247 if (cache_.find(phishing_url) != cache_.end()) {
248 LOG(INFO) << "Refreshing cache for " << phishing_url;
Brian Ryner 2011/02/08 20:16:48 Ditto here.
gcasto (DO NOT USE) 2011/02/09 00:10:52 Done.
249 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
250 } else if (GetNumReportsPerDay() > kMaxReportsPerDay) {
230 LOG(WARNING) << "Too many report phishing requests sent in the last day, " 251 LOG(WARNING) << "Too many report phishing requests sent in the last day, "
Brian Ryner 2011/02/08 20:16:48 It's not directly part of this change, but this is
gcasto (DO NOT USE) 2011/02/09 00:10:52 Done.
231 << "not checking " << phishing_url; 252 << "not checking " << phishing_url;
232 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); 253 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
233 cb->Run(phishing_url, false); 254 cb->Run(phishing_url, false);
234 return; 255 return;
235 } 256 }
236 257
237 ClientPhishingRequest request; 258 ClientPhishingRequest request;
238 request.set_url(phishing_url.spec()); 259 request.set_url(phishing_url.spec());
239 request.set_client_score(static_cast<float>(score)); 260 request.set_client_score(static_cast<float>(score));
240 std::string request_data; 261 std::string request_data;
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
299 320
300 void ClientSideDetectionService::HandlePhishingVerdict( 321 void ClientSideDetectionService::HandlePhishingVerdict(
301 const URLFetcher* source, 322 const URLFetcher* source,
302 const GURL& url, 323 const GURL& url,
303 const net::URLRequestStatus& status, 324 const net::URLRequestStatus& status,
304 int response_code, 325 int response_code,
305 const ResponseCookies& cookies, 326 const ResponseCookies& cookies,
306 const std::string& data) { 327 const std::string& data) {
307 ClientPhishingResponse response; 328 ClientPhishingResponse response;
308 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); 329 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
309 if (status.is_success() && RC_REQUEST_OK == response_code && 330 if (status.is_success() && RC_REQUEST_OK == response_code &&
310 response.ParseFromString(data)) { 331 response.ParseFromString(data)) {
332 // Cache response, possibly flushing an old one.
Brian Ryner 2011/02/08 20:16:48 I'm not totally sure I understand the "possibly fl
gcasto (DO NOT USE) 2011/02/09 00:10:52 You're correct that this comment is currently wrong,
333 cache_[info->phishing_url] =
334 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
311 info->callback->Run(info->phishing_url, response.phishy()); 335 info->callback->Run(info->phishing_url, response.phishy());
312 } else { 336 } else {
313 DLOG(ERROR) << "Unable to get the server verdict for URL: " 337 DLOG(ERROR) << "Unable to get the server verdict for URL: "
314 << info->phishing_url; 338 << info->phishing_url;
315 info->callback->Run(info->phishing_url, false); 339 info->callback->Run(info->phishing_url, false);
316 } 340 }
317 client_phishing_reports_.erase(source); 341 client_phishing_reports_.erase(source);
318 delete source; 342 delete source;
319 } 343 }
320 344
345 bool ClientSideDetectionService::GetCachedResult(GURL url,
346 bool* is_phishing) {
347 UpdateCache();
348
349 PhishingCache::iterator it = cache_.find(url);
350 if (it == cache_.end()) {
351 return false;
352 }
353
354 // Result is guaranteed to be relevant since we just updated the cache.
355 *is_phishing = it->second->is_phishing;
356 return true;
357 }
358
359 void ClientSideDetectionService::UpdateCache() {
360 // Remove elements from the cache that will no longer be used.
361 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
362 if ((it->second->is_phishing &&
Brian Ryner 2011/02/08 20:16:48 This might be more readable if you use a temporary
gcasto (DO NOT USE) 2011/02/09 00:10:52 Done.
363 it->second->timestamp > base::Time::Now() - kPositiveCacheInterval) ||
364 (!it->second->is_phishing &&
365 it->second->timestamp > base::Time::Now() - kNegativeCacheInterval)) {
366 ++it;
367 } else {
368 cache_.erase(it++);
369 }
370 }
371 }
372
321 int ClientSideDetectionService::GetNumReportsPerDay() { 373 int ClientSideDetectionService::GetNumReportsPerDay() {
322 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1); 374 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1);
323 375
324 // Erase elements older than a day because we will never care about them 376 // Erase elements older than a day because we will never care about them
325 // again. 377 // again.
326 while (!phishing_report_times_.empty() && 378 while (!phishing_report_times_.empty() &&
327 phishing_report_times_.front() < cutoff) { 379 phishing_report_times_.front() < cutoff) {
328 phishing_report_times_.pop(); 380 phishing_report_times_.pop();
329 } 381 }
330 382
331 // Return the number of elements that are above the cutoff. 383 // Return the number of elements that are above the cutoff.
332 return phishing_report_times_.size(); 384 return phishing_report_times_.size();
333 } 385 }
334 386
335 } // namespace safe_browsing 387 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698