Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Unified Diff: chrome/browser/safe_browsing/client_side_detection_service.cc

Issue 6374017: Add caching to phishing client side detection. (Closed) Base URL: http://git.chromium.org/git/chromium.git@trunk
Patch Set: Add UMA stat for not being able to serialize request. Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/client_side_detection_service.cc
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc
index 008a2d2de44c1f90cd4cdaba323824fc6d4d285b..4adda8a0c207bf8054bfcf2133ed998314d146e0 100644
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc
@@ -26,7 +26,14 @@
namespace safe_browsing {
-const int ClientSideDetectionService::kMaxReportsPerDay = 3;
+const int ClientSideDetectionService::kMaxReportsPerInterval = 3;
+
+const base::TimeDelta ClientSideDetectionService::kReportsInterval =
+ base::TimeDelta::FromDays(1);
+const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval =
+ base::TimeDelta::FromDays(1);
+const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval =
+ base::TimeDelta::FromMinutes(30);
const char ClientSideDetectionService::kClientReportPhishingUrl[] =
"https://sb-ssl.google.com/safebrowsing/clientreport/phishing";
@@ -38,6 +45,10 @@ struct ClientSideDetectionService::ClientReportInfo {
GURL phishing_url;
};
+ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time)
+ : is_phishing(phish),
+ timestamp(time) {}
+
ClientSideDetectionService::ClientSideDetectionService(
const FilePath& model_path,
URLRequestContextGetter* request_context_getter)
@@ -226,9 +237,26 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
scoped_ptr<ClientReportPhishingRequestCallback> cb(callback);
- if (GetNumReportsPerDay() > kMaxReportsPerDay) {
- LOG(WARNING) << "Too many report phishing requests sent in the last day, "
- << "not checking " << phishing_url;
+ bool is_phishing;
+ if (GetCachedResult(phishing_url, &is_phishing)) {
+ VLOG(1) << "Satisfying request for " << phishing_url << " from cache";
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
+ cb->Run(phishing_url, is_phishing);
+ return;
+ }
+
+ // We limit the number of distinct pings to kMaxReports, but we don't count
+ // urls already in the cache against this number. We don't want to start
+ // classifying too many pages as phishing, but for those that we already
+ // think are phishing we want to give ourselves a chance to fix false
+ // positives.
+ if (cache_.find(phishing_url) != cache_.end()) {
+ VLOG(1) << "Refreshing cache for " << phishing_url;
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1);
+ } else if (GetNumReports() > kMaxReportsPerInterval) {
+ VLOG(1) << "Too many report phishing requests sent in the last "
+ << kReportsInterval.InHours() << " hours, not checking "
+ << phishing_url;
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1);
cb->Run(phishing_url, false);
return;
@@ -239,9 +267,8 @@ void ClientSideDetectionService::StartClientReportPhishingRequest(
request.set_client_score(static_cast<float>(score));
std::string request_data;
if (!request.SerializeToString(&request_data)) {
- // For consistency, we always call the callback asynchronously, rather than
- // directly from this method.
- LOG(ERROR) << "Unable to serialize the CSD request. Proto file changed?";
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1);
+ VLOG(1) << "Unable to serialize the CSD request. Proto file changed?";
cb->Run(phishing_url, false);
return;
}
@@ -306,8 +333,11 @@ void ClientSideDetectionService::HandlePhishingVerdict(
const std::string& data) {
ClientPhishingResponse response;
scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]);
- if (status.is_success() && RC_REQUEST_OK == response_code &&
+ if (status.is_success() && RC_REQUEST_OK == response_code &&
response.ParseFromString(data)) {
+ // Cache response, possibly flushing an old one.
+ cache_[info->phishing_url] =
+ make_linked_ptr(new CacheState(response.phishy(), base::Time::Now()));
info->callback->Run(info->phishing_url, response.phishy());
} else {
DLOG(ERROR) << "Unable to get the server verdict for URL: "
@@ -318,11 +348,53 @@ void ClientSideDetectionService::HandlePhishingVerdict(
delete source;
}
-int ClientSideDetectionService::GetNumReportsPerDay() {
- base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1);
+bool ClientSideDetectionService::GetCachedResult(const GURL& url,
+ bool* is_phishing) {
+ UpdateCache();
+
+ PhishingCache::iterator it = cache_.find(url);
+ if (it == cache_.end()) {
+ return false;
+ }
+
+ // We still need to check if the result is valid.
+ const CacheState& cache_state = *it->second;
+ if (cache_state.is_phishing ?
+ cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval :
+ cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) {
+ *is_phishing = cache_state.is_phishing;
+ return true;
+ }
+ return false;
+}
+
+void ClientSideDetectionService::UpdateCache() {
+ // Since we limit the number of requests but allow pass-through for cache
+ // refreshes, we don't want to remove elements from the cache if they
+ // could be used for this purpose even if we will not use the entry to
+ // satisfy the request from the cache.
+ base::TimeDelta positive_cache_interval =
+ std::max(kPositiveCacheInterval, kReportsInterval);
+ base::TimeDelta negative_cache_interval =
+ std::max(kNegativeCacheInterval, kReportsInterval);
+
+ // Remove elements from the cache that will no longer be used.
+ for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) {
+ const CacheState& cache_state = *it->second;
+ if (cache_state.is_phishing ?
+ cache_state.timestamp > base::Time::Now() - positive_cache_interval :
+ cache_state.timestamp > base::Time::Now() - negative_cache_interval) {
+ ++it;
+ } else {
+ cache_.erase(it++);
+ }
+ }
+}
+
+int ClientSideDetectionService::GetNumReports() {
+ base::Time cutoff = base::Time::Now() - kReportsInterval;
- // Erase elements older than a day because we will never care about them
- // again.
+ // Erase items older than cutoff because we will never care about them again.
while (!phishing_report_times_.empty() &&
phishing_report_times_.front() < cutoff) {
phishing_report_times_.pop();

Powered by Google App Engine
This is Rietveld 408576698