| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" | 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" |
| 6 | 6 |
| 7 #include "base/command_line.h" | 7 #include "base/command_line.h" |
| 8 #include "base/file_path.h" | 8 #include "base/file_path.h" |
| 9 #include "base/file_util_proxy.h" | 9 #include "base/file_util_proxy.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| 11 #include "base/message_loop.h" | 11 #include "base/message_loop.h" |
| 12 #include "base/metrics/histogram.h" | 12 #include "base/metrics/histogram.h" |
| 13 #include "base/platform_file.h" | 13 #include "base/platform_file.h" |
| 14 #include "base/scoped_ptr.h" | 14 #include "base/scoped_ptr.h" |
| 15 #include "base/stl_util-inl.h" | 15 #include "base/stl_util-inl.h" |
| 16 #include "base/task.h" | 16 #include "base/task.h" |
| 17 #include "base/time.h" | 17 #include "base/time.h" |
| 18 #include "chrome/browser/browser_thread.h" | 18 #include "chrome/browser/browser_thread.h" |
| 19 #include "chrome/browser/safe_browsing/csd.pb.h" | 19 #include "chrome/browser/safe_browsing/csd.pb.h" |
| 20 #include "chrome/common/net/http_return.h" | 20 #include "chrome/common/net/http_return.h" |
| 21 #include "chrome/common/net/url_fetcher.h" | 21 #include "chrome/common/net/url_fetcher.h" |
| 22 #include "chrome/common/net/url_request_context_getter.h" | 22 #include "chrome/common/net/url_request_context_getter.h" |
| 23 #include "googleurl/src/gurl.h" | 23 #include "googleurl/src/gurl.h" |
| 24 #include "net/base/load_flags.h" | 24 #include "net/base/load_flags.h" |
| 25 #include "net/url_request/url_request_status.h" | 25 #include "net/url_request/url_request_status.h" |
| 26 | 26 |
| 27 namespace safe_browsing { | 27 namespace safe_browsing { |
| 28 | 28 |
| 29 const int ClientSideDetectionService::kMaxReportsPerDay = 3; | 29 const int ClientSideDetectionService::kMaxReportsPerInterval = 3; |
| 30 |
| 31 const base::TimeDelta ClientSideDetectionService::kReportsInterval = |
| 32 base::TimeDelta::FromDays(1); |
| 33 const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval = |
| 34 base::TimeDelta::FromDays(1); |
| 35 const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval = |
| 36 base::TimeDelta::FromMinutes(30); |
| 30 | 37 |
| 31 const char ClientSideDetectionService::kClientReportPhishingUrl[] = | 38 const char ClientSideDetectionService::kClientReportPhishingUrl[] = |
| 32 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; | 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; |
| 33 const char ClientSideDetectionService::kClientModelUrl[] = | 40 const char ClientSideDetectionService::kClientModelUrl[] = |
| 34 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; | 41 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; |
| 35 | 42 |
| 36 struct ClientSideDetectionService::ClientReportInfo { | 43 struct ClientSideDetectionService::ClientReportInfo { |
| 37 scoped_ptr<ClientReportPhishingRequestCallback> callback; | 44 scoped_ptr<ClientReportPhishingRequestCallback> callback; |
| 38 GURL phishing_url; | 45 GURL phishing_url; |
| 39 }; | 46 }; |
| 40 | 47 |
| 48 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) |
| 49 : is_phishing(phish), |
| 50 timestamp(time) {} |
| 51 |
| 41 ClientSideDetectionService::ClientSideDetectionService( | 52 ClientSideDetectionService::ClientSideDetectionService( |
| 42 const FilePath& model_path, | 53 const FilePath& model_path, |
| 43 URLRequestContextGetter* request_context_getter) | 54 URLRequestContextGetter* request_context_getter) |
| 44 : model_path_(model_path), | 55 : model_path_(model_path), |
| 45 model_status_(UNKNOWN_STATUS), | 56 model_status_(UNKNOWN_STATUS), |
| 46 model_file_(base::kInvalidPlatformFileValue), | 57 model_file_(base::kInvalidPlatformFileValue), |
| 47 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), | 58 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), |
| 48 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), | 59 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), |
| 49 request_context_getter_(request_context_getter) { | 60 request_context_getter_(request_context_getter) { |
| 50 } | 61 } |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 219 } | 230 } |
| 220 } | 231 } |
| 221 | 232 |
| 222 void ClientSideDetectionService::StartClientReportPhishingRequest( | 233 void ClientSideDetectionService::StartClientReportPhishingRequest( |
| 223 const GURL& phishing_url, | 234 const GURL& phishing_url, |
| 224 double score, | 235 double score, |
| 225 ClientReportPhishingRequestCallback* callback) { | 236 ClientReportPhishingRequestCallback* callback) { |
| 226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | 237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
| 227 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); | 238 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); |
| 228 | 239 |
| 229 if (GetNumReportsPerDay() > kMaxReportsPerDay) { | 240 bool is_phishing; |
| 230 LOG(WARNING) << "Too many report phishing requests sent in the last day, " | 241 if (GetCachedResult(phishing_url, &is_phishing)) { |
| 231 << "not checking " << phishing_url; | 242 VLOG(1) << "Satisfying request for " << phishing_url << " from cache"; |
| 243 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); |
| 244 cb->Run(phishing_url, is_phishing); |
| 245 return; |
| 246 } |
| 247 |
| 248 // We limit the number of distinct pings to kMaxReportsPerInterval, but we |
| 249 // don't count urls already in the cache against this number. We don't want |
| 250 // to start classifying too many pages as phishing, but for those that we |
| 251 // already think are phishing we want to give ourselves a chance to fix false |
| 252 // positives. |
| 253 if (cache_.find(phishing_url) != cache_.end()) { |
| 254 VLOG(1) << "Refreshing cache for " << phishing_url; |
| 255 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1); |
| 256 } else if (GetNumReports() > kMaxReportsPerInterval) { |
| 257 VLOG(1) << "Too many report phishing requests sent in the last " |
| 258 << kReportsInterval.InHours() << " hours, not checking " |
| 259 << phishing_url; |
| 232 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); | 260 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); |
| 233 cb->Run(phishing_url, false); | 261 cb->Run(phishing_url, false); |
| 234 return; | 262 return; |
| 235 } | 263 } |
| 236 | 264 |
| 237 ClientPhishingRequest request; | 265 ClientPhishingRequest request; |
| 238 request.set_url(phishing_url.spec()); | 266 request.set_url(phishing_url.spec()); |
| 239 request.set_client_score(static_cast<float>(score)); | 267 request.set_client_score(static_cast<float>(score)); |
| 240 std::string request_data; | 268 std::string request_data; |
| 241 if (!request.SerializeToString(&request_data)) { | 269 if (!request.SerializeToString(&request_data)) { |
| 242 // For consistency, we always call the callback asynchronously, rather than | 270 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); |
| 243 // directly from this method. | 271 VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; |
| 244 LOG(ERROR) << "Unable to serialize the CSD request. Proto file changed?"; | |
| 245 cb->Run(phishing_url, false); | 272 cb->Run(phishing_url, false); |
| 246 return; | 273 return; |
| 247 } | 274 } |
| 248 | 275 |
| 249 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, | 276 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, |
| 250 GURL(kClientReportPhishingUrl), | 277 GURL(kClientReportPhishingUrl), |
| 251 URLFetcher::POST, | 278 URLFetcher::POST, |
| 252 this); | 279 this); |
| 253 | 280 |
| 254 // Remember which callback and URL correspond to the current fetcher object. | 281 // Remember which callback and URL correspond to the current fetcher object. |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 299 | 326 |
| 300 void ClientSideDetectionService::HandlePhishingVerdict( | 327 void ClientSideDetectionService::HandlePhishingVerdict( |
| 301 const URLFetcher* source, | 328 const URLFetcher* source, |
| 302 const GURL& url, | 329 const GURL& url, |
| 303 const net::URLRequestStatus& status, | 330 const net::URLRequestStatus& status, |
| 304 int response_code, | 331 int response_code, |
| 305 const ResponseCookies& cookies, | 332 const ResponseCookies& cookies, |
| 306 const std::string& data) { | 333 const std::string& data) { |
| 307 ClientPhishingResponse response; | 334 ClientPhishingResponse response; |
| 308 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); | 335 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); |
| 309 if (status.is_success() && RC_REQUEST_OK == response_code && | 336 if (status.is_success() && RC_REQUEST_OK == response_code && |
| 310 response.ParseFromString(data)) { | 337 response.ParseFromString(data)) { |
| 338 // Cache response, possibly flushing an old one. |
| 339 cache_[info->phishing_url] = |
| 340 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now())); |
| 311 info->callback->Run(info->phishing_url, response.phishy()); | 341 info->callback->Run(info->phishing_url, response.phishy()); |
| 312 } else { | 342 } else { |
| 313 DLOG(ERROR) << "Unable to get the server verdict for URL: " | 343 DLOG(ERROR) << "Unable to get the server verdict for URL: " |
| 314 << info->phishing_url; | 344 << info->phishing_url; |
| 315 info->callback->Run(info->phishing_url, false); | 345 info->callback->Run(info->phishing_url, false); |
| 316 } | 346 } |
| 317 client_phishing_reports_.erase(source); | 347 client_phishing_reports_.erase(source); |
| 318 delete source; | 348 delete source; |
| 319 } | 349 } |
| 320 | 350 |
| 321 int ClientSideDetectionService::GetNumReportsPerDay() { | 351 bool ClientSideDetectionService::GetCachedResult(const GURL& url, |
| 322 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1); | 352 bool* is_phishing) { |
| 353 UpdateCache(); |
| 323 | 354 |
| 324 // Erase elements older than a day because we will never care about them | 355 PhishingCache::iterator it = cache_.find(url); |
| 325 // again. | 356 if (it == cache_.end()) { |
| 357 return false; |
| 358 } |
| 359 |
| 360 // We still need to check if the result is valid. |
| 361 const CacheState& cache_state = *it->second; |
| 362 if (cache_state.is_phishing ? |
| 363 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval : |
| 364 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) { |
| 365 *is_phishing = cache_state.is_phishing; |
| 366 return true; |
| 367 } |
| 368 return false; |
| 369 } |
| 370 |
| 371 void ClientSideDetectionService::UpdateCache() { |
| 372 // Since we limit the number of requests but allow pass-through for cache |
| 373 // refreshes, we must not evict an entry while it can still qualify a request |
| 374 // as a refresh, even if the entry is already too old to satisfy the request |
| 375 // from the cache. |
| 376 base::TimeDelta positive_cache_interval = |
| 377 std::max(kPositiveCacheInterval, kReportsInterval); |
| 378 base::TimeDelta negative_cache_interval = |
| 379 std::max(kNegativeCacheInterval, kReportsInterval); |
| 380 |
| 381 // Remove elements from the cache that will no longer be used. |
| 382 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) { |
| 383 const CacheState& cache_state = *it->second; |
| 384 if (cache_state.is_phishing ? |
| 385 cache_state.timestamp > base::Time::Now() - positive_cache_interval : |
| 386 cache_state.timestamp > base::Time::Now() - negative_cache_interval) { |
| 387 ++it; |
| 388 } else { |
| 389 cache_.erase(it++); |
| 390 } |
| 391 } |
| 392 } |
| 393 |
| 394 int ClientSideDetectionService::GetNumReports() { |
| 395 base::Time cutoff = base::Time::Now() - kReportsInterval; |
| 396 |
| 397 // Erase items older than cutoff because we will never care about them again. |
| 326 while (!phishing_report_times_.empty() && | 398 while (!phishing_report_times_.empty() && |
| 327 phishing_report_times_.front() < cutoff) { | 399 phishing_report_times_.front() < cutoff) { |
| 328 phishing_report_times_.pop(); | 400 phishing_report_times_.pop(); |
| 329 } | 401 } |
| 330 | 402 |
| 331 // Return the number of elements that are above the cutoff. | 403 // Return the number of elements that are above the cutoff. |
| 332 return phishing_report_times_.size(); | 404 return phishing_report_times_.size(); |
| 333 } | 405 } |
| 334 | 406 |
| 335 } // namespace safe_browsing | 407 } // namespace safe_browsing |
| OLD | NEW |