OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" | 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" |
6 | 6 |
7 #include "base/command_line.h" | 7 #include "base/command_line.h" |
8 #include "base/file_path.h" | 8 #include "base/file_path.h" |
9 #include "base/file_util_proxy.h" | 9 #include "base/file_util_proxy.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/message_loop.h" | 11 #include "base/message_loop.h" |
12 #include "base/metrics/histogram.h" | 12 #include "base/metrics/histogram.h" |
13 #include "base/platform_file.h" | 13 #include "base/platform_file.h" |
14 #include "base/scoped_ptr.h" | 14 #include "base/scoped_ptr.h" |
15 #include "base/stl_util-inl.h" | 15 #include "base/stl_util-inl.h" |
16 #include "base/task.h" | 16 #include "base/task.h" |
17 #include "base/time.h" | 17 #include "base/time.h" |
18 #include "chrome/browser/browser_thread.h" | 18 #include "chrome/browser/browser_thread.h" |
19 #include "chrome/browser/safe_browsing/csd.pb.h" | 19 #include "chrome/browser/safe_browsing/csd.pb.h" |
20 #include "chrome/common/net/http_return.h" | 20 #include "chrome/common/net/http_return.h" |
21 #include "chrome/common/net/url_fetcher.h" | 21 #include "chrome/common/net/url_fetcher.h" |
22 #include "chrome/common/net/url_request_context_getter.h" | 22 #include "chrome/common/net/url_request_context_getter.h" |
23 #include "googleurl/src/gurl.h" | 23 #include "googleurl/src/gurl.h" |
24 #include "net/base/load_flags.h" | 24 #include "net/base/load_flags.h" |
25 #include "net/url_request/url_request_status.h" | 25 #include "net/url_request/url_request_status.h" |
26 | 26 |
27 namespace safe_browsing { | 27 namespace safe_browsing { |
28 | 28 |
29 const int ClientSideDetectionService::kMaxReportsPerDay = 3; | 29 const int ClientSideDetectionService::kMaxReportsPerInterval = 3; |
| 30 |
| 31 const base::TimeDelta ClientSideDetectionService::kReportsInterval = |
| 32 base::TimeDelta::FromDays(1); |
| 33 const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval = |
| 34 base::TimeDelta::FromDays(1); |
| 35 const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval = |
| 36 base::TimeDelta::FromMinutes(30); |
30 | 37 |
31 const char ClientSideDetectionService::kClientReportPhishingUrl[] = | 38 const char ClientSideDetectionService::kClientReportPhishingUrl[] = |
32 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; | 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; |
33 const char ClientSideDetectionService::kClientModelUrl[] = | 40 const char ClientSideDetectionService::kClientModelUrl[] = |
34 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; | 41 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; |
35 | 42 |
36 struct ClientSideDetectionService::ClientReportInfo { | 43 struct ClientSideDetectionService::ClientReportInfo { |
37 scoped_ptr<ClientReportPhishingRequestCallback> callback; | 44 scoped_ptr<ClientReportPhishingRequestCallback> callback; |
38 GURL phishing_url; | 45 GURL phishing_url; |
39 }; | 46 }; |
40 | 47 |
| 48 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) |
| 49 : is_phishing(phish), |
| 50 timestamp(time) {} |
| 51 |
41 ClientSideDetectionService::ClientSideDetectionService( | 52 ClientSideDetectionService::ClientSideDetectionService( |
42 const FilePath& model_path, | 53 const FilePath& model_path, |
43 URLRequestContextGetter* request_context_getter) | 54 URLRequestContextGetter* request_context_getter) |
44 : model_path_(model_path), | 55 : model_path_(model_path), |
45 model_status_(UNKNOWN_STATUS), | 56 model_status_(UNKNOWN_STATUS), |
46 model_file_(base::kInvalidPlatformFileValue), | 57 model_file_(base::kInvalidPlatformFileValue), |
47 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), | 58 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), |
48 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), | 59 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), |
49 request_context_getter_(request_context_getter) { | 60 request_context_getter_(request_context_getter) { |
50 } | 61 } |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
219 } | 230 } |
220 } | 231 } |
221 | 232 |
222 void ClientSideDetectionService::StartClientReportPhishingRequest( | 233 void ClientSideDetectionService::StartClientReportPhishingRequest( |
223 const GURL& phishing_url, | 234 const GURL& phishing_url, |
224 double score, | 235 double score, |
225 ClientReportPhishingRequestCallback* callback) { | 236 ClientReportPhishingRequestCallback* callback) { |
226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | 237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
227 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); | 238 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); |
228 | 239 |
229 if (GetNumReportsPerDay() > kMaxReportsPerDay) { | 240 bool is_phishing; |
230 LOG(WARNING) << "Too many report phishing requests sent in the last day, " | 241 if (GetCachedResult(phishing_url, &is_phishing)) { |
231 << "not checking " << phishing_url; | 242 VLOG(1) << "Satisfying request for " << phishing_url << " from cache"; |
| 243 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); |
| 244 cb->Run(phishing_url, is_phishing); |
| 245 return; |
| 246 } |
| 247 |
| 248 // We limit the number of distinct pings to kMaxReports, but we don't count |
| 249 // urls already in the cache against this number. We don't want to start |
| 250 // classifying too many pages as phishing, but for those that we already |
| 251 // think are phishing we want to give ourselves a chance to fix false |
| 252 // positives. |
| 253 if (cache_.find(phishing_url) != cache_.end()) { |
| 254 VLOG(1) << "Refreshing cache for " << phishing_url; |
| 255 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1); |
| 256 } else if (GetNumReports() > kMaxReportsPerInterval) { |
| 257 VLOG(1) << "Too many report phishing requests sent in the last " |
| 258 << kReportsInterval.InHours() << " hours, not checking " |
| 259 << phishing_url; |
232 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); | 260 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); |
233 cb->Run(phishing_url, false); | 261 cb->Run(phishing_url, false); |
234 return; | 262 return; |
235 } | 263 } |
236 | 264 |
237 ClientPhishingRequest request; | 265 ClientPhishingRequest request; |
238 request.set_url(phishing_url.spec()); | 266 request.set_url(phishing_url.spec()); |
239 request.set_client_score(static_cast<float>(score)); | 267 request.set_client_score(static_cast<float>(score)); |
240 std::string request_data; | 268 std::string request_data; |
241 if (!request.SerializeToString(&request_data)) { | 269 if (!request.SerializeToString(&request_data)) { |
242 // For consistency, we always call the callback asynchronously, rather than | 270 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); |
243 // directly from this method. | 271 VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; |
244 LOG(ERROR) << "Unable to serialize the CSD request. Proto file changed?"; | |
245 cb->Run(phishing_url, false); | 272 cb->Run(phishing_url, false); |
246 return; | 273 return; |
247 } | 274 } |
248 | 275 |
249 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, | 276 URLFetcher* fetcher = URLFetcher::Create(0 /* ID is not used */, |
250 GURL(kClientReportPhishingUrl), | 277 GURL(kClientReportPhishingUrl), |
251 URLFetcher::POST, | 278 URLFetcher::POST, |
252 this); | 279 this); |
253 | 280 |
254 // Remember which callback and URL correspond to the current fetcher object. | 281 // Remember which callback and URL correspond to the current fetcher object. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
299 | 326 |
300 void ClientSideDetectionService::HandlePhishingVerdict( | 327 void ClientSideDetectionService::HandlePhishingVerdict( |
301 const URLFetcher* source, | 328 const URLFetcher* source, |
302 const GURL& url, | 329 const GURL& url, |
303 const net::URLRequestStatus& status, | 330 const net::URLRequestStatus& status, |
304 int response_code, | 331 int response_code, |
305 const ResponseCookies& cookies, | 332 const ResponseCookies& cookies, |
306 const std::string& data) { | 333 const std::string& data) { |
307 ClientPhishingResponse response; | 334 ClientPhishingResponse response; |
308 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); | 335 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); |
309 if (status.is_success() && RC_REQUEST_OK == response_code && | 336 if (status.is_success() && RC_REQUEST_OK == response_code && |
310 response.ParseFromString(data)) { | 337 response.ParseFromString(data)) { |
| 338 // Cache response, possibly flushing an old one. |
| 339 cache_[info->phishing_url] = |
| 340 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now())); |
311 info->callback->Run(info->phishing_url, response.phishy()); | 341 info->callback->Run(info->phishing_url, response.phishy()); |
312 } else { | 342 } else { |
313 DLOG(ERROR) << "Unable to get the server verdict for URL: " | 343 DLOG(ERROR) << "Unable to get the server verdict for URL: " |
314 << info->phishing_url; | 344 << info->phishing_url; |
315 info->callback->Run(info->phishing_url, false); | 345 info->callback->Run(info->phishing_url, false); |
316 } | 346 } |
317 client_phishing_reports_.erase(source); | 347 client_phishing_reports_.erase(source); |
318 delete source; | 348 delete source; |
319 } | 349 } |
320 | 350 |
321 int ClientSideDetectionService::GetNumReportsPerDay() { | 351 bool ClientSideDetectionService::GetCachedResult(const GURL& url, |
322 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1); | 352 bool* is_phishing) { |
| 353 UpdateCache(); |
323 | 354 |
324 // Erase elements older than a day because we will never care about them | 355 PhishingCache::iterator it = cache_.find(url); |
325 // again. | 356 if (it == cache_.end()) { |
| 357 return false; |
| 358 } |
| 359 |
| 360 // We still need to check if the result is valid. |
| 361 const CacheState& cache_state = *it->second; |
| 362 if (cache_state.is_phishing ? |
| 363 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval : |
| 364 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) { |
| 365 *is_phishing = cache_state.is_phishing; |
| 366 return true; |
| 367 } |
| 368 return false; |
| 369 } |
| 370 |
| 371 void ClientSideDetectionService::UpdateCache() { |
| 372 // Since we limit the number of requests but allow pass-through for cache |
| 373 // refreshes, we don't want to remove elements from the cache if they |
| 374 // could be used for this purpose even if we will not use the entry to |
| 375 // satisfy the request from the cache. |
| 376 base::TimeDelta positive_cache_interval = |
| 377 std::max(kPositiveCacheInterval, kReportsInterval); |
| 378 base::TimeDelta negative_cache_interval = |
| 379 std::max(kNegativeCacheInterval, kReportsInterval); |
| 380 |
| 381 // Remove elements from the cache that will no longer be used. |
| 382 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) { |
| 383 const CacheState& cache_state = *it->second; |
| 384 if (cache_state.is_phishing ? |
| 385 cache_state.timestamp > base::Time::Now() - positive_cache_interval : |
| 386 cache_state.timestamp > base::Time::Now() - negative_cache_interval) { |
| 387 ++it; |
| 388 } else { |
| 389 cache_.erase(it++); |
| 390 } |
| 391 } |
| 392 } |
| 393 |
| 394 int ClientSideDetectionService::GetNumReports() { |
| 395 base::Time cutoff = base::Time::Now() - kReportsInterval; |
| 396 |
| 397 // Erase items older than cutoff because we will never care about them again. |
326 while (!phishing_report_times_.empty() && | 398 while (!phishing_report_times_.empty() && |
327 phishing_report_times_.front() < cutoff) { | 399 phishing_report_times_.front() < cutoff) { |
328 phishing_report_times_.pop(); | 400 phishing_report_times_.pop(); |
329 } | 401 } |
330 | 402 |
331 // Return the number of elements that are above the cutoff. | 403 // Return the number of elements that are above the cutoff. |
332 return phishing_report_times_.size(); | 404 return phishing_report_times_.size(); |
333 } | 405 } |
334 | 406 |
335 } // namespace safe_browsing | 407 } // namespace safe_browsing |
OLD | NEW |