OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" | 5 #include "chrome/browser/safe_browsing/client_side_detection_service.h" |
6 | 6 |
7 #include "base/command_line.h" | 7 #include "base/command_line.h" |
8 #include "base/file_path.h" | 8 #include "base/file_path.h" |
9 #include "base/file_util_proxy.h" | 9 #include "base/file_util_proxy.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
11 #include "base/message_loop.h" | 11 #include "base/message_loop.h" |
12 #include "base/metrics/histogram.h" | 12 #include "base/metrics/histogram.h" |
13 #include "base/platform_file.h" | 13 #include "base/platform_file.h" |
14 #include "base/scoped_ptr.h" | 14 #include "base/scoped_ptr.h" |
15 #include "base/stl_util-inl.h" | 15 #include "base/stl_util-inl.h" |
16 #include "base/task.h" | 16 #include "base/task.h" |
17 #include "base/time.h" | 17 #include "base/time.h" |
18 #include "chrome/browser/browser_thread.h" | 18 #include "chrome/browser/browser_thread.h" |
19 #include "chrome/browser/safe_browsing/csd.pb.h" | 19 #include "chrome/browser/safe_browsing/csd.pb.h" |
20 #include "chrome/common/net/http_return.h" | 20 #include "chrome/common/net/http_return.h" |
21 #include "chrome/common/net/url_fetcher.h" | 21 #include "chrome/common/net/url_fetcher.h" |
22 #include "chrome/common/net/url_request_context_getter.h" | 22 #include "chrome/common/net/url_request_context_getter.h" |
23 #include "googleurl/src/gurl.h" | 23 #include "googleurl/src/gurl.h" |
24 #include "net/base/load_flags.h" | 24 #include "net/base/load_flags.h" |
25 #include "net/url_request/url_request_status.h" | 25 #include "net/url_request/url_request_status.h" |
26 | 26 |
27 namespace safe_browsing { | 27 namespace safe_browsing { |
28 | 28 |
29 const int ClientSideDetectionService::kMaxReportsPerDay = 3; | 29 const int ClientSideDetectionService::kMaxReports = 3; |
30 | |
31 const base::TimeDelta ClientSideDetectionService::kReportsInterval = | |
32 base::TimeDelta::FromDays(1); | |
33 const base::TimeDelta ClientSideDetectionService::kNegativeCacheInterval = | |
34 base::TimeDelta::FromDays(1); | |
35 const base::TimeDelta ClientSideDetectionService::kPositiveCacheInterval = | |
36 base::TimeDelta::FromMinutes(30); | |
30 | 37 |
31 const char ClientSideDetectionService::kClientReportPhishingUrl[] = | 38 const char ClientSideDetectionService::kClientReportPhishingUrl[] = |
32 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; | 39 "https://sb-ssl.google.com/safebrowsing/clientreport/phishing"; |
33 const char ClientSideDetectionService::kClientModelUrl[] = | 40 const char ClientSideDetectionService::kClientModelUrl[] = |
34 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; | 41 "https://ssl.gstatic.com/safebrowsing/csd/client_model_v0.pb"; |
35 | 42 |
36 struct ClientSideDetectionService::ClientReportInfo { | 43 struct ClientSideDetectionService::ClientReportInfo { |
37 scoped_ptr<ClientReportPhishingRequestCallback> callback; | 44 scoped_ptr<ClientReportPhishingRequestCallback> callback; |
38 GURL phishing_url; | 45 GURL phishing_url; |
39 }; | 46 }; |
40 | 47 |
48 ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) | |
49 : is_phishing(phish), | |
50 timestamp(time) {} | |
51 | |
41 ClientSideDetectionService::ClientSideDetectionService( | 52 ClientSideDetectionService::ClientSideDetectionService( |
42 const FilePath& model_path, | 53 const FilePath& model_path, |
43 URLRequestContextGetter* request_context_getter) | 54 URLRequestContextGetter* request_context_getter) |
44 : model_path_(model_path), | 55 : model_path_(model_path), |
45 model_status_(UNKNOWN_STATUS), | 56 model_status_(UNKNOWN_STATUS), |
46 model_file_(base::kInvalidPlatformFileValue), | 57 model_file_(base::kInvalidPlatformFileValue), |
47 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), | 58 ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), |
48 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), | 59 ALLOW_THIS_IN_INITIALIZER_LIST(callback_factory_(this)), |
49 request_context_getter_(request_context_getter) { | 60 request_context_getter_(request_context_getter) { |
50 } | 61 } |
(...skipping 168 matching lines...) | |
219 } | 230 } |
220 } | 231 } |
221 | 232 |
222 void ClientSideDetectionService::StartClientReportPhishingRequest( | 233 void ClientSideDetectionService::StartClientReportPhishingRequest( |
223 const GURL& phishing_url, | 234 const GURL& phishing_url, |
224 double score, | 235 double score, |
225 ClientReportPhishingRequestCallback* callback) { | 236 ClientReportPhishingRequestCallback* callback) { |
226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | 237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); |
227 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); | 238 scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); |
228 | 239 |
229 if (GetNumReportsPerDay() > kMaxReportsPerDay) { | 240 bool is_phishing; |
230 LOG(WARNING) << "Too many report phishing requests sent in the last day, " | 241 if (GetCachedResult(phishing_url, &is_phishing)) { |
231 << "not checking " << phishing_url; | 242 VLOG(1) << "Satisfying request for " << phishing_url << " from cache"; |
243 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); | |
244 cb->Run(phishing_url, is_phishing); | |
245 return; | |
246 } | |
247 | |
248 // We only limit the number of distinct urls to kMaxReports, but we | |
249 // currently don't count urls already in the cache against this number. We | |
250 // don't want to start classifying too many pages as phishing, but for those | |
251 // that we already think are phishing we want to give ourselves a chance to | |
252 // fix false positives. | |
253 if (cache_.find(phishing_url) != cache_.end()) { | |
254 VLOG(1) << "Refreshing cache for " << phishing_url; | |
255 UMA_HISTOGRAM_COUNTS("SBClientPhishing.CacheRefresh", 1); | |
256 } else if (GetNumReports() > kMaxReports) { | |
257 VLOG(1) << "Too many report phishing requests sent in the last " | |
258 << kReportsInterval.InHours() << " hours, not checking " | |
259 << phishing_url; | |
232 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); | 260 UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSent", 1); |
233 cb->Run(phishing_url, false); | 261 cb->Run(phishing_url, false); |
234 return; | 262 return; |
235 } | 263 } |
236 | 264 |
237 ClientPhishingRequest request; | 265 ClientPhishingRequest request; |
238 request.set_url(phishing_url.spec()); | 266 request.set_url(phishing_url.spec()); |
239 request.set_client_score(static_cast<float>(score)); | 267 request.set_client_score(static_cast<float>(score)); |
240 std::string request_data; | 268 std::string request_data; |
241 if (!request.SerializeToString(&request_data)) { | 269 if (!request.SerializeToString(&request_data)) { |
(...skipping 57 matching lines...) | |
299 | 327 |
300 void ClientSideDetectionService::HandlePhishingVerdict( | 328 void ClientSideDetectionService::HandlePhishingVerdict( |
301 const URLFetcher* source, | 329 const URLFetcher* source, |
302 const GURL& url, | 330 const GURL& url, |
303 const net::URLRequestStatus& status, | 331 const net::URLRequestStatus& status, |
304 int response_code, | 332 int response_code, |
305 const ResponseCookies& cookies, | 333 const ResponseCookies& cookies, |
306 const std::string& data) { | 334 const std::string& data) { |
307 ClientPhishingResponse response; | 335 ClientPhishingResponse response; |
308 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); | 336 scoped_ptr<ClientReportInfo> info(client_phishing_reports_[source]); |
309 if (status.is_success() && RC_REQUEST_OK == response_code && | 337 if (status.is_success() && RC_REQUEST_OK == response_code && |
310 response.ParseFromString(data)) { | 338 response.ParseFromString(data)) { |
339 // Cache response, possibly flushing an old one. | |
340 cache_[info->phishing_url] = | |
341 make_linked_ptr(new CacheState(response.phishy(), base::Time::Now())); | |
311 info->callback->Run(info->phishing_url, response.phishy()); | 342 info->callback->Run(info->phishing_url, response.phishy()); |
312 } else { | 343 } else { |
313 DLOG(ERROR) << "Unable to get the server verdict for URL: " | 344 DLOG(ERROR) << "Unable to get the server verdict for URL: " |
314 << info->phishing_url; | 345 << info->phishing_url; |
315 info->callback->Run(info->phishing_url, false); | 346 info->callback->Run(info->phishing_url, false); |
316 } | 347 } |
317 client_phishing_reports_.erase(source); | 348 client_phishing_reports_.erase(source); |
318 delete source; | 349 delete source; |
319 } | 350 } |
320 | 351 |
321 int ClientSideDetectionService::GetNumReportsPerDay() { | 352 bool ClientSideDetectionService::GetCachedResult(const GURL& url, |
322 base::Time cutoff = base::Time::Now() - base::TimeDelta::FromDays(1); | 353 bool* is_phishing) { |
354 UpdateCache(); | |
[Inline review comment — Brian Ryner, 2011/02/09 23:46:42] If the cache was going to be very large, the linea… (remainder of the comment is truncated in the source) |
323 | 355 |
324 // Erase elements older than a day because we will never care about them | 356 PhishingCache::iterator it = cache_.find(url); |
325 // again. | 357 if (it == cache_.end()) { |
358 return false; | |
359 } | |
360 | |
361 // We still need to check if the result is valid. | |
362 const CacheState& cache_state = *it->second; | |
363 if (cache_state.is_phishing ? | |
364 cache_state.timestamp > base::Time::Now() - kPositiveCacheInterval : | |
365 cache_state.timestamp > base::Time::Now() - kNegativeCacheInterval) { | |
366 *is_phishing = cache_state.is_phishing; | |
367 return true; | |
368 } | |
369 return false; | |
370 } | |
371 | |
372 void ClientSideDetectionService::UpdateCache() { | |
373 // Since we limit the number of requests but allow pass-through for cache | |
374 // refreshes, we don't want to remove elements from the cache if they | |
375 // could be used for this purpose even if we will not use the entry to | |
376 // satisfy the request from the cache. | |
377 base::TimeDelta positive_cache_interval = | |
378 std::max(kPositiveCacheInterval, kReportsInterval); | |
379 base::TimeDelta negative_cache_interval = | |
380 std::max(kNegativeCacheInterval, kReportsInterval); | |
381 | |
382 // Remove elements from the cache that will no longer be used. | |
383 for (PhishingCache::iterator it = cache_.begin(); it != cache_.end();) { | |
384 const CacheState& cache_state = *it->second; | |
385 if (cache_state.is_phishing ? | |
386 cache_state.timestamp > base::Time::Now() - positive_cache_interval : | |
387 cache_state.timestamp > base::Time::Now() - negative_cache_interval) { | |
388 ++it; | |
389 } else { | |
390 cache_.erase(it++); | |
391 } | |
392 } | |
393 } | |
394 | |
395 int ClientSideDetectionService::GetNumReports() { | |
396 base::Time cutoff = base::Time::Now() - kReportsInterval; | |
397 | |
398 // Erase items older than cutoff because we will never care about them again. | |
326 while (!phishing_report_times_.empty() && | 399 while (!phishing_report_times_.empty() && |
327 phishing_report_times_.front() < cutoff) { | 400 phishing_report_times_.front() < cutoff) { |
328 phishing_report_times_.pop(); | 401 phishing_report_times_.pop(); |
329 } | 402 } |
330 | 403 |
331 // Return the number of elements that are above the cutoff. | 404 // Return the number of elements that are above the cutoff. |
332 return phishing_report_times_.size(); | 405 return phishing_report_times_.size(); |
333 } | 406 } |
334 | 407 |
335 } // namespace safe_browsing | 408 } // namespace safe_browsing |
OLD | NEW |