Index: chrome/browser/safe_browsing/client_side_detection_service.cc |
diff --git a/chrome/browser/safe_browsing/client_side_detection_service.cc b/chrome/browser/safe_browsing/client_side_detection_service.cc |
index fc3aa2c7d637203025e632e7f45777e2958b29f1..a99b372a22f073eb6028331ee29aaa88b58b1578 100644 |
--- a/chrome/browser/safe_browsing/client_side_detection_service.cc |
+++ b/chrome/browser/safe_browsing/client_side_detection_service.cc |
@@ -10,13 +10,18 @@ |
#include "base/memory/scoped_ptr.h" |
#include "base/message_loop.h" |
#include "base/metrics/histogram.h" |
+#include "base/string_util.h" |
#include "base/stl_util.h" |
#include "base/task.h" |
#include "base/time.h" |
+#include "chrome/browser/browser_process.h" |
+#include "chrome/browser/safe_browsing/browser_features.h" |
+#include "chrome/browser/safe_browsing/safe_browsing_service.h" |
#include "chrome/common/net/http_return.h" |
#include "chrome/common/safe_browsing/client_model.pb.h" |
#include "chrome/common/safe_browsing/csd.pb.h" |
#include "chrome/common/safe_browsing/safebrowsing_messages.h" |
+#include "chrome/renderer/safe_browsing/features.h" |
#include "content/browser/browser_thread.h" |
#include "content/browser/renderer_host/render_process_host.h" |
#include "content/common/notification_service.h" |
@@ -63,7 +68,8 @@ ClientSideDetectionService::CacheState::CacheState(bool phish, base::Time time) |
ClientSideDetectionService::ClientSideDetectionService( |
net::URLRequestContextGetter* request_context_getter) |
- : ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), |
+ : sb_service_(g_browser_process->safe_browsing_service()), |
+ ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)), |
request_context_getter_(request_context_getter) { |
registrar_.Add(this, content::NOTIFICATION_RENDERER_PROCESS_CREATED, |
NotificationService::AllSources()); |
@@ -237,6 +243,77 @@ void ClientSideDetectionService::EndFetchModel(ClientModelStatus status) { |
delay_ms); |
} |
+// static. Fills sanitized_request with a reduced copy of full_request: the output is cleared first, suffix_prefix_hash, is_phishing and model_version are copied only when set, client_score is always copied, and each feature is copied unless its name matches one of the filter lists below. |
+void ClientSideDetectionService::SanitizeRequestForPingback( |
+ const ClientPhishingRequest& full_request, |
+ ClientPhishingRequest* sanitized_request) { |
+ DCHECK(full_request.IsInitialized()); |
+ sanitized_request->Clear(); |
+ if (full_request.has_suffix_prefix_hash()) { |
+ sanitized_request->set_suffix_prefix_hash( |
+ full_request.suffix_prefix_hash()); |
+ } |
+ sanitized_request->set_client_score(full_request.client_score()); |
+ if (full_request.has_is_phishing()) { |
+ sanitized_request->set_is_phishing(full_request.is_phishing()); |
+ } |
+ |
+ // Token and term features are not included in the pingback: a feature_map entry is dropped when its name starts (case-sensitively) with any of the prefixes listed here.
mattm
2011/08/12 03:00:28
Did you consider a white-listing approach to sanit
Brian Ryner
2011/08/12 04:52:24
That's a good point; I think whitelisting is proba
|
+ static const char* const kFilteredModelFeatures[] = { |
+ features::kUrlTldToken, |
+ features::kUrlDomainToken, |
+ features::kUrlOtherHostToken, |
+ features::kUrlPathToken, |
+ features::kPageLinkDomain, |
+ features::kPageTerm, |
+ }; |
+ |
+ for (int i = 0; i < full_request.feature_map_size(); ++i) { |
+ const ClientPhishingRequest_Feature& feature = full_request.feature_map(i); |
+ bool filtered = false; |
+ for (size_t j = 0; j < arraysize(kFilteredModelFeatures); ++j) { |
+ if (StartsWithASCII(feature.name(), kFilteredModelFeatures[j], |
+ true /* case sensitive */)) { |
+ filtered = true; |
+ break; |
+ } |
+ } |
+ if (!filtered) { |
+ sanitized_request->add_feature_map()->CopyFrom(feature); |
+ } |
+ } |
+ |
+ if (full_request.has_model_version()) { |
+ sanitized_request->set_model_version(full_request.model_version()); |
+ } |
+ |
+ static const char* const kFilteredNonModelFeatures[] = { |
+ features::kBadIpFetch, |
+ features::kSafeBrowsingMaliciousUrl, |
+ features::kSafeBrowsingOriginalUrl, |
+ }; |
+ |
+ for (int i = 0; i < full_request.non_model_feature_map_size(); ++i) { |
+ const ClientPhishingRequest_Feature& feature = |
+ full_request.non_model_feature_map(i); |
+ bool filtered = false; |
+ for (size_t j = 0; j < arraysize(kFilteredNonModelFeatures); ++j) { |
+ if (StartsWithASCII(feature.name(), kFilteredNonModelFeatures[j], |
+ true /* case sensitive */)) { |
+ filtered = true; |
+ break; |
+ } |
+ } |
+ // Unlike the prefix checks above, we look for the referrer key followed by "=" as a substring anywhere in the name, since there are a few different |
+ // permutations of the feature; a non-model feature is copied only if it passed the prefix filter and does not contain that substring. |
+ if (!filtered && |
+ feature.name().find(std::string(features::kReferrer) + "=") == |
+ std::string::npos) { |
+ sanitized_request->add_non_model_feature_map()->CopyFrom(feature); |
+ } |
+ } |
+} |
+ |
void ClientSideDetectionService::StartClientReportPhishingRequest( |
ClientPhishingRequest* verdict, |
ClientReportPhishingRequestCallback* callback) { |
@@ -244,8 +321,16 @@ void ClientSideDetectionService::StartClientReportPhishingRequest( |
scoped_ptr<ClientPhishingRequest> request(verdict); |
scoped_ptr<ClientReportPhishingRequestCallback> cb(callback); |
+ // Create the version of the request proto that we'll send over the network. |
+ ClientPhishingRequest request_to_send; |
+ if (sb_service_ && sb_service_->CanReportStats()) { |
+ request_to_send.CopyFrom(*request); |
+ } else { |
+ SanitizeRequestForPingback(*request, &request_to_send); |
+ } |
+ |
std::string request_data; |
- if (!request->SerializeToString(&request_data)) { |
+ if (!request_to_send.SerializeToString(&request_data)) { |
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestNotSerialized", 1); |
VLOG(1) << "Unable to serialize the CSD request. Proto file changed?"; |
if (cb.get()) { |