Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(151)

Unified Diff: chrome/browser/safe_browsing/client_side_detection_host.cc

Issue 173133004: Separate pre-classification checks for client-side malware and phishing (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Remove done() Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/client_side_detection_host.cc
diff --git a/chrome/browser/safe_browsing/client_side_detection_host.cc b/chrome/browser/safe_browsing/client_side_detection_host.cc
index 130f0cad52a83d237858b5fe12738cb59e8e25ea..45f9db40f31bedea013518c3029630ac71623031 100644
--- a/chrome/browser/safe_browsing/client_side_detection_host.cc
+++ b/chrome/browser/safe_browsing/client_side_detection_host.cc
@@ -50,28 +50,34 @@ const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;
const char kSafeBrowsingMatchKey[] = "safe_browsing_match";
+typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;
+
// This class is instantiated each time a new toplevel URL loads, and
-// asynchronously checks whether the phishing classifier should run for this
-// URL. If so, it notifies the renderer with a StartPhishingDetection IPC.
-// Objects of this class are ref-counted and will be destroyed once nobody
-// uses it anymore. If |web_contents|, |csd_service| or |host| go away you need
-// to call Cancel(). We keep the |database_manager| alive in a ref pointer for
-// as long as it takes.
+// asynchronously checks whether the malware and phishing classifiers should run
+// for this URL. If so, it notifies the host class by calling the provided
+// callback form the UI thread. Objects of this class are ref-counted and will
+// be destroyed once nobody uses it anymore. If |web_contents|, |csd_service|
+// or |host| go away you need to call Cancel(). We keep the |database_manager|
+// alive in a ref pointer for as long as it takes.
class ClientSideDetectionHost::ShouldClassifyUrlRequest
: public base::RefCountedThreadSafe<
ClientSideDetectionHost::ShouldClassifyUrlRequest> {
public:
- ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,
- WebContents* web_contents,
- ClientSideDetectionService* csd_service,
- SafeBrowsingDatabaseManager* database_manager,
- ClientSideDetectionHost* host)
- : canceled_(false),
- params_(params),
+ ShouldClassifyUrlRequest(
+ const content::FrameNavigateParams& params,
+ const ShouldClassifyUrlCallback& start_phishing_classification,
+ const ShouldClassifyUrlCallback& start_malware_classification,
+ WebContents* web_contents,
+ ClientSideDetectionService* csd_service,
+ SafeBrowsingDatabaseManager* database_manager,
+ ClientSideDetectionHost* host)
+ : params_(params),
web_contents_(web_contents),
csd_service_(csd_service),
database_manager_(database_manager),
- host_(host) {
+ host_(host),
+ start_phishing_classification_cb_(start_phishing_classification),
+ start_malware_classification_cb_(start_malware_classification) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
DCHECK(web_contents_);
DCHECK(csd_service_);
@@ -84,6 +90,7 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest
// We start by doing some simple checks that can run on the UI thread.
UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);
+ UMA_HISTOGRAM_COUNTS("SBClientMalware.ClassificationStart", 1);
// Only classify [X]HTML documents.
if (params_.contents_mime_type != "text/html" &&
@@ -91,47 +98,41 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because it has an unsupported MIME type: "
<< params_.contents_mime_type;
- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
- NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
- NO_CLASSIFY_MAX);
- return;
+ DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);
}
if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {
VLOG(1) << "Skipping phishing classification for URL: " << params_.url
<< " because of hosting on private IP: "
<< params_.socket_address.host();
- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
- NO_CLASSIFY_PRIVATE_IP,
- NO_CLASSIFY_MAX);
- return;
+ DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);
}
- // Don't run the phishing classifier if the tab is incognito.
+ // Don't run any classifier if the tab is incognito.
if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {
- VLOG(1) << "Skipping phishing classification for URL: " << params_.url
- << " because we're browsing incognito.";
- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
- NO_CLASSIFY_OFF_THE_RECORD,
- NO_CLASSIFY_MAX);
-
- return;
+ VLOG(1) << "Skipping phishing and malware classification for URL: "
+ << params_.url << " because we're browsing incognito.";
+ DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);
+ DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);
}
// We lookup the csd-whitelist before we lookup the cache because
// a URL may have recently been whitelisted. If the URL matches
- // the csd-whitelist we won't start classification. The
+ // the csd-whitelist we won't phishing start classification. The
// csd-whitelist check has to be done on the IO thread because it
// uses the SafeBrowsing service class.
- BrowserThread::PostTask(
- BrowserThread::IO,
- FROM_HERE,
- base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,
- this, params_.url));
+ if (MaybeClassifyForPhishing() || MaybeClassifyForMalware()) {
+ BrowserThread::PostTask(
+ BrowserThread::IO,
+ FROM_HERE,
+ base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,
+ this, params_.url));
+ }
}
void Cancel() {
- canceled_ = true;
+ DontClassifyForPhishing(NO_CLASSIFY_CANCEL);
+ DontClassifyForMalware(NO_CLASSIFY_CANCEL);
// Just to make sure we don't do anything stupid we reset all these
// pointers except for the safebrowsing service class which may be
// accessed by CheckCsdWhitelist().
@@ -152,6 +153,10 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest
NO_CLASSIFY_MATCH_CSD_WHITELIST,
NO_CLASSIFY_TOO_MANY_REPORTS,
NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,
+ NO_CLASSIFY_NO_DATABASE_MANAGER,
+ NO_CLASSIFY_KILLSWITCH,
+ NO_CLASSIFY_CANCEL,
+ NO_CLASSIFY_RESULT_FROM_CACHE,
NO_CLASSIFY_MAX // Always add new values before this one.
};
@@ -159,43 +164,87 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest
// The destructor can be called either from the UI or the IO thread.
virtual ~ShouldClassifyUrlRequest() { }
- void CheckCsdWhitelist(const GURL& url) {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- if (!database_manager_.get() ||
- database_manager_->MatchCsdWhitelistUrl(url)) {
- // We're done. There is no point in going back to the UI thread.
- VLOG(1) << "Skipping phishing classification for URL: " << url
- << " because it matches the csd whitelist";
+ bool MaybeClassifyForPhishing() const {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ return !start_phishing_classification_cb_.is_null();
+ }
+
+ bool MaybeClassifyForMalware() const {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ return !start_malware_classification_cb_.is_null();
+ }
+
+ void DontClassifyForPhishing(PreClassificationCheckFailures reason) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ if (MaybeClassifyForPhishing()) {
+ // Track the first reason why we stopped classifying for phishing.
UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
- NO_CLASSIFY_MATCH_CSD_WHITELIST,
- NO_CLASSIFY_MAX);
- return;
+ reason, NO_CLASSIFY_MAX);
+ start_phishing_classification_cb_.Run(false);
}
+ start_phishing_classification_cb_.Reset();
+ }
- bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();
+ void DontClassifyForMalware(PreClassificationCheckFailures reason) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ if (MaybeClassifyForMalware()) {
+ // Track the first reason why we stopped classifying for malware.
+ UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",
+ reason, NO_CLASSIFY_MAX);
+ start_malware_classification_cb_.Run(false);
+ }
+ start_malware_classification_cb_.Reset();
+ }
+ void CheckCsdWhitelist(const GURL& url) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
+ // We don't want to call the classification callbacks from the IO
+ // thread so we simply pass the results of this method to CheckCache()
+ // which is called on the UI thread;
+ PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;
+ PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;
+ if (!database_manager_.get()) {
+ // We cannot check the Safe Browsing whitelists so we stop here
+ // for safety.
+ malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;
+ } else {
+ if (database_manager_->MatchCsdWhitelistUrl(url)) {
+ VLOG(1) << "Skipping phishing classification for URL: " << url
+ << " because it matches the csd whitelist";
+ phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;
+ }
+ if (database_manager_->IsMalwareKillSwitchOn()) {
+ malware_reason = NO_CLASSIFY_KILLSWITCH;
+ }
+ }
BrowserThread::PostTask(
BrowserThread::UI,
FROM_HERE,
- base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,
- malware_killswitch_on));
+ base::Bind(&ShouldClassifyUrlRequest::CheckCache,
+ this,
+ phishing_reason,
+ malware_reason));
}
- void CheckCache(bool malware_killswitch_on) {
+ void CheckCache(PreClassificationCheckFailures phishing_reason,
+ PreClassificationCheckFailures malware_reason) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
- if (canceled_) {
- return;
+ if (phishing_reason != NO_CLASSIFY_MAX)
+ DontClassifyForPhishing(phishing_reason);
+ if (malware_reason != NO_CLASSIFY_MAX)
+ DontClassifyForMalware(malware_reason);
+ if (!MaybeClassifyForMalware() && !MaybeClassifyForPhishing()) {
+ return; // No point in doing anything else.
}
-
- host_->SetMalwareKillSwitch(malware_killswitch_on);
- // If result is cached, we don't want to run classification again
+ // If result is cached, we don't want to run classification again.
+ // In that case we're just trying to show the warning.
bool is_phishing;
if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {
VLOG(1) << "Satisfying request for " << params_.url << " from cache";
UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);
// Since we are already on the UI thread, this is safe.
host_->MaybeShowPhishingWarning(params_.url, is_phishing);
- return;
+ DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);
}
// We want to limit the number of requests, though we will ignore the
@@ -209,25 +258,21 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest
} else if (csd_service_->OverPhishingReportLimit()) {
VLOG(1) << "Too many report phishing requests sent recently, "
<< "not running classification for " << params_.url;
- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",
- NO_CLASSIFY_TOO_MANY_REPORTS,
- NO_CLASSIFY_MAX);
- return;
+ DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);
+ }
+ if (csd_service_->OverMalwareReportLimit()) {
+ DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);
}
// Everything checks out, so start classification.
// |web_contents_| is safe to call as we will be destructed
// before it is.
- VLOG(1) << "Instruct renderer to start phishing detection for URL: "
- << params_.url;
- content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();
- rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
- rvh->GetRoutingID(), params_.url));
+ if (MaybeClassifyForPhishing())
+ start_phishing_classification_cb_.Run(true);
+ if (MaybeClassifyForMalware())
+ start_malware_classification_cb_.Run(true);
}
- // No need to protect |canceled_| with a lock because it is only read and
- // written by the UI thread.
- bool canceled_;
content::FrameNavigateParams params_;
WebContents* web_contents_;
ClientSideDetectionService* csd_service_;
@@ -236,6 +281,9 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest
scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;
ClientSideDetectionHost* host_;
+ ShouldClassifyUrlCallback start_phishing_classification_cb_;
+ ShouldClassifyUrlCallback start_malware_classification_cb_;
+
DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);
};
@@ -248,10 +296,11 @@ ClientSideDetectionHost* ClientSideDetectionHost::Create(
ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
: content::WebContentsObserver(tab),
csd_service_(NULL),
+ classification_request_(NULL),
+ should_extract_malware_features_(true),
+ onload_complete_(false),
weak_factory_(this),
- unsafe_unique_page_id_(-1),
- malware_killswitch_on_(false),
- malware_report_enabled_(false) {
+ unsafe_unique_page_id_(-1) {
DCHECK(tab);
// Note: csd_service_ and sb_service will be NULL here in testing.
csd_service_ = g_browser_process->safe_browsing_detection_service();
@@ -266,13 +315,6 @@ ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)
database_manager_ = sb_service->database_manager();
ui_manager_->AddObserver(this);
}
-
- // Only enable the malware bad IP matching and report feature for canary
- // and dev channel.
- chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
- malware_report_enabled_ = (
- channel == chrome::VersionInfo::CHANNEL_DEV ||
- channel == chrome::VersionInfo::CHANNEL_CANARY);
}
ClientSideDetectionHost::~ClientSideDetectionHost() {
@@ -324,14 +366,25 @@ void ClientSideDetectionHost::DidNavigateMainFrame(
cur_host_ = params.url.host();
cur_host_redirects_ = params.redirects;
}
+ browse_info_->url = params.url;
browse_info_->host_redirects = cur_host_redirects_;
browse_info_->url_redirects = params.redirects;
browse_info_->referrer = params.referrer.url;
browse_info_->http_status_code = details.http_status_code;
+ browse_info_->page_id = params.page_id;
- // Notify the renderer if it should classify this URL.
+ should_extract_malware_features_ = true;
+ should_classify_for_malware_.reset();
mattm 2014/02/22 02:49:03 Seems like a plain bool initialized to false here
noé 2014/03/14 22:21:32 MaybeStartMalwareFeatureExtraction() needs to know
mattm 2014/03/18 02:19:06 But the only thing it actually checks is "should_c
noé 2014/03/20 17:01:45 You are absolutely right. So sorry about the unne
+ onload_complete_ = false;
+
+ // Check whether we can cassify the current URL for phishing or malware.
classification_request_ = new ShouldClassifyUrlRequest(
- params, web_contents(), csd_service_, database_manager_.get(), this);
+ params,
+ base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,
+ weak_factory_.GetWeakPtr()),
+ base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,
+ weak_factory_.GetWeakPtr()),
+ web_contents(), csd_service_, database_manager_.get(), this);
classification_request_->Start();
}
@@ -414,6 +467,65 @@ void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {
feature_extractor_.reset();
}
+void ClientSideDetectionHost::OnPhishingPreClassificationDone(
+ bool should_classify) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ if (browse_info_.get() && should_classify) {
+ VLOG(1) << "Instruct renderer to start phishing detection for URL: "
+ << browse_info_->url;
+ content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();
+ rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(
+ rvh->GetRoutingID(), browse_info_->url));
+ }
+}
+
+void ClientSideDetectionHost::OnMalwarePreClassificationDone(
+ bool should_classify) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ // If classification checks failed we should stop extracting malware features.
+ should_extract_malware_features_ = should_classify;
+ should_classify_for_malware_.reset(new bool(should_classify));
+ MaybeStartMalwareFeatureExtraction();
+}
+
+void ClientSideDetectionHost::DocumentOnLoadCompletedInMainFrame(
+ int32 page_id) {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ DCHECK(browse_info_.get());
+ if (browse_info_->page_id != page_id) {
+ // Something weird is happening here. The BrowseInfo page ID
+ // should always be the same as the most recent load.
+ UMA_HISTOGRAM_COUNTS("SBClientMalware.UnexpectedPageId", 1);
+ return;
+ }
+ onload_complete_ = true;
+ MaybeStartMalwareFeatureExtraction();
+}
+
+void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {
+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+ if (should_classify_for_malware_.get() &&
+ *should_classify_for_malware_ &&
+ onload_complete_) {
+ scoped_ptr<ClientMalwareRequest> malware_verdict(
+ new ClientMalwareRequest);
+ // Start browser-side malware feature extraction. Once we're done it will
+ // send the malware client verdict request.
+ malware_verdict->set_url(browse_info_->url.spec());
+ const GURL& referrer = browse_info_->referrer;
+ if (referrer.SchemeIs("http")) { // Only send http urls.
+ malware_verdict->set_referrer_url(referrer.spec());
+ }
+ // This function doesn't expect browse_info_ to stay around after this
+ // function returns.
+ feature_extractor_->ExtractMalwareFeatures(
+ browse_info_.get(),
+ malware_verdict.release(),
+ base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
+ weak_factory_.GetWeakPtr()));
+ }
+}
+
void ClientSideDetectionHost::OnPhishingDetectionDone(
const std::string& verdict_str) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
@@ -434,27 +546,6 @@ void ClientSideDetectionHost::OnPhishingDetectionDone(
browse_info_.get() &&
verdict->ParseFromString(verdict_str) &&
verdict->IsInitialized()) {
- // We do the malware IP matching and request sending if the feature
- // is enabled.
- if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {
- scoped_ptr<ClientMalwareRequest> malware_verdict(
- new ClientMalwareRequest);
- // Start browser-side malware feature extraction. Once we're done it will
- // send the malware client verdict request.
- malware_verdict->set_url(verdict->url());
- const GURL& referrer = browse_info_->referrer;
- if (referrer.SchemeIs("http")) { // Only send http urls.
- malware_verdict->set_referrer_url(referrer.spec());
- }
- // This function doesn't expect browse_info_ to stay around after this
- // function returns.
- feature_extractor_->ExtractMalwareFeatures(
- browse_info_.get(),
- malware_verdict.release(),
- base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,
- weak_factory_.GetWeakPtr()));
- }
-
// We only send phishing verdict to the server if the verdict is phishing or
// if a SafeBrowsing interstitial was already shown for this site. E.g., a
// malware or phishing interstitial was shown but the user clicked
@@ -472,7 +563,6 @@ void ClientSideDetectionHost::OnPhishingDetectionDone(
weak_factory_.GetWeakPtr()));
}
}
- browse_info_.reset();
}
void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,
@@ -601,15 +691,13 @@ void ClientSideDetectionHost::Observe(
DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);
const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(
details).ptr();
- if (req && browse_info_.get() && malware_report_enabled_ &&
- !MalwareKillSwitchIsOn()) {
- if (req->url.is_valid()) {
- UpdateIPUrlMap(req->socket_address.host() /* ip */,
- req->url.spec() /* url */,
- req->method,
- req->referrer,
- req->resource_type);
- }
+ if (req && browse_info_.get() &&
+ should_extract_malware_features_ && req->url.is_valid()) {
+ UpdateIPUrlMap(req->socket_address.host() /* ip */,
+ req->url.spec() /* url */,
+ req->method,
+ req->referrer,
+ req->resource_type);
}
}
@@ -640,14 +728,4 @@ void ClientSideDetectionHost::set_safe_browsing_managers(
database_manager_ = database_manager;
}
-bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
- return malware_killswitch_on_;
-}
-
-void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {
- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
- malware_killswitch_on_ = killswitch_on;
-}
-
} // namespace safe_browsing

Powered by Google App Engine
This is Rietveld 408576698