chrome/browser/safe_browsing/client_side_detection_host.cc - Issue 173133004: Separate pre-classification checks for client-side malware and phishing

Unified Diff: chrome/browser/safe_browsing/client_side_detection_host.cc

Issue 173133004: Separate pre-classification checks for client-side malware and phishing (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Remove done() Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/browser/safe_browsing/client_side_detection_host.h ('k') | chrome/browser/safe_browsing/client_side_detection_service.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/safe_browsing/client_side_detection_host.cc

diff --git a/chrome/browser/safe_browsing/client_side_detection_host.cc b/chrome/browser/safe_browsing/client_side_detection_host.cc

index 130f0cad52a83d237858b5fe12738cb59e8e25ea..45f9db40f31bedea013518c3029630ac71623031 100644

--- a/chrome/browser/safe_browsing/client_side_detection_host.cc

+++ b/chrome/browser/safe_browsing/client_side_detection_host.cc

@@ -50,28 +50,34 @@ const int ClientSideDetectionHost::kMaxIPsPerBrowse = 200;

const char kSafeBrowsingMatchKey[] = "safe_browsing_match";

+typedef base::Callback<void(bool)> ShouldClassifyUrlCallback;

// This class is instantiated each time a new toplevel URL loads, and

-// asynchronously checks whether the phishing classifier should run for this

-// URL. If so, it notifies the renderer with a StartPhishingDetection IPC.

-// Objects of this class are ref-counted and will be destroyed once nobody

-// to call Cancel(). We keep the |database_manager| alive in a ref pointer for

-// as long as it takes.

+// asynchronously checks whether the malware and phishing classifiers should run

+// for this URL. If so, it notifies the host class by calling the provided

+// callback form the UI thread. Objects of this class are ref-counted and will

+// be destroyed once nobody uses it anymore. If |web_contents|, |csd_service|

+// or |host| go away you need to call Cancel(). We keep the |database_manager|

+// alive in a ref pointer for as long as it takes.

class ClientSideDetectionHost::ShouldClassifyUrlRequest

: public base::RefCountedThreadSafe<

ClientSideDetectionHost::ShouldClassifyUrlRequest> {

public:

- ShouldClassifyUrlRequest(const content::FrameNavigateParams& params,

- WebContents* web_contents,

- ClientSideDetectionService* csd_service,

- SafeBrowsingDatabaseManager* database_manager,

- ClientSideDetectionHost* host)

- : canceled_(false),

- params_(params),

+ ShouldClassifyUrlRequest(

+ const content::FrameNavigateParams& params,

+ const ShouldClassifyUrlCallback& start_phishing_classification,

+ const ShouldClassifyUrlCallback& start_malware_classification,

+ WebContents* web_contents,

+ ClientSideDetectionService* csd_service,

+ SafeBrowsingDatabaseManager* database_manager,

+ ClientSideDetectionHost* host)

+ : params_(params),

web_contents_(web_contents),

csd_service_(csd_service),

database_manager_(database_manager),

- host_(host) {

+ host_(host),

+ start_phishing_classification_cb_(start_phishing_classification),

+ start_malware_classification_cb_(start_malware_classification) {

DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

DCHECK(web_contents_);

DCHECK(csd_service_);

@@ -84,6 +90,7 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest

// We start by doing some simple checks that can run on the UI thread.

UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1);

+ UMA_HISTOGRAM_COUNTS("SBClientMalware.ClassificationStart", 1);

// Only classify [X]HTML documents.

if (params_.contents_mime_type != "text/html" &&

@@ -91,47 +98,41 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest

VLOG(1) << "Skipping phishing classification for URL: " << params_.url

<< " because it has an unsupported MIME type: "

<< params_.contents_mime_type;

- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

- NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

- NO_CLASSIFY_MAX);

- return;

+ DontClassifyForPhishing(NO_CLASSIFY_UNSUPPORTED_MIME_TYPE);

}

if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) {

VLOG(1) << "Skipping phishing classification for URL: " << params_.url

<< " because of hosting on private IP: "

<< params_.socket_address.host();

- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

- NO_CLASSIFY_PRIVATE_IP,

- NO_CLASSIFY_MAX);

- return;

+ DontClassifyForPhishing(NO_CLASSIFY_PRIVATE_IP);

}

- // Don't run the phishing classifier if the tab is incognito.

+ // Don't run any classifier if the tab is incognito.

if (web_contents_->GetBrowserContext()->IsOffTheRecord()) {

- VLOG(1) << "Skipping phishing classification for URL: " << params_.url

- << " because we're browsing incognito.";

- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

- NO_CLASSIFY_OFF_THE_RECORD,

- NO_CLASSIFY_MAX);

- return;

+ VLOG(1) << "Skipping phishing and malware classification for URL: "

+ << params_.url << " because we're browsing incognito.";

+ DontClassifyForPhishing(NO_CLASSIFY_OFF_THE_RECORD);

+ DontClassifyForMalware(NO_CLASSIFY_OFF_THE_RECORD);

}

// We lookup the csd-whitelist before we lookup the cache because

// a URL may have recently been whitelisted. If the URL matches

- // the csd-whitelist we won't start classification. The

+ // the csd-whitelist we won't phishing start classification. The

// csd-whitelist check has to be done on the IO thread because it

// uses the SafeBrowsing service class.

- BrowserThread::PostTask(

- BrowserThread::IO,

- FROM_HERE,

- base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,

- this, params_.url));

+ if (MaybeClassifyForPhishing() || MaybeClassifyForMalware()) {

+ BrowserThread::PostTask(

+ BrowserThread::IO,

+ FROM_HERE,

+ base::Bind(&ShouldClassifyUrlRequest::CheckCsdWhitelist,

+ this, params_.url));

+ }

}

void Cancel() {

- canceled_ = true;

+ DontClassifyForPhishing(NO_CLASSIFY_CANCEL);

+ DontClassifyForMalware(NO_CLASSIFY_CANCEL);

// Just to make sure we don't do anything stupid we reset all these

// pointers except for the safebrowsing service class which may be

// accessed by CheckCsdWhitelist().

@@ -152,6 +153,10 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest

NO_CLASSIFY_MATCH_CSD_WHITELIST,

NO_CLASSIFY_TOO_MANY_REPORTS,

NO_CLASSIFY_UNSUPPORTED_MIME_TYPE,

+ NO_CLASSIFY_NO_DATABASE_MANAGER,

+ NO_CLASSIFY_KILLSWITCH,

+ NO_CLASSIFY_CANCEL,

+ NO_CLASSIFY_RESULT_FROM_CACHE,

NO_CLASSIFY_MAX // Always add new values before this one.

};

@@ -159,43 +164,87 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest

// The destructor can be called either from the UI or the IO thread.

virtual ~ShouldClassifyUrlRequest() { }

- void CheckCsdWhitelist(const GURL& url) {

- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

- if (!database_manager_.get() ||

- database_manager_->MatchCsdWhitelistUrl(url)) {

- // We're done. There is no point in going back to the UI thread.

- VLOG(1) << "Skipping phishing classification for URL: " << url

- << " because it matches the csd whitelist";

+ bool MaybeClassifyForPhishing() const {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ return !start_phishing_classification_cb_.is_null();

+ }

+ bool MaybeClassifyForMalware() const {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ return !start_malware_classification_cb_.is_null();

+ }

+ void DontClassifyForPhishing(PreClassificationCheckFailures reason) {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ if (MaybeClassifyForPhishing()) {

+ // Track the first reason why we stopped classifying for phishing.

UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

- NO_CLASSIFY_MATCH_CSD_WHITELIST,

- NO_CLASSIFY_MAX);

- return;

+ reason, NO_CLASSIFY_MAX);

+ start_phishing_classification_cb_.Run(false);

}

+ start_phishing_classification_cb_.Reset();

+ }

- bool malware_killswitch_on = database_manager_->IsMalwareKillSwitchOn();

+ void DontClassifyForMalware(PreClassificationCheckFailures reason) {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ if (MaybeClassifyForMalware()) {

+ // Track the first reason why we stopped classifying for malware.

+ UMA_HISTOGRAM_ENUMERATION("SBClientMalware.PreClassificationCheckFail",

+ reason, NO_CLASSIFY_MAX);

+ start_malware_classification_cb_.Run(false);

+ }

+ start_malware_classification_cb_.Reset();

+ }

+ void CheckCsdWhitelist(const GURL& url) {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

+ // We don't want to call the classification callbacks from the IO

+ // thread so we simply pass the results of this method to CheckCache()

+ // which is called on the UI thread;

+ PreClassificationCheckFailures phishing_reason = NO_CLASSIFY_MAX;

+ PreClassificationCheckFailures malware_reason = NO_CLASSIFY_MAX;

+ if (!database_manager_.get()) {

+ // We cannot check the Safe Browsing whitelists so we stop here

+ // for safety.

+ malware_reason = phishing_reason = NO_CLASSIFY_NO_DATABASE_MANAGER;

+ } else {

+ if (database_manager_->MatchCsdWhitelistUrl(url)) {

+ VLOG(1) << "Skipping phishing classification for URL: " << url

+ << " because it matches the csd whitelist";

+ phishing_reason = NO_CLASSIFY_MATCH_CSD_WHITELIST;

+ }

+ if (database_manager_->IsMalwareKillSwitchOn()) {

+ malware_reason = NO_CLASSIFY_KILLSWITCH;

+ }

BrowserThread::PostTask(

BrowserThread::UI,

FROM_HERE,

- base::Bind(&ShouldClassifyUrlRequest::CheckCache, this,

- malware_killswitch_on));

+ base::Bind(&ShouldClassifyUrlRequest::CheckCache,

+ this,

+ phishing_reason,

+ malware_reason));

}

- void CheckCache(bool malware_killswitch_on) {

+ void CheckCache(PreClassificationCheckFailures phishing_reason,

+ PreClassificationCheckFailures malware_reason) {

DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

- if (canceled_) {

- return;

+ if (phishing_reason != NO_CLASSIFY_MAX)

+ DontClassifyForPhishing(phishing_reason);

+ if (malware_reason != NO_CLASSIFY_MAX)

+ DontClassifyForMalware(malware_reason);

+ if (!MaybeClassifyForMalware() && !MaybeClassifyForPhishing()) {

+ return; // No point in doing anything else.

}

- host_->SetMalwareKillSwitch(malware_killswitch_on);

- // If result is cached, we don't want to run classification again

+ // If result is cached, we don't want to run classification again.

+ // In that case we're just trying to show the warning.

bool is_phishing;

if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) {

VLOG(1) << "Satisfying request for " << params_.url << " from cache";

UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1);

// Since we are already on the UI thread, this is safe.

host_->MaybeShowPhishingWarning(params_.url, is_phishing);

- return;

+ DontClassifyForPhishing(NO_CLASSIFY_RESULT_FROM_CACHE);

}

// We want to limit the number of requests, though we will ignore the

@@ -209,25 +258,21 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest

} else if (csd_service_->OverPhishingReportLimit()) {

VLOG(1) << "Too many report phishing requests sent recently, "

<< "not running classification for " << params_.url;

- UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail",

- NO_CLASSIFY_TOO_MANY_REPORTS,

- NO_CLASSIFY_MAX);

- return;

+ DontClassifyForPhishing(NO_CLASSIFY_TOO_MANY_REPORTS);

+ }

+ if (csd_service_->OverMalwareReportLimit()) {

+ DontClassifyForMalware(NO_CLASSIFY_TOO_MANY_REPORTS);

}

// Everything checks out, so start classification.

// |web_contents_| is safe to call as we will be destructed

// before it is.

- VLOG(1) << "Instruct renderer to start phishing detection for URL: "

- << params_.url;

- content::RenderViewHost* rvh = web_contents_->GetRenderViewHost();

- rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(

- rvh->GetRoutingID(), params_.url));

+ if (MaybeClassifyForPhishing())

+ start_phishing_classification_cb_.Run(true);

+ if (MaybeClassifyForMalware())

+ start_malware_classification_cb_.Run(true);

}

- // No need to protect |canceled_| with a lock because it is only read and

- // written by the UI thread.

- bool canceled_;

content::FrameNavigateParams params_;

WebContents* web_contents_;

ClientSideDetectionService* csd_service_;

@@ -236,6 +281,9 @@ class ClientSideDetectionHost::ShouldClassifyUrlRequest

scoped_refptr<SafeBrowsingDatabaseManager> database_manager_;

ClientSideDetectionHost* host_;

+ ShouldClassifyUrlCallback start_phishing_classification_cb_;

+ ShouldClassifyUrlCallback start_malware_classification_cb_;

DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest);

};

@@ -248,10 +296,11 @@ ClientSideDetectionHost* ClientSideDetectionHost::Create(

ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)

: content::WebContentsObserver(tab),

csd_service_(NULL),

+ classification_request_(NULL),

+ should_extract_malware_features_(true),

+ onload_complete_(false),

weak_factory_(this),

- unsafe_unique_page_id_(-1),

- malware_killswitch_on_(false),

- malware_report_enabled_(false) {

+ unsafe_unique_page_id_(-1) {

DCHECK(tab);

// Note: csd_service_ and sb_service will be NULL here in testing.

csd_service_ = g_browser_process->safe_browsing_detection_service();

@@ -266,13 +315,6 @@ ClientSideDetectionHost::ClientSideDetectionHost(WebContents* tab)

database_manager_ = sb_service->database_manager();

ui_manager_->AddObserver(this);

}

- // Only enable the malware bad IP matching and report feature for canary

- // and dev channel.

- chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();

- malware_report_enabled_ = (

- channel == chrome::VersionInfo::CHANNEL_DEV ||

- channel == chrome::VersionInfo::CHANNEL_CANARY);

}

ClientSideDetectionHost::~ClientSideDetectionHost() {

@@ -324,14 +366,25 @@ void ClientSideDetectionHost::DidNavigateMainFrame(

cur_host_ = params.url.host();

cur_host_redirects_ = params.redirects;

}

+ browse_info_->url = params.url;

browse_info_->host_redirects = cur_host_redirects_;

browse_info_->url_redirects = params.redirects;

browse_info_->referrer = params.referrer.url;

browse_info_->http_status_code = details.http_status_code;

+ browse_info_->page_id = params.page_id;

- // Notify the renderer if it should classify this URL.

+ should_extract_malware_features_ = true;

+ should_classify_for_malware_.reset();

mattm 2014/02/22 02:49:03 Seems like a plain bool initialized to false here

noé 2014/03/14 22:21:32 MaybeStartMalwareFeatureExtraction() needs to know

mattm 2014/03/18 02:19:06 But the only thing it actually checks is "should_c

noé 2014/03/20 17:01:45 You are absolutely right. So sorry about the unne

+ onload_complete_ = false;

+ // Check whether we can cassify the current URL for phishing or malware.

classification_request_ = new ShouldClassifyUrlRequest(

- params, web_contents(), csd_service_, database_manager_.get(), this);

+ params,

+ base::Bind(&ClientSideDetectionHost::OnPhishingPreClassificationDone,

+ weak_factory_.GetWeakPtr()),

+ base::Bind(&ClientSideDetectionHost::OnMalwarePreClassificationDone,

+ weak_factory_.GetWeakPtr()),

+ web_contents(), csd_service_, database_manager_.get(), this);

classification_request_->Start();

}

@@ -414,6 +467,65 @@ void ClientSideDetectionHost::WebContentsDestroyed(WebContents* tab) {

feature_extractor_.reset();

}

+void ClientSideDetectionHost::OnPhishingPreClassificationDone(

+ bool should_classify) {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ if (browse_info_.get() && should_classify) {

+ VLOG(1) << "Instruct renderer to start phishing detection for URL: "

+ << browse_info_->url;

+ content::RenderViewHost* rvh = web_contents()->GetRenderViewHost();

+ rvh->Send(new SafeBrowsingMsg_StartPhishingDetection(

+ rvh->GetRoutingID(), browse_info_->url));

+ }

+void ClientSideDetectionHost::OnMalwarePreClassificationDone(

+ bool should_classify) {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ // If classification checks failed we should stop extracting malware features.

+ should_extract_malware_features_ = should_classify;

+ should_classify_for_malware_.reset(new bool(should_classify));

+ MaybeStartMalwareFeatureExtraction();

+void ClientSideDetectionHost::DocumentOnLoadCompletedInMainFrame(

+ int32 page_id) {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ DCHECK(browse_info_.get());

+ if (browse_info_->page_id != page_id) {

+ // Something weird is happening here. The BrowseInfo page ID

+ // should always be the same as the most recent load.

+ UMA_HISTOGRAM_COUNTS("SBClientMalware.UnexpectedPageId", 1);

+ return;

+ }

+ onload_complete_ = true;

+ MaybeStartMalwareFeatureExtraction();

+void ClientSideDetectionHost::MaybeStartMalwareFeatureExtraction() {

+ DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

+ if (should_classify_for_malware_.get() &&

+ *should_classify_for_malware_ &&

+ onload_complete_) {

+ scoped_ptr<ClientMalwareRequest> malware_verdict(

+ new ClientMalwareRequest);

+ // Start browser-side malware feature extraction. Once we're done it will

+ // send the malware client verdict request.

+ malware_verdict->set_url(browse_info_->url.spec());

+ const GURL& referrer = browse_info_->referrer;

+ if (referrer.SchemeIs("http")) { // Only send http urls.

+ malware_verdict->set_referrer_url(referrer.spec());

+ }

+ // This function doesn't expect browse_info_ to stay around after this

+ // function returns.

+ feature_extractor_->ExtractMalwareFeatures(

+ browse_info_.get(),

+ malware_verdict.release(),

+ base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

+ weak_factory_.GetWeakPtr()));

+ }

void ClientSideDetectionHost::OnPhishingDetectionDone(

const std::string& verdict_str) {

DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

@@ -434,27 +546,6 @@ void ClientSideDetectionHost::OnPhishingDetectionDone(

browse_info_.get() &&

verdict->ParseFromString(verdict_str) &&

verdict->IsInitialized()) {

- // We do the malware IP matching and request sending if the feature

- // is enabled.

- if (malware_report_enabled_ && !MalwareKillSwitchIsOn()) {

- scoped_ptr<ClientMalwareRequest> malware_verdict(

- new ClientMalwareRequest);

- // Start browser-side malware feature extraction. Once we're done it will

- // send the malware client verdict request.

- malware_verdict->set_url(verdict->url());

- const GURL& referrer = browse_info_->referrer;

- if (referrer.SchemeIs("http")) { // Only send http urls.

- malware_verdict->set_referrer_url(referrer.spec());

- }

- // This function doesn't expect browse_info_ to stay around after this

- // function returns.

- feature_extractor_->ExtractMalwareFeatures(

- browse_info_.get(),

- malware_verdict.release(),

- base::Bind(&ClientSideDetectionHost::MalwareFeatureExtractionDone,

- weak_factory_.GetWeakPtr()));

- }

// We only send phishing verdict to the server if the verdict is phishing or

// if a SafeBrowsing interstitial was already shown for this site. E.g., a

// malware or phishing interstitial was shown but the user clicked

@@ -472,7 +563,6 @@ void ClientSideDetectionHost::OnPhishingDetectionDone(

weak_factory_.GetWeakPtr()));

}

- browse_info_.reset();

}

void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url,

@@ -601,15 +691,13 @@ void ClientSideDetectionHost::Observe(

DCHECK_EQ(type, content::NOTIFICATION_RESOURCE_RESPONSE_STARTED);

const ResourceRequestDetails* req = content::Details<ResourceRequestDetails>(

details).ptr();

- if (req && browse_info_.get() && malware_report_enabled_ &&

- !MalwareKillSwitchIsOn()) {

- if (req->url.is_valid()) {

- UpdateIPUrlMap(req->socket_address.host() /* ip */,

- req->url.spec() /* url */,

- req->method,

- req->referrer,

- req->resource_type);

- }

+ if (req && browse_info_.get() &&

+ should_extract_malware_features_ && req->url.is_valid()) {

+ UpdateIPUrlMap(req->socket_address.host() /* ip */,

+ req->url.spec() /* url */,

+ req->method,

+ req->referrer,

+ req->resource_type);

}

@@ -640,14 +728,4 @@ void ClientSideDetectionHost::set_safe_browsing_managers(

database_manager_ = database_manager;

}

-bool ClientSideDetectionHost::MalwareKillSwitchIsOn() {

- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

- return malware_killswitch_on_;

-void ClientSideDetectionHost::SetMalwareKillSwitch(bool killswitch_on) {

- DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));

- malware_killswitch_on_ = killswitch_on;

} // namespace safe_browsing