chrome/browser/safe_browsing/database_manager.cc - Issue 611603002: Add the goog-unwanted-shavar list to a new SafeBrowsing PrefixSet.

Unified Diff: chrome/browser/safe_browsing/database_manager.cc

Issue 611603002: Add the goog-unwanted-shavar list to a new SafeBrowsing PrefixSet. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: nits and cl format Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/browser/safe_browsing/database_manager.h ('k') | chrome/browser/safe_browsing/database_manager_unittest.cc » ('j') | chrome/browser/safe_browsing/safe_browsing_database.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/safe_browsing/database_manager.cc

diff --git a/chrome/browser/safe_browsing/database_manager.cc b/chrome/browser/safe_browsing/database_manager.cc

index 712271ba657f0c31e19fc9bbd71aed6ef6fae19d..e64b02558b49a9fdaaba79f725b7ee7ba2619deb 100644

--- a/chrome/browser/safe_browsing/database_manager.cc

+++ b/chrome/browser/safe_browsing/database_manager.cc

@@ -68,26 +68,50 @@ bool IsExpectedThreat(

threat_type);

}

-// Return the list id from the first result in |full_hashes| which matches

+// Return the severest list id from the results in |full_hashes| which matches

// |hash|, or INVALID if none match.

-safe_browsing_util::ListType GetHashThreatListType(

+safe_browsing_util::ListType GetHashSeverestThreatListType(

const SBFullHash& hash,

const std::vector<SBFullHashResult>& full_hashes,

size_t* index) {

+ safe_browsing_util::ListType pending_threat = safe_browsing_util::INVALID;

for (size_t i = 0; i < full_hashes.size(); ++i) {

if (SBFullHashEqual(hash, full_hashes[i].hash)) {

- if (index)

- *index = i;

- return static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);

+ const safe_browsing_util::ListType threat =

+ static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);

+ switch (threat) {

+ case safe_browsing_util::INVALID:

+ // |full_hashes| should never contain INVALID as a |list_id|.

+ NOTREACHED();

+ break;

+ case safe_browsing_util::MALWARE: // Falls through.

+ case safe_browsing_util::PHISH: // Falls through.

+ case safe_browsing_util::BINURL: // Falls through.

+ case safe_browsing_util::CSDWHITELIST: // Falls through.

+ case safe_browsing_util::DOWNLOADWHITELIST: // Falls through.

+ case safe_browsing_util::EXTENSIONBLACKLIST: // Falls through.

+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Falls through.

+ case safe_browsing_util::IPBLACKLIST:

+ if (index)

+ *index = i;

+ return threat;

+ case safe_browsing_util::UNWANTEDURL:

+ // UNWANTEDURL is considered less severe than other threats, keep

+ // looking.

+ pending_threat = threat;

+ if (index)

+ *index = i;

+ break;

+ }

}

- return safe_browsing_util::INVALID;

+ return pending_threat;

}

// Given a URL, compare all the possible host + path full hashes to the set of

-// provided full hashes. Returns the list id of the a matching result from

-// |full_hashes|, or INVALID if none match.

-safe_browsing_util::ListType GetUrlThreatListType(

+// provided full hashes. Returns the list id of the severest matching result

+// from |full_hashes|, or INVALID if none match.

+safe_browsing_util::ListType GetUrlSeverestThreatListType(

const GURL& url,

const std::vector<SBFullHashResult>& full_hashes,

size_t* index) {

@@ -97,13 +121,31 @@ safe_browsing_util::ListType GetUrlThreatListType(

std::vector<std::string> patterns;

safe_browsing_util::GeneratePatternsToCheck(url, &patterns);

+ safe_browsing_util::ListType pending_threat = safe_browsing_util::INVALID;

for (size_t i = 0; i < patterns.size(); ++i) {

- safe_browsing_util::ListType threat = GetHashThreatListType(

+ safe_browsing_util::ListType threat = GetHashSeverestThreatListType(

SBFullHashForString(patterns[i]), full_hashes, index);

- if (threat != safe_browsing_util::INVALID)

- return threat;

+ switch (threat) {

+ case safe_browsing_util::INVALID:

+ // Ignore patterns with no matching threat.

+ break;

+ case safe_browsing_util::MALWARE: // Falls through.

+ case safe_browsing_util::PHISH: // Falls through.

+ case safe_browsing_util::BINURL: // Falls through.

+ case safe_browsing_util::CSDWHITELIST: // Falls through.

+ case safe_browsing_util::DOWNLOADWHITELIST: // Falls through.

+ case safe_browsing_util::EXTENSIONBLACKLIST: // Falls through.

+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Falls through.

+ case safe_browsing_util::IPBLACKLIST:

+ return threat;

+ case safe_browsing_util::UNWANTEDURL:

+ // UNWANTEDURL is considered less severe than other threats, keep

+ // looking.

+ pending_threat = threat;

+ break;

+ }

}

- return safe_browsing_util::INVALID;

+ return pending_threat;

}

SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {

@@ -112,6 +154,8 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {

return SB_THREAT_TYPE_URL_PHISHING;

case safe_browsing_util::MALWARE:

return SB_THREAT_TYPE_URL_MALWARE;

+ case safe_browsing_util::UNWANTEDURL:

+ return SB_THREAT_TYPE_URL_UNWANTED;

case safe_browsing_util::BINURL:

return SB_THREAT_TYPE_BINARY_MALWARE_URL;

case safe_browsing_util::EXTENSIONBLACKLIST:

@@ -125,20 +169,20 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {

} // namespace

// static

-SBThreatType SafeBrowsingDatabaseManager::GetHashThreatType(

+SBThreatType SafeBrowsingDatabaseManager::GetHashSeverestThreatType(

const SBFullHash& hash,

const std::vector<SBFullHashResult>& full_hashes) {

return GetThreatTypeFromListType(

- GetHashThreatListType(hash, full_hashes, NULL));

+ GetHashSeverestThreatListType(hash, full_hashes, NULL));

}

// static

-SBThreatType SafeBrowsingDatabaseManager::GetUrlThreatType(

+SBThreatType SafeBrowsingDatabaseManager::GetUrlSeverestThreatType(

const GURL& url,

const std::vector<SBFullHashResult>& full_hashes,

size_t* index) {

return GetThreatTypeFromListType(

- GetUrlThreatListType(url, full_hashes, index));

+ GetUrlSeverestThreatListType(url, full_hashes, index));

}

SafeBrowsingDatabaseManager::SafeBrowsingCheck::SafeBrowsingCheck(

@@ -171,6 +215,7 @@ void SafeBrowsingDatabaseManager::Client::OnSafeBrowsingResult(

switch (check.check_type) {

case safe_browsing_util::MALWARE:

case safe_browsing_util::PHISH:

+ case safe_browsing_util::UNWANTEDURL:

DCHECK_EQ(1u, check.urls.size());

OnCheckBrowseUrlResult(

check.urls[0], check.url_results[0], check.url_metadata[0]);

@@ -217,6 +262,7 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(

enable_extension_blacklist_(false),

enable_side_effect_free_whitelist_(false),

enable_ip_blacklist_(false),

+ enable_unwanted_software_blacklist_(false),

update_in_progress_(false),

database_update_in_progress_(false),

closing_database_(false),

@@ -253,6 +299,10 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(

// phishing protection for now.

enable_ip_blacklist_ = enable_csd_whitelist_;

+ // TODO(gab): Gate this on the same experiment that will soon control the UwS

+ // URL UI.

+ enable_unwanted_software_blacklist_ = true;

enum SideEffectFreeWhitelistStatus {

SIDE_EFFECT_FREE_WHITELIST_ENABLED,

SIDE_EFFECT_FREE_WHITELIST_DISABLED,

@@ -409,6 +459,7 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,

std::vector<SBThreatType> expected_threats;

expected_threats.push_back(SB_THREAT_TYPE_URL_MALWARE);

expected_threats.push_back(SB_THREAT_TYPE_URL_PHISHING);

+ expected_threats.push_back(SB_THREAT_TYPE_URL_UNWANTED);

const base::TimeTicks start = base::TimeTicks::Now();

if (!MakeDatabaseAvailable()) {

@@ -421,19 +472,45 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,

return false;

}

- std::vector<SBPrefix> prefix_hits;

+ // Cache hits should, in general, be the same for both (ignoring potential

+ // cache evictions in the second call for entries that were just about to be

+ // evicted in the first call).

+ // TODO(gab): Refactor SafeBrowsingDatabase to avoid depending on this here.

std::vector<SBFullHashResult> cache_hits;

- bool prefix_match =

- database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits);

+ std::vector<SBPrefix> browse_prefix_hits;

+ bool browse_prefix_match = database_->ContainsBrowseUrl(

+ url, &browse_prefix_hits, &cache_hits);

+ std::vector<SBPrefix> unwanted_prefix_hits;

+ std::vector<SBFullHashResult> unused_cache_hits;

+ bool unwanted_prefix_match = database_->ContainsUnwantedSoftwareUrl(

+ url, &unwanted_prefix_hits, &unused_cache_hits);

+ // Merge the two pre-sorted prefix hits lists.

+ // TODO(gab): Refactor SafeBrowsingDatabase for it to return this merged list

+ // by default rather than building it here.

+ std::vector<SBPrefix> prefix_hits(browse_prefix_hits.size() +

+ unwanted_prefix_hits.size());

+ std::merge(browse_prefix_hits.begin(),

+ browse_prefix_hits.end(),

+ unwanted_prefix_hits.begin(),

+ unwanted_prefix_hits.end(),

+ prefix_hits.begin());

+ prefix_hits.erase(std::unique(prefix_hits.begin(), prefix_hits.end()),

+ prefix_hits.end());

UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::TimeTicks::Now() - start);

- if (!prefix_match)

+ if (!browse_prefix_match && !unwanted_prefix_match)

return true; // URL is okay.

// Needs to be asynchronous, since we could be in the constructor of a

// ResourceDispatcherHost event handler which can't pause there.

+ // This check will ping the Safe Browsing servers and get all lists which it

+ // matches. These lists will then be filtered against the |expected_threats|

+ // and the result callback for MALWARE (which is the same as for PHISH and

+ // UNWANTEDURL) will eventually be invoked with the final decision.

SafeBrowsingCheck* check = new SafeBrowsingCheck(std::vector<GURL>(1, url),

std::vector<SBFullHash>(),

client,

@@ -705,7 +782,8 @@ SafeBrowsingDatabase* SafeBrowsingDatabaseManager::GetDatabase() {

enable_download_whitelist_,

enable_extension_blacklist_,

enable_side_effect_free_whitelist_,

- enable_ip_blacklist_);

+ enable_ip_blacklist_,

+ enable_unwanted_software_blacklist_);

database->Init(SafeBrowsingService::GetBaseFilename());

{

@@ -771,10 +849,7 @@ void SafeBrowsingDatabaseManager::OnCheckDone(SafeBrowsingCheck* check) {

} else {

// We may have cached results for previous GetHash queries. Since

// this data comes from cache, don't histogram hits.

- bool is_threat = HandleOneCheck(check, check->cache_hits);

- // cache_hits should only contain hits for a fullhash we searched for, so if

- // we got to this point it should always result in a threat match.

- DCHECK(is_threat);

+ HandleOneCheck(check, check->cache_hits);

}

@@ -940,21 +1015,26 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(

bool is_threat = false;

- // TODO(shess): GetHashThreadListType() contains a loop,

- // GetUrlThreatListType() a loop around that loop. Having another loop out

- // here concerns me. It is likely that SAFE is an expected outcome, which

- // means all of those loops run to completion. Refactoring this to generate a

- // set of sorted items to compare in sequence would probably improve things.

+ // TODO(shess): GetHashSeverestThreadListType() contains a loop,

+ // GetUrlSeverestThreatListType() a loop around that loop. Having another

+ // loop out here concerns me. It is likely that SAFE is an expected outcome,

+ // which means all of those loops run to completion. Refactoring this to

+ // generate a set of sorted items to compare in sequence would probably

+ // improve things.

// Additionally, the set of patterns generated from the urls is very similar

// to the patterns generated in ContainsBrowseUrl() and other database checks,

// which are called from this code. Refactoring that across the checks could

// interact well with batching the checks here.

+ // TODO(gab): Fix the fact that Get(Url|Hash)SeverestThreatType() may return a

+ // threat for which IsExpectedThreat() returns false even if |full_hashes|

+ // actually contains an expected threat.

for (size_t i = 0; i < check->urls.size(); ++i) {

size_t threat_index;

SBThreatType threat =

- GetUrlThreatType(check->urls[i], full_hashes, &threat_index);

+ GetUrlSeverestThreatType(check->urls[i], full_hashes, &threat_index);

if (threat != SB_THREAT_TYPE_SAFE &&

IsExpectedThreat(threat, check->expected_threats)) {

check->url_results[i] = threat;

@@ -964,7 +1044,8 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(

}

for (size_t i = 0; i < check->full_hashes.size(); ++i) {

- SBThreatType threat = GetHashThreatType(check->full_hashes[i], full_hashes);

+ SBThreatType threat =

+ GetHashSeverestThreatType(check->full_hashes[i], full_hashes);

if (threat != SB_THREAT_TYPE_SAFE &&

IsExpectedThreat(threat, check->expected_threats)) {

check->full_hash_results[i] = threat;