chrome/browser/safe_browsing/database_manager.cc - Issue 611603002: Add the goog-unwanted-shavar list to a new SafeBrowsing PrefixSet.

Unified Diff: chrome/browser/safe_browsing/database_manager.cc

Issue 611603002: Add the goog-unwanted-shavar list to a new SafeBrowsing PrefixSet. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: unwantedblhit => uwsblhit as per discussion with Noe for server-side logs Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: chrome/browser/safe_browsing/database_manager.cc

diff --git a/chrome/browser/safe_browsing/database_manager.cc b/chrome/browser/safe_browsing/database_manager.cc

index 712271ba657f0c31e19fc9bbd71aed6ef6fae19d..73acf0ae271f1df6422f97107d98576bcbb7f0e4 100644

--- a/chrome/browser/safe_browsing/database_manager.cc

+++ b/chrome/browser/safe_browsing/database_manager.cc

@@ -68,26 +68,51 @@ bool IsExpectedThreat(

threat_type);

}

-// Return the list id from the first result in |full_hashes| which matches

+// Return the severest list id from the results in |full_hashes| which matches

// |hash|, or INVALID if none match.

-safe_browsing_util::ListType GetHashThreatListType(

+safe_browsing_util::ListType GetHashSeverestThreatListType(

const SBFullHash& hash,

const std::vector<SBFullHashResult>& full_hashes,

size_t* index) {

+ safe_browsing_util::ListType severest_threat = safe_browsing_util::INVALID;

mattm 2014/11/12 03:24:30 name is a bit confusing since it's not used for th

gab 2014/11/12 16:05:08 Done.

for (size_t i = 0; i < full_hashes.size(); ++i) {

if (SBFullHashEqual(hash, full_hashes[i].hash)) {

- if (index)

- *index = i;

- return static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);

+ const safe_browsing_util::ListType threat =

+ static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);

+ switch (threat) {

+ case safe_browsing_util::INVALID:

+ // |full_hashes| should never contain INVALID as a |list_id|.

+ NOTREACHED();

+ break;

+ case safe_browsing_util::MALWARE: // Fall through.

+ case safe_browsing_util::PHISH: // Fall through.

+ case safe_browsing_util::BINURL: // Fall through.

+ case safe_browsing_util::CSDWHITELIST: // Fall through.

+ case safe_browsing_util::DOWNLOADWHITELIST: // Fall through.

+ case safe_browsing_util::EXTENSIONBLACKLIST: // Fall through.

+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Fall through.

+ case safe_browsing_util::IPBLACKLIST:

+ if (index)

+ *index = i;

+ return threat;

+ case safe_browsing_util::UNWANTEDURL:

+ // UNWANTEDURL is considered less severe than other threats, keep

+ // looking.

+ severest_threat = threat;

+ if (index)

+ *index = i;

+ break;

+ }

}

- return safe_browsing_util::INVALID;

+ return severest_threat;

}

// Given a URL, compare all the possible host + path full hashes to the set of

-// provided full hashes. Returns the list id of the a matching result from

-// |full_hashes|, or INVALID if none match.

-safe_browsing_util::ListType GetUrlThreatListType(

+// provided full hashes. Returns the list id of the severest matching result

+// from |full_hashes|, or INVALID if none match.

+safe_browsing_util::ListType GetUrlSeverestThreatListType(

const GURL& url,

const std::vector<SBFullHashResult>& full_hashes,

size_t* index) {

@@ -97,13 +122,31 @@ safe_browsing_util::ListType GetUrlThreatListType(

std::vector<std::string> patterns;

safe_browsing_util::GeneratePatternsToCheck(url, &patterns);

+ safe_browsing_util::ListType severest_threat = safe_browsing_util::INVALID;

for (size_t i = 0; i < patterns.size(); ++i) {

- safe_browsing_util::ListType threat = GetHashThreatListType(

+ safe_browsing_util::ListType threat = GetHashSeverestThreatListType(

SBFullHashForString(patterns[i]), full_hashes, index);

- if (threat != safe_browsing_util::INVALID)

- return threat;

+ switch (threat) {

+ case safe_browsing_util::INVALID:

+ // Ignore patterns with no matching threat.

+ break;

+ case safe_browsing_util::MALWARE: // Fall through.

+ case safe_browsing_util::PHISH: // Fall through.

+ case safe_browsing_util::BINURL: // Fall through.

+ case safe_browsing_util::CSDWHITELIST: // Fall through.

+ case safe_browsing_util::DOWNLOADWHITELIST: // Fall through.

+ case safe_browsing_util::EXTENSIONBLACKLIST: // Fall through.

+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Fall through.

+ case safe_browsing_util::IPBLACKLIST:

+ return threat;

+ case safe_browsing_util::UNWANTEDURL:

+ // UNWANTEDURL is considered less severe than other threats, keep

+ // looking.

+ severest_threat = threat;

+ break;

+ }

}

- return safe_browsing_util::INVALID;

+ return severest_threat;

}

SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {

@@ -112,6 +155,8 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {

return SB_THREAT_TYPE_URL_PHISHING;

case safe_browsing_util::MALWARE:

return SB_THREAT_TYPE_URL_MALWARE;

+ case safe_browsing_util::UNWANTEDURL:

+ return SB_THREAT_TYPE_URL_UNWANTED;

case safe_browsing_util::BINURL:

return SB_THREAT_TYPE_BINARY_MALWARE_URL;

case safe_browsing_util::EXTENSIONBLACKLIST:

@@ -125,20 +170,20 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {

} // namespace

// static

-SBThreatType SafeBrowsingDatabaseManager::GetHashThreatType(

+SBThreatType SafeBrowsingDatabaseManager::GetHashSeverestThreatType(

const SBFullHash& hash,

const std::vector<SBFullHashResult>& full_hashes) {

return GetThreatTypeFromListType(

- GetHashThreatListType(hash, full_hashes, NULL));

+ GetHashSeverestThreatListType(hash, full_hashes, NULL));

}

// static

-SBThreatType SafeBrowsingDatabaseManager::GetUrlThreatType(

+SBThreatType SafeBrowsingDatabaseManager::GetUrlSeverestThreatType(

const GURL& url,

const std::vector<SBFullHashResult>& full_hashes,

size_t* index) {

return GetThreatTypeFromListType(

- GetUrlThreatListType(url, full_hashes, index));

+ GetUrlSeverestThreatListType(url, full_hashes, index));

}

SafeBrowsingDatabaseManager::SafeBrowsingCheck::SafeBrowsingCheck(

@@ -171,6 +216,7 @@ void SafeBrowsingDatabaseManager::Client::OnSafeBrowsingResult(

switch (check.check_type) {

case safe_browsing_util::MALWARE:

case safe_browsing_util::PHISH:

+ case safe_browsing_util::UNWANTEDURL:

DCHECK_EQ(1u, check.urls.size());

OnCheckBrowseUrlResult(

check.urls[0], check.url_results[0], check.url_metadata[0]);

@@ -217,6 +263,7 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(

enable_extension_blacklist_(false),

enable_side_effect_free_whitelist_(false),

enable_ip_blacklist_(false),

+ enable_unwanted_software_blacklist_(false),

update_in_progress_(false),

database_update_in_progress_(false),

closing_database_(false),

@@ -253,6 +300,10 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(

// phishing protection for now.

enable_ip_blacklist_ = enable_csd_whitelist_;

+ // TODO(gab): Gate this on the same experiment that will soon control the UwS

+ // URL UI.

+ enable_unwanted_software_blacklist_ = true;

enum SideEffectFreeWhitelistStatus {

SIDE_EFFECT_FREE_WHITELIST_ENABLED,

SIDE_EFFECT_FREE_WHITELIST_DISABLED,

@@ -409,6 +460,7 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,

std::vector<SBThreatType> expected_threats;

expected_threats.push_back(SB_THREAT_TYPE_URL_MALWARE);

expected_threats.push_back(SB_THREAT_TYPE_URL_PHISHING);

+ expected_threats.push_back(SB_THREAT_TYPE_URL_UNWANTED);

const base::TimeTicks start = base::TimeTicks::Now();

if (!MakeDatabaseAvailable()) {

@@ -421,24 +473,66 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,

return false;

}

- std::vector<SBPrefix> prefix_hits;

+ // Cache hits should be the same for both (as verified by the DCHECK below).

+ // TODO(gab): Refactor SafeBrowsingDatabase to avoid depending on this here.

std::vector<SBFullHashResult> cache_hits;

- bool prefix_match =

- database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits);

+ std::vector<SBPrefix> browse_prefix_hits;

+ bool browse_prefix_match = database_->ContainsBrowseUrl(

+ url, &browse_prefix_hits, &cache_hits);

+ std::vector<SBPrefix> unwanted_prefix_hits;

+ std::vector<SBFullHashResult> other_cache_hits;

mattm 2014/11/12 03:24:30 unused_cache_hits ?

gab 2014/11/12 16:05:08 Done.

+ bool unwanted_prefix_match = database_->ContainsUnwantedSoftwareUrl(

+ url, &unwanted_prefix_hits, &other_cache_hits);

+#if defined(OS_WIN)

+ // TODO(gab): Figure out why this doesn't pass on non-win compilers (padding?)

+ // leave it here as Win-only for now to ensure changes to SBFullHashResult's

+ // format are at least caught by some bots...

mattm 2014/11/12 03:24:30 It's not win specific, but it is padding, yes. Ex

gab 2014/11/12 16:05:08 Removed.

+ static_assert(sizeof(SBFullHashResult) == sizeof(SBFullHash) + sizeof(int) +

+ sizeof(std::string),

+ "Need to update equality predicate below.");

+#endif

+ DCHECK(cache_hits.size() == other_cache_hits.size() &&

+ std::equal(cache_hits.begin(), cache_hits.end(),

+ other_cache_hits.begin(),

+ [](const SBFullHashResult& a, const SBFullHashResult& b) {

+ return SBFullHashEqual(a.hash, b.hash) &&

+ a.list_id == b.list_id && a.metadata == b.metadata;

+ }));

mattm 2014/11/12 03:24:30 Technically the cache hits could be different, sin

gab 2014/11/12 16:05:08 Good catch, removed.

+ // Merge the two pre-sorted prefix hits lists.

+ // TODO(gab): Refactor SafeBrowsingDatabase for it to return this merged list

+ // by default rather than building it here.

+ DCHECK(std::is_sorted(browse_prefix_hits.begin(), browse_prefix_hits.end()));

+ DCHECK(std::is_sorted(unwanted_prefix_hits.begin(),

gab 2014/11/12 16:05:08 Also removed these two checks as std::is_sorted is

+ unwanted_prefix_hits.end()));

+ std::vector<SBPrefix> prefix_hits(

+ browse_prefix_hits.size() + unwanted_prefix_hits.size());

+ std::merge(browse_prefix_hits.begin(), browse_prefix_hits.end(),

+ unwanted_prefix_hits.begin(), unwanted_prefix_hits.end(),

+ prefix_hits.begin());

+ prefix_hits.erase(std::unique(prefix_hits.begin(), prefix_hits.end()),

+ prefix_hits.end());

UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::TimeTicks::Now() - start);

- if (!prefix_match)

+ if (!browse_prefix_match && !unwanted_prefix_match)

return true; // URL is okay.

// Needs to be asynchronous, since we could be in the constructor of a

// ResourceDispatcherHost event handler which can't pause there.

- SafeBrowsingCheck* check = new SafeBrowsingCheck(std::vector<GURL>(1, url),

- std::vector<SBFullHash>(),

- client,

- safe_browsing_util::MALWARE,

- expected_threats);

+ // This check will ping the Safe Browsing servers and get all lists which it

+ // matches. These lists will then be filtered against the |expected_threats|

+ // and the result callback for MALWARE (which is the same as for PHISH and

+ // UNWANTEDURL) will eventually be invoked with the final decision.

+ SafeBrowsingCheck* check =

+ new SafeBrowsingCheck(std::vector<GURL>(1, url),

+ std::vector<SBFullHash>(),

+ client,

+ safe_browsing_util::MALWARE,

+ expected_threats);

check->need_get_hash = cache_hits.empty();

check->prefix_hits.swap(prefix_hits);

check->cache_hits.swap(cache_hits);

@@ -705,7 +799,8 @@ SafeBrowsingDatabase* SafeBrowsingDatabaseManager::GetDatabase() {

enable_download_whitelist_,

enable_extension_blacklist_,

enable_side_effect_free_whitelist_,

- enable_ip_blacklist_);

+ enable_ip_blacklist_,

+ enable_unwanted_software_blacklist_);

database->Init(SafeBrowsingService::GetBaseFilename());

{

@@ -771,10 +866,7 @@ void SafeBrowsingDatabaseManager::OnCheckDone(SafeBrowsingCheck* check) {

} else {

// We may have cached results for previous GetHash queries. Since

// this data comes from cache, don't histogram hits.

- bool is_threat = HandleOneCheck(check, check->cache_hits);

- // cache_hits should only contain hits for a fullhash we searched for, so if

- // we got to this point it should always result in a threat match.

- DCHECK(is_threat);

+ HandleOneCheck(check, check->cache_hits);

}

@@ -940,11 +1032,12 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(

bool is_threat = false;

- // TODO(shess): GetHashThreadListType() contains a loop,

- // GetUrlThreatListType() a loop around that loop. Having another loop out

- // here concerns me. It is likely that SAFE is an expected outcome, which

- // means all of those loops run to completion. Refactoring this to generate a

- // set of sorted items to compare in sequence would probably improve things.

+ // TODO(shess): GetHashSeverestThreadListType() contains a loop,

+ // GetUrlSeverestThreatListType() a loop around that loop. Having another

+ // loop out here concerns me. It is likely that SAFE is an expected outcome,

+ // which means all of those loops run to completion. Refactoring this to

+ // generate a set of sorted items to compare in sequence would probably

+ // improve things.

// Additionally, the set of patterns generated from the urls is very similar

// to the patterns generated in ContainsBrowseUrl() and other database checks,

@@ -954,7 +1047,7 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(

for (size_t i = 0; i < check->urls.size(); ++i) {

size_t threat_index;

SBThreatType threat =

- GetUrlThreatType(check->urls[i], full_hashes, &threat_index);

+ GetUrlSeverestThreatType(check->urls[i], full_hashes, &threat_index);

if (threat != SB_THREAT_TYPE_SAFE &&

IsExpectedThreat(threat, check->expected_threats)) {

mattm 2014/11/12 03:24:30 Hmm, I suppose ideally the severest threat should

gab 2014/11/12 16:05:08 Good point. Since this is an existing problem thou

check->url_results[i] = threat;

@@ -964,7 +1057,8 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(

}

for (size_t i = 0; i < check->full_hashes.size(); ++i) {

- SBThreatType threat = GetHashThreatType(check->full_hashes[i], full_hashes);

+ SBThreatType threat = GetHashSeverestThreatType(check->full_hashes[i],

+ full_hashes);

if (threat != SB_THREAT_TYPE_SAFE &&

IsExpectedThreat(threat, check->expected_threats)) {

check->full_hash_results[i] = threat;

« no previous file with comments | « chrome/browser/safe_browsing/database_manager.h ('k') | chrome/browser/safe_browsing/database_manager_unittest.cc » ('j') | no next file with comments »