Index: chrome/browser/safe_browsing/database_manager.cc |
diff --git a/chrome/browser/safe_browsing/database_manager.cc b/chrome/browser/safe_browsing/database_manager.cc |
index 712271ba657f0c31e19fc9bbd71aed6ef6fae19d..e64b02558b49a9fdaaba79f725b7ee7ba2619deb 100644 |
--- a/chrome/browser/safe_browsing/database_manager.cc |
+++ b/chrome/browser/safe_browsing/database_manager.cc |
@@ -68,26 +68,50 @@ bool IsExpectedThreat( |
threat_type); |
} |
-// Return the list id from the first result in |full_hashes| which matches |
+// Return the severest list id from the results in |full_hashes| which matches |
// |hash|, or INVALID if none match. |
-safe_browsing_util::ListType GetHashThreatListType( |
+safe_browsing_util::ListType GetHashSeverestThreatListType( |
const SBFullHash& hash, |
const std::vector<SBFullHashResult>& full_hashes, |
size_t* index) { |
+ safe_browsing_util::ListType pending_threat = safe_browsing_util::INVALID; |
for (size_t i = 0; i < full_hashes.size(); ++i) { |
if (SBFullHashEqual(hash, full_hashes[i].hash)) { |
- if (index) |
- *index = i; |
- return static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id); |
+ const safe_browsing_util::ListType threat = |
+ static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id); |
+ switch (threat) { |
+ case safe_browsing_util::INVALID: |
+ // |full_hashes| should never contain INVALID as a |list_id|. |
+ NOTREACHED(); |
+ break; |
+ case safe_browsing_util::MALWARE: // Falls through. |
+ case safe_browsing_util::PHISH: // Falls through. |
+ case safe_browsing_util::BINURL: // Falls through. |
+ case safe_browsing_util::CSDWHITELIST: // Falls through. |
+ case safe_browsing_util::DOWNLOADWHITELIST: // Falls through. |
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Falls through. |
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Falls through. |
+ case safe_browsing_util::IPBLACKLIST: |
+ if (index) |
+ *index = i; |
+ return threat; |
+ case safe_browsing_util::UNWANTEDURL: |
+ // UNWANTEDURL is considered less severe than other threats, keep |
+ // looking. |
+ pending_threat = threat; |
+ if (index) |
+ *index = i; |
+ break; |
+ } |
} |
} |
- return safe_browsing_util::INVALID; |
+ return pending_threat; |
} |
// Given a URL, compare all the possible host + path full hashes to the set of |
-// provided full hashes. Returns the list id of the a matching result from |
-// |full_hashes|, or INVALID if none match. |
-safe_browsing_util::ListType GetUrlThreatListType( |
+// provided full hashes. Returns the list id of the severest matching result |
+// from |full_hashes|, or INVALID if none match. |
+safe_browsing_util::ListType GetUrlSeverestThreatListType( |
const GURL& url, |
const std::vector<SBFullHashResult>& full_hashes, |
size_t* index) { |
@@ -97,13 +121,31 @@ safe_browsing_util::ListType GetUrlThreatListType( |
std::vector<std::string> patterns; |
safe_browsing_util::GeneratePatternsToCheck(url, &patterns); |
+ safe_browsing_util::ListType pending_threat = safe_browsing_util::INVALID; |
for (size_t i = 0; i < patterns.size(); ++i) { |
- safe_browsing_util::ListType threat = GetHashThreatListType( |
+ safe_browsing_util::ListType threat = GetHashSeverestThreatListType( |
SBFullHashForString(patterns[i]), full_hashes, index); |
- if (threat != safe_browsing_util::INVALID) |
- return threat; |
+ switch (threat) { |
+ case safe_browsing_util::INVALID: |
+ // Ignore patterns with no matching threat. |
+ break; |
+ case safe_browsing_util::MALWARE: // Falls through. |
+ case safe_browsing_util::PHISH: // Falls through. |
+ case safe_browsing_util::BINURL: // Falls through. |
+ case safe_browsing_util::CSDWHITELIST: // Falls through. |
+ case safe_browsing_util::DOWNLOADWHITELIST: // Falls through. |
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Falls through. |
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Falls through. |
+ case safe_browsing_util::IPBLACKLIST: |
+ return threat; |
+ case safe_browsing_util::UNWANTEDURL: |
+ // UNWANTEDURL is considered less severe than other threats, keep |
+ // looking. |
+ pending_threat = threat; |
+ break; |
+ } |
} |
- return safe_browsing_util::INVALID; |
+ return pending_threat; |
} |
SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) { |
@@ -112,6 +154,8 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) { |
return SB_THREAT_TYPE_URL_PHISHING; |
case safe_browsing_util::MALWARE: |
return SB_THREAT_TYPE_URL_MALWARE; |
+ case safe_browsing_util::UNWANTEDURL: |
+ return SB_THREAT_TYPE_URL_UNWANTED; |
case safe_browsing_util::BINURL: |
return SB_THREAT_TYPE_BINARY_MALWARE_URL; |
case safe_browsing_util::EXTENSIONBLACKLIST: |
@@ -125,20 +169,20 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) { |
} // namespace |
// static |
-SBThreatType SafeBrowsingDatabaseManager::GetHashThreatType( |
+SBThreatType SafeBrowsingDatabaseManager::GetHashSeverestThreatType( |
const SBFullHash& hash, |
const std::vector<SBFullHashResult>& full_hashes) { |
return GetThreatTypeFromListType( |
- GetHashThreatListType(hash, full_hashes, NULL)); |
+ GetHashSeverestThreatListType(hash, full_hashes, NULL)); |
} |
// static |
-SBThreatType SafeBrowsingDatabaseManager::GetUrlThreatType( |
+SBThreatType SafeBrowsingDatabaseManager::GetUrlSeverestThreatType( |
const GURL& url, |
const std::vector<SBFullHashResult>& full_hashes, |
size_t* index) { |
return GetThreatTypeFromListType( |
- GetUrlThreatListType(url, full_hashes, index)); |
+ GetUrlSeverestThreatListType(url, full_hashes, index)); |
} |
SafeBrowsingDatabaseManager::SafeBrowsingCheck::SafeBrowsingCheck( |
@@ -171,6 +215,7 @@ void SafeBrowsingDatabaseManager::Client::OnSafeBrowsingResult( |
switch (check.check_type) { |
case safe_browsing_util::MALWARE: |
case safe_browsing_util::PHISH: |
+ case safe_browsing_util::UNWANTEDURL: |
DCHECK_EQ(1u, check.urls.size()); |
OnCheckBrowseUrlResult( |
check.urls[0], check.url_results[0], check.url_metadata[0]); |
@@ -217,6 +262,7 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager( |
enable_extension_blacklist_(false), |
enable_side_effect_free_whitelist_(false), |
enable_ip_blacklist_(false), |
+ enable_unwanted_software_blacklist_(false), |
update_in_progress_(false), |
database_update_in_progress_(false), |
closing_database_(false), |
@@ -253,6 +299,10 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager( |
// phishing protection for now. |
enable_ip_blacklist_ = enable_csd_whitelist_; |
+ // TODO(gab): Gate this on the same experiment that will soon control the UwS |
+ // URL UI. |
+ enable_unwanted_software_blacklist_ = true; |
+ |
enum SideEffectFreeWhitelistStatus { |
SIDE_EFFECT_FREE_WHITELIST_ENABLED, |
SIDE_EFFECT_FREE_WHITELIST_DISABLED, |
@@ -409,6 +459,7 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url, |
std::vector<SBThreatType> expected_threats; |
expected_threats.push_back(SB_THREAT_TYPE_URL_MALWARE); |
expected_threats.push_back(SB_THREAT_TYPE_URL_PHISHING); |
+ expected_threats.push_back(SB_THREAT_TYPE_URL_UNWANTED); |
const base::TimeTicks start = base::TimeTicks::Now(); |
if (!MakeDatabaseAvailable()) { |
@@ -421,19 +472,45 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url, |
return false; |
} |
- std::vector<SBPrefix> prefix_hits; |
+ // Cache hits should, in general, be the same for both (ignoring potential |
+ // cache evictions in the second call for entries that were just about to be |
+ // evicted in the first call). |
+ // TODO(gab): Refactor SafeBrowsingDatabase to avoid depending on this here. |
std::vector<SBFullHashResult> cache_hits; |
- bool prefix_match = |
- database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits); |
+ std::vector<SBPrefix> browse_prefix_hits; |
+ bool browse_prefix_match = database_->ContainsBrowseUrl( |
+ url, &browse_prefix_hits, &cache_hits); |
+ |
+ std::vector<SBPrefix> unwanted_prefix_hits; |
+ std::vector<SBFullHashResult> unused_cache_hits; |
+ bool unwanted_prefix_match = database_->ContainsUnwantedSoftwareUrl( |
+ url, &unwanted_prefix_hits, &unused_cache_hits); |
+ |
+ // Merge the two pre-sorted prefix hits lists. |
+ // TODO(gab): Refactor SafeBrowsingDatabase for it to return this merged list |
+ // by default rather than building it here. |
+ std::vector<SBPrefix> prefix_hits(browse_prefix_hits.size() + |
+ unwanted_prefix_hits.size()); |
+ std::merge(browse_prefix_hits.begin(), |
+ browse_prefix_hits.end(), |
+ unwanted_prefix_hits.begin(), |
+ unwanted_prefix_hits.end(), |
+ prefix_hits.begin()); |
+ prefix_hits.erase(std::unique(prefix_hits.begin(), prefix_hits.end()), |
+ prefix_hits.end()); |
UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::TimeTicks::Now() - start); |
- if (!prefix_match) |
+ if (!browse_prefix_match && !unwanted_prefix_match) |
return true; // URL is okay. |
// Needs to be asynchronous, since we could be in the constructor of a |
// ResourceDispatcherHost event handler which can't pause there. |
+ // This check will ping the Safe Browsing servers and get all lists which it |
+ // matches. These lists will then be filtered against the |expected_threats| |
+ // and the result callback for MALWARE (which is the same as for PHISH and |
+ // UNWANTEDURL) will eventually be invoked with the final decision. |
SafeBrowsingCheck* check = new SafeBrowsingCheck(std::vector<GURL>(1, url), |
std::vector<SBFullHash>(), |
client, |
@@ -705,7 +782,8 @@ SafeBrowsingDatabase* SafeBrowsingDatabaseManager::GetDatabase() { |
enable_download_whitelist_, |
enable_extension_blacklist_, |
enable_side_effect_free_whitelist_, |
- enable_ip_blacklist_); |
+ enable_ip_blacklist_, |
+ enable_unwanted_software_blacklist_); |
database->Init(SafeBrowsingService::GetBaseFilename()); |
{ |
@@ -771,10 +849,7 @@ void SafeBrowsingDatabaseManager::OnCheckDone(SafeBrowsingCheck* check) { |
} else { |
// We may have cached results for previous GetHash queries. Since |
// this data comes from cache, don't histogram hits. |
- bool is_threat = HandleOneCheck(check, check->cache_hits); |
- // cache_hits should only contain hits for a fullhash we searched for, so if |
- // we got to this point it should always result in a threat match. |
- DCHECK(is_threat); |
+ HandleOneCheck(check, check->cache_hits); |
} |
} |
@@ -940,21 +1015,26 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck( |
bool is_threat = false; |
- // TODO(shess): GetHashThreadListType() contains a loop, |
- // GetUrlThreatListType() a loop around that loop. Having another loop out |
- // here concerns me. It is likely that SAFE is an expected outcome, which |
- // means all of those loops run to completion. Refactoring this to generate a |
- // set of sorted items to compare in sequence would probably improve things. |
+ // TODO(shess): GetHashSeverestThreadListType() contains a loop, |
+ // GetUrlSeverestThreatListType() a loop around that loop. Having another |
+ // loop out here concerns me. It is likely that SAFE is an expected outcome, |
+ // which means all of those loops run to completion. Refactoring this to |
+ // generate a set of sorted items to compare in sequence would probably |
+ // improve things. |
// |
// Additionally, the set of patterns generated from the urls is very similar |
// to the patterns generated in ContainsBrowseUrl() and other database checks, |
// which are called from this code. Refactoring that across the checks could |
// interact well with batching the checks here. |
+ // TODO(gab): Fix the fact that Get(Url|Hash)SeverestThreatType() may return a |
+ // threat for which IsExpectedThreat() returns false even if |full_hashes| |
+ // actually contains an expected threat. |
+ |
for (size_t i = 0; i < check->urls.size(); ++i) { |
size_t threat_index; |
SBThreatType threat = |
- GetUrlThreatType(check->urls[i], full_hashes, &threat_index); |
+ GetUrlSeverestThreatType(check->urls[i], full_hashes, &threat_index); |
if (threat != SB_THREAT_TYPE_SAFE && |
IsExpectedThreat(threat, check->expected_threats)) { |
check->url_results[i] = threat; |
@@ -964,7 +1044,8 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck( |
} |
for (size_t i = 0; i < check->full_hashes.size(); ++i) { |
- SBThreatType threat = GetHashThreatType(check->full_hashes[i], full_hashes); |
+ SBThreatType threat = |
+ GetHashSeverestThreatType(check->full_hashes[i], full_hashes); |
if (threat != SB_THREAT_TYPE_SAFE && |
IsExpectedThreat(threat, check->expected_threats)) { |
check->full_hash_results[i] = threat; |