Index: chrome/browser/safe_browsing/database_manager.cc |
diff --git a/chrome/browser/safe_browsing/database_manager.cc b/chrome/browser/safe_browsing/database_manager.cc |
index 712271ba657f0c31e19fc9bbd71aed6ef6fae19d..73acf0ae271f1df6422f97107d98576bcbb7f0e4 100644 |
--- a/chrome/browser/safe_browsing/database_manager.cc |
+++ b/chrome/browser/safe_browsing/database_manager.cc |
@@ -68,26 +68,51 @@ bool IsExpectedThreat( |
threat_type); |
} |
-// Return the list id from the first result in |full_hashes| which matches |
+// Return the severest list id from the results in |full_hashes| which matches |
// |hash|, or INVALID if none match. |
-safe_browsing_util::ListType GetHashThreatListType( |
+safe_browsing_util::ListType GetHashSeverestThreatListType( |
const SBFullHash& hash, |
const std::vector<SBFullHashResult>& full_hashes, |
size_t* index) { |
+ |
+ safe_browsing_util::ListType severest_threat = safe_browsing_util::INVALID; |
mattm
2014/11/12 03:24:30
name is a bit confusing since it's not used for th
gab
2014/11/12 16:05:08
Done.
|
for (size_t i = 0; i < full_hashes.size(); ++i) { |
if (SBFullHashEqual(hash, full_hashes[i].hash)) { |
- if (index) |
- *index = i; |
- return static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id); |
+ const safe_browsing_util::ListType threat = |
+ static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id); |
+ switch (threat) { |
+ case safe_browsing_util::INVALID: |
+ // |full_hashes| should never contain INVALID as a |list_id|. |
+ NOTREACHED(); |
+ break; |
+ case safe_browsing_util::MALWARE: // Fall through. |
+ case safe_browsing_util::PHISH: // Fall through. |
+ case safe_browsing_util::BINURL: // Fall through. |
+ case safe_browsing_util::CSDWHITELIST: // Fall through. |
+ case safe_browsing_util::DOWNLOADWHITELIST: // Fall through. |
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Fall through. |
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Fall through. |
+ case safe_browsing_util::IPBLACKLIST: |
+ if (index) |
+ *index = i; |
+ return threat; |
+ case safe_browsing_util::UNWANTEDURL: |
+ // UNWANTEDURL is considered less severe than other threats, keep |
+ // looking. |
+ severest_threat = threat; |
+ if (index) |
+ *index = i; |
+ break; |
+ } |
} |
} |
- return safe_browsing_util::INVALID; |
+ return severest_threat; |
} |
// Given a URL, compare all the possible host + path full hashes to the set of |
-// provided full hashes. Returns the list id of the a matching result from |
-// |full_hashes|, or INVALID if none match. |
-safe_browsing_util::ListType GetUrlThreatListType( |
+// provided full hashes. Returns the list id of the severest matching result |
+// from |full_hashes|, or INVALID if none match. |
+safe_browsing_util::ListType GetUrlSeverestThreatListType( |
const GURL& url, |
const std::vector<SBFullHashResult>& full_hashes, |
size_t* index) { |
@@ -97,13 +122,31 @@ safe_browsing_util::ListType GetUrlThreatListType( |
std::vector<std::string> patterns; |
safe_browsing_util::GeneratePatternsToCheck(url, &patterns); |
+ safe_browsing_util::ListType severest_threat = safe_browsing_util::INVALID; |
for (size_t i = 0; i < patterns.size(); ++i) { |
- safe_browsing_util::ListType threat = GetHashThreatListType( |
+ safe_browsing_util::ListType threat = GetHashSeverestThreatListType( |
SBFullHashForString(patterns[i]), full_hashes, index); |
- if (threat != safe_browsing_util::INVALID) |
- return threat; |
+ switch (threat) { |
+ case safe_browsing_util::INVALID: |
+ // Ignore patterns with no matching threat. |
+ break; |
+ case safe_browsing_util::MALWARE: // Fall through. |
+ case safe_browsing_util::PHISH: // Fall through. |
+ case safe_browsing_util::BINURL: // Fall through. |
+ case safe_browsing_util::CSDWHITELIST: // Fall through. |
+ case safe_browsing_util::DOWNLOADWHITELIST: // Fall through. |
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Fall through. |
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Fall through. |
+ case safe_browsing_util::IPBLACKLIST: |
+ return threat; |
+ case safe_browsing_util::UNWANTEDURL: |
+ // UNWANTEDURL is considered less severe than other threats, keep |
+ // looking. |
+ severest_threat = threat; |
+ break; |
+ } |
} |
- return safe_browsing_util::INVALID; |
+ return severest_threat; |
} |
SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) { |
@@ -112,6 +155,8 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) { |
return SB_THREAT_TYPE_URL_PHISHING; |
case safe_browsing_util::MALWARE: |
return SB_THREAT_TYPE_URL_MALWARE; |
+ case safe_browsing_util::UNWANTEDURL: |
+ return SB_THREAT_TYPE_URL_UNWANTED; |
case safe_browsing_util::BINURL: |
return SB_THREAT_TYPE_BINARY_MALWARE_URL; |
case safe_browsing_util::EXTENSIONBLACKLIST: |
@@ -125,20 +170,20 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) { |
} // namespace |
// static |
-SBThreatType SafeBrowsingDatabaseManager::GetHashThreatType( |
+SBThreatType SafeBrowsingDatabaseManager::GetHashSeverestThreatType( |
const SBFullHash& hash, |
const std::vector<SBFullHashResult>& full_hashes) { |
return GetThreatTypeFromListType( |
- GetHashThreatListType(hash, full_hashes, NULL)); |
+ GetHashSeverestThreatListType(hash, full_hashes, NULL)); |
} |
// static |
-SBThreatType SafeBrowsingDatabaseManager::GetUrlThreatType( |
+SBThreatType SafeBrowsingDatabaseManager::GetUrlSeverestThreatType( |
const GURL& url, |
const std::vector<SBFullHashResult>& full_hashes, |
size_t* index) { |
return GetThreatTypeFromListType( |
- GetUrlThreatListType(url, full_hashes, index)); |
+ GetUrlSeverestThreatListType(url, full_hashes, index)); |
} |
SafeBrowsingDatabaseManager::SafeBrowsingCheck::SafeBrowsingCheck( |
@@ -171,6 +216,7 @@ void SafeBrowsingDatabaseManager::Client::OnSafeBrowsingResult( |
switch (check.check_type) { |
case safe_browsing_util::MALWARE: |
case safe_browsing_util::PHISH: |
+ case safe_browsing_util::UNWANTEDURL: |
DCHECK_EQ(1u, check.urls.size()); |
OnCheckBrowseUrlResult( |
check.urls[0], check.url_results[0], check.url_metadata[0]); |
@@ -217,6 +263,7 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager( |
enable_extension_blacklist_(false), |
enable_side_effect_free_whitelist_(false), |
enable_ip_blacklist_(false), |
+ enable_unwanted_software_blacklist_(false), |
update_in_progress_(false), |
database_update_in_progress_(false), |
closing_database_(false), |
@@ -253,6 +300,10 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager( |
// phishing protection for now. |
enable_ip_blacklist_ = enable_csd_whitelist_; |
+ // TODO(gab): Gate this on the same experiment that will soon control the UwS |
+ // URL UI. |
+ enable_unwanted_software_blacklist_ = true; |
+ |
enum SideEffectFreeWhitelistStatus { |
SIDE_EFFECT_FREE_WHITELIST_ENABLED, |
SIDE_EFFECT_FREE_WHITELIST_DISABLED, |
@@ -409,6 +460,7 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url, |
std::vector<SBThreatType> expected_threats; |
expected_threats.push_back(SB_THREAT_TYPE_URL_MALWARE); |
expected_threats.push_back(SB_THREAT_TYPE_URL_PHISHING); |
+ expected_threats.push_back(SB_THREAT_TYPE_URL_UNWANTED); |
const base::TimeTicks start = base::TimeTicks::Now(); |
if (!MakeDatabaseAvailable()) { |
@@ -421,24 +473,66 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url, |
return false; |
} |
- std::vector<SBPrefix> prefix_hits; |
+ // Cache hits should be the same for both (as verified by the DCHECK below). |
+ // TODO(gab): Refactor SafeBrowsingDatabase to avoid depending on this here. |
std::vector<SBFullHashResult> cache_hits; |
- bool prefix_match = |
- database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits); |
+ std::vector<SBPrefix> browse_prefix_hits; |
+ bool browse_prefix_match = database_->ContainsBrowseUrl( |
+ url, &browse_prefix_hits, &cache_hits); |
+ |
+ std::vector<SBPrefix> unwanted_prefix_hits; |
+ std::vector<SBFullHashResult> other_cache_hits; |
mattm
2014/11/12 03:24:30
unused_cache_hits ?
gab
2014/11/12 16:05:08
Done.
|
+ bool unwanted_prefix_match = database_->ContainsUnwantedSoftwareUrl( |
+ url, &unwanted_prefix_hits, &other_cache_hits); |
+ |
+#if defined(OS_WIN) |
+ // TODO(gab): Figure out why this doesn't pass on non-win compilers (padding?) |
+ // leave it here as Win-only for now to ensure changes to SBFullHashResult's |
+ // format are at least caught by some bots... |
mattm
2014/11/12 03:24:30
It's not win specific, but it is padding, yes. Ex
gab
2014/11/12 16:05:08
Removed.
|
+ static_assert(sizeof(SBFullHashResult) == sizeof(SBFullHash) + sizeof(int) + |
+ sizeof(std::string), |
+ "Need to update equality predicate below."); |
+#endif |
+ DCHECK(cache_hits.size() == other_cache_hits.size() && |
+ std::equal(cache_hits.begin(), cache_hits.end(), |
+ other_cache_hits.begin(), |
+ [](const SBFullHashResult& a, const SBFullHashResult& b) { |
+ return SBFullHashEqual(a.hash, b.hash) && |
+ a.list_id == b.list_id && a.metadata == b.metadata; |
+ })); |
mattm
2014/11/12 03:24:30
Technically the cache hits could be different, sin
gab
2014/11/12 16:05:08
Good catch, removed.
|
+ |
+ // Merge the two pre-sorted prefix hits lists. |
+ // TODO(gab): Refactor SafeBrowsingDatabase for it to return this merged list |
+ // by default rather than building it here. |
+ DCHECK(std::is_sorted(browse_prefix_hits.begin(), browse_prefix_hits.end())); |
+ DCHECK(std::is_sorted(unwanted_prefix_hits.begin(), |
gab
2014/11/12 16:05:08
Also removed these two checks as std::is_sorted is
|
+ unwanted_prefix_hits.end())); |
+ std::vector<SBPrefix> prefix_hits( |
+ browse_prefix_hits.size() + unwanted_prefix_hits.size()); |
+ std::merge(browse_prefix_hits.begin(), browse_prefix_hits.end(), |
+ unwanted_prefix_hits.begin(), unwanted_prefix_hits.end(), |
+ prefix_hits.begin()); |
+ prefix_hits.erase(std::unique(prefix_hits.begin(), prefix_hits.end()), |
+ prefix_hits.end()); |
UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::TimeTicks::Now() - start); |
- if (!prefix_match) |
+ if (!browse_prefix_match && !unwanted_prefix_match) |
return true; // URL is okay. |
// Needs to be asynchronous, since we could be in the constructor of a |
// ResourceDispatcherHost event handler which can't pause there. |
- SafeBrowsingCheck* check = new SafeBrowsingCheck(std::vector<GURL>(1, url), |
- std::vector<SBFullHash>(), |
- client, |
- safe_browsing_util::MALWARE, |
- expected_threats); |
+ // This check will ping the Safe Browsing servers and get all lists which it |
+ // matches. These lists will then be filtered against the |expected_threats| |
+ // and the result callback for MALWARE (which is the same as for PHISH and |
+ // UNWANTEDURL) will eventually be invoked with the final decision. |
+ SafeBrowsingCheck* check = |
+ new SafeBrowsingCheck(std::vector<GURL>(1, url), |
+ std::vector<SBFullHash>(), |
+ client, |
+ safe_browsing_util::MALWARE, |
+ expected_threats); |
check->need_get_hash = cache_hits.empty(); |
check->prefix_hits.swap(prefix_hits); |
check->cache_hits.swap(cache_hits); |
@@ -705,7 +799,8 @@ SafeBrowsingDatabase* SafeBrowsingDatabaseManager::GetDatabase() { |
enable_download_whitelist_, |
enable_extension_blacklist_, |
enable_side_effect_free_whitelist_, |
- enable_ip_blacklist_); |
+ enable_ip_blacklist_, |
+ enable_unwanted_software_blacklist_); |
database->Init(SafeBrowsingService::GetBaseFilename()); |
{ |
@@ -771,10 +866,7 @@ void SafeBrowsingDatabaseManager::OnCheckDone(SafeBrowsingCheck* check) { |
} else { |
// We may have cached results for previous GetHash queries. Since |
// this data comes from cache, don't histogram hits. |
- bool is_threat = HandleOneCheck(check, check->cache_hits); |
- // cache_hits should only contain hits for a fullhash we searched for, so if |
- // we got to this point it should always result in a threat match. |
- DCHECK(is_threat); |
+ HandleOneCheck(check, check->cache_hits); |
} |
} |
@@ -940,11 +1032,12 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck( |
bool is_threat = false; |
- // TODO(shess): GetHashThreadListType() contains a loop, |
- // GetUrlThreatListType() a loop around that loop. Having another loop out |
- // here concerns me. It is likely that SAFE is an expected outcome, which |
- // means all of those loops run to completion. Refactoring this to generate a |
- // set of sorted items to compare in sequence would probably improve things. |
+ // TODO(shess): GetHashSeverestThreadListType() contains a loop, |
+ // GetUrlSeverestThreatListType() a loop around that loop. Having another |
+ // loop out here concerns me. It is likely that SAFE is an expected outcome, |
+ // which means all of those loops run to completion. Refactoring this to |
+ // generate a set of sorted items to compare in sequence would probably |
+ // improve things. |
// |
// Additionally, the set of patterns generated from the urls is very similar |
// to the patterns generated in ContainsBrowseUrl() and other database checks, |
@@ -954,7 +1047,7 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck( |
for (size_t i = 0; i < check->urls.size(); ++i) { |
size_t threat_index; |
SBThreatType threat = |
- GetUrlThreatType(check->urls[i], full_hashes, &threat_index); |
+ GetUrlSeverestThreatType(check->urls[i], full_hashes, &threat_index); |
if (threat != SB_THREAT_TYPE_SAFE && |
IsExpectedThreat(threat, check->expected_threats)) { |
mattm
2014/11/12 03:24:30
Hmm, I suppose ideally the severest threat should
gab
2014/11/12 16:05:08
Good point. Since this is an existing problem thou
|
check->url_results[i] = threat; |
@@ -964,7 +1057,8 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck( |
} |
for (size_t i = 0; i < check->full_hashes.size(); ++i) { |
- SBThreatType threat = GetHashThreatType(check->full_hashes[i], full_hashes); |
+ SBThreatType threat = GetHashSeverestThreatType(check->full_hashes[i], |
+ full_hashes); |
if (threat != SB_THREAT_TYPE_SAFE && |
IsExpectedThreat(threat, check->expected_threats)) { |
check->full_hash_results[i] = threat; |