Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2494)

Unified Diff: chrome/browser/safe_browsing/database_manager.cc

Issue 611603002: Add the goog-unwanted-shavar list to a new SafeBrowsing PrefixSet. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: nits and cl format Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/database_manager.cc
diff --git a/chrome/browser/safe_browsing/database_manager.cc b/chrome/browser/safe_browsing/database_manager.cc
index 712271ba657f0c31e19fc9bbd71aed6ef6fae19d..e64b02558b49a9fdaaba79f725b7ee7ba2619deb 100644
--- a/chrome/browser/safe_browsing/database_manager.cc
+++ b/chrome/browser/safe_browsing/database_manager.cc
@@ -68,26 +68,50 @@ bool IsExpectedThreat(
threat_type);
}
-// Return the list id from the first result in |full_hashes| which matches
+// Return the severest list id from the results in |full_hashes| which matches
// |hash|, or INVALID if none match.
-safe_browsing_util::ListType GetHashThreatListType(
+safe_browsing_util::ListType GetHashSeverestThreatListType(
const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes,
size_t* index) {
+ safe_browsing_util::ListType pending_threat = safe_browsing_util::INVALID;
for (size_t i = 0; i < full_hashes.size(); ++i) {
if (SBFullHashEqual(hash, full_hashes[i].hash)) {
- if (index)
- *index = i;
- return static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);
+ const safe_browsing_util::ListType threat =
+ static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);
+ switch (threat) {
+ case safe_browsing_util::INVALID:
+ // |full_hashes| should never contain INVALID as a |list_id|.
+ NOTREACHED();
+ break;
+ case safe_browsing_util::MALWARE: // Falls through.
+ case safe_browsing_util::PHISH: // Falls through.
+ case safe_browsing_util::BINURL: // Falls through.
+ case safe_browsing_util::CSDWHITELIST: // Falls through.
+ case safe_browsing_util::DOWNLOADWHITELIST: // Falls through.
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Falls through.
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Falls through.
+ case safe_browsing_util::IPBLACKLIST:
+ if (index)
+ *index = i;
+ return threat;
+ case safe_browsing_util::UNWANTEDURL:
+ // UNWANTEDURL is considered less severe than other threats, keep
+ // looking.
+ pending_threat = threat;
+ if (index)
+ *index = i;
+ break;
+ }
}
}
- return safe_browsing_util::INVALID;
+ return pending_threat;
}
// Given a URL, compare all the possible host + path full hashes to the set of
-// provided full hashes. Returns the list id of the a matching result from
-// |full_hashes|, or INVALID if none match.
-safe_browsing_util::ListType GetUrlThreatListType(
+// provided full hashes. Returns the list id of the severest matching result
+// from |full_hashes|, or INVALID if none match.
+safe_browsing_util::ListType GetUrlSeverestThreatListType(
const GURL& url,
const std::vector<SBFullHashResult>& full_hashes,
size_t* index) {
@@ -97,13 +121,31 @@ safe_browsing_util::ListType GetUrlThreatListType(
std::vector<std::string> patterns;
safe_browsing_util::GeneratePatternsToCheck(url, &patterns);
+ safe_browsing_util::ListType pending_threat = safe_browsing_util::INVALID;
for (size_t i = 0; i < patterns.size(); ++i) {
- safe_browsing_util::ListType threat = GetHashThreatListType(
+ safe_browsing_util::ListType threat = GetHashSeverestThreatListType(
SBFullHashForString(patterns[i]), full_hashes, index);
- if (threat != safe_browsing_util::INVALID)
- return threat;
+ switch (threat) {
+ case safe_browsing_util::INVALID:
+ // Ignore patterns with no matching threat.
+ break;
+ case safe_browsing_util::MALWARE: // Falls through.
+ case safe_browsing_util::PHISH: // Falls through.
+ case safe_browsing_util::BINURL: // Falls through.
+ case safe_browsing_util::CSDWHITELIST: // Falls through.
+ case safe_browsing_util::DOWNLOADWHITELIST: // Falls through.
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Falls through.
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Falls through.
+ case safe_browsing_util::IPBLACKLIST:
+ return threat;
+ case safe_browsing_util::UNWANTEDURL:
+ // UNWANTEDURL is considered less severe than other threats, keep
+ // looking.
+ pending_threat = threat;
+ break;
+ }
}
- return safe_browsing_util::INVALID;
+ return pending_threat;
}
SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {
@@ -112,6 +154,8 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {
return SB_THREAT_TYPE_URL_PHISHING;
case safe_browsing_util::MALWARE:
return SB_THREAT_TYPE_URL_MALWARE;
+ case safe_browsing_util::UNWANTEDURL:
+ return SB_THREAT_TYPE_URL_UNWANTED;
case safe_browsing_util::BINURL:
return SB_THREAT_TYPE_BINARY_MALWARE_URL;
case safe_browsing_util::EXTENSIONBLACKLIST:
@@ -125,20 +169,20 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {
} // namespace
// static
-SBThreatType SafeBrowsingDatabaseManager::GetHashThreatType(
+SBThreatType SafeBrowsingDatabaseManager::GetHashSeverestThreatType(
const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes) {
return GetThreatTypeFromListType(
- GetHashThreatListType(hash, full_hashes, NULL));
+ GetHashSeverestThreatListType(hash, full_hashes, NULL));
}
// static
-SBThreatType SafeBrowsingDatabaseManager::GetUrlThreatType(
+SBThreatType SafeBrowsingDatabaseManager::GetUrlSeverestThreatType(
const GURL& url,
const std::vector<SBFullHashResult>& full_hashes,
size_t* index) {
return GetThreatTypeFromListType(
- GetUrlThreatListType(url, full_hashes, index));
+ GetUrlSeverestThreatListType(url, full_hashes, index));
}
SafeBrowsingDatabaseManager::SafeBrowsingCheck::SafeBrowsingCheck(
@@ -171,6 +215,7 @@ void SafeBrowsingDatabaseManager::Client::OnSafeBrowsingResult(
switch (check.check_type) {
case safe_browsing_util::MALWARE:
case safe_browsing_util::PHISH:
+ case safe_browsing_util::UNWANTEDURL:
DCHECK_EQ(1u, check.urls.size());
OnCheckBrowseUrlResult(
check.urls[0], check.url_results[0], check.url_metadata[0]);
@@ -217,6 +262,7 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(
enable_extension_blacklist_(false),
enable_side_effect_free_whitelist_(false),
enable_ip_blacklist_(false),
+ enable_unwanted_software_blacklist_(false),
update_in_progress_(false),
database_update_in_progress_(false),
closing_database_(false),
@@ -253,6 +299,10 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(
// phishing protection for now.
enable_ip_blacklist_ = enable_csd_whitelist_;
+ // TODO(gab): Gate this on the same experiment that will soon control the UwS
+ // URL UI.
+ enable_unwanted_software_blacklist_ = true;
+
enum SideEffectFreeWhitelistStatus {
SIDE_EFFECT_FREE_WHITELIST_ENABLED,
SIDE_EFFECT_FREE_WHITELIST_DISABLED,
@@ -409,6 +459,7 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,
std::vector<SBThreatType> expected_threats;
expected_threats.push_back(SB_THREAT_TYPE_URL_MALWARE);
expected_threats.push_back(SB_THREAT_TYPE_URL_PHISHING);
+ expected_threats.push_back(SB_THREAT_TYPE_URL_UNWANTED);
const base::TimeTicks start = base::TimeTicks::Now();
if (!MakeDatabaseAvailable()) {
@@ -421,19 +472,45 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,
return false;
}
- std::vector<SBPrefix> prefix_hits;
+ // Cache hits should, in general, be the same for both (ignoring potential
+ // cache evictions in the second call for entries that were just about to be
+ // evicted in the first call).
+ // TODO(gab): Refactor SafeBrowsingDatabase to avoid depending on this here.
std::vector<SBFullHashResult> cache_hits;
- bool prefix_match =
- database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits);
+ std::vector<SBPrefix> browse_prefix_hits;
+ bool browse_prefix_match = database_->ContainsBrowseUrl(
+ url, &browse_prefix_hits, &cache_hits);
+
+ std::vector<SBPrefix> unwanted_prefix_hits;
+ std::vector<SBFullHashResult> unused_cache_hits;
+ bool unwanted_prefix_match = database_->ContainsUnwantedSoftwareUrl(
+ url, &unwanted_prefix_hits, &unused_cache_hits);
+
+ // Merge the two pre-sorted prefix hits lists.
+ // TODO(gab): Refactor SafeBrowsingDatabase for it to return this merged list
+ // by default rather than building it here.
+ std::vector<SBPrefix> prefix_hits(browse_prefix_hits.size() +
+ unwanted_prefix_hits.size());
+ std::merge(browse_prefix_hits.begin(),
+ browse_prefix_hits.end(),
+ unwanted_prefix_hits.begin(),
+ unwanted_prefix_hits.end(),
+ prefix_hits.begin());
+ prefix_hits.erase(std::unique(prefix_hits.begin(), prefix_hits.end()),
+ prefix_hits.end());
UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::TimeTicks::Now() - start);
- if (!prefix_match)
+ if (!browse_prefix_match && !unwanted_prefix_match)
return true; // URL is okay.
// Needs to be asynchronous, since we could be in the constructor of a
// ResourceDispatcherHost event handler which can't pause there.
+ // This check will ping the Safe Browsing servers and get all lists which it
+ // matches. These lists will then be filtered against the |expected_threats|
+ // and the result callback for MALWARE (which is the same as for PHISH and
+ // UNWANTEDURL) will eventually be invoked with the final decision.
SafeBrowsingCheck* check = new SafeBrowsingCheck(std::vector<GURL>(1, url),
std::vector<SBFullHash>(),
client,
@@ -705,7 +782,8 @@ SafeBrowsingDatabase* SafeBrowsingDatabaseManager::GetDatabase() {
enable_download_whitelist_,
enable_extension_blacklist_,
enable_side_effect_free_whitelist_,
- enable_ip_blacklist_);
+ enable_ip_blacklist_,
+ enable_unwanted_software_blacklist_);
database->Init(SafeBrowsingService::GetBaseFilename());
{
@@ -771,10 +849,7 @@ void SafeBrowsingDatabaseManager::OnCheckDone(SafeBrowsingCheck* check) {
} else {
// We may have cached results for previous GetHash queries. Since
// this data comes from cache, don't histogram hits.
- bool is_threat = HandleOneCheck(check, check->cache_hits);
- // cache_hits should only contain hits for a fullhash we searched for, so if
- // we got to this point it should always result in a threat match.
- DCHECK(is_threat);
+ HandleOneCheck(check, check->cache_hits);
}
}
@@ -940,21 +1015,26 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(
bool is_threat = false;
- // TODO(shess): GetHashThreadListType() contains a loop,
- // GetUrlThreatListType() a loop around that loop. Having another loop out
- // here concerns me. It is likely that SAFE is an expected outcome, which
- // means all of those loops run to completion. Refactoring this to generate a
- // set of sorted items to compare in sequence would probably improve things.
+ // TODO(shess): GetHashSeverestThreadListType() contains a loop,
+ // GetUrlSeverestThreatListType() a loop around that loop. Having another
+ // loop out here concerns me. It is likely that SAFE is an expected outcome,
+ // which means all of those loops run to completion. Refactoring this to
+ // generate a set of sorted items to compare in sequence would probably
+ // improve things.
//
// Additionally, the set of patterns generated from the urls is very similar
// to the patterns generated in ContainsBrowseUrl() and other database checks,
// which are called from this code. Refactoring that across the checks could
// interact well with batching the checks here.
+ // TODO(gab): Fix the fact that Get(Url|Hash)SeverestThreatType() may return a
+ // threat for which IsExpectedThreat() returns false even if |full_hashes|
+ // actually contains an expected threat.
+
for (size_t i = 0; i < check->urls.size(); ++i) {
size_t threat_index;
SBThreatType threat =
- GetUrlThreatType(check->urls[i], full_hashes, &threat_index);
+ GetUrlSeverestThreatType(check->urls[i], full_hashes, &threat_index);
if (threat != SB_THREAT_TYPE_SAFE &&
IsExpectedThreat(threat, check->expected_threats)) {
check->url_results[i] = threat;
@@ -964,7 +1044,8 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(
}
for (size_t i = 0; i < check->full_hashes.size(); ++i) {
- SBThreatType threat = GetHashThreatType(check->full_hashes[i], full_hashes);
+ SBThreatType threat =
+ GetHashSeverestThreatType(check->full_hashes[i], full_hashes);
if (threat != SB_THREAT_TYPE_SAFE &&
IsExpectedThreat(threat, check->expected_threats)) {
check->full_hash_results[i] = threat;

Powered by Google App Engine
This is Rietveld 408576698