Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Unified Diff: chrome/browser/safe_browsing/database_manager.cc

Issue 611603002: Add the goog-unwanted-shavar list to a new SafeBrowsing PrefixSet. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: unwantedblhit => uwsblhit as per discussion with Noe for server-side logs Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/database_manager.cc
diff --git a/chrome/browser/safe_browsing/database_manager.cc b/chrome/browser/safe_browsing/database_manager.cc
index 712271ba657f0c31e19fc9bbd71aed6ef6fae19d..73acf0ae271f1df6422f97107d98576bcbb7f0e4 100644
--- a/chrome/browser/safe_browsing/database_manager.cc
+++ b/chrome/browser/safe_browsing/database_manager.cc
@@ -68,26 +68,51 @@ bool IsExpectedThreat(
threat_type);
}
-// Return the list id from the first result in |full_hashes| which matches
+// Return the severest list id from the results in |full_hashes| which matches
// |hash|, or INVALID if none match.
-safe_browsing_util::ListType GetHashThreatListType(
+safe_browsing_util::ListType GetHashSeverestThreatListType(
const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes,
size_t* index) {
+
+ safe_browsing_util::ListType severest_threat = safe_browsing_util::INVALID;
mattm 2014/11/12 03:24:30 name is a bit confusing since it's not used for th
gab 2014/11/12 16:05:08 Done.
for (size_t i = 0; i < full_hashes.size(); ++i) {
if (SBFullHashEqual(hash, full_hashes[i].hash)) {
- if (index)
- *index = i;
- return static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);
+ const safe_browsing_util::ListType threat =
+ static_cast<safe_browsing_util::ListType>(full_hashes[i].list_id);
+ switch (threat) {
+ case safe_browsing_util::INVALID:
+ // |full_hashes| should never contain INVALID as a |list_id|.
+ NOTREACHED();
+ break;
+ case safe_browsing_util::MALWARE: // Fall through.
+ case safe_browsing_util::PHISH: // Fall through.
+ case safe_browsing_util::BINURL: // Fall through.
+ case safe_browsing_util::CSDWHITELIST: // Fall through.
+ case safe_browsing_util::DOWNLOADWHITELIST: // Fall through.
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Fall through.
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Fall through.
+ case safe_browsing_util::IPBLACKLIST:
+ if (index)
+ *index = i;
+ return threat;
+ case safe_browsing_util::UNWANTEDURL:
+ // UNWANTEDURL is considered less severe than other threats, keep
+ // looking.
+ severest_threat = threat;
+ if (index)
+ *index = i;
+ break;
+ }
}
}
- return safe_browsing_util::INVALID;
+ return severest_threat;
}
// Given a URL, compare all the possible host + path full hashes to the set of
-// provided full hashes. Returns the list id of the a matching result from
-// |full_hashes|, or INVALID if none match.
-safe_browsing_util::ListType GetUrlThreatListType(
+// provided full hashes. Returns the list id of the severest matching result
+// from |full_hashes|, or INVALID if none match.
+safe_browsing_util::ListType GetUrlSeverestThreatListType(
const GURL& url,
const std::vector<SBFullHashResult>& full_hashes,
size_t* index) {
@@ -97,13 +122,31 @@ safe_browsing_util::ListType GetUrlThreatListType(
std::vector<std::string> patterns;
safe_browsing_util::GeneratePatternsToCheck(url, &patterns);
+ safe_browsing_util::ListType severest_threat = safe_browsing_util::INVALID;
for (size_t i = 0; i < patterns.size(); ++i) {
- safe_browsing_util::ListType threat = GetHashThreatListType(
+ safe_browsing_util::ListType threat = GetHashSeverestThreatListType(
SBFullHashForString(patterns[i]), full_hashes, index);
- if (threat != safe_browsing_util::INVALID)
- return threat;
+ switch (threat) {
+ case safe_browsing_util::INVALID:
+ // Ignore patterns with no matching threat.
+ break;
+ case safe_browsing_util::MALWARE: // Fall through.
+ case safe_browsing_util::PHISH: // Fall through.
+ case safe_browsing_util::BINURL: // Fall through.
+ case safe_browsing_util::CSDWHITELIST: // Fall through.
+ case safe_browsing_util::DOWNLOADWHITELIST: // Fall through.
+ case safe_browsing_util::EXTENSIONBLACKLIST: // Fall through.
+ case safe_browsing_util::SIDEEFFECTFREEWHITELIST: // Fall through.
+ case safe_browsing_util::IPBLACKLIST:
+ return threat;
+ case safe_browsing_util::UNWANTEDURL:
+ // UNWANTEDURL is considered less severe than other threats, keep
+ // looking.
+ severest_threat = threat;
+ break;
+ }
}
- return safe_browsing_util::INVALID;
+ return severest_threat;
}
SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {
@@ -112,6 +155,8 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {
return SB_THREAT_TYPE_URL_PHISHING;
case safe_browsing_util::MALWARE:
return SB_THREAT_TYPE_URL_MALWARE;
+ case safe_browsing_util::UNWANTEDURL:
+ return SB_THREAT_TYPE_URL_UNWANTED;
case safe_browsing_util::BINURL:
return SB_THREAT_TYPE_BINARY_MALWARE_URL;
case safe_browsing_util::EXTENSIONBLACKLIST:
@@ -125,20 +170,20 @@ SBThreatType GetThreatTypeFromListType(safe_browsing_util::ListType list_type) {
} // namespace
// static
-SBThreatType SafeBrowsingDatabaseManager::GetHashThreatType(
+SBThreatType SafeBrowsingDatabaseManager::GetHashSeverestThreatType(
const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes) {
return GetThreatTypeFromListType(
- GetHashThreatListType(hash, full_hashes, NULL));
+ GetHashSeverestThreatListType(hash, full_hashes, NULL));
}
// static
-SBThreatType SafeBrowsingDatabaseManager::GetUrlThreatType(
+SBThreatType SafeBrowsingDatabaseManager::GetUrlSeverestThreatType(
const GURL& url,
const std::vector<SBFullHashResult>& full_hashes,
size_t* index) {
return GetThreatTypeFromListType(
- GetUrlThreatListType(url, full_hashes, index));
+ GetUrlSeverestThreatListType(url, full_hashes, index));
}
SafeBrowsingDatabaseManager::SafeBrowsingCheck::SafeBrowsingCheck(
@@ -171,6 +216,7 @@ void SafeBrowsingDatabaseManager::Client::OnSafeBrowsingResult(
switch (check.check_type) {
case safe_browsing_util::MALWARE:
case safe_browsing_util::PHISH:
+ case safe_browsing_util::UNWANTEDURL:
DCHECK_EQ(1u, check.urls.size());
OnCheckBrowseUrlResult(
check.urls[0], check.url_results[0], check.url_metadata[0]);
@@ -217,6 +263,7 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(
enable_extension_blacklist_(false),
enable_side_effect_free_whitelist_(false),
enable_ip_blacklist_(false),
+ enable_unwanted_software_blacklist_(false),
update_in_progress_(false),
database_update_in_progress_(false),
closing_database_(false),
@@ -253,6 +300,10 @@ SafeBrowsingDatabaseManager::SafeBrowsingDatabaseManager(
// phishing protection for now.
enable_ip_blacklist_ = enable_csd_whitelist_;
+ // TODO(gab): Gate this on the same experiment that will soon control the UwS
+ // URL UI.
+ enable_unwanted_software_blacklist_ = true;
+
enum SideEffectFreeWhitelistStatus {
SIDE_EFFECT_FREE_WHITELIST_ENABLED,
SIDE_EFFECT_FREE_WHITELIST_DISABLED,
@@ -409,6 +460,7 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,
std::vector<SBThreatType> expected_threats;
expected_threats.push_back(SB_THREAT_TYPE_URL_MALWARE);
expected_threats.push_back(SB_THREAT_TYPE_URL_PHISHING);
+ expected_threats.push_back(SB_THREAT_TYPE_URL_UNWANTED);
const base::TimeTicks start = base::TimeTicks::Now();
if (!MakeDatabaseAvailable()) {
@@ -421,24 +473,66 @@ bool SafeBrowsingDatabaseManager::CheckBrowseUrl(const GURL& url,
return false;
}
- std::vector<SBPrefix> prefix_hits;
+ // Cache hits should be the same for both (as verified by the DCHECK below).
+ // TODO(gab): Refactor SafeBrowsingDatabase to avoid depending on this here.
std::vector<SBFullHashResult> cache_hits;
- bool prefix_match =
- database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits);
+ std::vector<SBPrefix> browse_prefix_hits;
+ bool browse_prefix_match = database_->ContainsBrowseUrl(
+ url, &browse_prefix_hits, &cache_hits);
+
+ std::vector<SBPrefix> unwanted_prefix_hits;
+ std::vector<SBFullHashResult> other_cache_hits;
mattm 2014/11/12 03:24:30 unused_cache_hits ?
gab 2014/11/12 16:05:08 Done.
+ bool unwanted_prefix_match = database_->ContainsUnwantedSoftwareUrl(
+ url, &unwanted_prefix_hits, &other_cache_hits);
+
+#if defined(OS_WIN)
+ // TODO(gab): Figure out why this doesn't pass on non-win compilers (padding?)
+ // leave it here as Win-only for now to ensure changes to SBFullHashResult's
+ // format are at least caught by some bots...
mattm 2014/11/12 03:24:30 It's not win specific, but it is padding, yes. Ex
gab 2014/11/12 16:05:08 Removed.
+ static_assert(sizeof(SBFullHashResult) == sizeof(SBFullHash) + sizeof(int) +
+ sizeof(std::string),
+ "Need to update equality predicate below.");
+#endif
+ DCHECK(cache_hits.size() == other_cache_hits.size() &&
+ std::equal(cache_hits.begin(), cache_hits.end(),
+ other_cache_hits.begin(),
+ [](const SBFullHashResult& a, const SBFullHashResult& b) {
+ return SBFullHashEqual(a.hash, b.hash) &&
+ a.list_id == b.list_id && a.metadata == b.metadata;
+ }));
mattm 2014/11/12 03:24:30 Technically the cache hits could be different, sin
gab 2014/11/12 16:05:08 Good catch, removed.
+
+ // Merge the two pre-sorted prefix hits lists.
+ // TODO(gab): Refactor SafeBrowsingDatabase for it to return this merged list
+ // by default rather than building it here.
+ DCHECK(std::is_sorted(browse_prefix_hits.begin(), browse_prefix_hits.end()));
+ DCHECK(std::is_sorted(unwanted_prefix_hits.begin(),
gab 2014/11/12 16:05:08 Also removed these two checks as std::is_sorted is
+ unwanted_prefix_hits.end()));
+ std::vector<SBPrefix> prefix_hits(
+ browse_prefix_hits.size() + unwanted_prefix_hits.size());
+ std::merge(browse_prefix_hits.begin(), browse_prefix_hits.end(),
+ unwanted_prefix_hits.begin(), unwanted_prefix_hits.end(),
+ prefix_hits.begin());
+ prefix_hits.erase(std::unique(prefix_hits.begin(), prefix_hits.end()),
+ prefix_hits.end());
UMA_HISTOGRAM_TIMES("SB2.FilterCheck", base::TimeTicks::Now() - start);
- if (!prefix_match)
+ if (!browse_prefix_match && !unwanted_prefix_match)
return true; // URL is okay.
// Needs to be asynchronous, since we could be in the constructor of a
// ResourceDispatcherHost event handler which can't pause there.
- SafeBrowsingCheck* check = new SafeBrowsingCheck(std::vector<GURL>(1, url),
- std::vector<SBFullHash>(),
- client,
- safe_browsing_util::MALWARE,
- expected_threats);
+ // This check will ping the Safe Browsing servers and get all lists which it
+ // matches. These lists will then be filtered against the |expected_threats|
+ // and the result callback for MALWARE (which is the same as for PHISH and
+ // UNWANTEDURL) will eventually be invoked with the final decision.
+ SafeBrowsingCheck* check =
+ new SafeBrowsingCheck(std::vector<GURL>(1, url),
+ std::vector<SBFullHash>(),
+ client,
+ safe_browsing_util::MALWARE,
+ expected_threats);
check->need_get_hash = cache_hits.empty();
check->prefix_hits.swap(prefix_hits);
check->cache_hits.swap(cache_hits);
@@ -705,7 +799,8 @@ SafeBrowsingDatabase* SafeBrowsingDatabaseManager::GetDatabase() {
enable_download_whitelist_,
enable_extension_blacklist_,
enable_side_effect_free_whitelist_,
- enable_ip_blacklist_);
+ enable_ip_blacklist_,
+ enable_unwanted_software_blacklist_);
database->Init(SafeBrowsingService::GetBaseFilename());
{
@@ -771,10 +866,7 @@ void SafeBrowsingDatabaseManager::OnCheckDone(SafeBrowsingCheck* check) {
} else {
// We may have cached results for previous GetHash queries. Since
// this data comes from cache, don't histogram hits.
- bool is_threat = HandleOneCheck(check, check->cache_hits);
- // cache_hits should only contain hits for a fullhash we searched for, so if
- // we got to this point it should always result in a threat match.
- DCHECK(is_threat);
+ HandleOneCheck(check, check->cache_hits);
}
}
@@ -940,11 +1032,12 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(
bool is_threat = false;
- // TODO(shess): GetHashThreadListType() contains a loop,
- // GetUrlThreatListType() a loop around that loop. Having another loop out
- // here concerns me. It is likely that SAFE is an expected outcome, which
- // means all of those loops run to completion. Refactoring this to generate a
- // set of sorted items to compare in sequence would probably improve things.
+ // TODO(shess): GetHashSeverestThreadListType() contains a loop,
+ // GetUrlSeverestThreatListType() a loop around that loop. Having another
+ // loop out here concerns me. It is likely that SAFE is an expected outcome,
+ // which means all of those loops run to completion. Refactoring this to
+ // generate a set of sorted items to compare in sequence would probably
+ // improve things.
//
// Additionally, the set of patterns generated from the urls is very similar
// to the patterns generated in ContainsBrowseUrl() and other database checks,
@@ -954,7 +1047,7 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(
for (size_t i = 0; i < check->urls.size(); ++i) {
size_t threat_index;
SBThreatType threat =
- GetUrlThreatType(check->urls[i], full_hashes, &threat_index);
+ GetUrlSeverestThreatType(check->urls[i], full_hashes, &threat_index);
if (threat != SB_THREAT_TYPE_SAFE &&
IsExpectedThreat(threat, check->expected_threats)) {
mattm 2014/11/12 03:24:30 Hmm, I suppose ideally the severest threat should
gab 2014/11/12 16:05:08 Good point. Since this is an existing problem thou
check->url_results[i] = threat;
@@ -964,7 +1057,8 @@ bool SafeBrowsingDatabaseManager::HandleOneCheck(
}
for (size_t i = 0; i < check->full_hashes.size(); ++i) {
- SBThreatType threat = GetHashThreatType(check->full_hashes[i], full_hashes);
+ SBThreatType threat = GetHashSeverestThreatType(check->full_hashes[i],
+ full_hashes);
if (threat != SB_THREAT_TYPE_SAFE &&
IsExpectedThreat(threat, check->expected_threats)) {
check->full_hash_results[i] = threat;
« no previous file with comments | « chrome/browser/safe_browsing/database_manager.h ('k') | chrome/browser/safe_browsing/database_manager_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698