Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3931)

Unified Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 220493003: Safebrowsing: change gethash caching to match api 2.3 rules, fix some corner cases. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase (including 227613008) Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/safe_browsing/safe_browsing_database.cc
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 2b5b2e256bcdf199691453784246ac5a14ccc20c..282188133d98503176983c86e24cba6a446f116d 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -66,9 +66,6 @@ const base::FilePath::CharType kIPBlacklistDBFile[] =
// this.
const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
-// The maximum staleness for a cached entry.
-const int kMaxStalenessMinutes = 45;
-
// Maximum number of entries we allow in any of the whitelists.
// If a whitelist on disk contains more entries then all lookups to
// the whitelist will be considered a match.
@@ -185,51 +182,6 @@ bool MatchAddPrefixes(SafeBrowsingStore* store,
return found_match;
}
-// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
-// add them to |full_hits| if not expired. "Not expired" is when
-// either |last_update| was recent enough, or the item has been
-// received recently enough. Expired items are not deleted because a
-// future update may make them acceptable again.
-//
-// For efficiency reasons the code walks |prefix_hits| and
-// |full_hashes| in parallel, so they must be sorted by prefix.
-void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
- const std::vector<SBAddFullHash>& full_hashes,
- std::vector<SBFullHashResult>* full_hits,
- base::Time last_update) {
- const base::Time expire_time =
- base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
-
- std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
- std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
-
- while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
- if (*piter < hiter->full_hash.prefix) {
- ++piter;
- } else if (hiter->full_hash.prefix < *piter) {
- ++hiter;
- } else {
- if (expire_time < last_update ||
- expire_time.ToTimeT() < hiter->received) {
- SBFullHashResult result;
- const int list_bit = GetListIdBit(hiter->chunk_id);
- DCHECK(list_bit == safe_browsing_util::MALWARE ||
- list_bit == safe_browsing_util::PHISH);
- const safe_browsing_util::ListType list_id =
- static_cast<safe_browsing_util::ListType>(list_bit);
- if (!safe_browsing_util::GetListName(list_id, &result.list_name))
- continue;
- result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
- result.hash = hiter->full_hash;
- full_hits->push_back(result);
- }
-
- // Only increment |hiter|, |piter| might have multiple hits.
- ++hiter;
- }
- }
-}
-
// This function generates a chunk range string for |chunks|. It
// outputs one chunk range string per list and writes it to the
// |list_ranges| vector. We expect |list_ranges| to already be of the
@@ -311,6 +263,9 @@ void UpdateChunkRangesForList(SafeBrowsingStore* store,
bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
return a.full_hash.prefix < b.full_hash.prefix;
}
+bool SBAddFullHashSBPrefixLess(const SBAddFullHash& a, SBPrefix b) {
+ return a.full_hash.prefix < b;
+}
// This code always checks for non-zero file size. This helper makes
// that less verbose.
@@ -321,11 +276,6 @@ int64 GetFileSizeOrZero(const base::FilePath& file_path) {
return size_64;
}
-// Used to order whitelist storage in memory.
-bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
- return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
-}
-
} // namespace
// The default SafeBrowsingDatabaseFactory.
@@ -531,7 +481,7 @@ void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
// contention on the lock...
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.clear();
- pending_browse_hashes_.clear();
+ browse_gethash_cache_.clear();
LoadPrefixSet();
}
@@ -655,8 +605,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
{
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.clear();
- pending_browse_hashes_.clear();
- prefix_miss_cache_.clear();
+ browse_gethash_cache_.clear();
browse_prefix_set_.reset();
side_effect_free_whitelist_prefix_set_.reset();
ip_blacklist_.clear();
@@ -667,23 +616,28 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
return true;
}
-// TODO(lzheng): Remove matching_list, it is not used anywhere.
bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
const GURL& url,
- std::string* matching_list,
std::vector<SBPrefix>* prefix_hits,
- std::vector<SBFullHashResult>* full_hits,
- base::Time last_update) {
+ std::vector<SBFullHashResult>* cache_hits) {
// Clear the results first.
- matching_list->clear();
prefix_hits->clear();
- full_hits->clear();
+ cache_hits->clear();
std::vector<SBFullHash> full_hashes;
BrowseFullHashesToCheck(url, false, &full_hashes);
if (full_hashes.empty())
return false;
+ std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
+
+ return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
+}
+
+bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
+ const std::vector<SBFullHash>& full_hashes,
+ std::vector<SBPrefix>* prefix_hits,
+ std::vector<SBFullHashResult>* cache_hits) {
// This function is called on the I/O thread, prevent changes to
// filter and caches.
base::AutoLock locked(lookup_lock_);
@@ -694,30 +648,62 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
if (!browse_prefix_set_.get())
return false;
- size_t miss_count = 0;
+ const base::Time now = base::Time::Now();
+
for (size_t i = 0; i < full_hashes.size(); ++i) {
const SBPrefix prefix = full_hashes[i].prefix;
- if (browse_prefix_set_->Exists(prefix)) {
- prefix_hits->push_back(prefix);
- if (prefix_miss_cache_.count(prefix) > 0)
- ++miss_count;
+
+ // First check if there is a valid cached result for this prefix.
+ std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
+ browse_gethash_cache_.find(prefix);
+ if (citer != browse_gethash_cache_.end()) {
+ if (now <= citer->second.expire_after) {
+ for (std::vector<SBFullHashResult>::const_iterator fiter =
+ citer->second.full_hashes.begin();
+ fiter != citer->second.full_hashes.end();
+ ++fiter) {
+ if (SBFullHashEqual(full_hashes[i], fiter->hash))
+ cache_hits->push_back(*fiter);
+ }
+ // If the prefix was in the cache, don't add the prefix to
+ // prefix_hits. The result will be in cache_hits (if the fullhash
+ // matched), or not (if it was a cached miss).
+ continue;
+ }
+
+ // Remove expired entries.
+ browse_gethash_cache_.erase(citer);
}
- }
- // If all the prefixes are cached as 'misses', don't issue a GetHash.
- if (miss_count == prefix_hits->size())
- return false;
+ // There was no valid cached result for the prefix, so check the database.
+ if (browse_prefix_set_->Exists(prefix)) {
+ if (prefix_hits->empty() || prefix_hits->back() != prefix)
+ prefix_hits->push_back(prefix);
+ continue;
+ }
- // Find the matching full-hash results. |full_browse_hashes_| are from the
- // database, |pending_browse_hashes_| are from GetHash requests between
- // updates.
- std::sort(prefix_hits->begin(), prefix_hits->end());
+ // There was no prefix match, check for fullhash matches.
+ std::vector<SBAddFullHash>::const_iterator db_fullhash_prefix_match =
+ std::lower_bound(full_browse_hashes_.begin(),
+ full_browse_hashes_.end(),
+ prefix,
+ SBAddFullHashSBPrefixLess);
+ // If full_browse_hashes_ was sorted on the fullhash (not just the
+ // prefix), could do binary_search here, but there are unlikely to be
+ // enough prefix matches to matter.
+ while (db_fullhash_prefix_match != full_browse_hashes_.end() &&
+ db_fullhash_prefix_match->full_hash.prefix == prefix) {
+ if (SBFullHashEqual(db_fullhash_prefix_match->full_hash,
+ full_hashes[i])) {
+ if (prefix_hits->empty() || prefix_hits->back() != prefix)
+ prefix_hits->push_back(prefix);
+ break;
+ }
+ ++db_fullhash_prefix_match;
+ }
+ }
- GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
- full_hits, last_update);
- GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
- full_hits, last_update);
- return true;
+ return !prefix_hits->empty() || !cache_hits->empty();
}
bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
@@ -875,17 +861,12 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
store->WriteAddPrefix(encoded_chunk_id, prefix);
}
} else {
- // Prefixes and hashes.
- const base::Time receive_time = base::Time::Now();
+ // Full hashes only.
for (int i = 0; i < count; ++i) {
const SBFullHash full_hash = entry->FullHashAt(i);
- const SBPrefix prefix = full_hash.prefix;
-
- STATS_COUNTER("SB.PrefixAdd", 1);
- store->WriteAddPrefix(encoded_chunk_id, prefix);
STATS_COUNTER("SB.PrefixAddFull", 1);
- store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
+ store->WriteAddHash(encoded_chunk_id, full_hash);
}
}
}
@@ -950,15 +931,12 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
}
} else {
- // Prefixes and hashes.
+ // Full hashes only.
for (int i = 0; i < count; ++i) {
const SBFullHash full_hash = entry->FullHashAt(i);
const int add_chunk_id =
EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
- STATS_COUNTER("SB.PrefixSub", 1);
- store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
-
STATS_COUNTER("SB.PrefixSubFull", 1);
store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
}
@@ -1053,37 +1031,28 @@ void SafeBrowsingDatabaseNew::DeleteChunks(
void SafeBrowsingDatabaseNew::CacheHashResults(
const std::vector<SBPrefix>& prefixes,
- const std::vector<SBFullHashResult>& full_hits) {
+ const std::vector<SBFullHashResult>& full_hits,
+ const base::TimeDelta& cache_lifetime) {
+
+ const base::Time expire_after = base::Time::Now() + cache_lifetime;
+
// This is called on the I/O thread, lock against updates.
base::AutoLock locked(lookup_lock_);
- if (full_hits.empty()) {
- prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
- return;
+ // Create or reset all cached results for these prefixes.
+ for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
+ i != prefixes.end();
+ ++i) {
+ browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
}
- // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
- // Refactor to make them identical.
- const base::Time now = base::Time::Now();
- const size_t orig_size = pending_browse_hashes_.size();
- for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
- iter != full_hits.end(); ++iter) {
- const int list_id = safe_browsing_util::GetListId(iter->list_name);
- if (list_id == safe_browsing_util::MALWARE ||
- list_id == safe_browsing_util::PHISH) {
- int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
- SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
- pending_browse_hashes_.push_back(add_full_hash);
- }
+ // Insert any fullhash hits. Note that any given entry in |prefixes| may
+ // have one, several, or no corresponding fullhashes.
+ for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
+ i != full_hits.end();
+ ++i) {
+ browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
}
-
- // Sort new entries then merge with the previously-sorted entries.
- std::vector<SBAddFullHash>::iterator
- orig_end = pending_browse_hashes_.begin() + orig_size;
- std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
- std::inplace_merge(pending_browse_hashes_.begin(),
- orig_end, pending_browse_hashes_.end(),
- SBAddFullHashPrefixLess);
}
bool SafeBrowsingDatabaseNew::UpdateStarted(
@@ -1141,6 +1110,9 @@ bool SafeBrowsingDatabaseNew::UpdateStarted(
safe_browsing_util::kMalwareList,
safe_browsing_util::kPhishingList,
lists);
+ // Cached fullhash results must be cleared on every database update (whether
+ // successful or not).
+ browse_gethash_cache_.clear();
// NOTE(shess): |download_store_| used to contain kBinHashList, which has been
// deprecated. Code to delete the list from the store shows ~15k hits/day as
@@ -1275,15 +1247,11 @@ void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
if (!store)
return;
- // For the whitelists, we don't cache and save full hashes since all
- // hashes are already full.
- std::vector<SBAddFullHash> empty_add_hashes;
-
// Note: |builder| will not be empty. The current data store implementation
// stores all full-length hashes as both full and prefix hashes.
safe_browsing::PrefixSetBuilder builder;
std::vector<SBAddFullHash> full_hashes;
- if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) {
+ if (!store->FinishUpdate(&builder, &full_hashes)) {
RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
WhitelistEverything(whitelist);
return;
@@ -1300,17 +1268,12 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
const base::FilePath& store_filename,
SafeBrowsingStore* store,
FailureType failure_type) {
- // We don't cache and save full hashes.
- std::vector<SBAddFullHash> empty_add_hashes;
-
// These results are not used after this call. Simply ignore the
// returned value after FinishUpdate(...).
safe_browsing::PrefixSetBuilder builder;
std::vector<SBAddFullHash> add_full_hashes_result;
- if (!store->FinishUpdate(empty_add_hashes,
- &builder,
- &add_full_hashes_result)) {
+ if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
RecordFailure(failure_type);
}
@@ -1322,16 +1285,6 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
}
void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
- // Copy out the pending add hashes. Copy rather than swapping in
- // case |ContainsBrowseURL()| is called before the new filter is complete.
- std::vector<SBAddFullHash> pending_add_hashes;
- {
- base::AutoLock locked(lookup_lock_);
- pending_add_hashes.insert(pending_add_hashes.end(),
- pending_browse_hashes_.begin(),
- pending_browse_hashes_.end());
- }
-
// Measure the amount of IO during the filter build.
base::IoCounters io_before, io_after;
base::ProcessHandle handle = base::Process::Current().handle();
@@ -1353,8 +1306,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
safe_browsing::PrefixSetBuilder builder;
std::vector<SBAddFullHash> add_full_hashes;
- if (!browse_store_->FinishUpdate(pending_add_hashes,
- &builder, &add_full_hashes)) {
+ if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
return;
}
@@ -1368,14 +1320,6 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
{
base::AutoLock locked(lookup_lock_);
full_browse_hashes_.swap(add_full_hashes);
-
- // TODO(shess): If |CacheHashResults()| is posted between the
- // earlier lock and this clear, those pending hashes will be lost.
- // It could be fixed by only removing hashes which were collected
- // at the earlier point. I believe that is fail-safe as-is (the
- // hash will be fetched again).
- pending_browse_hashes_.clear();
- prefix_miss_cache_.clear();
browse_prefix_set_.swap(prefix_set);
}
@@ -1417,14 +1361,11 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
}
void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
- std::vector<SBAddFullHash> empty_add_hashes;
safe_browsing::PrefixSetBuilder builder;
std::vector<SBAddFullHash> add_full_hashes_result;
if (!side_effect_free_whitelist_store_->FinishUpdate(
- empty_add_hashes,
- &builder,
- &add_full_hashes_result)) {
+ &builder, &add_full_hashes_result)) {
RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
return;
}
@@ -1465,16 +1406,11 @@ void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
}
void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
- // For the IP blacklist, we don't cache and save full hashes since all
- // hashes are already full.
- std::vector<SBAddFullHash> empty_add_hashes;
-
// Note: prefixes will not be empty. The current data store implementation
// stores all full-length hashes as both full and prefix hashes.
safe_browsing::PrefixSetBuilder builder;
std::vector<SBAddFullHash> full_hashes;
- if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
- &builder, &full_hashes)) {
+ if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
return;
« no previous file with comments | « chrome/browser/safe_browsing/safe_browsing_database.h ('k') | chrome/browser/safe_browsing/safe_browsing_database_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698