Index: chrome/browser/safe_browsing/safe_browsing_database.cc
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 2b5b2e256bcdf199691453784246ac5a14ccc20c..ece3138a042b65ef909612658d517a063a20ed70 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -66,9 +66,6 @@ const base::FilePath::CharType kIPBlacklistDBFile[] =
 // this.
 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
 
-// The maximum staleness for a cached entry.
-const int kMaxStalenessMinutes = 45;
-
 // Maximum number of entries we allow in any of the whitelists.
 // If a whitelist on disk contains more entries then all lookups to
 // the whitelist will be considered a match.
@@ -185,51 +182,6 @@ bool MatchAddPrefixes(SafeBrowsingStore* store,
   return found_match;
 }
 
-// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
-// add them to |full_hits| if not expired. "Not expired" is when
-// either |last_update| was recent enough, or the item has been
-// received recently enough. Expired items are not deleted because a
-// future update may make them acceptable again.
-//
-// For efficiency reasons the code walks |prefix_hits| and
-// |full_hashes| in parallel, so they must be sorted by prefix.
-void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
-                                  const std::vector<SBAddFullHash>& full_hashes,
-                                  std::vector<SBFullHashResult>* full_hits,
-                                  base::Time last_update) {
-  const base::Time expire_time =
-      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
-
-  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
-  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
-
-  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
-    if (*piter < hiter->full_hash.prefix) {
-      ++piter;
-    } else if (hiter->full_hash.prefix < *piter) {
-      ++hiter;
-    } else {
-      if (expire_time < last_update ||
-          expire_time.ToTimeT() < hiter->received) {
-        SBFullHashResult result;
-        const int list_bit = GetListIdBit(hiter->chunk_id);
-        DCHECK(list_bit == safe_browsing_util::MALWARE ||
-               list_bit == safe_browsing_util::PHISH);
-        const safe_browsing_util::ListType list_id =
-            static_cast<safe_browsing_util::ListType>(list_bit);
-        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
-          continue;
-        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
-        result.hash = hiter->full_hash;
-        full_hits->push_back(result);
-      }
-
-      // Only increment |hiter|, |piter| might have multiple hits.
-      ++hiter;
-    }
-  }
-}
-
 // This function generates a chunk range string for |chunks|. It
 // outputs one chunk range string per list and writes it to the
 // |list_ranges| vector. We expect |list_ranges| to already be of the
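For context on what was deleted above: GetCachedFullHashesForBrowse() was a
textbook merge join over two prefix-sorted vectors: advance whichever side
holds the smaller key, and on a match advance only the full-hash side so that
one prefix can pair with several full hashes. A minimal, self-contained sketch
of that two-pointer pattern, using simplified stand-in types rather than the
real SBPrefix/SBAddFullHash definitions:

    #include <cstdint>
    #include <vector>

    typedef uint32_t Prefix;

    struct FullHashEntry {
      Prefix prefix;  // First 4 bytes of the 32-byte full hash.
      // Rest of the hash and its metadata elided for the sketch.
    };

    // Both inputs must be sorted by prefix; runs in O(|prefix_hits| +
    // |full_hashes|) comparisons.
    void MatchSortedPrefixes(const std::vector<Prefix>& prefix_hits,
                             const std::vector<FullHashEntry>& full_hashes,
                             std::vector<FullHashEntry>* matches) {
      std::vector<Prefix>::const_iterator piter = prefix_hits.begin();
      std::vector<FullHashEntry>::const_iterator hiter = full_hashes.begin();
      while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
        if (*piter < hiter->prefix) {
          ++piter;  // Queried prefix has no full-hash entry.
        } else if (hiter->prefix < *piter) {
          ++hiter;  // Entry's prefix was not queried.
        } else {
          matches->push_back(*hiter);
          ++hiter;  // Advance only the entry side: a prefix may match again.
        }
      }
    }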
@@ -321,11 +273,6 @@ int64 GetFileSizeOrZero(const base::FilePath& file_path) {
   return size_64;
 }
 
-// Used to order whitelist storage in memory.
-bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
-  return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
-}
-
 }  // namespace
 
 // The default SafeBrowsingDatabaseFactory.
@@ -531,7 +478,7 @@ void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
     // contention on the lock...
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.clear();
-    pending_browse_hashes_.clear();
+    browse_gethash_cache_.clear();
 
     LoadPrefixSet();
   }
@@ -655,8 +602,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
   {
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.clear();
-    pending_browse_hashes_.clear();
-    prefix_miss_cache_.clear();
+    browse_gethash_cache_.clear();
     browse_prefix_set_.reset();
     side_effect_free_whitelist_prefix_set_.reset();
     ip_blacklist_.clear();
@@ -667,23 +613,28 @@
   return true;
 }
 
-// TODO(lzheng): Remove matching_list, it is not used anywhere.
 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
     const GURL& url,
-    std::string* matching_list,
     std::vector<SBPrefix>* prefix_hits,
-    std::vector<SBFullHashResult>* full_hits,
-    base::Time last_update) {
+    std::vector<SBFullHashResult>* cache_hits) {
   // Clear the results first.
-  matching_list->clear();
   prefix_hits->clear();
-  full_hits->clear();
+  cache_hits->clear();
 
   std::vector<SBFullHash> full_hashes;
   BrowseFullHashesToCheck(url, false, &full_hashes);
   if (full_hashes.empty())
     return false;
 
+  std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
+
+  return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
+}
+
+bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
+    const std::vector<SBFullHash>& full_hashes,
+    std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* cache_hits) {
   // This function is called on the I/O thread, prevent changes to
   // filter and caches.
   base::AutoLock locked(lookup_lock_);
@@ -694,30 +645,81 @@
   if (!browse_prefix_set_.get())
     return false;
 
-  size_t miss_count = 0;
+  const base::Time now = base::Time::Now();
+
   for (size_t i = 0; i < full_hashes.size(); ++i) {
     const SBPrefix prefix = full_hashes[i].prefix;
     if (browse_prefix_set_->Exists(prefix)) {
-      prefix_hits->push_back(prefix);
-      if (prefix_miss_cache_.count(prefix) > 0)
-        ++miss_count;
-    }
-  }
+      // Stupid workaround since std::equal_range requires you to have the
+      // search key be the same type as the elements in the collection.
> Scott Hess - ex-Googler (2014/04/01 22:08:36):
> My impression is that this is because while you ca…
>
> mattm (2014/04/03 01:38:12):
> Yeah. I had tried to hack by defining both orderin…
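For readers following this thread: std::equal_range() does not actually force
the search key to be the element type; it only requires that the comparator be
callable as (element, key) and (key, element). The "both orderings" approach
mattm mentions would look roughly like the sketch below, with simplified
stand-in types rather than the real SBAddFullHash. One practical caveat is
that some debug-mode STL implementations exercise the comparator in extra
argument orders, which is one way this can become more trouble than the
dummy-search-value workaround used in the patch.

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    typedef uint32_t SBPrefix;

    struct SBAddFullHash {
      SBPrefix prefix;  // Stand-in; the real type nests this in full_hash.
    };

    // Comparator callable with the key on either side, as std::equal_range
    // requires: comp(element, key) and comp(key, element).
    struct PrefixCompare {
      bool operator()(const SBAddFullHash& a, SBPrefix b) const {
        return a.prefix < b;
      }
      bool operator()(SBPrefix a, const SBAddFullHash& b) const {
        return a < b.prefix;
      }
    };

    // |sorted_hashes| must be sorted by prefix.
    std::pair<std::vector<SBAddFullHash>::const_iterator,
              std::vector<SBAddFullHash>::const_iterator>
    FindPrefixRange(const std::vector<SBAddFullHash>& sorted_hashes,
                    SBPrefix prefix) {
      return std::equal_range(sorted_hashes.begin(), sorted_hashes.end(),
                              prefix, PrefixCompare());
    }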
+      SBAddFullHash search_value;
+      search_value.full_hash.prefix = prefix;
+
+      std::pair<std::vector<SBAddFullHash>::const_iterator,
+                std::vector<SBAddFullHash>::const_iterator>
+          db_fullhash_prefix_matches =
+              std::equal_range(full_browse_hashes_.begin(),
+                               full_browse_hashes_.end(),
+                               search_value,
+                               SBAddFullHashPrefixLess);
+      bool match = false;
+      // If the database contains any fullhashes with the same prefix, then we
+      // only count it as a prefix hit if there is an exact fullhash match.
+      if (db_fullhash_prefix_matches.first !=
+          db_fullhash_prefix_matches.second) {
+        // Could do binary_search here, but unlikely to be enough matches to
+        // matter.
+        for (std::vector<SBAddFullHash>::const_iterator hiter =
+                 db_fullhash_prefix_matches.first;
+             hiter != db_fullhash_prefix_matches.second;
+             ++hiter) {
+          if (SBFullHashEqual(hiter->full_hash, full_hashes[i])) {
+            match = true;
+            break;
+          }
+        }
+      } else {
+        // If db has only a prefix hit, that's ok too.
+        match = true;
+      }
-  // If all the prefixes are cached as 'misses', don't issue a GetHash.
-  if (miss_count == prefix_hits->size())
-    return false;
+      // If we have a db match, check if we have a valid cached result for this
+      // prefix.
> Scott Hess - ex-Googler (2014/04/01 22:08:36):
> I think maybe this is backwards. If there is a va…
>
> mattm (2014/04/03 01:38:12):
> Ah, good point. Could even check the cache before…
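The reordering the reviewers converge on, consulting the GetHash cache before
walking the database fullhashes, would make a fresh cache entry authoritative
for its prefix: a cached fullhash is returned directly, a cached miss
suppresses the GetHash request, and the database walk is reserved for uncached
prefixes. A minimal sketch of that cache-first step, with simplified stand-in
types (this is not the actual follow-up patch):

    #include <cstdint>
    #include <ctime>
    #include <map>
    #include <vector>

    typedef uint32_t SBPrefix;

    struct CachedResult {
      std::time_t expire_after;
      std::vector<uint32_t> full_hash_ids;  // Stand-in for SBFullHashResult.
    };

    // Returns true if |prefix| was resolved from the cache, in which case the
    // caller should neither check the database entries nor issue a GetHash.
    // An entry with an empty |full_hash_ids| is a cached miss.
    bool ResolveFromCache(std::map<SBPrefix, CachedResult>* cache,
                          SBPrefix prefix,
                          std::time_t now,
                          std::vector<uint32_t>* cache_hits) {
      std::map<SBPrefix, CachedResult>::iterator it = cache->find(prefix);
      if (it == cache->end())
        return false;      // Not cached: fall through to the database walk.
      if (now > it->second.expire_after) {
        cache->erase(it);  // Expired: drop the entry and treat as uncached.
        return false;
      }
      // Fresh entry: copy out any cached fullhash hits (possibly none).
      cache_hits->insert(cache_hits->end(),
                         it->second.full_hash_ids.begin(),
                         it->second.full_hash_ids.end());
      return true;
    }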
+      if (match) {
+        std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
+            browse_gethash_cache_.find(prefix);
+        if (citer != browse_gethash_cache_.end()) {
+          if (now > citer->second.expire_after) {
+            // If the cached entry is expired, remove it and ignore it.
+            browse_gethash_cache_.erase(citer);
+          } else {
+            for (std::vector<SBFullHashResult>::const_iterator fiter =
+                     citer->second.full_hashes.begin();
+                 fiter != citer->second.full_hashes.end();
+                 ++fiter) {
+              if (SBFullHashEqual(full_hashes[i], fiter->hash)) {
+                cache_hits->push_back(*fiter);
+              }
+            }
+            // If the prefix was in the cache, don't add the prefix to
+            // prefix_hits. The result will be in cache_hits (if the fullhash
+            // matched), or not (if it was a cached miss).
+            match = false;
+          }
+        }
+      }
-  // Find the matching full-hash results. |full_browse_hashes_| are from the
-  // database, |pending_browse_hashes_| are from GetHash requests between
-  // updates.
-  std::sort(prefix_hits->begin(), prefix_hits->end());
+      // If there was a prefix match and no cached result, add to prefix_hits.
+      if (match) {
+        // Only add a given prefix once. Since full_hashes is sorted, we only
+        // need to check the last entry of prefix_hits.
+        if (prefix_hits->empty() || prefix_hits->back() != prefix)
+          prefix_hits->push_back(prefix);
+      }
+    }
+  }
-  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
-                               full_hits, last_update);
-  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
-                               full_hits, last_update);
-
-  return true;
+  return !prefix_hits->empty() || !cache_hits->empty();
 }
 
 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
@@ -876,7 +878,6 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
     }
   } else {
     // Prefixes and hashes.
-    const base::Time receive_time = base::Time::Now();
    for (int i = 0; i < count; ++i) {
      const SBFullHash full_hash = entry->FullHashAt(i);
      const SBPrefix prefix = full_hash.prefix;
@@ -885,7 +886,7 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
       store->WriteAddPrefix(encoded_chunk_id, prefix);
 
       STATS_COUNTER("SB.PrefixAddFull", 1);
-      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
+      store->WriteAddHash(encoded_chunk_id, full_hash);
     }
   }
 }
@@ -1053,37 +1054,28 @@
 void SafeBrowsingDatabaseNew::CacheHashResults(
     const std::vector<SBPrefix>& prefixes,
-    const std::vector<SBFullHashResult>& full_hits) {
+    const std::vector<SBFullHashResult>& full_hits,
+    const base::TimeDelta& cache_lifetime) {
+
+  const base::Time expire_after = base::Time::Now() + cache_lifetime;
+
   // This is called on the I/O thread, lock against updates.
   base::AutoLock locked(lookup_lock_);
 
-  if (full_hits.empty()) {
-    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
-    return;
+  // Create or reset all cached results for these prefixes.
+  for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
+       i != prefixes.end();
+       ++i) {
+    browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
   }
 
-  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
-  // Refactor to make them identical.
-  const base::Time now = base::Time::Now();
-  const size_t orig_size = pending_browse_hashes_.size();
-  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
-       iter != full_hits.end(); ++iter) {
-    const int list_id = safe_browsing_util::GetListId(iter->list_name);
-    if (list_id == safe_browsing_util::MALWARE ||
-        list_id == safe_browsing_util::PHISH) {
-      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
-      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
-      pending_browse_hashes_.push_back(add_full_hash);
-    }
+  // Insert any fullhash hits. Note that there may be one, multiple, or no
+  // fullhashes for any given entry in |prefixes|.
+  for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
+       i != full_hits.end();
+       ++i) {
+    browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
   }
-
-  // Sort new entries then merge with the previously-sorted entries.
-  std::vector<SBAddFullHash>::iterator
-      orig_end = pending_browse_hashes_.begin() + orig_size;
-  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
-  std::inplace_merge(pending_browse_hashes_.begin(),
-                     orig_end, pending_browse_hashes_.end(),
-                     SBAddFullHashPrefixLess);
 }
 
 bool SafeBrowsingDatabaseNew::UpdateStarted(
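To make the new cache's write-side contract explicit: CacheHashResults() first
creates or resets an entry for every queried prefix, so a prefix that comes
back with no fullhashes is stored as a cached miss, and then appends each
returned fullhash to its prefix's entry. A self-contained sketch of that write
path, using simplified stand-ins for SBFullHashResult and base::Time:

    #include <cstddef>
    #include <cstdint>
    #include <ctime>
    #include <map>
    #include <utility>
    #include <vector>

    typedef uint32_t SBPrefix;

    struct CachedResult {
      CachedResult() : expire_after(0) {}
      explicit CachedResult(std::time_t expire) : expire_after(expire) {}
      std::time_t expire_after;
      std::vector<uint32_t> full_hash_ids;  // Stand-in for SBFullHashResult.
    };

    // |hits| pairs each returned fullhash (stand-in id) with its prefix.
    void CacheResults(std::map<SBPrefix, CachedResult>* cache,
                      const std::vector<SBPrefix>& prefixes,
                      const std::vector<std::pair<SBPrefix, uint32_t> >& hits,
                      std::time_t now,
                      std::time_t lifetime_seconds) {
      const std::time_t expire_after = now + lifetime_seconds;
      // Create or reset an entry per queried prefix; until a hit is attached
      // below, each entry represents a cached miss.
      for (size_t i = 0; i < prefixes.size(); ++i)
        (*cache)[prefixes[i]] = CachedResult(expire_after);
      // Attach fullhash hits; a prefix may receive zero, one, or several.
      for (size_t i = 0; i < hits.size(); ++i)
        (*cache)[hits[i].first].full_hash_ids.push_back(hits[i].second);
    }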
@@ -1141,6 +1133,9 @@ bool SafeBrowsingDatabaseNew::UpdateStarted(
                              safe_browsing_util::kMalwareList,
                              safe_browsing_util::kPhishingList,
                              lists);
+  // Cached fullhash results must be cleared on every database update (whether
+  // successful or not).
+  browse_gethash_cache_.clear();
> Scott Hess - ex-Googler (2014/04/01 22:08:36):
> I think this should be cleared when the update is…
>
> mattm (2014/04/03 01:38:12):
> The spec says "Clients must clear cached full-leng…
 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
 // deprecated. Code to delete the list from the store shows ~15k hits/day as
@@ -1275,15 +1270,11 @@ void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
   if (!store)
     return;
 
-  // For the whitelists, we don't cache and save full hashes since all
-  // hashes are already full.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // Note: |builder| will not be empty. The current data store implementation
   // stores all full-length hashes as both full and prefix hashes.
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> full_hashes;
-  if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) {
+  if (!store->FinishUpdate(&builder, &full_hashes)) {
     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
     WhitelistEverything(whitelist);
     return;
@@ -1300,17 +1291,12 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
     const base::FilePath& store_filename,
     SafeBrowsingStore* store,
     FailureType failure_type) {
-  // We don't cache and save full hashes.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // These results are not used after this call. Simply ignore the
   // returned value after FinishUpdate(...).
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes_result;
-  if (!store->FinishUpdate(empty_add_hashes,
-                           &builder,
-                           &add_full_hashes_result)) {
+  if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
     RecordFailure(failure_type);
   }
@@ -1322,16 +1308,6 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
 }
 
 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
-  // Copy out the pending add hashes. Copy rather than swapping in
-  // case |ContainsBrowseURL()| is called before the new filter is complete.
-  std::vector<SBAddFullHash> pending_add_hashes;
-  {
-    base::AutoLock locked(lookup_lock_);
-    pending_add_hashes.insert(pending_add_hashes.end(),
-                              pending_browse_hashes_.begin(),
-                              pending_browse_hashes_.end());
-  }
-
   // Measure the amount of IO during the filter build.
   base::IoCounters io_before, io_after;
   base::ProcessHandle handle = base::Process::Current().handle();
@@ -1353,8 +1329,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes;
-  if (!browse_store_->FinishUpdate(pending_add_hashes,
-                                   &builder, &add_full_hashes)) {
+  if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
     return;
   }
@@ -1368,14 +1343,6 @@
   {
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.swap(add_full_hashes);
-
-    // TODO(shess): If |CacheHashResults()| is posted between the
-    // earlier lock and this clear, those pending hashes will be lost.
-    // It could be fixed by only removing hashes which were collected
-    // at the earlier point. I believe that is fail-safe as-is (the
-    // hash will be fetched again).
-    pending_browse_hashes_.clear();
-    prefix_miss_cache_.clear();
     browse_prefix_set_.swap(prefix_set);
   }
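The swap under |lookup_lock_| above is the publish step of a build-then-swap
pattern: the update thread assembles the new hashes and prefix set off the
lock, then swaps them in under the same lock the lookup path takes, so a
concurrent lookup observes either the old state or the new state, never a
half-built one. A minimal sketch of the idea, using std::mutex and stand-in
containers in place of base::Lock and the real types:

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <mutex>
    #include <vector>

    class BrowseData {
     public:
      // Called by the update thread once the new data is fully built.
      void PublishUpdate(std::vector<uint32_t>* new_hashes,
                         std::unique_ptr<std::vector<uint32_t>> new_prefixes) {
        std::lock_guard<std::mutex> lock(lookup_lock_);
        full_hashes_.swap(*new_hashes);  // Cheap swaps: the lock is held
        prefix_set_.swap(new_prefixes);  // only briefly.
      }

      // Called by the lookup thread.
      bool Contains(uint32_t prefix) {
        std::lock_guard<std::mutex> lock(lookup_lock_);
        if (!prefix_set_)
          return false;
        for (size_t i = 0; i < prefix_set_->size(); ++i) {
          if ((*prefix_set_)[i] == prefix)
            return true;
        }
        return false;
      }

     private:
      std::mutex lookup_lock_;
      std::vector<uint32_t> full_hashes_;                   // Guarded by lock.
      std::unique_ptr<std::vector<uint32_t>> prefix_set_;   // Guarded by lock.
    };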
@@ -1417,14 +1384,11 @@
 }
 
 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
-  std::vector<SBAddFullHash> empty_add_hashes;
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes_result;
 
   if (!side_effect_free_whitelist_store_->FinishUpdate(
-          empty_add_hashes,
-          &builder,
-          &add_full_hashes_result)) {
+          &builder, &add_full_hashes_result)) {
     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
     return;
   }
@@ -1465,16 +1429,11 @@
 }
 
 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
-  // For the IP blacklist, we don't cache and save full hashes since all
-  // hashes are already full.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // Note: prefixes will not be empty. The current data store implementation
   // stores all full-length hashes as both full and prefix hashes.
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> full_hashes;
-  if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
-                                         &builder, &full_hashes)) {
+  if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
     return;