Index: chrome/browser/safe_browsing/safe_browsing_database.cc
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 2b5b2e256bcdf199691453784246ac5a14ccc20c..ece3138a042b65ef909612658d517a063a20ed70 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -66,9 +66,6 @@ const base::FilePath::CharType kIPBlacklistDBFile[] =
 // this.
 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
-// The maximum staleness for a cached entry.
-const int kMaxStalenessMinutes = 45;
-
 // Maximum number of entries we allow in any of the whitelists.
 // If a whitelist on disk contains more entries than this, all lookups to
 // the whitelist will be considered a match.
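[The failsafe this comment describes, as a self-contained sketch with stand-in types; the real code keeps each whitelist as a sorted vector of SBFullHash paired with an all-match flag, so an oversized (possibly corrupt) list fails open rather than silently under-matching:]

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// Stand-in for the real whitelist representation: (sorted hashes, all-match).
typedef std::pair<std::vector<std::string>, bool> Whitelist;

void LoadWhitelist(const std::vector<std::string>& entries,
                   size_t max_size,  // stand-in for the kMax*Whitelist cap
                   Whitelist* whitelist) {
  if (entries.size() > max_size) {
    // Oversized list: fail open, i.e. every subsequent lookup matches.
    whitelist->first.clear();
    whitelist->second = true;
    return;
  }
  whitelist->first = entries;
  std::sort(whitelist->first.begin(), whitelist->first.end());
  whitelist->second = false;
}

bool ContainsWhitelistedHash(const Whitelist& whitelist,
                             const std::string& hash) {
  return whitelist.second ||
         std::binary_search(whitelist.first.begin(), whitelist.first.end(),
                            hash);
}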
@@ -185,51 +182,6 @@ bool MatchAddPrefixes(SafeBrowsingStore* store,
   return found_match;
 }
-// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
-// add them to |full_hits| if not expired. "Not expired" is when
-// either |last_update| was recent enough, or the item has been
-// received recently enough. Expired items are not deleted because a
-// future update may make them acceptable again.
-//
-// For efficiency reasons the code walks |prefix_hits| and
-// |full_hashes| in parallel, so they must be sorted by prefix.
-void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
-                                  const std::vector<SBAddFullHash>& full_hashes,
-                                  std::vector<SBFullHashResult>* full_hits,
-                                  base::Time last_update) {
-  const base::Time expire_time =
-      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
-
-  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
-  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
-
-  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
-    if (*piter < hiter->full_hash.prefix) {
-      ++piter;
-    } else if (hiter->full_hash.prefix < *piter) {
-      ++hiter;
-    } else {
-      if (expire_time < last_update ||
-          expire_time.ToTimeT() < hiter->received) {
-        SBFullHashResult result;
-        const int list_bit = GetListIdBit(hiter->chunk_id);
-        DCHECK(list_bit == safe_browsing_util::MALWARE ||
-               list_bit == safe_browsing_util::PHISH);
-        const safe_browsing_util::ListType list_id =
-            static_cast<safe_browsing_util::ListType>(list_bit);
-        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
-          continue;
-        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
-        result.hash = hiter->full_hash;
-        full_hits->push_back(result);
-      }
-
-      // Only increment |hiter|, |piter| might have multiple hits.
-      ++hiter;
-    }
-  }
-}
-
 // This function generates a chunk range string for |chunks|. It
 // outputs one chunk range string per list and writes it to the
 // |list_ranges| vector. We expect |list_ranges| to already be of the
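[The hunk cuts this comment off, but the chunk-range encoding it refers to is the Safe Browsing wire format: sorted chunk ids folded into comma-separated singletons and dash-separated runs, e.g. {1,2,3,5,7,8} becomes "1-3,5,7-8". A stand-alone sketch of that encoding, not the helper this file actually uses:]

#include <sstream>
#include <string>
#include <vector>

// Illustrative only: fold sorted, deduped chunk ids into a range string.
std::string ChunksToRangeString(const std::vector<int>& sorted_chunks) {
  std::ostringstream result;
  size_t i = 0;
  while (i < sorted_chunks.size()) {
    // Extend j across a run of consecutive ids starting at i.
    size_t j = i;
    while (j + 1 < sorted_chunks.size() &&
           sorted_chunks[j + 1] == sorted_chunks[j] + 1) {
      ++j;
    }
    if (i > 0)
      result << ",";
    result << sorted_chunks[i];
    if (j > i)
      result << "-" << sorted_chunks[j];
    i = j + 1;
  }
  return result.str();
}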
@@ -321,11 +273,6 @@ int64 GetFileSizeOrZero(const base::FilePath& file_path) {
   return size_64;
 }
-// Used to order whitelist storage in memory.
-bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
-  return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
-}
-
 }  // namespace
 // The default SafeBrowsingDatabaseFactory.
@@ -531,7 +478,7 @@ void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
   // contention on the lock...
   base::AutoLock locked(lookup_lock_);
   full_browse_hashes_.clear();
-  pending_browse_hashes_.clear();
+  browse_gethash_cache_.clear();
   LoadPrefixSet();
 }
@@ -655,8 +602,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
   {
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.clear();
-    pending_browse_hashes_.clear();
-    prefix_miss_cache_.clear();
+    browse_gethash_cache_.clear();
     browse_prefix_set_.reset();
     side_effect_free_whitelist_prefix_set_.reset();
     ip_blacklist_.clear();
@@ -667,23 +613,28 @@
   return true;
 }
-// TODO(lzheng): Remove matching_list, it is not used anywhere.
 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
     const GURL& url,
-    std::string* matching_list,
     std::vector<SBPrefix>* prefix_hits,
-    std::vector<SBFullHashResult>* full_hits,
-    base::Time last_update) {
+    std::vector<SBFullHashResult>* cache_hits) {
   // Clear the results first.
-  matching_list->clear();
   prefix_hits->clear();
-  full_hits->clear();
+  cache_hits->clear();
   std::vector<SBFullHash> full_hashes;
   BrowseFullHashesToCheck(url, false, &full_hashes);
   if (full_hashes.empty())
     return false;
+  std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
+
+  return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
+}
+
+bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
+    const std::vector<SBFullHash>& full_hashes,
+    std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* cache_hits) {
   // This function is called on the I/O thread, prevent changes to
   // filter and caches.
   base::AutoLock locked(lookup_lock_);
@@ -694,30 +645,81 @@
   if (!browse_prefix_set_.get())
     return false;
-  size_t miss_count = 0;
+  const base::Time now = base::Time::Now();
+
   for (size_t i = 0; i < full_hashes.size(); ++i) {
     const SBPrefix prefix = full_hashes[i].prefix;
     if (browse_prefix_set_->Exists(prefix)) {
-      prefix_hits->push_back(prefix);
-      if (prefix_miss_cache_.count(prefix) > 0)
-        ++miss_count;
-    }
-  }
+      // Stupid workaround since std::equal_range requires you to have the
+      // search key be the same type as the elements in the collection.
Scott Hess - ex-Googler  2014/04/01 22:08:36
My impression is that this is because while you ca
mattm  2014/04/03 01:38:12
Yeah. I had tried to hack by defining both orderin
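[For reference, the "both orderings" approach mattm mentions trying, as a stand-alone sketch with stand-in types, not code from this CL. The reason it was abandoned is truncated above; one known pitfall is that debug STL modes self-verify sort order by comparing elements with elements, which a key-vs-element comparator does not support:]

#include <algorithm>
#include <utility>
#include <vector>

typedef unsigned int Prefix;            // stand-in for SBPrefix
struct AddFullHash { Prefix prefix; };  // stand-in for SBAddFullHash

// Comparator with both argument orders, so std::equal_range can take a bare
// Prefix as the search key instead of a dummy AddFullHash.
struct PrefixOrder {
  bool operator()(const AddFullHash& a, Prefix b) const { return a.prefix < b; }
  bool operator()(Prefix a, const AddFullHash& b) const { return a < b.prefix; }
};

std::pair<std::vector<AddFullHash>::const_iterator,
          std::vector<AddFullHash>::const_iterator>
MatchesForPrefix(const std::vector<AddFullHash>& sorted_by_prefix,
                 Prefix key) {
  return std::equal_range(sorted_by_prefix.begin(), sorted_by_prefix.end(),
                          key, PrefixOrder());
}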
+      SBAddFullHash search_value;
+      search_value.full_hash.prefix = prefix;
+
+      std::pair<std::vector<SBAddFullHash>::const_iterator,
+                std::vector<SBAddFullHash>::const_iterator>
+          db_fullhash_prefix_matches =
+              std::equal_range(full_browse_hashes_.begin(),
+                               full_browse_hashes_.end(),
+                               search_value,
+                               SBAddFullHashPrefixLess);
+      bool match = false;
+      // If the database contains any fullhashes with the same prefix, then we
+      // only count it as a prefix hit if there is an exact fullhash match.
+      if (db_fullhash_prefix_matches.first !=
+          db_fullhash_prefix_matches.second) {
+        // Could do binary_search here, but unlikely to be enough matches to
+        // matter.
+        for (std::vector<SBAddFullHash>::const_iterator hiter =
+                 db_fullhash_prefix_matches.first;
+             hiter != db_fullhash_prefix_matches.second;
+             ++hiter) {
+          if (SBFullHashEqual(hiter->full_hash, full_hashes[i])) {
+            match = true;
+            break;
+          }
+        }
+      } else {
+        // If db has only a prefix hit, that's ok too.
+        match = true;
+      }
-  // If all the prefixes are cached as 'misses', don't issue a GetHash.
-  if (miss_count == prefix_hits->size())
-    return false;
+      // If we have a db match, check if we have a valid cached result for this
+      // prefix.
Scott Hess - ex-Googler  2014/04/01 22:08:36
I think maybe this is backwards. If there is a va
mattm  2014/04/03 01:38:12
Ah, good point. Could even check the cache before
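[The reordering discussed here, sketched stand-alone with stand-in types (not what this CL landed): consult the gethash cache first, so a valid cached entry settles the lookup before any database fullhash scan runs:]

#include <ctime>
#include <map>
#include <vector>

struct CachedResult {
  time_t expire_after;
  std::vector<int> full_hash_ids;  // stand-in for cached SBFullHashResults
};
typedef std::map<unsigned int, CachedResult> GetHashCache;

// Returns true if the cache decides the outcome for |prefix|: either a
// cached hit (ids appended to |cache_hits|) or a cached miss (empty entry).
// Expired entries are erased and leave the decision to the database.
bool CacheDecides(GetHashCache* cache, unsigned int prefix, time_t now,
                  std::vector<int>* cache_hits) {
  GetHashCache::iterator it = cache->find(prefix);
  if (it == cache->end())
    return false;
  if (now > it->second.expire_after) {
    cache->erase(it);
    return false;
  }
  cache_hits->insert(cache_hits->end(),
                     it->second.full_hash_ids.begin(),
                     it->second.full_hash_ids.end());
  return true;
}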
+      if (match) {
+        std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
+            browse_gethash_cache_.find(prefix);
+        if (citer != browse_gethash_cache_.end()) {
+          if (now > citer->second.expire_after) {
+            // If the cached entry is expired, remove it and ignore it.
+            browse_gethash_cache_.erase(citer);
+          } else {
+            for (std::vector<SBFullHashResult>::const_iterator fiter =
+                     citer->second.full_hashes.begin();
+                 fiter != citer->second.full_hashes.end();
+                 ++fiter) {
+              if (SBFullHashEqual(full_hashes[i], fiter->hash)) {
+                cache_hits->push_back(*fiter);
+              }
+            }
+            // If the prefix was in the cache, don't add the prefix to
+            // prefix_hits. The result will be in cache_hits (if the fullhash
+            // matched), or not (if it was a cached miss).
+            match = false;
+          }
+        }
+      }
-  // Find the matching full-hash results. |full_browse_hashes_| are from the
-  // database, |pending_browse_hashes_| are from GetHash requests between
-  // updates.
-  std::sort(prefix_hits->begin(), prefix_hits->end());
+      // If there was a prefix match and no cached result, add to prefix_hits.
+      if (match) {
+        // Only add a given prefix once. Since full_hashes is sorted, we only
+        // need to check the last entry of prefix_hits.
+        if (prefix_hits->empty() || prefix_hits->back() != prefix)
+          prefix_hits->push_back(prefix);
+      }
+    }
+  }
-  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
-                               full_hits, last_update);
-  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
-                               full_hits, last_update);
-  return true;
+  return !prefix_hits->empty() || !cache_hits->empty();
 }
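[For context, how a caller would consume the new out-params under this contract; a sketch of a hypothetical call site (|database_| assumed), not code from this CL:]

std::vector<SBPrefix> prefix_hits;
std::vector<SBFullHashResult> cache_hits;
if (database_->ContainsBrowseUrl(url, &prefix_hits, &cache_hits)) {
  // |cache_hits| were served from the gethash cache and are final results.
  // |prefix_hits| are sorted, deduped prefixes that still require a GetHash
  // network request to resolve to full hashes.
}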
 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
@@ -876,7 +878,6 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
     }
   } else {
     // Prefixes and hashes.
-    const base::Time receive_time = base::Time::Now();
     for (int i = 0; i < count; ++i) {
       const SBFullHash full_hash = entry->FullHashAt(i);
       const SBPrefix prefix = full_hash.prefix;
@@ -885,7 +886,7 @@
       store->WriteAddPrefix(encoded_chunk_id, prefix);
       STATS_COUNTER("SB.PrefixAddFull", 1);
-      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
+      store->WriteAddHash(encoded_chunk_id, full_hash);
     }
   }
 }
@@ -1053,37 +1054,28 @@ void SafeBrowsingDatabaseNew::DeleteChunks(
 void SafeBrowsingDatabaseNew::CacheHashResults(
     const std::vector<SBPrefix>& prefixes,
-    const std::vector<SBFullHashResult>& full_hits) {
+    const std::vector<SBFullHashResult>& full_hits,
+    const base::TimeDelta& cache_lifetime) {
+
+  const base::Time expire_after = base::Time::Now() + cache_lifetime;
+
   // This is called on the I/O thread, lock against updates.
   base::AutoLock locked(lookup_lock_);
-  if (full_hits.empty()) {
-    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
-    return;
+  // Create or reset all cached results for these prefixes.
+  for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
+       i != prefixes.end();
+       ++i) {
+    browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
   }
-  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
-  // Refactor to make them identical.
-  const base::Time now = base::Time::Now();
-  const size_t orig_size = pending_browse_hashes_.size();
-  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
-       iter != full_hits.end(); ++iter) {
-    const int list_id = safe_browsing_util::GetListId(iter->list_name);
-    if (list_id == safe_browsing_util::MALWARE ||
-        list_id == safe_browsing_util::PHISH) {
-      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
-      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
-      pending_browse_hashes_.push_back(add_full_hash);
-    }
+  // Insert any fullhash hits. Note that there may be one, multiple, or no
+  // fullhashes for any given entry in |prefixes|.
+  for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
+       i != full_hits.end();
+       ++i) {
+    browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
   }
-
-  // Sort new entries then merge with the previously-sorted entries.
-  std::vector<SBAddFullHash>::iterator
-      orig_end = pending_browse_hashes_.begin() + orig_size;
-  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
-  std::inplace_merge(pending_browse_hashes_.begin(),
-                     orig_end, pending_browse_hashes_.end(),
-                     SBAddFullHashPrefixLess);
 }
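[Illustration of the new cache semantics; |some_prefix| and the 45-minute lifetime are hypothetical example values, the real lifetime comes from the GetHash response:]

std::vector<SBPrefix> prefixes(1, some_prefix);
std::vector<SBFullHashResult> full_hits;  // Empty => cache a miss.
database_->CacheHashResults(prefixes, full_hits,
                            base::TimeDelta::FromMinutes(45));
// Every queried prefix gets an entry up front, so until it expires,
// ContainsBrowseUrl() reports this prefix neither in |prefix_hits| nor in
// |cache_hits|: no new GetHash request is issued for a cached miss.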
 bool SafeBrowsingDatabaseNew::UpdateStarted(
@@ -1141,6 +1133,9 @@ bool SafeBrowsingDatabaseNew::UpdateStarted(
                                  safe_browsing_util::kMalwareList,
                                  safe_browsing_util::kPhishingList,
                                  lists);
+  // Cached fullhash results must be cleared on every database update (whether
+  // successful or not).
+  browse_gethash_cache_.clear();
Scott Hess - ex-Googler  2014/04/01 22:08:36
I think this should be cleared when the update is
mattm  2014/04/03 01:38:12
The spec says "Clients must clear cached full-leng
 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
 // deprecated. Code to delete the list from the store shows ~15k hits/day as
@@ -1275,15 +1270,11 @@ void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
   if (!store)
     return;
-  // For the whitelists, we don't cache and save full hashes since all
-  // hashes are already full.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // Note: |builder| will not be empty. The current data store implementation
   // stores all full-length hashes as both full and prefix hashes.
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> full_hashes;
-  if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) {
+  if (!store->FinishUpdate(&builder, &full_hashes)) {
     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
     WhitelistEverything(whitelist);
     return;
@@ -1300,17 +1291,12 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
     const base::FilePath& store_filename,
     SafeBrowsingStore* store,
     FailureType failure_type) {
-  // We don't cache and save full hashes.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // These results are not used after this call. Simply ignore the
   // returned value after FinishUpdate(...).
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes_result;
-  if (!store->FinishUpdate(empty_add_hashes,
-                           &builder,
-                           &add_full_hashes_result)) {
+  if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
     RecordFailure(failure_type);
   }
@@ -1322,16 +1308,6 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
 }
 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
-  // Copy out the pending add hashes. Copy rather than swapping in
-  // case |ContainsBrowseURL()| is called before the new filter is complete.
-  std::vector<SBAddFullHash> pending_add_hashes;
-  {
-    base::AutoLock locked(lookup_lock_);
-    pending_add_hashes.insert(pending_add_hashes.end(),
-                              pending_browse_hashes_.begin(),
-                              pending_browse_hashes_.end());
-  }
-
   // Measure the amount of IO during the filter build.
   base::IoCounters io_before, io_after;
   base::ProcessHandle handle = base::Process::Current().handle();
@@ -1353,8 +1329,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes;
-  if (!browse_store_->FinishUpdate(pending_add_hashes,
-                                   &builder, &add_full_hashes)) {
+  if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
     return;
   }
@@ -1368,14 +1343,6 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
   {
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.swap(add_full_hashes);
-
-    // TODO(shess): If |CacheHashResults()| is posted between the
-    // earlier lock and this clear, those pending hashes will be lost.
-    // It could be fixed by only removing hashes which were collected
-    // at the earlier point. I believe that is fail-safe as-is (the
-    // hash will be fetched again).
-    pending_browse_hashes_.clear();
-    prefix_miss_cache_.clear();
     browse_prefix_set_.swap(prefix_set);
   }
@@ -1417,14 +1384,11 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
 }
 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
-  std::vector<SBAddFullHash> empty_add_hashes;
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes_result;
   if (!side_effect_free_whitelist_store_->FinishUpdate(
-          empty_add_hashes,
-          &builder,
-          &add_full_hashes_result)) {
+          &builder, &add_full_hashes_result)) {
     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
     return;
   }
@@ -1465,16 +1429,11 @@ void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
 }
 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
-  // For the IP blacklist, we don't cache and save full hashes since all
-  // hashes are already full.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // Note: prefixes will not be empty. The current data store implementation
   // stores all full-length hashes as both full and prefix hashes.
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> full_hashes;
-  if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
-                                         &builder, &full_hashes)) {
+  if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
     return;