Index: chrome/browser/safe_browsing/safe_browsing_database.cc
diff --git a/chrome/browser/safe_browsing/safe_browsing_database.cc b/chrome/browser/safe_browsing/safe_browsing_database.cc
index 2b5b2e256bcdf199691453784246ac5a14ccc20c..282188133d98503176983c86e24cba6a446f116d 100644
--- a/chrome/browser/safe_browsing/safe_browsing_database.cc
+++ b/chrome/browser/safe_browsing/safe_browsing_database.cc
@@ -66,9 +66,6 @@ const base::FilePath::CharType kIPBlacklistDBFile[] =
 // this.
 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
 
-// The maximum staleness for a cached entry.
-const int kMaxStalenessMinutes = 45;
-
 // Maximum number of entries we allow in any of the whitelists.
 // If a whitelist on disk contains more entries then all lookups to
 // the whitelist will be considered a match.
@@ -185,51 +182,6 @@ bool MatchAddPrefixes(SafeBrowsingStore* store,
   return found_match;
 }
 
-// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
-// add them to |full_hits| if not expired. "Not expired" is when
-// either |last_update| was recent enough, or the item has been
-// received recently enough. Expired items are not deleted because a
-// future update may make them acceptable again.
-//
-// For efficiency reasons the code walks |prefix_hits| and
-// |full_hashes| in parallel, so they must be sorted by prefix.
-void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
-                                  const std::vector<SBAddFullHash>& full_hashes,
-                                  std::vector<SBFullHashResult>* full_hits,
-                                  base::Time last_update) {
-  const base::Time expire_time =
-      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
-
-  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
-  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
-
-  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
-    if (*piter < hiter->full_hash.prefix) {
-      ++piter;
-    } else if (hiter->full_hash.prefix < *piter) {
-      ++hiter;
-    } else {
-      if (expire_time < last_update ||
-          expire_time.ToTimeT() < hiter->received) {
-        SBFullHashResult result;
-        const int list_bit = GetListIdBit(hiter->chunk_id);
-        DCHECK(list_bit == safe_browsing_util::MALWARE ||
-               list_bit == safe_browsing_util::PHISH);
-        const safe_browsing_util::ListType list_id =
-            static_cast<safe_browsing_util::ListType>(list_bit);
-        if (!safe_browsing_util::GetListName(list_id, &result.list_name))
-          continue;
-        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
-        result.hash = hiter->full_hash;
-        full_hits->push_back(result);
-      }
-
-      // Only increment |hiter|, |piter| might have multiple hits.
-      ++hiter;
-    }
-  }
-}
-
 // This function generates a chunk range string for |chunks|. It
 // outputs one chunk range string per list and writes it to the
 // |list_ranges| vector. We expect |list_ranges| to already be of the
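Note on the deleted helper above: as its comment says, it merged two prefix-sorted sequences in a single forward pass, advancing whichever iterator holds the smaller key and advancing only |hiter| on a match so that one prefix can pair with several full hashes. A minimal sketch of that two-pointer pattern, using hypothetical element types rather than the Chromium ones:

#include <vector>

// Hypothetical records keyed by an integer prefix.
struct Lhs { int key; };
struct Rhs { int key; };

// Calls |match| for every lhs/rhs pair with equal keys. Both vectors
// must be sorted by key; |lhs| is assumed to hold unique keys.
template <typename Match>
void ParallelWalk(const std::vector<Lhs>& lhs,
                  const std::vector<Rhs>& rhs,
                  Match match) {
  std::vector<Lhs>::const_iterator liter = lhs.begin();
  std::vector<Rhs>::const_iterator riter = rhs.begin();
  while (liter != lhs.end() && riter != rhs.end()) {
    if (liter->key < riter->key) {
      ++liter;
    } else if (riter->key < liter->key) {
      ++riter;
    } else {
      match(*liter, *riter);
      // Only advance the right side; |liter| may match more entries.
      ++riter;
    }
  }
}

Each step advances at least one iterator, so the walk is linear in the combined length, which is why both inputs had to be sorted by prefix.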
@@ -311,6 +263,9 @@ void UpdateChunkRangesForList(SafeBrowsingStore* store,
 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
   return a.full_hash.prefix < b.full_hash.prefix;
 }
+bool SBAddFullHashSBPrefixLess(const SBAddFullHash& a, SBPrefix b) {
+  return a.full_hash.prefix < b;
+}
 
 // This code always checks for non-zero file size. This helper makes
 // that less verbose.
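The asymmetric comparator added above exists for std::lower_bound: when searching a sorted std::vector<SBAddFullHash> for a bare SBPrefix, the comparator takes the element type on the left and the searched value on the right. A minimal sketch of the intended call shape, with simplified stand-in types rather than the real headers:

#include <algorithm>
#include <vector>

typedef int SBPrefix;  // Stand-in; the real type lives in the SB headers.
struct SBFullHash { SBPrefix prefix; };
struct SBAddFullHash { SBFullHash full_hash; };

bool SBAddFullHashSBPrefixLess(const SBAddFullHash& a, SBPrefix b) {
  return a.full_hash.prefix < b;
}

// Returns the first entry whose prefix is >= |prefix|. Callers walk
// forward from here while the prefix still matches, exactly as the new
// ContainsBrowseUrlHashes() loop later in this patch does.
std::vector<SBAddFullHash>::const_iterator FirstWithPrefix(
    const std::vector<SBAddFullHash>& sorted_hashes,
    SBPrefix prefix) {
  return std::lower_bound(sorted_hashes.begin(), sorted_hashes.end(),
                          prefix, SBAddFullHashSBPrefixLess);
}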
@@ -321,11 +276,6 @@ int64 GetFileSizeOrZero(const base::FilePath& file_path) {
   return size_64;
 }
 
-// Used to order whitelist storage in memory.
-bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
-  return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
-}
-
 }  // namespace
 
 // The default SafeBrowsingDatabaseFactory.
@@ -531,7 +481,7 @@ void SafeBrowsingDatabaseNew::Init(const base::FilePath& filename_base) {
     // contention on the lock...
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.clear();
-    pending_browse_hashes_.clear();
+    browse_gethash_cache_.clear();
     LoadPrefixSet();
   }
 
@@ -655,8 +605,7 @@ bool SafeBrowsingDatabaseNew::ResetDatabase() {
   {
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.clear();
-    pending_browse_hashes_.clear();
-    prefix_miss_cache_.clear();
+    browse_gethash_cache_.clear();
    browse_prefix_set_.reset();
     side_effect_free_whitelist_prefix_set_.reset();
     ip_blacklist_.clear();
@@ -667,23 +616,28 @@
   return true;
 }
 
-// TODO(lzheng): Remove matching_list, it is not used anywhere.
 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
     const GURL& url,
-    std::string* matching_list,
     std::vector<SBPrefix>* prefix_hits,
-    std::vector<SBFullHashResult>* full_hits,
-    base::Time last_update) {
+    std::vector<SBFullHashResult>* cache_hits) {
   // Clear the results first.
-  matching_list->clear();
   prefix_hits->clear();
-  full_hits->clear();
+  cache_hits->clear();
 
   std::vector<SBFullHash> full_hashes;
   BrowseFullHashesToCheck(url, false, &full_hashes);
   if (full_hashes.empty())
     return false;
 
+  std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
+
+  return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
+}
+
+bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
+    const std::vector<SBFullHash>& full_hashes,
+    std::vector<SBPrefix>* prefix_hits,
+    std::vector<SBFullHashResult>* cache_hits) {
   // This function is called on the I/O thread, prevent changes to
   // filter and caches.
   base::AutoLock locked(lookup_lock_);
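With the signature change above, callers no longer pass matching_list or last_update; staleness is now handled inside the database via per-entry expiry. A hedged sketch of the new calling convention, with illustrative variable names:

// |prefix_hits| come back when a prefix matched the local database and
// still needs confirmation via a GetHash request; |cache_hits| are
// full-hash results answered from the gethash cache with no network I/O.
std::vector<SBPrefix> prefix_hits;
std::vector<SBFullHashResult> cache_hits;
if (database->ContainsBrowseUrl(url, &prefix_hits, &cache_hits)) {
  // Issue GetHash for |prefix_hits|; treat |cache_hits| as confirmed.
}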
@@ -694,30 +648,62 @@ bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
   if (!browse_prefix_set_.get())
     return false;
 
-  size_t miss_count = 0;
+  const base::Time now = base::Time::Now();
+
   for (size_t i = 0; i < full_hashes.size(); ++i) {
     const SBPrefix prefix = full_hashes[i].prefix;
-    if (browse_prefix_set_->Exists(prefix)) {
-      prefix_hits->push_back(prefix);
-      if (prefix_miss_cache_.count(prefix) > 0)
-        ++miss_count;
+
+    // First check if there is a valid cached result for this prefix.
+    std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
+        browse_gethash_cache_.find(prefix);
+    if (citer != browse_gethash_cache_.end()) {
+      if (now <= citer->second.expire_after) {
+        for (std::vector<SBFullHashResult>::const_iterator fiter =
+                 citer->second.full_hashes.begin();
+             fiter != citer->second.full_hashes.end();
+             ++fiter) {
+          if (SBFullHashEqual(full_hashes[i], fiter->hash))
+            cache_hits->push_back(*fiter);
+        }
+        // If the prefix was in the cache, don't add the prefix to
+        // prefix_hits. The result will be in cache_hits (if the fullhash
+        // matched), or not (if it was a cached miss).
+        continue;
+      }
+
+      // Remove expired entries.
+      browse_gethash_cache_.erase(citer);
     }
-  }
 
-  // If all the prefixes are cached as 'misses', don't issue a GetHash.
-  if (miss_count == prefix_hits->size())
-    return false;
+    // There was no valid cached result for the prefix, so check the database.
+    if (browse_prefix_set_->Exists(prefix)) {
+      if (prefix_hits->empty() || prefix_hits->back() != prefix)
+        prefix_hits->push_back(prefix);
+      continue;
+    }
 
-  // Find the matching full-hash results. |full_browse_hashes_| are from the
-  // database, |pending_browse_hashes_| are from GetHash requests between
-  // updates.
-  std::sort(prefix_hits->begin(), prefix_hits->end());
+    // There was no prefix match, check for fullhash matches.
+    std::vector<SBAddFullHash>::const_iterator db_fullhash_prefix_match =
+        std::lower_bound(full_browse_hashes_.begin(),
+                         full_browse_hashes_.end(),
+                         prefix,
+                         SBAddFullHashSBPrefixLess);
+    // If full_browse_hashes_ was sorted on the fullhash (not just the
+    // prefix), could do binary_search here, but there are unlikely to be
+    // enough prefix matches to matter.
+    while (db_fullhash_prefix_match != full_browse_hashes_.end() &&
+           db_fullhash_prefix_match->full_hash.prefix == prefix) {
+      if (SBFullHashEqual(db_fullhash_prefix_match->full_hash,
+                          full_hashes[i])) {
+        if (prefix_hits->empty() || prefix_hits->back() != prefix)
+          prefix_hits->push_back(prefix);
+        break;
+      }
+      ++db_fullhash_prefix_match;
+    }
+  }
 
-  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
-                               full_hits, last_update);
-  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
-                               full_hits, last_update);
-  return true;
+  return !prefix_hits->empty() || !cache_hits->empty();
 }
 
 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
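The lookup above depends on SBCachedFullHashResult, whose definition is not part of this file. Inferring from the usage here (expire_after compared against now, full_hashes iterated for SBFullHashEqual), the struct is roughly the following; treat this as an illustrative sketch rather than the authoritative declaration:

// One cache entry per queried prefix. An entry whose |full_hashes| is
// empty is a cached miss: the prefix was sent to GetHash and nothing
// came back, so no new request is needed until |expire_after| passes.
struct SBCachedFullHashResult {
  SBCachedFullHashResult() {}
  explicit SBCachedFullHashResult(const base::Time& in_expire_after)
      : expire_after(in_expire_after) {}

  base::Time expire_after;
  std::vector<SBFullHashResult> full_hashes;
};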
@@ -875,17 +861,12 @@ void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
       store->WriteAddPrefix(encoded_chunk_id, prefix);
     }
   } else {
-    // Prefixes and hashes.
-    const base::Time receive_time = base::Time::Now();
+    // Full hashes only.
     for (int i = 0; i < count; ++i) {
       const SBFullHash full_hash = entry->FullHashAt(i);
-      const SBPrefix prefix = full_hash.prefix;
-
-      STATS_COUNTER("SB.PrefixAdd", 1);
-      store->WriteAddPrefix(encoded_chunk_id, prefix);
 
       STATS_COUNTER("SB.PrefixAddFull", 1);
-      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
+      store->WriteAddHash(encoded_chunk_id, full_hash);
     }
   }
 }
@@ -950,15 +931,12 @@ void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
       store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
     }
   } else {
-    // Prefixes and hashes.
+    // Full hashes only.
     for (int i = 0; i < count; ++i) {
       const SBFullHash full_hash = entry->FullHashAt(i);
       const int add_chunk_id =
           EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
 
-      STATS_COUNTER("SB.PrefixSub", 1);
-      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
-
       STATS_COUNTER("SB.PrefixSubFull", 1);
       store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
     }
@@ -1053,37 +1031,28 @@ void SafeBrowsingDatabaseNew::DeleteChunks(
 
 void SafeBrowsingDatabaseNew::CacheHashResults(
     const std::vector<SBPrefix>& prefixes,
-    const std::vector<SBFullHashResult>& full_hits) {
+    const std::vector<SBFullHashResult>& full_hits,
+    const base::TimeDelta& cache_lifetime) {
+
+  const base::Time expire_after = base::Time::Now() + cache_lifetime;
+
   // This is called on the I/O thread, lock against updates.
   base::AutoLock locked(lookup_lock_);
 
-  if (full_hits.empty()) {
-    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
-    return;
+  // Create or reset all cached results for these prefixes.
+  for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
+       i != prefixes.end();
+       ++i) {
+    browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
   }
 
-  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
-  // Refactor to make them identical.
-  const base::Time now = base::Time::Now();
-  const size_t orig_size = pending_browse_hashes_.size();
-  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
-       iter != full_hits.end(); ++iter) {
-    const int list_id = safe_browsing_util::GetListId(iter->list_name);
-    if (list_id == safe_browsing_util::MALWARE ||
-        list_id == safe_browsing_util::PHISH) {
-      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
-      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
-      pending_browse_hashes_.push_back(add_full_hash);
-    }
+  // Insert any fullhash hits. Note that there may be one, multiple, or no
+  // fullhashes for any given entry in |prefixes|.
+  for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
+       i != full_hits.end();
+       ++i) {
+    browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
   }
-
-  // Sort new entries then merge with the previously-sorted entries.
-  std::vector<SBAddFullHash>::iterator
-      orig_end = pending_browse_hashes_.begin() + orig_size;
-  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
-  std::inplace_merge(pending_browse_hashes_.begin(),
-                     orig_end, pending_browse_hashes_.end(),
-                     SBAddFullHashPrefixLess);
 }
 
 bool SafeBrowsingDatabaseNew::UpdateStarted(
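Because CacheHashResults() first writes an entry for every queried prefix and only then folds in the full-hash hits, a prefix that returned no full hash ends up cached as an explicit miss. A short usage sketch under that reading; the prefixes, results, and lifetime value are illustrative:

// GetHash was issued for {kPrefix1, kPrefix2}; only kPrefix1 returned a
// full hash. |result1.hash.prefix| is assumed to equal kPrefix1.
std::vector<SBPrefix> prefixes;
prefixes.push_back(kPrefix1);
prefixes.push_back(kPrefix2);

std::vector<SBFullHashResult> full_hits;
full_hits.push_back(result1);

// kPrefix1 now caches a positive result; kPrefix2 caches an empty entry,
// i.e. a miss. Both suppress further GetHash traffic until the lifetime
// expires, and the next database update drops the whole cache anyway.
database->CacheHashResults(prefixes, full_hits,
                           base::TimeDelta::FromMinutes(45));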
@@ -1141,6 +1110,9 @@ bool SafeBrowsingDatabaseNew::UpdateStarted(
                         safe_browsing_util::kMalwareList,
                         safe_browsing_util::kPhishingList,
                         lists);
+  // Cached fullhash results must be cleared on every database update (whether
+  // successful or not).
+  browse_gethash_cache_.clear();
 
   // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
   // deprecated. Code to delete the list from the store shows ~15k hits/day as
@@ -1275,15 +1247,11 @@ void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
   if (!store)
     return;
 
-  // For the whitelists, we don't cache and save full hashes since all
-  // hashes are already full.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // Note: |builder| will not be empty. The current data store implementation
   // stores all full-length hashes as both full and prefix hashes.
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> full_hashes;
-  if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) {
+  if (!store->FinishUpdate(&builder, &full_hashes)) {
     RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
     WhitelistEverything(whitelist);
     return;
@@ -1300,17 +1268,12 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
     const base::FilePath& store_filename,
     SafeBrowsingStore* store,
     FailureType failure_type) {
-  // We don't cache and save full hashes.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // These results are not used after this call. Simply ignore the
   // returned value after FinishUpdate(...).
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes_result;
 
-  if (!store->FinishUpdate(empty_add_hashes,
-                           &builder,
-                           &add_full_hashes_result)) {
+  if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
     RecordFailure(failure_type);
   }
 
@@ -1322,16 +1285,6 @@ int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
 }
 
 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
-  // Copy out the pending add hashes. Copy rather than swapping in
-  // case |ContainsBrowseURL()| is called before the new filter is complete.
-  std::vector<SBAddFullHash> pending_add_hashes;
-  {
-    base::AutoLock locked(lookup_lock_);
-    pending_add_hashes.insert(pending_add_hashes.end(),
-                              pending_browse_hashes_.begin(),
-                              pending_browse_hashes_.end());
-  }
-
   // Measure the amount of IO during the filter build.
   base::IoCounters io_before, io_after;
   base::ProcessHandle handle = base::Process::Current().handle();
@@ -1353,8 +1306,7 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
 
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes;
-  if (!browse_store_->FinishUpdate(pending_add_hashes,
-                                   &builder, &add_full_hashes)) {
+  if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
     RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
     return;
   }
@@ -1368,14 +1320,6 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
   {
     base::AutoLock locked(lookup_lock_);
     full_browse_hashes_.swap(add_full_hashes);
-
-    // TODO(shess): If |CacheHashResults()| is posted between the
-    // earlier lock and this clear, those pending hashes will be lost.
-    // It could be fixed by only removing hashes which were collected
-    // at the earlier point. I believe that is fail-safe as-is (the
-    // hash will be fetched again).
-    pending_browse_hashes_.clear();
-    prefix_miss_cache_.clear();
     browse_prefix_set_.swap(prefix_set);
   }
 
@@ -1417,14 +1361,11 @@ void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
 }
 
 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
-  std::vector<SBAddFullHash> empty_add_hashes;
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> add_full_hashes_result;
 
   if (!side_effect_free_whitelist_store_->FinishUpdate(
-          empty_add_hashes,
-          &builder,
-          &add_full_hashes_result)) {
+          &builder, &add_full_hashes_result)) {
     RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
     return;
   }
@@ -1465,16 +1406,11 @@ void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
 }
 
 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
-  // For the IP blacklist, we don't cache and save full hashes since all
-  // hashes are already full.
-  std::vector<SBAddFullHash> empty_add_hashes;
-
   // Note: prefixes will not be empty. The current data store implementation
   // stores all full-length hashes as both full and prefix hashes.
   safe_browsing::PrefixSetBuilder builder;
   std::vector<SBAddFullHash> full_hashes;
-  if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes,
-                                         &builder, &full_hashes)) {
+  if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
     RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
     LoadIpBlacklist(std::vector<SBAddFullHash>());  // Clear the list.
     return;
|