components/safe_browsing_db/v4_get_hash_protocol_manager.cc - Issue 2233103002: Move full hash caching logic to v4_get_hash_protocol_manager

Unified Diff: components/safe_browsing_db/v4_get_hash_protocol_manager.cc

Issue 2233103002: Move full hash caching logic to v4_get_hash_protocol_manager (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« components/safe_browsing_db/v4_get_hash_protocol_manager.h ('K') | « components/safe_browsing_db/v4_get_hash_protocol_manager.h ('k') | components/safe_browsing_db/v4_get_hash_protocol_manager_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/safe_browsing_db/v4_get_hash_protocol_manager.cc

diff --git a/components/safe_browsing_db/v4_get_hash_protocol_manager.cc b/components/safe_browsing_db/v4_get_hash_protocol_manager.cc

index 4b5f911d3aea69310fb8be800b29b7dff126aac1..9c14bd469ca4dc23c4a1824c72318aaf8786dec2 100644

--- a/components/safe_browsing_db/v4_get_hash_protocol_manager.cc

+++ b/components/safe_browsing_db/v4_get_hash_protocol_manager.cc

@@ -52,6 +52,23 @@ enum ParseResultType {

PARSE_RESULT_TYPE_MAX = 7,

};

+// Enumerate full hash cache hits/misses for histogramming purposes.

+// DO NOT CHANGE THE ORDERING OF THESE VALUES.

+enum V4FullHashCacheResultType {

+ // Full hashes for which there is no cache hit.

+ FULL_HASH_CACHE_MISS = 0,

+ // Full hashes with a cache hit.

+ FULL_HASH_CACHE_HIT = 1,

+ // Full hashes with a negative cache hit.

+ FULL_HASH_NEGATIVE_CACHE_HIT = 2,

+ // Memory space for histograms is determined by the max. ALWAYS

+ // ADD NEW VALUES BEFORE THIS ONE.

+ FULL_HASH_CACHE_RESULT_MAX

+};

// Record parsing errors of a GetHash result.

void RecordParseGetHashResult(ParseResultType result_type) {

UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.ParseV4HashResult", result_type,

@@ -65,6 +82,13 @@ void RecordGetHashResult(safe_browsing::V4OperationResult result) {

safe_browsing::V4OperationResult::OPERATION_RESULT_MAX);

}

+// Record a full hash cache hit result.

+void RecordV4FullHashCacheResult(

+ V4FullHashCacheResultType result_type) {

+ UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4FullHashCacheResult", result_type,

+ FULL_HASH_CACHE_RESULT_MAX);

} // namespace

namespace safe_browsing {

@@ -120,9 +144,9 @@ V4GetHashProtocolManager::V4GetHashProtocolManager(

V4GetHashProtocolManager::~V4GetHashProtocolManager() {

// Delete in-progress SafeBrowsing requests.

- STLDeleteContainerPairFirstPointers(hash_requests_.begin(),

- hash_requests_.end());

- hash_requests_.clear();

+ STLDeleteContainerPairFirstPointers(pending_hash_requests_.begin(),

+ pending_hash_requests_.end());

+ pending_hash_requests_.clear();

}

// static

@@ -212,7 +236,6 @@ bool V4GetHashProtocolManager::ParseHashResponse(

// Fill in the full hash.

SBFullHashResult result;

result.hash = StringToSBFullHash(match.threat().hash());

if (match.has_cache_duration()) {

// Seconds resolution is good enough so we ignore the nanos field.

result.cache_expire_after =

@@ -328,7 +351,7 @@ void V4GetHashProtocolManager::GetFullHashes(

net::URLFetcher::GET, this)

.release();

fetcher->SetExtraRequestHeaders(headers.ToString());

- hash_requests_[fetcher] = callback;

+ pending_hash_requests_[fetcher] = std::make_pair(prefixes, callback);

fetcher->SetLoadFlags(net::LOAD_DISABLE_CACHE);

fetcher->SetRequestContext(request_context_getter_.get());

@@ -354,8 +377,8 @@ void V4GetHashProtocolManager::OnURLFetchComplete(

const net::URLFetcher* source) {

DCHECK(CalledOnValidThread());

- HashRequests::iterator it = hash_requests_.find(source);

- DCHECK(it != hash_requests_.end()) << "Request not found";

+ PendingHashRequests::iterator it = pending_hash_requests_.find(source);

+ DCHECK(it != pending_hash_requests_.end()) << "Request not found";

// FindFullHashes response.

// Reset the scoped pointer so the fetcher gets destroyed properly.

@@ -366,7 +389,7 @@ void V4GetHashProtocolManager::OnURLFetchComplete(

V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode(

kUmaV4HashResponseMetricName, status, response_code);

- const FullHashCallback& callback = it->second;

+ const FullHashCallback& callback = it->second.second;

Nathan Parker 2016/08/15 19:54:47 nit: I might vote for a simple struct rather than

std::vector<SBFullHashResult> full_hashes;

base::Time negative_cache_expire;

if (status.is_success() && response_code == net::HTTP_OK) {

@@ -392,12 +415,29 @@ void V4GetHashProtocolManager::OnURLFetchComplete(

}

+ const std::vector<SBPrefix>& prefixes = it->second.first;

+ // If the time is uninitialized, don't cache the results.

+ if (!negative_cache_expire.is_null()) {

+ // Cache the results.

+ // Create or reset all cached results for this prefix.

+ for (const SBPrefix& prefix : prefixes) {

+ v4_full_hash_cache_[SB_THREAT_TYPE_API_ABUSE][prefix] =

Nathan Parker 2016/08/15 19:54:47 The threat type shouldn't be hardcoded here... it

+ SBCachedFullHashResult(negative_cache_expire);

+ }

+ // Insert any full hash hits. Note that there may be one, multiple, or no

+ // full hashes for any given prefix.

+ for (const SBFullHashResult& result : full_hashes) {

+ v4_full_hash_cache_[SB_THREAT_TYPE_API_ABUSE][result.hash.prefix].

+ full_hashes.push_back(result);

+ }

// Invoke the callback with full_hashes, even if there was a parse error or

// an error response code (in which case full_hashes will be empty). The

// caller can't be blocked indefinitely.

callback.Run(full_hashes, negative_cache_expire);

- hash_requests_.erase(it);

+ pending_hash_requests_.erase(it);

}

void V4GetHashProtocolManager::HandleGetHashError(const Time& now) {

@@ -415,4 +455,105 @@ void V4GetHashProtocolManager::GetHashUrlAndHeaders(

config_, gurl, headers);

}

+void V4GetHashProtocolManager::GetFullHashCachedResults(

+ const SBThreatType& threat_type,

+ const std::vector<SBFullHash>& full_hashes,

+ base::Time now,

+ std::vector<SBPrefix>* prefixes_needing_reqs,

+ std::vector<SBFullHashResult>* cached_results) {

+ DCHECK(prefixes_needing_reqs);

+ prefixes_needing_reqs->clear();

+ DCHECK(cached_results);

+ cached_results->clear();

+ // Caching behavior is documented here:

+ // https://developers.google.com/safe-browsing/v4/caching#about-caching

+ //

+ // The cache operates as follows:

+ // Lookup:

+ // Case 1: The prefix is in the cache.

+ // Case a: The full hash is in the cache.

+ // Case i : The positive full hash result has not expired.

+ // The result is unsafe and we do not need to send a new

+ // request.

+ // Case ii: The positive full hash result has expired.

+ // We need to send a request for full hashes.

+ // Case b: The full hash is not in the cache.

+ // Case i : The negative cache entry has not expired.

+ // The result is still safe and we do not need to send a

+ // new request.

+ // Case ii: The negative cache entry has expired.

+ // We need to send a request for full hashes.

+ // Case 2: The prefix is not in the cache.

+ // We need to send a request for full hashes.

+ //

+ // Eviction:

+ // SBCachedFullHashResult entries can be removed from the cache only when

+ // the negative cache expire time and the cache expire time of all full

+ // hash results for that prefix have expired.

+ // Individual full hash results can be removed from the prefix's

+ // cache entry if they expire AND their expire time is after the negative

+ // cache expire time.

+ for (const SBFullHash& full_hash : full_hashes) {

+ auto entry = v4_full_hash_cache_[threat_type].find(full_hash.prefix);

+ if (entry != v4_full_hash_cache_[threat_type].end()) {

+ // Case 1.

+ SBCachedFullHashResult& cache_result = entry->second;

+ const SBFullHashResult* found_full_hash = nullptr;

+ size_t matched_idx = 0;

+ for (const SBFullHashResult& hash_result : cache_result.full_hashes) {

+ if (SBFullHashEqual(full_hash, hash_result.hash)) {

+ found_full_hash = &hash_result;

+ break;

+ }

+ ++matched_idx;

+ }

+ if (found_full_hash) {

+ // Case a.

+ if (found_full_hash->cache_expire_after > now) {

+ // Case i.

+ cached_results->push_back(*found_full_hash);

+ RecordV4FullHashCacheResult(FULL_HASH_CACHE_HIT);

+ } else {

+ // Case ii.

+ prefixes_needing_reqs->push_back(full_hash.prefix);

+ RecordV4FullHashCacheResult(FULL_HASH_CACHE_MISS);

+ // If the negative cache expire time has passed, evict this full hash

+ // result from the cache.

+ if (cache_result.expire_after <= now) {

+ cache_result.full_hashes.erase(

+ cache_result.full_hashes.begin() + matched_idx);

+ // If there are no more full hashes, we can evict the entire entry.

+ if (cache_result.full_hashes.empty()) {

+ v4_full_hash_cache_[threat_type].erase(entry);

+ }

+ } else {

+ // Case b.

+ if (cache_result.expire_after > now) {

+ // Case i.

+ RecordV4FullHashCacheResult(FULL_HASH_NEGATIVE_CACHE_HIT);

+ } else {

+ // Case ii.

+ prefixes_needing_reqs->push_back(full_hash.prefix);

+ RecordV4FullHashCacheResult(FULL_HASH_CACHE_MISS);

+ }

+ } else {

+ // Case 2.

+ prefixes_needing_reqs->push_back(full_hash.prefix);

+ RecordV4FullHashCacheResult(FULL_HASH_CACHE_MISS);

+ }

+ // Multiple full hashes could share a prefix, remove duplicates.

+ // TODO(kcarattini): Make |prefixes_needing_reqs| a set.

+ std::sort(prefixes_needing_reqs->begin(), prefixes_needing_reqs->end());

+ prefixes_needing_reqs->erase(std::unique(prefixes_needing_reqs->begin(),

+ prefixes_needing_reqs->end()), prefixes_needing_reqs->end());

} // namespace safe_browsing