Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: components/safe_browsing_db/v4_get_hash_protocol_manager.cc

Issue 2233103002: Move full hash caching logic to v4_get_hash_protocol_manager (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Bring back the histogram to check if there were any hits in the response from the server Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/safe_browsing_db/v4_get_hash_protocol_manager.h" 5 #include "components/safe_browsing_db/v4_get_hash_protocol_manager.h"
6 6
7 #include <utility> 7 #include <utility>
8 8
9 #include "base/base64url.h" 9 #include "base/base64url.h"
10 #include "base/macros.h" 10 #include "base/macros.h"
11 #include "base/memory/ptr_util.h" 11 #include "base/memory/ptr_util.h"
12 #include "base/metrics/histogram_macros.h" 12 #include "base/metrics/histogram_macros.h"
13 #include "base/timer/timer.h" 13 #include "base/timer/timer.h"
14 #include "net/base/load_flags.h" 14 #include "net/base/load_flags.h"
15 #include "net/http/http_response_headers.h" 15 #include "net/http/http_response_headers.h"
16 #include "net/http/http_status_code.h" 16 #include "net/http/http_status_code.h"
17 #include "net/url_request/url_fetcher.h" 17 #include "net/url_request/url_fetcher.h"
18 #include "net/url_request/url_request_context_getter.h" 18 #include "net/url_request/url_request_context_getter.h"
19 19
20 using base::Time; 20 using base::Time;
21 using base::TimeDelta; 21 using base::TimeDelta;
22 22
23 namespace { 23 namespace {
24 24
25 // Record a GetHash result.
26 void RecordGetHashResult(safe_browsing::V4OperationResult result) {
27 UMA_HISTOGRAM_ENUMERATION(
28 "SafeBrowsing.GetV4HashResult", result,
29 safe_browsing::V4OperationResult::OPERATION_RESULT_MAX);
30 }
31
25 // Enumerate parsing failures for histogramming purposes. DO NOT CHANGE 32 // Enumerate parsing failures for histogramming purposes. DO NOT CHANGE
26 // THE ORDERING OF THESE VALUES. 33 // THE ORDERING OF THESE VALUES.
27 enum ParseResultType { 34 enum ParseResultType {
28 // Error parsing the protocol buffer from a string. 35 // Error parsing the protocol buffer from a string.
29 PARSE_FROM_STRING_ERROR = 0, 36 PARSE_FROM_STRING_ERROR = 0,
30 37
31 // A match in the response had an unexpected THREAT_ENTRY_TYPE. 38 // A match in the response had an unexpected THREAT_ENTRY_TYPE.
32 UNEXPECTED_THREAT_ENTRY_TYPE_ERROR = 1, 39 UNEXPECTED_THREAT_ENTRY_TYPE_ERROR = 1,
33 40
34 // A match in the response had an unexpected THREAT_TYPE. 41 // A match in the response had an unexpected THREAT_TYPE.
35 UNEXPECTED_THREAT_TYPE_ERROR = 2, 42 UNEXPECTED_THREAT_TYPE_ERROR = 2,
36 43
37 // A match in the response had an unexpected PLATFORM_TYPE. 44 // A match in the response had an unexpected PLATFORM_TYPE.
38 UNEXPECTED_PLATFORM_TYPE_ERROR = 3, 45 UNEXPECTED_PLATFORM_TYPE_ERROR = 3,
39 46
40 // A match in the response contained no metadata where metadata was 47 // A match in the response contained no metadata where metadata was
41 // expected. 48 // expected.
42 NO_METADATA_ERROR = 4, 49 NO_METADATA_ERROR = 4,
43 50
44 // A match in the response contained a ThreatType that was inconsistent 51 // A match in the response contained a ThreatType that was inconsistent
45 // with the other matches. 52 // with the other matches.
46 INCONSISTENT_THREAT_TYPE_ERROR = 5, 53 INCONSISTENT_THREAT_TYPE_ERROR = 5,
47 54
48 // A match in the response contained a metadata, but the metadata is invalid. 55 // A match in the response contained a metadata, but the metadata is invalid.
49 UNEXPECTED_METADATA_VALUE_ERROR = 6, 56 UNEXPECTED_METADATA_VALUE_ERROR = 6,
50 57
58 // A match in the response had no information in the threat field.
59 NO_THREAT_ERROR = 7,
60
51 // Memory space for histograms is determined by the max. ALWAYS 61 // Memory space for histograms is determined by the max. ALWAYS
52 // ADD NEW VALUES BEFORE THIS ONE. 62 // ADD NEW VALUES BEFORE THIS ONE.
53 PARSE_RESULT_TYPE_MAX = 7, 63 PARSE_RESULT_TYPE_MAX = 8,
54 }; 64 };
55 65
56 // Record parsing errors of a GetHash result. 66 // Record parsing errors of a GetHash result.
57 void RecordParseGetHashResult(ParseResultType result_type) { 67 void RecordParseGetHashResult(ParseResultType result_type) {
58 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.ParseV4HashResult", result_type, 68 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.ParseV4HashResult", result_type,
59 PARSE_RESULT_TYPE_MAX); 69 PARSE_RESULT_TYPE_MAX);
60 } 70 }
61 71
62 // Record a GetHash result. 72 // Enumerate full hash cache hits/misses for histogramming purposes.
63 void RecordGetHashResult(safe_browsing::V4OperationResult result) { 73 // DO NOT CHANGE THE ORDERING OF THESE VALUES.
64 UMA_HISTOGRAM_ENUMERATION( 74 enum V4FullHashCacheResultType {
65 "SafeBrowsing.GetV4HashResult", result, 75 // Full hashes for which there is no cache hit.
66 safe_browsing::V4OperationResult::OPERATION_RESULT_MAX); 76 FULL_HASH_CACHE_MISS = 0,
77
78 // Full hashes with a cache hit.
79 FULL_HASH_CACHE_HIT = 1,
80
81 // Full hashes with a negative cache hit.
82 FULL_HASH_NEGATIVE_CACHE_HIT = 2,
83
84 // Memory space for histograms is determined by the max. ALWAYS
85 // ADD NEW VALUES BEFORE THIS ONE.
86 FULL_HASH_CACHE_RESULT_MAX
87 };
88
89 // Record a full hash cache hit result.
90 void RecordV4FullHashCacheResult(V4FullHashCacheResultType result_type) {
91 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4FullHashCacheResult", result_type,
92 FULL_HASH_CACHE_RESULT_MAX);
93 }
94
95 // Enumerate GetHash hits/misses for histogramming purposes. DO NOT CHANGE THE
96 // ORDERING OF THESE VALUES.
97 enum V4GetHashCheckResultType {
98 // Successful responses which returned no full hashes.
99 GET_HASH_CHECK_EMPTY = 0,
100
101 // Successful responses for which one or more of the full hashes matched.
102 GET_HASH_CHECK_HIT = 1,
103
104 // Successful responses which weren't empty but have no matches.
105 GET_HASH_CHECK_MISS = 2,
106
107 // Memory space for histograms is determined by the max. ALWAYS
108 // ADD NEW VALUES BEFORE THIS ONE.
109 GET_HASH_CHECK_RESULT_MAX
110 };
111
112 // Record a GetHash hit result.
113 void RecordV4GetHashCheckResult(V4GetHashCheckResultType result_type) {
114 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4GetHashCheckResult", result_type,
115 GET_HASH_CHECK_RESULT_MAX);
67 } 116 }
68 117
69 } // namespace 118 } // namespace
70 119
71 namespace safe_browsing { 120 namespace safe_browsing {
72 121
73 const char kUmaV4HashResponseMetricName[] = 122 const char kUmaV4HashResponseMetricName[] =
74 "SafeBrowsing.GetV4HashHttpResponseOrErrorCode"; 123 "SafeBrowsing.GetV4HashHttpResponseOrErrorCode";
75 124
76 // The default V4GetHashProtocolManagerFactory. 125 // The default V4GetHashProtocolManagerFactory.
77 class V4GetHashProtocolManagerFactoryImpl 126 class V4GetHashProtocolManagerFactoryImpl
78 : public V4GetHashProtocolManagerFactory { 127 : public V4GetHashProtocolManagerFactory {
79 public: 128 public:
80 V4GetHashProtocolManagerFactoryImpl() {} 129 V4GetHashProtocolManagerFactoryImpl() {}
81 ~V4GetHashProtocolManagerFactoryImpl() override {} 130 ~V4GetHashProtocolManagerFactoryImpl() override {}
82 V4GetHashProtocolManager* CreateProtocolManager( 131 std::unique_ptr<V4GetHashProtocolManager> CreateProtocolManager(
83 net::URLRequestContextGetter* request_context_getter, 132 net::URLRequestContextGetter* request_context_getter,
133 const base::hash_set<UpdateListIdentifier>& stores_to_request,
84 const V4ProtocolConfig& config) override { 134 const V4ProtocolConfig& config) override {
85 return new V4GetHashProtocolManager(request_context_getter, config); 135 return base::WrapUnique(new V4GetHashProtocolManager(
136 request_context_getter, stores_to_request, config));
86 } 137 }
87 138
88 private: 139 private:
89 DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManagerFactoryImpl); 140 DISALLOW_COPY_AND_ASSIGN(V4GetHashProtocolManagerFactoryImpl);
90 }; 141 };
91 142
143 // ----------------------------------------------------------------
144
145 CachedHashPrefixInfo::CachedHashPrefixInfo() {}
146
147 CachedHashPrefixInfo::CachedHashPrefixInfo(const CachedHashPrefixInfo& other) =
148 default;
149
150 CachedHashPrefixInfo::~CachedHashPrefixInfo() {}
151
152 // ----------------------------------------------------------------
153
154 FullHashCallbackInfo::FullHashCallbackInfo() {}
155
156 FullHashCallbackInfo::FullHashCallbackInfo(
157 const std::vector<FullHashInfo>& cached_full_hash_infos,
158 const std::vector<HashPrefix>& prefixes_requested,
159 std::unique_ptr<net::URLFetcher> fetcher,
160 const FullHashToStoreAndHashPrefixesMap&
161 full_hash_to_store_and_hash_prefixes,
162 const FullHashCallback& callback)
163 : cached_full_hash_infos(cached_full_hash_infos),
164 callback(callback),
165 fetcher(std::move(fetcher)),
166 full_hash_to_store_and_hash_prefixes(
167 full_hash_to_store_and_hash_prefixes),
168 prefixes_requested(prefixes_requested) {}
Scott Hess - ex-Googler 2016/09/12 23:31:35 AFAICT from the calling code, all of these could b
vakh (use Gerrit instead) 2016/09/13 00:12:07 Acknowledged. Work for when we put in perf markers
169
170 FullHashCallbackInfo::~FullHashCallbackInfo() {}
171
172 // ----------------------------------------------------------------
173
174 FullHashInfo::FullHashInfo(const FullHash& full_hash,
175 const UpdateListIdentifier& list_id,
176 const base::Time& positive_ttl)
177 : full_hash(full_hash), list_id(list_id), positive_ttl(positive_ttl) {}
178
179 FullHashInfo::FullHashInfo(const FullHashInfo& other) = default;
180
181 FullHashInfo::~FullHashInfo() {}
182
183 bool FullHashInfo::operator==(const FullHashInfo& other) const {
184 return full_hash == other.full_hash && list_id == other.list_id &&
185 positive_ttl == other.positive_ttl && metadata == other.metadata;
186 }
187
188 bool FullHashInfo::operator!=(const FullHashInfo& other) const {
189 return !operator==(other);
190 }
191
92 // V4GetHashProtocolManager implementation -------------------------------- 192 // V4GetHashProtocolManager implementation --------------------------------
93 193
94 // static 194 // static
95 V4GetHashProtocolManagerFactory* V4GetHashProtocolManager::factory_ = NULL; 195 V4GetHashProtocolManagerFactory* V4GetHashProtocolManager::factory_ = NULL;
96 196
97 // static 197 // static
98 V4GetHashProtocolManager* V4GetHashProtocolManager::Create( 198 std::unique_ptr<V4GetHashProtocolManager> V4GetHashProtocolManager::Create(
99 net::URLRequestContextGetter* request_context_getter, 199 net::URLRequestContextGetter* request_context_getter,
200 const base::hash_set<UpdateListIdentifier>& stores_to_request,
100 const V4ProtocolConfig& config) { 201 const V4ProtocolConfig& config) {
101 if (!factory_) 202 if (!factory_)
102 factory_ = new V4GetHashProtocolManagerFactoryImpl(); 203 factory_ = new V4GetHashProtocolManagerFactoryImpl();
103 return factory_->CreateProtocolManager(request_context_getter, config); 204 return factory_->CreateProtocolManager(request_context_getter,
205 stores_to_request, config);
104 } 206 }
105 207
106 void V4GetHashProtocolManager::ResetGetHashErrors() { 208 // static
107 gethash_error_count_ = 0; 209 void V4GetHashProtocolManager::RegisterFactory(
108 gethash_back_off_mult_ = 1; 210 std::unique_ptr<V4GetHashProtocolManagerFactory> factory) {
211 if (factory_)
212 delete factory_;
213 factory_ = factory.release();
109 } 214 }
110 215
111 V4GetHashProtocolManager::V4GetHashProtocolManager( 216 V4GetHashProtocolManager::V4GetHashProtocolManager(
112 net::URLRequestContextGetter* request_context_getter, 217 net::URLRequestContextGetter* request_context_getter,
218 const base::hash_set<UpdateListIdentifier>& stores_to_request,
113 const V4ProtocolConfig& config) 219 const V4ProtocolConfig& config)
114 : gethash_error_count_(0), 220 : gethash_error_count_(0),
115 gethash_back_off_mult_(1), 221 gethash_back_off_mult_(1),
116 next_gethash_time_(Time::FromDoubleT(0)), 222 next_gethash_time_(Time::FromDoubleT(0)),
117 config_(config), 223 config_(config),
118 request_context_getter_(request_context_getter), 224 request_context_getter_(request_context_getter),
119 url_fetcher_id_(0), 225 url_fetcher_id_(0),
120 clock_(new base::DefaultClock()) {} 226 clock_(new base::DefaultClock()) {
121 227 DCHECK(!stores_to_request.empty());
122 V4GetHashProtocolManager::~V4GetHashProtocolManager() { 228 for (const UpdateListIdentifier& store : stores_to_request) {
123 } 229 platform_types_.insert(store.platform_type);
124 230 threat_entry_types_.insert(store.threat_entry_type);
125 // static 231 threat_types_.insert(store.threat_type);
126 void V4GetHashProtocolManager::RegisterFactory( 232 }
127 std::unique_ptr<V4GetHashProtocolManagerFactory> factory) { 233 }
128 if (factory_) 234
129 delete factory_; 235 V4GetHashProtocolManager::~V4GetHashProtocolManager() {}
130 factory_ = factory.release(); 236
237 void V4GetHashProtocolManager::ClearCache() {
238 DCHECK(CalledOnValidThread());
239 full_hash_cache_.clear();
240 }
241
242 void V4GetHashProtocolManager::GetFullHashes(
243 const FullHashToStoreAndHashPrefixesMap&
244 full_hash_to_store_and_hash_prefixes,
245 FullHashCallback callback) {
246 DCHECK(CalledOnValidThread());
247 DCHECK(!full_hash_to_store_and_hash_prefixes.empty());
248
249 std::vector<HashPrefix> prefixes_to_request;
250 std::vector<FullHashInfo> cached_full_hash_infos;
251 GetFullHashCachedResults(full_hash_to_store_and_hash_prefixes, Time::Now(),
252 &prefixes_to_request, &cached_full_hash_infos);
253
254 if (prefixes_to_request.empty()) {
255 // 100% cache hits (positive or negative) so we can call the callback right
256 // away.
257 callback.Run(cached_full_hash_infos);
258 return;
259 }
260
261 // We need to wait the minimum waiting duration, and if we are in backoff,
262 // we need to check if we're past the next allowed time. If we are, we can
263 // proceed with the request. If not, we are required to return empty results
264 // (i.e. just use the results from cache and potentially report an unsafe
265 // resource as safe).
266 if (clock_->Now() <= next_gethash_time_) {
267 if (gethash_error_count_) {
268 RecordGetHashResult(V4OperationResult::BACKOFF_ERROR);
269 } else {
270 RecordGetHashResult(V4OperationResult::MIN_WAIT_DURATION_ERROR);
271 }
272 callback.Run(cached_full_hash_infos);
273 return;
274 }
275
276 std::string req_base64 = GetHashRequest(prefixes_to_request);
277 GURL gethash_url;
278 net::HttpRequestHeaders headers;
279 GetHashUrlAndHeaders(req_base64, &gethash_url, &headers);
280
281 std::unique_ptr<net::URLFetcher> owned_fetcher = net::URLFetcher::Create(
282 url_fetcher_id_++, gethash_url, net::URLFetcher::GET, this);
283 net::URLFetcher* fetcher = owned_fetcher.get();
284 pending_hash_requests_[fetcher].reset(new FullHashCallbackInfo(
285 cached_full_hash_infos, prefixes_to_request, std::move(owned_fetcher),
286 full_hash_to_store_and_hash_prefixes, callback));
287
288 fetcher->SetExtraRequestHeaders(headers.ToString());
289 fetcher->SetLoadFlags(net::LOAD_DISABLE_CACHE);
290 fetcher->SetRequestContext(request_context_getter_.get());
291 fetcher->Start();
292 }
293
294 void V4GetHashProtocolManager::GetFullHashesWithApis(
295 const GURL& url,
296 ThreatMetadataForApiCallback api_callback) {
297 DCHECK(url.SchemeIs(url::kHttpScheme) || url.SchemeIs(url::kHttpsScheme));
298
299 base::hash_set<FullHash> full_hashes;
300 V4ProtocolManagerUtil::UrlToFullHashes(url, &full_hashes);
301
302 FullHashToStoreAndHashPrefixesMap full_hash_to_store_and_hash_prefixes;
303 for (const FullHash& full_hash : full_hashes) {
304 HashPrefix prefix;
305 bool result =
306 V4ProtocolManagerUtil::FullHashToSmallestHashPrefix(full_hash, &prefix);
307 DCHECK(result);
308 full_hash_to_store_and_hash_prefixes[full_hash].emplace_back(
309 GetChromeUrlApiId(), prefix);
310 }
311
312 GetFullHashes(full_hash_to_store_and_hash_prefixes,
313 base::Bind(&V4GetHashProtocolManager::OnFullHashForApi,
314 base::Unretained(this), api_callback, full_hashes));
315 }
316
317 void V4GetHashProtocolManager::GetFullHashCachedResults(
318 const FullHashToStoreAndHashPrefixesMap&
319 full_hash_to_store_and_hash_prefixes,
320 const Time& now,
321 std::vector<HashPrefix>* prefixes_to_request,
322 std::vector<FullHashInfo>* cached_full_hash_infos) const {
323 DCHECK(!full_hash_to_store_and_hash_prefixes.empty());
324 DCHECK(prefixes_to_request->empty());
325 DCHECK(cached_full_hash_infos->empty());
326
327 // Caching behavior is documented here:
328 // https://developers.google.com/safe-browsing/v4/caching#about-caching
329 //
330 // The cache operates as follows:
331 // Lookup:
332 // Case 1: The prefix is in the cache.
333 // Case a: The full hash is in the cache.
334 // Case i : The positive full hash result has not expired.
335 // The result is unsafe and we do not need to send a new
336 // request.
337 // Case ii: The positive full hash result has expired.
338 // We need to send a request for full hashes.
339 // Case b: The full hash is not in the cache.
340 // Case i : The negative cache entry has not expired.
341 // The result is still safe and we do not need to send a
342 // new request.
343 // Case ii: The negative cache entry has expired.
344 // We need to send a request for full hashes.
345 // Case 2: The prefix is not in the cache.
346 // We need to send a request for full hashes.
347 //
348 // Note on eviction:
349 // CachedHashPrefixInfo entries can be removed from the cache only when
350 // the negative cache expire time and the cache expire time of all full
351 // hash results for that prefix have expired.
352 // Individual full hash results can be removed from the prefix's
353 // cache entry if they expire AND their expire time is after the negative
354 // cache expire time.
355
356 base::hash_set<HashPrefix> unique_prefixes_to_request;
357 for (const auto& it : full_hash_to_store_and_hash_prefixes) {
358 const FullHash& full_hash = it.first;
359 const StoreAndHashPrefixes& matched = it.second;
360 for (const StoreAndHashPrefix& matched_it : matched) {
361 const UpdateListIdentifier& list_id = matched_it.list_id;
362 const HashPrefix& prefix = matched_it.hash_prefix;
363 const auto& prefix_entry = full_hash_cache_.find(prefix);
364 if (prefix_entry != full_hash_cache_.end()) {
365 // Case 1.
366 const CachedHashPrefixInfo& cached_prefix_info = prefix_entry->second;
367 bool found_full_hash = false;
368 for (const FullHashInfo& full_hash_info :
369 cached_prefix_info.full_hash_infos) {
370 if (full_hash_info.full_hash == full_hash &&
371 full_hash_info.list_id == list_id) {
372 // Case a.
373 found_full_hash = true;
374 bool positive_ttl_unexpired = full_hash_info.positive_ttl > now;
375 if (positive_ttl_unexpired) {
376 // Case i.
377 cached_full_hash_infos->push_back(full_hash_info);
378 RecordV4FullHashCacheResult(FULL_HASH_CACHE_HIT);
379 } else {
380 // Case ii.
381 unique_prefixes_to_request.insert(prefix);
382 RecordV4FullHashCacheResult(FULL_HASH_CACHE_MISS);
383 }
384 break;
385 }
386 }
387
388 if (!found_full_hash) {
389 // Case b.
390 if (cached_prefix_info.negative_ttl > now) {
391 // Case i.
392 RecordV4FullHashCacheResult(FULL_HASH_NEGATIVE_CACHE_HIT);
393 } else {
394 // Case ii.
395 unique_prefixes_to_request.insert(prefix);
396 RecordV4FullHashCacheResult(FULL_HASH_CACHE_MISS);
397 }
398 }
399 } else {
400 // Case 2.
401 unique_prefixes_to_request.insert(prefix);
402 RecordV4FullHashCacheResult(FULL_HASH_CACHE_MISS);
403 }
404 }
405 }
406
407 prefixes_to_request->insert(prefixes_to_request->begin(),
408 unique_prefixes_to_request.begin(),
409 unique_prefixes_to_request.end());
131 } 410 }
132 411
133 std::string V4GetHashProtocolManager::GetHashRequest( 412 std::string V4GetHashProtocolManager::GetHashRequest(
134 const std::vector<SBPrefix>& prefixes, 413 const std::vector<HashPrefix>& prefixes_to_request) {
135 const std::vector<PlatformType>& platforms, 414 DCHECK(!prefixes_to_request.empty());
136 ThreatType threat_type) { 415
137 // Build the request. Client info and client states are not added to the
138 // request protocol buffer. Client info is passed as params in the url.
139 FindFullHashesRequest req; 416 FindFullHashesRequest req;
140 ThreatInfo* info = req.mutable_threat_info(); 417 ThreatInfo* info = req.mutable_threat_info();
141 info->add_threat_types(threat_type); 418 for (const PlatformType p : platform_types_) {
142 info->add_threat_entry_types(URL);
143 for (const PlatformType p : platforms) {
144 info->add_platform_types(p); 419 info->add_platform_types(p);
145 } 420 }
146 for (const SBPrefix& prefix : prefixes) { 421 for (const ThreatEntryType tet : threat_entry_types_) {
147 std::string hash(reinterpret_cast<const char*>(&prefix), sizeof(SBPrefix)); 422 info->add_threat_entry_types(tet);
148 info->add_threat_entries()->set_hash(hash); 423 }
424 for (const ThreatType tt : threat_types_) {
425 info->add_threat_types(tt);
426 }
427 for (const HashPrefix& prefix : prefixes_to_request) {
428 info->add_threat_entries()->set_hash(prefix);
149 } 429 }
150 430
151 // Serialize and Base64 encode. 431 // Serialize and Base64 encode.
152 std::string req_data, req_base64; 432 std::string req_data, req_base64;
153 req.SerializeToString(&req_data); 433 req.SerializeToString(&req_data);
154 base::Base64UrlEncode(req_data, base::Base64UrlEncodePolicy::INCLUDE_PADDING, 434 base::Base64UrlEncode(req_data, base::Base64UrlEncodePolicy::INCLUDE_PADDING,
155 &req_base64); 435 &req_base64);
156 return req_base64; 436 return req_base64;
157 } 437 }
158 438
439 void V4GetHashProtocolManager::GetHashUrlAndHeaders(
440 const std::string& req_base64,
441 GURL* gurl,
442 net::HttpRequestHeaders* headers) const {
443 V4ProtocolManagerUtil::GetRequestUrlAndHeaders(req_base64, "fullHashes:find",
444 config_, gurl, headers);
445 }
446
447 void V4GetHashProtocolManager::HandleGetHashError(const Time& now) {
448 DCHECK(CalledOnValidThread());
449 TimeDelta next = V4ProtocolManagerUtil::GetNextBackOffInterval(
450 &gethash_error_count_, &gethash_back_off_mult_);
451 next_gethash_time_ = now + next;
452 }
453
454 void V4GetHashProtocolManager::OnFullHashForApi(
455 const ThreatMetadataForApiCallback& api_callback,
456 const base::hash_set<FullHash>& full_hashes,
457 const std::vector<FullHashInfo>& full_hash_infos) {
458 ThreatMetadata md;
459 for (const FullHashInfo& full_hash_info : full_hash_infos) {
460 DCHECK_EQ(GetChromeUrlApiId(), full_hash_info.list_id);
461 DCHECK(full_hashes.find(full_hash_info.full_hash) != full_hashes.end());
462 md.api_permissions.insert(full_hash_info.metadata.api_permissions.begin(),
463 full_hash_info.metadata.api_permissions.end());
464 }
465
466 // TODO(vakh): Figure out what UMA metrics to report. This code was previously
467 // calling RecordV4GetHashCheckResult with appropriate values but that's not
468 // applicable anymore.
469 api_callback.Run(md);
470 }
471
159 bool V4GetHashProtocolManager::ParseHashResponse( 472 bool V4GetHashProtocolManager::ParseHashResponse(
160 const std::string& data, 473 const std::string& response_data,
161 std::vector<SBFullHashResult>* full_hashes, 474 std::vector<FullHashInfo>* full_hash_infos,
162 base::Time* negative_cache_expire) { 475 Time* negative_cache_expire) {
163 FindFullHashesResponse response; 476 FindFullHashesResponse response;
164 477
165 if (!response.ParseFromString(data)) { 478 if (!response.ParseFromString(response_data)) {
166 RecordParseGetHashResult(PARSE_FROM_STRING_ERROR); 479 RecordParseGetHashResult(PARSE_FROM_STRING_ERROR);
167 return false; 480 return false;
168 } 481 }
169 482
170 // negative_cache_duration should always be set. 483 // negative_cache_duration should always be set.
171 DCHECK(response.has_negative_cache_duration()); 484 DCHECK(response.has_negative_cache_duration());
485
172 // Seconds resolution is good enough so we ignore the nanos field. 486 // Seconds resolution is good enough so we ignore the nanos field.
173 *negative_cache_expire = 487 *negative_cache_expire =
174 clock_->Now() + base::TimeDelta::FromSeconds( 488 clock_->Now() +
175 response.negative_cache_duration().seconds()); 489 TimeDelta::FromSeconds(response.negative_cache_duration().seconds());
176 490
177 if (response.has_minimum_wait_duration()) { 491 if (response.has_minimum_wait_duration()) {
178 // Seconds resolution is good enough so we ignore the nanos field. 492 // Seconds resolution is good enough so we ignore the nanos field.
179 next_gethash_time_ = 493 next_gethash_time_ =
180 clock_->Now() + base::TimeDelta::FromSeconds( 494 clock_->Now() +
181 response.minimum_wait_duration().seconds()); 495 TimeDelta::FromSeconds(response.minimum_wait_duration().seconds());
182 } 496 }
183 497
184 // We only expect one threat type per request, so we make sure
185 // the threat types are consistent between matches.
186 ThreatType expected_threat_type = THREAT_TYPE_UNSPECIFIED;
187
188 // Loop over the threat matches and fill in full_hashes.
189 for (const ThreatMatch& match : response.matches()) { 498 for (const ThreatMatch& match : response.matches()) {
190 // Make sure the platform and threat entry type match. 499 if (!match.has_platform_type()) {
191 if (!(match.has_threat_entry_type() && match.threat_entry_type() == URL && 500 RecordParseGetHashResult(UNEXPECTED_PLATFORM_TYPE_ERROR);
192 match.has_threat())) { 501 return false;
502 }
503 if (!match.has_threat_entry_type()) {
193 RecordParseGetHashResult(UNEXPECTED_THREAT_ENTRY_TYPE_ERROR); 504 RecordParseGetHashResult(UNEXPECTED_THREAT_ENTRY_TYPE_ERROR);
194 return false; 505 return false;
195 } 506 }
196
197 if (!match.has_threat_type()) { 507 if (!match.has_threat_type()) {
198 RecordParseGetHashResult(UNEXPECTED_THREAT_TYPE_ERROR); 508 RecordParseGetHashResult(UNEXPECTED_THREAT_TYPE_ERROR);
199 return false; 509 return false;
200 } 510 }
201 511 if (!match.has_threat()) {
202 if (expected_threat_type == THREAT_TYPE_UNSPECIFIED) { 512 RecordParseGetHashResult(NO_THREAT_ERROR);
203 expected_threat_type = match.threat_type(); 513 return false;
204 } else if (match.threat_type() != expected_threat_type) { 514 }
205 RecordParseGetHashResult(INCONSISTENT_THREAT_TYPE_ERROR); 515
206 return false; 516 UpdateListIdentifier list_id(
207 } 517 match.platform_type(), match.threat_entry_type(), match.threat_type());
208 518 base::Time positive_ttl;
209 // Fill in the full hash.
210 SBFullHashResult result;
211 result.hash = StringToSBFullHash(match.threat().hash());
212
213 if (match.has_cache_duration()) { 519 if (match.has_cache_duration()) {
214 // Seconds resolution is good enough so we ignore the nanos field. 520 // Seconds resolution is good enough so we ignore the nanos field.
215 result.cache_expire_after = 521 positive_ttl = clock_->Now() +
216 clock_->Now() + 522 TimeDelta::FromSeconds(match.cache_duration().seconds());
217 base::TimeDelta::FromSeconds(match.cache_duration().seconds());
218 } else { 523 } else {
219 result.cache_expire_after = clock_->Now(); 524 positive_ttl = clock_->Now();
Scott Hess - ex-Googler 2016/09/12 23:31:35 Is this meant to be a placeholder which will not m
vakh (use Gerrit instead) 2016/09/13 00:12:07 Good idea. Done.
220 } 525 }
221 526 FullHashInfo full_hash_info(match.threat().hash(), list_id, positive_ttl);
222 // Different threat types will handle the metadata differently. 527 if (!ParseMetadata(match, &full_hash_info.metadata)) {
223 if (match.threat_type() == API_ABUSE) { 528 return false;
224 if (match.has_platform_type() && 529 }
225 match.platform_type() == CHROME_PLATFORM) { 530
226 if (match.has_threat_entry_metadata()) { 531 full_hash_infos->push_back(full_hash_info);
227 // For API Abuse, store a list of the returned permissions. 532 }
228 for (const ThreatEntryMetadata::MetadataEntry& m : 533 return true;
229 match.threat_entry_metadata().entries()) { 534 }
230 if (m.key() == "permission") { 535
231 result.metadata.api_permissions.insert(m.value()); 536 bool V4GetHashProtocolManager::ParseMetadata(const ThreatMatch& match,
232 } else { 537 ThreatMetadata* metadata) {
Scott Hess - ex-Googler 2016/09/12 23:31:34 Probably the various string constants in here shou
vakh (use Gerrit instead) 2016/09/13 00:12:07 Done.
233 RecordParseGetHashResult(UNEXPECTED_METADATA_VALUE_ERROR); 538 // Different threat types will handle the metadata differently.
234 return false; 539 if (match.threat_type() == API_ABUSE) {
235 } 540 if (!match.has_platform_type() ||
236 } 541 match.platform_type() != CHROME_PLATFORM) {
542 RecordParseGetHashResult(UNEXPECTED_PLATFORM_TYPE_ERROR);
543 return false;
544 }
545
546 if (!match.has_threat_entry_metadata()) {
547 RecordParseGetHashResult(NO_METADATA_ERROR);
548 return false;
549 }
550 // For API Abuse, store a list of the returned permissions.
551 for (const ThreatEntryMetadata::MetadataEntry& m :
552 match.threat_entry_metadata().entries()) {
553 if (m.key() != "permission") {
554 RecordParseGetHashResult(UNEXPECTED_METADATA_VALUE_ERROR);
555 return false;
556 }
557 metadata->api_permissions.insert(m.value());
558 }
559 } else if (match.threat_type() == MALWARE_THREAT ||
560 match.threat_type() == POTENTIALLY_HARMFUL_APPLICATION) {
561 for (const ThreatEntryMetadata::MetadataEntry& m :
562 match.threat_entry_metadata().entries()) {
563 // TODO: Need to confirm the below key/value pairs with CSD backend.
564 if (m.key() == "pha_pattern_type" || m.key() == "malware_pattern_type") {
565 if (m.value() == "LANDING") {
566 metadata->threat_pattern_type = ThreatPatternType::MALWARE_LANDING;
567 break;
568 } else if (m.value() == "DISTRIBUTION") {
569 metadata->threat_pattern_type =
570 ThreatPatternType::MALWARE_DISTRIBUTION;
571 break;
237 } else { 572 } else {
238 RecordParseGetHashResult(NO_METADATA_ERROR); 573 RecordParseGetHashResult(UNEXPECTED_METADATA_VALUE_ERROR);
239 return false; 574 return false;
240 } 575 }
241 } else { 576 }
242 RecordParseGetHashResult(UNEXPECTED_PLATFORM_TYPE_ERROR); 577 }
243 return false; 578 } else if (match.threat_type() == SOCIAL_ENGINEERING_PUBLIC) {
244 } 579 for (const ThreatEntryMetadata::MetadataEntry& m :
245 } else if (match.threat_type() == MALWARE_THREAT || 580 match.threat_entry_metadata().entries()) {
246 match.threat_type() == POTENTIALLY_HARMFUL_APPLICATION) { 581 if (m.key() == "se_pattern_type") {
247 for (const ThreatEntryMetadata::MetadataEntry& m : 582 if (m.value() == "SOCIAL_ENGINEERING_ADS") {
248 match.threat_entry_metadata().entries()) { 583 metadata->threat_pattern_type =
249 // TODO: Need to confirm the below key/value pairs with CSD backend. 584 ThreatPatternType::SOCIAL_ENGINEERING_ADS;
250 if (m.key() == "pha_pattern_type" || 585 break;
251 m.key() == "malware_pattern_type") { 586 } else if (m.value() == "SOCIAL_ENGINEERING_LANDING") {
252 if (m.value() == "LANDING") { 587 metadata->threat_pattern_type =
253 result.metadata.threat_pattern_type = 588 ThreatPatternType::SOCIAL_ENGINEERING_LANDING;
254 ThreatPatternType::MALWARE_LANDING; 589 break;
255 break; 590 } else if (m.value() == "PHISHING") {
256 } else if (m.value() == "DISTRIBUTION") { 591 metadata->threat_pattern_type = ThreatPatternType::PHISHING;
257 result.metadata.threat_pattern_type = 592 break;
258 ThreatPatternType::MALWARE_DISTRIBUTION; 593 } else {
259 break; 594 RecordParseGetHashResult(UNEXPECTED_METADATA_VALUE_ERROR);
260 } else { 595 return false;
261 RecordParseGetHashResult(UNEXPECTED_METADATA_VALUE_ERROR);
262 return false;
263 }
264 } 596 }
265 } 597 }
266 } else if (match.threat_type() == SOCIAL_ENGINEERING_PUBLIC) { 598 }
267 for (const ThreatEntryMetadata::MetadataEntry& m : 599 } else {
268 match.threat_entry_metadata().entries()) { 600 RecordParseGetHashResult(UNEXPECTED_THREAT_TYPE_ERROR);
269 if (m.key() == "se_pattern_type") { 601 return false;
270 if (m.value() == "SOCIAL_ENGINEERING_ADS") { 602 }
271 result.metadata.threat_pattern_type = 603
272 ThreatPatternType::SOCIAL_ENGINEERING_ADS;
273 break;
274 } else if (m.value() == "SOCIAL_ENGINEERING_LANDING") {
275 result.metadata.threat_pattern_type =
276 ThreatPatternType::SOCIAL_ENGINEERING_LANDING;
277 break;
278 } else if (m.value() == "PHISHING") {
279 result.metadata.threat_pattern_type = ThreatPatternType::PHISHING;
280 break;
281 } else {
282 RecordParseGetHashResult(UNEXPECTED_METADATA_VALUE_ERROR);
283 return false;
284 }
285 }
286 }
287 } else {
288 RecordParseGetHashResult(UNEXPECTED_THREAT_TYPE_ERROR);
289 return false;
290 }
291
292 full_hashes->push_back(result);
293 }
294 return true; 604 return true;
295 } 605 }
296 606
297 void V4GetHashProtocolManager::GetFullHashes( 607 void V4GetHashProtocolManager::ResetGetHashErrors() {
298 const std::vector<SBPrefix>& prefixes, 608 gethash_error_count_ = 0;
299 const std::vector<PlatformType>& platforms, 609 gethash_back_off_mult_ = 1;
300 ThreatType threat_type,
301 FullHashCallback callback) {
302 DCHECK(CalledOnValidThread());
303 // We need to wait the minimum waiting duration, and if we are in backoff,
304 // we need to check if we're past the next allowed time. If we are, we can
305 // proceed with the request. If not, we are required to return empty results
306 // (i.e. treat the page as safe).
307 if (clock_->Now() <= next_gethash_time_) {
308 if (gethash_error_count_) {
309 RecordGetHashResult(V4OperationResult::BACKOFF_ERROR);
310 } else {
311 RecordGetHashResult(V4OperationResult::MIN_WAIT_DURATION_ERROR);
312 }
313 std::vector<SBFullHashResult> full_hashes;
314 callback.Run(full_hashes, base::Time());
315 return;
316 }
317
318 std::string req_base64 = GetHashRequest(prefixes, platforms, threat_type);
319 GURL gethash_url;
320 net::HttpRequestHeaders headers;
321 GetHashUrlAndHeaders(req_base64, &gethash_url, &headers);
322
323 std::unique_ptr<net::URLFetcher> owned_fetcher = net::URLFetcher::Create(
324 url_fetcher_id_++, gethash_url, net::URLFetcher::GET, this);
325 net::URLFetcher* fetcher = owned_fetcher.get();
326 fetcher->SetExtraRequestHeaders(headers.ToString());
327 hash_requests_[fetcher] = std::make_pair(std::move(owned_fetcher), callback);
328
329 fetcher->SetLoadFlags(net::LOAD_DISABLE_CACHE);
330 fetcher->SetRequestContext(request_context_getter_.get());
331 fetcher->Start();
332 }
333
334 void V4GetHashProtocolManager::GetFullHashesWithApis(
335 const std::vector<SBPrefix>& prefixes,
336 FullHashCallback callback) {
337 std::vector<PlatformType> platform = {CHROME_PLATFORM};
338 GetFullHashes(prefixes, platform, API_ABUSE, callback);
339 } 610 }
340 611
341 void V4GetHashProtocolManager::SetClockForTests( 612 void V4GetHashProtocolManager::SetClockForTests(
342 std::unique_ptr<base::Clock> clock) { 613 std::unique_ptr<base::Clock> clock) {
343 clock_ = std::move(clock); 614 clock_ = std::move(clock);
344 } 615 }
345 616
617 void V4GetHashProtocolManager::UpdateCache(
618 const std::vector<HashPrefix>& prefixes_requested,
619 const std::vector<FullHashInfo>& full_hash_infos,
620 const Time& negative_cache_expire) {
621 // If negative_cache_expire is null, don't cache the results since it's not
622 // clear till what time they should be considered valid.
623 if (negative_cache_expire.is_null()) {
624 return;
625 }
626
627 for (const HashPrefix& prefix : prefixes_requested) {
628 // Create or reset the cached result for this prefix.
629 CachedHashPrefixInfo& chpi = full_hash_cache_[prefix];
630 chpi.full_hash_infos.clear();
631 chpi.negative_ttl = negative_cache_expire;
632
633 for (const FullHashInfo& full_hash_info : full_hash_infos) {
634 if (V4ProtocolManagerUtil::FullHashMatchesHashPrefix(
635 full_hash_info.full_hash, prefix)) {
Scott Hess - ex-Googler 2016/09/12 23:31:35 Is this iterating the vector of prefixes, then ite
vakh (use Gerrit instead) 2016/09/13 00:12:07 The hash prefixes can be >32 bits now so that won'
Scott Hess - ex-Googler 2016/09/13 00:31:34 Mostly what I mean is that right now much of this
vakh (use Gerrit instead) 2016/09/13 01:18:56 Yes, I share that concern. But, as you said, this
636 chpi.full_hash_infos.push_back(full_hash_info);
637 }
638 }
639 }
640 }
641
642 void V4GetHashProtocolManager::MergeResults(
643 const FullHashToStoreAndHashPrefixesMap&
644 full_hash_to_store_and_hash_prefixes,
645 const std::vector<FullHashInfo>& full_hash_infos,
646 std::vector<FullHashInfo>* merged_full_hash_infos) {
647 bool get_hash_hit = false;
648 for (const FullHashInfo& fhi : full_hash_infos) {
649 const auto& it = full_hash_to_store_and_hash_prefixes.find(fhi.full_hash);
650 bool matched_full_hash = full_hash_to_store_and_hash_prefixes.end() != it;
Scott Hess - ex-Googler 2016/09/12 23:31:35 I dunno, feels strained versus just inlining the !
vakh (use Gerrit instead) 2016/09/13 00:12:07 Done.
651 if (matched_full_hash) {
652 for (const StoreAndHashPrefix& sahp : it->second) {
653 if (fhi.list_id == sahp.list_id) {
654 merged_full_hash_infos->push_back(fhi);
655 get_hash_hit = true;
656 break;
657 }
658 }
659 }
660 }
661
662 if (get_hash_hit) {
663 RecordV4GetHashCheckResult(GET_HASH_CHECK_HIT);
664 } else if (full_hash_infos.empty()) {
665 RecordV4GetHashCheckResult(GET_HASH_CHECK_EMPTY);
666 } else {
667 RecordV4GetHashCheckResult(GET_HASH_CHECK_MISS);
668 }
669 }
670
346 // net::URLFetcherDelegate implementation ---------------------------------- 671 // net::URLFetcherDelegate implementation ----------------------------------
347 672
348 // SafeBrowsing request responses are handled here. 673 // SafeBrowsing request responses are handled here.
349 void V4GetHashProtocolManager::OnURLFetchComplete( 674 void V4GetHashProtocolManager::OnURLFetchComplete(
350 const net::URLFetcher* source) { 675 const net::URLFetcher* source) {
351 DCHECK(CalledOnValidThread()); 676 DCHECK(CalledOnValidThread());
352 677
353 HashRequests::iterator it = hash_requests_.find(source); 678 PendingHashRequests::iterator it = pending_hash_requests_.find(source);
354 DCHECK(it != hash_requests_.end()) << "Request not found"; 679 DCHECK(it != pending_hash_requests_.end()) << "Request not found";
355 680
356 int response_code = source->GetResponseCode(); 681 int response_code = source->GetResponseCode();
357 net::URLRequestStatus status = source->GetStatus(); 682 net::URLRequestStatus status = source->GetStatus();
358 V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode( 683 V4ProtocolManagerUtil::RecordHttpResponseOrErrorCode(
359 kUmaV4HashResponseMetricName, status, response_code); 684 kUmaV4HashResponseMetricName, status, response_code);
360 685
361 const FullHashCallback& callback = it->second.second; 686 std::vector<FullHashInfo> full_hash_infos;
362 std::vector<SBFullHashResult> full_hashes; 687 Time negative_cache_expire;
363 base::Time negative_cache_expire;
364 if (status.is_success() && response_code == net::HTTP_OK) { 688 if (status.is_success() && response_code == net::HTTP_OK) {
365 RecordGetHashResult(V4OperationResult::STATUS_200); 689 RecordGetHashResult(V4OperationResult::STATUS_200);
366 ResetGetHashErrors(); 690 ResetGetHashErrors();
367 std::string data; 691 std::string data;
368 source->GetResponseAsString(&data); 692 source->GetResponseAsString(&data);
369 if (!ParseHashResponse(data, &full_hashes, &negative_cache_expire)) { 693 if (!ParseHashResponse(data, &full_hash_infos, &negative_cache_expire)) {
370 full_hashes.clear(); 694 full_hash_infos.clear();
371 RecordGetHashResult(V4OperationResult::PARSE_ERROR); 695 RecordGetHashResult(V4OperationResult::PARSE_ERROR);
372 } 696 }
373 } else { 697 } else {
374 HandleGetHashError(clock_->Now()); 698 HandleGetHashError(clock_->Now());
375 699
376 DVLOG(1) << "SafeBrowsing GetEncodedFullHashes request for: " 700 DVLOG(1) << "SafeBrowsing GetEncodedFullHashes request for: "
377 << source->GetURL() << " failed with error: " << status.error() 701 << source->GetURL() << " failed with error: " << status.error()
378 << " and response code: " << response_code; 702 << " and response code: " << response_code;
379 703
380 if (status.status() == net::URLRequestStatus::FAILED) { 704 if (status.status() == net::URLRequestStatus::FAILED) {
381 RecordGetHashResult(V4OperationResult::NETWORK_ERROR); 705 RecordGetHashResult(V4OperationResult::NETWORK_ERROR);
382 } else { 706 } else {
383 RecordGetHashResult(V4OperationResult::HTTP_ERROR); 707 RecordGetHashResult(V4OperationResult::HTTP_ERROR);
384 } 708 }
385 } 709 }
386 710
387 // Invoke the callback with full_hashes, even if there was a parse error or 711 const std::unique_ptr<FullHashCallbackInfo>& fhci = it->second;
388 // an error response code (in which case full_hashes will be empty). The 712 UpdateCache(fhci->prefixes_requested, full_hash_infos, negative_cache_expire);
389 // caller can't be blocked indefinitely. 713 MergeResults(fhci->full_hash_to_store_and_hash_prefixes, full_hash_infos,
390 callback.Run(full_hashes, negative_cache_expire); 714 &fhci->cached_full_hash_infos);
391 715
392 hash_requests_.erase(it); 716 fhci->callback.Run(fhci->cached_full_hash_infos);
717
718 pending_hash_requests_.erase(it);
393 } 719 }
394 720
395 void V4GetHashProtocolManager::HandleGetHashError(const Time& now) { 721 #ifndef DEBUG
396 DCHECK(CalledOnValidThread()); 722 std::ostream& operator<<(std::ostream& os, const FullHashInfo& fhi) {
397 base::TimeDelta next = V4ProtocolManagerUtil::GetNextBackOffInterval( 723 os << "{full_hash: " << fhi.full_hash << "; list_id: " << fhi.list_id
398 &gethash_error_count_, &gethash_back_off_mult_); 724 << "; positive_ttl: " << fhi.positive_ttl
399 next_gethash_time_ = now + next; 725 << "; metadata.api_permissions.size(): "
726 << fhi.metadata.api_permissions.size() << "}";
727 return os;
400 } 728 }
401 729 #endif
402 void V4GetHashProtocolManager::GetHashUrlAndHeaders(
403 const std::string& req_base64,
404 GURL* gurl,
405 net::HttpRequestHeaders* headers) const {
406 V4ProtocolManagerUtil::GetRequestUrlAndHeaders(req_base64, "fullHashes:find",
407 config_, gurl, headers);
408 }
409 730
410 } // namespace safe_browsing 731 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698