Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(48)

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2384893002: PVer4: Test checksum on startup outside the hotpath of DB load (Closed)
Patch Set: Minor: Formatting Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/base64.h" 5 #include "base/base64.h"
6 #include "base/bind.h" 6 #include "base/bind.h"
7 #include "base/files/file_util.h" 7 #include "base/files/file_util.h"
8 #include "base/memory/ptr_util.h" 8 #include "base/memory/ptr_util.h"
9 #include "base/metrics/histogram_macros.h" 9 #include "base/metrics/histogram_macros.h"
10 #include "base/metrics/sparse_histogram.h" 10 #include "base/metrics/sparse_histogram.h"
(...skipping 16 matching lines...) Expand all
27 const uint32_t kFileVersion = 9; 27 const uint32_t kFileVersion = 9;
28 28
29 std::string GetUmaSuffixForStore(const base::FilePath& file_path) { 29 std::string GetUmaSuffixForStore(const base::FilePath& file_path) {
30 return base::StringPrintf( 30 return base::StringPrintf(
31 ".%" PRIsFP, file_path.BaseName().RemoveExtension().value().c_str()); 31 ".%" PRIsFP, file_path.BaseName().RemoveExtension().value().c_str());
32 } 32 }
33 33
34 void RecordTimeWithAndWithoutStore(const std::string& metric, 34 void RecordTimeWithAndWithoutStore(const std::string& metric,
35 base::TimeDelta time, 35 base::TimeDelta time,
36 const base::FilePath& file_path) { 36 const base::FilePath& file_path) {
37 std::string suffix = GetUmaSuffixForStore(file_path);
38
39 // The histograms below are a modified expansion of the 37 // The histograms below are a modified expansion of the
40 // UMA_HISTOGRAM_LONG_TIMES macro adapted to allow for a dynamically suffixed 38 // UMA_HISTOGRAM_LONG_TIMES macro adapted to allow for a dynamically suffixed
41 // histogram name. 39 // histogram name.
42 // Note: The factory creates and owns the histogram. 40 // Note: The factory creates and owns the histogram.
43 const int kBucketCount = 100; 41 const int kBucketCount = 100;
44 base::HistogramBase* histogram = base::Histogram::FactoryTimeGet( 42 base::HistogramBase* histogram = base::Histogram::FactoryTimeGet(
45 metric, base::TimeDelta::FromMilliseconds(1), 43 metric, base::TimeDelta::FromMilliseconds(1),
46 base::TimeDelta::FromMinutes(1), kBucketCount, 44 base::TimeDelta::FromMinutes(1), kBucketCount,
47 base::HistogramBase::kUmaTargetedHistogramFlag); 45 base::HistogramBase::kUmaTargetedHistogramFlag);
48 if (histogram) { 46 if (histogram) {
49 histogram->AddTime(time); 47 histogram->AddTime(time);
50 } 48 }
51 49
50 std::string suffix = GetUmaSuffixForStore(file_path);
52 base::HistogramBase* histogram_suffix = base::Histogram::FactoryTimeGet( 51 base::HistogramBase* histogram_suffix = base::Histogram::FactoryTimeGet(
53 metric + suffix, base::TimeDelta::FromMilliseconds(1), 52 metric + suffix, base::TimeDelta::FromMilliseconds(1),
54 base::TimeDelta::FromMinutes(1), kBucketCount, 53 base::TimeDelta::FromMinutes(1), kBucketCount,
55 base::HistogramBase::kUmaTargetedHistogramFlag); 54 base::HistogramBase::kUmaTargetedHistogramFlag);
56 if (histogram_suffix) { 55 if (histogram_suffix) {
57 histogram_suffix->AddTime(time); 56 histogram_suffix->AddTime(time);
58 } 57 }
59 } 58 }
60 59
61 void RecordAddUnlumpedHashesTime(base::TimeDelta time) { 60 void RecordAddUnlumpedHashesTime(base::TimeDelta time) {
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
169 V4Store::~V4Store() {} 168 V4Store::~V4Store() {}
170 169
171 std::string V4Store::DebugString() const { 170 std::string V4Store::DebugString() const {
172 std::string state_base64; 171 std::string state_base64;
173 base::Base64Encode(state_, &state_base64); 172 base::Base64Encode(state_, &state_base64);
174 173
175 return base::StringPrintf("path: %" PRIsFP "; state: %s", 174 return base::StringPrintf("path: %" PRIsFP "; state: %s",
176 store_path_.value().c_str(), state_base64.c_str()); 175 store_path_.value().c_str(), state_base64.c_str());
177 } 176 }
178 177
179 bool V4Store::Reset() { 178 void V4Store::Reset() {
180 // TODO(vakh): Implement skeleton. 179 expected_checksum_.clear();
180 hash_prefix_map_.clear();
181 state_ = ""; 181 state_ = "";
182 return true;
183 } 182 }
184 183
185 ApplyUpdateResult V4Store::ProcessPartialUpdateAndWriteToDisk( 184 ApplyUpdateResult V4Store::ProcessPartialUpdateAndWriteToDisk(
186 const HashPrefixMap& hash_prefix_map_old, 185 const HashPrefixMap& hash_prefix_map_old,
187 std::unique_ptr<ListUpdateResponse> response) { 186 std::unique_ptr<ListUpdateResponse> response) {
188 DCHECK(response->has_response_type()); 187 DCHECK(response->has_response_type());
189 DCHECK_EQ(ListUpdateResponse::PARTIAL_UPDATE, response->response_type()); 188 DCHECK_EQ(ListUpdateResponse::PARTIAL_UPDATE, response->response_type());
190 189
191 TimeTicks before = TimeTicks::Now(); 190 TimeTicks before = TimeTicks::Now();
192 ApplyUpdateResult result = ProcessUpdate(hash_prefix_map_old, response); 191 ApplyUpdateResult result = ProcessUpdate(hash_prefix_map_old, response);
193 if (result == APPLY_UPDATE_SUCCESS) { 192 if (result == APPLY_UPDATE_SUCCESS) {
194 RecordProcessPartialUpdateTime(TimeTicks::Now() - before, store_path_); 193 RecordProcessPartialUpdateTime(TimeTicks::Now() - before, store_path_);
195 // TODO(vakh): Create a ListUpdateResponse containing RICE encoded 194 // TODO(vakh): Create a ListUpdateResponse containing RICE encoded
196 // hash prefixes and response_type as FULL_UPDATE, and write that to disk. 195 // hash prefixes and response_type as FULL_UPDATE, and write that to disk.
197 } 196 }
198 return result; 197 return result;
199 } 198 }
200 199
201 ApplyUpdateResult V4Store::ProcessFullUpdateAndWriteToDisk( 200 ApplyUpdateResult V4Store::ProcessFullUpdateAndWriteToDisk(
202 std::unique_ptr<ListUpdateResponse> response) { 201 std::unique_ptr<ListUpdateResponse> response) {
203 TimeTicks before = TimeTicks::Now(); 202 TimeTicks before = TimeTicks::Now();
204 ApplyUpdateResult result = ProcessFullUpdate(response); 203 ApplyUpdateResult result = ProcessFullUpdate(response);
205 if (result == APPLY_UPDATE_SUCCESS) { 204 if (result == APPLY_UPDATE_SUCCESS) {
205 RecordProcessFullUpdateTime(TimeTicks::Now() - before, store_path_);
206 RecordStoreWriteResult(WriteToDisk(std::move(response))); 206 RecordStoreWriteResult(WriteToDisk(std::move(response)));
207 RecordProcessFullUpdateTime(TimeTicks::Now() - before, store_path_);
208 } 207 }
209 return result; 208 return result;
210 } 209 }
211 210
212 ApplyUpdateResult V4Store::ProcessFullUpdate( 211 ApplyUpdateResult V4Store::ProcessFullUpdate(
213 const std::unique_ptr<ListUpdateResponse>& response) { 212 const std::unique_ptr<ListUpdateResponse>& response) {
214 DCHECK(response->has_response_type()); 213 DCHECK(response->has_response_type());
215 DCHECK_EQ(ListUpdateResponse::FULL_UPDATE, response->response_type()); 214 DCHECK_EQ(ListUpdateResponse::FULL_UPDATE, response->response_type());
216 // TODO(vakh): For a full update, we don't need to process the update in 215 // TODO(vakh): For a full update, we don't need to process the update in
 217 // lexicographical order to store it, but we do need to do that for calculating 216
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
305 } else { 304 } else {
306 DVLOG(1) << "Failure: ApplyUpdate: reason: " << apply_update_result 305 DVLOG(1) << "Failure: ApplyUpdate: reason: " << apply_update_result
307 << "; store: " << *this; 306 << "; store: " << *this;
308 // new_store failed updating. Pass a nullptr to the callback. 307 // new_store failed updating. Pass a nullptr to the callback.
309 callback_task_runner->PostTask(FROM_HERE, base::Bind(callback, nullptr)); 308 callback_task_runner->PostTask(FROM_HERE, base::Bind(callback, nullptr));
310 } 309 }
311 310
312 RecordApplyUpdateResult(apply_update_result); 311 RecordApplyUpdateResult(apply_update_result);
313 } 312 }
314 313
315 // static
Scott Hess - ex-Googler 2016/10/06 23:04:10 AFAICT, the only reason it can't be static is stor
vakh (use Gerrit instead) 2016/10/07 00:39:45 It was already non-static but the comment hadn't b
Scott Hess - ex-Googler 2016/10/07 13:31:09 Sure.
316 ApplyUpdateResult V4Store::UpdateHashPrefixMapFromAdditions( 314 ApplyUpdateResult V4Store::UpdateHashPrefixMapFromAdditions(
317 const RepeatedPtrField<ThreatEntrySet>& additions, 315 const RepeatedPtrField<ThreatEntrySet>& additions,
318 HashPrefixMap* additions_map) { 316 HashPrefixMap* additions_map) {
319 for (const auto& addition : additions) { 317 for (const auto& addition : additions) {
320 ApplyUpdateResult apply_update_result = APPLY_UPDATE_SUCCESS; 318 ApplyUpdateResult apply_update_result = APPLY_UPDATE_SUCCESS;
321 const CompressionType compression_type = addition.compression_type(); 319 const CompressionType compression_type = addition.compression_type();
322 if (compression_type == RAW) { 320 if (compression_type == RAW) {
323 DCHECK(addition.has_raw_hashes()); 321 DCHECK(addition.has_raw_hashes());
324 DCHECK(addition.raw_hashes().has_raw_hashes()); 322 DCHECK(addition.raw_hashes().has_raw_hashes());
325 323
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
443 441
444 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity + 442 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
445 prefix_length_to_add); 443 prefix_length_to_add);
446 } 444 }
447 } 445 }
448 446
449 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map, 447 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
450 const HashPrefixMap& additions_map, 448 const HashPrefixMap& additions_map,
451 const RepeatedField<int32>* raw_removals, 449 const RepeatedField<int32>* raw_removals,
452 const std::string& expected_checksum) { 450 const std::string& expected_checksum) {
451 DCHECK(task_runner_->RunsTasksOnCurrentThread());
453 DCHECK(hash_prefix_map_.empty()); 452 DCHECK(hash_prefix_map_.empty());
453
454 bool calculate_checksum = !expected_checksum.empty();
455 if (old_prefixes_map.empty()) {
456 // If the old map is empty, which it is at startup, then just copy over the
457 // additions map.
458 DCHECK(!raw_removals);
459 hash_prefix_map_ = additions_map;
460
461 if (calculate_checksum) {
462 // Calculate the checksum asynchronously later and if it doesn't match,
463 // reset the store.
464 expected_checksum_ = expected_checksum;
465 }
466
467 return APPLY_UPDATE_SUCCESS;
468 }
469
454 hash_prefix_map_.clear(); 470 hash_prefix_map_.clear();
455 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_); 471 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);
456 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_); 472 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
457 473
458 IteratorMap old_iterator_map; 474 IteratorMap old_iterator_map;
459 HashPrefix next_smallest_prefix_old; 475 HashPrefix next_smallest_prefix_old;
460 InitializeIteratorMap(old_prefixes_map, &old_iterator_map); 476 InitializeIteratorMap(old_prefixes_map, &old_iterator_map);
461 bool old_has_unmerged = GetNextSmallestUnmergedPrefix( 477 bool old_has_unmerged = GetNextSmallestUnmergedPrefix(
462 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old); 478 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);
463 479
464 IteratorMap additions_iterator_map; 480 IteratorMap additions_iterator_map;
465 HashPrefix next_smallest_prefix_additions; 481 HashPrefix next_smallest_prefix_additions;
466 InitializeIteratorMap(additions_map, &additions_iterator_map); 482 InitializeIteratorMap(additions_map, &additions_iterator_map);
467 bool additions_has_unmerged = GetNextSmallestUnmergedPrefix( 483 bool additions_has_unmerged = GetNextSmallestUnmergedPrefix(
468 additions_map, additions_iterator_map, &next_smallest_prefix_additions); 484 additions_map, additions_iterator_map, &next_smallest_prefix_additions);
469 485
470 // Classical merge sort. 486 // Classical merge sort.
471 // The two constructs to merge are maps: old_prefixes_map, additions_map. 487 // The two constructs to merge are maps: old_prefixes_map, additions_map.
472 // At least one of the maps still has elements that need to be merged into the 488 // At least one of the maps still has elements that need to be merged into the
473 // new store. 489 // new store.
474 490
475 bool calculate_checksum = !expected_checksum.empty();
476 std::unique_ptr<crypto::SecureHash> checksum_ctx( 491 std::unique_ptr<crypto::SecureHash> checksum_ctx(
477 crypto::SecureHash::Create(crypto::SecureHash::SHA256)); 492 crypto::SecureHash::Create(crypto::SecureHash::SHA256));
478 493
479 // Keep track of the number of elements picked from the old map. This is used 494 // Keep track of the number of elements picked from the old map. This is used
480 // to determine which elements to drop based on the raw_removals. Note that 495 // to determine which elements to drop based on the raw_removals. Note that
481 // picked is not the same as merged. A picked element isn't merged if its 496 // picked is not the same as merged. A picked element isn't merged if its
482 // index is on the raw_removals list. 497 // index is on the raw_removals list.
483 int total_picked_from_old = 0; 498 int total_picked_from_old = 0;
484 const int* removals_iter = raw_removals ? raw_removals->begin() : nullptr; 499 const int* removals_iter = raw_removals ? raw_removals->begin() : nullptr;
485 while (old_has_unmerged || additions_has_unmerged) { 500 while (old_has_unmerged || additions_has_unmerged) {
486 // If the same hash prefix appears in the existing store and the additions 501 // If the same hash prefix appears in the existing store and the additions
487 // list, something is clearly wrong. Discard the update. 502 // list, something is clearly wrong. Discard the update.
488 if (old_has_unmerged && additions_has_unmerged && 503 if (old_has_unmerged && additions_has_unmerged &&
489 next_smallest_prefix_old == next_smallest_prefix_additions) { 504 next_smallest_prefix_old == next_smallest_prefix_additions) {
490 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE; 505 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE;
491 } 506 }
492 507
493 // Select which map to pick the next hash prefix from to keep the result in 508 // Select which map to pick the next hash prefix from to keep the result in
 494 // lexographically sorted order. 509 // lexicographically sorted order.
495 bool pick_from_old = 510 bool pick_from_old =
496 old_has_unmerged && 511 old_has_unmerged &&
497 (!additions_has_unmerged || 512 (!additions_has_unmerged ||
498 (next_smallest_prefix_old < next_smallest_prefix_additions)); 513 (next_smallest_prefix_old < next_smallest_prefix_additions));
499 514
500 PrefixSize next_smallest_prefix_size; 515 PrefixSize next_smallest_prefix_size;
501 if (pick_from_old) { 516 if (pick_from_old) {
502 next_smallest_prefix_size = next_smallest_prefix_old.size(); 517 next_smallest_prefix_size = next_smallest_prefix_old.size();
503 518
504 // Update the iterator map, which means that we have merged one hash 519 // Update the iterator map, which means that we have merged one hash
505 // prefix of size |next_size_for_old| from the old store. 520 // prefix of size |next_smallest_prefix_size| from the old store.
506 old_iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size; 521 old_iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size;
507 522
508 if (!raw_removals || removals_iter == raw_removals->end() || 523 if (!raw_removals || removals_iter == raw_removals->end() ||
509 *removals_iter != total_picked_from_old) { 524 *removals_iter != total_picked_from_old) {
510 // Append the smallest hash to the appropriate list. 525 // Append the smallest hash to the appropriate list.
511 hash_prefix_map_[next_smallest_prefix_size] += next_smallest_prefix_old; 526 hash_prefix_map_[next_smallest_prefix_size] += next_smallest_prefix_old;
512 527
513 if (calculate_checksum) { 528 if (calculate_checksum) {
514 checksum_ctx->Update(base::string_as_array(&next_smallest_prefix_old), 529 checksum_ctx->Update(base::string_as_array(&next_smallest_prefix_old),
515 next_smallest_prefix_size); 530 next_smallest_prefix_size);
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
554 } 569 }
555 570
556 if (calculate_checksum) { 571 if (calculate_checksum) {
557 std::string checksum(crypto::kSHA256Length, 0); 572 std::string checksum(crypto::kSHA256Length, 0);
558 checksum_ctx->Finish(base::string_as_array(&checksum), checksum.size()); 573 checksum_ctx->Finish(base::string_as_array(&checksum), checksum.size());
559 if (checksum != expected_checksum) { 574 if (checksum != expected_checksum) {
560 std::string checksum_base64, expected_checksum_base64; 575 std::string checksum_base64, expected_checksum_base64;
561 base::Base64Encode(checksum, &checksum_base64); 576 base::Base64Encode(checksum, &checksum_base64);
562 base::Base64Encode(expected_checksum, &expected_checksum_base64); 577 base::Base64Encode(expected_checksum, &expected_checksum_base64);
563 DVLOG(1) << "Failure: Checksum mismatch: calculated: " << checksum_base64 578 DVLOG(1) << "Failure: Checksum mismatch: calculated: " << checksum_base64
564 << " expected: " << expected_checksum_base64; 579 << "; expected: " << expected_checksum_base64
580 << "; store: " << *this;
581 ;
Scott Hess - ex-Googler 2016/10/06 23:04:10 Extra semi-colon, and also DCHECK_IS_ON() wrapper
565 return CHECKSUM_MISMATCH_FAILURE; 582 return CHECKSUM_MISMATCH_FAILURE;
566 } 583 }
567 } 584 }
568 585
569 return APPLY_UPDATE_SUCCESS; 586 return APPLY_UPDATE_SUCCESS;
570 } 587 }
571 588
572 StoreReadResult V4Store::ReadFromDisk() { 589 StoreReadResult V4Store::ReadFromDisk() {
573 DCHECK(task_runner_->RunsTasksOnCurrentThread()); 590 DCHECK(task_runner_->RunsTasksOnCurrentThread());
574 591
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
683 int result = hash_prefix.compare(mid_prefix); 700 int result = hash_prefix.compare(mid_prefix);
684 if (result == 0) { 701 if (result == 0) {
685 return true; 702 return true;
686 } else if (result < 0) { 703 } else if (result < 0) {
687 return HashPrefixMatches(hash_prefix, begin, mid); 704 return HashPrefixMatches(hash_prefix, begin, mid);
688 } else { 705 } else {
689 return HashPrefixMatches(hash_prefix, mid + prefix_size, end); 706 return HashPrefixMatches(hash_prefix, mid + prefix_size, end);
690 } 707 }
691 } 708 }
692 709
710 bool V4Store::VerifyChecksum() {
711 DCHECK(task_runner_->RunsTasksOnCurrentThread());
712
713 if (expected_checksum_.empty()) {
714 // If the |expected_checksum_| is empty, the file (or hash_prefix_map_)
715 // should also be empty.
716 return hash_prefix_map_.empty();
717 }
718
719 IteratorMap iterator_map;
720 HashPrefix next_smallest_prefix;
721 InitializeIteratorMap(hash_prefix_map_, &iterator_map);
722 bool has_unmerged = GetNextSmallestUnmergedPrefix(
723 hash_prefix_map_, iterator_map, &next_smallest_prefix);
724
725 std::unique_ptr<crypto::SecureHash> checksum_ctx(
726 crypto::SecureHash::Create(crypto::SecureHash::SHA256));
727 while (has_unmerged) {
728 PrefixSize next_smallest_prefix_size;
729 next_smallest_prefix_size = next_smallest_prefix.size();
730
731 // Update the iterator map, which means that we have read one hash
732 // prefix of size |next_smallest_prefix_size| from hash_prefix_map_.
733 iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size;
734
735 checksum_ctx->Update(base::string_as_array(&next_smallest_prefix),
736 next_smallest_prefix_size);
Scott Hess - ex-Googler 2016/10/06 23:04:10 Why string_as_array rather than data()? Update()
vakh (use Gerrit instead) 2016/10/07 00:39:45 Done.
737
738 // Find the next smallest unmerged element in the map.
739 has_unmerged = GetNextSmallestUnmergedPrefix(hash_prefix_map_, iterator_map,
740 &next_smallest_prefix);
741 }
742
743 std::string checksum(crypto::kSHA256Length, 0);
744 checksum_ctx->Finish(base::string_as_array(&checksum), checksum.size());
Scott Hess - ex-Googler 2016/10/06 23:04:10 This usage of string_as_array() is valid ... but i
vakh (use Gerrit instead) 2016/10/07 00:39:45 Done.
745 if (checksum == expected_checksum_) {
746 expected_checksum_.clear();
747 return true;
748 }
749
750 std::string checksum_base64, expected_checksum_base64;
751 base::Base64Encode(checksum, &checksum_base64);
752 base::Base64Encode(expected_checksum_, &expected_checksum_base64);
753 DVLOG(1) << "Failure: Checksum mismatch: calculated: " << checksum_base64
754 << "; expected: " << expected_checksum_base64
755 << "; store: " << *this;
Scott Hess - ex-Googler 2016/10/06 23:04:10 That said, it might be worth wrapping this in an D
vakh (use Gerrit instead) 2016/10/07 00:39:45 Done.
756 RecordApplyUpdateResultWhenReadingFromDisk(CHECKSUM_MISMATCH_FAILURE);
757
758 return false;
759 }
760
693 } // namespace safe_browsing 761 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698