Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(329)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 6591087: Additional validation code for PrefixSet. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Sense of histogram was WRONG~ Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include "base/file_util.h" 7 #include "base/file_util.h"
8 #include "base/metrics/histogram.h" 8 #include "base/metrics/histogram.h"
9 #include "base/metrics/stats_counters.h" 9 #include "base/metrics/stats_counters.h"
10 #include "base/time.h" 10 #include "base/time.h"
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
171 lists->push_back(chunkrange0); 171 lists->push_back(chunkrange0);
172 lists->push_back(chunkrange1); 172 lists->push_back(chunkrange1);
173 } 173 }
174 174
175 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 175 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
176 // safe_browsing_store.h orders on both chunk-id and prefix. 176 // safe_browsing_store.h orders on both chunk-id and prefix.
177 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 177 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
178 return a.full_hash.prefix < b.full_hash.prefix; 178 return a.full_hash.prefix < b.full_hash.prefix;
179 } 179 }
180 180
181 // Create a |PrefixSet| from a vector of add-prefixes.
182 safe_browsing::PrefixSet* CreatePrefixSet(
183 const std::vector<SBAddPrefix>& add_prefixes) {
184 std::vector<SBPrefix> prefixes;
185 for (size_t i = 0; i < add_prefixes.size(); ++i) {
186 prefixes.push_back(add_prefixes[i].prefix);
187 }
188 return new safe_browsing::PrefixSet(prefixes);
189 }
190
191 // As compared to the bloom filter, PrefixSet should have these 181 // As compared to the bloom filter, PrefixSet should have these
192 // properties: 182 // properties:
193 // - Any bloom filter miss should be a prefix set miss. 183 // - Any bloom filter miss should be a prefix set miss.
194 // - Any prefix set hit should be a bloom filter hit. 184 // - Any prefix set hit should be a bloom filter hit.
195 // - Bloom filter false positives are prefix set misses. 185 // - Bloom filter false positives are prefix set misses.
196 // The following is to log actual performance to verify this. 186 // The following is to log actual performance to verify this.
197 enum PrefixSetEvent { 187 enum PrefixSetEvent {
198 PREFIX_SET_EVENT_HIT, 188 PREFIX_SET_EVENT_HIT,
199 PREFIX_SET_EVENT_BLOOM_HIT, 189 PREFIX_SET_EVENT_BLOOM_HIT,
200 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, 190 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
191 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
lzheng 2011/03/04 00:29:06 Yeah, much better.
192 PREFIX_SET_GETPREFIXES_BROKEN,
201 193
202 // Memory space for histograms is determined by the max. ALWAYS ADD 194 // Memory space for histograms is determined by the max. ALWAYS ADD
203 // NEW VALUES BEFORE THIS ONE. 195 // NEW VALUES BEFORE THIS ONE.
204 PREFIX_SET_EVENT_MAX 196 PREFIX_SET_EVENT_MAX
205 }; 197 };
206 198
207 void RecordPrefixSetInfo(PrefixSetEvent event_type) { 199 void RecordPrefixSetInfo(PrefixSetEvent event_type) {
208 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type, 200 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
209 PREFIX_SET_EVENT_MAX); 201 PREFIX_SET_EVENT_MAX);
210 } 202 }
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
354 { 346 {
355 base::AutoLock locked(lookup_lock_); 347 base::AutoLock locked(lookup_lock_);
356 full_browse_hashes_.clear(); 348 full_browse_hashes_.clear();
357 pending_browse_hashes_.clear(); 349 pending_browse_hashes_.clear();
358 prefix_miss_cache_.clear(); 350 prefix_miss_cache_.clear();
359 // TODO(shess): This could probably be |bloom_filter_.reset()|. 351 // TODO(shess): This could probably be |bloom_filter_.reset()|.
360 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * 352 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
361 BloomFilter::kBloomFilterSizeRatio); 353 BloomFilter::kBloomFilterSizeRatio);
362 // TODO(shess): It is simpler for the code to assume that presence 354 // TODO(shess): It is simpler for the code to assume that presence
363 // of a bloom filter always implies presence of a prefix set. 355 // of a bloom filter always implies presence of a prefix set.
364 prefix_set_.reset(CreatePrefixSet(std::vector<SBAddPrefix>())); 356 prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
365 } 357 }
366 358
367 return true; 359 return true;
368 } 360 }
369 361
370 // TODO(lzheng): Remove matching_list, it is not used anywhere. 362 // TODO(lzheng): Remove matching_list, it is not used anywhere.
371 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 363 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
372 const GURL& url, 364 const GURL& url,
373 std::string* matching_list, 365 std::string* matching_list,
374 std::vector<SBPrefix>* prefix_hits, 366 std::vector<SBPrefix>* prefix_hits,
(...skipping 10 matching lines...) Expand all
385 return false; 377 return false;
386 378
387 // This function is called on the I/O thread, prevent changes to 379 // This function is called on the I/O thread, prevent changes to
388 // bloom filter and caches. 380 // bloom filter and caches.
389 base::AutoLock locked(lookup_lock_); 381 base::AutoLock locked(lookup_lock_);
390 382
391 if (!browse_bloom_filter_.get()) 383 if (!browse_bloom_filter_.get())
392 return false; 384 return false;
393 DCHECK(prefix_set_.get()); 385 DCHECK(prefix_set_.get());
394 386
387 // Used to double-check in case of a hit mis-match.
388 std::vector<SBPrefix> restored;
389
395 size_t miss_count = 0; 390 size_t miss_count = 0;
396 for (size_t i = 0; i < prefixes.size(); ++i) { 391 for (size_t i = 0; i < prefixes.size(); ++i) {
397 bool found = prefix_set_->Exists(prefixes[i]); 392 bool found = prefix_set_->Exists(prefixes[i]);
398 393
399 if (browse_bloom_filter_->Exists(prefixes[i])) { 394 if (browse_bloom_filter_->Exists(prefixes[i])) {
400 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT); 395 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
401 if (found) 396 if (found)
402 RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT); 397 RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
403 prefix_hits->push_back(prefixes[i]); 398 prefix_hits->push_back(prefixes[i]);
404 if (prefix_miss_cache_.count(prefixes[i]) > 0) 399 if (prefix_miss_cache_.count(prefixes[i]) > 0)
405 ++miss_count; 400 ++miss_count;
406 } else { 401 } else {
407 // Bloom filter misses should never be in prefix set. 402 // Bloom filter misses should never be in prefix set. Re-create
403 // the original prefixes and manually search for it, to check if
404 // there's a bug with how |Exists()| is implemented.
405 // |UpdateBrowseStore()| previously verified that
406 // |GetPrefixes()| returns the same prefixes as were passed to
407 // the constructor.
408 DCHECK(!found); 408 DCHECK(!found);
409 if (found) 409 if (found) {
410 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); 410 if (restored.empty())
411 prefix_set_->GetPrefixes(&restored);
412
413 // If the item is not in the re-created list, then there is an
414 // error in |PrefixSet::Exists()|. If the item is in the
415 // re-created list, then the bloom filter was wrong.
416 if (std::binary_search(restored.begin(), restored.end(), prefixes[i])) {
417 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
418 } else {
419 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
420 }
421 }
411 } 422 }
412 } 423 }
413 424
414 // If all the prefixes are cached as 'misses', don't issue a GetHash. 425 // If all the prefixes are cached as 'misses', don't issue a GetHash.
415 if (miss_count == prefix_hits->size()) 426 if (miss_count == prefix_hits->size())
416 return false; 427 return false;
417 428
418 // Find the matching full-hash results. |full_browse_hashes_| are from the 429 // Find the matching full-hash results. |full_browse_hashes_| are from the
419 // database, |pending_browse_hashes_| are from GetHash requests between 430 // database, |pending_browse_hashes_| are from GetHash requests between
420 // updates. 431 // updates.
(...skipping 415 matching lines...) Expand 10 before | Expand all | Expand 10 after
836 // Create and populate |filter| from |add_prefixes|. 847 // Create and populate |filter| from |add_prefixes|.
837 // TODO(shess): The bloom filter doesn't need to be a 848 // TODO(shess): The bloom filter doesn't need to be a
838 // scoped_refptr<> for this code. Refactor that away. 849 // scoped_refptr<> for this code. Refactor that away.
839 const int filter_size = 850 const int filter_size =
840 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); 851 BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
841 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size)); 852 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
842 for (size_t i = 0; i < add_prefixes.size(); ++i) { 853 for (size_t i = 0; i < add_prefixes.size(); ++i) {
843 filter->Insert(add_prefixes[i].prefix); 854 filter->Insert(add_prefixes[i].prefix);
844 } 855 }
845 856
857 std::vector<SBPrefix> prefixes;
858 for (size_t i = 0; i < add_prefixes.size(); ++i) {
859 prefixes.push_back(add_prefixes[i].prefix);
860 }
861 std::sort(prefixes.begin(), prefixes.end());
846 scoped_ptr<safe_browsing::PrefixSet> 862 scoped_ptr<safe_browsing::PrefixSet>
847 prefix_set(CreatePrefixSet(add_prefixes)); 863 prefix_set(new safe_browsing::PrefixSet(prefixes));
864
865 // Verify that |GetPrefixes()| returns the same set of prefixes as
866 // was passed to the constructor.
867 std::vector<SBPrefix> restored;
868 prefix_set->GetPrefixes(&restored);
869 prefixes.erase(std::unique(prefixes.begin(), prefixes.end()), prefixes.end());
870 if (restored.size() != prefixes.size() ||
871 !std::equal(prefixes.begin(), prefixes.end(), restored.begin())) {
872 NOTREACHED();
873 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
874 }
848 875
849 // This needs to be in sorted order by prefix for efficient access. 876 // This needs to be in sorted order by prefix for efficient access.
850 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 877 std::sort(add_full_hashes.begin(), add_full_hashes.end(),
851 SBAddFullHashPrefixLess); 878 SBAddFullHashPrefixLess);
852 879
853 // Swap in the newly built filter and cache. 880 // Swap in the newly built filter and cache.
854 { 881 {
855 base::AutoLock locked(lookup_lock_); 882 base::AutoLock locked(lookup_lock_);
856 full_browse_hashes_.swap(add_full_hashes); 883 full_browse_hashes_.swap(add_full_hashes);
857 884
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
945 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " 972 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
946 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 973 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
947 974
948 if (!browse_bloom_filter_.get()) 975 if (!browse_bloom_filter_.get())
949 RecordFailure(FAILURE_DATABASE_FILTER_READ); 976 RecordFailure(FAILURE_DATABASE_FILTER_READ);
950 977
951 // Manually re-generate the prefix set from the main database. 978 // Manually re-generate the prefix set from the main database.
952 // TODO(shess): Write/read for prefix set. 979 // TODO(shess): Write/read for prefix set.
953 std::vector<SBAddPrefix> add_prefixes; 980 std::vector<SBAddPrefix> add_prefixes;
954 browse_store_->GetAddPrefixes(&add_prefixes); 981 browse_store_->GetAddPrefixes(&add_prefixes);
955 prefix_set_.reset(CreatePrefixSet(add_prefixes)); 982 std::vector<SBPrefix> prefixes;
983 for (size_t i = 0; i < add_prefixes.size(); ++i) {
984 prefixes.push_back(add_prefixes[i].prefix);
985 }
986 std::sort(prefixes.begin(), prefixes.end());
987 prefix_set_.reset(new safe_browsing::PrefixSet(prefixes));
988
989 // Double-check the prefixes so that the
990 // PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
991 // ContainsBrowseUrl() can be trustworthy.
992 std::vector<SBPrefix> restored;
993 prefix_set_->GetPrefixes(&restored);
994 std::set<SBPrefix> unique(prefixes.begin(), prefixes.end());
995 if (restored.size() != unique.size() ||
996 !std::equal(unique.begin(), unique.end(), restored.begin())) {
997 NOTREACHED();
998 RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
999 }
956 } 1000 }
957 1001
958 bool SafeBrowsingDatabaseNew::Delete() { 1002 bool SafeBrowsingDatabaseNew::Delete() {
959 DCHECK_EQ(creation_loop_, MessageLoop::current()); 1003 DCHECK_EQ(creation_loop_, MessageLoop::current());
960 1004
961 const bool r1 = browse_store_->Delete(); 1005 const bool r1 = browse_store_->Delete();
962 if (!r1) 1006 if (!r1)
963 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 1007 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
964 1008
965 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 1009 const bool r2 = download_store_.get() ? download_store_->Delete() : true;
(...skipping 13 matching lines...) Expand all
979 return; 1023 return;
980 1024
981 const base::TimeTicks before = base::TimeTicks::Now(); 1025 const base::TimeTicks before = base::TimeTicks::Now();
982 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); 1026 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
983 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " 1027 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
984 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 1028 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
985 1029
986 if (!write_ok) 1030 if (!write_ok)
987 RecordFailure(FAILURE_DATABASE_FILTER_WRITE); 1031 RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
988 } 1032 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698