Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(105)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 6286072: PrefixSet as an alternate to BloomFilter for safe-browsing. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix windows build. Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include "base/file_util.h" 7 #include "base/file_util.h"
8 #include "base/metrics/histogram.h" 8 #include "base/metrics/histogram.h"
9 #include "base/metrics/stats_counters.h" 9 #include "base/metrics/stats_counters.h"
10 #include "base/time.h" 10 #include "base/time.h"
11 #include "base/message_loop.h" 11 #include "base/message_loop.h"
12 #include "base/process_util.h" 12 #include "base/process_util.h"
13 #include "base/sha2.h" 13 #include "base/sha2.h"
14 #include "chrome/browser/safe_browsing/bloom_filter.h" 14 #include "chrome/browser/safe_browsing/bloom_filter.h"
15 #include "chrome/browser/safe_browsing/prefix_set.h"
15 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" 16 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
16 #include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" 17 #include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h"
17 #include "chrome/browser/safe_browsing/safe_browsing_util.h" 18 #include "chrome/browser/safe_browsing/safe_browsing_util.h"
18 #include "googleurl/src/gurl.h" 19 #include "googleurl/src/gurl.h"
19 20
20 namespace { 21 namespace {
21 22
22 // Filename suffix for the bloom filter. 23 // Filename suffix for the bloom filter.
23 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); 24 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
24 // Filename suffix for download store. 25 // Filename suffix for download store.
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
172 lists->push_back(chunkrange0); 173 lists->push_back(chunkrange0);
173 lists->push_back(chunkrange1); 174 lists->push_back(chunkrange1);
174 } 175 }
175 176
176 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 177 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
177 // safe_browsing_store.h orders on both chunk-id and prefix. 178 // safe_browsing_store.h orders on both chunk-id and prefix.
178 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 179 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
179 return a.full_hash.prefix < b.full_hash.prefix; 180 return a.full_hash.prefix < b.full_hash.prefix;
180 } 181 }
181 182
183 // Create a |PrefixSet| from a vector of add-prefixes.
184 safe_browsing::PrefixSet* CreatePrefixSet(
185 const std::vector<SBAddPrefix>& add_prefixes) {
186 std::vector<SBPrefix> prefixes;
187 for (size_t i = 0; i < add_prefixes.size(); ++i) {
188 prefixes.push_back(add_prefixes[i].prefix);
189 }
190 return new safe_browsing::PrefixSet(prefixes);
191 }
192
193 // As compared to the bloom filter, PrefixSet should have these
194 // properties:
195 // - Any bloom filter miss should be a prefix set miss.
196 // - Any prefix set hit should be a bloom filter hit.
197 // - Bloom filter false positives are prefix set misses.
198 // The following is to log actual performance to verify this.
199 enum PrefixSetEvent {
lzheng 2011/02/05 01:03:42 I found we've never record how many look ups we en
Scott Hess - ex-Googler 2011/02/07 19:20:23 It should be a couple orders of magnitude higher t
200 PREFIX_SET_EVENT_HIT,
201 PREFIX_SET_EVENT_BLOOM_HIT,
202 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
203
204 // Memory space for histograms is determined by the max. ALWAYS ADD
205 // NEW VALUES BEFORE THIS ONE.
206 PREFIX_SET_EVENT_MAX
207 };
208
209 void RecordPrefixSetInfo(PrefixSetEvent event_type) {
210 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
211 PREFIX_SET_EVENT_MAX);
212 }
213
182 } // namespace 214 } // namespace
183 215
184 // The default SafeBrowsingDatabaseFactory. 216 // The default SafeBrowsingDatabaseFactory.
185 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 217 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
186 public: 218 public:
187 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 219 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
188 bool enable_download_protection) { 220 bool enable_download_protection) {
189 if (enable_download_protection) { 221 if (enable_download_protection) {
190 // Create database with browse url store and download store. 222 // Create database with browse url store and download store.
191 return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, 223 return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile,
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
324 356
325 // Reset objects in memory. 357 // Reset objects in memory.
326 { 358 {
327 base::AutoLock locked(lookup_lock_); 359 base::AutoLock locked(lookup_lock_);
328 full_browse_hashes_.clear(); 360 full_browse_hashes_.clear();
329 pending_browse_hashes_.clear(); 361 pending_browse_hashes_.clear();
330 prefix_miss_cache_.clear(); 362 prefix_miss_cache_.clear();
331 // TODO(shess): This could probably be |bloom_filter_.reset()|. 363 // TODO(shess): This could probably be |bloom_filter_.reset()|.
332 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * 364 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
333 BloomFilter::kBloomFilterSizeRatio); 365 BloomFilter::kBloomFilterSizeRatio);
366 // TODO(shess): It is simpler for the code to assume that presence
367 // of a bloom filter always implies presence of a prefix set.
368 prefix_set_.reset(CreatePrefixSet(std::vector<SBAddPrefix>()));
334 } 369 }
335 370
336 return true; 371 return true;
337 } 372 }
338 373
339 // TODO(lzheng): Remove matching_list, it is not used anywhere. 374 // TODO(lzheng): Remove matching_list, it is not used anywhere.
340 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 375 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
341 const GURL& url, 376 const GURL& url,
342 std::string* matching_list, 377 std::string* matching_list,
343 std::vector<SBPrefix>* prefix_hits, 378 std::vector<SBPrefix>* prefix_hits,
344 std::vector<SBFullHashResult>* full_hits, 379 std::vector<SBFullHashResult>* full_hits,
345 base::Time last_update) { 380 base::Time last_update) {
346 // Clear the results first. 381 // Clear the results first.
347 matching_list->clear(); 382 matching_list->clear();
348 prefix_hits->clear(); 383 prefix_hits->clear();
349 full_hits->clear(); 384 full_hits->clear();
350 385
351 std::vector<SBPrefix> prefixes; 386 std::vector<SBPrefix> prefixes;
352 BrowsePrefixesToCheck(url, &prefixes); 387 BrowsePrefixesToCheck(url, &prefixes);
353 if (prefixes.empty()) 388 if (prefixes.empty())
354 return false; 389 return false;
355 390
356 // This function is called on the I/O thread, prevent changes to 391 // This function is called on the I/O thread, prevent changes to
357 // bloom filter and caches. 392 // bloom filter and caches.
358 base::AutoLock locked(lookup_lock_); 393 base::AutoLock locked(lookup_lock_);
359 394
360 if (!browse_bloom_filter_.get()) 395 if (!browse_bloom_filter_.get())
361 return false; 396 return false;
397 DCHECK(prefix_set_.get());
362 398
363 size_t miss_count = 0; 399 size_t miss_count = 0;
364 for (size_t i = 0; i < prefixes.size(); ++i) { 400 for (size_t i = 0; i < prefixes.size(); ++i) {
401 bool found = prefix_set_->Exists(prefixes[i]);
402
365 if (browse_bloom_filter_->Exists(prefixes[i])) { 403 if (browse_bloom_filter_->Exists(prefixes[i])) {
404 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
405 if (found)
406 RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
366 prefix_hits->push_back(prefixes[i]); 407 prefix_hits->push_back(prefixes[i]);
367 if (prefix_miss_cache_.count(prefixes[i]) > 0) 408 if (prefix_miss_cache_.count(prefixes[i]) > 0)
368 ++miss_count; 409 ++miss_count;
410 } else {
411 // Bloom filter misses should never be in prefix set.
412 DCHECK(!found);
413 if (found)
414 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
369 } 415 }
370 } 416 }
371 417
372 // If all the prefixes are cached as 'misses', don't issue a GetHash. 418 // If all the prefixes are cached as 'misses', don't issue a GetHash.
373 if (miss_count == prefix_hits->size()) 419 if (miss_count == prefix_hits->size())
374 return false; 420 return false;
375 421
376 // Find the matching full-hash results. |full_browse_hashes_| are from the 422 // Find the matching full-hash results. |full_browse_hashes_| are from the
377 // database, |pending_browse_hashes_| are from GetHash requests between 423 // database, |pending_browse_hashes_| are from GetHash requests between
378 // updates. 424 // updates.
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after
765 // Create and populate |filter| from |add_prefixes|. 811 // Create and populate |filter| from |add_prefixes|.
766 // TODO(shess): The bloom filter doesn't need to be a 812 // TODO(shess): The bloom filter doesn't need to be a
767 // scoped_refptr<> for this code. Refactor that away. 813 // scoped_refptr<> for this code. Refactor that away.
768 const int filter_size = 814 const int filter_size =
769 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); 815 BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
770 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size)); 816 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
771 for (size_t i = 0; i < add_prefixes.size(); ++i) { 817 for (size_t i = 0; i < add_prefixes.size(); ++i) {
772 filter->Insert(add_prefixes[i].prefix); 818 filter->Insert(add_prefixes[i].prefix);
773 } 819 }
774 820
821 scoped_ptr<safe_browsing::PrefixSet>
822 prefix_set(CreatePrefixSet(add_prefixes));
823
775 // This needs to be in sorted order by prefix for efficient access. 824 // This needs to be in sorted order by prefix for efficient access.
776 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 825 std::sort(add_full_hashes.begin(), add_full_hashes.end(),
777 SBAddFullHashPrefixLess); 826 SBAddFullHashPrefixLess);
778 827
779 // Swap in the newly built filter and cache. 828 // Swap in the newly built filter and cache.
780 { 829 {
781 base::AutoLock locked(lookup_lock_); 830 base::AutoLock locked(lookup_lock_);
782 full_browse_hashes_.swap(add_full_hashes); 831 full_browse_hashes_.swap(add_full_hashes);
783 832
784 // TODO(shess): If |CacheHashResults()| is posted between the 833 // TODO(shess): If |CacheHashResults()| is posted between the
785 // earlier lock and this clear, those pending hashes will be lost. 834 // earlier lock and this clear, those pending hashes will be lost.
786 // It could be fixed by only removing hashes which were collected 835 // It could be fixed by only removing hashes which were collected
787 // at the earlier point. I believe that is fail-safe as-is (the 836 // at the earlier point. I believe that is fail-safe as-is (the
788 // hash will be fetched again). 837 // hash will be fetched again).
789 pending_browse_hashes_.clear(); 838 pending_browse_hashes_.clear();
790 prefix_miss_cache_.clear(); 839 prefix_miss_cache_.clear();
791 browse_bloom_filter_.swap(filter); 840 browse_bloom_filter_.swap(filter);
841 prefix_set_.swap(prefix_set);
792 } 842 }
793 843
794 const base::TimeDelta bloom_gen = base::Time::Now() - before; 844 const base::TimeDelta bloom_gen = base::Time::Now() - before;
795 845
796 // Persist the bloom filter to disk. Since only this thread changes 846 // Persist the bloom filter to disk. Since only this thread changes
797 // |browse_bloom_filter_|, there is no need to lock. 847 // |browse_bloom_filter_|, there is no need to lock.
798 WriteBloomFilter(); 848 WriteBloomFilter();
799 849
800 // Gather statistics. 850 // Gather statistics.
801 if (got_counters && metric->GetIOCounters(&io_after)) { 851 if (got_counters && metric->GetIOCounters(&io_after)) {
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
865 return; 915 return;
866 } 916 }
867 917
868 const base::TimeTicks before = base::TimeTicks::Now(); 918 const base::TimeTicks before = base::TimeTicks::Now();
869 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); 919 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
870 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " 920 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
871 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 921 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
872 922
873 if (!browse_bloom_filter_.get()) 923 if (!browse_bloom_filter_.get())
874 RecordFailure(FAILURE_DATABASE_FILTER_READ); 924 RecordFailure(FAILURE_DATABASE_FILTER_READ);
925
926 // Manually re-generate the prefix set from the main database.
927 // TODO(shess): Write/read for prefix set.
928 std::vector<SBAddPrefix> add_prefixes;
929 browse_store_->GetAddPrefixes(&add_prefixes);
930 prefix_set_.reset(CreatePrefixSet(add_prefixes));
875 } 931 }
876 932
877 bool SafeBrowsingDatabaseNew::Delete() { 933 bool SafeBrowsingDatabaseNew::Delete() {
878 DCHECK_EQ(creation_loop_, MessageLoop::current()); 934 DCHECK_EQ(creation_loop_, MessageLoop::current());
879 935
880 const bool r1 = browse_store_->Delete(); 936 const bool r1 = browse_store_->Delete();
881 if (!r1) 937 if (!r1)
882 RecordFailure(FAILURE_DATABASE_STORE_DELETE); 938 RecordFailure(FAILURE_DATABASE_STORE_DELETE);
883 939
884 const bool r2 = download_store_.get() ? download_store_->Delete() : true; 940 const bool r2 = download_store_.get() ? download_store_->Delete() : true;
(...skipping 13 matching lines...) Expand all
898 return; 954 return;
899 955
900 const base::TimeTicks before = base::TimeTicks::Now(); 956 const base::TimeTicks before = base::TimeTicks::Now();
901 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); 957 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
902 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " 958 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
903 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; 959 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";
904 960
905 if (!write_ok) 961 if (!write_ok)
906 RecordFailure(FAILURE_DATABASE_FILTER_WRITE); 962 RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
907 } 963 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698