OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" |
6 | 6 |
7 #include "base/file_util.h" | 7 #include "base/file_util.h" |
8 #include "base/metrics/histogram.h" | 8 #include "base/metrics/histogram.h" |
9 #include "base/metrics/stats_counters.h" | 9 #include "base/metrics/stats_counters.h" |
10 #include "base/time.h" | 10 #include "base/time.h" |
11 #include "base/message_loop.h" | 11 #include "base/message_loop.h" |
12 #include "base/process_util.h" | 12 #include "base/process_util.h" |
13 #include "base/sha2.h" | 13 #include "base/sha2.h" |
14 #include "chrome/browser/safe_browsing/bloom_filter.h" | 14 #include "chrome/browser/safe_browsing/bloom_filter.h" |
| 15 #include "chrome/browser/safe_browsing/prefix_set.h" |
15 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" | 16 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" |
16 #include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" | 17 #include "chrome/browser/safe_browsing/safe_browsing_store_sqlite.h" |
17 #include "chrome/browser/safe_browsing/safe_browsing_util.h" | 18 #include "chrome/browser/safe_browsing/safe_browsing_util.h" |
18 #include "googleurl/src/gurl.h" | 19 #include "googleurl/src/gurl.h" |
19 | 20 |
20 namespace { | 21 namespace { |
21 | 22 |
22 // Filename suffix for the bloom filter. | 23 // Filename suffix for the bloom filter. |
23 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); | 24 const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2"); |
24 // Filename suffix for download store. | 25 // Filename suffix for download store. |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
172 lists->push_back(chunkrange0); | 173 lists->push_back(chunkrange0); |
173 lists->push_back(chunkrange1); | 174 lists->push_back(chunkrange1); |
174 } | 175 } |
175 | 176 |
176 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from | 177 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from |
177 // safe_browsing_store.h orders on both chunk-id and prefix. | 178 // safe_browsing_store.h orders on both chunk-id and prefix. |
178 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { | 179 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { |
179 return a.full_hash.prefix < b.full_hash.prefix; | 180 return a.full_hash.prefix < b.full_hash.prefix; |
180 } | 181 } |
181 | 182 |
| 183 // Create a |PrefixSet| from a vector of add-prefixes. |
| 184 safe_browsing::PrefixSet* CreatePrefixSet( |
| 185 const std::vector<SBAddPrefix>& add_prefixes) { |
| 186 std::vector<SBPrefix> prefixes; |
| 187 for (size_t i = 0; i < add_prefixes.size(); ++i) { |
| 188 prefixes.push_back(add_prefixes[i].prefix); |
| 189 } |
| 190 return new safe_browsing::PrefixSet(prefixes); |
| 191 } |
| 192 |
| 193 // As compared to the bloom filter, PrefixSet should have these |
| 194 // properties: |
| 195 // - Any bloom filter miss should be a prefix set miss. |
| 196 // - Any prefix set hit should be a bloom filter hit. |
| 197 // - Bloom filter false positives are prefix set misses. |
| 198 // The following is to log actual performance to verify this. |
| 199 enum PrefixSetEvent { |
| 200 PREFIX_SET_EVENT_HIT, |
| 201 PREFIX_SET_EVENT_BLOOM_HIT, |
| 202 PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT, |
| 203 |
| 204 // Memory space for histograms is determined by the max. ALWAYS ADD |
| 205 // NEW VALUES BEFORE THIS ONE. |
| 206 PREFIX_SET_EVENT_MAX |
| 207 }; |
| 208 |
| 209 void RecordPrefixSetInfo(PrefixSetEvent event_type) { |
| 210 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type, |
| 211 PREFIX_SET_EVENT_MAX); |
| 212 } |
| 213 |
182 } // namespace | 214 } // namespace |
183 | 215 |
184 // The default SafeBrowsingDatabaseFactory. | 216 // The default SafeBrowsingDatabaseFactory. |
185 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { | 217 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { |
186 public: | 218 public: |
187 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( | 219 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( |
188 bool enable_download_protection) { | 220 bool enable_download_protection) { |
189 if (enable_download_protection) { | 221 if (enable_download_protection) { |
190 // Create database with browse url store and download store. | 222 // Create database with browse url store and download store. |
191 return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, | 223 return new SafeBrowsingDatabaseNew(new SafeBrowsingStoreFile, |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
324 | 356 |
325 // Reset objects in memory. | 357 // Reset objects in memory. |
326 { | 358 { |
327 base::AutoLock locked(lookup_lock_); | 359 base::AutoLock locked(lookup_lock_); |
328 full_browse_hashes_.clear(); | 360 full_browse_hashes_.clear(); |
329 pending_browse_hashes_.clear(); | 361 pending_browse_hashes_.clear(); |
330 prefix_miss_cache_.clear(); | 362 prefix_miss_cache_.clear(); |
331 // TODO(shess): This could probably be |bloom_filter_.reset()|. | 363 // TODO(shess): This could probably be |bloom_filter_.reset()|. |
332 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * | 364 browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize * |
333 BloomFilter::kBloomFilterSizeRatio); | 365 BloomFilter::kBloomFilterSizeRatio); |
| 366 // TODO(shess): It is simpler for the code to assume that presence |
| 367 // of a bloom filter always implies presence of a prefix set. |
| 368 prefix_set_.reset(CreatePrefixSet(std::vector<SBAddPrefix>())); |
334 } | 369 } |
335 | 370 |
336 return true; | 371 return true; |
337 } | 372 } |
338 | 373 |
339 // TODO(lzheng): Remove matching_list, it is not used anywhere. | 374 // TODO(lzheng): Remove matching_list, it is not used anywhere. |
340 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( | 375 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( |
341 const GURL& url, | 376 const GURL& url, |
342 std::string* matching_list, | 377 std::string* matching_list, |
343 std::vector<SBPrefix>* prefix_hits, | 378 std::vector<SBPrefix>* prefix_hits, |
344 std::vector<SBFullHashResult>* full_hits, | 379 std::vector<SBFullHashResult>* full_hits, |
345 base::Time last_update) { | 380 base::Time last_update) { |
346 // Clear the results first. | 381 // Clear the results first. |
347 matching_list->clear(); | 382 matching_list->clear(); |
348 prefix_hits->clear(); | 383 prefix_hits->clear(); |
349 full_hits->clear(); | 384 full_hits->clear(); |
350 | 385 |
351 std::vector<SBPrefix> prefixes; | 386 std::vector<SBPrefix> prefixes; |
352 BrowsePrefixesToCheck(url, &prefixes); | 387 BrowsePrefixesToCheck(url, &prefixes); |
353 if (prefixes.empty()) | 388 if (prefixes.empty()) |
354 return false; | 389 return false; |
355 | 390 |
356 // This function is called on the I/O thread, prevent changes to | 391 // This function is called on the I/O thread, prevent changes to |
357 // bloom filter and caches. | 392 // bloom filter and caches. |
358 base::AutoLock locked(lookup_lock_); | 393 base::AutoLock locked(lookup_lock_); |
359 | 394 |
360 if (!browse_bloom_filter_.get()) | 395 if (!browse_bloom_filter_.get()) |
361 return false; | 396 return false; |
| 397 DCHECK(prefix_set_.get()); |
362 | 398 |
363 size_t miss_count = 0; | 399 size_t miss_count = 0; |
364 for (size_t i = 0; i < prefixes.size(); ++i) { | 400 for (size_t i = 0; i < prefixes.size(); ++i) { |
| 401 bool found = prefix_set_->Exists(prefixes[i]); |
| 402 |
365 if (browse_bloom_filter_->Exists(prefixes[i])) { | 403 if (browse_bloom_filter_->Exists(prefixes[i])) { |
| 404 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT); |
| 405 if (found) |
| 406 RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT); |
366 prefix_hits->push_back(prefixes[i]); | 407 prefix_hits->push_back(prefixes[i]); |
367 if (prefix_miss_cache_.count(prefixes[i]) > 0) | 408 if (prefix_miss_cache_.count(prefixes[i]) > 0) |
368 ++miss_count; | 409 ++miss_count; |
| 410 } else { |
| 411 // Bloom filter misses should never be in prefix set. |
| 412 DCHECK(!found); |
| 413 if (found) |
| 414 RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT); |
369 } | 415 } |
370 } | 416 } |
371 | 417 |
372 // If all the prefixes are cached as 'misses', don't issue a GetHash. | 418 // If all the prefixes are cached as 'misses', don't issue a GetHash. |
373 if (miss_count == prefix_hits->size()) | 419 if (miss_count == prefix_hits->size()) |
374 return false; | 420 return false; |
375 | 421 |
376 // Find the matching full-hash results. |full_browse_hashes_| are from the | 422 // Find the matching full-hash results. |full_browse_hashes_| are from the |
377 // database, |pending_browse_hashes_| are from GetHash requests between | 423 // database, |pending_browse_hashes_| are from GetHash requests between |
378 // updates. | 424 // updates. |
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
765 // Create and populate |filter| from |add_prefixes|. | 811 // Create and populate |filter| from |add_prefixes|. |
766 // TODO(shess): The bloom filter doesn't need to be a | 812 // TODO(shess): The bloom filter doesn't need to be a |
767 // scoped_refptr<> for this code. Refactor that away. | 813 // scoped_refptr<> for this code. Refactor that away. |
768 const int filter_size = | 814 const int filter_size = |
769 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); | 815 BloomFilter::FilterSizeForKeyCount(add_prefixes.size()); |
770 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size)); | 816 scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size)); |
771 for (size_t i = 0; i < add_prefixes.size(); ++i) { | 817 for (size_t i = 0; i < add_prefixes.size(); ++i) { |
772 filter->Insert(add_prefixes[i].prefix); | 818 filter->Insert(add_prefixes[i].prefix); |
773 } | 819 } |
774 | 820 |
| 821 scoped_ptr<safe_browsing::PrefixSet> |
| 822 prefix_set(CreatePrefixSet(add_prefixes)); |
| 823 |
775 // This needs to be in sorted order by prefix for efficient access. | 824 // This needs to be in sorted order by prefix for efficient access. |
776 std::sort(add_full_hashes.begin(), add_full_hashes.end(), | 825 std::sort(add_full_hashes.begin(), add_full_hashes.end(), |
777 SBAddFullHashPrefixLess); | 826 SBAddFullHashPrefixLess); |
778 | 827 |
779 // Swap in the newly built filter and cache. | 828 // Swap in the newly built filter and cache. |
780 { | 829 { |
781 base::AutoLock locked(lookup_lock_); | 830 base::AutoLock locked(lookup_lock_); |
782 full_browse_hashes_.swap(add_full_hashes); | 831 full_browse_hashes_.swap(add_full_hashes); |
783 | 832 |
784 // TODO(shess): If |CacheHashResults()| is posted between the | 833 // TODO(shess): If |CacheHashResults()| is posted between the |
785 // earlier lock and this clear, those pending hashes will be lost. | 834 // earlier lock and this clear, those pending hashes will be lost. |
786 // It could be fixed by only removing hashes which were collected | 835 // It could be fixed by only removing hashes which were collected |
787 // at the earlier point. I believe that is fail-safe as-is (the | 836 // at the earlier point. I believe that is fail-safe as-is (the |
788 // hash will be fetched again). | 837 // hash will be fetched again). |
789 pending_browse_hashes_.clear(); | 838 pending_browse_hashes_.clear(); |
790 prefix_miss_cache_.clear(); | 839 prefix_miss_cache_.clear(); |
791 browse_bloom_filter_.swap(filter); | 840 browse_bloom_filter_.swap(filter); |
| 841 prefix_set_.swap(prefix_set); |
792 } | 842 } |
793 | 843 |
794 const base::TimeDelta bloom_gen = base::Time::Now() - before; | 844 const base::TimeDelta bloom_gen = base::Time::Now() - before; |
795 | 845 |
796 // Persist the bloom filter to disk. Since only this thread changes | 846 // Persist the bloom filter to disk. Since only this thread changes |
797 // |browse_bloom_filter_|, there is no need to lock. | 847 // |browse_bloom_filter_|, there is no need to lock. |
798 WriteBloomFilter(); | 848 WriteBloomFilter(); |
799 | 849 |
800 // Gather statistics. | 850 // Gather statistics. |
801 if (got_counters && metric->GetIOCounters(&io_after)) { | 851 if (got_counters && metric->GetIOCounters(&io_after)) { |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
865 return; | 915 return; |
866 } | 916 } |
867 | 917 |
868 const base::TimeTicks before = base::TimeTicks::Now(); | 918 const base::TimeTicks before = base::TimeTicks::Now(); |
869 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); | 919 browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_); |
870 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " | 920 DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in " |
871 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; | 921 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; |
872 | 922 |
873 if (!browse_bloom_filter_.get()) | 923 if (!browse_bloom_filter_.get()) |
874 RecordFailure(FAILURE_DATABASE_FILTER_READ); | 924 RecordFailure(FAILURE_DATABASE_FILTER_READ); |
| 925 |
| 926 // Manually re-generate the prefix set from the main database. |
| 927 // TODO(shess): Write/read for prefix set. |
| 928 std::vector<SBAddPrefix> add_prefixes; |
| 929 browse_store_->GetAddPrefixes(&add_prefixes); |
| 930 prefix_set_.reset(CreatePrefixSet(add_prefixes)); |
875 } | 931 } |
876 | 932 |
877 bool SafeBrowsingDatabaseNew::Delete() { | 933 bool SafeBrowsingDatabaseNew::Delete() { |
878 DCHECK_EQ(creation_loop_, MessageLoop::current()); | 934 DCHECK_EQ(creation_loop_, MessageLoop::current()); |
879 | 935 |
880 const bool r1 = browse_store_->Delete(); | 936 const bool r1 = browse_store_->Delete(); |
881 if (!r1) | 937 if (!r1) |
882 RecordFailure(FAILURE_DATABASE_STORE_DELETE); | 938 RecordFailure(FAILURE_DATABASE_STORE_DELETE); |
883 | 939 |
884 const bool r2 = download_store_.get() ? download_store_->Delete() : true; | 940 const bool r2 = download_store_.get() ? download_store_->Delete() : true; |
(...skipping 13 matching lines...) Expand all Loading... |
898 return; | 954 return; |
899 | 955 |
900 const base::TimeTicks before = base::TimeTicks::Now(); | 956 const base::TimeTicks before = base::TimeTicks::Now(); |
901 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); | 957 const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_); |
902 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " | 958 DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in " |
903 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; | 959 << (base::TimeTicks::Now() - before).InMilliseconds() << " ms"; |
904 | 960 |
905 if (!write_ok) | 961 if (!write_ok) |
906 RecordFailure(FAILURE_DATABASE_FILTER_WRITE); | 962 RecordFailure(FAILURE_DATABASE_FILTER_WRITE); |
907 } | 963 } |
OLD | NEW |