Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(165)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 220493003: Safebrowsing: change gethash caching to match api 2.3 rules, fix some corner cases. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase (including 227613008) Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 FILE_PATH_LITERAL(" IP Blacklist"); 59 FILE_PATH_LITERAL(" IP Blacklist");
60 60
61 // Filename suffix for browse store. 61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. 62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code 63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all 64 // for little benefit. If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify 65 // the data in one file), that would be a convenient point to rectify
66 // this. 66 // this.
67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); 67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68 68
69 // The maximum staleness for a cached entry.
70 const int kMaxStalenessMinutes = 45;
71
72 // Maximum number of entries we allow in any of the whitelists. 69 // Maximum number of entries we allow in any of the whitelists.
73 // If a whitelist on disk contains more entries then all lookups to 70 // If a whitelist on disk contains more entries then all lookups to
74 // the whitelist will be considered a match. 71 // the whitelist will be considered a match.
75 const size_t kMaxWhitelistSize = 5000; 72 const size_t kMaxWhitelistSize = 5000;
76 73
77 // If the hash of this exact expression is on a whitelist then all 74 // If the hash of this exact expression is on a whitelist then all
78 // lookups to this whitelist will be considered a match. 75 // lookups to this whitelist will be considered a match.
79 const char kWhitelistKillSwitchUrl[] = 76 const char kWhitelistKillSwitchUrl[] =
80 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! 77 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this!
81 78
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 if (prefix == iter->prefix && 175 if (prefix == iter->prefix &&
179 GetListIdBit(iter->chunk_id) == list_bit) { 176 GetListIdBit(iter->chunk_id) == list_bit) {
180 prefix_hits->push_back(prefix); 177 prefix_hits->push_back(prefix);
181 found_match = true; 178 found_match = true;
182 } 179 }
183 } 180 }
184 } 181 }
185 return found_match; 182 return found_match;
186 } 183 }
187 184
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired. "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough. Expired items are not deleted because a
192 // future update may make them acceptable again.
193 //
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
197 const std::vector<SBAddFullHash>& full_hashes,
198 std::vector<SBFullHashResult>* full_hits,
199 base::Time last_update) {
200 const base::Time expire_time =
201 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
202
203 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
204 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
205
206 while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
207 if (*piter < hiter->full_hash.prefix) {
208 ++piter;
209 } else if (hiter->full_hash.prefix < *piter) {
210 ++hiter;
211 } else {
212 if (expire_time < last_update ||
213 expire_time.ToTimeT() < hiter->received) {
214 SBFullHashResult result;
215 const int list_bit = GetListIdBit(hiter->chunk_id);
216 DCHECK(list_bit == safe_browsing_util::MALWARE ||
217 list_bit == safe_browsing_util::PHISH);
218 const safe_browsing_util::ListType list_id =
219 static_cast<safe_browsing_util::ListType>(list_bit);
220 if (!safe_browsing_util::GetListName(list_id, &result.list_name))
221 continue;
222 result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
223 result.hash = hiter->full_hash;
224 full_hits->push_back(result);
225 }
226
227 // Only increment |hiter|, |piter| might have multiple hits.
228 ++hiter;
229 }
230 }
231 }
232
233 // This function generates a chunk range string for |chunks|. It 185 // This function generates a chunk range string for |chunks|. It
234 // outputs one chunk range string per list and writes it to the 186 // outputs one chunk range string per list and writes it to the
235 // |list_ranges| vector. We expect |list_ranges| to already be of the 187 // |list_ranges| vector. We expect |list_ranges| to already be of the
236 // right size. E.g., if |chunks| contains chunks with two different 188 // right size. E.g., if |chunks| contains chunks with two different
237 // list ids then |list_ranges| must contain two elements. 189 // list ids then |list_ranges| must contain two elements.
238 void GetChunkRanges(const std::vector<int>& chunks, 190 void GetChunkRanges(const std::vector<int>& chunks,
239 std::vector<std::string>* list_ranges) { 191 std::vector<std::string>* list_ranges) {
240 // Since there are 2 possible list ids, there must be exactly two 192 // Since there are 2 possible list ids, there must be exactly two
241 // list ranges. Even if the chunk data should only contain one 193 // list ranges. Even if the chunk data should only contain one
242 // line, this code has to somehow handle corruption. 194 // line, this code has to somehow handle corruption.
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
304 const std::string& listname, 256 const std::string& listname,
305 std::vector<SBListChunkRanges>* lists) { 257 std::vector<SBListChunkRanges>* lists) {
306 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists); 258 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
307 } 259 }
308 260
309 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 261 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
310 // safe_browsing_store.h orders on both chunk-id and prefix. 262 // safe_browsing_store.h orders on both chunk-id and prefix.
311 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 263 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
312 return a.full_hash.prefix < b.full_hash.prefix; 264 return a.full_hash.prefix < b.full_hash.prefix;
313 } 265 }
266 bool SBAddFullHashSBPrefixLess(const SBAddFullHash& a, SBPrefix b) {
267 return a.full_hash.prefix < b;
268 }
314 269
315 // This code always checks for non-zero file size. This helper makes 270 // This code always checks for non-zero file size. This helper makes
316 // that less verbose. 271 // that less verbose.
317 int64 GetFileSizeOrZero(const base::FilePath& file_path) { 272 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
318 int64 size_64; 273 int64 size_64;
319 if (!base::GetFileSize(file_path, &size_64)) 274 if (!base::GetFileSize(file_path, &size_64))
320 return 0; 275 return 0;
321 return size_64; 276 return size_64;
322 } 277 }
323 278
324 // Used to order whitelist storage in memory.
325 bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
326 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
327 }
328
329 } // namespace 279 } // namespace
330 280
331 // The default SafeBrowsingDatabaseFactory. 281 // The default SafeBrowsingDatabaseFactory.
332 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 282 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
333 public: 283 public:
334 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 284 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
335 bool enable_download_protection, 285 bool enable_download_protection,
336 bool enable_client_side_whitelist, 286 bool enable_client_side_whitelist,
337 bool enable_download_whitelist, 287 bool enable_download_whitelist,
338 bool enable_extension_blacklist, 288 bool enable_extension_blacklist,
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 base::Unretained(this))); 474 base::Unretained(this)));
525 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 475 DVLOG(1) << "Init browse store: " << browse_filename_.value();
526 476
527 { 477 {
528 // NOTE: There is no need to grab the lock in this function, since 478 // NOTE: There is no need to grab the lock in this function, since
529 // until it returns, there are no pointers to this class on other 479 // until it returns, there are no pointers to this class on other
530 // threads. Then again, that means there is no possibility of 480 // threads. Then again, that means there is no possibility of
531 // contention on the lock... 481 // contention on the lock...
532 base::AutoLock locked(lookup_lock_); 482 base::AutoLock locked(lookup_lock_);
533 full_browse_hashes_.clear(); 483 full_browse_hashes_.clear();
534 pending_browse_hashes_.clear(); 484 browse_gethash_cache_.clear();
535 LoadPrefixSet(); 485 LoadPrefixSet();
536 } 486 }
537 487
538 if (download_store_.get()) { 488 if (download_store_.get()) {
539 download_filename_ = DownloadDBFilename(filename_base); 489 download_filename_ = DownloadDBFilename(filename_base);
540 download_store_->Init( 490 download_store_->Init(
541 download_filename_, 491 download_filename_,
542 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 492 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
543 base::Unretained(this))); 493 base::Unretained(this)));
544 DVLOG(1) << "Init download store: " << download_filename_.value(); 494 DVLOG(1) << "Init download store: " << download_filename_.value();
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
648 // Delete files on disk. 598 // Delete files on disk.
649 // TODO(shess): Hard to see where one might want to delete without a 599 // TODO(shess): Hard to see where one might want to delete without a
650 // reset. Perhaps inline |Delete()|? 600 // reset. Perhaps inline |Delete()|?
651 if (!Delete()) 601 if (!Delete())
652 return false; 602 return false;
653 603
654 // Reset objects in memory. 604 // Reset objects in memory.
655 { 605 {
656 base::AutoLock locked(lookup_lock_); 606 base::AutoLock locked(lookup_lock_);
657 full_browse_hashes_.clear(); 607 full_browse_hashes_.clear();
658 pending_browse_hashes_.clear(); 608 browse_gethash_cache_.clear();
659 prefix_miss_cache_.clear();
660 browse_prefix_set_.reset(); 609 browse_prefix_set_.reset();
661 side_effect_free_whitelist_prefix_set_.reset(); 610 side_effect_free_whitelist_prefix_set_.reset();
662 ip_blacklist_.clear(); 611 ip_blacklist_.clear();
663 } 612 }
664 // Wants to acquire the lock itself. 613 // Wants to acquire the lock itself.
665 WhitelistEverything(&csd_whitelist_); 614 WhitelistEverything(&csd_whitelist_);
666 WhitelistEverything(&download_whitelist_); 615 WhitelistEverything(&download_whitelist_);
667 return true; 616 return true;
668 } 617 }
669 618
670 // TODO(lzheng): Remove matching_list, it is not used anywhere.
671 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 619 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
672 const GURL& url, 620 const GURL& url,
673 std::string* matching_list,
674 std::vector<SBPrefix>* prefix_hits, 621 std::vector<SBPrefix>* prefix_hits,
675 std::vector<SBFullHashResult>* full_hits, 622 std::vector<SBFullHashResult>* cache_hits) {
676 base::Time last_update) {
677 // Clear the results first. 623 // Clear the results first.
678 matching_list->clear();
679 prefix_hits->clear(); 624 prefix_hits->clear();
680 full_hits->clear(); 625 cache_hits->clear();
681 626
682 std::vector<SBFullHash> full_hashes; 627 std::vector<SBFullHash> full_hashes;
683 BrowseFullHashesToCheck(url, false, &full_hashes); 628 BrowseFullHashesToCheck(url, false, &full_hashes);
684 if (full_hashes.empty()) 629 if (full_hashes.empty())
685 return false; 630 return false;
686 631
632 std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
633
634 return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
635 }
636
637 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
638 const std::vector<SBFullHash>& full_hashes,
639 std::vector<SBPrefix>* prefix_hits,
640 std::vector<SBFullHashResult>* cache_hits) {
687 // This function is called on the I/O thread, prevent changes to 641 // This function is called on the I/O thread, prevent changes to
688 // filter and caches. 642 // filter and caches.
689 base::AutoLock locked(lookup_lock_); 643 base::AutoLock locked(lookup_lock_);
690 644
691 // |browse_prefix_set_| is empty until it is either read from disk, or the 645 // |browse_prefix_set_| is empty until it is either read from disk, or the
692 // first update populates it. Bail out without a hit if not yet 646 // first update populates it. Bail out without a hit if not yet
693 // available. 647 // available.
694 if (!browse_prefix_set_.get()) 648 if (!browse_prefix_set_.get())
695 return false; 649 return false;
696 650
697 size_t miss_count = 0; 651 const base::Time now = base::Time::Now();
652
698 for (size_t i = 0; i < full_hashes.size(); ++i) { 653 for (size_t i = 0; i < full_hashes.size(); ++i) {
699 const SBPrefix prefix = full_hashes[i].prefix; 654 const SBPrefix prefix = full_hashes[i].prefix;
655
656 // First check if there is a valid cached result for this prefix.
657 std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
658 browse_gethash_cache_.find(prefix);
659 if (citer != browse_gethash_cache_.end()) {
660 if (now <= citer->second.expire_after) {
661 for (std::vector<SBFullHashResult>::const_iterator fiter =
662 citer->second.full_hashes.begin();
663 fiter != citer->second.full_hashes.end();
664 ++fiter) {
665 if (SBFullHashEqual(full_hashes[i], fiter->hash))
666 cache_hits->push_back(*fiter);
667 }
668 // If the prefix was in the cache, don't add the prefix to
669 // prefix_hits. The result will be in cache_hits (if the fullhash
670 // matched), or not (if it was a cached miss).
671 continue;
672 }
673
674 // Remove expired entries.
675 browse_gethash_cache_.erase(citer);
676 }
677
678 // There was no valid cached result for the prefix, so check the database.
700 if (browse_prefix_set_->Exists(prefix)) { 679 if (browse_prefix_set_->Exists(prefix)) {
701 prefix_hits->push_back(prefix); 680 if (prefix_hits->empty() || prefix_hits->back() != prefix)
702 if (prefix_miss_cache_.count(prefix) > 0) 681 prefix_hits->push_back(prefix);
703 ++miss_count; 682 continue;
683 }
684
685 // There was no prefix match, check for fullhash matches.
686 std::vector<SBAddFullHash>::const_iterator db_fullhash_prefix_match =
687 std::lower_bound(full_browse_hashes_.begin(),
688 full_browse_hashes_.end(),
689 prefix,
690 SBAddFullHashSBPrefixLess);
691 // If full_browse_hashes_ was sorted on the fullhash (not just the
692 // prefix), could do binary_search here, but there are unlikely to be
693 // enough prefix matches to matter.
694 while (db_fullhash_prefix_match != full_browse_hashes_.end() &&
695 db_fullhash_prefix_match->full_hash.prefix == prefix) {
696 if (SBFullHashEqual(db_fullhash_prefix_match->full_hash,
697 full_hashes[i])) {
698 if (prefix_hits->empty() || prefix_hits->back() != prefix)
699 prefix_hits->push_back(prefix);
700 break;
701 }
702 ++db_fullhash_prefix_match;
704 } 703 }
705 } 704 }
706 705
707 // If all the prefixes are cached as 'misses', don't issue a GetHash. 706 return !prefix_hits->empty() || !cache_hits->empty();
708 if (miss_count == prefix_hits->size())
709 return false;
710
711 // Find the matching full-hash results. |full_browse_hashes_| are from the
712 // database, |pending_browse_hashes_| are from GetHash requests between
713 // updates.
714 std::sort(prefix_hits->begin(), prefix_hits->end());
715
716 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
717 full_hits, last_update);
718 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
719 full_hits, last_update);
720 return true;
721 } 707 }
722 708
723 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 709 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
724 const std::vector<GURL>& urls, 710 const std::vector<GURL>& urls,
725 std::vector<SBPrefix>* prefix_hits) { 711 std::vector<SBPrefix>* prefix_hits) {
726 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 712 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
727 713
728 // Ignore this check when download checking is not enabled. 714 // Ignore this check when download checking is not enabled.
729 if (!download_store_.get()) 715 if (!download_store_.get())
730 return false; 716 return false;
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
868 STATS_COUNTER("SB.PrefixAdd", 1); 854 STATS_COUNTER("SB.PrefixAdd", 1);
869 store->WriteAddPrefix(encoded_chunk_id, host); 855 store->WriteAddPrefix(encoded_chunk_id, host);
870 } else if (entry->IsPrefix()) { 856 } else if (entry->IsPrefix()) {
871 // Prefixes only. 857 // Prefixes only.
872 for (int i = 0; i < count; i++) { 858 for (int i = 0; i < count; i++) {
873 const SBPrefix prefix = entry->PrefixAt(i); 859 const SBPrefix prefix = entry->PrefixAt(i);
874 STATS_COUNTER("SB.PrefixAdd", 1); 860 STATS_COUNTER("SB.PrefixAdd", 1);
875 store->WriteAddPrefix(encoded_chunk_id, prefix); 861 store->WriteAddPrefix(encoded_chunk_id, prefix);
876 } 862 }
877 } else { 863 } else {
878 // Prefixes and hashes. 864 // Full hashes only.
879 const base::Time receive_time = base::Time::Now();
880 for (int i = 0; i < count; ++i) { 865 for (int i = 0; i < count; ++i) {
881 const SBFullHash full_hash = entry->FullHashAt(i); 866 const SBFullHash full_hash = entry->FullHashAt(i);
882 const SBPrefix prefix = full_hash.prefix;
883
884 STATS_COUNTER("SB.PrefixAdd", 1);
885 store->WriteAddPrefix(encoded_chunk_id, prefix);
886 867
887 STATS_COUNTER("SB.PrefixAddFull", 1); 868 STATS_COUNTER("SB.PrefixAddFull", 1);
888 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); 869 store->WriteAddHash(encoded_chunk_id, full_hash);
889 } 870 }
890 } 871 }
891 } 872 }
892 873
893 // Helper to iterate over all the entries in the hosts in |chunks| and 874 // Helper to iterate over all the entries in the hosts in |chunks| and
894 // add them to the store. 875 // add them to the store.
895 void SafeBrowsingDatabaseNew::InsertAddChunks( 876 void SafeBrowsingDatabaseNew::InsertAddChunks(
896 const safe_browsing_util::ListType list_id, 877 const safe_browsing_util::ListType list_id,
897 const SBChunkList& chunks) { 878 const SBChunkList& chunks) {
898 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 879 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
943 // Prefixes only. 924 // Prefixes only.
944 for (int i = 0; i < count; i++) { 925 for (int i = 0; i < count; i++) {
945 const SBPrefix prefix = entry->PrefixAt(i); 926 const SBPrefix prefix = entry->PrefixAt(i);
946 const int add_chunk_id = 927 const int add_chunk_id =
947 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 928 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
948 929
949 STATS_COUNTER("SB.PrefixSub", 1); 930 STATS_COUNTER("SB.PrefixSub", 1);
950 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix); 931 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
951 } 932 }
952 } else { 933 } else {
953 // Prefixes and hashes. 934 // Full hashes only.
954 for (int i = 0; i < count; ++i) { 935 for (int i = 0; i < count; ++i) {
955 const SBFullHash full_hash = entry->FullHashAt(i); 936 const SBFullHash full_hash = entry->FullHashAt(i);
956 const int add_chunk_id = 937 const int add_chunk_id =
957 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id); 938 EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);
958 939
959 STATS_COUNTER("SB.PrefixSub", 1);
960 store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);
961
962 STATS_COUNTER("SB.PrefixSubFull", 1); 940 STATS_COUNTER("SB.PrefixSubFull", 1);
963 store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash); 941 store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
964 } 942 }
965 } 943 }
966 } 944 }
967 945
968 // Helper to iterate over all the entries in the hosts in |chunks| and 946 // Helper to iterate over all the entries in the hosts in |chunks| and
969 // add them to the store. 947 // add them to the store.
970 void SafeBrowsingDatabaseNew::InsertSubChunks( 948 void SafeBrowsingDatabaseNew::InsertSubChunks(
971 safe_browsing_util::ListType list_id, 949 safe_browsing_util::ListType list_id,
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after
1046 if (chunk_deletes[i].is_sub_del) 1024 if (chunk_deletes[i].is_sub_del)
1047 store->DeleteSubChunk(encoded_chunk_id); 1025 store->DeleteSubChunk(encoded_chunk_id);
1048 else 1026 else
1049 store->DeleteAddChunk(encoded_chunk_id); 1027 store->DeleteAddChunk(encoded_chunk_id);
1050 } 1028 }
1051 } 1029 }
1052 } 1030 }
1053 1031
1054 void SafeBrowsingDatabaseNew::CacheHashResults( 1032 void SafeBrowsingDatabaseNew::CacheHashResults(
1055 const std::vector<SBPrefix>& prefixes, 1033 const std::vector<SBPrefix>& prefixes,
1056 const std::vector<SBFullHashResult>& full_hits) { 1034 const std::vector<SBFullHashResult>& full_hits,
1035 const base::TimeDelta& cache_lifetime) {
1036
1037 const base::Time expire_after = base::Time::Now() + cache_lifetime;
1038
1057 // This is called on the I/O thread, lock against updates. 1039 // This is called on the I/O thread, lock against updates.
1058 base::AutoLock locked(lookup_lock_); 1040 base::AutoLock locked(lookup_lock_);
1059 1041
1060 if (full_hits.empty()) { 1042 // Create or reset all cached results for these prefixes.
1061 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 1043 for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
1062 return; 1044 i != prefixes.end();
1045 ++i) {
1046 browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
1063 } 1047 }
1064 1048
1065 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. 1049 // Insert any fullhash hits. Note that there may be one, multiple, or no
1066 // Refactor to make them identical. 1050 // fullhashes for any given entry in |prefixes|.
1067 const base::Time now = base::Time::Now(); 1051 for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
1068 const size_t orig_size = pending_browse_hashes_.size(); 1052 i != full_hits.end();
1069 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 1053 ++i) {
1070 iter != full_hits.end(); ++iter) { 1054 browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
1071 const int list_id = safe_browsing_util::GetListId(iter->list_name);
1072 if (list_id == safe_browsing_util::MALWARE ||
1073 list_id == safe_browsing_util::PHISH) {
1074 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1075 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1076 pending_browse_hashes_.push_back(add_full_hash);
1077 }
1078 } 1055 }
1079
1080 // Sort new entries then merge with the previously-sorted entries.
1081 std::vector<SBAddFullHash>::iterator
1082 orig_end = pending_browse_hashes_.begin() + orig_size;
1083 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1084 std::inplace_merge(pending_browse_hashes_.begin(),
1085 orig_end, pending_browse_hashes_.end(),
1086 SBAddFullHashPrefixLess);
1087 } 1056 }
1088 1057
1089 bool SafeBrowsingDatabaseNew::UpdateStarted( 1058 bool SafeBrowsingDatabaseNew::UpdateStarted(
1090 std::vector<SBListChunkRanges>* lists) { 1059 std::vector<SBListChunkRanges>* lists) {
1091 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1060 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1092 DCHECK(lists); 1061 DCHECK(lists);
1093 1062
1094 // If |BeginUpdate()| fails, reset the database. 1063 // If |BeginUpdate()| fails, reset the database.
1095 if (!browse_store_->BeginUpdate()) { 1064 if (!browse_store_->BeginUpdate()) {
1096 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 1065 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1134 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { 1103 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1135 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); 1104 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1136 HandleCorruptDatabase(); 1105 HandleCorruptDatabase();
1137 return false; 1106 return false;
1138 } 1107 }
1139 1108
1140 UpdateChunkRangesForLists(browse_store_.get(), 1109 UpdateChunkRangesForLists(browse_store_.get(),
1141 safe_browsing_util::kMalwareList, 1110 safe_browsing_util::kMalwareList,
1142 safe_browsing_util::kPhishingList, 1111 safe_browsing_util::kPhishingList,
1143 lists); 1112 lists);
1113 // Cached fullhash results must be cleared on every database update (whether
1114 // successful or not.)
1115 browse_gethash_cache_.clear();
1144 1116
1145 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been 1117 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1146 // deprecated. Code to delete the list from the store shows ~15k hits/day as 1118 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1147 // of Feb 2014, so it has been removed. Everything _should_ be resilient to 1119 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1148 // extra data of that sort. 1120 // extra data of that sort.
1149 UpdateChunkRangesForList(download_store_.get(), 1121 UpdateChunkRangesForList(download_store_.get(),
1150 safe_browsing_util::kBinUrlList, lists); 1122 safe_browsing_util::kBinUrlList, lists);
1151 1123
1152 UpdateChunkRangesForList(csd_whitelist_store_.get(), 1124 UpdateChunkRangesForList(csd_whitelist_store_.get(),
1153 safe_browsing_util::kCsdWhiteList, lists); 1125 safe_browsing_util::kCsdWhiteList, lists);
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
1268 UpdateIpBlacklistStore(); 1240 UpdateIpBlacklistStore();
1269 } 1241 }
1270 1242
1271 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1243 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1272 const base::FilePath& store_filename, 1244 const base::FilePath& store_filename,
1273 SafeBrowsingStore* store, 1245 SafeBrowsingStore* store,
1274 SBWhitelist* whitelist) { 1246 SBWhitelist* whitelist) {
1275 if (!store) 1247 if (!store)
1276 return; 1248 return;
1277 1249
1278 // For the whitelists, we don't cache and save full hashes since all
1279 // hashes are already full.
1280 std::vector<SBAddFullHash> empty_add_hashes;
1281
1282 // Note: |builder| will not be empty. The current data store implementation 1250 // Note: |builder| will not be empty. The current data store implementation
1283 // stores all full-length hashes as both full and prefix hashes. 1251 // stores all full-length hashes as both full and prefix hashes.
1284 safe_browsing::PrefixSetBuilder builder; 1252 safe_browsing::PrefixSetBuilder builder;
1285 std::vector<SBAddFullHash> full_hashes; 1253 std::vector<SBAddFullHash> full_hashes;
1286 if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) { 1254 if (!store->FinishUpdate(&builder, &full_hashes)) {
1287 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1255 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1288 WhitelistEverything(whitelist); 1256 WhitelistEverything(whitelist);
1289 return; 1257 return;
1290 } 1258 }
1291 1259
1292 #if defined(OS_MACOSX) 1260 #if defined(OS_MACOSX)
1293 base::mac::SetFileBackupExclusion(store_filename); 1261 base::mac::SetFileBackupExclusion(store_filename);
1294 #endif 1262 #endif
1295 1263
1296 LoadWhitelist(full_hashes, whitelist); 1264 LoadWhitelist(full_hashes, whitelist);
1297 } 1265 }
1298 1266
1299 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1267 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1300 const base::FilePath& store_filename, 1268 const base::FilePath& store_filename,
1301 SafeBrowsingStore* store, 1269 SafeBrowsingStore* store,
1302 FailureType failure_type) { 1270 FailureType failure_type) {
1303 // We don't cache and save full hashes.
1304 std::vector<SBAddFullHash> empty_add_hashes;
1305
1306 // These results are not used after this call. Simply ignore the 1271 // These results are not used after this call. Simply ignore the
1307 // returned value after FinishUpdate(...). 1272 // returned value after FinishUpdate(...).
1308 safe_browsing::PrefixSetBuilder builder; 1273 safe_browsing::PrefixSetBuilder builder;
1309 std::vector<SBAddFullHash> add_full_hashes_result; 1274 std::vector<SBAddFullHash> add_full_hashes_result;
1310 1275
1311 if (!store->FinishUpdate(empty_add_hashes, 1276 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
1312 &builder,
1313 &add_full_hashes_result)) {
1314 RecordFailure(failure_type); 1277 RecordFailure(failure_type);
1315 } 1278 }
1316 1279
1317 #if defined(OS_MACOSX) 1280 #if defined(OS_MACOSX)
1318 base::mac::SetFileBackupExclusion(store_filename); 1281 base::mac::SetFileBackupExclusion(store_filename);
1319 #endif 1282 #endif
1320 1283
1321 return GetFileSizeOrZero(store_filename); 1284 return GetFileSizeOrZero(store_filename);
1322 } 1285 }
1323 1286
1324 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1287 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1325 // Copy out the pending add hashes. Copy rather than swapping in
1326 // case |ContainsBrowseURL()| is called before the new filter is complete.
1327 std::vector<SBAddFullHash> pending_add_hashes;
1328 {
1329 base::AutoLock locked(lookup_lock_);
1330 pending_add_hashes.insert(pending_add_hashes.end(),
1331 pending_browse_hashes_.begin(),
1332 pending_browse_hashes_.end());
1333 }
1334
1335 // Measure the amount of IO during the filter build. 1288 // Measure the amount of IO during the filter build.
1336 base::IoCounters io_before, io_after; 1289 base::IoCounters io_before, io_after;
1337 base::ProcessHandle handle = base::Process::Current().handle(); 1290 base::ProcessHandle handle = base::Process::Current().handle();
1338 scoped_ptr<base::ProcessMetrics> metric( 1291 scoped_ptr<base::ProcessMetrics> metric(
1339 #if !defined(OS_MACOSX) 1292 #if !defined(OS_MACOSX)
1340 base::ProcessMetrics::CreateProcessMetrics(handle) 1293 base::ProcessMetrics::CreateProcessMetrics(handle)
1341 #else 1294 #else
1342 // Getting stats only for the current process is enough, so NULL is fine. 1295 // Getting stats only for the current process is enough, so NULL is fine.
1343 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1296 base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1344 #endif 1297 #endif
1345 ); 1298 );
1346 1299
1347 // IoCounters are currently not supported on Mac, and may not be 1300 // IoCounters are currently not supported on Mac, and may not be
1348 // available for Linux, so we check the result and only show IO 1301 // available for Linux, so we check the result and only show IO
1349 // stats if they are available. 1302 // stats if they are available.
1350 const bool got_counters = metric->GetIOCounters(&io_before); 1303 const bool got_counters = metric->GetIOCounters(&io_before);
1351 1304
1352 const base::TimeTicks before = base::TimeTicks::Now(); 1305 const base::TimeTicks before = base::TimeTicks::Now();
1353 1306
1354 safe_browsing::PrefixSetBuilder builder; 1307 safe_browsing::PrefixSetBuilder builder;
1355 std::vector<SBAddFullHash> add_full_hashes; 1308 std::vector<SBAddFullHash> add_full_hashes;
1356 if (!browse_store_->FinishUpdate(pending_add_hashes, 1309 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1357 &builder, &add_full_hashes)) {
1358 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1310 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1359 return; 1311 return;
1360 } 1312 }
1361 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); 1313 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1362 1314
1363 // This needs to be in sorted order by prefix for efficient access. 1315 // This needs to be in sorted order by prefix for efficient access.
1364 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1316 std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1365 SBAddFullHashPrefixLess); 1317 SBAddFullHashPrefixLess);
1366 1318
1367 // Swap in the newly built filter and cache. 1319 // Swap in the newly built filter and cache.
1368 { 1320 {
1369 base::AutoLock locked(lookup_lock_); 1321 base::AutoLock locked(lookup_lock_);
1370 full_browse_hashes_.swap(add_full_hashes); 1322 full_browse_hashes_.swap(add_full_hashes);
1371
1372 // TODO(shess): If |CacheHashResults()| is posted between the
1373 // earlier lock and this clear, those pending hashes will be lost.
1374 // It could be fixed by only removing hashes which were collected
1375 // at the earlier point. I believe that is fail-safe as-is (the
1376 // hash will be fetched again).
1377 pending_browse_hashes_.clear();
1378 prefix_miss_cache_.clear();
1379 browse_prefix_set_.swap(prefix_set); 1323 browse_prefix_set_.swap(prefix_set);
1380 } 1324 }
1381 1325
1382 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " 1326 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1383 << (base::TimeTicks::Now() - before).InMilliseconds() 1327 << (base::TimeTicks::Now() - before).InMilliseconds()
1384 << " ms total."; 1328 << " ms total.";
1385 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1329 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1386 1330
1387 // Persist the prefix set to disk. Since only this thread changes 1331 // Persist the prefix set to disk. Since only this thread changes
1388 // |browse_prefix_set_|, there is no need to lock. 1332 // |browse_prefix_set_|, there is no need to lock.
(...skipping 21 matching lines...) Expand all
1410 file_size = GetFileSizeOrZero(browse_filename_); 1354 file_size = GetFileSizeOrZero(browse_filename_);
1411 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1355 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1412 static_cast<int>(file_size / 1024)); 1356 static_cast<int>(file_size / 1024));
1413 1357
1414 #if defined(OS_MACOSX) 1358 #if defined(OS_MACOSX)
1415 base::mac::SetFileBackupExclusion(browse_filename_); 1359 base::mac::SetFileBackupExclusion(browse_filename_);
1416 #endif 1360 #endif
1417 } 1361 }
1418 1362
1419 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1363 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1420 std::vector<SBAddFullHash> empty_add_hashes;
1421 safe_browsing::PrefixSetBuilder builder; 1364 safe_browsing::PrefixSetBuilder builder;
1422 std::vector<SBAddFullHash> add_full_hashes_result; 1365 std::vector<SBAddFullHash> add_full_hashes_result;
1423 1366
1424 if (!side_effect_free_whitelist_store_->FinishUpdate( 1367 if (!side_effect_free_whitelist_store_->FinishUpdate(
1425 empty_add_hashes, 1368 &builder, &add_full_hashes_result)) {
1426 &builder,
1427 &add_full_hashes_result)) {
1428 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1369 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1429 return; 1370 return;
1430 } 1371 }
1431 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); 1372 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1432 1373
1433 // Swap in the newly built prefix set. 1374 // Swap in the newly built prefix set.
1434 { 1375 {
1435 base::AutoLock locked(lookup_lock_); 1376 base::AutoLock locked(lookup_lock_);
1436 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1377 side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1437 } 1378 }
(...skipping 20 matching lines...) Expand all
1458 static_cast<int>(file_size / 1024)); 1399 static_cast<int>(file_size / 1024));
1459 1400
1460 #if defined(OS_MACOSX) 1401 #if defined(OS_MACOSX)
1461 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); 1402 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1462 base::mac::SetFileBackupExclusion( 1403 base::mac::SetFileBackupExclusion(
1463 side_effect_free_whitelist_prefix_set_filename_); 1404 side_effect_free_whitelist_prefix_set_filename_);
1464 #endif 1405 #endif
1465 } 1406 }
1466 1407
1467 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { 1408 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1468 // For the IP blacklist, we don't cache and save full hashes since all
1469 // hashes are already full.
1470 std::vector<SBAddFullHash> empty_add_hashes;
1471
1472 // Note: prefixes will not be empty. The current data store implementation 1409 // Note: prefixes will not be empty. The current data store implementation
1473 // stores all full-length hashes as both full and prefix hashes. 1410 // stores all full-length hashes as both full and prefix hashes.
1474 safe_browsing::PrefixSetBuilder builder; 1411 safe_browsing::PrefixSetBuilder builder;
1475 std::vector<SBAddFullHash> full_hashes; 1412 std::vector<SBAddFullHash> full_hashes;
1476 if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes, 1413 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1477 &builder, &full_hashes)) {
1478 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); 1414 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1479 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 1415 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
1480 return; 1416 return;
1481 } 1417 }
1482 1418
1483 #if defined(OS_MACOSX) 1419 #if defined(OS_MACOSX)
1484 base::mac::SetFileBackupExclusion(ip_blacklist_filename_); 1420 base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1485 #endif 1421 #endif
1486 1422
1487 LoadIpBlacklist(full_hashes); 1423 LoadIpBlacklist(full_hashes);
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
1683 base::AutoLock locked(lookup_lock_); 1619 base::AutoLock locked(lookup_lock_);
1684 ip_blacklist_.swap(new_blacklist); 1620 ip_blacklist_.swap(new_blacklist);
1685 } 1621 }
1686 1622
1687 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1623 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1688 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); 1624 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1689 std::vector<SBFullHash> full_hashes; 1625 std::vector<SBFullHash> full_hashes;
1690 full_hashes.push_back(malware_kill_switch); 1626 full_hashes.push_back(malware_kill_switch);
1691 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1627 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1692 } 1628 }
OLDNEW
« no previous file with comments | « chrome/browser/safe_browsing/safe_browsing_database.h ('k') | chrome/browser/safe_browsing/safe_browsing_database_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698