Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(118)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 220493003: Safebrowsing: change gethash caching to match api 2.3 rules, fix some corner cases. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: changes for review #2 Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 FILE_PATH_LITERAL(" IP Blacklist"); 59 FILE_PATH_LITERAL(" IP Blacklist");
60 60
61 // Filename suffix for browse store. 61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. 62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code 63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all 64 // for little benefit. If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify 65 // the data in one file), that would be a convenient point to rectify
66 // this. 66 // this.
67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); 67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68 68
69 // The maximum staleness for a cached entry.
70 const int kMaxStalenessMinutes = 45;
71
72 // Maximum number of entries we allow in any of the whitelists. 69 // Maximum number of entries we allow in any of the whitelists.
73 // If a whitelist on disk contains more entries then all lookups to 70 // If a whitelist on disk contains more entries then all lookups to
74 // the whitelist will be considered a match. 71 // the whitelist will be considered a match.
75 const size_t kMaxWhitelistSize = 5000; 72 const size_t kMaxWhitelistSize = 5000;
76 73
77 // If the hash of this exact expression is on a whitelist then all 74 // If the hash of this exact expression is on a whitelist then all
78 // lookups to this whitelist will be considered a match. 75 // lookups to this whitelist will be considered a match.
79 const char kWhitelistKillSwitchUrl[] = 76 const char kWhitelistKillSwitchUrl[] =
80 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! 77 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this!
81 78
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 if (prefix == iter->prefix && 175 if (prefix == iter->prefix &&
179 GetListIdBit(iter->chunk_id) == list_bit) { 176 GetListIdBit(iter->chunk_id) == list_bit) {
180 prefix_hits->push_back(prefix); 177 prefix_hits->push_back(prefix);
181 found_match = true; 178 found_match = true;
182 } 179 }
183 } 180 }
184 } 181 }
185 return found_match; 182 return found_match;
186 } 183 }
187 184
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired. "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough. Expired items are not deleted because a
192 // future update may make them acceptable again.
193 //
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
197 const std::vector<SBAddFullHash>& full_hashes,
198 std::vector<SBFullHashResult>* full_hits,
199 base::Time last_update) {
200 const base::Time expire_time =
201 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
202
203 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
204 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
205
206 while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
207 if (*piter < hiter->full_hash.prefix) {
208 ++piter;
209 } else if (hiter->full_hash.prefix < *piter) {
210 ++hiter;
211 } else {
212 if (expire_time < last_update ||
213 expire_time.ToTimeT() < hiter->received) {
214 SBFullHashResult result;
215 const int list_bit = GetListIdBit(hiter->chunk_id);
216 DCHECK(list_bit == safe_browsing_util::MALWARE ||
217 list_bit == safe_browsing_util::PHISH);
218 const safe_browsing_util::ListType list_id =
219 static_cast<safe_browsing_util::ListType>(list_bit);
220 if (!safe_browsing_util::GetListName(list_id, &result.list_name))
221 continue;
222 result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
223 result.hash = hiter->full_hash;
224 full_hits->push_back(result);
225 }
226
227 // Only increment |hiter|, |piter| might have multiple hits.
228 ++hiter;
229 }
230 }
231 }
232
233 // This function generates a chunk range string for |chunks|. It 185 // This function generates a chunk range string for |chunks|. It
234 // outputs one chunk range string per list and writes it to the 186 // outputs one chunk range string per list and writes it to the
235 // |list_ranges| vector. We expect |list_ranges| to already be of the 187 // |list_ranges| vector. We expect |list_ranges| to already be of the
236 // right size. E.g., if |chunks| contains chunks with two different 188 // right size. E.g., if |chunks| contains chunks with two different
237 // list ids then |list_ranges| must contain two elements. 189 // list ids then |list_ranges| must contain two elements.
238 void GetChunkRanges(const std::vector<int>& chunks, 190 void GetChunkRanges(const std::vector<int>& chunks,
239 std::vector<std::string>* list_ranges) { 191 std::vector<std::string>* list_ranges) {
240 // Since there are 2 possible list ids, there must be exactly two 192 // Since there are 2 possible list ids, there must be exactly two
241 // list ranges. Even if the chunk data should only contain one 193 // list ranges. Even if the chunk data should only contain one
242 // line, this code has to somehow handle corruption. 194 // line, this code has to somehow handle corruption.
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
304 const std::string& listname, 256 const std::string& listname,
305 std::vector<SBListChunkRanges>* lists) { 257 std::vector<SBListChunkRanges>* lists) {
306 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists); 258 UpdateChunkRanges(store, std::vector<std::string>(1, listname), lists);
307 } 259 }
308 260
309 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from 261 // Order |SBAddFullHash| on the prefix part. |SBAddPrefixLess()| from
310 // safe_browsing_store.h orders on both chunk-id and prefix. 262 // safe_browsing_store.h orders on both chunk-id and prefix.
311 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) { 263 bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
312 return a.full_hash.prefix < b.full_hash.prefix; 264 return a.full_hash.prefix < b.full_hash.prefix;
313 } 265 }
266 bool SBAddFullHashSBPrefixLess(const SBAddFullHash& a, SBPrefix b) {
267 return a.full_hash.prefix < b;
268 }
314 269
315 // This code always checks for non-zero file size. This helper makes 270 // This code always checks for non-zero file size. This helper makes
316 // that less verbose. 271 // that less verbose.
317 int64 GetFileSizeOrZero(const base::FilePath& file_path) { 272 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
318 int64 size_64; 273 int64 size_64;
319 if (!base::GetFileSize(file_path, &size_64)) 274 if (!base::GetFileSize(file_path, &size_64))
320 return 0; 275 return 0;
321 return size_64; 276 return size_64;
322 } 277 }
323 278
324 // Used to order whitelist storage in memory.
325 bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
326 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
327 }
328
329 } // namespace 279 } // namespace
330 280
331 // The default SafeBrowsingDatabaseFactory. 281 // The default SafeBrowsingDatabaseFactory.
332 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 282 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
333 public: 283 public:
334 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 284 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
335 bool enable_download_protection, 285 bool enable_download_protection,
336 bool enable_client_side_whitelist, 286 bool enable_client_side_whitelist,
337 bool enable_download_whitelist, 287 bool enable_download_whitelist,
338 bool enable_extension_blacklist, 288 bool enable_extension_blacklist,
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 base::Unretained(this))); 474 base::Unretained(this)));
525 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 475 DVLOG(1) << "Init browse store: " << browse_filename_.value();
526 476
527 { 477 {
528 // NOTE: There is no need to grab the lock in this function, since 478 // NOTE: There is no need to grab the lock in this function, since
529 // until it returns, there are no pointers to this class on other 479 // until it returns, there are no pointers to this class on other
530 // threads. Then again, that means there is no possibility of 480 // threads. Then again, that means there is no possibility of
531 // contention on the lock... 481 // contention on the lock...
532 base::AutoLock locked(lookup_lock_); 482 base::AutoLock locked(lookup_lock_);
533 full_browse_hashes_.clear(); 483 full_browse_hashes_.clear();
534 pending_browse_hashes_.clear(); 484 browse_gethash_cache_.clear();
535 LoadPrefixSet(); 485 LoadPrefixSet();
536 } 486 }
537 487
538 if (download_store_.get()) { 488 if (download_store_.get()) {
539 download_filename_ = DownloadDBFilename(filename_base); 489 download_filename_ = DownloadDBFilename(filename_base);
540 download_store_->Init( 490 download_store_->Init(
541 download_filename_, 491 download_filename_,
542 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 492 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
543 base::Unretained(this))); 493 base::Unretained(this)));
544 DVLOG(1) << "Init download store: " << download_filename_.value(); 494 DVLOG(1) << "Init download store: " << download_filename_.value();
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
648 // Delete files on disk. 598 // Delete files on disk.
649 // TODO(shess): Hard to see where one might want to delete without a 599 // TODO(shess): Hard to see where one might want to delete without a
650 // reset. Perhaps inline |Delete()|? 600 // reset. Perhaps inline |Delete()|?
651 if (!Delete()) 601 if (!Delete())
652 return false; 602 return false;
653 603
654 // Reset objects in memory. 604 // Reset objects in memory.
655 { 605 {
656 base::AutoLock locked(lookup_lock_); 606 base::AutoLock locked(lookup_lock_);
657 full_browse_hashes_.clear(); 607 full_browse_hashes_.clear();
658 pending_browse_hashes_.clear(); 608 browse_gethash_cache_.clear();
659 prefix_miss_cache_.clear();
660 browse_prefix_set_.reset(); 609 browse_prefix_set_.reset();
661 side_effect_free_whitelist_prefix_set_.reset(); 610 side_effect_free_whitelist_prefix_set_.reset();
662 ip_blacklist_.clear(); 611 ip_blacklist_.clear();
663 } 612 }
664 // Wants to acquire the lock itself. 613 // Wants to acquire the lock itself.
665 WhitelistEverything(&csd_whitelist_); 614 WhitelistEverything(&csd_whitelist_);
666 WhitelistEverything(&download_whitelist_); 615 WhitelistEverything(&download_whitelist_);
667 return true; 616 return true;
668 } 617 }
669 618
670 // TODO(lzheng): Remove matching_list, it is not used anywhere.
671 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 619 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
672 const GURL& url, 620 const GURL& url,
673 std::string* matching_list,
674 std::vector<SBPrefix>* prefix_hits, 621 std::vector<SBPrefix>* prefix_hits,
675 std::vector<SBFullHashResult>* full_hits, 622 std::vector<SBFullHashResult>* cache_hits) {
676 base::Time last_update) {
677 // Clear the results first. 623 // Clear the results first.
678 matching_list->clear();
679 prefix_hits->clear(); 624 prefix_hits->clear();
680 full_hits->clear(); 625 cache_hits->clear();
681 626
682 std::vector<SBFullHash> full_hashes; 627 std::vector<SBFullHash> full_hashes;
683 BrowseFullHashesToCheck(url, false, &full_hashes); 628 BrowseFullHashesToCheck(url, false, &full_hashes);
684 if (full_hashes.empty()) 629 if (full_hashes.empty())
685 return false; 630 return false;
686 631
632 std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
633
634 return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
635 }
636
637 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
638 const std::vector<SBFullHash>& full_hashes,
639 std::vector<SBPrefix>* prefix_hits,
640 std::vector<SBFullHashResult>* cache_hits) {
687 // This function is called on the I/O thread, prevent changes to 641 // This function is called on the I/O thread, prevent changes to
688 // filter and caches. 642 // filter and caches.
689 base::AutoLock locked(lookup_lock_); 643 base::AutoLock locked(lookup_lock_);
690 644
691 // |browse_prefix_set_| is empty until it is either read from disk, or the 645 // |browse_prefix_set_| is empty until it is either read from disk, or the
692 // first update populates it. Bail out without a hit if not yet 646 // first update populates it. Bail out without a hit if not yet
693 // available. 647 // available.
694 if (!browse_prefix_set_.get()) 648 if (!browse_prefix_set_.get())
695 return false; 649 return false;
696 650
697 size_t miss_count = 0; 651 const base::Time now = base::Time::Now();
652
698 for (size_t i = 0; i < full_hashes.size(); ++i) { 653 for (size_t i = 0; i < full_hashes.size(); ++i) {
699 const SBPrefix prefix = full_hashes[i].prefix; 654 const SBPrefix prefix = full_hashes[i].prefix;
655
656 // First check if there is a valid cached result for this prefix.
657 std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
658 browse_gethash_cache_.find(prefix);
659 if (citer != browse_gethash_cache_.end()) {
660 if (now > citer->second.expire_after) {
661 // If the cached entry is expired, remove it and ignore it.
662 browse_gethash_cache_.erase(citer);
Scott Hess - ex-Googler (2014/04/03 21:44:03): Having one case with a continue and one without is [... comment truncated in this view]
mattm (2014/04/04 23:58:49): Done.
663 } else {
664 for (std::vector<SBFullHashResult>::const_iterator fiter =
665 citer->second.full_hashes.begin();
666 fiter != citer->second.full_hashes.end();
667 ++fiter) {
668 if (SBFullHashEqual(full_hashes[i], fiter->hash))
669 cache_hits->push_back(*fiter);
670 }
671 // If the prefix was in the cache, don't add the prefix to
672 // prefix_hits. The result will be in cache_hits (if the fullhash
673 // matched), or not (if it was a cached miss).
674 continue;
675 }
676 }
677
678 // There was no valid cached result for the prefix, so check the database.
700 if (browse_prefix_set_->Exists(prefix)) { 679 if (browse_prefix_set_->Exists(prefix)) {
Scott Hess - ex-Googler (2014/04/03 21:44:03): I'm wondering if the control-flow can be flattened [... comment truncated in this view]
mattm (2014/04/04 23:58:49): Done.
701 prefix_hits->push_back(prefix); 680 std::vector<SBAddFullHash>::const_iterator db_fullhash_prefix_match =
702 if (prefix_miss_cache_.count(prefix) > 0) 681 std::lower_bound(full_browse_hashes_.begin(),
703 ++miss_count; 682 full_browse_hashes_.end(),
683 prefix,
684 SBAddFullHashSBPrefixLess);
685 // If the database contains any fullhashes with the same prefix, then we
686 // only count it as a prefix hit if there is an exact fullhash match.
687 if (db_fullhash_prefix_match != full_browse_hashes_.end() &&
688 db_fullhash_prefix_match->full_hash.prefix == prefix) {
689 bool fullhash_match = false;
690 // If full_browse_hashes_ were sorted on the fullhash (not just the
691 // prefix), we could do a binary_search here, but there are unlikely to
692 // be enough prefix matches to matter.
693 while (db_fullhash_prefix_match != full_browse_hashes_.end() &&
694 db_fullhash_prefix_match->full_hash.prefix == prefix) {
695 if (SBFullHashEqual(db_fullhash_prefix_match->full_hash,
696 full_hashes[i])) {
697 fullhash_match = true;
698 break;
699 }
700 ++db_fullhash_prefix_match;
701 }
702 if (!fullhash_match)
703 continue;
704 }
705
706 // Only add a given prefix once. Since full_hashes is sorted, we only
707 // need to check the last entry of prefix_hits.
708 if (prefix_hits->empty() || prefix_hits->back() != prefix)
709 prefix_hits->push_back(prefix);
704 } 710 }
705 } 711 }
706 712
707 // If all the prefixes are cached as 'misses', don't issue a GetHash. 713 return !prefix_hits->empty() || !cache_hits->empty();
708 if (miss_count == prefix_hits->size())
709 return false;
710
711 // Find the matching full-hash results. |full_browse_hashes_| are from the
712 // database, |pending_browse_hashes_| are from GetHash requests between
713 // updates.
714 std::sort(prefix_hits->begin(), prefix_hits->end());
715
716 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
717 full_hits, last_update);
718 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
719 full_hits, last_update);
720 return true;
721 } 714 }
722 715
723 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 716 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
724 const std::vector<GURL>& urls, 717 const std::vector<GURL>& urls,
725 std::vector<SBPrefix>* prefix_hits) { 718 std::vector<SBPrefix>* prefix_hits) {
726 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 719 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
727 720
728 // Ignore this check when download checking is not enabled. 721 // Ignore this check when download checking is not enabled.
729 if (!download_store_.get()) 722 if (!download_store_.get())
730 return false; 723 return false;
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
869 store->WriteAddPrefix(encoded_chunk_id, host); 862 store->WriteAddPrefix(encoded_chunk_id, host);
870 } else if (entry->IsPrefix()) { 863 } else if (entry->IsPrefix()) {
871 // Prefixes only. 864 // Prefixes only.
872 for (int i = 0; i < count; i++) { 865 for (int i = 0; i < count; i++) {
873 const SBPrefix prefix = entry->PrefixAt(i); 866 const SBPrefix prefix = entry->PrefixAt(i);
874 STATS_COUNTER("SB.PrefixAdd", 1); 867 STATS_COUNTER("SB.PrefixAdd", 1);
875 store->WriteAddPrefix(encoded_chunk_id, prefix); 868 store->WriteAddPrefix(encoded_chunk_id, prefix);
876 } 869 }
877 } else { 870 } else {
878 // Prefixes and hashes. 871 // Prefixes and hashes.
879 const base::Time receive_time = base::Time::Now();
880 for (int i = 0; i < count; ++i) { 872 for (int i = 0; i < count; ++i) {
881 const SBFullHash full_hash = entry->FullHashAt(i); 873 const SBFullHash full_hash = entry->FullHashAt(i);
882 const SBPrefix prefix = full_hash.prefix; 874 const SBPrefix prefix = full_hash.prefix;
883 875
884 STATS_COUNTER("SB.PrefixAdd", 1); 876 STATS_COUNTER("SB.PrefixAdd", 1);
885 store->WriteAddPrefix(encoded_chunk_id, prefix); 877 store->WriteAddPrefix(encoded_chunk_id, prefix);
886 878
887 STATS_COUNTER("SB.PrefixAddFull", 1); 879 STATS_COUNTER("SB.PrefixAddFull", 1);
888 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); 880 store->WriteAddHash(encoded_chunk_id, full_hash);
889 } 881 }
890 } 882 }
891 } 883 }
892 884
893 // Helper to iterate over all the entries in the hosts in |chunks| and 885 // Helper to iterate over all the entries in the hosts in |chunks| and
894 // add them to the store. 886 // add them to the store.
895 void SafeBrowsingDatabaseNew::InsertAddChunks( 887 void SafeBrowsingDatabaseNew::InsertAddChunks(
896 const safe_browsing_util::ListType list_id, 888 const safe_browsing_util::ListType list_id,
897 const SBChunkList& chunks) { 889 const SBChunkList& chunks) {
898 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 890 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
1046 if (chunk_deletes[i].is_sub_del) 1038 if (chunk_deletes[i].is_sub_del)
1047 store->DeleteSubChunk(encoded_chunk_id); 1039 store->DeleteSubChunk(encoded_chunk_id);
1048 else 1040 else
1049 store->DeleteAddChunk(encoded_chunk_id); 1041 store->DeleteAddChunk(encoded_chunk_id);
1050 } 1042 }
1051 } 1043 }
1052 } 1044 }
1053 1045
1054 void SafeBrowsingDatabaseNew::CacheHashResults( 1046 void SafeBrowsingDatabaseNew::CacheHashResults(
1055 const std::vector<SBPrefix>& prefixes, 1047 const std::vector<SBPrefix>& prefixes,
1056 const std::vector<SBFullHashResult>& full_hits) { 1048 const std::vector<SBFullHashResult>& full_hits,
1049 const base::TimeDelta& cache_lifetime) {
1050
1051 const base::Time expire_after = base::Time::Now() + cache_lifetime;
1052
1057 // This is called on the I/O thread, lock against updates. 1053 // This is called on the I/O thread, lock against updates.
1058 base::AutoLock locked(lookup_lock_); 1054 base::AutoLock locked(lookup_lock_);
1059 1055
1060 if (full_hits.empty()) { 1056 // Create or reset all cached results for these prefixes.
1061 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 1057 for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
1062 return; 1058 i != prefixes.end();
1059 ++i) {
1060 browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
1063 } 1061 }
1064 1062
1065 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. 1063 // Insert any fullhash hits. Note that there may be one, multiple, or no
1066 // Refactor to make them identical. 1064 // fullhashes for any given entry in |prefixes|.
1067 const base::Time now = base::Time::Now(); 1065 for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
1068 const size_t orig_size = pending_browse_hashes_.size(); 1066 i != full_hits.end();
1069 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 1067 ++i) {
1070 iter != full_hits.end(); ++iter) { 1068 browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
1071 const int list_id = safe_browsing_util::GetListId(iter->list_name);
1072 if (list_id == safe_browsing_util::MALWARE ||
1073 list_id == safe_browsing_util::PHISH) {
1074 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1075 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1076 pending_browse_hashes_.push_back(add_full_hash);
1077 }
1078 } 1069 }
1079
1080 // Sort new entries then merge with the previously-sorted entries.
1081 std::vector<SBAddFullHash>::iterator
1082 orig_end = pending_browse_hashes_.begin() + orig_size;
1083 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1084 std::inplace_merge(pending_browse_hashes_.begin(),
1085 orig_end, pending_browse_hashes_.end(),
1086 SBAddFullHashPrefixLess);
1087 } 1070 }
1088 1071
1089 bool SafeBrowsingDatabaseNew::UpdateStarted( 1072 bool SafeBrowsingDatabaseNew::UpdateStarted(
1090 std::vector<SBListChunkRanges>* lists) { 1073 std::vector<SBListChunkRanges>* lists) {
1091 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1074 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1092 DCHECK(lists); 1075 DCHECK(lists);
1093 1076
1094 // If |BeginUpdate()| fails, reset the database. 1077 // If |BeginUpdate()| fails, reset the database.
1095 if (!browse_store_->BeginUpdate()) { 1078 if (!browse_store_->BeginUpdate()) {
1096 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 1079 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1134 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { 1117 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1135 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); 1118 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1136 HandleCorruptDatabase(); 1119 HandleCorruptDatabase();
1137 return false; 1120 return false;
1138 } 1121 }
1139 1122
1140 UpdateChunkRangesForLists(browse_store_.get(), 1123 UpdateChunkRangesForLists(browse_store_.get(),
1141 safe_browsing_util::kMalwareList, 1124 safe_browsing_util::kMalwareList,
1142 safe_browsing_util::kPhishingList, 1125 safe_browsing_util::kPhishingList,
1143 lists); 1126 lists);
1127 // Cached fullhash results must be cleared on every database update (whether
1128 // successful or not).
1129 browse_gethash_cache_.clear();
1144 1130
1145 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been 1131 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1146 // deprecated. Code to delete the list from the store shows ~15k hits/day as 1132 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1147 // of Feb 2014, so it has been removed. Everything _should_ be resilient to 1133 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1148 // extra data of that sort. 1134 // extra data of that sort.
1149 UpdateChunkRangesForList(download_store_.get(), 1135 UpdateChunkRangesForList(download_store_.get(),
1150 safe_browsing_util::kBinUrlList, lists); 1136 safe_browsing_util::kBinUrlList, lists);
1151 1137
1152 UpdateChunkRangesForList(csd_whitelist_store_.get(), 1138 UpdateChunkRangesForList(csd_whitelist_store_.get(),
1153 safe_browsing_util::kCsdWhiteList, lists); 1139 safe_browsing_util::kCsdWhiteList, lists);
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
1268 UpdateIpBlacklistStore(); 1254 UpdateIpBlacklistStore();
1269 } 1255 }
1270 1256
1271 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1257 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1272 const base::FilePath& store_filename, 1258 const base::FilePath& store_filename,
1273 SafeBrowsingStore* store, 1259 SafeBrowsingStore* store,
1274 SBWhitelist* whitelist) { 1260 SBWhitelist* whitelist) {
1275 if (!store) 1261 if (!store)
1276 return; 1262 return;
1277 1263
1278 // For the whitelists, we don't cache and save full hashes since all
1279 // hashes are already full.
1280 std::vector<SBAddFullHash> empty_add_hashes;
1281
1282 // Note: |builder| will not be empty. The current data store implementation 1264 // Note: |builder| will not be empty. The current data store implementation
1283 // stores all full-length hashes as both full and prefix hashes. 1265 // stores all full-length hashes as both full and prefix hashes.
1284 safe_browsing::PrefixSetBuilder builder; 1266 safe_browsing::PrefixSetBuilder builder;
1285 std::vector<SBAddFullHash> full_hashes; 1267 std::vector<SBAddFullHash> full_hashes;
1286 if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) { 1268 if (!store->FinishUpdate(&builder, &full_hashes)) {
1287 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1269 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1288 WhitelistEverything(whitelist); 1270 WhitelistEverything(whitelist);
1289 return; 1271 return;
1290 } 1272 }
1291 1273
1292 #if defined(OS_MACOSX) 1274 #if defined(OS_MACOSX)
1293 base::mac::SetFileBackupExclusion(store_filename); 1275 base::mac::SetFileBackupExclusion(store_filename);
1294 #endif 1276 #endif
1295 1277
1296 LoadWhitelist(full_hashes, whitelist); 1278 LoadWhitelist(full_hashes, whitelist);
1297 } 1279 }
1298 1280
1299 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1281 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1300 const base::FilePath& store_filename, 1282 const base::FilePath& store_filename,
1301 SafeBrowsingStore* store, 1283 SafeBrowsingStore* store,
1302 FailureType failure_type) { 1284 FailureType failure_type) {
1303 // We don't cache and save full hashes.
1304 std::vector<SBAddFullHash> empty_add_hashes;
1305
1306 // These results are not used after this call. Simply ignore the 1285 // These results are not used after this call. Simply ignore the
1307 // returned value after FinishUpdate(...). 1286 // returned value after FinishUpdate(...).
1308 safe_browsing::PrefixSetBuilder builder; 1287 safe_browsing::PrefixSetBuilder builder;
1309 std::vector<SBAddFullHash> add_full_hashes_result; 1288 std::vector<SBAddFullHash> add_full_hashes_result;
1310 1289
1311 if (!store->FinishUpdate(empty_add_hashes, 1290 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
1312 &builder,
1313 &add_full_hashes_result)) {
1314 RecordFailure(failure_type); 1291 RecordFailure(failure_type);
1315 } 1292 }
1316 1293
1317 #if defined(OS_MACOSX) 1294 #if defined(OS_MACOSX)
1318 base::mac::SetFileBackupExclusion(store_filename); 1295 base::mac::SetFileBackupExclusion(store_filename);
1319 #endif 1296 #endif
1320 1297
1321 return GetFileSizeOrZero(store_filename); 1298 return GetFileSizeOrZero(store_filename);
1322 } 1299 }
1323 1300
1324 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1301 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1325 // Copy out the pending add hashes. Copy rather than swapping in
1326 // case |ContainsBrowseURL()| is called before the new filter is complete.
1327 std::vector<SBAddFullHash> pending_add_hashes;
1328 {
1329 base::AutoLock locked(lookup_lock_);
1330 pending_add_hashes.insert(pending_add_hashes.end(),
1331 pending_browse_hashes_.begin(),
1332 pending_browse_hashes_.end());
1333 }
1334
1335 // Measure the amount of IO during the filter build. 1302 // Measure the amount of IO during the filter build.
1336 base::IoCounters io_before, io_after; 1303 base::IoCounters io_before, io_after;
1337 base::ProcessHandle handle = base::Process::Current().handle(); 1304 base::ProcessHandle handle = base::Process::Current().handle();
1338 scoped_ptr<base::ProcessMetrics> metric( 1305 scoped_ptr<base::ProcessMetrics> metric(
1339 #if !defined(OS_MACOSX) 1306 #if !defined(OS_MACOSX)
1340 base::ProcessMetrics::CreateProcessMetrics(handle) 1307 base::ProcessMetrics::CreateProcessMetrics(handle)
1341 #else 1308 #else
1342 // Getting stats only for the current process is enough, so NULL is fine. 1309 // Getting stats only for the current process is enough, so NULL is fine.
1343 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1310 base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1344 #endif 1311 #endif
1345 ); 1312 );
1346 1313
1347 // IoCounters are currently not supported on Mac, and may not be 1314 // IoCounters are currently not supported on Mac, and may not be
1348 // available for Linux, so we check the result and only show IO 1315 // available for Linux, so we check the result and only show IO
1349 // stats if they are available. 1316 // stats if they are available.
1350 const bool got_counters = metric->GetIOCounters(&io_before); 1317 const bool got_counters = metric->GetIOCounters(&io_before);
1351 1318
1352 const base::TimeTicks before = base::TimeTicks::Now(); 1319 const base::TimeTicks before = base::TimeTicks::Now();
1353 1320
1354 safe_browsing::PrefixSetBuilder builder; 1321 safe_browsing::PrefixSetBuilder builder;
1355 std::vector<SBAddFullHash> add_full_hashes; 1322 std::vector<SBAddFullHash> add_full_hashes;
1356 if (!browse_store_->FinishUpdate(pending_add_hashes, 1323 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1357 &builder, &add_full_hashes)) {
1358 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1324 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1359 return; 1325 return;
1360 } 1326 }
1361 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); 1327 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1362 1328
1363 // This needs to be in sorted order by prefix for efficient access. 1329 // This needs to be in sorted order by prefix for efficient access.
1364 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1330 std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1365 SBAddFullHashPrefixLess); 1331 SBAddFullHashPrefixLess);
1366 1332
1367 // Swap in the newly built filter and cache. 1333 // Swap in the newly built filter and cache.
1368 { 1334 {
1369 base::AutoLock locked(lookup_lock_); 1335 base::AutoLock locked(lookup_lock_);
1370 full_browse_hashes_.swap(add_full_hashes); 1336 full_browse_hashes_.swap(add_full_hashes);
1371
1372 // TODO(shess): If |CacheHashResults()| is posted between the
1373 // earlier lock and this clear, those pending hashes will be lost.
1374 // It could be fixed by only removing hashes which were collected
1375 // at the earlier point. I believe that is fail-safe as-is (the
1376 // hash will be fetched again).
1377 pending_browse_hashes_.clear();
1378 prefix_miss_cache_.clear();
1379 browse_prefix_set_.swap(prefix_set); 1337 browse_prefix_set_.swap(prefix_set);
1380 } 1338 }
1381 1339
1382 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " 1340 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1383 << (base::TimeTicks::Now() - before).InMilliseconds() 1341 << (base::TimeTicks::Now() - before).InMilliseconds()
1384 << " ms total."; 1342 << " ms total.";
1385 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1343 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1386 1344
1387 // Persist the prefix set to disk. Since only this thread changes 1345 // Persist the prefix set to disk. Since only this thread changes
1388 // |browse_prefix_set_|, there is no need to lock. 1346 // |browse_prefix_set_|, there is no need to lock.
(...skipping 21 matching lines...) Expand all
1410 file_size = GetFileSizeOrZero(browse_filename_); 1368 file_size = GetFileSizeOrZero(browse_filename_);
1411 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1369 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1412 static_cast<int>(file_size / 1024)); 1370 static_cast<int>(file_size / 1024));
1413 1371
1414 #if defined(OS_MACOSX) 1372 #if defined(OS_MACOSX)
1415 base::mac::SetFileBackupExclusion(browse_filename_); 1373 base::mac::SetFileBackupExclusion(browse_filename_);
1416 #endif 1374 #endif
1417 } 1375 }
1418 1376
1419 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1377 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1420 std::vector<SBAddFullHash> empty_add_hashes;
1421 safe_browsing::PrefixSetBuilder builder; 1378 safe_browsing::PrefixSetBuilder builder;
1422 std::vector<SBAddFullHash> add_full_hashes_result; 1379 std::vector<SBAddFullHash> add_full_hashes_result;
1423 1380
1424 if (!side_effect_free_whitelist_store_->FinishUpdate( 1381 if (!side_effect_free_whitelist_store_->FinishUpdate(
1425 empty_add_hashes, 1382 &builder, &add_full_hashes_result)) {
1426 &builder,
1427 &add_full_hashes_result)) {
1428 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1383 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1429 return; 1384 return;
1430 } 1385 }
1431 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); 1386 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1432 1387
1433 // Swap in the newly built prefix set. 1388 // Swap in the newly built prefix set.
1434 { 1389 {
1435 base::AutoLock locked(lookup_lock_); 1390 base::AutoLock locked(lookup_lock_);
1436 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1391 side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1437 } 1392 }
(...skipping 20 matching lines...) Expand all
1458 static_cast<int>(file_size / 1024)); 1413 static_cast<int>(file_size / 1024));
1459 1414
1460 #if defined(OS_MACOSX) 1415 #if defined(OS_MACOSX)
1461 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); 1416 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1462 base::mac::SetFileBackupExclusion( 1417 base::mac::SetFileBackupExclusion(
1463 side_effect_free_whitelist_prefix_set_filename_); 1418 side_effect_free_whitelist_prefix_set_filename_);
1464 #endif 1419 #endif
1465 } 1420 }
1466 1421
1467 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { 1422 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1468 // For the IP blacklist, we don't cache and save full hashes since all
1469 // hashes are already full.
1470 std::vector<SBAddFullHash> empty_add_hashes;
1471
1472 // Note: prefixes will not be empty. The current data store implementation 1423 // Note: prefixes will not be empty. The current data store implementation
1473 // stores all full-length hashes as both full and prefix hashes. 1424 // stores all full-length hashes as both full and prefix hashes.
1474 safe_browsing::PrefixSetBuilder builder; 1425 safe_browsing::PrefixSetBuilder builder;
1475 std::vector<SBAddFullHash> full_hashes; 1426 std::vector<SBAddFullHash> full_hashes;
1476 if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes, 1427 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1477 &builder, &full_hashes)) {
1478 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); 1428 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1479 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 1429 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
1480 return; 1430 return;
1481 } 1431 }
1482 1432
1483 #if defined(OS_MACOSX) 1433 #if defined(OS_MACOSX)
1484 base::mac::SetFileBackupExclusion(ip_blacklist_filename_); 1434 base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1485 #endif 1435 #endif
1486 1436
1487 LoadIpBlacklist(full_hashes); 1437 LoadIpBlacklist(full_hashes);
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
1683 base::AutoLock locked(lookup_lock_); 1633 base::AutoLock locked(lookup_lock_);
1684 ip_blacklist_.swap(new_blacklist); 1634 ip_blacklist_.swap(new_blacklist);
1685 } 1635 }
1686 1636
1687 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1637 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1688 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); 1638 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1689 std::vector<SBFullHash> full_hashes; 1639 std::vector<SBFullHash> full_hashes;
1690 full_hashes.push_back(malware_kill_switch); 1640 full_hashes.push_back(malware_kill_switch);
1691 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1641 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1692 } 1642 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698