Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_database.cc

Issue 220493003: Safebrowsing: change gethash caching to match api 2.3 rules, fix some corner cases. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: . Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <iterator> 8 #include <iterator>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
59 FILE_PATH_LITERAL(" IP Blacklist"); 59 FILE_PATH_LITERAL(" IP Blacklist");
60 60
61 // Filename suffix for browse store. 61 // Filename suffix for browse store.
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. 62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win.
63 // Unfortunately, to change the name implies lots of transition code 63 // Unfortunately, to change the name implies lots of transition code
64 // for little benefit. If/when file formats change (say to put all 64 // for little benefit. If/when file formats change (say to put all
65 // the data in one file), that would be a convenient point to rectify 65 // the data in one file), that would be a convenient point to rectify
66 // this. 66 // this.
67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); 67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");
68 68
69 // The maximum staleness for a cached entry.
70 const int kMaxStalenessMinutes = 45;
71
72 // Maximum number of entries we allow in any of the whitelists. 69 // Maximum number of entries we allow in any of the whitelists.
73 // If a whitelist on disk contains more entries then all lookups to 70 // If a whitelist on disk contains more entries then all lookups to
74 // the whitelist will be considered a match. 71 // the whitelist will be considered a match.
75 const size_t kMaxWhitelistSize = 5000; 72 const size_t kMaxWhitelistSize = 5000;
76 73
77 // If the hash of this exact expression is on a whitelist then all 74 // If the hash of this exact expression is on a whitelist then all
78 // lookups to this whitelist will be considered a match. 75 // lookups to this whitelist will be considered a match.
79 const char kWhitelistKillSwitchUrl[] = 76 const char kWhitelistKillSwitchUrl[] =
80 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! 77 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this!
81 78
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 if (prefix == iter->prefix && 175 if (prefix == iter->prefix &&
179 GetListIdBit(iter->chunk_id) == list_bit) { 176 GetListIdBit(iter->chunk_id) == list_bit) {
180 prefix_hits->push_back(prefix); 177 prefix_hits->push_back(prefix);
181 found_match = true; 178 found_match = true;
182 } 179 }
183 } 180 }
184 } 181 }
185 return found_match; 182 return found_match;
186 } 183 }
187 184
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and
189 // add them to |full_hits| if not expired. "Not expired" is when
190 // either |last_update| was recent enough, or the item has been
191 // received recently enough. Expired items are not deleted because a
192 // future update may make them acceptable again.
193 //
194 // For efficiency reasons the code walks |prefix_hits| and
195 // |full_hashes| in parallel, so they must be sorted by prefix.
196 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
197 const std::vector<SBAddFullHash>& full_hashes,
198 std::vector<SBFullHashResult>* full_hits,
199 base::Time last_update) {
200 const base::Time expire_time =
201 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);
202
203 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
204 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();
205
206 while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
207 if (*piter < hiter->full_hash.prefix) {
208 ++piter;
209 } else if (hiter->full_hash.prefix < *piter) {
210 ++hiter;
211 } else {
212 if (expire_time < last_update ||
213 expire_time.ToTimeT() < hiter->received) {
214 SBFullHashResult result;
215 const int list_bit = GetListIdBit(hiter->chunk_id);
216 DCHECK(list_bit == safe_browsing_util::MALWARE ||
217 list_bit == safe_browsing_util::PHISH);
218 const safe_browsing_util::ListType list_id =
219 static_cast<safe_browsing_util::ListType>(list_bit);
220 if (!safe_browsing_util::GetListName(list_id, &result.list_name))
221 continue;
222 result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
223 result.hash = hiter->full_hash;
224 full_hits->push_back(result);
225 }
226
227 // Only increment |hiter|, |piter| might have multiple hits.
228 ++hiter;
229 }
230 }
231 }
232
233 // This function generates a chunk range string for |chunks|. It 185 // This function generates a chunk range string for |chunks|. It
234 // outputs one chunk range string per list and writes it to the 186 // outputs one chunk range string per list and writes it to the
235 // |list_ranges| vector. We expect |list_ranges| to already be of the 187 // |list_ranges| vector. We expect |list_ranges| to already be of the
236 // right size. E.g., if |chunks| contains chunks with two different 188 // right size. E.g., if |chunks| contains chunks with two different
237 // list ids then |list_ranges| must contain two elements. 189 // list ids then |list_ranges| must contain two elements.
238 void GetChunkRanges(const std::vector<int>& chunks, 190 void GetChunkRanges(const std::vector<int>& chunks,
239 std::vector<std::string>* list_ranges) { 191 std::vector<std::string>* list_ranges) {
240 // Since there are 2 possible list ids, there must be exactly two 192 // Since there are 2 possible list ids, there must be exactly two
241 // list ranges. Even if the chunk data should only contain one 193 // list ranges. Even if the chunk data should only contain one
242 // line, this code has to somehow handle corruption. 194 // line, this code has to somehow handle corruption.
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
314 266
315 // This code always checks for non-zero file size. This helper makes 267 // This code always checks for non-zero file size. This helper makes
316 // that less verbose. 268 // that less verbose.
317 int64 GetFileSizeOrZero(const base::FilePath& file_path) { 269 int64 GetFileSizeOrZero(const base::FilePath& file_path) {
318 int64 size_64; 270 int64 size_64;
319 if (!base::GetFileSize(file_path, &size_64)) 271 if (!base::GetFileSize(file_path, &size_64))
320 return 0; 272 return 0;
321 return size_64; 273 return size_64;
322 } 274 }
323 275
324 // Used to order whitelist storage in memory.
325 bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) {
326 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0;
327 }
328
329 } // namespace 276 } // namespace
330 277
331 // The default SafeBrowsingDatabaseFactory. 278 // The default SafeBrowsingDatabaseFactory.
332 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { 279 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
333 public: 280 public:
334 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( 281 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
335 bool enable_download_protection, 282 bool enable_download_protection,
336 bool enable_client_side_whitelist, 283 bool enable_client_side_whitelist,
337 bool enable_download_whitelist, 284 bool enable_download_whitelist,
338 bool enable_extension_blacklist, 285 bool enable_extension_blacklist,
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 base::Unretained(this))); 471 base::Unretained(this)));
525 DVLOG(1) << "Init browse store: " << browse_filename_.value(); 472 DVLOG(1) << "Init browse store: " << browse_filename_.value();
526 473
527 { 474 {
528 // NOTE: There is no need to grab the lock in this function, since 475 // NOTE: There is no need to grab the lock in this function, since
529 // until it returns, there are no pointers to this class on other 476 // until it returns, there are no pointers to this class on other
530 // threads. Then again, that means there is no possibility of 477 // threads. Then again, that means there is no possibility of
531 // contention on the lock... 478 // contention on the lock...
532 base::AutoLock locked(lookup_lock_); 479 base::AutoLock locked(lookup_lock_);
533 full_browse_hashes_.clear(); 480 full_browse_hashes_.clear();
534 pending_browse_hashes_.clear(); 481 browse_gethash_cache_.clear();
535 LoadPrefixSet(); 482 LoadPrefixSet();
536 } 483 }
537 484
538 if (download_store_.get()) { 485 if (download_store_.get()) {
539 download_filename_ = DownloadDBFilename(filename_base); 486 download_filename_ = DownloadDBFilename(filename_base);
540 download_store_->Init( 487 download_store_->Init(
541 download_filename_, 488 download_filename_,
542 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, 489 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase,
543 base::Unretained(this))); 490 base::Unretained(this)));
544 DVLOG(1) << "Init download store: " << download_filename_.value(); 491 DVLOG(1) << "Init download store: " << download_filename_.value();
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
648 // Delete files on disk. 595 // Delete files on disk.
649 // TODO(shess): Hard to see where one might want to delete without a 596 // TODO(shess): Hard to see where one might want to delete without a
650 // reset. Perhaps inline |Delete()|? 597 // reset. Perhaps inline |Delete()|?
651 if (!Delete()) 598 if (!Delete())
652 return false; 599 return false;
653 600
654 // Reset objects in memory. 601 // Reset objects in memory.
655 { 602 {
656 base::AutoLock locked(lookup_lock_); 603 base::AutoLock locked(lookup_lock_);
657 full_browse_hashes_.clear(); 604 full_browse_hashes_.clear();
658 pending_browse_hashes_.clear(); 605 browse_gethash_cache_.clear();
659 prefix_miss_cache_.clear();
660 browse_prefix_set_.reset(); 606 browse_prefix_set_.reset();
661 side_effect_free_whitelist_prefix_set_.reset(); 607 side_effect_free_whitelist_prefix_set_.reset();
662 ip_blacklist_.clear(); 608 ip_blacklist_.clear();
663 } 609 }
664 // Wants to acquire the lock itself. 610 // Wants to acquire the lock itself.
665 WhitelistEverything(&csd_whitelist_); 611 WhitelistEverything(&csd_whitelist_);
666 WhitelistEverything(&download_whitelist_); 612 WhitelistEverything(&download_whitelist_);
667 return true; 613 return true;
668 } 614 }
669 615
670 // TODO(lzheng): Remove matching_list, it is not used anywhere.
671 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( 616 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
672 const GURL& url, 617 const GURL& url,
673 std::string* matching_list,
674 std::vector<SBPrefix>* prefix_hits, 618 std::vector<SBPrefix>* prefix_hits,
675 std::vector<SBFullHashResult>* full_hits, 619 std::vector<SBFullHashResult>* cache_hits) {
676 base::Time last_update) {
677 // Clear the results first. 620 // Clear the results first.
678 matching_list->clear();
679 prefix_hits->clear(); 621 prefix_hits->clear();
680 full_hits->clear(); 622 cache_hits->clear();
681 623
682 std::vector<SBFullHash> full_hashes; 624 std::vector<SBFullHash> full_hashes;
683 BrowseFullHashesToCheck(url, false, &full_hashes); 625 BrowseFullHashesToCheck(url, false, &full_hashes);
684 if (full_hashes.empty()) 626 if (full_hashes.empty())
685 return false; 627 return false;
686 628
629 std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess);
630
631 return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits);
632 }
633
634 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes(
635 const std::vector<SBFullHash>& full_hashes,
636 std::vector<SBPrefix>* prefix_hits,
637 std::vector<SBFullHashResult>* cache_hits) {
687 // This function is called on the I/O thread, prevent changes to 638 // This function is called on the I/O thread, prevent changes to
688 // filter and caches. 639 // filter and caches.
689 base::AutoLock locked(lookup_lock_); 640 base::AutoLock locked(lookup_lock_);
690 641
691 // |browse_prefix_set_| is empty until it is either read from disk, or the 642 // |browse_prefix_set_| is empty until it is either read from disk, or the
692 // first update populates it. Bail out without a hit if not yet 643 // first update populates it. Bail out without a hit if not yet
693 // available. 644 // available.
694 if (!browse_prefix_set_.get()) 645 if (!browse_prefix_set_.get())
695 return false; 646 return false;
696 647
697 size_t miss_count = 0; 648 const base::Time now = base::Time::Now();
649
698 for (size_t i = 0; i < full_hashes.size(); ++i) { 650 for (size_t i = 0; i < full_hashes.size(); ++i) {
699 const SBPrefix prefix = full_hashes[i].prefix; 651 const SBPrefix prefix = full_hashes[i].prefix;
700 if (browse_prefix_set_->Exists(prefix)) { 652 if (browse_prefix_set_->Exists(prefix)) {
701 prefix_hits->push_back(prefix); 653 // Stupid workaround since std::equal_range requires you to have the
702 if (prefix_miss_cache_.count(prefix) > 0) 654 // search key be the same type as the elements in the collection.
Scott Hess - ex-Googler 2014/04/01 22:08:36 My impression is that this is because while you ca…
mattm 2014/04/03 01:38:12 Yeah. I had tried to hack by defining both orderin…
703 ++miss_count; 655 SBAddFullHash search_value;
656 search_value.full_hash.prefix = prefix;
657
658 std::pair<std::vector<SBAddFullHash>::const_iterator,
659 std::vector<SBAddFullHash>::const_iterator>
660 db_fullhash_prefix_matches =
661 std::equal_range(full_browse_hashes_.begin(),
662 full_browse_hashes_.end(),
663 search_value,
664 SBAddFullHashPrefixLess);
665 bool match = false;
666 // If the database contains any fullhashes with the same prefix, then we
667 // only count it as a prefix hit if there is an exact fullhash match.
668 if (db_fullhash_prefix_matches.first !=
669 db_fullhash_prefix_matches.second) {
670 // Could do binary_search here, but unlikely to be enough matches to
671 // matter.
672 for (std::vector<SBAddFullHash>::const_iterator hiter =
673 db_fullhash_prefix_matches.first;
674 hiter != db_fullhash_prefix_matches.second;
675 ++hiter) {
676 if (SBFullHashEqual(hiter->full_hash, full_hashes[i])) {
677 match = true;
678 break;
679 }
680 }
681 } else {
682 // If db has only a prefix hit, that's ok too.
683 match = true;
684 }
685
686 // If we have a db match, check if we have a valid cached result for this
687 // prefix.
Scott Hess - ex-Googler 2014/04/01 22:08:36 I think maybe this is backwards. If there is a va…
mattm 2014/04/03 01:38:12 Ah, good point. Could even check the cache before…
688 if (match) {
689 std::map<SBPrefix, SBCachedFullHashResult>::iterator citer =
690 browse_gethash_cache_.find(prefix);
691 if (citer != browse_gethash_cache_.end()) {
692 if (now > citer->second.expire_after) {
693 // If the cached entry is expired, remove it and ignore it.
694 browse_gethash_cache_.erase(citer);
695 } else {
696 for (std::vector<SBFullHashResult>::const_iterator fiter =
697 citer->second.full_hashes.begin();
698 fiter != citer->second.full_hashes.end();
699 ++fiter) {
700 if (SBFullHashEqual(full_hashes[i], fiter->hash)) {
701 cache_hits->push_back(*fiter);
702 }
703 }
704 // If the prefix was in the cache, don't add the prefix to
705 // prefix_hits. The result will be in cache_hits (if the fullhash
706 // matched), or not (if it was a cached miss).
707 match = false;
708 }
709 }
710 }
711
712 // If there was a prefix match and no cached result, add to prefix_hits.
713 if (match) {
714 // Only add a given prefix once. Since full_hashes is sorted, we only
715 // need to check the last entry of prefix_hits.
716 if (prefix_hits->empty() || prefix_hits->back() != prefix)
717 prefix_hits->push_back(prefix);
718 }
704 } 719 }
705 } 720 }
706 721
707 // If all the prefixes are cached as 'misses', don't issue a GetHash. 722 return !prefix_hits->empty() || !cache_hits->empty();
708 if (miss_count == prefix_hits->size())
709 return false;
710
711 // Find the matching full-hash results. |full_browse_hashes_| are from the
712 // database, |pending_browse_hashes_| are from GetHash requests between
713 // updates.
714 std::sort(prefix_hits->begin(), prefix_hits->end());
715
716 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
717 full_hits, last_update);
718 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
719 full_hits, last_update);
720 return true;
721 } 723 }
722 724
723 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( 725 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
724 const std::vector<GURL>& urls, 726 const std::vector<GURL>& urls,
725 std::vector<SBPrefix>* prefix_hits) { 727 std::vector<SBPrefix>* prefix_hits) {
726 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 728 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
727 729
728 // Ignore this check when download checking is not enabled. 730 // Ignore this check when download checking is not enabled.
729 if (!download_store_.get()) 731 if (!download_store_.get())
730 return false; 732 return false;
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
869 store->WriteAddPrefix(encoded_chunk_id, host); 871 store->WriteAddPrefix(encoded_chunk_id, host);
870 } else if (entry->IsPrefix()) { 872 } else if (entry->IsPrefix()) {
871 // Prefixes only. 873 // Prefixes only.
872 for (int i = 0; i < count; i++) { 874 for (int i = 0; i < count; i++) {
873 const SBPrefix prefix = entry->PrefixAt(i); 875 const SBPrefix prefix = entry->PrefixAt(i);
874 STATS_COUNTER("SB.PrefixAdd", 1); 876 STATS_COUNTER("SB.PrefixAdd", 1);
875 store->WriteAddPrefix(encoded_chunk_id, prefix); 877 store->WriteAddPrefix(encoded_chunk_id, prefix);
876 } 878 }
877 } else { 879 } else {
878 // Prefixes and hashes. 880 // Prefixes and hashes.
879 const base::Time receive_time = base::Time::Now();
880 for (int i = 0; i < count; ++i) { 881 for (int i = 0; i < count; ++i) {
881 const SBFullHash full_hash = entry->FullHashAt(i); 882 const SBFullHash full_hash = entry->FullHashAt(i);
882 const SBPrefix prefix = full_hash.prefix; 883 const SBPrefix prefix = full_hash.prefix;
883 884
884 STATS_COUNTER("SB.PrefixAdd", 1); 885 STATS_COUNTER("SB.PrefixAdd", 1);
885 store->WriteAddPrefix(encoded_chunk_id, prefix); 886 store->WriteAddPrefix(encoded_chunk_id, prefix);
886 887
887 STATS_COUNTER("SB.PrefixAddFull", 1); 888 STATS_COUNTER("SB.PrefixAddFull", 1);
888 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); 889 store->WriteAddHash(encoded_chunk_id, full_hash);
889 } 890 }
890 } 891 }
891 } 892 }
892 893
893 // Helper to iterate over all the entries in the hosts in |chunks| and 894 // Helper to iterate over all the entries in the hosts in |chunks| and
894 // add them to the store. 895 // add them to the store.
895 void SafeBrowsingDatabaseNew::InsertAddChunks( 896 void SafeBrowsingDatabaseNew::InsertAddChunks(
896 const safe_browsing_util::ListType list_id, 897 const safe_browsing_util::ListType list_id,
897 const SBChunkList& chunks) { 898 const SBChunkList& chunks) {
898 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 899 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
1046 if (chunk_deletes[i].is_sub_del) 1047 if (chunk_deletes[i].is_sub_del)
1047 store->DeleteSubChunk(encoded_chunk_id); 1048 store->DeleteSubChunk(encoded_chunk_id);
1048 else 1049 else
1049 store->DeleteAddChunk(encoded_chunk_id); 1050 store->DeleteAddChunk(encoded_chunk_id);
1050 } 1051 }
1051 } 1052 }
1052 } 1053 }
1053 1054
1054 void SafeBrowsingDatabaseNew::CacheHashResults( 1055 void SafeBrowsingDatabaseNew::CacheHashResults(
1055 const std::vector<SBPrefix>& prefixes, 1056 const std::vector<SBPrefix>& prefixes,
1056 const std::vector<SBFullHashResult>& full_hits) { 1057 const std::vector<SBFullHashResult>& full_hits,
1058 const base::TimeDelta& cache_lifetime) {
1059
1060 const base::Time expire_after = base::Time::Now() + cache_lifetime;
1061
1057 // This is called on the I/O thread, lock against updates. 1062 // This is called on the I/O thread, lock against updates.
1058 base::AutoLock locked(lookup_lock_); 1063 base::AutoLock locked(lookup_lock_);
1059 1064
1060 if (full_hits.empty()) { 1065 // Create or reset all cached results for these prefixes.
1061 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); 1066 for (std::vector<SBPrefix>::const_iterator i = prefixes.begin();
1062 return; 1067 i != prefixes.end();
1068 ++i) {
1069 browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after);
1063 } 1070 }
1064 1071
1065 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. 1072 // Insert any fullhash hits. Note that there may be one, multiple, or no
1066 // Refactor to make them identical. 1073 // fullhashes for any given entry in |prefixes|.
1067 const base::Time now = base::Time::Now(); 1074 for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin();
1068 const size_t orig_size = pending_browse_hashes_.size(); 1075 i != full_hits.end();
1069 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); 1076 ++i) {
1070 iter != full_hits.end(); ++iter) { 1077 browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i);
1071 const int list_id = safe_browsing_util::GetListId(iter->list_name);
1072 if (list_id == safe_browsing_util::MALWARE ||
1073 list_id == safe_browsing_util::PHISH) {
1074 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
1075 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
1076 pending_browse_hashes_.push_back(add_full_hash);
1077 }
1078 } 1078 }
1079
1080 // Sort new entries then merge with the previously-sorted entries.
1081 std::vector<SBAddFullHash>::iterator
1082 orig_end = pending_browse_hashes_.begin() + orig_size;
1083 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
1084 std::inplace_merge(pending_browse_hashes_.begin(),
1085 orig_end, pending_browse_hashes_.end(),
1086 SBAddFullHashPrefixLess);
1087 } 1079 }
1088 1080
1089 bool SafeBrowsingDatabaseNew::UpdateStarted( 1081 bool SafeBrowsingDatabaseNew::UpdateStarted(
1090 std::vector<SBListChunkRanges>* lists) { 1082 std::vector<SBListChunkRanges>* lists) {
1091 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); 1083 DCHECK_EQ(creation_loop_, base::MessageLoop::current());
1092 DCHECK(lists); 1084 DCHECK(lists);
1093 1085
1094 // If |BeginUpdate()| fails, reset the database. 1086 // If |BeginUpdate()| fails, reset the database.
1095 if (!browse_store_->BeginUpdate()) { 1087 if (!browse_store_->BeginUpdate()) {
1096 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); 1088 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1134 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { 1126 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) {
1135 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); 1127 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN);
1136 HandleCorruptDatabase(); 1128 HandleCorruptDatabase();
1137 return false; 1129 return false;
1138 } 1130 }
1139 1131
1140 UpdateChunkRangesForLists(browse_store_.get(), 1132 UpdateChunkRangesForLists(browse_store_.get(),
1141 safe_browsing_util::kMalwareList, 1133 safe_browsing_util::kMalwareList,
1142 safe_browsing_util::kPhishingList, 1134 safe_browsing_util::kPhishingList,
1143 lists); 1135 lists);
1136 // Cached fullhash results must be cleared on every database update (whether
1137 // successful or not.)
1138 browse_gethash_cache_.clear();
Scott Hess - ex-Googler 2014/04/01 22:08:36 I think this should be cleared when the update is…
mattm 2014/04/03 01:38:12 The spec says "Clients must clear cached full-leng…
1144 1139
1145 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been 1140 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been
1146 // deprecated. Code to delete the list from the store shows ~15k hits/day as 1141 // deprecated. Code to delete the list from the store shows ~15k hits/day as
1147 // of Feb 2014, so it has been removed. Everything _should_ be resilient to 1142 // of Feb 2014, so it has been removed. Everything _should_ be resilient to
1148 // extra data of that sort. 1143 // extra data of that sort.
1149 UpdateChunkRangesForList(download_store_.get(), 1144 UpdateChunkRangesForList(download_store_.get(),
1150 safe_browsing_util::kBinUrlList, lists); 1145 safe_browsing_util::kBinUrlList, lists);
1151 1146
1152 UpdateChunkRangesForList(csd_whitelist_store_.get(), 1147 UpdateChunkRangesForList(csd_whitelist_store_.get(),
1153 safe_browsing_util::kCsdWhiteList, lists); 1148 safe_browsing_util::kCsdWhiteList, lists);
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after
1268 UpdateIpBlacklistStore(); 1263 UpdateIpBlacklistStore();
1269 } 1264 }
1270 1265
1271 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( 1266 void SafeBrowsingDatabaseNew::UpdateWhitelistStore(
1272 const base::FilePath& store_filename, 1267 const base::FilePath& store_filename,
1273 SafeBrowsingStore* store, 1268 SafeBrowsingStore* store,
1274 SBWhitelist* whitelist) { 1269 SBWhitelist* whitelist) {
1275 if (!store) 1270 if (!store)
1276 return; 1271 return;
1277 1272
1278 // For the whitelists, we don't cache and save full hashes since all
1279 // hashes are already full.
1280 std::vector<SBAddFullHash> empty_add_hashes;
1281
1282 // Note: |builder| will not be empty. The current data store implementation 1273 // Note: |builder| will not be empty. The current data store implementation
1283 // stores all full-length hashes as both full and prefix hashes. 1274 // stores all full-length hashes as both full and prefix hashes.
1284 safe_browsing::PrefixSetBuilder builder; 1275 safe_browsing::PrefixSetBuilder builder;
1285 std::vector<SBAddFullHash> full_hashes; 1276 std::vector<SBAddFullHash> full_hashes;
1286 if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) { 1277 if (!store->FinishUpdate(&builder, &full_hashes)) {
1287 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); 1278 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH);
1288 WhitelistEverything(whitelist); 1279 WhitelistEverything(whitelist);
1289 return; 1280 return;
1290 } 1281 }
1291 1282
1292 #if defined(OS_MACOSX) 1283 #if defined(OS_MACOSX)
1293 base::mac::SetFileBackupExclusion(store_filename); 1284 base::mac::SetFileBackupExclusion(store_filename);
1294 #endif 1285 #endif
1295 1286
1296 LoadWhitelist(full_hashes, whitelist); 1287 LoadWhitelist(full_hashes, whitelist);
1297 } 1288 }
1298 1289
1299 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( 1290 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore(
1300 const base::FilePath& store_filename, 1291 const base::FilePath& store_filename,
1301 SafeBrowsingStore* store, 1292 SafeBrowsingStore* store,
1302 FailureType failure_type) { 1293 FailureType failure_type) {
1303 // We don't cache and save full hashes.
1304 std::vector<SBAddFullHash> empty_add_hashes;
1305
1306 // These results are not used after this call. Simply ignore the 1294 // These results are not used after this call. Simply ignore the
1307 // returned value after FinishUpdate(...). 1295 // returned value after FinishUpdate(...).
1308 safe_browsing::PrefixSetBuilder builder; 1296 safe_browsing::PrefixSetBuilder builder;
1309 std::vector<SBAddFullHash> add_full_hashes_result; 1297 std::vector<SBAddFullHash> add_full_hashes_result;
1310 1298
1311 if (!store->FinishUpdate(empty_add_hashes, 1299 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) {
1312 &builder,
1313 &add_full_hashes_result)) {
1314 RecordFailure(failure_type); 1300 RecordFailure(failure_type);
1315 } 1301 }
1316 1302
1317 #if defined(OS_MACOSX) 1303 #if defined(OS_MACOSX)
1318 base::mac::SetFileBackupExclusion(store_filename); 1304 base::mac::SetFileBackupExclusion(store_filename);
1319 #endif 1305 #endif
1320 1306
1321 return GetFileSizeOrZero(store_filename); 1307 return GetFileSizeOrZero(store_filename);
1322 } 1308 }
1323 1309
1324 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { 1310 void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
1325 // Copy out the pending add hashes. Copy rather than swapping in
1326 // case |ContainsBrowseURL()| is called before the new filter is complete.
1327 std::vector<SBAddFullHash> pending_add_hashes;
1328 {
1329 base::AutoLock locked(lookup_lock_);
1330 pending_add_hashes.insert(pending_add_hashes.end(),
1331 pending_browse_hashes_.begin(),
1332 pending_browse_hashes_.end());
1333 }
1334
1335 // Measure the amount of IO during the filter build. 1311 // Measure the amount of IO during the filter build.
1336 base::IoCounters io_before, io_after; 1312 base::IoCounters io_before, io_after;
1337 base::ProcessHandle handle = base::Process::Current().handle(); 1313 base::ProcessHandle handle = base::Process::Current().handle();
1338 scoped_ptr<base::ProcessMetrics> metric( 1314 scoped_ptr<base::ProcessMetrics> metric(
1339 #if !defined(OS_MACOSX) 1315 #if !defined(OS_MACOSX)
1340 base::ProcessMetrics::CreateProcessMetrics(handle) 1316 base::ProcessMetrics::CreateProcessMetrics(handle)
1341 #else 1317 #else
1342 // Getting stats only for the current process is enough, so NULL is fine. 1318 // Getting stats only for the current process is enough, so NULL is fine.
1343 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) 1319 base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
1344 #endif 1320 #endif
1345 ); 1321 );
1346 1322
1347 // IoCounters are currently not supported on Mac, and may not be 1323 // IoCounters are currently not supported on Mac, and may not be
1348 // available for Linux, so we check the result and only show IO 1324 // available for Linux, so we check the result and only show IO
1349 // stats if they are available. 1325 // stats if they are available.
1350 const bool got_counters = metric->GetIOCounters(&io_before); 1326 const bool got_counters = metric->GetIOCounters(&io_before);
1351 1327
1352 const base::TimeTicks before = base::TimeTicks::Now(); 1328 const base::TimeTicks before = base::TimeTicks::Now();
1353 1329
1354 safe_browsing::PrefixSetBuilder builder; 1330 safe_browsing::PrefixSetBuilder builder;
1355 std::vector<SBAddFullHash> add_full_hashes; 1331 std::vector<SBAddFullHash> add_full_hashes;
1356 if (!browse_store_->FinishUpdate(pending_add_hashes, 1332 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) {
1357 &builder, &add_full_hashes)) {
1358 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); 1333 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
1359 return; 1334 return;
1360 } 1335 }
1361 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); 1336 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1362 1337
1363 // This needs to be in sorted order by prefix for efficient access. 1338 // This needs to be in sorted order by prefix for efficient access.
1364 std::sort(add_full_hashes.begin(), add_full_hashes.end(), 1339 std::sort(add_full_hashes.begin(), add_full_hashes.end(),
1365 SBAddFullHashPrefixLess); 1340 SBAddFullHashPrefixLess);
1366 1341
1367 // Swap in the newly built filter and cache. 1342 // Swap in the newly built filter and cache.
1368 { 1343 {
1369 base::AutoLock locked(lookup_lock_); 1344 base::AutoLock locked(lookup_lock_);
1370 full_browse_hashes_.swap(add_full_hashes); 1345 full_browse_hashes_.swap(add_full_hashes);
1371
1372 // TODO(shess): If |CacheHashResults()| is posted between the
1373 // earlier lock and this clear, those pending hashes will be lost.
1374 // It could be fixed by only removing hashes which were collected
1375 // at the earlier point. I believe that is fail-safe as-is (the
1376 // hash will be fetched again).
1377 pending_browse_hashes_.clear();
1378 prefix_miss_cache_.clear();
1379 browse_prefix_set_.swap(prefix_set); 1346 browse_prefix_set_.swap(prefix_set);
1380 } 1347 }
1381 1348
1382 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " 1349 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in "
1383 << (base::TimeTicks::Now() - before).InMilliseconds() 1350 << (base::TimeTicks::Now() - before).InMilliseconds()
1384 << " ms total."; 1351 << " ms total.";
1385 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); 1352 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before);
1386 1353
1387 // Persist the prefix set to disk. Since only this thread changes 1354 // Persist the prefix set to disk. Since only this thread changes
1388 // |browse_prefix_set_|, there is no need to lock. 1355 // |browse_prefix_set_|, there is no need to lock.
(...skipping 21 matching lines...) Expand all
1410 file_size = GetFileSizeOrZero(browse_filename_); 1377 file_size = GetFileSizeOrZero(browse_filename_);
1411 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", 1378 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
1412 static_cast<int>(file_size / 1024)); 1379 static_cast<int>(file_size / 1024));
1413 1380
1414 #if defined(OS_MACOSX) 1381 #if defined(OS_MACOSX)
1415 base::mac::SetFileBackupExclusion(browse_filename_); 1382 base::mac::SetFileBackupExclusion(browse_filename_);
1416 #endif 1383 #endif
1417 } 1384 }
1418 1385
1419 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { 1386 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() {
1420 std::vector<SBAddFullHash> empty_add_hashes;
1421 safe_browsing::PrefixSetBuilder builder; 1387 safe_browsing::PrefixSetBuilder builder;
1422 std::vector<SBAddFullHash> add_full_hashes_result; 1388 std::vector<SBAddFullHash> add_full_hashes_result;
1423 1389
1424 if (!side_effect_free_whitelist_store_->FinishUpdate( 1390 if (!side_effect_free_whitelist_store_->FinishUpdate(
1425 empty_add_hashes, 1391 &builder, &add_full_hashes_result)) {
1426 &builder,
1427 &add_full_hashes_result)) {
1428 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); 1392 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH);
1429 return; 1393 return;
1430 } 1394 }
1431 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); 1395 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet());
1432 1396
1433 // Swap in the newly built prefix set. 1397 // Swap in the newly built prefix set.
1434 { 1398 {
1435 base::AutoLock locked(lookup_lock_); 1399 base::AutoLock locked(lookup_lock_);
1436 side_effect_free_whitelist_prefix_set_.swap(prefix_set); 1400 side_effect_free_whitelist_prefix_set_.swap(prefix_set);
1437 } 1401 }
(...skipping 20 matching lines...) Expand all
1458 static_cast<int>(file_size / 1024)); 1422 static_cast<int>(file_size / 1024));
1459 1423
1460 #if defined(OS_MACOSX) 1424 #if defined(OS_MACOSX)
1461 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); 1425 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_);
1462 base::mac::SetFileBackupExclusion( 1426 base::mac::SetFileBackupExclusion(
1463 side_effect_free_whitelist_prefix_set_filename_); 1427 side_effect_free_whitelist_prefix_set_filename_);
1464 #endif 1428 #endif
1465 } 1429 }
1466 1430
1467 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { 1431 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() {
1468 // For the IP blacklist, we don't cache and save full hashes since all
1469 // hashes are already full.
1470 std::vector<SBAddFullHash> empty_add_hashes;
1471
1472 // Note: prefixes will not be empty. The current data store implementation 1432 // Note: prefixes will not be empty. The current data store implementation
1473 // stores all full-length hashes as both full and prefix hashes. 1433 // stores all full-length hashes as both full and prefix hashes.
1474 safe_browsing::PrefixSetBuilder builder; 1434 safe_browsing::PrefixSetBuilder builder;
1475 std::vector<SBAddFullHash> full_hashes; 1435 std::vector<SBAddFullHash> full_hashes;
1476 if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes, 1436 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) {
1477 &builder, &full_hashes)) {
1478 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); 1437 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH);
1479 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. 1438 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list.
1480 return; 1439 return;
1481 } 1440 }
1482 1441
1483 #if defined(OS_MACOSX) 1442 #if defined(OS_MACOSX)
1484 base::mac::SetFileBackupExclusion(ip_blacklist_filename_); 1443 base::mac::SetFileBackupExclusion(ip_blacklist_filename_);
1485 #endif 1444 #endif
1486 1445
1487 LoadIpBlacklist(full_hashes); 1446 LoadIpBlacklist(full_hashes);
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
1683 base::AutoLock locked(lookup_lock_); 1642 base::AutoLock locked(lookup_lock_);
1684 ip_blacklist_.swap(new_blacklist); 1643 ip_blacklist_.swap(new_blacklist);
1685 } 1644 }
1686 1645
// Reports whether the malware IP-match kill switch is on.  Computes the full
// hash of |kMalwareIPKillSwitchUrl| with SBFullHashForString(), wraps it in a
// one-element vector, and returns whatever ContainsWhitelistedHashes() reports
// for that vector against |csd_whitelist_|.
// NOTE(review): no lock is taken in this function itself; presumably
// ContainsWhitelistedHashes() synchronizes access to |csd_whitelist_| --
// confirm against its definition.
1687 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { 1646 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() {
1688 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); 1647 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl);
1689 std::vector<SBFullHash> full_hashes; 1648 std::vector<SBFullHash> full_hashes;
1690 full_hashes.push_back(malware_kill_switch); 1649 full_hashes.push_back(malware_kill_switch);
1691 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); 1650 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes);
1692 } 1651 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698