OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_database.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <iterator> | 8 #include <iterator> |
9 | 9 |
10 #include "base/bind.h" | 10 #include "base/bind.h" |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
59 FILE_PATH_LITERAL(" IP Blacklist"); | 59 FILE_PATH_LITERAL(" IP Blacklist"); |
60 | 60 |
61 // Filename suffix for browse store. | 61 // Filename suffix for browse store. |
62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. | 62 // TODO(shess): "Safe Browsing Bloom Prefix Set" is full of win. |
63 // Unfortunately, to change the name implies lots of transition code | 63 // Unfortunately, to change the name implies lots of transition code |
64 // for little benefit. If/when file formats change (say to put all | 64 // for little benefit. If/when file formats change (say to put all |
65 // the data in one file), that would be a convenient point to rectify | 65 // the data in one file), that would be a convenient point to rectify |
66 // this. | 66 // this. |
67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); | 67 const base::FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom"); |
68 | 68 |
69 // The maximum staleness for a cached entry. | |
70 const int kMaxStalenessMinutes = 45; | |
71 | |
72 // Maximum number of entries we allow in any of the whitelists. | 69 // Maximum number of entries we allow in any of the whitelists. |
73 // If a whitelist on disk contains more entries than this, then all lookups | 70 // If a whitelist on disk contains more entries than this, then all lookups |
74 // to the whitelist will be considered a match. | 71 // to the whitelist will be considered a match. |
75 const size_t kMaxWhitelistSize = 5000; | 72 const size_t kMaxWhitelistSize = 5000; |
76 | 73 |
77 // If the hash of this exact expression is on a whitelist then all | 74 // If the hash of this exact expression is on a whitelist then all |
78 // lookups to this whitelist will be considered a match. | 75 // lookups to this whitelist will be considered a match. |
79 const char kWhitelistKillSwitchUrl[] = | 76 const char kWhitelistKillSwitchUrl[] = |
80 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! | 77 "sb-ssl.google.com/safebrowsing/csd/killswitch"; // Don't change this! |
81 | 78 |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
178 if (prefix == iter->prefix && | 175 if (prefix == iter->prefix && |
179 GetListIdBit(iter->chunk_id) == list_bit) { | 176 GetListIdBit(iter->chunk_id) == list_bit) { |
180 prefix_hits->push_back(prefix); | 177 prefix_hits->push_back(prefix); |
181 found_match = true; | 178 found_match = true; |
182 } | 179 } |
183 } | 180 } |
184 } | 181 } |
185 return found_match; | 182 return found_match; |
186 } | 183 } |
187 | 184 |
188 // Find the entries in |full_hashes| with prefix in |prefix_hits|, and | |
189 // add them to |full_hits| if not expired. "Not expired" is when | |
190 // either |last_update| was recent enough, or the item has been | |
191 // received recently enough. Expired items are not deleted because a | |
192 // future update may make them acceptable again. | |
193 // | |
194 // For efficiency reasons the code walks |prefix_hits| and | |
195 // |full_hashes| in parallel, so they must be sorted by prefix. | |
196 void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits, | |
197 const std::vector<SBAddFullHash>& full_hashes, | |
198 std::vector<SBFullHashResult>* full_hits, | |
199 base::Time last_update) { | |
200 const base::Time expire_time = | |
201 base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes); | |
202 | |
203 std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin(); | |
204 std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin(); | |
205 | |
206 while (piter != prefix_hits.end() && hiter != full_hashes.end()) { | |
207 if (*piter < hiter->full_hash.prefix) { | |
208 ++piter; | |
209 } else if (hiter->full_hash.prefix < *piter) { | |
210 ++hiter; | |
211 } else { | |
212 if (expire_time < last_update || | |
213 expire_time.ToTimeT() < hiter->received) { | |
214 SBFullHashResult result; | |
215 const int list_bit = GetListIdBit(hiter->chunk_id); | |
216 DCHECK(list_bit == safe_browsing_util::MALWARE || | |
217 list_bit == safe_browsing_util::PHISH); | |
218 const safe_browsing_util::ListType list_id = | |
219 static_cast<safe_browsing_util::ListType>(list_bit); | |
220 if (!safe_browsing_util::GetListName(list_id, &result.list_name)) | |
221 continue; | |
222 result.add_chunk_id = DecodeChunkId(hiter->chunk_id); | |
223 result.hash = hiter->full_hash; | |
224 full_hits->push_back(result); | |
225 } | |
226 | |
227 // Only increment |hiter|, |piter| might have multiple hits. | |
228 ++hiter; | |
229 } | |
230 } | |
231 } | |
232 | |
233 // This function generates a chunk range string for |chunks|. It | 185 // This function generates a chunk range string for |chunks|. It |
234 // outputs one chunk range string per list and writes it to the | 186 // outputs one chunk range string per list and writes it to the |
235 // |list_ranges| vector. We expect |list_ranges| to already be of the | 187 // |list_ranges| vector. We expect |list_ranges| to already be of the |
236 // right size. E.g., if |chunks| contains chunks with two different | 188 // right size. E.g., if |chunks| contains chunks with two different |
237 // list ids then |list_ranges| must contain two elements. | 189 // list ids then |list_ranges| must contain two elements. |
238 void GetChunkRanges(const std::vector<int>& chunks, | 190 void GetChunkRanges(const std::vector<int>& chunks, |
239 std::vector<std::string>* list_ranges) { | 191 std::vector<std::string>* list_ranges) { |
240 // Since there are 2 possible list ids, there must be exactly two | 192 // Since there are 2 possible list ids, there must be exactly two |
241 // list ranges. Even if the chunk data should only contain one | 193 // list ranges. Even if the chunk data should only contain one |
242 // line, this code has to somehow handle corruption. | 194 // line, this code has to somehow handle corruption. |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
314 | 266 |
315 // This code always checks for non-zero file size. This helper makes | 267 // This code always checks for non-zero file size. This helper makes |
316 // that less verbose. | 268 // that less verbose. |
317 int64 GetFileSizeOrZero(const base::FilePath& file_path) { | 269 int64 GetFileSizeOrZero(const base::FilePath& file_path) { |
318 int64 size_64; | 270 int64 size_64; |
319 if (!base::GetFileSize(file_path, &size_64)) | 271 if (!base::GetFileSize(file_path, &size_64)) |
320 return 0; | 272 return 0; |
321 return size_64; | 273 return size_64; |
322 } | 274 } |
323 | 275 |
324 // Used to order whitelist storage in memory. | |
325 bool SBFullHashLess(const SBFullHash& a, const SBFullHash& b) { | |
326 return memcmp(a.full_hash, b.full_hash, sizeof(a.full_hash)) < 0; | |
327 } | |
328 | |
329 } // namespace | 276 } // namespace |
330 | 277 |
331 // The default SafeBrowsingDatabaseFactory. | 278 // The default SafeBrowsingDatabaseFactory. |
332 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { | 279 class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory { |
333 public: | 280 public: |
334 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( | 281 virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase( |
335 bool enable_download_protection, | 282 bool enable_download_protection, |
336 bool enable_client_side_whitelist, | 283 bool enable_client_side_whitelist, |
337 bool enable_download_whitelist, | 284 bool enable_download_whitelist, |
338 bool enable_extension_blacklist, | 285 bool enable_extension_blacklist, |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
524 base::Unretained(this))); | 471 base::Unretained(this))); |
525 DVLOG(1) << "Init browse store: " << browse_filename_.value(); | 472 DVLOG(1) << "Init browse store: " << browse_filename_.value(); |
526 | 473 |
527 { | 474 { |
528 // NOTE: There is no need to grab the lock in this function, since | 475 // NOTE: There is no need to grab the lock in this function, since |
529 // until it returns, there are no pointers to this class on other | 476 // until it returns, there are no pointers to this class on other |
530 // threads. Then again, that means there is no possibility of | 477 // threads. Then again, that means there is no possibility of |
531 // contention on the lock... | 478 // contention on the lock... |
532 base::AutoLock locked(lookup_lock_); | 479 base::AutoLock locked(lookup_lock_); |
533 full_browse_hashes_.clear(); | 480 full_browse_hashes_.clear(); |
534 pending_browse_hashes_.clear(); | 481 browse_gethash_cache_.clear(); |
535 LoadPrefixSet(); | 482 LoadPrefixSet(); |
536 } | 483 } |
537 | 484 |
538 if (download_store_.get()) { | 485 if (download_store_.get()) { |
539 download_filename_ = DownloadDBFilename(filename_base); | 486 download_filename_ = DownloadDBFilename(filename_base); |
540 download_store_->Init( | 487 download_store_->Init( |
541 download_filename_, | 488 download_filename_, |
542 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, | 489 base::Bind(&SafeBrowsingDatabaseNew::HandleCorruptDatabase, |
543 base::Unretained(this))); | 490 base::Unretained(this))); |
544 DVLOG(1) << "Init download store: " << download_filename_.value(); | 491 DVLOG(1) << "Init download store: " << download_filename_.value(); |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
648 // Delete files on disk. | 595 // Delete files on disk. |
649 // TODO(shess): Hard to see where one might want to delete without a | 596 // TODO(shess): Hard to see where one might want to delete without a |
650 // reset. Perhaps inline |Delete()|? | 597 // reset. Perhaps inline |Delete()|? |
651 if (!Delete()) | 598 if (!Delete()) |
652 return false; | 599 return false; |
653 | 600 |
654 // Reset objects in memory. | 601 // Reset objects in memory. |
655 { | 602 { |
656 base::AutoLock locked(lookup_lock_); | 603 base::AutoLock locked(lookup_lock_); |
657 full_browse_hashes_.clear(); | 604 full_browse_hashes_.clear(); |
658 pending_browse_hashes_.clear(); | 605 browse_gethash_cache_.clear(); |
659 prefix_miss_cache_.clear(); | |
660 browse_prefix_set_.reset(); | 606 browse_prefix_set_.reset(); |
661 side_effect_free_whitelist_prefix_set_.reset(); | 607 side_effect_free_whitelist_prefix_set_.reset(); |
662 ip_blacklist_.clear(); | 608 ip_blacklist_.clear(); |
663 } | 609 } |
664 // Wants to acquire the lock itself. | 610 // Wants to acquire the lock itself. |
665 WhitelistEverything(&csd_whitelist_); | 611 WhitelistEverything(&csd_whitelist_); |
666 WhitelistEverything(&download_whitelist_); | 612 WhitelistEverything(&download_whitelist_); |
667 return true; | 613 return true; |
668 } | 614 } |
669 | 615 |
670 // TODO(lzheng): Remove matching_list, it is not used anywhere. | |
671 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( | 616 bool SafeBrowsingDatabaseNew::ContainsBrowseUrl( |
672 const GURL& url, | 617 const GURL& url, |
673 std::string* matching_list, | |
674 std::vector<SBPrefix>* prefix_hits, | 618 std::vector<SBPrefix>* prefix_hits, |
675 std::vector<SBFullHashResult>* full_hits, | 619 std::vector<SBFullHashResult>* cache_hits) { |
676 base::Time last_update) { | |
677 // Clear the results first. | 620 // Clear the results first. |
678 matching_list->clear(); | |
679 prefix_hits->clear(); | 621 prefix_hits->clear(); |
680 full_hits->clear(); | 622 cache_hits->clear(); |
681 | 623 |
682 std::vector<SBFullHash> full_hashes; | 624 std::vector<SBFullHash> full_hashes; |
683 BrowseFullHashesToCheck(url, false, &full_hashes); | 625 BrowseFullHashesToCheck(url, false, &full_hashes); |
684 if (full_hashes.empty()) | 626 if (full_hashes.empty()) |
685 return false; | 627 return false; |
686 | 628 |
629 std::sort(full_hashes.begin(), full_hashes.end(), SBFullHashLess); | |
630 | |
631 return ContainsBrowseUrlHashes(full_hashes, prefix_hits, cache_hits); | |
632 } | |
633 | |
634 bool SafeBrowsingDatabaseNew::ContainsBrowseUrlHashes( | |
635 const std::vector<SBFullHash>& full_hashes, | |
636 std::vector<SBPrefix>* prefix_hits, | |
637 std::vector<SBFullHashResult>* cache_hits) { | |
687 // This function is called on the I/O thread, prevent changes to | 638 // This function is called on the I/O thread, prevent changes to |
688 // filter and caches. | 639 // filter and caches. |
689 base::AutoLock locked(lookup_lock_); | 640 base::AutoLock locked(lookup_lock_); |
690 | 641 |
691 // |browse_prefix_set_| is empty until it is either read from disk, or the | 642 // |browse_prefix_set_| is empty until it is either read from disk, or the |
692 // first update populates it. Bail out without a hit if not yet | 643 // first update populates it. Bail out without a hit if not yet |
693 // available. | 644 // available. |
694 if (!browse_prefix_set_.get()) | 645 if (!browse_prefix_set_.get()) |
695 return false; | 646 return false; |
696 | 647 |
697 size_t miss_count = 0; | 648 const base::Time now = base::Time::Now(); |
649 | |
698 for (size_t i = 0; i < full_hashes.size(); ++i) { | 650 for (size_t i = 0; i < full_hashes.size(); ++i) { |
699 const SBPrefix prefix = full_hashes[i].prefix; | 651 const SBPrefix prefix = full_hashes[i].prefix; |
700 if (browse_prefix_set_->Exists(prefix)) { | 652 if (browse_prefix_set_->Exists(prefix)) { |
701 prefix_hits->push_back(prefix); | 653 // Stupid workaround since std::equal_range requires you to have the |
702 if (prefix_miss_cache_.count(prefix) > 0) | 654 // search key be the same type as the elements in the collection. |
Scott Hess - ex-Googler
2014/04/01 22:08:36
My impression is that this is because while you ca
mattm
2014/04/03 01:38:12
Yeah. I had tried to hack by defining both orderin
| |
703 ++miss_count; | 655 SBAddFullHash search_value; |
656 search_value.full_hash.prefix = prefix; | |
657 | |
658 std::pair<std::vector<SBAddFullHash>::const_iterator, | |
659 std::vector<SBAddFullHash>::const_iterator> | |
660 db_fullhash_prefix_matches = | |
661 std::equal_range(full_browse_hashes_.begin(), | |
662 full_browse_hashes_.end(), | |
663 search_value, | |
664 SBAddFullHashPrefixLess); | |
665 bool match = false; | |
666 // If the database contains any fullhashes with the same prefix, then we | |
667 // only count it as a prefix hit if there is an exact fullhash match. | |
668 if (db_fullhash_prefix_matches.first != | |
669 db_fullhash_prefix_matches.second) { | |
670 // Could do binary_search here, but unlikely to be enough matches to | |
671 // matter. | |
672 for (std::vector<SBAddFullHash>::const_iterator hiter = | |
673 db_fullhash_prefix_matches.first; | |
674 hiter != db_fullhash_prefix_matches.second; | |
675 ++hiter) { | |
676 if (SBFullHashEqual(hiter->full_hash, full_hashes[i])) { | |
677 match = true; | |
678 break; | |
679 } | |
680 } | |
681 } else { | |
682 // If db has only a prefix hit, that's ok too. | |
683 match = true; | |
684 } | |
685 | |
686 // If we have a db match, check if we have a valid cached result for this | |
687 // prefix. | |
Scott Hess - ex-Googler
2014/04/01 22:08:36
I think maybe this is backwards. If there is a va
mattm
2014/04/03 01:38:12
Ah, good point. Could even check the cache before
| |
688 if (match) { | |
689 std::map<SBPrefix, SBCachedFullHashResult>::iterator citer = | |
690 browse_gethash_cache_.find(prefix); | |
691 if (citer != browse_gethash_cache_.end()) { | |
692 if (now > citer->second.expire_after) { | |
693 // If the cached entry is expired, remove it and ignore it. | |
694 browse_gethash_cache_.erase(citer); | |
695 } else { | |
696 for (std::vector<SBFullHashResult>::const_iterator fiter = | |
697 citer->second.full_hashes.begin(); | |
698 fiter != citer->second.full_hashes.end(); | |
699 ++fiter) { | |
700 if (SBFullHashEqual(full_hashes[i], fiter->hash)) { | |
701 cache_hits->push_back(*fiter); | |
702 } | |
703 } | |
704 // If the prefix was in the cache, don't add the prefix to | |
705 // prefix_hits. The result will be in cache_hits (if the fullhash | |
706 // matched), or not (if it was a cached miss). | |
707 match = false; | |
708 } | |
709 } | |
710 } | |
711 | |
712 // If there was a prefix match and no cached result, add to prefix_hits. | |
713 if (match) { | |
714 // Only add a given prefix once. Since full_hashes is sorted, we only | |
715 // need to check the last entry of prefix_hits. | |
716 if (prefix_hits->empty() || prefix_hits->back() != prefix) | |
717 prefix_hits->push_back(prefix); | |
718 } | |
704 } | 719 } |
705 } | 720 } |
706 | 721 |
707 // If all the prefixes are cached as 'misses', don't issue a GetHash. | 722 return !prefix_hits->empty() || !cache_hits->empty(); |
708 if (miss_count == prefix_hits->size()) | |
709 return false; | |
710 | |
711 // Find the matching full-hash results. |full_browse_hashes_| are from the | |
712 // database, |pending_browse_hashes_| are from GetHash requests between | |
713 // updates. | |
714 std::sort(prefix_hits->begin(), prefix_hits->end()); | |
715 | |
716 GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_, | |
717 full_hits, last_update); | |
718 GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_, | |
719 full_hits, last_update); | |
720 return true; | |
721 } | 723 } |
722 | 724 |
723 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( | 725 bool SafeBrowsingDatabaseNew::ContainsDownloadUrl( |
724 const std::vector<GURL>& urls, | 726 const std::vector<GURL>& urls, |
725 std::vector<SBPrefix>* prefix_hits) { | 727 std::vector<SBPrefix>* prefix_hits) { |
726 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); | 728 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); |
727 | 729 |
728 // Ignore this check when download checking is not enabled. | 730 // Ignore this check when download checking is not enabled. |
729 if (!download_store_.get()) | 731 if (!download_store_.get()) |
730 return false; | 732 return false; |
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
869 store->WriteAddPrefix(encoded_chunk_id, host); | 871 store->WriteAddPrefix(encoded_chunk_id, host); |
870 } else if (entry->IsPrefix()) { | 872 } else if (entry->IsPrefix()) { |
871 // Prefixes only. | 873 // Prefixes only. |
872 for (int i = 0; i < count; i++) { | 874 for (int i = 0; i < count; i++) { |
873 const SBPrefix prefix = entry->PrefixAt(i); | 875 const SBPrefix prefix = entry->PrefixAt(i); |
874 STATS_COUNTER("SB.PrefixAdd", 1); | 876 STATS_COUNTER("SB.PrefixAdd", 1); |
875 store->WriteAddPrefix(encoded_chunk_id, prefix); | 877 store->WriteAddPrefix(encoded_chunk_id, prefix); |
876 } | 878 } |
877 } else { | 879 } else { |
878 // Prefixes and hashes. | 880 // Prefixes and hashes. |
879 const base::Time receive_time = base::Time::Now(); | |
880 for (int i = 0; i < count; ++i) { | 881 for (int i = 0; i < count; ++i) { |
881 const SBFullHash full_hash = entry->FullHashAt(i); | 882 const SBFullHash full_hash = entry->FullHashAt(i); |
882 const SBPrefix prefix = full_hash.prefix; | 883 const SBPrefix prefix = full_hash.prefix; |
883 | 884 |
884 STATS_COUNTER("SB.PrefixAdd", 1); | 885 STATS_COUNTER("SB.PrefixAdd", 1); |
885 store->WriteAddPrefix(encoded_chunk_id, prefix); | 886 store->WriteAddPrefix(encoded_chunk_id, prefix); |
886 | 887 |
887 STATS_COUNTER("SB.PrefixAddFull", 1); | 888 STATS_COUNTER("SB.PrefixAddFull", 1); |
888 store->WriteAddHash(encoded_chunk_id, receive_time, full_hash); | 889 store->WriteAddHash(encoded_chunk_id, full_hash); |
889 } | 890 } |
890 } | 891 } |
891 } | 892 } |
892 | 893 |
893 // Helper to iterate over all the entries in the hosts in |chunks| and | 894 // Helper to iterate over all the entries in the hosts in |chunks| and |
894 // add them to the store. | 895 // add them to the store. |
895 void SafeBrowsingDatabaseNew::InsertAddChunks( | 896 void SafeBrowsingDatabaseNew::InsertAddChunks( |
896 const safe_browsing_util::ListType list_id, | 897 const safe_browsing_util::ListType list_id, |
897 const SBChunkList& chunks) { | 898 const SBChunkList& chunks) { |
898 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); | 899 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1046 if (chunk_deletes[i].is_sub_del) | 1047 if (chunk_deletes[i].is_sub_del) |
1047 store->DeleteSubChunk(encoded_chunk_id); | 1048 store->DeleteSubChunk(encoded_chunk_id); |
1048 else | 1049 else |
1049 store->DeleteAddChunk(encoded_chunk_id); | 1050 store->DeleteAddChunk(encoded_chunk_id); |
1050 } | 1051 } |
1051 } | 1052 } |
1052 } | 1053 } |
1053 | 1054 |
1054 void SafeBrowsingDatabaseNew::CacheHashResults( | 1055 void SafeBrowsingDatabaseNew::CacheHashResults( |
1055 const std::vector<SBPrefix>& prefixes, | 1056 const std::vector<SBPrefix>& prefixes, |
1056 const std::vector<SBFullHashResult>& full_hits) { | 1057 const std::vector<SBFullHashResult>& full_hits, |
1058 const base::TimeDelta& cache_lifetime) { | |
1059 | |
1060 const base::Time expire_after = base::Time::Now() + cache_lifetime; | |
1061 | |
1057 // This is called on the I/O thread, lock against updates. | 1062 // This is called on the I/O thread, lock against updates. |
1058 base::AutoLock locked(lookup_lock_); | 1063 base::AutoLock locked(lookup_lock_); |
1059 | 1064 |
1060 if (full_hits.empty()) { | 1065 // Create or reset all cached results for these prefixes. |
1061 prefix_miss_cache_.insert(prefixes.begin(), prefixes.end()); | 1066 for (std::vector<SBPrefix>::const_iterator i = prefixes.begin(); |
1062 return; | 1067 i != prefixes.end(); |
1068 ++i) { | |
1069 browse_gethash_cache_[*i] = SBCachedFullHashResult(expire_after); | |
1063 } | 1070 } |
1064 | 1071 |
1065 // TODO(shess): SBFullHashResult and SBAddFullHash are very similar. | 1072 // Insert any fullhash hits. Note that there may be one, multiple, or no |
1066 // Refactor to make them identical. | 1073 // fullhashes for any given entry in |prefixes|. |
1067 const base::Time now = base::Time::Now(); | 1074 for (std::vector<SBFullHashResult>::const_iterator i = full_hits.begin(); |
1068 const size_t orig_size = pending_browse_hashes_.size(); | 1075 i != full_hits.end(); |
1069 for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin(); | 1076 ++i) { |
1070 iter != full_hits.end(); ++iter) { | 1077 browse_gethash_cache_[i->hash.prefix].full_hashes.push_back(*i); |
1071 const int list_id = safe_browsing_util::GetListId(iter->list_name); | |
1072 if (list_id == safe_browsing_util::MALWARE || | |
1073 list_id == safe_browsing_util::PHISH) { | |
1074 int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id); | |
1075 SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash); | |
1076 pending_browse_hashes_.push_back(add_full_hash); | |
1077 } | |
1078 } | 1078 } |
1079 | |
1080 // Sort new entries then merge with the previously-sorted entries. | |
1081 std::vector<SBAddFullHash>::iterator | |
1082 orig_end = pending_browse_hashes_.begin() + orig_size; | |
1083 std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess); | |
1084 std::inplace_merge(pending_browse_hashes_.begin(), | |
1085 orig_end, pending_browse_hashes_.end(), | |
1086 SBAddFullHashPrefixLess); | |
1087 } | 1079 } |
1088 | 1080 |
1089 bool SafeBrowsingDatabaseNew::UpdateStarted( | 1081 bool SafeBrowsingDatabaseNew::UpdateStarted( |
1090 std::vector<SBListChunkRanges>* lists) { | 1082 std::vector<SBListChunkRanges>* lists) { |
1091 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); | 1083 DCHECK_EQ(creation_loop_, base::MessageLoop::current()); |
1092 DCHECK(lists); | 1084 DCHECK(lists); |
1093 | 1085 |
1094 // If |BeginUpdate()| fails, reset the database. | 1086 // If |BeginUpdate()| fails, reset the database. |
1095 if (!browse_store_->BeginUpdate()) { | 1087 if (!browse_store_->BeginUpdate()) { |
1096 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); | 1088 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN); |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1134 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { | 1126 if (ip_blacklist_store_ && !ip_blacklist_store_->BeginUpdate()) { |
1135 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); | 1127 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_BEGIN); |
1136 HandleCorruptDatabase(); | 1128 HandleCorruptDatabase(); |
1137 return false; | 1129 return false; |
1138 } | 1130 } |
1139 | 1131 |
1140 UpdateChunkRangesForLists(browse_store_.get(), | 1132 UpdateChunkRangesForLists(browse_store_.get(), |
1141 safe_browsing_util::kMalwareList, | 1133 safe_browsing_util::kMalwareList, |
1142 safe_browsing_util::kPhishingList, | 1134 safe_browsing_util::kPhishingList, |
1143 lists); | 1135 lists); |
1136 // Cached fullhash results must be cleared on every database update (whether | |
1137 // successful or not.) | |
1138 browse_gethash_cache_.clear(); | |
Scott Hess - ex-Googler
2014/04/01 22:08:36
I think this should be cleared when the update is
mattm
2014/04/03 01:38:12
The spec says "Clients must clear cached full-leng
| |
1144 | 1139 |
1145 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been | 1140 // NOTE(shess): |download_store_| used to contain kBinHashList, which has been |
1146 // deprecated. Code to delete the list from the store shows ~15k hits/day as | 1141 // deprecated. Code to delete the list from the store shows ~15k hits/day as |
1147 // of Feb 2014, so it has been removed. Everything _should_ be resilient to | 1142 // of Feb 2014, so it has been removed. Everything _should_ be resilient to |
1148 // extra data of that sort. | 1143 // extra data of that sort. |
1149 UpdateChunkRangesForList(download_store_.get(), | 1144 UpdateChunkRangesForList(download_store_.get(), |
1150 safe_browsing_util::kBinUrlList, lists); | 1145 safe_browsing_util::kBinUrlList, lists); |
1151 | 1146 |
1152 UpdateChunkRangesForList(csd_whitelist_store_.get(), | 1147 UpdateChunkRangesForList(csd_whitelist_store_.get(), |
1153 safe_browsing_util::kCsdWhiteList, lists); | 1148 safe_browsing_util::kCsdWhiteList, lists); |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1268 UpdateIpBlacklistStore(); | 1263 UpdateIpBlacklistStore(); |
1269 } | 1264 } |
1270 | 1265 |
1271 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( | 1266 void SafeBrowsingDatabaseNew::UpdateWhitelistStore( |
1272 const base::FilePath& store_filename, | 1267 const base::FilePath& store_filename, |
1273 SafeBrowsingStore* store, | 1268 SafeBrowsingStore* store, |
1274 SBWhitelist* whitelist) { | 1269 SBWhitelist* whitelist) { |
1275 if (!store) | 1270 if (!store) |
1276 return; | 1271 return; |
1277 | 1272 |
1278 // For the whitelists, we don't cache and save full hashes since all | |
1279 // hashes are already full. | |
1280 std::vector<SBAddFullHash> empty_add_hashes; | |
1281 | |
1282 // Note: |builder| will not be empty. The current data store implementation | 1273 // Note: |builder| will not be empty. The current data store implementation |
1283 // stores all full-length hashes as both full and prefix hashes. | 1274 // stores all full-length hashes as both full and prefix hashes. |
1284 safe_browsing::PrefixSetBuilder builder; | 1275 safe_browsing::PrefixSetBuilder builder; |
1285 std::vector<SBAddFullHash> full_hashes; | 1276 std::vector<SBAddFullHash> full_hashes; |
1286 if (!store->FinishUpdate(empty_add_hashes, &builder, &full_hashes)) { | 1277 if (!store->FinishUpdate(&builder, &full_hashes)) { |
1287 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); | 1278 RecordFailure(FAILURE_WHITELIST_DATABASE_UPDATE_FINISH); |
1288 WhitelistEverything(whitelist); | 1279 WhitelistEverything(whitelist); |
1289 return; | 1280 return; |
1290 } | 1281 } |
1291 | 1282 |
1292 #if defined(OS_MACOSX) | 1283 #if defined(OS_MACOSX) |
1293 base::mac::SetFileBackupExclusion(store_filename); | 1284 base::mac::SetFileBackupExclusion(store_filename); |
1294 #endif | 1285 #endif |
1295 | 1286 |
1296 LoadWhitelist(full_hashes, whitelist); | 1287 LoadWhitelist(full_hashes, whitelist); |
1297 } | 1288 } |
1298 | 1289 |
1299 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( | 1290 int64 SafeBrowsingDatabaseNew::UpdateHashPrefixStore( |
1300 const base::FilePath& store_filename, | 1291 const base::FilePath& store_filename, |
1301 SafeBrowsingStore* store, | 1292 SafeBrowsingStore* store, |
1302 FailureType failure_type) { | 1293 FailureType failure_type) { |
1303 // We don't cache and save full hashes. | |
1304 std::vector<SBAddFullHash> empty_add_hashes; | |
1305 | |
1306 // These results are not used after this call. Simply ignore the | 1294 // These results are not used after this call. Simply ignore the |
1307 // returned value after FinishUpdate(...). | 1295 // returned value after FinishUpdate(...). |
1308 safe_browsing::PrefixSetBuilder builder; | 1296 safe_browsing::PrefixSetBuilder builder; |
1309 std::vector<SBAddFullHash> add_full_hashes_result; | 1297 std::vector<SBAddFullHash> add_full_hashes_result; |
1310 | 1298 |
1311 if (!store->FinishUpdate(empty_add_hashes, | 1299 if (!store->FinishUpdate(&builder, &add_full_hashes_result)) { |
1312 &builder, | |
1313 &add_full_hashes_result)) { | |
1314 RecordFailure(failure_type); | 1300 RecordFailure(failure_type); |
1315 } | 1301 } |
1316 | 1302 |
1317 #if defined(OS_MACOSX) | 1303 #if defined(OS_MACOSX) |
1318 base::mac::SetFileBackupExclusion(store_filename); | 1304 base::mac::SetFileBackupExclusion(store_filename); |
1319 #endif | 1305 #endif |
1320 | 1306 |
1321 return GetFileSizeOrZero(store_filename); | 1307 return GetFileSizeOrZero(store_filename); |
1322 } | 1308 } |
1323 | 1309 |
1324 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { | 1310 void SafeBrowsingDatabaseNew::UpdateBrowseStore() { |
1325 // Copy out the pending add hashes. Copy rather than swapping in | |
1326 // case |ContainsBrowseUrl()| is called before the new filter is complete. | |
1327 std::vector<SBAddFullHash> pending_add_hashes; | |
1328 { | |
1329 base::AutoLock locked(lookup_lock_); | |
1330 pending_add_hashes.insert(pending_add_hashes.end(), | |
1331 pending_browse_hashes_.begin(), | |
1332 pending_browse_hashes_.end()); | |
1333 } | |
1334 | |
1335 // Measure the amount of IO during the filter build. | 1311 // Measure the amount of IO during the filter build. |
1336 base::IoCounters io_before, io_after; | 1312 base::IoCounters io_before, io_after; |
1337 base::ProcessHandle handle = base::Process::Current().handle(); | 1313 base::ProcessHandle handle = base::Process::Current().handle(); |
1338 scoped_ptr<base::ProcessMetrics> metric( | 1314 scoped_ptr<base::ProcessMetrics> metric( |
1339 #if !defined(OS_MACOSX) | 1315 #if !defined(OS_MACOSX) |
1340 base::ProcessMetrics::CreateProcessMetrics(handle) | 1316 base::ProcessMetrics::CreateProcessMetrics(handle) |
1341 #else | 1317 #else |
1342 // Getting stats only for the current process is enough, so NULL is fine. | 1318 // Getting stats only for the current process is enough, so NULL is fine. |
1343 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) | 1319 base::ProcessMetrics::CreateProcessMetrics(handle, NULL) |
1344 #endif | 1320 #endif |
1345 ); | 1321 ); |
1346 | 1322 |
1347 // IoCounters are currently not supported on Mac, and may not be | 1323 // IoCounters are currently not supported on Mac, and may not be |
1348 // available for Linux, so we check the result and only show IO | 1324 // available for Linux, so we check the result and only show IO |
1349 // stats if they are available. | 1325 // stats if they are available. |
1350 const bool got_counters = metric->GetIOCounters(&io_before); | 1326 const bool got_counters = metric->GetIOCounters(&io_before); |
1351 | 1327 |
1352 const base::TimeTicks before = base::TimeTicks::Now(); | 1328 const base::TimeTicks before = base::TimeTicks::Now(); |
1353 | 1329 |
1354 safe_browsing::PrefixSetBuilder builder; | 1330 safe_browsing::PrefixSetBuilder builder; |
1355 std::vector<SBAddFullHash> add_full_hashes; | 1331 std::vector<SBAddFullHash> add_full_hashes; |
1356 if (!browse_store_->FinishUpdate(pending_add_hashes, | 1332 if (!browse_store_->FinishUpdate(&builder, &add_full_hashes)) { |
1357 &builder, &add_full_hashes)) { | |
1358 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); | 1333 RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH); |
1359 return; | 1334 return; |
1360 } | 1335 } |
1361 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); | 1336 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); |
1362 | 1337 |
1363 // This needs to be in sorted order by prefix for efficient access. | 1338 // This needs to be in sorted order by prefix for efficient access. |
1364 std::sort(add_full_hashes.begin(), add_full_hashes.end(), | 1339 std::sort(add_full_hashes.begin(), add_full_hashes.end(), |
1365 SBAddFullHashPrefixLess); | 1340 SBAddFullHashPrefixLess); |
1366 | 1341 |
1367 // Swap in the newly built filter and cache. | 1342 // Swap in the newly built filter and cache. |
1368 { | 1343 { |
1369 base::AutoLock locked(lookup_lock_); | 1344 base::AutoLock locked(lookup_lock_); |
1370 full_browse_hashes_.swap(add_full_hashes); | 1345 full_browse_hashes_.swap(add_full_hashes); |
1371 | |
1372 // TODO(shess): If |CacheHashResults()| is posted between the | |
1373 // earlier lock and this clear, those pending hashes will be lost. | |
1374 // It could be fixed by only removing hashes which were collected | |
1375 // at the earlier point. I believe that is fail-safe as-is (the | |
1376 // hash will be fetched again). | |
1377 pending_browse_hashes_.clear(); | |
1378 prefix_miss_cache_.clear(); | |
1379 browse_prefix_set_.swap(prefix_set); | 1346 browse_prefix_set_.swap(prefix_set); |
1380 } | 1347 } |
1381 | 1348 |
1382 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " | 1349 DVLOG(1) << "SafeBrowsingDatabaseImpl built prefix set in " |
1383 << (base::TimeTicks::Now() - before).InMilliseconds() | 1350 << (base::TimeTicks::Now() - before).InMilliseconds() |
1384 << " ms total."; | 1351 << " ms total."; |
1385 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); | 1352 UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", base::TimeTicks::Now() - before); |
1386 | 1353 |
1387 // Persist the prefix set to disk. Since only this thread changes | 1354 // Persist the prefix set to disk. Since only this thread changes |
1388 // |browse_prefix_set_|, there is no need to lock. | 1355 // |browse_prefix_set_|, there is no need to lock. |
(...skipping 21 matching lines...) Expand all Loading... | |
1410 file_size = GetFileSizeOrZero(browse_filename_); | 1377 file_size = GetFileSizeOrZero(browse_filename_); |
1411 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", | 1378 UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes", |
1412 static_cast<int>(file_size / 1024)); | 1379 static_cast<int>(file_size / 1024)); |
1413 | 1380 |
1414 #if defined(OS_MACOSX) | 1381 #if defined(OS_MACOSX) |
1415 base::mac::SetFileBackupExclusion(browse_filename_); | 1382 base::mac::SetFileBackupExclusion(browse_filename_); |
1416 #endif | 1383 #endif |
1417 } | 1384 } |
1418 | 1385 |
1419 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { | 1386 void SafeBrowsingDatabaseNew::UpdateSideEffectFreeWhitelistStore() { |
1420 std::vector<SBAddFullHash> empty_add_hashes; | |
1421 safe_browsing::PrefixSetBuilder builder; | 1387 safe_browsing::PrefixSetBuilder builder; |
1422 std::vector<SBAddFullHash> add_full_hashes_result; | 1388 std::vector<SBAddFullHash> add_full_hashes_result; |
1423 | 1389 |
1424 if (!side_effect_free_whitelist_store_->FinishUpdate( | 1390 if (!side_effect_free_whitelist_store_->FinishUpdate( |
1425 empty_add_hashes, | 1391 &builder, &add_full_hashes_result)) { |
1426 &builder, | |
1427 &add_full_hashes_result)) { | |
1428 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); | 1392 RecordFailure(FAILURE_SIDE_EFFECT_FREE_WHITELIST_UPDATE_FINISH); |
1429 return; | 1393 return; |
1430 } | 1394 } |
1431 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); | 1395 scoped_ptr<safe_browsing::PrefixSet> prefix_set(builder.GetPrefixSet()); |
1432 | 1396 |
1433 // Swap in the newly built prefix set. | 1397 // Swap in the newly built prefix set. |
1434 { | 1398 { |
1435 base::AutoLock locked(lookup_lock_); | 1399 base::AutoLock locked(lookup_lock_); |
1436 side_effect_free_whitelist_prefix_set_.swap(prefix_set); | 1400 side_effect_free_whitelist_prefix_set_.swap(prefix_set); |
1437 } | 1401 } |
(...skipping 20 matching lines...) Expand all Loading... | |
1458 static_cast<int>(file_size / 1024)); | 1422 static_cast<int>(file_size / 1024)); |
1459 | 1423 |
1460 #if defined(OS_MACOSX) | 1424 #if defined(OS_MACOSX) |
1461 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); | 1425 base::mac::SetFileBackupExclusion(side_effect_free_whitelist_filename_); |
1462 base::mac::SetFileBackupExclusion( | 1426 base::mac::SetFileBackupExclusion( |
1463 side_effect_free_whitelist_prefix_set_filename_); | 1427 side_effect_free_whitelist_prefix_set_filename_); |
1464 #endif | 1428 #endif |
1465 } | 1429 } |
1466 | 1430 |
1467 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { | 1431 void SafeBrowsingDatabaseNew::UpdateIpBlacklistStore() { |
1468 // For the IP blacklist, we don't cache and save full hashes since all | |
1469 // hashes are already full. | |
1470 std::vector<SBAddFullHash> empty_add_hashes; | |
1471 | |
1472 // Note: prefixes will not be empty. The current data store implementation | 1432 // Note: prefixes will not be empty. The current data store implementation |
1473 // stores all full-length hashes as both full and prefix hashes. | 1433 // stores all full-length hashes as both full and prefix hashes. |
1474 safe_browsing::PrefixSetBuilder builder; | 1434 safe_browsing::PrefixSetBuilder builder; |
1475 std::vector<SBAddFullHash> full_hashes; | 1435 std::vector<SBAddFullHash> full_hashes; |
1476 if (!ip_blacklist_store_->FinishUpdate(empty_add_hashes, | 1436 if (!ip_blacklist_store_->FinishUpdate(&builder, &full_hashes)) { |
1477 &builder, &full_hashes)) { | |
1478 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); | 1437 RecordFailure(FAILURE_IP_BLACKLIST_UPDATE_FINISH); |
1479 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. | 1438 LoadIpBlacklist(std::vector<SBAddFullHash>()); // Clear the list. |
1480 return; | 1439 return; |
1481 } | 1440 } |
1482 | 1441 |
1483 #if defined(OS_MACOSX) | 1442 #if defined(OS_MACOSX) |
1484 base::mac::SetFileBackupExclusion(ip_blacklist_filename_); | 1443 base::mac::SetFileBackupExclusion(ip_blacklist_filename_); |
1485 #endif | 1444 #endif |
1486 | 1445 |
1487 LoadIpBlacklist(full_hashes); | 1446 LoadIpBlacklist(full_hashes); |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1683 base::AutoLock locked(lookup_lock_); | 1642 base::AutoLock locked(lookup_lock_); |
1684 ip_blacklist_.swap(new_blacklist); | 1643 ip_blacklist_.swap(new_blacklist); |
1685 } | 1644 } |
1686 | 1645 |
1687 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { | 1646 bool SafeBrowsingDatabaseNew::IsMalwareIPMatchKillSwitchOn() { |
1688 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); | 1647 SBFullHash malware_kill_switch = SBFullHashForString(kMalwareIPKillSwitchUrl); |
1689 std::vector<SBFullHash> full_hashes; | 1648 std::vector<SBFullHash> full_hashes; |
1690 full_hashes.push_back(malware_kill_switch); | 1649 full_hashes.push_back(malware_kill_switch); |
1691 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); | 1650 return ContainsWhitelistedHashes(csd_whitelist_, full_hashes); |
1692 } | 1651 } |
OLD | NEW |