| Index: chrome/browser/safe_browsing/safe_browsing_store.cc
|
| diff --git a/chrome/browser/safe_browsing/safe_browsing_store.cc b/chrome/browser/safe_browsing/safe_browsing_store.cc
|
| index a942b9aedcfbee06e368ca270ce79e3b84c47bc8..d8c69311064b4415d7d9843147228f74e449814a 100644
|
| --- a/chrome/browser/safe_browsing/safe_browsing_store.cc
|
| +++ b/chrome/browser/safe_browsing/safe_browsing_store.cc
|
| @@ -7,6 +7,7 @@
|
| #include <algorithm>
|
|
|
| #include "base/logging.h"
|
| +#include "base/metrics/histogram.h"
|
|
|
| namespace {
|
|
|
| @@ -131,6 +132,49 @@ void RemoveDeleted(ItemsT* items, const base::hash_set<int32>& del_set) {
|
| items->erase(end_iter, items->end());
|
| }
|
|
|
| +// Removes from prefixes the entries that are equal (per SBAddPrefixLess) to
|
| +// an entry in full_hashes; earlier implementations added those by mistake.
|
| +// Merge-style pass that assumes both inputs are sorted; drops at most one
|
| +// prefix per full hash, compacts in place, and returns the number dropped.
|
| +template <typename HashesT, typename PrefixesT>
|
| +size_t KnockoutPrefixVolunteers(const HashesT& full_hashes,
|
| +                                PrefixesT* prefixes) {
|
| +  typename PrefixesT::iterator prefixes_process = prefixes->begin();
|
| +  typename PrefixesT::iterator prefixes_out = prefixes->begin();
|
| +  typename HashesT::const_iterator hashes_process = full_hashes.begin();
|
| +  size_t skipped_count = 0;
|
| +
|
| +  while (hashes_process != full_hashes.end()) {
|
| +    // Keep (shifting left over any gap) prefixes ordered before this hash.
|
| +    while (prefixes_process != prefixes->end() &&
|
| +           SBAddPrefixLess(*prefixes_process, *hashes_process)) {
|
| +      if (prefixes_process != prefixes_out) {
|
| +        *prefixes_out = *prefixes_process;
|
| +      }
|
| +      ++prefixes_out;
|
| +      ++prefixes_process;
|
| +    }
|
| +
|
| +    // If the current hash is also not less than the prefix, that implies they
|
| +    // are equal. Skip the prefix.
|
| +    if (prefixes_process != prefixes->end() &&
|
| +        !SBAddPrefixLess(*hashes_process, *prefixes_process)) {
|
| +      ++skipped_count;
|
| +      ++prefixes_process;
|
| +    }
|
| +    ++hashes_process;
|
| +  }
|
| +
|
| +  // If any prefixes have been dropped, shift the unscanned tail over and erase
|
| +  // everything after it. The erase must run to end(): the copy may advance
|
| +  // prefixes_out past prefixes_process, and the tail is stale either way.
|
| +  if (prefixes_process != prefixes_out) {
|
| +    prefixes_out = std::copy(prefixes_process, prefixes->end(), prefixes_out);
|
| +    prefixes->erase(prefixes_out, prefixes->end());
|
| +  }
|
| +  return skipped_count;
|
| +}
|
| +
|
| } // namespace
|
|
|
| void SBProcessSubs(SBAddPrefixes* add_prefixes,
|
| @@ -154,6 +198,17 @@ void SBProcessSubs(SBAddPrefixes* add_prefixes,
|
| DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(),
|
| SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>));
|
|
|
| + // Earlier database code added prefixes when it saw fullhashes. The protocol
|
| + // should never send a chunk of mixed prefixes and fullhashes; the following
|
| + // removes any such cases that are seen.
|
| + // TODO(shess): Remove this code once most databases have been processed.
|
| + // Chunk churn should clean up anyone left over. This only takes a few ms to
|
| + // run through my current database, so it doesn't seem worthwhile to do much
|
| + // more than that.
|
| + size_t skipped = KnockoutPrefixVolunteers(*add_full_hashes, add_prefixes);
|
| + skipped += KnockoutPrefixVolunteers(*sub_full_hashes, sub_prefixes);
|
| + UMA_HISTOGRAM_COUNTS("SB2.VolunteerPrefixesRemoved", skipped);
|
| +
|
| // Factor out the prefix subs.
|
| SBAddPrefixes removed_adds;
|
| KnockoutSubs(sub_prefixes, add_prefixes,
|
|
|