chrome/browser/safe_browsing/prefix_set.cc - Issue 6286072: PrefixSet as an alternate to BloomFilter for safe-browsing.

Side by Side Diff: chrome/browser/safe_browsing/prefix_set.cc

Issue 6286072: PrefixSet as an alternate to BloomFilter for safe-browsing. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix windows build. Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/safe_browsing/prefix_set.h ('K') | « chrome/browser/safe_browsing/prefix_set.h ('k') | chrome/browser/safe_browsing/prefix_set_unittest.cc » ('j') | chrome/browser/safe_browsing/safe_browsing_database.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "chrome/browser/safe_browsing/prefix_set.h"

	6

	7 #include <algorithm>

	8 #include <math.h>

	9

	10 #include "base/logging.h"

	11 #include "base/metrics/histogram.h"

	12

	13 namespace {

	14

	15 // For \|std::upper_bound()\| to find a prefix w/in a vector of pairs.

	16 bool PrefixLess(const std::pair<SBPrefix,size_t>& a,

	17 const std::pair<SBPrefix,size_t>& b) {

	18 return a.first < b.first;

	19 }

	20

	21 } // namespace

	22

	23 namespace safe_browsing {

	24

	25 PrefixSet::PrefixSet(const std::vector<SBPrefix>& prefixes) {

	26 if (prefixes.size()) {

	27 std::vector<SBPrefix> sorted(prefixes.begin(), prefixes.end());

	28 std::sort(sorted.begin(), sorted.end());

	29

	30 // Lead with the first prefix.

	31 SBPrefix prev_prefix = sorted[0];

	32 size_t run_length = 0;

	33 index_.push_back(std::make_pair(prev_prefix, deltas_.size()));

	34

	35 for (size_t i = 1; i < sorted.size(); ++i) {

	36 // Don't encode zero deltas.

	37 if (sorted[i] == prev_prefix)

	38 continue;

	39

	40 // Calculate the delta. \|unsigned\| is mandatory, because the

	41 // prefixes could be more than INT_MAX apart.

	42 unsigned delta = sorted[i] - prev_prefix;
	lzheng 2011/02/05 01:03:42 unsigned int32? unsigned int32? Scott Hess - ex-Googler 2011/02/07 19:20:23 Why? The uint16 in the deltas is needed because t Show quoted text On 2011/02/05 01:03:42, lzheng wrote: > unsigned int32? Why? The uint16 in the deltas is needed because that's the entire point of the structure. But this value can be 32-bit or 64-bit depending on platform with no problem. uint32 suffices because SBPrefix is an int32, but that's not hugely relevant. [Unless I'm missing something, of course!] lzheng 2011/02/08 01:18:12 I was thinking to use uint32 because prefixes are I was thinking to use uint32 because prefixes are 4bytes, but it is true that 64bits has no problem. On 2011/02/07 19:20:23, shess wrote: Show quoted text > On 2011/02/05 01:03:42, lzheng wrote: > > unsigned int32? > > Why? The uint16 in the deltas is needed because that's the entire point of the > structure. But this value can be 32-bit or 64-bit depending on platform with no > problem. uint32 suffices because SBPrefix is an int32, but that's not hugely > relevant. > > [Unless I'm missing something, of course!]
	43

	44 // New index ref if the delta is too wide, or if too many

	45 // consecutive deltas have been encoded. Note that

	46 // \|kMaxDelta\| cannot itself be encoded.

	47 if (delta >= kMaxDelta \|\| run_length >= kMaxRun) {

	48 index_.push_back(std::make_pair(sorted[i], deltas_.size()));

	49 run_length = 0;

	50 } else {

	51 // Continue the run of deltas.

	52 deltas_.push_back(static_cast<uint16>(delta));

	53 ++run_length;

	54 }

	55

	56 prev_prefix = sorted[i];

	57 }

	58

	59 // Send up some memory-usage stats. Bits because fractional bytes

	60 // are weird.

	61 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT +

	62 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT;

	63 const size_t unique_prefixes = index_.size() + deltas_.size();

	64 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT;

	65 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",

	66 bits_used / unique_prefixes,

	67 kMaxBitsPerPrefix);

	68 }

	69 }

	70

	71 bool PrefixSet::Exists(SBPrefix prefix) const {

	72 if (index_.empty())

	73 return false;

	74

	75 // Find the first position after \|prefix\| in \|index_\|.

	76 std::vector<std::pair<SBPrefix,size_t> >::const_iterator

	77 iter = std::upper_bound(index_.begin(), index_.end(),

	78 std::pair<SBPrefix,size_t>(prefix, 0),

	79 PrefixLess);

	80

	81 // \|prefix\| comes before anything that's in the set.

	82 if (iter == index_.begin())

	83 return false;

	84

	85 // Capture the upper bound of our target entry's deltas.

	86 const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second);

	87

	88 // Back up to the entry our target is in.

	89 --iter;

	90

	91 // All prefixes in \|index_\| are in the set.

	92 SBPrefix current = iter->first;

	93 if (current == prefix)

	94 return true;

	95

	96 // Scan forward accumulating deltas while a match is possible.

	97 for (size_t di = iter->second; di < bound && current < prefix; ++di) {

	98 current += deltas_[di];

	99 }

	100

	101 return current == prefix;

	102 }

	103

	104 } // namespace safe_browsing

OLD	NEW