Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(247)

Side by Side Diff: components/rappor/bloom_filter.cc

Issue 1090683003: Alternative Multi-dimensional Rappor (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Address comments Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/rappor/bloom_filter.h" 5 #include "components/rappor/bloom_filter.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "third_party/smhasher/src/City.h" 8 #include "third_party/smhasher/src/City.h"
9 9
10 namespace rappor { 10 namespace rappor {
11 11
12 namespace {
13
14 uint32_t ComputeHash(const std::string& str, uint32_t seed) {
15 // Using CityHash here because we have support for it in Dremel. Many hash
16 // functions, such as MD5, SHA1, or Murmur, would probably also work.
17 uint32_t index =
18 CityHash64WithSeed(str.data(), str.size(), seed);
19 return index;
Alexei Svitkine (slow) 2015/04/24 18:10:01 Nit: Might as well return it directly.
Steven Holte 2015/04/24 18:39:57 Done.
20 }
21
22 } // namespace
23
24
Alexei Svitkine (slow) 2015/04/24 18:10:01 Nit: Remove empty line.
Steven Holte 2015/04/24 18:39:57 Done.
// Constructor: initializes bytes_ from |bytes_size|, stores
// |hash_function_count| and |hash_seed_offset| in the matching members, and
// asserts via DCHECK_GT that |bytes_size| is greater than zero.
12 BloomFilter::BloomFilter(uint32_t bytes_size, 25 BloomFilter::BloomFilter(uint32_t bytes_size,
13 uint32_t hash_function_count, 26 uint32_t hash_function_count,
14 uint32_t hash_seed_offset) 27 uint32_t hash_seed_offset)
15 : bytes_(bytes_size), 28 : bytes_(bytes_size),
16 hash_function_count_(hash_function_count), 29 hash_function_count_(hash_function_count),
17 hash_seed_offset_(hash_seed_offset) { 30 hash_seed_offset_(hash_seed_offset) {
18 DCHECK_GT(bytes_size, 0u); 31 DCHECK_GT(bytes_size, 0u);
19 } 32 }
20 33
// Destructor with an empty body; no explicit cleanup is performed here.
21 BloomFilter::~BloomFilter() {} 34 BloomFilter::~BloomFilter() {}
22 35
// Resets every element of bytes_ to zero, then, for each of the
// hash_function_count_ hash functions, hashes |str| with seed
// hash_seed_offset_ + i and sets the single bit that the hash selects.
// The old side of the diff calls CityHash64WithSeed inline; the new side calls
// ComputeHash with the same arguments.
23 void BloomFilter::SetString(const std::string& str) { 36 void BloomFilter::SetString(const std::string& str) {
24 for (size_t i = 0; i < bytes_.size(); ++i) { 37 for (size_t i = 0; i < bytes_.size(); ++i) {
25 bytes_[i] = 0; 38 bytes_[i] = 0;
26 } 39 }
27 for (size_t i = 0; i < hash_function_count_; ++i) { 40 for (size_t i = 0; i < hash_function_count_; ++i) {
28 // Using CityHash here because we have support for it in Dremel. Many hash 41 uint32_t index = ComputeHash(str, hash_seed_offset_ + i);
29 // functions, such as MD5, SHA1, or Murmur, would probably also work.
30 uint32_t index =
31 CityHash64WithSeed(str.data(), str.size(), hash_seed_offset_ + i);
32 // Note that the "bytes" are uint8_t, so they are always 8-bits. 42 // Note that the "bytes" are uint8_t, so they are always 8-bits.
// The low three bits of the hash pick the bit within a byte; the remaining
// bits, reduced modulo the byte count, pick the byte.
33 uint32_t byte_index = (index / 8) % bytes_.size(); 43 uint32_t byte_index = (index / 8) % bytes_.size();
34 uint32_t bit_index = index % 8; 44 uint32_t bit_index = index % 8;
35 bytes_[byte_index] |= 1 << bit_index; 45 bytes_[byte_index] |= 1 << bit_index;
36 } 46 }
37 } 47 }
38 48
// Overwrites bytes_ element by element with the contents of |bytes|.
// DCHECK_EQ asserts that |bytes| has the same size as bytes_.
39 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) { 49 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) {
40 DCHECK_EQ(bytes_.size(), bytes.size()); 50 DCHECK_EQ(bytes_.size(), bytes.size());
41 for (size_t i = 0; i < bytes_.size(); ++i) { 51 for (size_t i = 0; i < bytes_.size(); ++i) {
42 bytes_[i] = bytes[i]; 52 bytes_[i] = bytes[i];
43 } 53 }
44 } 54 }
45 55
56 namespace internal {
57
58 uint64_t GetBloomBits(uint32_t bytes_size,
59 uint32_t hash_function_count,
60 uint32_t hash_seed_offset,
61 const std::string& str) {
62 // Make sure result fits in uint64
Alexei Svitkine (slow) 2015/04/24 18:10:01 Nit: add a period.
Steven Holte 2015/04/24 18:39:57 Done.
63 DCHECK_LE(bytes_size, 8u);
64 uint64_t output = 0;
65 uint32_t bits_size = bytes_size * 8;
Alexei Svitkine (slow) 2015/04/24 18:10:01 Nit: const
Steven Holte 2015/04/24 18:39:57 Done.
66 for (size_t i = 0; i < hash_function_count; ++i) {
67 uint32_t index = ComputeHash(str, hash_seed_offset + i);
68 output |= 1 << (index % bits_size);
69 }
70 return output;
71 }
72
73 } // namespace internal
74
46 } // namespace rappor 75 } // namespace rappor
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698