OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/rappor/bloom_filter.h" | 5 #include "components/rappor/bloom_filter.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "third_party/smhasher/src/City.h" | 8 #include "third_party/smhasher/src/City.h" |
9 | 9 |
10 namespace rappor { | 10 namespace rappor { |
11 | 11 |
| 12 namespace { |
| 13 |
| 14 uint32_t ComputeHash(const std::string& str, uint32_t seed) { |
| 15 // Using CityHash here because we have support for it in Dremel. Many hash |
| 16 // functions, such as MD5, SHA1, or Murmur, would probably also work. |
| 17 return CityHash64WithSeed(str.data(), str.size(), seed); |
| 18 } |
| 19 |
| 20 } // namespace |
| 21 |
// Builds a filter whose |bytes_| storage is constructed with |bytes_size|.
// |hash_function_count| is how many hashes SetString() computes per string,
// and |hash_seed_offset| is added to each hash's index to form its seed.
BloomFilter::BloomFilter(uint32_t bytes_size,
                         uint32_t hash_function_count,
                         uint32_t hash_seed_offset)
    : bytes_(bytes_size),
      hash_function_count_(hash_function_count),
      hash_seed_offset_(hash_seed_offset) {
  // SetString() reduces indices modulo the byte count, so it must be nonzero.
  DCHECK_GT(bytes_size, 0u);
}
20 | 30 |
// Empty body: no explicit cleanup is performed here.
BloomFilter::~BloomFilter() {}
22 | 32 |
23 void BloomFilter::SetString(const std::string& str) { | 33 void BloomFilter::SetString(const std::string& str) { |
24 for (size_t i = 0; i < bytes_.size(); ++i) { | 34 for (size_t i = 0; i < bytes_.size(); ++i) { |
25 bytes_[i] = 0; | 35 bytes_[i] = 0; |
26 } | 36 } |
27 for (size_t i = 0; i < hash_function_count_; ++i) { | 37 for (size_t i = 0; i < hash_function_count_; ++i) { |
28 // Using CityHash here because we have support for it in Dremel. Many hash | 38 uint32_t index = ComputeHash(str, hash_seed_offset_ + i); |
29 // functions, such as MD5, SHA1, or Murmur, would probably also work. | |
30 uint32_t index = | |
31 CityHash64WithSeed(str.data(), str.size(), hash_seed_offset_ + i); | |
32 // Note that the "bytes" are uint8_t, so they are always 8-bits. | 39 // Note that the "bytes" are uint8_t, so they are always 8-bits. |
33 uint32_t byte_index = (index / 8) % bytes_.size(); | 40 uint32_t byte_index = (index / 8) % bytes_.size(); |
34 uint32_t bit_index = index % 8; | 41 uint32_t bit_index = index % 8; |
35 bytes_[byte_index] |= 1 << bit_index; | 42 bytes_[byte_index] |= 1 << bit_index; |
36 } | 43 } |
37 } | 44 } |
38 | 45 |
39 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) { | 46 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) { |
40 DCHECK_EQ(bytes_.size(), bytes.size()); | 47 DCHECK_EQ(bytes_.size(), bytes.size()); |
41 for (size_t i = 0; i < bytes_.size(); ++i) { | 48 for (size_t i = 0; i < bytes_.size(); ++i) { |
42 bytes_[i] = bytes[i]; | 49 bytes_[i] = bytes[i]; |
43 } | 50 } |
44 } | 51 } |
45 | 52 |
| 53 namespace internal { |
| 54 |
| 55 uint64_t GetBloomBits(uint32_t bytes_size, |
| 56 uint32_t hash_function_count, |
| 57 uint32_t hash_seed_offset, |
| 58 const std::string& str) { |
| 59 // Make sure result fits in uint64. |
| 60 DCHECK_LE(bytes_size, 8u); |
| 61 uint64_t output = 0; |
| 62 const uint32_t bits_size = bytes_size * 8; |
| 63 for (size_t i = 0; i < hash_function_count; ++i) { |
| 64 uint32_t index = ComputeHash(str, hash_seed_offset + i); |
| 65 output |= 1ULL << uint64_t(index % bits_size); |
| 66 } |
| 67 return output; |
| 68 } |
| 69 |
| 70 } // namespace internal |
| 71 |
46 } // namespace rappor | 72 } // namespace rappor |
OLD | NEW |