Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/rappor/bloom_filter.h" | 5 #include "components/rappor/bloom_filter.h" |
| 6 | 6 |
| 7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "third_party/smhasher/src/City.h" | 8 #include "third_party/smhasher/src/City.h" |
| 9 | 9 |
| 10 namespace rappor { | 10 namespace rappor { |
| 11 | 11 |
| 12 namespace { | |
| 13 | |
| 14 uint32_t ComputeHash(const std::string& str, uint32_t seed) { | |
| 15 // Using CityHash here because we have support for it in Dremel. Many hash | |
| 16 // functions, such as MD5, SHA1, or Murmur, would probably also work. | |
| 17 uint32_t index = | |
| 18 CityHash64WithSeed(str.data(), str.size(), seed); | |
| 19 return index; | |
| **Alexei Svitkine (slow)** (2015/04/24 18:10:01): Nit: Might as well return it directly.<br>**Steven Holte** (2015/04/24 18:39:57): Done. | |
| 20 } | |
| 21 | |
| 22 } // namespace | |
| 23 | |
| 24 | |
| **Alexei Svitkine (slow)** (2015/04/24 18:10:01): Nit: Remove empty line.<br>**Steven Holte** (2015/04/24 18:39:57): Done. | |
| 12 BloomFilter::BloomFilter(uint32_t bytes_size, | 25 BloomFilter::BloomFilter(uint32_t bytes_size, |
| 13 uint32_t hash_function_count, | 26 uint32_t hash_function_count, |
| 14 uint32_t hash_seed_offset) | 27 uint32_t hash_seed_offset) |
| 15 : bytes_(bytes_size), | 28 : bytes_(bytes_size), |
| 16 hash_function_count_(hash_function_count), | 29 hash_function_count_(hash_function_count), |
| 17 hash_seed_offset_(hash_seed_offset) { | 30 hash_seed_offset_(hash_seed_offset) { |
| 18 DCHECK_GT(bytes_size, 0u); | 31 DCHECK_GT(bytes_size, 0u); |
| 19 } | 32 } |
| 20 | 33 |
| 21 BloomFilter::~BloomFilter() {} | 34 BloomFilter::~BloomFilter() {} |
| 22 | 35 |
| 23 void BloomFilter::SetString(const std::string& str) { | 36 void BloomFilter::SetString(const std::string& str) { |
| 24 for (size_t i = 0; i < bytes_.size(); ++i) { | 37 for (size_t i = 0; i < bytes_.size(); ++i) { |
| 25 bytes_[i] = 0; | 38 bytes_[i] = 0; |
| 26 } | 39 } |
| 27 for (size_t i = 0; i < hash_function_count_; ++i) { | 40 for (size_t i = 0; i < hash_function_count_; ++i) { |
| 28 // Using CityHash here because we have support for it in Dremel. Many hash | 41 uint32_t index = ComputeHash(str, hash_seed_offset_ + i); |
| 29 // functions, such as MD5, SHA1, or Murmur, would probably also work. | |
| 30 uint32_t index = | |
| 31 CityHash64WithSeed(str.data(), str.size(), hash_seed_offset_ + i); | |
| 32 // Note that the "bytes" are uint8_t, so they are always 8-bits. | 42 // Note that the "bytes" are uint8_t, so they are always 8-bits. |
| 33 uint32_t byte_index = (index / 8) % bytes_.size(); | 43 uint32_t byte_index = (index / 8) % bytes_.size(); |
| 34 uint32_t bit_index = index % 8; | 44 uint32_t bit_index = index % 8; |
| 35 bytes_[byte_index] |= 1 << bit_index; | 45 bytes_[byte_index] |= 1 << bit_index; |
| 36 } | 46 } |
| 37 } | 47 } |
| 38 | 48 |
| 39 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) { | 49 void BloomFilter::SetBytesForTesting(const ByteVector& bytes) { |
| 40 DCHECK_EQ(bytes_.size(), bytes.size()); | 50 DCHECK_EQ(bytes_.size(), bytes.size()); |
| 41 for (size_t i = 0; i < bytes_.size(); ++i) { | 51 for (size_t i = 0; i < bytes_.size(); ++i) { |
| 42 bytes_[i] = bytes[i]; | 52 bytes_[i] = bytes[i]; |
| 43 } | 53 } |
| 44 } | 54 } |
| 45 | 55 |
| 56 namespace internal { | |
| 57 | |
| 58 uint64_t GetBloomBits(uint32_t bytes_size, | |
| 59 uint32_t hash_function_count, | |
| 60 uint32_t hash_seed_offset, | |
| 61 const std::string& str) { | |
| 62 // Make sure result fits in uint64 | |
| **Alexei Svitkine (slow)** (2015/04/24 18:10:01): Nit: add a period.<br>**Steven Holte** (2015/04/24 18:39:57): Done. | |
| 63 DCHECK_LE(bytes_size, 8u); | |
| 64 uint64_t output = 0; | |
| 65 uint32_t bits_size = bytes_size * 8; | |
| **Alexei Svitkine (slow)** (2015/04/24 18:10:01): Nit: const<br>**Steven Holte** (2015/04/24 18:39:57): Done. | |
| 66 for (size_t i = 0; i < hash_function_count; ++i) { | |
| 67 uint32_t index = ComputeHash(str, hash_seed_offset + i); | |
| 68 output |= 1 << (index % bits_size); | |
| 69 } | |
| 70 return output; | |
| 71 } | |
| 72 | |
| 73 } // namespace internal | |
| 74 | |
| 46 } // namespace rappor | 75 } // namespace rappor |
| OLD | NEW |