OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/rappor/bloom_filter.h" |
| 6 |
| 7 #include "base/logging.h" |
| 8 #include "third_party/smhasher/src/MurmurHash3.h" |
| 9 |
| 10 namespace { |
| 11 |
| 12 // Distinct seeds are used to create unique hash functions for the Bloom filter. |
| 13 // These seeds were chosen at random, and then checked to ensure that, when |
| 14 // used in a 128-bit Bloom filter, each bit can be set by many elements in the |
| 15 // Alexa 10000. |
| 16 const uint32_t kHashSeeds[] = {0xd123957d, 0x6752fc9b, 0xcb6a0102, 0x1a82ea95}; |
| 17 |
| 18 uint32_t MurmurHash3String(const std::string& str, uint32_t seed) { |
| 19 uint32_t output = 0; |
| 20 // This function is optimized for x86_32, but should work on any platform. |
| 21 MurmurHash3_x86_32(str.data(), str.size(), seed, &output); |
| 22 return output; |
| 23 } |
| 24 |
| 25 } // namespace |
| 26 |
| 27 namespace rappor { |
| 28 |
| 29 BloomFilter::BloomFilter(uint32_t bytes_size, uint32_t hash_function_count) |
| 30 : bytes_(bytes_size), hash_function_count_(hash_function_count) { |
| 31 DCHECK_GT(bytes_size, 0u); |
| 32 DCHECK_LE(hash_function_count, arraysize(kHashSeeds)); |
| 33 } |
| 34 |
| 35 BloomFilter::~BloomFilter() {} |
| 36 |
| 37 void BloomFilter::AddString(const std::string& str) { |
| 38 for (size_t i = 0; i < hash_function_count_; ++i) { |
| 39 uint32_t index = MurmurHash3String(str, kHashSeeds[i]); |
| 40 uint32_t byte_index = (index / 8) % bytes_.size(); |
| 41 uint32_t bit_index = index % 8; |
| 42 bytes_[byte_index] |= 1 << bit_index; |
| 43 } |
| 44 } |
| 45 |
| 46 void BloomFilter::AddStrings(const std::vector<std::string>& strings) { |
| 47 for (size_t i = 0; i < strings.size(); ++i) |
| 48 AddString(strings[i]); |
| 49 } |
| 50 |
| 51 } // namespace rappor |
OLD | NEW |