Index: chrome/browser/metrics/rappord.h |
diff --git a/chrome/browser/metrics/rappord.h b/chrome/browser/metrics/rappord.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..61b61d2b41ac0291eaaf299fdc11a8088fd42dee |
--- /dev/null |
+++ b/chrome/browser/metrics/rappord.h |
@@ -0,0 +1,163 @@ |
+// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef CHROME_BROWSER_METRICS_RAPPORD_H_ |
+#define CHROME_BROWSER_METRICS_RAPPORD_H_ |
+ |
+#include <assert.h> |
+#include <limits.h> |
+#include <math.h> |
+ |
+#include <bitset> |
+#include <map> |
+#include <string> |
+#include <vector> |
+ |
+#include "crypto/hmac.h" |
+#include "crypto/random.h" |
+// FIXME(holte) Avoid checkdeps issue. |
+// #include "third_party/smhasher/src/MurmurHash3.h" |
+ |
+static inline uint32 MurmurHash3String(const std::string& str, uint32 seed) { |
+ uint32 output; |
+ // MurmurHash3_x86_32(str.data(), str.size(), seed, &output); |
+ return output; |
+} |
+ |
+// NOTE: These were just generated from /dev/urandom. May be unecessary |
+// for murmur hash, or may be better to select carefully to help the |
+// hash function distribute its inputs. |
+static const uint32_t hashSeedCount = 20; |
+static const uint32_t mockHashSeeds[hashSeedCount] = { |
+ 0xd123957d, 0x6752fc9b, 0xcb6a0102, 0x1a82ea95, |
+ 0x55cb27bd, 0x0d23a17e, 0xbfb2beac, 0xbabca478, |
+ 0x6d0b103c, 0x5e98bc37, 0xe73ccb9d, 0xe60c1150, |
+ 0xa5f070a8, 0x91cc68c2, 0x12c919ff, 0xfec1d371, |
+ 0x01b6bf4c, 0xbbe5cf2b, 0x6bd30801, 0x292956d3 |
+}; |
+ |
+ |
+ |
+template <size_t BitCount, size_t HashCount, int lieProbIndex, int pProbIndex, |
+ int qProbIndex> |
+class RappordBloomFilter { |
+ public: |
+ typedef std::bitset<BitCount> bitmap_t; |
+ static const size_t MIN_BITMAP_SIZE = sizeof(uint64_t)*CHAR_BIT; |
+ |
+ RappordBloomFilter(const uint32_t seed, const std::string id) : |
+ hmac_(crypto::HMAC::SHA256) { |
+ if (!hmac_.Init(id)) { |
+ abort(); |
+ }; |
+ assert(hmac_.DigestLength() > sizeof(uint64_t)); |
+ hmac_state_ = uint64_t(BitCount * HashCount * |
+ lieProbIndex * pProbIndex * qProbIndex * seed); |
+ |
+ const bitmap_t liebits = get_random_bitmap_for_probability_index( |
+ lieProbIndex, false); |
+ // hard-code that we lie 50%/50% on ones vs zeros |
+ lieones_ = liebits & random_bitmap(false); |
+ truthbits_ = ~liebits; |
+ } |
+ |
+ bitmap_t get(const std::vector<std::string>& strings) { |
+ return generate_rappord(get_real_bloombits(strings)); |
+ } |
+ |
+ bitmap_t get(const std::string str) { |
+ return generate_rappord(get_real_bloombits(str)); |
+ } |
+ |
+ static bitmap_t get_real_bloombits(const std::string& str) { |
+ // Get the bits of a bloomfilter for a string |
+ assert(BitCount % MIN_BITMAP_SIZE == 0); |
+ assert(HashCount < hashSeedCount); |
+ bitmap_t bloombits; |
+ for (size_t j = 0; j < HashCount; ++j) { |
+ uint32_t index = MurmurHash3String(str, mockHashSeeds[j]); |
+ bloombits.set(index % BitCount); |
+ } |
+ return bloombits; |
+ } |
+ |
+ static bitmap_t get_real_bloombits(const std::vector<std::string>& strings) { |
+ // Get the bits of a bloomfilter for all of the input strings. |
+ bitmap_t bloombits; |
+ for (size_t i = 0, len = strings.size(); i < len; ++i) { |
+ bloombits |= get_real_bloombits(strings[i]); |
+ } |
+ return bloombits; |
+ } |
+ |
+ private: |
+ bitmap_t lieones_; |
+ bitmap_t truthbits_; |
+ crypto::HMAC hmac_; |
+ uint64_t hmac_state_; |
+ |
+ // Construct a RAPPORD based on the bits set in the bloomfilter, as well as |
+ // the bits for which the current user will deterministally lie zero or one. |
+ // Requires: (~truthbits) == liebits == (liezeros | lieones) |
+ // where (liezeros & lieones) == 0 |
+ // |
+ const bitmap_t generate_rappord(const bitmap_t& real_bloombits) { |
+ const bitmap_t zeros = get_random_bitmap_for_probability_index( |
+ pProbIndex, true); |
+ const bitmap_t ones = get_random_bitmap_for_probability_index( |
+ qProbIndex, true); |
+ const bitmap_t onebits = (real_bloombits & truthbits_) | lieones_; |
+ return (zeros & (~onebits)) | (ones & onebits); |
+ } |
+ |
+ bitmap_t random_bitmap(const bool fresh) { |
+ assert(BitCount % MIN_BITMAP_SIZE == 0); |
+ bitmap_t bits; |
+ for (size_t i = 0; i < BitCount; i += MIN_BITMAP_SIZE) { |
+ uint64_t randomBits; |
+ assert(MIN_BITMAP_SIZE/CHAR_BIT == sizeof(uint64_t)); |
+ if (!fresh) { |
+ hmacRandBytes(&randomBits); |
+ } else { |
+ crypto::RandBytes(&randomBits, sizeof(uint64_t)); |
+ } |
+ |
+ bits <<= MIN_BITMAP_SIZE; |
+ bits |= bitmap_t(randomBits); |
+ } |
+ return bits; |
+ } |
+ |
+ void hmacRandBytes(uint64_t* randomBits) { |
+ std::string state = bitmap_t(hmac_state_).to_string(); |
+ if (!hmac_.Sign(state, (unsigned char *) randomBits, sizeof(uint64_t))) { |
+ abort(); |
+ } |
+ ++hmac_state_; |
+ } |
+ |
+ // TODO(holte): change to template function |
+ bitmap_t get_random_bitmap_for_probability_index( |
+ int probIndex, const bool fresh) { |
+ switch (probIndex) { |
+ // 87.5% ones |
+ case -8: return ~(random_bitmap(fresh) & random_bitmap(fresh) & |
+ random_bitmap(fresh)); |
+ // 75% ones |
+ case -4: return ~(random_bitmap(fresh) & random_bitmap(fresh)); |
+ // 50% ones |
+ case 2: return random_bitmap(fresh); |
+ // 25% ones |
+ case 4: return random_bitmap(fresh) & random_bitmap(fresh); |
+ // 12.5% ones |
+ case 8: return random_bitmap(fresh) & random_bitmap(fresh) & |
+ random_bitmap(fresh); |
+ default: |
+ // Invalid probability index "probIndex" for coin flips |
+ abort(); |
+ } |
+ } |
+}; |
+ |
+#endif // CHROME_BROWSER_METRICS_RAPPORD_H_ |