Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <cmath> | |
| 6 #include <limits> | |
| 7 #include <numeric> | |
| 8 | |
| 9 #include "base/basictypes.h" | |
| 10 #include "base/guid.h" | |
| 11 #include "base/memory/scoped_ptr.h" | |
| 12 #include "base/rand_util.h" | |
| 13 #include "base/string_number_conversions.h" | |
| 14 #include "chrome/common/metrics/entropy_provider.h" | |
| 15 #include "testing/gtest/include/gtest/gtest.h" | |
| 16 | |
| 17 namespace { | |
| 18 | |
| 19 // Computes the standard deviation of distribution |values|. | |
| 20 double ComputeStandardDeviation(const std::vector<int>& values) { | |
| 21 const int sum = std::accumulate(values.begin(), values.end(), 0.0); | |
| 22 const int mean = sum / values.size(); | |
|
Ilya Sherman
2012/08/21 03:42:27
nit: Should this be a double rather than an int?
Alexei Svitkine (slow)
2012/08/21 14:25:49
Changed to use doubles throughout the function.
(
| |
| 23 const int sum_of_squares = std::inner_product(values.begin(), values.end(), | |
| 24 values.begin(), 0.0); | |
| 25 const double variance = static_cast<double>(sum_of_squares) / | |
| 26 values.size() - (mean * mean); | |
| 27 return std::sqrt(variance); | |
| 28 } | |
| 29 | |
| 30 } // namespace | |
| 31 | |
| 32 | |
| 33 class EntropyProviderTest : public testing::Test { | |
| 34 public: | |
| 35 // Computes SHA1-based entropy for the given |trial_name| based on | |
| 36 // |entropy_source| | |
| 37 double GenerateSHA1Entropy(const std::string& entropy_source, | |
| 38 const std::string& trial_name) { | |
| 39 SHA1EntropyProvider sha1_provider(entropy_source); | |
| 40 return sha1_provider.GetEntropyForTrial(trial_name); | |
| 41 } | |
| 42 | |
| 43 // Generates permutation-based entropy for the given |trial_name| based on | |
| 44 // |entropy_source| which must be in the range [0, entropy_max). | |
| 45 double GeneratePermutedEntropy(uint16 entropy_source, | |
| 46 size_t entropy_max, | |
| 47 const std::string& trial_name) { | |
| 48 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max); | |
| 49 return permuted_provider.GetEntropyForTrial(trial_name); | |
| 50 } | |
| 51 }; | |
| 52 | |
| 53 | |
| 54 TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) { | |
| 55 // Simply asserts that two trials using one-time randomization | |
| 56 // that have different names, normally generate different results. | |
| 57 // | |
| 58 // Note that depending on the one-time random initialization, they | |
| 59 // _might_ actually give the same result, but we know that given | |
| 60 // the particular client_id we use for unit tests they won't. | |
| 61 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id")); | |
| 62 scoped_refptr<base::FieldTrial> trials[] = { | |
| 63 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default", | |
| 64 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
| 65 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default", | |
| 66 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
| 67 }; | |
| 68 | |
| 69 for (size_t i = 0; i < arraysize(trials); ++i) { | |
| 70 trials[i]->UseOneTimeRandomization(); | |
| 71 | |
| 72 for (int j = 0; j < 100; ++j) | |
| 73 trials[i]->AppendGroup("", 1); | |
| 74 } | |
| 75 | |
| 76 // The trials are most likely to give different results since they have | |
| 77 // different names. | |
| 78 ASSERT_NE(trials[0]->group(), trials[1]->group()); | |
| 79 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name()); | |
| 80 } | |
| 81 | |
| 82 TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) { | |
| 83 // Simply asserts that two trials using one-time randomization | |
| 84 // that have different names, normally generate different results. | |
| 85 // | |
| 86 // Note that depending on the one-time random initialization, they | |
| 87 // _might_ actually give the same result, but we know that given | |
| 88 // the particular client_id we use for unit tests they won't. | |
| 89 const size_t kMaxLowEntropySize = (1 << 13); | |
| 90 base::FieldTrialList field_trial_list( | |
| 91 new PermutedEntropyProvider(1234, kMaxLowEntropySize)); | |
| 92 scoped_refptr<base::FieldTrial> trials[] = { | |
| 93 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default", | |
| 94 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
| 95 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default", | |
| 96 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL), | |
| 97 }; | |
| 98 | |
| 99 for (size_t i = 0; i < arraysize(trials); ++i) { | |
| 100 trials[i]->UseOneTimeRandomization(); | |
| 101 | |
| 102 for (int j = 0; j < 100; ++j) | |
| 103 trials[i]->AppendGroup("", 1); | |
| 104 } | |
| 105 | |
| 106 // The trials are most likely to give different results since they have | |
| 107 // different names. | |
| 108 ASSERT_NE(trials[0]->group(), trials[1]->group()); | |
| 109 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name()); | |
| 110 } | |
| 111 | |
| 112 TEST_F(EntropyProviderTest, SHA1Entropy) { | |
| 113 const double results[] = { | |
| 114 GenerateSHA1Entropy("hi", "1"), | |
| 115 GenerateSHA1Entropy("there", "1"), | |
| 116 }; | |
| 117 ASSERT_NE(results[0], results[1]); | |
| 118 for (size_t i = 0; i < arraysize(results); ++i) { | |
| 119 ASSERT_LE(0.0, results[i]); | |
| 120 ASSERT_GT(1.0, results[i]); | |
| 121 } | |
| 122 | |
| 123 ASSERT_EQ(GenerateSHA1Entropy("yo", "1"), | |
| 124 GenerateSHA1Entropy("yo", "1")); | |
| 125 ASSERT_NE(GenerateSHA1Entropy("yo", "something"), | |
| 126 GenerateSHA1Entropy("yo", "else")); | |
| 127 } | |
| 128 | |
| 129 TEST_F(EntropyProviderTest, PermutedEntropy) { | |
| 130 const size_t kMaxLowEntropySize = (1 << 13); | |
| 131 const double results[] = { | |
| 132 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), | |
| 133 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1"), | |
| 134 }; | |
| 135 ASSERT_NE(results[0], results[1]); | |
| 136 for (size_t i = 0; i < arraysize(results); ++i) { | |
| 137 ASSERT_LE(0.0, results[i]); | |
| 138 ASSERT_GT(1.0, results[i]); | |
| 139 } | |
| 140 | |
| 141 ASSERT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"), | |
| 142 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1")); | |
| 143 ASSERT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"), | |
| 144 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else")); | |
| 145 } | |
| 146 | |
| 147 TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) { | |
| 148 const size_t kMaxLowEntropySize = (1 << 13); | |
| 149 const int kBucketCount = 20; | |
| 150 const int kIterationCount = 100000; | |
|
Ilya Sherman
2012/08/21 03:42:27
nit: size_t throughout?
Alexei Svitkine (slow)
2012/08/21 14:25:49
Done.
| |
| 151 | |
| 152 const std::string trial_names[] = { | |
| 153 "TestTrial", | |
| 154 "AnotherTestTrial", | |
| 155 "NewTabButton", | |
| 156 }; | |
| 157 | |
| 158 for (size_t i = 0; i < arraysize(trial_names); ++i) { | |
| 159 std::vector<int> distribution(kBucketCount); | |
| 160 | |
| 161 for (int j = 0; j < kIterationCount; ++j) { | |
| 162 // Use a random GUID + 13 bits of entropy to match how the | |
| 163 // SHA1EntropyProvider is used in metrics_service.cc. | |
| 164 const int low_entropy_source = | |
| 165 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); | |
| 166 const std::string high_entropy_source = | |
| 167 base::GenerateGUID() + base::IntToString(low_entropy_source); | |
| 168 const double entropy_value = | |
| 169 GenerateSHA1Entropy(high_entropy_source, trial_names[i]); | |
| 170 const int bucket = static_cast<int>(kBucketCount * entropy_value); | |
| 171 ASSERT_LT(bucket, kBucketCount); | |
| 172 distribution[bucket] += 1; | |
| 173 } | |
| 174 | |
| 175 int min_value = std::numeric_limits<int>::max(); | |
| 176 int max_value = std::numeric_limits<int>::min(); | |
| 177 for (int j = 0; j < kBucketCount; ++j) { | |
| 178 min_value = std::min(min_value, distribution[j]); | |
| 179 max_value = std::max(max_value, distribution[j]); | |
| 180 } | |
| 181 | |
| 182 // TODO(asvitkine): Figure out pass / fail criteria. | |
| 183 printf("min = %d, max = %d, stddev = %f\n", min_value, max_value, | |
| 184 ComputeStandardDeviation(distribution)); | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) { | |
| 189 const size_t kMaxLowEntropySize = (1 << 13); | |
| 190 const int kBucketCount = 20; | |
| 191 const int kIterationCount = 100000; | |
| 192 | |
| 193 const std::string trial_names[] = { | |
| 194 "TestTrial", | |
| 195 "AnotherTestTrial", | |
| 196 "NewTabButton", | |
| 197 }; | |
| 198 | |
| 199 for (size_t i = 0; i < arraysize(trial_names); ++i) { | |
| 200 std::vector<int> distribution(kBucketCount); | |
| 201 | |
| 202 // Note: Given a trial name, the computed mapping will be the same. | |
| 203 // As a performance optimization, pre-compute the mapping once per trial | |
| 204 // name and index into it each iteration. | |
|
Ilya Sherman
2012/08/21 03:42:27
Is this performance optimization needed for the te
Alexei Svitkine (slow)
2012/08/21 14:25:49
Unfortunately, yes it is needed. Without it, this
| |
| 205 std::vector<uint16> mapping(kMaxLowEntropySize); | |
| 206 metrics_internal::PermuteMappingUsingTrialName(trial_names[i], &mapping); | |
| 207 | |
| 208 for (int j = 0; j < kIterationCount; ++j) { | |
| 209 const int low_entropy_source = | |
| 210 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); | |
| 211 const double entropy_value = | |
| 212 mapping[low_entropy_source] / static_cast<double>(kMaxLowEntropySize); | |
| 213 const int bucket = static_cast<int>(kBucketCount * entropy_value); | |
| 214 ASSERT_LT(bucket, kBucketCount); | |
| 215 distribution[bucket] += 1; | |
| 216 } | |
| 217 | |
| 218 int min_value = std::numeric_limits<int>::max(); | |
| 219 int max_value = std::numeric_limits<int>::min(); | |
| 220 for (int j = 0; j < kBucketCount; ++j) { | |
| 221 min_value = std::min(min_value, distribution[j]); | |
| 222 max_value = std::max(max_value, distribution[j]); | |
| 223 } | |
| 224 | |
| 225 // TODO(asvitkine): Figure out pass / fail criteria. | |
| 226 printf("min = %d, max = %d, stddev = %f\n", min_value, max_value, | |
| 227 ComputeStandardDeviation(distribution)); | |
| 228 } | |
| 229 } | |
| 230 | |
| 231 TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) { | |
| 232 // Verifies that SeededRandGenerator has a uniform distribution. | |
| 233 // | |
| 234 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc. | |
| 235 | |
| 236 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL; | |
| 237 const uint32 kExpectedAverage = kTopOfRange / 2ULL; | |
| 238 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2% | |
| 239 const int kMinAttempts = 1000; | |
| 240 const int kMaxAttempts = 1000000; | |
| 241 | |
| 242 const std::string trial_names[] = { | |
| 243 "TestTrial", | |
| 244 "AnotherTestTrial", | |
| 245 "NewTabButton", | |
| 246 }; | |
| 247 | |
| 248 for (size_t i = 0; i < arraysize(trial_names); ++i) { | |
| 249 const uint32 seed = metrics_internal::HashName(trial_names[i]); | |
| 250 metrics_internal::SeededRandGenerator rand_generator(seed); | |
| 251 | |
| 252 double cumulative_average = 0.0; | |
| 253 int count = 0; | |
| 254 while (count < kMaxAttempts) { | |
| 255 uint32 value = rand_generator(kTopOfRange); | |
| 256 cumulative_average = (count * cumulative_average + value) / (count + 1); | |
| 257 | |
| 258 // Don't quit too quickly for things to start converging, or we may have | |
| 259 // a false positive. | |
| 260 if (count > kMinAttempts && | |
| 261 kExpectedAverage - kAllowedVariance < cumulative_average && | |
| 262 cumulative_average < kExpectedAverage + kAllowedVariance) { | |
| 263 break; | |
| 264 } | |
| 265 | |
| 266 ++count; | |
| 267 } | |
| 268 | |
| 269 ASSERT_LT(count, kMaxAttempts) << "Expected average was " << | |
| 270 kExpectedAverage << ", average ended at " << cumulative_average << | |
| 271 ", for trial " << trial_names[i]; | |
| 272 } | |
| 273 } | |
| 274 | |
| OLD | NEW |