Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(148)

Side by Side Diff: chrome/common/metrics/entropy_provider_unittest.cc

Issue 10830318: Use a different algorithm with the low entropy source for field trials. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 8 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <cmath>
6 #include <limits>
7 #include <numeric>
8
9 #include "base/basictypes.h"
10 #include "base/guid.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "base/rand_util.h"
13 #include "base/string_number_conversions.h"
14 #include "chrome/common/metrics/entropy_provider.h"
15 #include "testing/gtest/include/gtest/gtest.h"
16
17 namespace metrics {
18
19 namespace {
20
21 // Size of the low entropy source to use for the permuted entropy provider
22 // in tests.
23 const size_t kMaxLowEntropySize = (1 << 13);
24
25 // Field trial names used in unit tests.
26 const std::string kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
27 "NewTabButton" };
28
29 // Computes the Chi-Square statistic for |values| assuming they follow a uniform
30 // distribution, where each entry has expected value |expected_value|.
31 //
32 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed
33 // value and E is the expected value.
34 double ComputeChiSquare(const std::vector<int>& values,
35 double expected_value) {
36 double sum = 0;
37 for (size_t i = 0; i < values.size(); ++i) {
38 const double delta = values[i] - expected_value;
39 sum += (delta * delta) / expected_value;
40 }
41 return sum;
42 }
43
44 // Computes SHA1-based entropy for the given |trial_name| based on
45 // |entropy_source|
46 double GenerateSHA1Entropy(const std::string& entropy_source,
47 const std::string& trial_name) {
48 SHA1EntropyProvider sha1_provider(entropy_source);
49 return sha1_provider.GetEntropyForTrial(trial_name);
50 }
51
52 // Generates permutation-based entropy for the given |trial_name| based on
53 // |entropy_source| which must be in the range [0, entropy_max).
54 double GeneratePermutedEntropy(uint16 entropy_source,
55 size_t entropy_max,
56 const std::string& trial_name) {
57 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);
58 return permuted_provider.GetEntropyForTrial(trial_name);
59 }
60
61 // Helper interface for testing used to generate entropy values for a given
62 // field trial. Unlike EntropyProvider, which keeps the low/high entropy source
63 // value constant and generates entropy for different trial names, instances
64 // of TrialEntropyGenerator keep the trial name constant and generate low/high
65 // entropy source values internally to produce each output entropy value.
66 class TrialEntropyGenerator {
67 public:
68 virtual ~TrialEntropyGenerator() {}
69 virtual double GenerateEntropyValue() const = 0;
70 };
71
72 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high
73 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of
74 // entropy corresponding to a low entropy source).
75 class SHA1EntropyGenerator : public TrialEntropyGenerator {
76 public:
77 explicit SHA1EntropyGenerator(const std::string& trial_name)
78 : trial_name_(trial_name) {
79 }
80
81 ~SHA1EntropyGenerator() {
82 }
83
84 virtual double GenerateEntropyValue() const OVERRIDE {
85 // Use a random GUID + 13 additional bits of entropy to match how the
86 // SHA1EntropyProvider is used in metrics_service.cc.
87 const int low_entropy_source =
88 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
89 const std::string high_entropy_source =
90 base::GenerateGUID() + base::IntToString(low_entropy_source);
91 return GenerateSHA1Entropy(high_entropy_source, trial_name_);
92 }
93
94 private:
95 const std::string& trial_name_;
96
97 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);
98 };
99
100 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,
101 // using 13-bit low entropy source values.
102 class PermutedEntropyGenerator : public TrialEntropyGenerator {
103 public:
104 explicit PermutedEntropyGenerator(const std::string& trial_name)
105 : mapping_(kMaxLowEntropySize) {
106 // Note: Given a trial name, the computed mapping will be the same.
107 // As a performance optimization, pre-compute the mapping once per trial
108 // name and index into it for each entropy value.
109 internal::PermuteMappingUsingTrialName(trial_name, &mapping_);
110 }
111
112 ~PermutedEntropyGenerator() {
113 }
114
115 virtual double GenerateEntropyValue() const OVERRIDE {
116 const int low_entropy_source =
117 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));
118 return mapping_[low_entropy_source] /
119 static_cast<double>(kMaxLowEntropySize);
120 }
121
122 private:
123 std::vector<uint16> mapping_;
124
125 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);
126 };
127
128 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness
129 // of Fit Test.
130 void PerformEntropyUniformityTest(
131 const std::string& trial_name,
132 const TrialEntropyGenerator& entropy_generator) {
133 // Number of buckets in the simulated field trials.
134 const size_t kBucketCount = 20;
135 // Max number of iterations to perform before giving up and failing.
136 const size_t kMaxIterationCount = 100000;
137 // The number of iterations to perform before each time the statistical
138 // significance of the results is checked.
139 const size_t kCheckIterationCount = 10000;
140 // This is the Chi-Square threshold from the Chi-Square statistic table for
141 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence
142 // level. See: http://www.medcalc.org/manual/chi-square-table.php
143 const double kChiSquareThreshold = 43.82;
144
145 std::vector<int> distribution(kBucketCount);
146
147 for (size_t i = 1; i <= kMaxIterationCount; ++i) {
148 const double entropy_value = entropy_generator.GenerateEntropyValue();
149 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);
150 ASSERT_LT(bucket, kBucketCount);
151 distribution[bucket] += 1;
152
153 // After |kCheckIterationCount| iterations, compute the Chi-Square
154 // statistic of the distribution. If the resulting statistic is greater
155 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence
156 // that the observed samples do not follow a uniform distribution.
157 //
158 // However, since 99.9% would still result in a false negative every
159 // 1000 runs of the test, do not treat it as a failure (else the test
160 // will be flaky). Instead, perform additional iterations to determine
161 // if the distribution will converge, up to |kMaxIterationCount|.
162 if ((i % kCheckIterationCount) == 0) {
163 const double expected_value_per_bucket =
164 static_cast<double>(i) / kBucketCount;
165 const double chi_square =
166 ComputeChiSquare(distribution, expected_value_per_bucket);
167 if (chi_square < kChiSquareThreshold)
168 break;
169
170 // If |i == kMaxIterationCount|, the Chi-Square statistic did not
171 // converge after |kMaxIterationCount|.
172 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<
173 trial_name << " with chi_square = " << chi_square <<
174 " after " << kMaxIterationCount << " iterations.";
175 }
176 }
177 }
178
179 } // namespace
180
181 class EntropyProviderTest : public testing::Test {
182 };
183
184 TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {
185 // Simply asserts that two trials using one-time randomization
186 // that have different names, normally generate different results.
187 //
188 // Note that depending on the one-time random initialization, they
189 // _might_ actually give the same result, but we know that given
190 // the particular client_id we use for unit tests they won't.
191 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));
192 scoped_refptr<base::FieldTrial> trials[] = {
193 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
194 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
195 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
196 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
197
198 for (size_t i = 0; i < arraysize(trials); ++i) {
199 trials[i]->UseOneTimeRandomization();
200
201 for (int j = 0; j < 100; ++j)
202 trials[i]->AppendGroup("", 1);
203 }
204
205 // The trials are most likely to give different results since they have
206 // different names.
207 EXPECT_NE(trials[0]->group(), trials[1]->group());
208 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
209 }
210
211 TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {
212 // Simply asserts that two trials using one-time randomization
213 // that have different names, normally generate different results.
214 //
215 // Note that depending on the one-time random initialization, they
216 // _might_ actually give the same result, but we know that given
217 // the particular client_id we use for unit tests they won't.
218 base::FieldTrialList field_trial_list(
219 new PermutedEntropyProvider(1234, kMaxLowEntropySize));
220 scoped_refptr<base::FieldTrial> trials[] = {
221 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",
222 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),
223 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",
224 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };
225
226 for (size_t i = 0; i < arraysize(trials); ++i) {
227 trials[i]->UseOneTimeRandomization();
228
229 for (int j = 0; j < 100; ++j)
230 trials[i]->AppendGroup("", 1);
231 }
232
233 // The trials are most likely to give different results since they have
234 // different names.
235 EXPECT_NE(trials[0]->group(), trials[1]->group());
236 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());
237 }
238
239 TEST_F(EntropyProviderTest, SHA1Entropy) {
240 const double results[] = { GenerateSHA1Entropy("hi", "1"),
241 GenerateSHA1Entropy("there", "1") };
242
243 EXPECT_NE(results[0], results[1]);
244 for (size_t i = 0; i < arraysize(results); ++i) {
245 EXPECT_LE(0.0, results[i]);
246 EXPECT_GT(1.0, results[i]);
247 }
248
249 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
250 GenerateSHA1Entropy("yo", "1"));
251 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
252 GenerateSHA1Entropy("yo", "else"));
253 }
254
255 TEST_F(EntropyProviderTest, PermutedEntropy) {
256 const double results[] = {
257 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
258 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };
259
260 EXPECT_NE(results[0], results[1]);
261 for (size_t i = 0; i < arraysize(results); ++i) {
262 EXPECT_LE(0.0, results[i]);
263 EXPECT_GT(1.0, results[i]);
264 }
265
266 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
267 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
268 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
269 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
270 }
271
272 TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {
273 // Verifies that PermutedEntropyProvider produces expected results. This
274 // ensures that the results are the same between platforms and ensures that
275 // changes to the implementation do not regress this accidentally.
276
277 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
278 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
279 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
280 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
281 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
282 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
283 }
284
285 TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {
286 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
287 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);
288 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
289 }
290 }
291
292 TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {
293 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
294 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);
295 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);
296 }
297 }
298
299 TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {
300 // Verifies that SeededRandGenerator has a uniform distribution.
301 //
302 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.
303
304 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;
305 const uint32 kExpectedAverage = kTopOfRange / 2ULL;
306 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%
307 const int kMinAttempts = 1000;
308 const int kMaxAttempts = 1000000;
309
310 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {
311 const uint32 seed = internal::HashName(kTestTrialNames[i]);
312 internal::SeededRandGenerator rand_generator(seed);
313
314 double cumulative_average = 0.0;
315 int count = 0;
316 while (count < kMaxAttempts) {
317 uint32 value = rand_generator(kTopOfRange);
318 cumulative_average = (count * cumulative_average + value) / (count + 1);
319
320 // Don't quit too quickly for things to start converging, or we may have
321 // a false positive.
322 if (count > kMinAttempts &&
323 kExpectedAverage - kAllowedVariance < cumulative_average &&
324 cumulative_average < kExpectedAverage + kAllowedVariance) {
325 break;
326 }
327
328 ++count;
329 }
330
331 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<
332 kExpectedAverage << ", average ended at " << cumulative_average <<
333 ", for trial " << kTestTrialNames[i];
334 }
335 }
336
337 } // namespace metrics
OLDNEW
« no previous file with comments | « chrome/common/metrics/entropy_provider.cc ('k') | chrome/common/metrics/variations/variations_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698