OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/common/metrics/entropy_provider.h" | |
6 | |
7 #include <cmath> | |
8 #include <limits> | |
9 #include <numeric> | |
10 | |
11 #include "base/basictypes.h" | |
12 #include "base/guid.h" | |
13 #include "base/memory/scoped_ptr.h" | |
14 #include "base/rand_util.h" | |
15 #include "base/strings/string_number_conversions.h" | |
16 #include "chrome/common/metrics/metrics_util.h" | |
17 #include "testing/gtest/include/gtest/gtest.h" | |
18 | |
19 namespace metrics { | |
20 | |
21 namespace { | |
22 | |
// Size of the low entropy source to use for the permuted entropy provider
// in tests. It is passed as the |entropy_max| argument of
// PermutedEntropyProvider, sizes the permutation mapping, and is the exclusive
// upper bound of the random low entropy values drawn by the generators in this
// file. Values in [0, 8000) fit in 13 bits.
const size_t kMaxLowEntropySize = 8000;

// Field trial names used in unit tests. The uniformity tests run once per
// name in this array.
const char* const kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",
                                        "NewTabButton" };
30 | |
// Returns the Chi-Square statistic of the observed counts in |values| measured
// against a uniform distribution in which every entry is expected to equal
// |expected_value|.
//
// The statistic is Sum((O-E)^2/E), with O the observed count of an entry and E
// the expected count. An empty |values| yields 0.
double ComputeChiSquare(const std::vector<int>& values,
                        double expected_value) {
  double chi_square = 0;
  for (std::vector<int>::const_iterator observed = values.begin();
       observed != values.end(); ++observed) {
    // Deviation of this entry from the expected count.
    const double deviation = *observed - expected_value;
    chi_square += (deviation * deviation) / expected_value;
  }
  return chi_square;
}
45 | |
46 // Computes SHA1-based entropy for the given |trial_name| based on | |
47 // |entropy_source| | |
48 double GenerateSHA1Entropy(const std::string& entropy_source, | |
49 const std::string& trial_name) { | |
50 SHA1EntropyProvider sha1_provider(entropy_source); | |
51 return sha1_provider.GetEntropyForTrial(trial_name, 0); | |
52 } | |
53 | |
54 // Generates permutation-based entropy for the given |trial_name| based on | |
55 // |entropy_source| which must be in the range [0, entropy_max). | |
56 double GeneratePermutedEntropy(uint16 entropy_source, | |
57 size_t entropy_max, | |
58 const std::string& trial_name) { | |
59 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max); | |
60 return permuted_provider.GetEntropyForTrial(trial_name, 0); | |
61 } | |
62 | |
// Test-only interface that yields entropy values for a single field trial.
// An EntropyProvider holds the low/high entropy source fixed and answers for
// varying trial names; a TrialEntropyGenerator does the opposite: the trial
// name is fixed and each call draws a new low/high entropy source value
// internally to produce the next output entropy value.
class TrialEntropyGenerator {
 public:
  // Produces one entropy value for the trial this generator represents.
  virtual double GenerateEntropyValue() const = 0;

  // Virtual so implementations can be destroyed through this interface.
  virtual ~TrialEntropyGenerator() {}
};
73 | |
74 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high | |
75 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of | |
76 // entropy corresponding to a low entropy source). | |
77 class SHA1EntropyGenerator : public TrialEntropyGenerator { | |
78 public: | |
79 explicit SHA1EntropyGenerator(const std::string& trial_name) | |
80 : trial_name_(trial_name) { | |
81 } | |
82 | |
83 virtual ~SHA1EntropyGenerator() { | |
84 } | |
85 | |
86 virtual double GenerateEntropyValue() const OVERRIDE { | |
87 // Use a random GUID + 13 additional bits of entropy to match how the | |
88 // SHA1EntropyProvider is used in metrics_service.cc. | |
89 const int low_entropy_source = | |
90 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); | |
91 const std::string high_entropy_source = | |
92 base::GenerateGUID() + base::IntToString(low_entropy_source); | |
93 return GenerateSHA1Entropy(high_entropy_source, trial_name_); | |
94 } | |
95 | |
96 private: | |
97 std::string trial_name_; | |
98 | |
99 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator); | |
100 }; | |
101 | |
102 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm, | |
103 // using 13-bit low entropy source values. | |
104 class PermutedEntropyGenerator : public TrialEntropyGenerator { | |
105 public: | |
106 explicit PermutedEntropyGenerator(const std::string& trial_name) | |
107 : mapping_(kMaxLowEntropySize) { | |
108 // Note: Given a trial name, the computed mapping will be the same. | |
109 // As a performance optimization, pre-compute the mapping once per trial | |
110 // name and index into it for each entropy value. | |
111 const uint32 randomization_seed = HashName(trial_name); | |
112 internal::PermuteMappingUsingRandomizationSeed(randomization_seed, | |
113 &mapping_); | |
114 } | |
115 | |
116 virtual ~PermutedEntropyGenerator() { | |
117 } | |
118 | |
119 virtual double GenerateEntropyValue() const OVERRIDE { | |
120 const int low_entropy_source = | |
121 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1)); | |
122 return mapping_[low_entropy_source] / | |
123 static_cast<double>(kMaxLowEntropySize); | |
124 } | |
125 | |
126 private: | |
127 std::vector<uint16> mapping_; | |
128 | |
129 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator); | |
130 }; | |
131 | |
132 // Tests uniformity of a given |entropy_generator| using the Chi-Square Goodness | |
133 // of Fit Test. | |
134 void PerformEntropyUniformityTest( | |
135 const std::string& trial_name, | |
136 const TrialEntropyGenerator& entropy_generator) { | |
137 // Number of buckets in the simulated field trials. | |
138 const size_t kBucketCount = 20; | |
139 // Max number of iterations to perform before giving up and failing. | |
140 const size_t kMaxIterationCount = 100000; | |
141 // The number of iterations to perform before each time the statistical | |
142 // significance of the results is checked. | |
143 const size_t kCheckIterationCount = 10000; | |
144 // This is the Chi-Square threshold from the Chi-Square statistic table for | |
145 // 19 degrees of freedom (based on |kBucketCount|) with a 99.9% confidence | |
146 // level. See: http://www.medcalc.org/manual/chi-square-table.php | |
147 const double kChiSquareThreshold = 43.82; | |
148 | |
149 std::vector<int> distribution(kBucketCount); | |
150 | |
151 for (size_t i = 1; i <= kMaxIterationCount; ++i) { | |
152 const double entropy_value = entropy_generator.GenerateEntropyValue(); | |
153 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value); | |
154 ASSERT_LT(bucket, kBucketCount); | |
155 distribution[bucket] += 1; | |
156 | |
157 // After |kCheckIterationCount| iterations, compute the Chi-Square | |
158 // statistic of the distribution. If the resulting statistic is greater | |
159 // than |kChiSquareThreshold|, we can conclude with 99.9% confidence | |
160 // that the observed samples do not follow a uniform distribution. | |
161 // | |
162 // However, since 99.9% would still result in a false negative every | |
163 // 1000 runs of the test, do not treat it as a failure (else the test | |
164 // will be flaky). Instead, perform additional iterations to determine | |
165 // if the distribution will converge, up to |kMaxIterationCount|. | |
166 if ((i % kCheckIterationCount) == 0) { | |
167 const double expected_value_per_bucket = | |
168 static_cast<double>(i) / kBucketCount; | |
169 const double chi_square = | |
170 ComputeChiSquare(distribution, expected_value_per_bucket); | |
171 if (chi_square < kChiSquareThreshold) | |
172 break; | |
173 | |
174 // If |i == kMaxIterationCount|, the Chi-Square statistic did not | |
175 // converge after |kMaxIterationCount|. | |
176 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " << | |
177 trial_name << " with chi_square = " << chi_square << | |
178 " after " << kMaxIterationCount << " iterations."; | |
179 } | |
180 } | |
181 } | |
182 | |
183 } // namespace | |
184 | |
185 TEST(EntropyProviderTest, UseOneTimeRandomizationSHA1) { | |
186 // Simply asserts that two trials using one-time randomization | |
187 // that have different names, normally generate different results. | |
188 // | |
189 // Note that depending on the one-time random initialization, they | |
190 // _might_ actually give the same result, but we know that given | |
191 // the particular client_id we use for unit tests they won't. | |
192 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id")); | |
193 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; | |
194 scoped_refptr<base::FieldTrial> trials[] = { | |
195 base::FieldTrialList::FactoryGetFieldTrial( | |
196 "one", 100, "default", kNoExpirationYear, 1, 1, | |
197 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), | |
198 base::FieldTrialList::FactoryGetFieldTrial( | |
199 "two", 100, "default", kNoExpirationYear, 1, 1, | |
200 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), | |
201 }; | |
202 | |
203 for (size_t i = 0; i < arraysize(trials); ++i) { | |
204 for (int j = 0; j < 100; ++j) | |
205 trials[i]->AppendGroup(std::string(), 1); | |
206 } | |
207 | |
208 // The trials are most likely to give different results since they have | |
209 // different names. | |
210 EXPECT_NE(trials[0]->group(), trials[1]->group()); | |
211 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); | |
212 } | |
213 | |
214 TEST(EntropyProviderTest, UseOneTimeRandomizationPermuted) { | |
215 // Simply asserts that two trials using one-time randomization | |
216 // that have different names, normally generate different results. | |
217 // | |
218 // Note that depending on the one-time random initialization, they | |
219 // _might_ actually give the same result, but we know that given | |
220 // the particular client_id we use for unit tests they won't. | |
221 base::FieldTrialList field_trial_list( | |
222 new PermutedEntropyProvider(1234, kMaxLowEntropySize)); | |
223 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; | |
224 scoped_refptr<base::FieldTrial> trials[] = { | |
225 base::FieldTrialList::FactoryGetFieldTrial( | |
226 "one", 100, "default", kNoExpirationYear, 1, 1, | |
227 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), | |
228 base::FieldTrialList::FactoryGetFieldTrial( | |
229 "two", 100, "default", kNoExpirationYear, 1, 1, | |
230 base::FieldTrial::ONE_TIME_RANDOMIZED, NULL), | |
231 }; | |
232 | |
233 for (size_t i = 0; i < arraysize(trials); ++i) { | |
234 for (int j = 0; j < 100; ++j) | |
235 trials[i]->AppendGroup(std::string(), 1); | |
236 } | |
237 | |
238 // The trials are most likely to give different results since they have | |
239 // different names. | |
240 EXPECT_NE(trials[0]->group(), trials[1]->group()); | |
241 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name()); | |
242 } | |
243 | |
244 TEST(EntropyProviderTest, UseOneTimeRandomizationWithCustomSeedPermuted) { | |
245 // Ensures that two trials with different names but the same custom seed used | |
246 // for one time randomization produce the same group assignments. | |
247 base::FieldTrialList field_trial_list( | |
248 new PermutedEntropyProvider(1234, kMaxLowEntropySize)); | |
249 const int kNoExpirationYear = base::FieldTrialList::kNoExpirationYear; | |
250 const uint32 kCustomSeed = 9001; | |
251 scoped_refptr<base::FieldTrial> trials[] = { | |
252 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed( | |
253 "one", 100, "default", kNoExpirationYear, 1, 1, | |
254 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL), | |
255 base::FieldTrialList::FactoryGetFieldTrialWithRandomizationSeed( | |
256 "two", 100, "default", kNoExpirationYear, 1, 1, | |
257 base::FieldTrial::ONE_TIME_RANDOMIZED, kCustomSeed, NULL), | |
258 }; | |
259 | |
260 for (size_t i = 0; i < arraysize(trials); ++i) { | |
261 for (int j = 0; j < 100; ++j) | |
262 trials[i]->AppendGroup(std::string(), 1); | |
263 } | |
264 | |
265 // Normally, these trials should produce different groups, but if the same | |
266 // custom seed is used, they should produce the same group assignment. | |
267 EXPECT_EQ(trials[0]->group(), trials[1]->group()); | |
268 EXPECT_EQ(trials[0]->group_name(), trials[1]->group_name()); | |
269 } | |
270 | |
// Basic sanity checks for SHA1-based entropy, exercised through
// GenerateSHA1Entropy(): the value depends on both the entropy source and the
// trial name, is repeatable for identical inputs, and lies in [0, 1).
TEST(EntropyProviderTest, SHA1Entropy) {
  // Same trial name, different entropy sources.
  const double results[] = { GenerateSHA1Entropy("hi", "1"),
                             GenerateSHA1Entropy("there", "1") };

  EXPECT_NE(results[0], results[1]);
  // Each value must satisfy 0.0 <= value < 1.0.
  for (size_t i = 0; i < arraysize(results); ++i) {
    EXPECT_LE(0.0, results[i]);
    EXPECT_GT(1.0, results[i]);
  }

  // Identical inputs give identical output.
  EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),
            GenerateSHA1Entropy("yo", "1"));
  // Same entropy source, different trial names.
  EXPECT_NE(GenerateSHA1Entropy("yo", "something"),
            GenerateSHA1Entropy("yo", "else"));
}
286 | |
// Basic sanity checks for permutation-based entropy, exercised through
// GeneratePermutedEntropy(): the value depends on both the low entropy source
// and the trial name, is repeatable for identical inputs, and lies in [0, 1).
TEST(EntropyProviderTest, PermutedEntropy) {
  // Same trial name, different low entropy source values.
  const double results[] = {
      GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
      GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };

  EXPECT_NE(results[0], results[1]);
  // Each value must satisfy 0.0 <= value < 1.0.
  for (size_t i = 0; i < arraysize(results); ++i) {
    EXPECT_LE(0.0, results[i]);
    EXPECT_GT(1.0, results[i]);
  }

  // Identical inputs give identical output.
  EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));
  // Same low entropy source, different trial names.
  EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),
            GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));
}
303 | |
TEST(EntropyProviderTest, PermutedEntropyProviderResults) {
  // Verifies that PermutedEntropyProvider produces expected results. This
  // ensures that the results are the same between platforms and ensures that
  // changes to the implementation do not regress this accidentally.
  //
  // Each expectation pins a hard-coded numerator, divided by
  // kMaxLowEntropySize, for one (low entropy source, trial name) pair.

  EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));
  EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));
  EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),
                   GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));
}
316 | |
317 TEST(EntropyProviderTest, SHA1EntropyIsUniform) { | |
318 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { | |
319 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]); | |
320 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator); | |
321 } | |
322 } | |
323 | |
324 TEST(EntropyProviderTest, PermutedEntropyIsUniform) { | |
325 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { | |
326 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]); | |
327 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator); | |
328 } | |
329 } | |
330 | |
331 TEST(EntropyProviderTest, SeededRandGeneratorIsUniform) { | |
332 // Verifies that SeededRandGenerator has a uniform distribution. | |
333 // | |
334 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc. | |
335 | |
336 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL; | |
337 const uint32 kExpectedAverage = kTopOfRange / 2ULL; | |
338 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2% | |
339 const int kMinAttempts = 1000; | |
340 const int kMaxAttempts = 1000000; | |
341 | |
342 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) { | |
343 const uint32 seed = HashName(kTestTrialNames[i]); | |
344 internal::SeededRandGenerator rand_generator(seed); | |
345 | |
346 double cumulative_average = 0.0; | |
347 int count = 0; | |
348 while (count < kMaxAttempts) { | |
349 uint32 value = rand_generator(kTopOfRange); | |
350 cumulative_average = (count * cumulative_average + value) / (count + 1); | |
351 | |
352 // Don't quit too quickly for things to start converging, or we may have | |
353 // a false positive. | |
354 if (count > kMinAttempts && | |
355 kExpectedAverage - kAllowedVariance < cumulative_average && | |
356 cumulative_average < kExpectedAverage + kAllowedVariance) { | |
357 break; | |
358 } | |
359 | |
360 ++count; | |
361 } | |
362 | |
363 ASSERT_LT(count, kMaxAttempts) << "Expected average was " << | |
364 kExpectedAverage << ", average ended at " << cumulative_average << | |
365 ", for trial " << kTestTrialNames[i]; | |
366 } | |
367 } | |
368 | |
369 } // namespace metrics | |
OLD | NEW |