chrome/common/metrics/entropy_provider_unittest.cc - Issue 10830318: Use a different algorithm with the low entropy source for field trials.

Side by Side Diff: chrome/common/metrics/entropy_provider_unittest.cc

Issue 10830318: Use a different algorithm with the low entropy source for field trials. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/metrics/metrics_service.cc ('K') | « chrome/common/metrics/entropy_provider.cc ('k') | chrome/common/metrics/variations/variations_util_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include <limits>

	6

	7 #include "base/basictypes.h"

	8 #include "base/memory/scoped_ptr.h"

	9 #include "base/rand_util.h"

	10 #include "base/string_number_conversions.h"

	11 #include "chrome/common/metrics/entropy_provider.h"

	12 #include "testing/gtest/include/gtest/gtest.h"

	13

	14 class EntropyProviderTest : public testing::Test {

	15 public:

	16 // Computes SHA1-based entropy for the given \|trial_name\| based on

	17 // \|entropy_source\|

	18 double GenerateSHA1Entropy(const std::string& entropy_source,

	19 const std::string& trial_name) {

	20 scoped_ptr<SHA1EntropyProvider> sha1_provider(

	21 new SHA1EntropyProvider(entropy_source));

	22 return sha1_provider->GetEntropyForTrial(trial_name);

	23 }

	24

	25 // Generates permutation-based entropy for the given \|trial_name\| based on

	26 // \|entropy_source\| which must be in the range [0, entropy_max).

	27 double GeneratePermutedEntropy(uint16 entropy_source,

	28 size_t entropy_max,

	29 const std::string& trial_name) {

	30 scoped_ptr<PermutedEntropyProvider> permuted_provider(

	31 new PermutedEntropyProvider(entropy_source, entropy_max));
	Ilya Sherman 2012/08/17 07:34:28 nit: Why not just stack-allocate this guy? nit: Why not just stack-allocate this guy? Alexei Svitkine (slow) 2012/08/17 14:08:59 Good point. Done. Show quoted text On 2012/08/17 07:34:28, Ilya Sherman wrote: > nit: Why not just stack-allocate this guy? Good point. Done.
	32 return permuted_provider->GetEntropyForTrial(trial_name);

	33 }

	34 };

	35

	36

	37 TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {

	38 // Simply asserts that two trials using one-time randomization

	39 // that have different names, normally generate different results.

	40 //

	41 // Note that depending on the one-time random initialization, they

	42 // _might_ actually give the same result, but we know that given

	43 // the particular client_id we use for unit tests they won't.

	44 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));

	45 scoped_refptr<base::FieldTrial> trials[] = {

	46 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",

	47 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),

	48 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",

	49 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),

	50 };

	51

	52 for (size_t i = 0; i < arraysize(trials); ++i) {

	53 trials[i]->UseOneTimeRandomization();

	54

	55 for (int j = 0; j < 100; ++j)

	56 trials[i]->AppendGroup("", 1);

	57 }

	58

	59 // The trials are most likely to give different results since they have

	60 // different names.

	61 ASSERT_NE(trials[0]->group(), trials[1]->group());

	62 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name());

	63 }

	64

	65 TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {

	66 // Simply asserts that two trials using one-time randomization

	67 // that have different names, normally generate different results.

	68 //

	69 // Note that depending on the one-time random initialization, they

	70 // _might_ actually give the same result, but we know that given

	71 // the particular client_id we use for unit tests they won't.

	72 const size_t kMaxEntropySize = (1 << 13);

	73 base::FieldTrialList field_trial_list(

	74 new PermutedEntropyProvider(1234, kMaxEntropySize));

	75 scoped_refptr<base::FieldTrial> trials[] = {

	76 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",

	77 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),

	78 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",

	79 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),

	80 };

	81

	82 for (size_t i = 0; i < arraysize(trials); ++i) {

	83 trials[i]->UseOneTimeRandomization();

	84

	85 for (int j = 0; j < 100; ++j)

	86 trials[i]->AppendGroup("", 1);

	87 }

	88

	89 // The trials are most likely to give different results since they have

	90 // different names.

	91 ASSERT_NE(trials[0]->group(), trials[1]->group());

	92 ASSERT_NE(trials[0]->group_name(), trials[1]->group_name());

	93 }

	94

	95 TEST_F(EntropyProviderTest, SHA1Entropy) {

	96 double results[] = {

	97 GenerateSHA1Entropy("hi", "1"),

	98 GenerateSHA1Entropy("there", "1"),

	99 };

	100 ASSERT_NE(results[0], results[1]);

	101 for (size_t i = 0; i < arraysize(results); ++i) {

	102 ASSERT_LE(0.0, results[i]);

	103 ASSERT_GT(1.0, results[i]);

	104 }

	105

	106 ASSERT_EQ(GenerateSHA1Entropy("yo", "1"),

	107 GenerateSHA1Entropy("yo", "1"));

	108 ASSERT_NE(GenerateSHA1Entropy("yo", "something"),

	109 GenerateSHA1Entropy("yo", "else"));

	110 }

	111

	112 TEST_F(EntropyProviderTest, PermutedEntropy) {

	113 const size_t kMaxEntropySize = (1 << 13);

	114 double results[] = {

	115 GeneratePermutedEntropy(1234, kMaxEntropySize, "1"),

	116 GeneratePermutedEntropy(4321, kMaxEntropySize, "1"),

	117 };

	118 ASSERT_NE(results[0], results[1]);

	119 for (size_t i = 0; i < arraysize(results); ++i) {

	120 ASSERT_LE(0.0, results[i]);

	121 ASSERT_GT(1.0, results[i]);

	122 }

	123

	124 ASSERT_EQ(GeneratePermutedEntropy(1234, kMaxEntropySize, "1"),

	125 GeneratePermutedEntropy(1234, kMaxEntropySize, "1"));

	126 ASSERT_NE(GeneratePermutedEntropy(1234, kMaxEntropySize, "something"),

	127 GeneratePermutedEntropy(1234, kMaxEntropySize, "else"));

	128 }

	129

	130 TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {

	131 // Choose a random start number but go sequentially from there, so

	132 // that each test tries a different range but we never provide uniformly

	133 // distributed input data.

	134 int current_number = base::RandInt(0, std::numeric_limits<int>::max());
	Ilya Sherman 2012/08/17 07:34:28 We should print the seed on failure, so that failu We should print the seed on failure, so that failures are reproducible.
	135

	136 // The expected value of a random distribution is the average over all

	137 // samples as the number of samples approaches infinity. For a uniform

	138 // distribution from [0.0, 1.0) this would be 0.5.

	139 //

	140 // We do kSamplesBetweenChecks at a time and check if the value has converged

	141 // to a narrow interval around 0.5. A non-uniform distribution would likely

	142 // converge at something different, or not converge consistently within this

	143 // range (i.e. the test would start timing out occasionally).
	Ilya Sherman 2012/08/17 07:34:28 Hmm, this sounds like it's just checking that the Hmm, this sounds like it's just checking that the distribution is symmetric, not that it's uniform. Why not do something like have 20 buckets and check their counts? Alexei Svitkine (slow) 2012/08/20 16:02:55 I've now replaces this test with one that distribu Show quoted text On 2012/08/17 07:34:28, Ilya Sherman wrote: > Why not do something like have 20 buckets and check their counts? I've now replaces this test with one that distributes 100000 samples between 20 buckets and checks the min and max sizes of the buckets and the standard deviation of the counts per bucket. The results seem more or less consistent between the high entropy source using SHA1 and the low entropy source using the permuted method, although still having more variance than I expected. For example, here's the output from a run of both: [ RUN ] EntropyProviderTest.SHA1EntropyIsUniform min = 4871, max = 5107, stddev = 63.812225 min = 4850, max = 5129, stddev = 70.342022 min = 4875, max = 5185, stddev = 86.602540 [ OK ] EntropyProviderTest.SHA1EntropyIsUniform (2950 ms) [ RUN ] EntropyProviderTest.PermutedEntropyIsUniform min = 4933, max = 5071, stddev = 44.294469 min = 4817, max = 5194, stddev = 74.188948 min = 4894, max = 5109, stddev = 58.932164 [ OK ] EntropyProviderTest.PermutedEntropyIsUniform (515 ms) I am not sure what's a good way to choose the success / failure criteria for such a test without introducing flakiness. For example, I've seen the standard deviation go to as far as 105. So unless we make it really high (like 200), it seems like the test would be prone to be flaky. Any suggestions?
	144 int kSamplesBetweenChecks = 300;

	145 int num_samples = 0;

	146 double total_value = 0.0;

	147 while (true) {
	Ilya Sherman 2012/08/17 07:34:28 Rather than having a while (true) loop, let's figu Rather than having a while (true) loop, let's figure out how many iterations we need to gain 99.999999% confidence for convergence, and iterate that many times.
	148 for (int i = 0; i < kSamplesBetweenChecks; ++i) {

	149 total_value += GenerateSHA1Entropy(

	150 base::IntToString(current_number++), "salt");

	151 num_samples++;

	152 }

	153

	154 double average = total_value / num_samples;

	155 double kExpectedMin = 0.48;

	156 double kExpectedMax = 0.52;
	Ilya Sherman 2012/08/17 07:34:28 This is a pretty big range for the high-entropy so This is a pretty big range for the high-entropy source. Do we really expect so much variance?
	157

	158 if (num_samples > 1000 &&

	159 (average < kExpectedMin \|\| average > kExpectedMax)) {

	160 // Only printed once we have enough samples that it's very unlikely

	161 // things haven't converged.

	162 printf("After %d samples, the average was %f, outside the expected\n"

	163 "range (%f, %f). We will add more samples and check after every\n"

	164 "%d samples. If the average does not converge, something\n"

	165 "is broken. If it does converge, the test will pass.\n",

	166 num_samples, average,

	167 kExpectedMin, kExpectedMax, kSamplesBetweenChecks);

	168 } else {

	169 // Success.

	170 break;

	171 }

	172 }

	173 }

	174

	175 TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {

	176 // Choose a random start number but go sequentially from there, so

	177 // that each test tries a different range but we never provide uniformly

	178 // distributed input data.

	179 const size_t kMaxEntropySize = (1 << 13);

	180 int current_number = base::RandInt(0, kMaxEntropySize - 1);

	181

	182 // The expected value of a random distribution is the average over all

	183 // samples as the number of samples approaches infinity. For a uniform

	184 // distribution from [0.0, 1.0) this would be 0.5.

	185 //

	186 // We do kSamplesBetweenChecks at a time and check if the value has converged

	187 // to a narrow interval around 0.5. A non-uniform distribution would likely

	188 // converge at something different, or not converge consistently within this

	189 // range (i.e. the test would start timing out occasionally).

	190 int kSamplesBetweenChecks = 300;

	191 int num_samples = 0;

	192 double total_value = 0.0;

	193 while (true) {

	194 for (int i = 0; i < kSamplesBetweenChecks; ++i) {

	195 total_value += GeneratePermutedEntropy(current_number++ % kMaxEntropySize,

	196 kMaxEntropySize, "salt");

	197 num_samples++;

	198 }

	199

	200 double average = total_value / num_samples;

	201 double kExpectedMin = 0.48;

	202 double kExpectedMax = 0.52;

	203

	204 if (num_samples > 1000 &&

	205 (average < kExpectedMin \|\| average > kExpectedMax)) {

	206 // Only printed once we have enough samples that it's very unlikely

	207 // things haven't converged.

	208 printf("After %d samples, the average was %f, outside the expected\n"

	209 "range (%f, %f). We will add more samples and check after every\n"

	210 "%d samples. If the average does not converge, something\n"

	211 "is broken. If it does converge, the test will pass.\n",

	212 num_samples, average,

	213 kExpectedMin, kExpectedMax, kSamplesBetweenChecks);

	214 } else {

	215 // Success.

	216 break;

	217 }

	218 }

	219 }

	220

	221 TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {

	222 // Verifies. that SeededRandGenerator has a uniform distribution.

	223 //

	224 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.

	225

	226 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;

	227 const uint32 kExpectedAverage = kTopOfRange / 2ULL;

	228 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%

	229 const int kMinAttempts = 1000;

	230 const int kMaxAttempts = 1000000;

	231

	232 const std::string trial_names[] = {

	233 "TestTrial",

	234 "AnotherTestTrial",

	235 "NewTabButton",

	236 };

	237

	238 for (size_t i = 0; i < arraysize(trial_names); ++i) {

	239 const uint32 seed = internal::HashName(trial_names[i]);

	240 internal::SeededRandGenerator rand_generator(seed);

	241

	242 double cumulative_average = 0.0;

	243 int count = 0;

	244 while (count < kMaxAttempts) {

	245 uint32 value = rand_generator(kTopOfRange);

	246 cumulative_average = (count * cumulative_average + value) / (count + 1);

	247

	248 // Don't quit too quickly for things to start converging, or we may have

	249 // a false positive.

	250 if (count > kMinAttempts &&

	251 kExpectedAverage - kAllowedVariance < cumulative_average &&

	252 cumulative_average < kExpectedAverage + kAllowedVariance) {

	253 break;

	254 }

	255

	256 ++count;

	257 }

	258

	259 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<

	260 kExpectedAverage << ", average ended at " << cumulative_average;

	261 }

	262 }

	263

OLD	NEW