chrome/common/metrics/entropy_provider_unittest.cc - Issue 10830318: Use a different algorithm with the low entropy source for field trials.

Side by Side Diff: chrome/common/metrics/entropy_provider_unittest.cc

Issue 10830318: Use a different algorithm with the low entropy source for field trials. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 8 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include <cmath>

	6 #include <limits>

	7 #include <numeric>

	8

	9 #include "base/basictypes.h"

	10 #include "base/guid.h"

	11 #include "base/memory/scoped_ptr.h"

	12 #include "base/rand_util.h"

	13 #include "base/string_number_conversions.h"

	14 #include "chrome/common/metrics/entropy_provider.h"

	15 #include "testing/gtest/include/gtest/gtest.h"

	16

	17 namespace metrics {

	18

	19 namespace {

	20

	21 // Size of the low entropy source to use for the permuted entropy provider

	22 // in tests.

	23 const size_t kMaxLowEntropySize = (1 << 13);

	24

	25 // Field trial names used in unit tests.

	26 const std::string kTestTrialNames[] = { "TestTrial", "AnotherTestTrial",

	27 "NewTabButton" };

	28

	29 // Computes the Chi-Square statistic for \|values\| assuming they follow a uniform

	30 // distribution, where each entry has expected value \|expected_value\|.

	31 //

	32 // The Chi-Square statistic is defined as Sum((O-E)^2/E) where O is the observed

	33 // value and E is the expected value.

	34 double ComputeChiSquare(const std::vector<int>& values,

	35 double expected_value) {

	36 double sum = 0;

	37 for (size_t i = 0; i < values.size(); ++i) {

	38 const double delta = values[i] - expected_value;

	39 sum += (delta * delta) / expected_value;

	40 }

	41 return sum;

	42 }

	43

	44 // Computes SHA1-based entropy for the given \|trial_name\| based on

	45 // \|entropy_source\|

	46 double GenerateSHA1Entropy(const std::string& entropy_source,

	47 const std::string& trial_name) {

	48 SHA1EntropyProvider sha1_provider(entropy_source);

	49 return sha1_provider.GetEntropyForTrial(trial_name);

	50 }

	51

	52 // Generates permutation-based entropy for the given \|trial_name\| based on

	53 // \|entropy_source\| which must be in the range [0, entropy_max).

	54 double GeneratePermutedEntropy(uint16 entropy_source,

	55 size_t entropy_max,

	56 const std::string& trial_name) {

	57 PermutedEntropyProvider permuted_provider(entropy_source, entropy_max);

	58 return permuted_provider.GetEntropyForTrial(trial_name);

	59 }

	60

	61 // Helper interface for testing used to generate entropy values for a given

	62 // field trial. Unlike EntropyProvider, which keeps the low/high entropy source

	63 // value constant and generates entropy for different trial names, instances

	64 // of TrialEntropyGenerator keep the trial name constant and generate low/high

	65 // entropy source values internally to produce each output entropy value.

	66 class TrialEntropyGenerator {

	67 public:

	68 virtual ~TrialEntropyGenerator() {}

	69 virtual double GenerateEntropyValue() const = 0;

	70 };

	71

	72 // An TrialEntropyGenerator that uses the SHA1EntropyProvider with the high

	73 // entropy source (random GUID with 128 bits of entropy + 13 additional bits of

	74 // entropy corresponding to a low entropy source).

	75 class SHA1EntropyGenerator : public TrialEntropyGenerator {

	76 public:

	77 explicit SHA1EntropyGenerator(const std::string& trial_name)

	78 : trial_name_(trial_name) {

	79 }

	80

	81 ~SHA1EntropyGenerator() {

	82 }

	83

	84 virtual double GenerateEntropyValue() const OVERRIDE {

	85 // Use a random GUID + 13 additional bits of entropy to match how the

	86 // SHA1EntropyProvider is used in metrics_service.cc.

	87 const int low_entropy_source =

	88 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));

	89 const std::string high_entropy_source =

	90 base::GenerateGUID() + base::IntToString(low_entropy_source);

	91 return GenerateSHA1Entropy(high_entropy_source, trial_name_);

	92 }

	93

	94 private:

	95 const std::string& trial_name_;

	96

	97 DISALLOW_COPY_AND_ASSIGN(SHA1EntropyGenerator);

	98 };

	99

	100 // An TrialEntropyGenerator that uses the permuted entropy provider algorithm,

	101 // using 13-bit low entropy source values.

	102 class PermutedEntropyGenerator : public TrialEntropyGenerator {

	103 public:

	104 explicit PermutedEntropyGenerator(const std::string& trial_name)

	105 : mapping_(kMaxLowEntropySize) {

	106 // Note: Given a trial name, the computed mapping will be the same.

	107 // As a performance optimization, pre-compute the mapping once per trial

	108 // name and index into it for each entropy value.

	109 internal::PermuteMappingUsingTrialName(trial_name, &mapping_);

	110 }

	111

	112 ~PermutedEntropyGenerator() {

	113 }

	114

	115 virtual double GenerateEntropyValue() const OVERRIDE {

	116 const int low_entropy_source =

	117 static_cast<uint16>(base::RandInt(0, kMaxLowEntropySize - 1));

	118 return mapping_[low_entropy_source] /

	119 static_cast<double>(kMaxLowEntropySize);

	120 }

	121

	122 private:

	123 std::vector<uint16> mapping_;

	124

	125 DISALLOW_COPY_AND_ASSIGN(PermutedEntropyGenerator);

	126 };

	127

	128 // Tests uniformity of a given \|entropy_generator\| using the Chi-Square Goodness

	129 // of Fit Test.

	130 void PerformEntropyUniformityTest(

	131 const std::string& trial_name,

	132 const TrialEntropyGenerator& entropy_generator) {

	133 // Number of buckets in the simulated field trials.

	134 const size_t kBucketCount = 20;

	135 // Max number of iterations to perform before giving up and failing.

	136 const size_t kMaxIterationCount = 100000;

	137 // The number of iterations to perform before each time the statistical

	138 // significance of the results is checked.

	139 const size_t kCheckIterationCount = 10000;

	140 // This is the Chi-Square threshold from the Chi-Square statistic table for

	141 // 19 degrees of freedom (based on \|kBucketCount\|) with a 99.9% confidence

	142 // level. See: http://www.medcalc.org/manual/chi-square-table.php

	143 const double kChiSquareThreshold = 43.82;

	144

	145 std::vector<int> distribution(kBucketCount);

	146

	147 for (size_t i = 1; i <= kMaxIterationCount; ++i) {

	148 const double entropy_value = entropy_generator.GenerateEntropyValue();

	149 const size_t bucket = static_cast<size_t>(kBucketCount * entropy_value);

	150 ASSERT_LT(bucket, kBucketCount);

	151 distribution[bucket] += 1;

	152

	153 // After \|kCheckIterationCount\| iterations, compute the Chi-Square

	154 // statistic of the distribution. If the resulting statistic is greater

	155 // than \|kChiSquareThreshold\|, we can conclude with 99.9% confidence

	156 // that the observed samples do not follow a uniform distribution.

	157 //

	158 // However, since 99.9% would still result in a false negative every

	159 // 1000 runs of the test, do not treat it as a failure (else the test

	160 // will be flaky). Instead, perform additional iterations to determine

	161 // if the distribution will converge, up to \|kMaxIterationCount\|.

	162 if ((i % kCheckIterationCount) == 0) {

	163 const double expected_value_per_bucket =

	164 static_cast<double>(i) / kBucketCount;

	165 const double chi_square =

	166 ComputeChiSquare(distribution, expected_value_per_bucket);

	167 if (chi_square < kChiSquareThreshold)

	168 break;

	169

	170 // If \|i == kMaxIterationCount\|, the Chi-Square statistic did not

	171 // converge after \|kMaxIterationCount\|.

	172 EXPECT_NE(i, kMaxIterationCount) << "Failed for trial " <<

	173 trial_name << " with chi_square = " << chi_square <<

	174 " after " << kMaxIterationCount << " iterations.";

	175 }

	176 }

	177 }

	178

	179 } // namespace

	180

	181 class EntropyProviderTest : public testing::Test {

	182 };

	183

	184 TEST_F(EntropyProviderTest, UseOneTimeRandomizationSHA1) {

	185 // Simply asserts that two trials using one-time randomization

	186 // that have different names, normally generate different results.

	187 //

	188 // Note that depending on the one-time random initialization, they

	189 // _might_ actually give the same result, but we know that given

	190 // the particular client_id we use for unit tests they won't.

	191 base::FieldTrialList field_trial_list(new SHA1EntropyProvider("client_id"));

	192 scoped_refptr<base::FieldTrial> trials[] = {

	193 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",

	194 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),

	195 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",

	196 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };

	197

	198 for (size_t i = 0; i < arraysize(trials); ++i) {

	199 trials[i]->UseOneTimeRandomization();

	200

	201 for (int j = 0; j < 100; ++j)

	202 trials[i]->AppendGroup("", 1);

	203 }

	204

	205 // The trials are most likely to give different results since they have

	206 // different names.

	207 EXPECT_NE(trials[0]->group(), trials[1]->group());

	208 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());

	209 }

	210

	211 TEST_F(EntropyProviderTest, UseOneTimeRandomizationPermuted) {

	212 // Simply asserts that two trials using one-time randomization

	213 // that have different names, normally generate different results.

	214 //

	215 // Note that depending on the one-time random initialization, they

	216 // _might_ actually give the same result, but we know that given

	217 // the particular client_id we use for unit tests they won't.

	218 base::FieldTrialList field_trial_list(

	219 new PermutedEntropyProvider(1234, kMaxLowEntropySize));

	220 scoped_refptr<base::FieldTrial> trials[] = {

	221 base::FieldTrialList::FactoryGetFieldTrial("one", 100, "default",

	222 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL),

	223 base::FieldTrialList::FactoryGetFieldTrial("two", 100, "default",

	224 base::FieldTrialList::kExpirationYearInFuture, 1, 1, NULL) };

	225

	226 for (size_t i = 0; i < arraysize(trials); ++i) {

	227 trials[i]->UseOneTimeRandomization();

	228

	229 for (int j = 0; j < 100; ++j)

	230 trials[i]->AppendGroup("", 1);

	231 }

	232

	233 // The trials are most likely to give different results since they have

	234 // different names.

	235 EXPECT_NE(trials[0]->group(), trials[1]->group());

	236 EXPECT_NE(trials[0]->group_name(), trials[1]->group_name());

	237 }

	238

	239 TEST_F(EntropyProviderTest, SHA1Entropy) {

	240 const double results[] = { GenerateSHA1Entropy("hi", "1"),

	241 GenerateSHA1Entropy("there", "1") };

	242

	243 EXPECT_NE(results[0], results[1]);

	244 for (size_t i = 0; i < arraysize(results); ++i) {

	245 EXPECT_LE(0.0, results[i]);

	246 EXPECT_GT(1.0, results[i]);

	247 }

	248

	249 EXPECT_EQ(GenerateSHA1Entropy("yo", "1"),

	250 GenerateSHA1Entropy("yo", "1"));

	251 EXPECT_NE(GenerateSHA1Entropy("yo", "something"),

	252 GenerateSHA1Entropy("yo", "else"));

	253 }

	254

	255 TEST_F(EntropyProviderTest, PermutedEntropy) {

	256 const double results[] = {

	257 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),

	258 GeneratePermutedEntropy(4321, kMaxLowEntropySize, "1") };

	259

	260 EXPECT_NE(results[0], results[1]);

	261 for (size_t i = 0; i < arraysize(results); ++i) {

	262 EXPECT_LE(0.0, results[i]);

	263 EXPECT_GT(1.0, results[i]);

	264 }

	265

	266 EXPECT_EQ(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"),

	267 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "1"));

	268 EXPECT_NE(GeneratePermutedEntropy(1234, kMaxLowEntropySize, "something"),

	269 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "else"));

	270 }

	271

	272 TEST_F(EntropyProviderTest, PermutedEntropyProviderResults) {

	273 // Verifies that PermutedEntropyProvider produces expected results. This

	274 // ensures that the results are the same between platforms and ensures that

	275 // changes to the implementation do not regress this accidentally.

	276

	277 EXPECT_DOUBLE_EQ(2194 / static_cast<double>(kMaxLowEntropySize),

	278 GeneratePermutedEntropy(1234, kMaxLowEntropySize, "XYZ"));

	279 EXPECT_DOUBLE_EQ(5676 / static_cast<double>(kMaxLowEntropySize),

	280 GeneratePermutedEntropy(1, kMaxLowEntropySize, "Test"));

	281 EXPECT_DOUBLE_EQ(1151 / static_cast<double>(kMaxLowEntropySize),

	282 GeneratePermutedEntropy(5000, kMaxLowEntropySize, "Foo"));

	283 }

	284

	285 TEST_F(EntropyProviderTest, SHA1EntropyIsUniform) {

	286 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {

	287 SHA1EntropyGenerator entropy_generator(kTestTrialNames[i]);

	288 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);

	289 }

	290 }

	291

	292 TEST_F(EntropyProviderTest, PermutedEntropyIsUniform) {

	293 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {

	294 PermutedEntropyGenerator entropy_generator(kTestTrialNames[i]);

	295 PerformEntropyUniformityTest(kTestTrialNames[i], entropy_generator);

	296 }

	297 }

	298

	299 TEST_F(EntropyProviderTest, SeededRandGeneratorIsUniform) {

	300 // Verifies that SeededRandGenerator has a uniform distribution.

	301 //

	302 // Mirrors RandUtilTest.RandGeneratorIsUniform in base/rand_util_unittest.cc.

	303

	304 const uint32 kTopOfRange = (std::numeric_limits<uint32>::max() / 4ULL) * 3ULL;

	305 const uint32 kExpectedAverage = kTopOfRange / 2ULL;

	306 const uint32 kAllowedVariance = kExpectedAverage / 50ULL; // +/- 2%

	307 const int kMinAttempts = 1000;

	308 const int kMaxAttempts = 1000000;

	309

	310 for (size_t i = 0; i < arraysize(kTestTrialNames); ++i) {

	311 const uint32 seed = internal::HashName(kTestTrialNames[i]);

	312 internal::SeededRandGenerator rand_generator(seed);

	313

	314 double cumulative_average = 0.0;

	315 int count = 0;

	316 while (count < kMaxAttempts) {

	317 uint32 value = rand_generator(kTopOfRange);

	318 cumulative_average = (count * cumulative_average + value) / (count + 1);

	319

	320 // Don't quit too quickly for things to start converging, or we may have

	321 // a false positive.

	322 if (count > kMinAttempts &&

	323 kExpectedAverage - kAllowedVariance < cumulative_average &&

	324 cumulative_average < kExpectedAverage + kAllowedVariance) {

	325 break;

	326 }

	327

	328 ++count;

	329 }

	330

	331 ASSERT_LT(count, kMaxAttempts) << "Expected average was " <<

	332 kExpectedAverage << ", average ended at " << cumulative_average <<

	333 ", for trial " << kTestTrialNames[i];

	334 }

	335 }

	336

	337 } // namespace metrics

OLD	NEW

« no previous file with comments | « chrome/common/metrics/entropy_provider.cc ('k') | chrome/common/metrics/variations/variations_util_unittest.cc » ('j') | no next file with comments »