Chromium Code Reviews

Unified Diff: tools/bisect-perf-regression_test.py

Issue 413393002: Use Welch's t-test to calculate confidence scores in the bisect script. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Add Chromium copyright notice to ttest.py. Created 6 years, 4 months ago
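
Reviewer context: this patch replaces the old heuristic confidence score (distance between the two group means divided by the sum of their standard deviations, clamped to [0, 100]) with a confidence derived from Welch's t-test, a two-sample test that assumes neither equal variances nor equal sample sizes. The round values asserted in the new tests (80.0, 95.0, 99.9) are consistent with reporting 100 * (1 - p), with the p-value quantized to the significance levels available in a table lookup. The statistic itself is standard; the sketch below is orientation only, and the function name and return shape are illustrative assumptions, since the actual implementation lives in ttest.py rather than in this file:

import math

def welchs_t_and_df(sample1, sample2):
  """Sketch of Welch's t statistic and degrees of freedom.

  Illustrative only: assumes each sample has at least two values and that
  the two samples are not both constant. The real ttest.py may structure
  this differently.
  """
  n1, n2 = len(sample1), len(sample2)
  mean1 = sum(sample1) / float(n1)
  mean2 = sum(sample2) / float(n2)
  # Unbiased (Bessel-corrected) sample variances.
  var1 = sum((x - mean1) ** 2 for x in sample1) / float(n1 - 1)
  var2 = sum((x - mean2) ** 2 for x in sample2) / float(n2 - 1)
  # Welch's t: difference of means over the unpooled standard error.
  t = (mean1 - mean2) / math.sqrt(var1 / n1 + var2 / n2)
  # Welch-Satterthwaite approximation for the degrees of freedom.
  df = ((var1 / n1 + var2 / n2) ** 2 /
        ((var1 / n1) ** 2 / (n1 - 1) + (var2 / n2) ** 2 / (n2 - 1)))
  return t, df

A p-value would then be looked up from a t-distribution table at df degrees of freedom, and confidence reported as 100 * (1 - p); because such tables carry only a handful of significance levels, the scores land on round values like 80.0 and 95.0. Welch's variant matters here because bisect runs often produce unequal numbers of values for the good and bad revisions, as the imbalance tests in the diff below exercise.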
 # Copyright 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import math
 import unittest

 from auto_bisect import source_control as source_control_module

 # Special import necessary because filename contains dash characters.
(...skipping 13 matching lines...)
   """Test case for top-level functions in the bisect-perf-regression module."""

   def setUp(self):
     """Sets up the test environment before each test method."""
     pass

   def tearDown(self):
     """Cleans up the test environment after each test method."""
     pass

-  def testConfidenceScore(self):
-    """Tests the confidence calculation."""
-    bad_values = [[0, 1], [1, 2]]
-    good_values = [[6, 7], [7, 8]]
-    # Closest means are mean(1, 2) and mean(6, 7).
-    distance = 6.5 - 1.5
-    # Standard deviation of [n-1, n, n, n+1] is 0.8165.
-    stddev_sum = 0.8165 + 0.8165
-    # Expected confidence is an int in the range [0, 100].
-    expected_confidence = min(100, int(100 * distance / float(stddev_sum)))
-    self.assertEqual(
-        expected_confidence,
-        bisect_perf_module.ConfidenceScore(bad_values, good_values))
-
-  def testConfidenceScoreZeroConfidence(self):
-    """Tests the confidence calculation when it's expected to be 0."""
-    bad_values = [[0, 1], [1, 2], [4, 5], [0, 2]]
-    good_values = [[4, 5], [6, 7], [7, 8]]
-    # Both groups have value lists with means of 4.5, which means distance
-    # between groups is zero, and thus confidence is zero.
-    self.assertEqual(
-        0, bisect_perf_module.ConfidenceScore(bad_values, good_values))
-
-  def testConfidenceScoreMaxConfidence(self):
-    """Tests the confidence calculation when it's expected to be 100."""
-    bad_values = [[1, 1], [1, 1]]
-    good_values = [[1.2, 1.2], [1.2, 1.2]]
-    # Standard deviation in both groups is zero, so confidence is 100.
-    self.assertEqual(
-        100, bisect_perf_module.ConfidenceScore(bad_values, good_values))
+  def testConfidenceScoreHigh(self):
+    """Tests the confidence calculation when the groups are well separated."""
+    bad_values = [[0, 1, 1], [1, 2, 2]]
+    good_values = [[1, 2, 2], [3, 3, 4]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(95.0, confidence)
+
+  def testConfidenceScoreNotSoHigh(self):
+    """Tests the confidence calculation when the groups overlap."""
+    bad_values = [[0, 1, 1], [1, 2, 2]]
+    good_values = [[1, 1, 1], [3, 3, 4]]
+    # The good and bad groups are closer together than in the above test,
+    # so the confidence that they're different is a little lower.
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreZero(self):
+    """Tests the confidence calculation when it's expected to be 0."""
+    bad_values = [[4, 5], [7, 6], [8, 7]]
+    good_values = [[8, 7], [6, 7], [5, 4]]
+    # The good and bad sets contain the same values, so the confidence that
+    # they're different should be zero.
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(0.0, confidence)
+
+  def testConfidenceScoreVeryHigh(self):
+    """Tests the confidence calculation when it's expected to be high."""
+    bad_values = [[1, 1], [1, 1]]
+    good_values = [[1.2, 1.2], [1.2, 1.2]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(99.9, confidence)
+
+  def testConfidenceScoreImbalance(self):
+    """Tests the confidence calculation when one set of numbers is small."""
+    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+    good_values = [[1.4]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreEmptySample(self):
+    """Tests the confidence calculation when one set of numbers is empty."""
+    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+    good_values = []
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(0.0, confidence)
+
+  def testConfidenceScoreFunctionalTestResultsInconsistent(self):
+    """Tests the confidence calculation when 0/1 results are mixed."""
+    bad_values = [[1], [1], [0], [1], [1], [1], [0], [1]]
+    good_values = [[0], [0], [1], [0], [1], [0]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreFunctionalTestResultsConsistent(self):
+    """Tests the confidence calculation when 0/1 results are consistent."""
+    bad_values = [[1], [1], [1], [1], [1], [1], [1], [1]]
+    good_values = [[0], [0], [0], [0], [0], [0]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(99.9, confidence)

   def testParseDEPSStringManually(self):
     """Tests DEPS parsing."""
     bisect_options = bisect_perf_module.BisectOptions()
     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
         None, bisect_options)

     deps_file_contents = """
 vars = {
   'ffmpeg_hash':
(...skipping 179 matching lines...)
         '--browser=release page_cycler.intl_ja_zh')
     expected_command = ('tools/perf/run_benchmark -v --browser='
                         'release page_cycler.intl_ja_zh')
     self.assertEqual(
         bisect_instance.GetCompatibleCommand(command, git_revision, depot),
         expected_command)


 if __name__ == '__main__':
   unittest.main()
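
As an independent cross-check (not part of this change), SciPy computes the same Welch statistic via ttest_ind with equal_var=False. The flattening step below is an assumption about how ConfidenceScore treats its nested per-run value lists, and exact agreement with the asserted scores is not expected, because the script's table lookup quantizes the p-value:

from itertools import chain
from scipy import stats

def approximate_confidence(bad_values, good_values):
  # Flatten the per-run value lists into flat samples. (Assumption:
  # ConfidenceScore does something equivalent with its nested lists.)
  bad = list(chain.from_iterable(bad_values))
  good = list(chain.from_iterable(good_values))
  # equal_var=False selects Welch's t-test rather than Student's.
  t, p = stats.ttest_ind(bad, good, equal_var=False)
  return 100 * (1 - p)

# Example against the first test case above; the script itself would
# report the quantized score 95.0.
print(approximate_confidence([[0, 1, 1], [1, 2, 2]], [[1, 2, 2], [3, 3, 4]]))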