Chromium Code Reviews

Unified Diff: tools/bisect-perf-regression_test.py

Issue 413393002: Use Welch's t-test to calculate confidence scores in the bisect script. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Add Chromium copyright notice to ttest.py. Created 6 years, 4 months ago
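
Reviewer context: this patch replaces the old heuristic confidence score (distance between the two group means divided by the sum of their standard deviations, clamped to [0, 100]) with a confidence derived from Welch's t-test, a two-sample test that assumes neither equal variances nor equal sample sizes. The round values asserted in the new tests (80.0, 95.0, 99.9) are consistent with reporting 100 * (1 - p), with the p-value quantized to the significance levels available in a table lookup. The statistic itself is standard; the sketch below is orientation only, and the function name and return shape are illustrative assumptions, since the actual implementation lives in ttest.py rather than in this file:

import math

def welchs_t_and_df(sample1, sample2):
  """Sketch of Welch's t statistic and degrees of freedom.

  Illustrative only: assumes each sample has at least two values and that
  the two samples are not both constant. The real ttest.py may structure
  this differently.
  """
  n1, n2 = len(sample1), len(sample2)
  mean1 = sum(sample1) / float(n1)
  mean2 = sum(sample2) / float(n2)
  # Unbiased (Bessel-corrected) sample variances.
  var1 = sum((x - mean1) ** 2 for x in sample1) / float(n1 - 1)
  var2 = sum((x - mean2) ** 2 for x in sample2) / float(n2 - 1)
  # Welch's t: difference of means over the unpooled standard error.
  t = (mean1 - mean2) / math.sqrt(var1 / n1 + var2 / n2)
  # Welch-Satterthwaite approximation for the degrees of freedom.
  df = ((var1 / n1 + var2 / n2) ** 2 /
        ((var1 / n1) ** 2 / (n1 - 1) + (var2 / n2) ** 2 / (n2 - 1)))
  return t, df

A p-value would then be looked up from a t-distribution table at df degrees of freedom, and confidence reported as 100 * (1 - p); because such tables carry only a handful of significance levels, the scores land on round values like 80.0 and 95.0. Welch's variant matters here because bisect runs often produce unequal numbers of values for the good and bad revisions, as the imbalance tests in the diff below exercise.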
 # Copyright 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import math
 import unittest

 from auto_bisect import source_control as source_control_module

 # Special import necessary because filename contains dash characters.
(...skipping 13 matching lines...)
   """Test case for top-level functions in the bisect-perf-regression module."""

   def setUp(self):
     """Sets up the test environment before each test method."""
     pass

   def tearDown(self):
     """Cleans up the test environment after each test method."""
     pass

-  def testConfidenceScore(self):
-    """Tests the confidence calculation."""
-    bad_values = [[0, 1], [1, 2]]
-    good_values = [[6, 7], [7, 8]]
-    # Closest means are mean(1, 2) and mean(6, 7).
-    distance = 6.5 - 1.5
-    # Standard deviation of [n-1, n, n, n+1] is 0.8165.
-    stddev_sum = 0.8165 + 0.8165
-    # Expected confidence is an int in the range [0, 100].
-    expected_confidence = min(100, int(100 * distance / float(stddev_sum)))
-    self.assertEqual(
-        expected_confidence,
-        bisect_perf_module.ConfidenceScore(bad_values, good_values))
-
-  def testConfidenceScoreZeroConfidence(self):
-    """Tests the confidence calculation when it's expected to be 0."""
-    bad_values = [[0, 1], [1, 2], [4, 5], [0, 2]]
-    good_values = [[4, 5], [6, 7], [7, 8]]
-    # Both groups have value lists with means of 4.5, which means distance
-    # between groups is zero, and thus confidence is zero.
-    self.assertEqual(
-        0, bisect_perf_module.ConfidenceScore(bad_values, good_values))
-
-  def testConfidenceScoreMaxConfidence(self):
-    """Tests the confidence calculation when it's expected to be 100."""
-    bad_values = [[1, 1], [1, 1]]
-    good_values = [[1.2, 1.2], [1.2, 1.2]]
-    # Standard deviation in both groups is zero, so confidence is 100.
-    self.assertEqual(
-        100, bisect_perf_module.ConfidenceScore(bad_values, good_values))
+  def testConfidenceScoreHigh(self):
+    """Tests the confidence calculation when the groups are well separated."""
+    bad_values = [[0, 1, 1], [1, 2, 2]]
+    good_values = [[1, 2, 2], [3, 3, 4]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(95.0, confidence)
+
+  def testConfidenceScoreNotSoHigh(self):
+    """Tests the confidence calculation when the groups overlap."""
+    bad_values = [[0, 1, 1], [1, 2, 2]]
+    good_values = [[1, 1, 1], [3, 3, 4]]
+    # The good and bad groups are closer together than in the above test,
+    # so the confidence that they're different is a little lower.
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreZero(self):
+    """Tests the confidence calculation when it's expected to be 0."""
+    bad_values = [[4, 5], [7, 6], [8, 7]]
+    good_values = [[8, 7], [6, 7], [5, 4]]
+    # The good and bad sets contain the same values, so the confidence that
+    # they're different should be zero.
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(0.0, confidence)
+
+  def testConfidenceScoreVeryHigh(self):
+    """Tests the confidence calculation when it's expected to be high."""
+    bad_values = [[1, 1], [1, 1]]
+    good_values = [[1.2, 1.2], [1.2, 1.2]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(99.9, confidence)
+
+  def testConfidenceScoreImbalance(self):
+    """Tests the confidence calculation when one set of numbers is small."""
+    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+    good_values = [[1.4]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreEmptySample(self):
+    """Tests the confidence calculation when one set of numbers is empty."""
+    bad_values = [[1.1, 1.2], [1.1, 1.2], [1.0, 1.3], [1.2, 1.3]]
+    good_values = []
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(0.0, confidence)
+
+  def testConfidenceScoreFunctionalTestResultsInconsistent(self):
+    """Tests the confidence calculation when 0/1 results are mixed."""
+    bad_values = [[1], [1], [0], [1], [1], [1], [0], [1]]
+    good_values = [[0], [0], [1], [0], [1], [0]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(80.0, confidence)
+
+  def testConfidenceScoreFunctionalTestResultsConsistent(self):
+    """Tests the confidence calculation when 0/1 results are consistent."""
+    bad_values = [[1], [1], [1], [1], [1], [1], [1], [1]]
+    good_values = [[0], [0], [0], [0], [0], [0]]
+    confidence = bisect_perf_module.ConfidenceScore(bad_values, good_values)
+    self.assertEqual(99.9, confidence)

   def testParseDEPSStringManually(self):
     """Tests DEPS parsing."""
     bisect_options = bisect_perf_module.BisectOptions()
     bisect_instance = bisect_perf_module.BisectPerformanceMetrics(
         None, bisect_options)

     deps_file_contents = """
 vars = {
   'ffmpeg_hash':
(...skipping 179 matching lines...)
         '--browser=release page_cycler.intl_ja_zh')
     expected_command = ('tools/perf/run_benchmark -v --browser='
                         'release page_cycler.intl_ja_zh')
     self.assertEqual(
         bisect_instance.GetCompatibleCommand(command, git_revision, depot),
         expected_command)


 if __name__ == '__main__':
   unittest.main()
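
As an independent cross-check (not part of this change), SciPy computes the same Welch statistic via ttest_ind with equal_var=False. The flattening step below is an assumption about how ConfidenceScore treats its nested per-run value lists, and exact agreement with the asserted scores is not expected, because the script's table lookup quantizes the p-value:

from itertools import chain
from scipy import stats

def approximate_confidence(bad_values, good_values):
  # Flatten the per-run value lists into flat samples. (Assumption:
  # ConfidenceScore does something equivalent with its nested lists.)
  bad = list(chain.from_iterable(bad_values))
  good = list(chain.from_iterable(good_values))
  # equal_var=False selects Welch's t-test rather than Student's.
  t, p = stats.ttest_ind(bad, good, equal_var=False)
  return 100 * (1 - p)

# Example against the first test case above; the script itself would
# report the quantized score 95.0.
print(approximate_confidence([[0, 1, 1], [1, 2, 2]], [[1, 2, 2], [3, 3, 4]]))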