tools/auto_bisect/bisect_perf_regression_test.py - Issue 764733005: Lower "confidence score" required in order to not abort, and refactor test.

Side by Side Diff: tools/auto_bisect/bisect_perf_regression_test.py

Issue 764733005: Lower "confidence score" required in order to not abort, and refactor test. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2014 The Chromium Authors. All rights reserved.	1 # Copyright 2014 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import os	5 import os

6 import re	6 import re

7 import shutil	7 import shutil

8 import sys	8 import sys

9 import unittest	9 import unittest

10	10

(...skipping 10 matching lines...) Expand all Loading...
21 CLEAR_NON_REGRESSION = [	21 CLEAR_NON_REGRESSION = [

22 # Mean: 30.223 Std. Dev.: 11.383	22 # Mean: 30.223 Std. Dev.: 11.383

23 [[16.886], [16.909], [16.99], [17.723], [17.952], [18.118], [19.028],	23 [[16.886], [16.909], [16.99], [17.723], [17.952], [18.118], [19.028],

24 [19.552], [21.954], [38.573], [38.839], [38.965], [40.007], [40.572],	24 [19.552], [21.954], [38.573], [38.839], [38.965], [40.007], [40.572],

25 [41.491], [42.002], [42.33], [43.109], [43.238]],	25 [41.491], [42.002], [42.33], [43.109], [43.238]],

26 # Mean: 34.76 Std. Dev.: 11.516	26 # Mean: 34.76 Std. Dev.: 11.516

27 [[16.426], [17.347], [20.593], [21.177], [22.791], [27.843], [28.383],	27 [[16.426], [17.347], [20.593], [21.177], [22.791], [27.843], [28.383],

28 [28.46], [29.143], [40.058], [40.303], [40.558], [41.918], [42.44],	28 [28.46], [29.143], [40.058], [40.303], [40.558], [41.918], [42.44],

29 [45.223], [46.494], [50.002], [50.625], [50.839]]	29 [45.223], [46.494], [50.002], [50.625], [50.839]]

30 ]	30 ]

	31

31 # Regression confidence: ~ 90%	32 # Regression confidence: ~ 90%

32 ALMOST_REGRESSION = [	33 ALMOST_REGRESSION = [

33 # Mean: 30.042 Std. Dev.: 2.002	34 # Mean: 30.042 Std. Dev.: 2.002

34 [[26.146], [28.04], [28.053], [28.074], [28.168], [28.209], [28.471],	35 [[26.146], [28.04], [28.053], [28.074], [28.168], [28.209], [28.471],

35 [28.652], [28.664], [30.862], [30.973], [31.002], [31.897], [31.929],	36 [28.652], [28.664], [30.862], [30.973], [31.002], [31.897], [31.929],

36 [31.99], [32.214], [32.323], [32.452], [32.696]],	37 [31.99], [32.214], [32.323], [32.452], [32.696]],

37 # Mean: 33.008 Std. Dev.: 4.265	38 # Mean: 33.008 Std. Dev.: 4.265

38 [[34.963], [30.741], [39.677], [39.512], [34.314], [31.39], [34.361],	39 [[34.963], [30.741], [39.677], [39.512], [34.314], [31.39], [34.361],

39 [25.2], [30.489], [29.434]]	40 [25.2], [30.489], [29.434]]

40 ]	41 ]

	42

41 # Regression confidence: ~ 98%	43 # Regression confidence: ~ 98%

42 BARELY_REGRESSION = [	44 BARELY_REGRESSION = [

43 # Mean: 28.828 Std. Dev.: 1.993	45 # Mean: 28.828 Std. Dev.: 1.993

44 [[26.96], [27.605], [27.768], [27.829], [28.006], [28.206], [28.393],	46 [[26.96], [27.605], [27.768], [27.829], [28.006], [28.206], [28.393],

45 [28.911], [28.933], [30.38], [30.462], [30.808], [31.74], [31.805],	47 [28.911], [28.933], [30.38], [30.462], [30.808], [31.74], [31.805],

46 [31.899], [32.077], [32.454], [32.597], [33.155]],	48 [31.899], [32.077], [32.454], [32.597], [33.155]],

47 # Mean: 31.156 Std. Dev.: 1.980	49 # Mean: 31.156 Std. Dev.: 1.980

48 [[28.729], [29.112], [29.258], [29.454], [29.789], [30.036], [30.098],	50 [[28.729], [29.112], [29.258], [29.454], [29.789], [30.036], [30.098],

49 [30.174], [30.534], [32.285], [32.295], [32.552], [32.572], [32.967],	51 [30.174], [30.534], [32.285], [32.295], [32.552], [32.572], [32.967],

50 [33.165], [33.403], [33.588], [33.744], [34.147], [35.84]]	52 [33.165], [33.403], [33.588], [33.744], [34.147], [35.84]]

51 ]	53 ]

	54

52 # Regression confidence: 99.5%	55 # Regression confidence: 99.5%

53 CLEAR_REGRESSION = [	56 CLEAR_REGRESSION = [

54 # Mean: 30.254 Std. Dev.: 2.987	57 # Mean: 30.254 Std. Dev.: 2.987

55 [[26.494], [26.621], [26.701], [26.997], [26.997], [27.05], [27.37],	58 [[26.494], [26.621], [26.701], [26.997], [26.997], [27.05], [27.37],

56 [27.488], [27.556], [31.846], [32.192], [32.21], [32.586], [32.596],	59 [27.488], [27.556], [31.846], [32.192], [32.21], [32.586], [32.596],

57 [32.618], [32.95], [32.979], [33.421], [33.457], [34.97]],	60 [32.618], [32.95], [32.979], [33.421], [33.457], [34.97]],

58 # Mean: 33.190 Std. Dev.: 2.972	61 # Mean: 33.190 Std. Dev.: 2.972

59 [[29.547], [29.713], [29.835], [30.132], [30.132], [30.33], [30.406],	62 [[29.547], [29.713], [29.835], [30.132], [30.132], [30.33], [30.406],

60 [30.592], [30.72], [34.486], [35.247], [35.253], [35.335], [35.378],	63 [30.592], [30.72], [34.486], [35.247], [35.253], [35.335], [35.378],

61 [35.934], [36.233], [36.41], [36.947], [37.982]]	64 [35.934], [36.233], [36.41], [36.947], [37.982]]

62 ]	65 ]

63	66

64 # Regression confidence > 95%, taken from: crbug.com/434318	67 # Regression confidence > 95%, taken from: crbug.com/434318

65 # Specifically from Builder android_nexus10_perf_bisect Build #1198	68 # Specifically from Builder android_nexus10_perf_bisect Build #1198

66 MULTIPLE_VALUES = [	69 MULTIPLE_VALUES = [

67 [	70 [

68 [18.916000,22.371000,8.527000,5.877000,5.407000,9.476000,8.100000,	71 [18.916, 22.371, 8.527, 5.877, 5.407, 9.476, 8.100, 5.334,

69 5.334000,4.507000,4.842000,8.485000,8.308000,27.490000,4.560000,	72 4.507, 4.842, 8.485, 8.308, 27.490, 4.560, 4.804, 23.068, 17.577,

70 4.804000,23.068000,17.577000,17.346000,26.738000,60.330000,32.307000,	73 17.346, 26.738, 60.330, 32.307, 5.468, 27.803, 27.373, 17.823,

71 5.468000,27.803000,27.373000,17.823000,5.158000,27.439000,5.236000,	74 5.158, 27.439, 5.236, 11.413],

72 11.413000	75 [18.999, 22.642, 8.158, 5.995, 5.495, 9.499, 8.092, 5.324,

73 ],	76 4.468, 4.788, 8.248, 7.853, 27.533, 4.410, 4.622, 22.341, 22.313,

74 [18.999000,22.642000,8.158000,5.995000,5.495000,9.499000,8.092000,	77 17.072, 26.731, 57.513, 33.001, 5.500, 28.297, 27.277, 26.462,

75 5.324000,4.468000,4.788000,8.248000,7.853000,27.533000,4.410000,	78 5.009, 27.361, 5.130, 10.955]

76 4.622000,22.341000,22.313000,17.072000,26.731000,57.513000,33.001000,

77 5.500000,28.297000,27.277000,26.462000,5.009000,27.361000,5.130000,

78 10.955000

79 ]

80 ],	79 ],

81 [	80 [

82 [18.238000,22.365000,8.555000,5.939000,5.437000,9.463000,7.047000,	81 [18.238, 22.365, 8.555, 5.939, 5.437, 9.463, 7.047, 5.345, 4.517,

83 5.345000,4.517000,4.796000,8.593000,7.901000,27.499000,4.378000,	82 4.796, 8.593, 7.901, 27.499, 4.378, 5.040, 4.904, 4.816, 4.828,

84 5.040000,4.904000,4.816000,4.828000,4.853000,57.363000,34.184000,	83 4.853, 57.363, 34.184, 5.482, 28.190, 27.290, 26.694, 5.099,

85 5.482000,28.190000,27.290000,26.694000,5.099000,4.905000,5.290000,	84 4.905, 5.290, 4.813],

86 4.813000	85 [18.301, 22.522, 8.035, 6.021, 5.565, 9.037, 6.998, 5.321, 4.485,

87 ],	86 4.768, 8.397, 7.865, 27.636, 4.640, 5.015, 4.962, 4.933, 4.977,

88 [18.301000,22.522000,8.035000,6.021000,5.565000,9.037000,6.998000,	87 4.961, 60.648, 34.593, 5.538, 28.454, 27.297, 26.490, 5.099, 5,

89 5.321000,4.485000,4.768000,8.397000,7.865000,27.636000,4.640000,	88 5.247, 4.945],

90 5.015000,4.962000,4.933000,4.977000,4.961000,60.648000,34.593000,	89 [18.907, 23.368, 8.100, 6.169, 5.621, 9.971, 8.161, 5.331, 4.513,

91 5.538000,28.454000,27.297000,26.490000,5.099000,5,5.247000,4.945000	90 4.837, 8.255, 7.852, 26.209, 4.388, 5.045, 5.029, 5.032, 4.946,

92 ],	91 4.973, 60.334, 33.377, 5.499, 28.275, 27.550, 26.103, 5.108,

93 [18.907000,23.368000,8.100000,6.169000,5.621000,9.971000,8.161000,	92 4.951, 5.285, 4.910],

94 5.331000,4.513000,4.837000,8.255000,7.852000,26.209000,4.388000,	93 [18.715, 23.748, 8.128, 6.148, 5.691, 9.361, 8.106, 5.334, 4.528,

95 5.045000,5.029000,5.032000,4.946000,4.973000,60.334000,33.377000,	94 4.965, 8.261, 7.851, 27.282, 4.391, 4.949, 4.981, 4.964, 4.935,

96 5.499000,28.275000,27.550000,26.103000,5.108000,4.951000,5.285000,	95 4.933, 60.231, 33.361, 5.489, 28.106, 27.457, 26.648, 5.108,

97 4.910000	96 4.963, 5.272, 4.954]

98 ],

99 [18.715000,23.748000,8.128000,6.148000,5.691000,9.361000,8.106000,

100 5.334000,4.528000,4.965000,8.261000,7.851000,27.282000,4.391000,

101 4.949000,4.981000,4.964000,4.935000,4.933000,60.231000,33.361000,

102 5.489000,28.106000,27.457000,26.648000,5.108000,4.963000,5.272000,

103 4.954000

104 ]

105 ]	97 ]

106 ]	98 ]

107	99

108 # Default options for the dry run	100 # Default options for the dry run

109 DEFAULT_OPTIONS = {	101 DEFAULT_OPTIONS = {

110 'debug_ignore_build': True,	102 'debug_ignore_build': True,

111 'debug_ignore_sync': True,	103 'debug_ignore_sync': True,

112 'debug_ignore_perf_test': True,	104 'debug_ignore_perf_test': True,

113 'debug_ignore_regression_confidence': True,	105 'debug_ignore_regression_confidence': True,

114 'command': 'fake_command',	106 'command': 'fake_command',

115 'metric': 'fake/metric',	107 'metric': 'fake/metric',

116 'good_revision': 280000,	108 'good_revision': 280000,

117 'bad_revision': 280005,	109 'bad_revision': 280005,

118 }	110 }

119	111

120 # This global is a placeholder for a generator to be defined by the testcases	112 # This global is a placeholder for a generator to be defined by the test cases

121 # that use _MockRunTest	113 # that use _MockRunTests.

122 _MockResultsGenerator = (x for x in [])	114 _MockResultsGenerator = (x for x in [])

123	115

	116

	117 def _MockRunTests(args, *kwargs):

	118 _, _ = args, kwargs

	119 return _FakeTestResult(_MockResultsGenerator.next())

	120

	121

124 def _FakeTestResult(values):	122 def _FakeTestResult(values):

125 result_dict = {'mean': 0.0, 'std_err': 0.0, 'std_dev': 0.0, 'values': values}	123 result_dict = {'mean': 0.0, 'std_err': 0.0, 'std_dev': 0.0, 'values': values}

126 success_code = 0	124 success_code = 0

127 return (result_dict, success_code)	125 return (result_dict, success_code)

128	126

129	127

130 def _MockRunTests(args, *kwargs):

131 _, _ = args, kwargs

132 return _FakeTestResult(_MockResultsGenerator.next())

133

134

135 def _GetBisectPerformanceMetricsInstance(options_dict):	128 def _GetBisectPerformanceMetricsInstance(options_dict):

136 """Returns an instance of the BisectPerformanceMetrics class."""	129 """Returns an instance of the BisectPerformanceMetrics class."""

137 opts = bisect_perf_regression.BisectOptions.FromDict(options_dict)	130 opts = bisect_perf_regression.BisectOptions.FromDict(options_dict)

138 return bisect_perf_regression.BisectPerformanceMetrics(opts, os.getcwd())	131 return bisect_perf_regression.BisectPerformanceMetrics(opts, os.getcwd())

139	132

140	133

141 def _GetExtendedOptions(improvement_dir, fake_first, ignore_confidence=True):	134 def _GetExtendedOptions(improvement_dir, fake_first, ignore_confidence=True):

142 """Returns a copy of the default options dict plus some options."""	135 """Returns a copy of the default options dict plus some options."""

143 result = dict(DEFAULT_OPTIONS)	136 result = dict(DEFAULT_OPTIONS)

144 result.update({	137 result.update({

(...skipping 201 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
346	339

347 def testBisectImprovementDirectionSucceeds(self):	340 def testBisectImprovementDirectionSucceeds(self):

348 """Bisects with improvement direction matching regression range."""	341 """Bisects with improvement direction matching regression range."""

349 # Test result goes from 0 to 100 where lower is better	342 # Test result goes from 0 to 100 where lower is better

350 results = _GenericDryRun(_GetExtendedOptions(-1, 100))	343 results = _GenericDryRun(_GetExtendedOptions(-1, 100))

351 self.assertIsNone(results.error)	344 self.assertIsNone(results.error)

352 # Test result goes from 0 to -100 where higher is better	345 # Test result goes from 0 to -100 where higher is better

353 results = _GenericDryRun(_GetExtendedOptions(1, -100))	346 results = _GenericDryRun(_GetExtendedOptions(1, -100))

354 self.assertIsNone(results.error)	347 self.assertIsNone(results.error)

355	348

356 @mock.patch('bisect_perf_regression.BisectPerformanceMetrics.'	349 def _CheckAbortsEarly(self, results):

357 'RunPerformanceTestAndParseResults', _MockRunTests)	350 """Returns True if the bisect job would abort early."""

358 def testBisectStopsOnDoubtfulRegression(self):

359 global _MockResultsGenerator	351 global _MockResultsGenerator

360 _MockResultsGenerator = (rs for rs in CLEAR_NON_REGRESSION)	352 _MockResultsGenerator = (r for r in results)

361 results = _GenericDryRun(_GetExtendedOptions(0, 0, False))	353 bisect_class = bisect_perf_regression.BisectPerformanceMetrics

362 confidence_warnings = [x for x in results.warnings if x.startswith(	354 original_run_tests = bisect_class.RunPerformanceTestAndParseResults

363 '\nWe could not reproduce the regression')]	355 bisect_class.RunPerformanceTestAndParseResults = _MockRunTests

364 self.assertGreater(len(confidence_warnings), 0)

365	356

366 _MockResultsGenerator = (rs for rs in ALMOST_REGRESSION)	357 try:

367 results = _GenericDryRun(_GetExtendedOptions(0, 0, False))	358 _GenericDryRun(_GetExtendedOptions(0, 0, False))

368 confidence_warnings = [x for x in results.warnings if x.startswith(	359 except StopIteration:

369 '\nWe could not reproduce the regression')]	360 # If StopIteration was raised, that means that the next value after

370 self.assertGreater(len(confidence_warnings), 0)	361 # the first two values was requested, so the job was not aborted.

	362 return False

	363 finally:

	364 bisect_class.RunPerformanceTestAndParseResults = original_run_tests

371	365

372 @mock.patch('bisect_perf_regression.BisectPerformanceMetrics.'	366 # If the job was aborted, there should be a warning about it.

373 'RunPerformanceTestAndParseResults', _MockRunTests)	367 assert [w for w in results.warnings

374 def testBisectContinuesOnClearRegression(self):	368 if 'could not reproduce the regression' in w]

375 global _MockResultsGenerator	369 return True

376 _MockResultsGenerator = (rs for rs in CLEAR_REGRESSION)

377 with self.assertRaises(StopIteration):

378 _GenericDryRun(_GetExtendedOptions(0, 0, False))

379	370

380 _MockResultsGenerator = (rs for rs in BARELY_REGRESSION)	371 def testBisectStopsOnClearUnclearRegression(self):

381 with self.assertRaises(StopIteration):	372 self.assertTrue(self._CheckAbortsEarly(CLEAR_NON_REGRESSION))

382 _GenericDryRun(_GetExtendedOptions(0, 0, False))

383	373

384 _MockResultsGenerator = (rs for rs in MULTIPLE_VALUES)	374 def testBisectStopsOnClearUnclearRegression(self):

385 with self.assertRaises(StopIteration):	375 self.assertFalse(self._CheckAbortsEarly(ALMOST_REGRESSION))

386 _GenericDryRun(_GetExtendedOptions(0, 0, False))	376

	377 def testBisectStopsOnClearUnclearRegression(self):

	378 self.assertFalse(self._CheckAbortsEarly(CLEAR_REGRESSION))

	379

	380 def testBisectStopsOnClearUnclearRegression(self):

	381 self.assertFalse(self._CheckAbortsEarly(BARELY_REGRESSION))

	382

	383 def testBisectStopsOnClearUnclearRegression(self):

	384 self.assertFalse(self._CheckAbortsEarly(MULTIPLE_VALUES))

387	385

388 def testGetCommitPosition(self):	386 def testGetCommitPosition(self):

389 cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'	387 cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'

390 self.assertEqual(291765, source_control.GetCommitPosition(cp_git_rev))	388 self.assertEqual(291765, source_control.GetCommitPosition(cp_git_rev))

391	389

392 svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'	390 svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'

393 self.assertEqual(291467, source_control.GetCommitPosition(svn_git_rev))	391 self.assertEqual(291467, source_control.GetCommitPosition(svn_git_rev))

394	392

395 def testGetCommitPositionForV8(self):	393 def testGetCommitPositionForV8(self):

396 bisect_instance = _GetBisectPerformanceMetricsInstance(DEFAULT_OPTIONS)	394 bisect_instance = _GetBisectPerformanceMetricsInstance(DEFAULT_OPTIONS)

(...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
606 ], (None, 0)),	604 ], (None, 0)),

607 ]	605 ]

608 self._SetupRunGitMock(try_cmd)	606 self._SetupRunGitMock(try_cmd)

609 bisect_perf_regression._BuilderTryjob(	607 bisect_perf_regression._BuilderTryjob(

610 git_revision, bot_name, bisect_job_name, patch)	608 git_revision, bot_name, bisect_job_name, patch)

611	609

612	610

613 if __name__ == '__main__':	611 if __name__ == '__main__':

614 unittest.main()	612 unittest.main()

615	613

OLD	NEW

« no previous file with comments | « tools/auto_bisect/bisect_perf_regression.py ('k') | no next file » | no next file with comments »