tools/auto_bisect/bisect_perf_regression_test.py - Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists

Side by Side Diff: tools/auto_bisect/bisect_perf_regression_test.py

Issue 665893003: Re-applying reverted changes for regression confidence check + fix: ConfidenceScore takes flat lists (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Addressing comments Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2014 The Chromium Authors. All rights reserved.	1 # Copyright 2014 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import os	5 import os

6 import re	6 import re

7 import shutil	7 import shutil

8 import sys	8 import sys

9 import unittest	9 import unittest

10	10

11 SRC = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)	11 SRC = os.path.join(os.path.dirname(__file__), os.path.pardir, os.path.pardir)

12 sys.path.append(os.path.join(SRC, 'third_party', 'pymock'))	12 sys.path.append(os.path.join(SRC, 'third_party', 'pymock'))

13	13

14 import bisect_perf_regression	14 import bisect_perf_regression

15 import bisect_printer	15 import bisect_printer

16 import bisect_utils	16 import bisect_utils

17 import mock	17 import mock

18 import source_control	18 import source_control

19	19

20	20

	21 # Regression confidence: 0%

	22 CLEAR_NON_REGRESSION = [

	23 # Mean: 30.223 Std. Dev.: 11.383

	24 [[16.886], [16.909], [16.99], [17.723], [17.952], [18.118], [19.028],

	25 [19.552], [21.954], [38.573], [38.839], [38.965], [40.007], [40.572],

	26 [41.491], [42.002], [42.33], [43.109], [43.238]],

	27 # Mean: 34.76 Std. Dev.: 11.516

	28 [[16.426], [17.347], [20.593], [21.177], [22.791], [27.843], [28.383],

	29 [28.46], [29.143], [40.058], [40.303], [40.558], [41.918], [42.44],

	30 [45.223], [46.494], [50.002], [50.625], [50.839]]

	31 ]

	32 # Regression confidence: ~ 90%

	33 ALMOST_REGRESSION = [

	34 # Mean: 30.042 Std. Dev.: 2.002

	35 [[26.146], [28.04], [28.053], [28.074], [28.168], [28.209], [28.471],

	36 [28.652], [28.664], [30.862], [30.973], [31.002], [31.897], [31.929],

	37 [31.99], [32.214], [32.323], [32.452], [32.696]],

	38 # Mean: 33.008 Std. Dev.: 4.265

	39 [[34.963], [30.741], [39.677], [39.512], [34.314], [31.39], [34.361],

	40 [25.2], [30.489], [29.434]]

	41 ]

	42 # Regression confidence: ~ 98%

	43 BARELY_REGRESSION = [

	44 # Mean: 28.828 Std. Dev.: 1.993

	45 [[26.96], [27.605], [27.768], [27.829], [28.006], [28.206], [28.393],

	46 [28.911], [28.933], [30.38], [30.462], [30.808], [31.74], [31.805],

	47 [31.899], [32.077], [32.454], [32.597], [33.155]],

	48 # Mean: 31.156 Std. Dev.: 1.980

	49 [[28.729], [29.112], [29.258], [29.454], [29.789], [30.036], [30.098],

	50 [30.174], [30.534], [32.285], [32.295], [32.552], [32.572], [32.967],

	51 [33.165], [33.403], [33.588], [33.744], [34.147], [35.84]]

	52 ]

	53 # Regression confidence: 99.5%

	54 CLEAR_REGRESSION = [

	55 # Mean: 30.254 Std. Dev.: 2.987

	56 [[26.494], [26.621], [26.701], [26.997], [26.997], [27.05], [27.37],

	57 [27.488], [27.556], [31.846], [32.192], [32.21], [32.586], [32.596],

	58 [32.618], [32.95], [32.979], [33.421], [33.457], [34.97]],

	59 # Mean: 33.190 Std. Dev.: 2.972

	60 [[29.547], [29.713], [29.835], [30.132], [30.132], [30.33], [30.406],

	61 [30.592], [30.72], [34.486], [35.247], [35.253], [35.335], [35.378],

	62 [35.934], [36.233], [36.41], [36.947], [37.982]]

	63 ]

21 # Default options for the dry run	64 # Default options for the dry run

22 DEFAULT_OPTIONS = {	65 DEFAULT_OPTIONS = {

23 'debug_ignore_build': True,	66 'debug_ignore_build': True,

24 'debug_ignore_sync': True,	67 'debug_ignore_sync': True,

25 'debug_ignore_perf_test': True,	68 'debug_ignore_perf_test': True,

	69 'debug_ignore_regression_confidence': True,

26 'command': 'fake_command',	70 'command': 'fake_command',

27 'metric': 'fake/metric',	71 'metric': 'fake/metric',

28 'good_revision': 280000,	72 'good_revision': 280000,

29 'bad_revision': 280005,	73 'bad_revision': 280005,

30 }	74 }

31	75

	76 # This global is a placeholder for a generator to be defined by the testcases

	77 # that use _MockRunTest

	78 _MockResultsGenerator = (x for x in [])

	79

	80 def _FakeTestResult(values):

	81 result_dict = {'mean': 0.0, 'std_err': 0.0, 'std_dev': 0.0, 'values': values}

	82 success_code = 0

	83 return (result_dict, success_code)

	84

	85

	86 def _MockRunTests(args, *kwargs):

	87 _, _ = args, kwargs

	88 return _FakeTestResult(_MockResultsGenerator.next())

	89

32	90

33 def _GetBisectPerformanceMetricsInstance(options_dict):	91 def _GetBisectPerformanceMetricsInstance(options_dict):

34 """Returns an instance of the BisectPerformanceMetrics class."""	92 """Returns an instance of the BisectPerformanceMetrics class."""

35 opts = bisect_perf_regression.BisectOptions.FromDict(options_dict)	93 opts = bisect_perf_regression.BisectOptions.FromDict(options_dict)

36 return bisect_perf_regression.BisectPerformanceMetrics(opts)	94 return bisect_perf_regression.BisectPerformanceMetrics(opts)

37	95

38	96

39 def _GetExtendedOptions(d, f):	97 def _GetExtendedOptions(improvement_dir, fake_first, ignore_confidence=True):

40 """Returns the a copy of the default options dict plus some options."""	98 """Returns the a copy of the default options dict plus some options."""

41 result = dict(DEFAULT_OPTIONS)	99 result = dict(DEFAULT_OPTIONS)

42 result.update({	100 result.update({

43 'improvement_direction': d,	101 'improvement_direction': improvement_dir,

44 'debug_fake_first_test_mean': f})	102 'debug_fake_first_test_mean': fake_first,

	103 'debug_ignore_regression_confidence': ignore_confidence})

45 return result	104 return result

46	105

47	106

48 def _GenericDryRun(options, print_results=False):	107 def _GenericDryRun(options, print_results=False):

49 """Performs a dry run of the bisector.	108 """Performs a dry run of the bisector.

50	109

51 Args:	110 Args:

52 options: Dictionary containing the options for the bisect instance.	111 options: Dictionary containing the options for the bisect instance.

53 print_results: Boolean telling whether to call FormatAndPrintResults.	112 print_results: Boolean telling whether to call FormatAndPrintResults.

54	113

(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 def testDryRun(self):	285 def testDryRun(self):

227 """Does a dry run of the bisect script.	286 """Does a dry run of the bisect script.

228	287

229 This serves as a smoke test to catch errors in the basic execution of the	288 This serves as a smoke test to catch errors in the basic execution of the

230 script.	289 script.

231 """	290 """

232 _GenericDryRun(DEFAULT_OPTIONS, True)	291 _GenericDryRun(DEFAULT_OPTIONS, True)

233	292

234 def testBisectImprovementDirectionFails(self):	293 def testBisectImprovementDirectionFails(self):

235 """Dry run of a bisect with an improvement instead of regression."""	294 """Dry run of a bisect with an improvement instead of regression."""

236

237 # Test result goes from 0 to 100 where higher is better	295 # Test result goes from 0 to 100 where higher is better

238 results = _GenericDryRun(_GetExtendedOptions(1, 100))	296 results = _GenericDryRun(_GetExtendedOptions(1, 100))

239 self.assertIsNotNone(results.error)	297 self.assertIsNotNone(results.error)

240 self.assertIn('not a regression', results.error)	298 self.assertIn('not a regression', results.error)

	299

241 # Test result goes from 0 to -100 where lower is better	300 # Test result goes from 0 to -100 where lower is better

242 results = _GenericDryRun(_GetExtendedOptions(-1, -100))	301 results = _GenericDryRun(_GetExtendedOptions(-1, -100))

243 self.assertIsNotNone(results.error)	302 self.assertIsNotNone(results.error)

244 self.assertIn('not a regression', results.error)	303 self.assertIn('not a regression', results.error)

245	304

246 def testBisectImprovementDirectionSucceeds(self):	305 def testBisectImprovementDirectionSucceeds(self):

247 """Bisects with improvement direction matching regression range."""	306 """Bisects with improvement direction matching regression range."""

248 # Test result goes from 0 to 100 where lower is better	307 # Test result goes from 0 to 100 where lower is better

249 results = _GenericDryRun(_GetExtendedOptions(-1, 100))	308 results = _GenericDryRun(_GetExtendedOptions(-1, 100))

250 self.assertIsNone(results.error)	309 self.assertIsNone(results.error)

251 # Test result goes from 0 to -100 where higher is better	310 # Test result goes from 0 to -100 where higher is better

252 results = _GenericDryRun(_GetExtendedOptions(1, -100))	311 results = _GenericDryRun(_GetExtendedOptions(1, -100))

253 self.assertIsNone(results.error)	312 self.assertIsNone(results.error)

254	313

	314 @mock.patch('bisect_perf_regression.BisectPerformanceMetrics.'

	315 'RunPerformanceTestAndParseResults', _MockRunTests)

	316 def testBisectStopsOnDoubtfulRegression(self):

	317 global _MockResultsGenerator

	318 _MockResultsGenerator = (rs for rs in CLEAR_NON_REGRESSION)

	319 results = _GenericDryRun(_GetExtendedOptions(0, 0, False))

	320 self.assertIsNotNone(results.error)

	321 self.assertIn('could not reproduce the regression', results.error)

	322

	323 _MockResultsGenerator = (rs for rs in ALMOST_REGRESSION)

	324 results = _GenericDryRun(_GetExtendedOptions(0, 0, False))

	325 self.assertIsNotNone(results.error)

	326 self.assertIn('could not reproduce the regression', results.error)

	327

	328 @mock.patch('bisect_perf_regression.BisectPerformanceMetrics.'

	329 'RunPerformanceTestAndParseResults', _MockRunTests)

	330 def testBisectContinuesOnClearRegression(self):

	331 global _MockResultsGenerator

	332 _MockResultsGenerator = (rs for rs in CLEAR_REGRESSION)

	333 with self.assertRaises(StopIteration):

	334 _GenericDryRun(_GetExtendedOptions(0, 0, False))

	335

	336 _MockResultsGenerator = (rs for rs in BARELY_REGRESSION)

	337 with self.assertRaises(StopIteration):

	338 _GenericDryRun(_GetExtendedOptions(0, 0, False))

	339

255 def testGetCommitPosition(self):	340 def testGetCommitPosition(self):

256 cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'	341 cp_git_rev = '7017a81991de983e12ab50dfc071c70e06979531'

257 self.assertEqual(291765, source_control.GetCommitPosition(cp_git_rev))	342 self.assertEqual(291765, source_control.GetCommitPosition(cp_git_rev))

258	343

259 svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'	344 svn_git_rev = 'e6db23a037cad47299a94b155b95eebd1ee61a58'

260 self.assertEqual(291467, source_control.GetCommitPosition(svn_git_rev))	345 self.assertEqual(291467, source_control.GetCommitPosition(svn_git_rev))

261	346

262 def testGetCommitPositionForV8(self):	347 def testGetCommitPositionForV8(self):

263 bisect_instance = _GetBisectPerformanceMetricsInstance(DEFAULT_OPTIONS)	348 bisect_instance = _GetBisectPerformanceMetricsInstance(DEFAULT_OPTIONS)

264 v8_rev = '21d700eedcdd6570eff22ece724b63a5eefe78cb'	349 v8_rev = '21d700eedcdd6570eff22ece724b63a5eefe78cb'

(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after
473 '--diff=%s' % patch_content	558 '--diff=%s' % patch_content

474 ], (None, 0))	559 ], (None, 0))

475 ]	560 ]

476 self._SetupRunGitMock(try_cmd)	561 self._SetupRunGitMock(try_cmd)

477 bisect_perf_regression._BuilderTryjob(	562 bisect_perf_regression._BuilderTryjob(

478 git_revision, bot_name, bisect_job_name, patch)	563 git_revision, bot_name, bisect_job_name, patch)

479	564

480	565

481 if __name__ == '__main__':	566 if __name__ == '__main__':

482 unittest.main()	567 unittest.main()

OLD	NEW

« no previous file with comments | « tools/auto_bisect/bisect_perf_regression.py ('k') | tools/auto_bisect/bisect_results.py » ('j') | no next file with comments »