Chromium Code Reviews| Index: tools/bisect-perf-regression.py |
| diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py |
| index 6b98b4b40dcb34b7b2a7ce3f2b3f10b58fad4743..b6cf245e5f0dcb3fa1479f0306dbd507091ed28d 100755 |
| --- a/tools/bisect-perf-regression.py |
| +++ b/tools/bisect-perf-regression.py |
| @@ -177,6 +177,11 @@ new file mode 100644 |
| +%(deps_sha)s |
| """ |
| +BISECT_MODE_MEAN = 'mean' |
| +BISECT_MODE_STD_DEV = 'std_dev' |
| +BISECT_MODE_RETURN_CODE = 'return_code' |
| + |
| + |
| def _AddAdditionalDepotInfo(depot_info): |
| """Adds additional depot info to the global depot variables.""" |
| global DEPOT_DEPS_NAME |
| @@ -1968,6 +1973,15 @@ class BisectPerformanceMetrics(object): |
| return False |
| return True |
| + def _IsBisectModeUsingMetric(self): |
| + return self.opts.bisect_mode in [BISECT_MODE_MEAN, BISECT_MODE_STD_DEV] |
| + |
| + def _IsBisectModeReturnCode(self): |
| + return self.opts.bisect_mode in [BISECT_MODE_RETURN_CODE] |
| + |
| + def _IsBisectModeStandardDeviation(self): |
| + return self.opts.bisect_mode in [BISECT_MODE_STD_DEV] |
| + |
| def RunPerformanceTestAndParseResults( |
| self, command_to_run, metric, reset_on_first_run=False, |
| upload_on_last_run=False, results_label=None): |
| @@ -2033,6 +2047,22 @@ class BisectPerformanceMetrics(object): |
| current_args.append('--results-label=%s' % results_label) |
| (output, return_code) = RunProcessAndRetrieveOutput(current_args, |
| cwd=self.src_cwd) |
| + output_of_all_runs += output |
|
tonyg
2014/04/25 20:38:42
I'm not following why this block moved into the try block.
shatch
2014/04/25 22:46:03
Oops, leftover from earlier version. Also moved the elapsed time check.
|
| + if self.opts.output_buildbot_annotations: |
| + print output |
| + |
| + if self._IsBisectModeUsingMetric(): |
| + metric_values += self.ParseMetricValuesFromOutput(metric, output) |
| + # If we're bisecting on a metric (ie, changes in the mean or |
| + # standard deviation) and no metric values are produced, bail out. |
| + if not metric_values: |
| + break |
| + elif self._IsBisectModeReturnCode(): |
| + metric_values.append(return_code) |
| + |
| + elapsed_minutes = (time.time() - start_time) / 60.0 |
| + if elapsed_minutes >= self.opts.max_time_minutes: |
| + break |
| except OSError, e: |
| if e.errno == errno.ENOENT: |
| err_text = ('Something went wrong running the performance test. ' |
| @@ -2046,39 +2076,49 @@ class BisectPerformanceMetrics(object): |
| return (err_text, failure_code) |
| raise |
| - output_of_all_runs += output |
| - if self.opts.output_buildbot_annotations: |
| - print output |
| - |
| - metric_values += self.ParseMetricValuesFromOutput(metric, output) |
| - |
| - elapsed_minutes = (time.time() - start_time) / 60.0 |
| - |
| - if elapsed_minutes >= self.opts.max_time_minutes or not metric_values: |
| - break |
| - |
| if len(metric_values) == 0: |
| err_text = 'Metric %s was not found in the test output.' % metric |
| # TODO(qyearsley): Consider also getting and displaying a list of metrics |
| # that were found in the output here. |
| return (err_text, failure_code, output_of_all_runs) |
| - # Need to get the average value if there were multiple values. |
| - truncated_mean = CalculateTruncatedMean(metric_values, |
| - self.opts.truncate_percent) |
| - standard_err = CalculateStandardError(metric_values) |
| - standard_dev = CalculateStandardDeviation(metric_values) |
| - |
| - values = { |
| - 'mean': truncated_mean, |
| - 'std_err': standard_err, |
| - 'std_dev': standard_dev, |
| - 'values': metric_values, |
| - } |
| - |
| - print 'Results of performance test: %12f %12f' % ( |
| - truncated_mean, standard_err) |
| + # If we're bisecting on return codes, we're really just looking for zero vs |
| + # non-zero. |
| + if self._IsBisectModeReturnCode(): |
| + # If any of the return codes is non-zero, output 1. |
| + overall_return_code = 0 if ( |
| + all(current_value == 0 for current_value in metric_values)) else 1 |
| + |
| + values = { |
| + 'mean': overall_return_code, |
| + 'std_err': 0.0, |
| + 'std_dev': 0.0, |
| + 'values': metric_values, |
| + } |
| + |
| + print 'Results of performance test: Command returned with %d' % ( |
| + overall_return_code) |
| + else: |
| + # Need to get the average value if there were multiple values. |
| + truncated_mean = CalculateTruncatedMean(metric_values, |
| + self.opts.truncate_percent) |
| + standard_err = CalculateStandardError(metric_values) |
| + standard_dev = CalculateStandardDeviation(metric_values) |
| + |
| + if self._IsBisectModeStandardDeviation(): |
| + metric_values = [standard_dev] |
| + |
| + values = { |
| + 'mean': truncated_mean, |
| + 'std_err': standard_err, |
| + 'std_dev': standard_dev, |
| + 'values': metric_values, |
| + } |
| + |
| + print 'Results of performance test: %12f %12f' % ( |
| + truncated_mean, standard_err) |
| return (values, success_code, output_of_all_runs) |
| def FindAllRevisionsToSync(self, revision, depot): |
| @@ -2339,7 +2379,7 @@ class BisectPerformanceMetrics(object): |
| return ('Failed to sync revision: [%s]' % (str(revision, )), |
| BUILD_RESULT_FAIL) |
| - def CheckIfRunPassed(self, current_value, known_good_value, known_bad_value): |
| + def _CheckIfRunPassed(self, current_value, known_good_value, known_bad_value): |
| """Given known good and bad values, decide if the current_value passed |
| or failed. |
| @@ -2352,8 +2392,14 @@ class BisectPerformanceMetrics(object): |
| True if the current_value is closer to the known_good_value than the |
| known_bad_value. |
| """ |
| - dist_to_good_value = abs(current_value['mean'] - known_good_value['mean']) |
| - dist_to_bad_value = abs(current_value['mean'] - known_bad_value['mean']) |
| + if self.opts.bisect_mode == BISECT_MODE_STD_DEV: |
| + dist_to_good_value = abs(current_value['std_dev'] - |
| + known_good_value['std_dev']) |
| + dist_to_bad_value = abs(current_value['std_dev'] - |
| + known_bad_value['std_dev']) |
| + else: |
| + dist_to_good_value = abs(current_value['mean'] - known_good_value['mean']) |
| + dist_to_bad_value = abs(current_value['mean'] - known_bad_value['mean']) |
| return dist_to_good_value < dist_to_bad_value |
| @@ -2909,9 +2955,9 @@ class BisectPerformanceMetrics(object): |
| next_revision_data['perf_time'] = run_results[3] |
| next_revision_data['build_time'] = run_results[4] |
| - passed_regression = self.CheckIfRunPassed(run_results[0], |
| - known_good_value, |
| - known_bad_value) |
| + passed_regression = self._CheckIfRunPassed(run_results[0], |
| + known_good_value, |
| + known_bad_value) |
| next_revision_data['passed'] = passed_regression |
| next_revision_data['value'] = run_results[0] |
| @@ -2966,15 +3012,22 @@ class BisectPerformanceMetrics(object): |
| print " __o_\___ Aw Snap! We hit a speed bump!" |
| print "=-O----O-'__.~.___________________________________" |
| - print 'Bisect reproduced a %.02f%% (+-%.02f%%) change in the %s metric.' % ( |
| - results_dict['regression_size'], results_dict['regression_std_err'], |
| - '/'.join(self.opts.metric)) |
| + if self._IsBisectModeReturnCode(): |
| + print ('Bisect reproduced a change in return codes while running the ' |
| + 'performance test.') |
| + else: |
| + print ('Bisect reproduced a %.02f%% (+-%.02f%%) change in the ' |
| + '%s metric.' % (results_dict['regression_size'], |
| + results_dict['regression_std_err'], '/'.join(self.opts.metric))) |
| self._PrintConfidence(results_dict) |
| def _PrintFailedBanner(self, results_dict): |
| - print ('Bisect could not reproduce a change in the ' |
| - '%s/%s metric.' % (self.opts.metric[0], self.opts.metric[1])) |
| + if self._IsBisectModeReturnCode(): |
| + print 'Bisect could not reproduce a change in the return code.' |
| + else: |
| + print ('Bisect could not reproduce a change in the ' |
| + '%s metric.' % '/'.join(self.opts.metric)) |
| self._PrintConfidence(results_dict) |
|
tonyg
2014/04/25 20:38:42
Looking at the output in the CL description, it seems… [remainder of comment truncated in this capture]
shatch
2014/04/25 22:46:03
Done.
|
| @@ -3013,6 +3066,41 @@ class BisectPerformanceMetrics(object): |
| print 'Commit : %s' % cl |
| print 'Date : %s' % info['date'] |
| + def _PrintTestedCommitsHeader(self): |
|
tonyg
2014/04/25 20:38:42
These are now fairly hard to read and a little redundant.
shatch
2014/04/25 22:46:03
Yeah, looks a lot more readable this way.
|
| + if self.opts.bisect_mode == BISECT_MODE_MEAN: |
| + print ' %20s %70s %12s %14s %13s' % ('Depot'.center(20, ' '), |
| + 'Commit SHA'.center(70, ' '), 'Mean'.center(12, ' '), |
| + 'Std. Error'.center(14, ' '), 'State'.center(13, ' ')) |
| + elif self.opts.bisect_mode == BISECT_MODE_STD_DEV: |
| + print ' %20s %70s %14s %12s %13s' % ('Depot'.center(20, ' '), |
| + 'Commit SHA'.center(70, ' '), 'Std. Dev'.center(14, ' '), |
| + 'Mean'.center(12, ' '), 'State'.center(13, ' ')) |
| + elif self.opts.bisect_mode == BISECT_MODE_RETURN_CODE: |
| + print ' %20s %70s %14s %13s' % ('Depot'.center(20, ' '), |
| + 'Commit SHA'.center(70, ' '), 'Return Code'.center(14, ' '), |
| + 'State'.center(13, ' ')) |
| + |
| + def _PrintTestedCommitsEntry(self, current_data, cl_link, state_str): |
| + if self.opts.bisect_mode == BISECT_MODE_MEAN: |
| + std_error = ('+-%.02f' % |
| + current_data['value']['std_err']).center(14, ' ') |
| + mean = ('%.02f' % current_data['value']['mean']).center(12, ' ') |
| + print ' %20s %70s %12s %14s %13s' % ( |
| + current_data['depot'].center(20, ' '), cl_link.center(70, ' '), |
| + mean, std_error, state_str) |
| + elif self.opts.bisect_mode == BISECT_MODE_STD_DEV: |
| + std_dev = ('+-%.02f' % |
| + current_data['value']['std_dev']).center(14, ' ') |
| + mean = ('%.02f' % current_data['value']['mean']).center(12, ' ') |
| + print ' %20s %70s %14s %12s %13s' % ( |
| + current_data['depot'].center(20, ' '), cl_link.center(70, ' '), |
| + std_dev, mean, state_str) |
| + elif self.opts.bisect_mode == BISECT_MODE_RETURN_CODE: |
| + mean = ('%d' % current_data['value']['mean']).center(12, ' ') |
| + print ' %20s %70s %14s %13s' % ( |
| + current_data['depot'].center(20, ' '), cl_link.center(70, ' '), |
| + mean, state_str) |
| + |
| def _PrintTestedCommitsTable(self, revision_data_sorted, |
| first_working_revision, last_broken_revision, confidence, |
| final_step=True): |
| @@ -3021,9 +3109,7 @@ class BisectPerformanceMetrics(object): |
| print 'Tested commits:' |
| else: |
| print 'Partial results:' |
| - print ' %20s %70s %12s %14s %13s' % ('Depot'.center(20, ' '), |
| - 'Commit SHA'.center(70, ' '), 'Mean'.center(12, ' '), |
| - 'Std. Error'.center(14, ' '), 'State'.center(13, ' ')) |
| + self._PrintTestedCommitsHeader() |
| state = 0 |
| for current_id, current_data in revision_data_sorted: |
| if current_data['value']: |
| @@ -3049,16 +3135,11 @@ class BisectPerformanceMetrics(object): |
| state_str = '' |
| state_str = state_str.center(13, ' ') |
| - std_error = ('+-%.02f' % |
| - current_data['value']['std_err']).center(14, ' ') |
| - mean = ('%.02f' % current_data['value']['mean']).center(12, ' ') |
| cl_link = self._GetViewVCLinkFromDepotAndHash(current_id, |
| current_data['depot']) |
| if not cl_link: |
| cl_link = current_id |
| - print ' %20s %70s %12s %14s %13s' % ( |
| - current_data['depot'].center(20, ' '), cl_link.center(70, ' '), |
| - mean, std_error, state_str) |
| + self._PrintTestedCommitsEntry(current_data, cl_link, state_str) |
| def _PrintReproSteps(self): |
| @@ -3433,6 +3514,7 @@ class BisectOptions(object): |
| self.target_arch = 'ia32' |
| self.builder_host = None |
| self.builder_port = None |
| + self.bisect_mode = BISECT_MODE_MEAN |
| def _CreateCommandLineParser(self): |
| """Creates a parser with bisect options. |
| @@ -3487,6 +3569,13 @@ class BisectOptions(object): |
| 'truncated mean. Values will be clamped to range [0, ' |
| '25]. Default value is 25 (highest/lowest 25% will be ' |
| 'discarded).') |
| + group.add_option('--bisect_mode', |
| + type='choice', |
| + choices=[BISECT_MODE_MEAN, BISECT_MODE_STD_DEV, |
| + BISECT_MODE_RETURN_CODE], |
| + default=BISECT_MODE_MEAN, |
| + help='The bisect mode. Choices are to bisect on the ' |
| + 'difference in mean, std_dev, or return_code.') |
| parser.add_option_group(group) |
| group = optparse.OptionGroup(parser, 'Build options') |
| @@ -3586,7 +3675,7 @@ class BisectOptions(object): |
| if not opts.bad_revision: |
| raise RuntimeError('missing required parameter: --bad_revision') |
| - if not opts.metric: |
| + if not opts.metric and opts.bisect_mode != BISECT_MODE_RETURN_CODE: |
| raise RuntimeError('missing required parameter: --metric') |
| if opts.gs_bucket: |
| @@ -3614,7 +3703,8 @@ class BisectOptions(object): |
| raise RuntimeError('missing required parameter: --working_directory') |
| metric_values = opts.metric.split('/') |
| - if len(metric_values) != 2: |
| + if (len(metric_values) != 2 and |
| + opts.bisect_mode != BISECT_MODE_RETURN_CODE): |
| raise RuntimeError("Invalid metric specified: [%s]" % opts.metric) |
| opts.metric = metric_values |