Chromium Code Reviews

| Index: tools/bisect-perf-regression.py |
| diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py |
| index 6b98b4b40dcb34b7b2a7ce3f2b3f10b58fad4743..3d1bb008b5457789667fc7036146324e53cfe850 100755 |
| --- a/tools/bisect-perf-regression.py |
| +++ b/tools/bisect-perf-regression.py |
| @@ -177,6 +177,11 @@ |
| +%(deps_sha)s |
| """ |
| +BISECT_MODE_MEAN = 'mean' |
| +BISECT_MODE_STD_DEV = 'std_dev' |
| +BISECT_MODE_RETURN_CODE = 'return_code' |
|
qyearsley
2014/04/25 23:53:29
Could add a comment about what these three constants…
|
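One way to address the comment above — a sketch (inferred from how the rest of the patch uses these values, not author-confirmed) of what the three constants control:

    # The bisect mode picks the statistic that gets compared across revisions:
    #   BISECT_MODE_MEAN        - bisect on a change in the metric's (truncated) mean.
    #   BISECT_MODE_STD_DEV     - bisect on a change in the metric's standard deviation.
    #   BISECT_MODE_RETURN_CODE - bisect on the test command's return code
    #                             (zero vs. non-zero); no metric is required.
    BISECT_MODE_MEAN = 'mean'
    BISECT_MODE_STD_DEV = 'std_dev'
    BISECT_MODE_RETURN_CODE = 'return_code'

The _IsBisectMode* helpers added further down simply test membership in these values.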
| + |
| + |
| def _AddAdditionalDepotInfo(depot_info): |
| """Adds additional depot info to the global depot variables.""" |
| global DEPOT_DEPS_NAME |
| @@ -1968,6 +1973,15 @@ class BisectPerformanceMetrics(object): |
| return False |
| return True |
| + def _IsBisectModeUsingMetric(self): |
| + return self.opts.bisect_mode in [BISECT_MODE_MEAN, BISECT_MODE_STD_DEV] |
| + |
| + def _IsBisectModeReturnCode(self): |
| + return self.opts.bisect_mode in [BISECT_MODE_RETURN_CODE] |
| + |
| + def _IsBisectModeStandardDeviation(self): |
| + return self.opts.bisect_mode in [BISECT_MODE_STD_DEV] |
| + |
| def RunPerformanceTestAndParseResults( |
| self, command_to_run, metric, reset_on_first_run=False, |
| upload_on_last_run=False, results_label=None): |
| @@ -2022,15 +2036,15 @@ class BisectPerformanceMetrics(object): |
| output_of_all_runs = '' |
| for i in xrange(self.opts.repeat_test_count): |
| # The return code can be ignored here, since a failing test won't return 0. |
| + current_args = copy.copy(args) |
| + if is_telemetry: |
| + if i == 0 and reset_on_first_run: |
| + current_args.append('--reset-results') |
| + elif i == self.opts.repeat_test_count - 1 and upload_on_last_run: |
| + current_args.append('--upload-results') |
| + if results_label: |
| + current_args.append('--results-label=%s' % results_label) |
| try: |
| - current_args = copy.copy(args) |
| - if is_telemetry: |
| - if i == 0 and reset_on_first_run: |
| - current_args.append('--reset-results') |
| - elif i == self.opts.repeat_test_count - 1 and upload_on_last_run: |
| - current_args.append('--upload-results') |
| - if results_label: |
| - current_args.append('--results-label=%s' % results_label) |
| (output, return_code) = RunProcessAndRetrieveOutput(current_args, |
| cwd=self.src_cwd) |
| except OSError, e: |
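The flag handling hoisted above the try block is per-iteration; it reads cleanly as a standalone function. A minimal sketch (the run_benchmark arguments and the 'patched' label are made up for illustration):

    import copy

    def telemetry_args_for_run(args, i, repeat_count, reset_on_first_run,
                               upload_on_last_run, results_label):
        # Mirrors the patch: only the first run may reset stored results, and
        # only the last run may upload them (with an optional label).
        current_args = copy.copy(args)
        if i == 0 and reset_on_first_run:
            current_args.append('--reset-results')
        elif i == repeat_count - 1 and upload_on_last_run:
            current_args.append('--upload-results')
            if results_label:
                current_args.append('--results-label=%s' % results_label)
        return current_args

    # Three repeats: reset on run 0, upload (with label) on run 2.
    for i in range(3):
        print(telemetry_args_for_run(['run_benchmark', 'smoothness'], i, 3,
                                     True, True, 'patched'))

Moving this block out of the try also means an exception while assembling flags can no longer be misreported as a test-launch failure, which appears to be the motivation for the move.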
| @@ -2050,11 +2064,17 @@ class BisectPerformanceMetrics(object): |
| if self.opts.output_buildbot_annotations: |
| print output |
| - metric_values += self.ParseMetricValuesFromOutput(metric, output) |
| + if self._IsBisectModeUsingMetric(): |
| + metric_values += self.ParseMetricValuesFromOutput(metric, output) |
| + # If we're bisecting on a metric (i.e., changes in the mean or |
| + # standard deviation) and no metric values are produced, bail out. |
| + if not metric_values: |
| + break |
| + elif self._IsBisectModeReturnCode(): |
| + metric_values.append(return_code) |
| elapsed_minutes = (time.time() - start_time) / 60.0 |
| - |
| - if elapsed_minutes >= self.opts.max_time_minutes or not metric_values: |
| + if elapsed_minutes >= self.opts.max_time_minutes: |
| break |
| if len(metric_values) == 0: |
| @@ -2063,22 +2083,43 @@ class BisectPerformanceMetrics(object): |
| # that were found in the output here. |
| return (err_text, failure_code, output_of_all_runs) |
| - # Need to get the average value if there were multiple values. |
| - truncated_mean = CalculateTruncatedMean(metric_values, |
| - self.opts.truncate_percent) |
| - standard_err = CalculateStandardError(metric_values) |
| - standard_dev = CalculateStandardDeviation(metric_values) |
| - |
| - values = { |
| - 'mean': truncated_mean, |
| - 'std_err': standard_err, |
| - 'std_dev': standard_dev, |
| - 'values': metric_values, |
| - } |
| - |
| - print 'Results of performance test: %12f %12f' % ( |
| - truncated_mean, standard_err) |
| + # If we're bisecting on return codes, we're really just looking for |
| + # zero vs non-zero. |
| + if self._IsBisectModeReturnCode(): |
| + # If any of the return codes is non-zero, output 1. |
| + overall_return_code = 0 if ( |
| + all(current_value == 0 for current_value in metric_values)) else 1 |
| + |
| + values = { |
| + 'mean': overall_return_code, |
| + 'std_err': 0.0, |
| + 'std_dev': 0.0, |
| + 'values': metric_values, |
| + } |
|
qyearsley
2014/04/25 23:53:29
It's potentially confusing that "mean" could be ov…
|
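The reviewer's point stands: in return-code mode the 'mean' key carries the overall return code rather than an average. The aggregation itself is simple; a minimal sketch:

    def overall_return_code(return_codes):
        # If any per-run return code is non-zero, the overall result is 1.
        return 0 if all(code == 0 for code in return_codes) else 1

    print(overall_return_code([0, 0, 0]))  # -> 0
    print(overall_return_code([0, 1, 0]))  # -> 1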
| + |
| + print 'Results of performance test: Command returned with %d' % ( |
| + overall_return_code) |
| + else: |
| + # Need to get the average value if there were multiple values. |
| + truncated_mean = CalculateTruncatedMean(metric_values, |
| + self.opts.truncate_percent) |
| + standard_err = CalculateStandardError(metric_values) |
| + standard_dev = CalculateStandardDeviation(metric_values) |
| + |
| + if self._IsBisectModeStandardDeviation(): |
| + metric_values = [standard_dev] |
| + |
| + values = { |
| + 'mean': truncated_mean, |
| + 'std_err': standard_err, |
| + 'std_dev': standard_dev, |
| + 'values': metric_values, |
| + } |
| + |
| + print 'Results of performance test: %12f %12f' % ( |
| + truncated_mean, standard_err) |
| return (values, success_code, output_of_all_runs) |
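CalculateTruncatedMean, CalculateStandardError and CalculateStandardDeviation are defined elsewhere in this script; the sketch below is an illustrative re-implementation of the statistics involved, not the script's own code (assumed semantics: truncate_fraction is the share of samples dropped from each end, and the deviation uses the population form — the script's helpers may differ in detail):

    import math

    def truncated_mean(values, truncate_fraction):
        # Drop the lowest and highest truncate_fraction of samples, then average.
        n_to_drop = int(len(values) * truncate_fraction)
        trimmed = sorted(values)[n_to_drop:len(values) - n_to_drop]
        return sum(trimmed) / float(len(trimmed))

    def standard_deviation(values):
        mean = sum(values) / float(len(values))
        return math.sqrt(sum((v - mean) ** 2 for v in values) / len(values))

    def standard_error(values):
        # Standard error of the mean: std dev shrunk by sqrt of the sample count.
        return standard_deviation(values) / math.sqrt(len(values))

    samples = [10.2, 9.8, 10.1, 30.0, 10.0]
    print(truncated_mean(samples, 0.25))  # outlier-resistant average
    print(standard_error(samples))

Note how std_dev mode reuses this machinery: metric_values is replaced by the single standard deviation, so the 'values' list downstream holds one number per revision.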
| def FindAllRevisionsToSync(self, revision, depot): |
| @@ -2339,7 +2380,7 @@ class BisectPerformanceMetrics(object): |
| return ('Failed to sync revision: [%s]' % (str(revision, )), |
| BUILD_RESULT_FAIL) |
| - def CheckIfRunPassed(self, current_value, known_good_value, known_bad_value): |
| + def _CheckIfRunPassed(self, current_value, known_good_value, known_bad_value): |
| """Given known good and bad values, decide if the current_value passed |
| or failed. |
| @@ -2352,8 +2393,14 @@ class BisectPerformanceMetrics(object): |
| True if the current_value is closer to the known_good_value than the |
| known_bad_value. |
| """ |
| - dist_to_good_value = abs(current_value['mean'] - known_good_value['mean']) |
| - dist_to_bad_value = abs(current_value['mean'] - known_bad_value['mean']) |
| + if self.opts.bisect_mode == BISECT_MODE_STD_DEV: |
| + dist_to_good_value = abs(current_value['std_dev'] - |
| + known_good_value['std_dev']) |
| + dist_to_bad_value = abs(current_value['std_dev'] - |
| + known_bad_value['std_dev']) |
| + else: |
| + dist_to_good_value = abs(current_value['mean'] - known_good_value['mean']) |
| + dist_to_bad_value = abs(current_value['mean'] - known_bad_value['mean']) |
| return dist_to_good_value < dist_to_bad_value |
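A small worked example of the distance test, with hypothetical values dicts shaped like the ones RunPerformanceTestAndParseResults returns:

    known_good = {'mean': 100.0, 'std_dev': 2.0}
    known_bad = {'mean': 140.0, 'std_dev': 9.0}
    current = {'mean': 112.0, 'std_dev': 8.5}

    # Mean mode: 112 is 12 away from good but 28 away from bad -> passed.
    print(abs(current['mean'] - known_good['mean']) <
          abs(current['mean'] - known_bad['mean']))          # True

    # std_dev mode compares the same way on 'std_dev': 8.5 is far closer to
    # the bad 9.0 than the good 2.0 -> failed.
    print(abs(current['std_dev'] - known_good['std_dev']) <
          abs(current['std_dev'] - known_bad['std_dev']))    # False

The two modes can disagree on the same run, which is exactly why the bisect mode has to be threaded through to this check.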
| @@ -2909,9 +2956,9 @@ class BisectPerformanceMetrics(object): |
| next_revision_data['perf_time'] = run_results[3] |
| next_revision_data['build_time'] = run_results[4] |
| - passed_regression = self.CheckIfRunPassed(run_results[0], |
| - known_good_value, |
| - known_bad_value) |
| + passed_regression = self._CheckIfRunPassed(run_results[0], |
| + known_good_value, |
| + known_bad_value) |
| next_revision_data['passed'] = passed_regression |
| next_revision_data['value'] = run_results[0] |
| @@ -2966,17 +3013,23 @@ class BisectPerformanceMetrics(object): |
| print " __o_\___ Aw Snap! We hit a speed bump!" |
| print "=-O----O-'__.~.___________________________________" |
| - print 'Bisect reproduced a %.02f%% (+-%.02f%%) change in the %s metric.' % ( |
| - results_dict['regression_size'], results_dict['regression_std_err'], |
| - '/'.join(self.opts.metric)) |
| + if self._IsBisectModeReturnCode(): |
| + print ('Bisect reproduced a change in return codes while running the ' |
| + 'performance test.') |
| + else: |
| + print ('Bisect reproduced a %.02f%% (+-%.02f%%) change in the ' |
| + '%s metric.' % (results_dict['regression_size'], |
| + results_dict['regression_std_err'], '/'.join(self.opts.metric))) |
| self._PrintConfidence(results_dict) |
| def _PrintFailedBanner(self, results_dict): |
| - print ('Bisect could not reproduce a change in the ' |
| - '%s/%s metric.' % (self.opts.metric[0], self.opts.metric[1])) |
| + if self._IsBisectModeReturnCode(): |
| + print 'Bisect could not reproduce a change in the return code.' |
| + else: |
| + print ('Bisect could not reproduce a change in the ' |
| + '%s metric.' % '/'.join(self.opts.metric)) |
| - self._PrintConfidence(results_dict) |
| def _GetViewVCLinkFromDepotAndHash(self, cl, depot): |
| info = self.source_control.QueryRevisionInfo(cl, |
| @@ -3013,6 +3066,53 @@ class BisectPerformanceMetrics(object): |
| print 'Commit : %s' % cl |
| print 'Date : %s' % info['date'] |
| + def _PrintTable(self, column_widths, row_data): |
|
tonyg
2014/04/25 23:12:07
Oops, I was thinking this would take all rows instead…
shatch
2014/04/25 23:15:10
Done.
|
| + assert len(column_widths) == len(row_data) |
| + |
| + text = '' |
| + for i in xrange(len(column_widths)): |
| + current_row_data = row_data[i].center(column_widths[i], ' ') |
| + text += ('%%%ds' % column_widths[i]) % current_row_data |
| + print text |
| + |
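As the exchange above suggests, _PrintTable as shown formats a single row, one width per cell. A standalone sketch of the same formatting, with made-up widths and data:

    def print_table_row(column_widths, row_data):
        # Center each cell in its column, then pad the cell to the column
        # width, exactly as _PrintTable does.
        assert len(column_widths) == len(row_data)
        text = ''
        for i in range(len(column_widths)):
            cell = row_data[i].center(column_widths[i], ' ')
            text += ('%%%ds' % column_widths[i]) % cell
        print(text)

    print_table_row([10, 12], ['Depot', 'Mean'])
    print_table_row([10, 12], ['chromium', '42.00'])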
| + def _PrintTestedCommitsHeader(self): |
| + if self.opts.bisect_mode == BISECT_MODE_MEAN: |
| + self._PrintTable( |
| + [20, 70, 12, 14, 13], |
| + ['Depot', 'Commit SHA', 'Mean', 'Std. Error', 'State']) |
| + elif self.opts.bisect_mode == BISECT_MODE_STD_DEV: |
| + self._PrintTable( |
| + [20, 70, 12, 14, 13], |
| + ['Depot', 'Commit SHA', 'Std. Error', 'Mean', 'State']) |
| + elif self.opts.bisect_mode == BISECT_MODE_RETURN_CODE: |
| + self._PrintTable( |
| + [20, 70, 14, 13], |
| + ['Depot', 'Commit SHA', 'Return Code', 'State']) |
| + else: |
| + assert False, "Invalid bisect_mode specified." |
| + |
| + def _PrintTestedCommitsEntry(self, current_data, cl_link, state_str): |
| + if self.opts.bisect_mode == BISECT_MODE_MEAN: |
| + std_error = '+-%.02f' % current_data['value']['std_err'] |
| + mean = '%.02f' % current_data['value']['mean'] |
| + self._PrintTable( |
| + [20, 70, 12, 14, 13], |
| + [current_data['depot'], cl_link, mean, std_error, state_str]) |
| + elif self.opts.bisect_mode == BISECT_MODE_STD_DEV: |
| + std_error = '+-%.02f' % current_data['value']['std_err'] |
| + mean = '%.02f' % current_data['value']['mean'] |
| + self._PrintTable( |
| + [20, 70, 12, 14, 13], |
| + [current_data['depot'], cl_link, std_error, mean, state_str]) |
| + elif self.opts.bisect_mode == BISECT_MODE_RETURN_CODE: |
| + mean = '%d' % current_data['value']['mean'] |
| + self._PrintTable( |
| + [20, 70, 14, 13], |
| + [current_data['depot'], cl_link, mean, state_str]) |
| + |
| def _PrintTestedCommitsTable(self, revision_data_sorted, |
| first_working_revision, last_broken_revision, confidence, |
| final_step=True): |
| @@ -3021,9 +3121,7 @@ class BisectPerformanceMetrics(object): |
| print 'Tested commits:' |
| else: |
| print 'Partial results:' |
| - print ' %20s %70s %12s %14s %13s' % ('Depot'.center(20, ' '), |
| - 'Commit SHA'.center(70, ' '), 'Mean'.center(12, ' '), |
| - 'Std. Error'.center(14, ' '), 'State'.center(13, ' ')) |
| + self._PrintTestedCommitsHeader() |
| state = 0 |
| for current_id, current_data in revision_data_sorted: |
| if current_data['value']: |
| @@ -3049,16 +3147,11 @@ class BisectPerformanceMetrics(object): |
| state_str = '' |
| state_str = state_str.center(13, ' ') |
| - std_error = ('+-%.02f' % |
| - current_data['value']['std_err']).center(14, ' ') |
| - mean = ('%.02f' % current_data['value']['mean']).center(12, ' ') |
| cl_link = self._GetViewVCLinkFromDepotAndHash(current_id, |
| current_data['depot']) |
| if not cl_link: |
| cl_link = current_id |
| - print ' %20s %70s %12s %14s %13s' % ( |
| - current_data['depot'].center(20, ' '), cl_link.center(70, ' '), |
| - mean, std_error, state_str) |
| + self._PrintTestedCommitsEntry(current_data, cl_link, state_str) |
| def _PrintReproSteps(self): |
| @@ -3433,6 +3526,7 @@ class BisectOptions(object): |
| self.target_arch = 'ia32' |
| self.builder_host = None |
| self.builder_port = None |
| + self.bisect_mode = BISECT_MODE_MEAN |
| def _CreateCommandLineParser(self): |
| """Creates a parser with bisect options. |
| @@ -3487,6 +3581,13 @@ class BisectOptions(object): |
| 'truncated mean. Values will be clamped to range [0, ' |
| '25]. Default value is 25 (highest/lowest 25% will be ' |
| 'discarded).') |
| + group.add_option('--bisect_mode', |
| + type='choice', |
| + choices=[BISECT_MODE_MEAN, BISECT_MODE_STD_DEV, |
| + BISECT_MODE_RETURN_CODE], |
| + default=BISECT_MODE_MEAN, |
| + help='The bisect mode. Choices are to bisect on the ' |
| + 'difference in mean, std_dev, or return_code.') |
| parser.add_option_group(group) |
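A minimal standalone sketch of the new option, showing optparse's choice validation (the bare parser and argv here are illustrative, not the script's own setup):

    import optparse

    parser = optparse.OptionParser()
    parser.add_option('--bisect_mode',
                      type='choice',
                      choices=['mean', 'std_dev', 'return_code'],
                      default='mean',
                      help='The bisect mode: mean, std_dev, or return_code.')

    opts, _ = parser.parse_args(['--bisect_mode', 'return_code'])
    print(opts.bisect_mode)  # -> return_code
    # An unknown value such as --bisect_mode=median makes optparse exit with
    # an "invalid choice" error, so bad modes never reach the bisect logic.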
| group = optparse.OptionGroup(parser, 'Build options') |
| @@ -3586,7 +3687,7 @@ class BisectOptions(object): |
| if not opts.bad_revision: |
| raise RuntimeError('missing required parameter: --bad_revision') |
| - if not opts.metric: |
| + if not opts.metric and opts.bisect_mode != BISECT_MODE_RETURN_CODE: |
| raise RuntimeError('missing required parameter: --metric') |
| if opts.gs_bucket: |
| @@ -3614,7 +3715,8 @@ class BisectOptions(object): |
| raise RuntimeError('missing required parameter: --working_directory') |
| metric_values = opts.metric.split('/') |
| - if len(metric_values) != 2: |
| + if (len(metric_values) != 2 and |
| + opts.bisect_mode != BISECT_MODE_RETURN_CODE): |
| raise RuntimeError("Invalid metric specified: [%s]" % opts.metric) |
| opts.metric = metric_values |
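One caveat with the validation as shown: opts.metric.split('/') still runs even in return-code mode, where --metric may legitimately be absent (the mode is only exempted from the two checks, not from the split). A hedged sketch of a guard that avoids calling split on a missing metric (function name and shape are illustrative):

    def validate_metric(metric, bisect_mode):
        # In return_code mode the metric is optional; skip parsing entirely
        # rather than calling split() on None.
        if bisect_mode == 'return_code' and not metric:
            return None
        metric_values = (metric or '').split('/')
        if len(metric_values) != 2:
            raise RuntimeError('Invalid metric specified: [%s]' % metric)
        return metric_values

    print(validate_metric('smoothness/frame_times', 'mean'))
    # -> ['smoothness', 'frame_times']
    print(validate_metric(None, 'return_code'))  # -> None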