| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
| 7 | 7 |
| 8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
| 9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
| 10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 164 BUILD_RESULT_FAIL = 1 | 164 BUILD_RESULT_FAIL = 1 |
| 165 BUILD_RESULT_SKIPPED = 2 | 165 BUILD_RESULT_SKIPPED = 2 |
| 166 | 166 |
| 167 # Maximum time in seconds to wait after posting build request to tryserver. | 167 # Maximum time in seconds to wait after posting build request to tryserver. |
| 168 # TODO: Change these values based on the actual time taken by buildbots on | 168 # TODO: Change these values based on the actual time taken by buildbots on |
| 169 # the tryserver. | 169 # the tryserver. |
| 170 MAX_MAC_BUILD_TIME = 14400 | 170 MAX_MAC_BUILD_TIME = 14400 |
| 171 MAX_WIN_BUILD_TIME = 14400 | 171 MAX_WIN_BUILD_TIME = 14400 |
| 172 MAX_LINUX_BUILD_TIME = 14400 | 172 MAX_LINUX_BUILD_TIME = 14400 |
| 173 | 173 |
| 174 # The confidence percentage at which confidence can be considered "high". |
| 175 HIGH_CONFIDENCE = 95 |
| 176 |
| 174 # Patch template to add a new file, DEPS.sha under src folder. | 177 # Patch template to add a new file, DEPS.sha under src folder. |
| 175 # This file contains SHA1 value of the DEPS changes made while bisecting | 178 # This file contains SHA1 value of the DEPS changes made while bisecting |
| 176 # dependency repositories. This patch is sent along with DEPS patch to tryserver. | 179 # dependency repositories. This patch is sent along with DEPS patch to tryserver. |
| 177 # When a build request is posted with a patch, bisect builders on tryserver, | 180 # When a build request is posted with a patch, bisect builders on tryserver, |
| 178 # once build is produced, it reads SHA value from this file and appends it | 181 # once build is produced, it reads SHA value from this file and appends it |
| 179 # to build archive filename. | 182 # to build archive filename. |
| 180 DEPS_SHA_PATCH = """diff --git src/DEPS.sha src/DEPS.sha | 183 DEPS_SHA_PATCH = """diff --git src/DEPS.sha src/DEPS.sha |
| 181 new file mode 100644 | 184 new file mode 100644 |
| 182 --- /dev/null | 185 --- /dev/null |
| 183 +++ src/DEPS.sha | 186 +++ src/DEPS.sha |
| 184 @@ -0,0 +1 @@ | 187 @@ -0,0 +1 @@ |
| 185 +%(deps_sha)s | 188 +%(deps_sha)s |
| 186 """ | 189 """ |
| 187 | 190 |
| 188 # The possible values of the --bisect_mode flag, which determines what to | 191 # The possible values of the --bisect_mode flag, which determines what to |
| 189 # use when classifying a revision as "good" or "bad". | 192 # use when classifying a revision as "good" or "bad". |
| 190 BISECT_MODE_MEAN = 'mean' | 193 BISECT_MODE_MEAN = 'mean' |
| 191 BISECT_MODE_STD_DEV = 'std_dev' | 194 BISECT_MODE_STD_DEV = 'std_dev' |
| 192 BISECT_MODE_RETURN_CODE = 'return_code' | 195 BISECT_MODE_RETURN_CODE = 'return_code' |
| 193 | 196 |
| 194 # The perf dashboard specifically looks for the string | 197 # The perf dashboard looks for a string like "Estimated Confidence: 95%" |
| 195 # "Estimated Confidence: 95%" to decide whether or not to cc the author(s). | 198 # to decide whether or not to cc the author(s). If you change this, please |
| 196 # If you change this, please update the perf dashboard as well. | 199 # update the perf dashboard as well. |
| 197 RESULTS_BANNER = """ | 200 RESULTS_BANNER = """ |
| 198 ===== BISECT JOB RESULTS ===== | 201 ===== BISECT JOB RESULTS ===== |
| 199 Status: %(status)s | 202 Status: %(status)s |
| 200 | 203 |
| 201 Test Command: %(command)s | 204 Test Command: %(command)s |
| 202 Test Metric: %(metrics)s | 205 Test Metric: %(metrics)s |
| 203 Relative Change: %(change)s | 206 Relative Change: %(change)s |
| 204 Estimated Confidence: %(confidence)d%%""" | 207 Estimated Confidence: %(confidence)d%%""" |
| 205 | 208 |
| 206 # The perf dashboard specifically looks for the string | 209 # The perf dashboard specifically looks for the string |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 273 their differences aren't due to chance alone. | 276 their differences aren't due to chance alone. |
| 274 | 277 |
| 275 | 278 |
| 276 Args: | 279 Args: |
| 277 good_results_lists: A list of lists of "good" result numbers. | 280 good_results_lists: A list of lists of "good" result numbers. |
| 278 bad_results_lists: A list of lists of "bad" result numbers. | 281 bad_results_lists: A list of lists of "bad" result numbers. |
| 279 | 282 |
| 280 Returns: | 283 Returns: |
| 281 A number in the range [0, 100]. | 284 A number in the range [0, 100]. |
| 282 """ | 285 """ |
| 283 if not good_results_lists or not bad_results_lists: | 286 # If there's only one item in either list, this means only one revision was |
| 287 # classified good or bad; this isn't good enough evidence to make a decision. |
| 288 # If an empty list was passed, that also implies zero confidence. |
| 289 if len(good_results_lists) <= 1 or len(bad_results_lists) <= 1: |
| 284 return 0.0 | 290 return 0.0 |
| 285 | 291 |
| 286 # Flatten the lists of results lists. | 292 # Flatten the lists of results lists. |
| 287 sample1 = sum(good_results_lists, []) | 293 sample1 = sum(good_results_lists, []) |
| 288 sample2 = sum(bad_results_lists, []) | 294 sample2 = sum(bad_results_lists, []) |
| 295 |
| 296 # If there were only empty lists in either of the lists (this is unexpected |
| 297 # and normally shouldn't happen), then we also want to return 0. |
| 289 if not sample1 or not sample2: | 298 if not sample1 or not sample2: |
| 290 return 0.0 | 299 return 0.0 |
| 291 | 300 |
| 292 # The p-value is approximately the probability of obtaining the given set | 301 # The p-value is approximately the probability of obtaining the given set |
| 293 # of good and bad values just by chance. | 302 # of good and bad values just by chance. |
| 294 _, _, p_value = ttest.WelchsTTest(sample1, sample2) | 303 _, _, p_value = ttest.WelchsTTest(sample1, sample2) |
| 295 return 100.0 * (1.0 - p_value) | 304 return 100.0 * (1.0 - p_value) |
| 296 | 305 |
| 297 | 306 |
| 298 def GetSHA1HexDigest(contents): | 307 def GetSHA1HexDigest(contents): |
| (...skipping 2583 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2882 | 2891 |
| 2883 self._PrintTestedCommitsTable(revision_data_sorted, | 2892 self._PrintTestedCommitsTable(revision_data_sorted, |
| 2884 results_dict['first_working_revision'], | 2893 results_dict['first_working_revision'], |
| 2885 results_dict['last_broken_revision'], | 2894 results_dict['last_broken_revision'], |
| 2886 100, final_step=False) | 2895 100, final_step=False) |
| 2887 | 2896 |
| 2888 def _ConfidenceLevelStatus(self, results_dict): | 2897 def _ConfidenceLevelStatus(self, results_dict): |
| 2889 if not results_dict['confidence']: | 2898 if not results_dict['confidence']: |
| 2890 return None | 2899 return None |
| 2891 confidence_status = 'Successful with %(level)s confidence%(warning)s.' | 2900 confidence_status = 'Successful with %(level)s confidence%(warning)s.' |
| 2892 if results_dict['confidence'] >= 95: | 2901 if results_dict['confidence'] >= HIGH_CONFIDENCE: |
| 2893 level = 'high' | 2902 level = 'high' |
| 2894 else: | 2903 else: |
| 2895 level = 'low' | 2904 level = 'low' |
| 2896 warning = ' and warnings' | 2905 warning = ' and warnings' |
| 2897 if not self.warnings: | 2906 if not self.warnings: |
| 2898 warning = '' | 2907 warning = '' |
| 2899 return confidence_status % {'level': level, 'warning': warning} | 2908 return confidence_status % {'level': level, 'warning': warning} |
| 2900 | 2909 |
| 2901 def _GetViewVCLinkFromDepotAndHash(self, cl, depot): | 2910 def _GetViewVCLinkFromDepotAndHash(self, cl, depot): |
| 2902 info = self.source_control.QueryRevisionInfo(cl, | 2911 info = self.source_control.QueryRevisionInfo(cl, |
| (...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3166 'confidence': confidence, | 3175 'confidence': confidence, |
| 3167 } | 3176 } |
| 3168 | 3177 |
| 3169 def _CheckForWarnings(self, results_dict): | 3178 def _CheckForWarnings(self, results_dict): |
| 3170 if len(results_dict['culprit_revisions']) > 1: | 3179 if len(results_dict['culprit_revisions']) > 1: |
| 3171 self.warnings.append('Due to build errors, regression range could ' | 3180 self.warnings.append('Due to build errors, regression range could ' |
| 3172 'not be narrowed down to a single commit.') | 3181 'not be narrowed down to a single commit.') |
| 3173 if self.opts.repeat_test_count == 1: | 3182 if self.opts.repeat_test_count == 1: |
| 3174 self.warnings.append('Tests were only set to run once. This may ' | 3183 self.warnings.append('Tests were only set to run once. This may ' |
| 3175 'be insufficient to get meaningful results.') | 3184 'be insufficient to get meaningful results.') |
| 3176 if results_dict['confidence'] < 100: | 3185 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE: |
| 3177 if results_dict['confidence']: | 3186 self.warnings.append('Confidence is not high. Try bisecting again ' |
| 3178 self.warnings.append( | 3187 'with increased repeat_count, larger range, or ' |
| 3179 'Confidence is less than 100%. There could be other candidates ' | 3188 'on another metric.') |
| 3180 'for this regression. Try bisecting again with increased ' | 3189 if not results_dict['confidence']: |
| 3181 'repeat_count or on a sub-metric that shows the regression more ' | 3190 self.warnings.append('Confidence score is 0%. Try bisecting again on ' |
| 3182 'clearly.') | 3191 'another platform or another metric.') |
| 3183 else: | |
| 3184 self.warnings.append( | |
| 3185 'Confidence is 0%. Try bisecting again on another platform, with ' | |
| 3186 'increased repeat_count or on a sub-metric that shows the ' | |
| 3187 'regression more clearly.') | |
| 3188 | 3192 |
| 3189 def FormatAndPrintResults(self, bisect_results): | 3193 def FormatAndPrintResults(self, bisect_results): |
| 3190 """Prints the results from a bisection run in a readable format. | 3194 """Prints the results from a bisection run in a readable format. |
| 3191 | 3195 |
| 3192 Args: | 3196 Args: |
| 3193 bisect_results: The results from a bisection test run. | 3197 bisect_results: The results from a bisection test run. |
| 3194 """ | 3198 """ |
| 3195 revision_data = bisect_results['revision_data'] | 3199 revision_data = bisect_results['revision_data'] |
| 3196 revision_data_sorted = sorted(revision_data.iteritems(), | 3200 revision_data_sorted = sorted(revision_data.iteritems(), |
| 3197 key = lambda x: x[1]['sort']) | 3201 key = lambda x: x[1]['sort']) |
| (...skipping 460 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3658 # bugs. If you change this, please update the perf dashboard as well. | 3662 # bugs. If you change this, please update the perf dashboard as well. |
| 3659 bisect_utils.OutputAnnotationStepStart('Results') | 3663 bisect_utils.OutputAnnotationStepStart('Results') |
| 3660 print 'Error: %s' % e.message | 3664 print 'Error: %s' % e.message |
| 3661 if opts.output_buildbot_annotations: | 3665 if opts.output_buildbot_annotations: |
| 3662 bisect_utils.OutputAnnotationStepClosed() | 3666 bisect_utils.OutputAnnotationStepClosed() |
| 3663 return 1 | 3667 return 1 |
| 3664 | 3668 |
| 3665 | 3669 |
| 3666 if __name__ == '__main__': | 3670 if __name__ == '__main__': |
| 3667 sys.exit(main()) | 3671 sys.exit(main()) |
| OLD | NEW |