OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright 2013 The Chromium Authors. All rights reserved. | 2 # Copyright 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Chromium auto-bisect tool | 6 """Chromium auto-bisect tool |
7 | 7 |
8 This script bisects a range of commits using binary search. It starts by getting | 8 This script bisects a range of commits using binary search. It starts by getting |
9 reference values for the specified "good" and "bad" commits. Then, for revisions | 9 reference values for the specified "good" and "bad" commits. Then, for revisions |
10 in between, it will get builds, run tests and classify intermediate revisions as | 10 in between, it will get builds, run tests and classify intermediate revisions as |
(...skipping 68 matching lines...) |
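The remainder of the module docstring is elided above. As a reviewer's aide, the behaviour it describes reduces to an ordinary binary search over the revision range. The sketch below is illustrative only; the real script builds each candidate, runs the perf test, and classifies the revision by confidence rather than calling a simple is_bad() predicate.

  # Illustrative sketch, not the script's actual control flow.
  def _BisectSketch(revisions, is_bad):
    # revisions[0] is the known-good revision, revisions[-1] the known-bad one.
    low, high = 0, len(revisions) - 1
    while high - low > 1:
      mid = (low + high) / 2      # integer division under this file's Python 2
      if is_bad(revisions[mid]):
        high = mid                # the first bad revision is at or before mid
      else:
        low = mid                 # the first bad revision is after mid
    return revisions[high]        # the suspected culprit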
79 # Maximum time in seconds to wait after posting build request to the try server. | 79 # Maximum time in seconds to wait after posting build request to the try server. |
80 # TODO: Change these values based on the actual time taken by buildbots on | 80 # TODO: Change these values based on the actual time taken by buildbots on |
81 # the try server. | 81 # the try server. |
82 MAX_MAC_BUILD_TIME = 14400 | 82 MAX_MAC_BUILD_TIME = 14400 |
83 MAX_WIN_BUILD_TIME = 14400 | 83 MAX_WIN_BUILD_TIME = 14400 |
84 MAX_LINUX_BUILD_TIME = 14400 | 84 MAX_LINUX_BUILD_TIME = 14400 |
85 | 85 |
86 # The confidence percentage we require to consider the initial range a | 86 # The confidence percentage we require to consider the initial range a |
87 # regression based on the test results of the initial good and bad revisions. | 87 # regression based on the test results of the initial good and bad revisions. |
88 REGRESSION_CONFIDENCE = 80 | 88 REGRESSION_CONFIDENCE = 80 |
| 89 # How many times to repeat the test on the last known good and first known bad |
| 90 # revisions, in order to compute a more accurate confidence score for the |
| 91 # regression culprit. |
| 92 BORDER_REVISIONS_EXTRA_RUNS = 2 |
89 | 93 |
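A small worked example of what the new constant means in practice; the repeat count of 10 below is made up for illustration, the real value comes from --repeat_test_count.

  BORDER_REVISIONS_EXTRA_RUNS = 2             # as defined above
  repeat_test_count = 10                      # hypothetical base repeat count
  extra_runs = repeat_test_count * BORDER_REVISIONS_EXTRA_RUNS   # 10 * 2 = 20
  # The last known good and first known bad revisions get these 20 extra runs
  # on top of the samples they already have; intermediate revisions stay at
  # the base 10 runs.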
90 # Patch template to add a new file, DEPS.sha, under the src folder. | 94 # Patch template to add a new file, DEPS.sha, under the src folder. |
91 # The file contains the SHA1 hash of the DEPS changes made while bisecting | 95 # The file contains the SHA1 hash of the DEPS changes made while bisecting |
92 # dependency repositories, and is sent to the try server along with the DEPS | 96 # dependency repositories, and is sent to the try server along with the DEPS |
93 # patch. When a build request is posted with a patch, the bisect builders on | 97 # patch. When a build request is posted with a patch, the bisect builders on |
94 # the try server read the SHA1 value from this file once the build is | 98 # the try server read the SHA1 value from this file once the build is |
95 # produced and append it to the build archive filename. | 99 # produced and append it to the build archive filename. |
96 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha | 100 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha |
97 new file mode 100644 | 101 new file mode 100644 |
98 --- /dev/null | 102 --- /dev/null |
(...skipping 1166 matching lines...) |
1265 return command_to_run.replace(cmd_browser, | 1269 return command_to_run.replace(cmd_browser, |
1266 'android-chromium-testshell') | 1270 'android-chromium-testshell') |
1267 elif (commit_position >= 276628 and | 1271 elif (commit_position >= 276628 and |
1268 cmd_browser == 'android-chromium-testshell'): | 1272 cmd_browser == 'android-chromium-testshell'): |
1269 return command_to_run.replace(cmd_browser, | 1273 return command_to_run.replace(cmd_browser, |
1270 'android-chrome-shell') | 1274 'android-chrome-shell') |
1271 return command_to_run | 1275 return command_to_run |
1272 | 1276 |
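For context on the branch shown above (its opening condition is in the elided lines), an illustrative call could look like the following; the benchmark command itself is an assumption, not something taken from this CL.

  # Hypothetical input command; only the --browser value matters here.
  cmd = ('tools/perf/run_benchmark page_cycler.typical_25'
         ' --browser=android-chromium-testshell')
  # For a revision whose commit position is >= 276628, GetCompatibleCommand
  # returns the same string with '--browser=android-chrome-shell' substituted,
  # so the command names the browser target that exists at that revision.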
1273 def RunPerformanceTestAndParseResults( | 1277 def RunPerformanceTestAndParseResults( |
1274 self, command_to_run, metric, reset_on_first_run=False, | 1278 self, command_to_run, metric, reset_on_first_run=False, |
1275 upload_on_last_run=False, results_label=None): | 1279 upload_on_last_run=False, results_label=None, test_run_multiplier=1): |
1276 """Runs a performance test on the current revision and parses the results. | 1280 """Runs a performance test on the current revision and parses the results. |
1277 | 1281 |
1278 Args: | 1282 Args: |
1279 command_to_run: The command to be run to execute the performance test. | 1283 command_to_run: The command to be run to execute the performance test. |
1280 metric: The metric to parse out from the results of the performance test. | 1284 metric: The metric to parse out from the results of the performance test. |
1281 This is the result chart name and trace name, separated by slash. | 1285 This is the result chart name and trace name, separated by slash. |
1282 May be None for perf try jobs. | 1286 May be None for perf try jobs. |
1283 reset_on_first_run: If True, pass the flag --reset-results on first run. | 1287 reset_on_first_run: If True, pass the flag --reset-results on first run. |
1284 upload_on_last_run: If True, pass the flag --upload-results on last run. | 1288 upload_on_last_run: If True, pass the flag --upload-results on last run. |
1285 results_label: A value for the option flag --results-label. | 1289 results_label: A value for the option flag --results-label. |
1286 The arguments reset_on_first_run, upload_on_last_run and results_label | 1290 The arguments reset_on_first_run, upload_on_last_run and results_label |
1287 are all ignored if the test is not a Telemetry test. | 1291 are all ignored if the test is not a Telemetry test. |
| 1292 test_run_multiplier: Factor by which to multiply the number of test runs |
| 1293 and the timeout period specified in self.opts. |
1288 | 1294 |
1289 Returns: | 1295 Returns: |
1290 (values dict, 0) if --debug_ignore_perf_test was passed. | 1296 (values dict, 0) if --debug_ignore_perf_test was passed. |
1291 (values dict, 0, test output) if the test was run successfully. | 1297 (values dict, 0, test output) if the test was run successfully. |
1292 (error message, -1) if the test couldn't be run. | 1298 (error message, -1) if the test couldn't be run. |
1293 (error message, -1, test output) if the test ran but there was an error. | 1299 (error message, -1, test output) if the test ran but there was an error. |
1294 """ | 1300 """ |
1295 success_code, failure_code = 0, -1 | 1301 success_code, failure_code = 0, -1 |
1296 | 1302 |
1297 if self.opts.debug_ignore_perf_test: | 1303 if self.opts.debug_ignore_perf_test: |
(...skipping 21 matching lines...) |
1319 if not _GenerateProfileIfNecessary(args): | 1325 if not _GenerateProfileIfNecessary(args): |
1320 err_text = 'Failed to generate profile for performance test.' | 1326 err_text = 'Failed to generate profile for performance test.' |
1321 return (err_text, failure_code) | 1327 return (err_text, failure_code) |
1322 | 1328 |
1323 is_telemetry = bisect_utils.IsTelemetryCommand(command_to_run) | 1329 is_telemetry = bisect_utils.IsTelemetryCommand(command_to_run) |
1324 | 1330 |
1325 start_time = time.time() | 1331 start_time = time.time() |
1326 | 1332 |
1327 metric_values = [] | 1333 metric_values = [] |
1328 output_of_all_runs = '' | 1334 output_of_all_runs = '' |
1329 for i in xrange(self.opts.repeat_test_count): | 1335 repeat_count = self.opts.repeat_test_count * test_run_multiplier |
| 1336 for i in xrange(repeat_count): |
1330 # Can ignore the return code since if the tests fail, it won't return 0. | 1337 # Can ignore the return code since if the tests fail, it won't return 0. |
1331 current_args = copy.copy(args) | 1338 current_args = copy.copy(args) |
1332 if is_telemetry: | 1339 if is_telemetry: |
1333 if i == 0 and reset_on_first_run: | 1340 if i == 0 and reset_on_first_run: |
1334 current_args.append('--reset-results') | 1341 current_args.append('--reset-results') |
1335 if i == self.opts.repeat_test_count - 1 and upload_on_last_run: | 1342 if i == repeat_count - 1 and upload_on_last_run: |
1336 current_args.append('--upload-results') | 1343 current_args.append('--upload-results') |
1337 if results_label: | 1344 if results_label: |
1338 current_args.append('--results-label=%s' % results_label) | 1345 current_args.append('--results-label=%s' % results_label) |
1339 try: | 1346 try: |
(...skipping 21 matching lines...) |
1361 if parsed_metric: | 1368 if parsed_metric: |
1362 metric_values.append(math_utils.Mean(parsed_metric)) | 1369 metric_values.append(math_utils.Mean(parsed_metric)) |
1363 # If we're bisecting on a metric (i.e., changes in the mean or | 1370 # If we're bisecting on a metric (i.e., changes in the mean or |
1364 # standard deviation) and no metric values are produced, bail out. | 1371 # standard deviation) and no metric values are produced, bail out. |
1365 if not metric_values: | 1372 if not metric_values: |
1366 break | 1373 break |
1367 elif self._IsBisectModeReturnCode(): | 1374 elif self._IsBisectModeReturnCode(): |
1368 metric_values.append(return_code) | 1375 metric_values.append(return_code) |
1369 | 1376 |
1370 elapsed_minutes = (time.time() - start_time) / 60.0 | 1377 elapsed_minutes = (time.time() - start_time) / 60.0 |
1371 if elapsed_minutes >= self.opts.max_time_minutes: | 1378 time_limit = self.opts.max_time_minutes * test_run_multiplier |
| 1379 if elapsed_minutes >= time_limit: |
1372 break | 1380 break |
1373 | 1381 |
1374 if metric and len(metric_values) == 0: | 1382 if metric and len(metric_values) == 0: |
1375 err_text = 'Metric %s was not found in the test output.' % metric | 1383 err_text = 'Metric %s was not found in the test output.' % metric |
1376 # TODO(qyearsley): Consider also getting and displaying a list of metrics | 1384 # TODO(qyearsley): Consider also getting and displaying a list of metrics |
1377 # that were found in the output here. | 1385 # that were found in the output here. |
1378 return (err_text, failure_code, output_of_all_runs) | 1386 return (err_text, failure_code, output_of_all_runs) |
1379 | 1387 |
1380 # If we're bisecting on return codes, we're really just looking for zero vs | 1388 # If we're bisecting on return codes, we're really just looking for zero vs |
1381 # non-zero. | 1389 # non-zero. |
(...skipping 84 matching lines...) |
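A caller-side sketch of the return contract documented in the RunPerformanceTestAndParseResults docstring above; it mirrors the handling RunTest already performs at 'if results[1] != 0' further down and adds no new behaviour.

  # Sketch only: results[1] is 0 on success and -1 on failure; results[0] is a
  # dict of parsed values on success, or an error message string otherwise.
  def _HandleResults(results):
    if results[1] != 0:
      return results              # propagate (error_message, -1[, output])
    return results[0]['values']   # raw samples, later consumed by
                                  # _ConfidenceExtraTestRuns()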
1466 output = bisect_utils.CheckRunGit(cmd) | 1474 output = bisect_utils.CheckRunGit(cmd) |
1467 | 1475 |
1468 files = output.splitlines() | 1476 files = output.splitlines() |
1469 | 1477 |
1470 if len(files) == 1 and files[0] == 'DEPS': | 1478 if len(files) == 1 and files[0] == 'DEPS': |
1471 return True | 1479 return True |
1472 | 1480 |
1473 return False | 1481 return False |
1474 | 1482 |
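The git invocation that fills `output` above is in the elided lines; a self-contained equivalent of the same DEPS-only check, written against plain git rather than bisect_utils (the exact flags are an assumption about intent, not copied from this file), would be:

  import subprocess

  def _ChangesOnlyDEPS(revision):
    # Lists the files touched by the commit; the change is DEPS-only when the
    # sole entry is 'DEPS'.
    output = subprocess.check_output(
        ['git', 'diff-tree', '--no-commit-id', '--name-only', '-r', revision])
    return output.splitlines() == ['DEPS']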
1475 def RunTest(self, revision, depot, command, metric, skippable=False, | 1483 def RunTest(self, revision, depot, command, metric, skippable=False, |
1476 skip_sync=False, create_patch=False, force_build=False): | 1484 skip_sync=False, create_patch=False, force_build=False, |
| 1485 test_run_multiplier=1): |
1477 """Performs a full sync/build/run of the specified revision. | 1486 """Performs a full sync/build/run of the specified revision. |
1478 | 1487 |
1479 Args: | 1488 Args: |
1480 revision: The revision to sync to. | 1489 revision: The revision to sync to. |
1481 depot: The depot that's being used at the moment (src, webkit, etc.) | 1490 depot: The depot that's being used at the moment (src, webkit, etc.) |
1482 command: The command to execute the performance test. | 1491 command: The command to execute the performance test. |
1483 metric: The performance metric being tested. | 1492 metric: The performance metric being tested. |
1484 skip_sync: Skip the sync step. | 1493 skip_sync: Skip the sync step. |
1485 create_patch: Create a patch with any locally modified files. | 1494 create_patch: Create a patch with any locally modified files. |
1486 force_build: Force a local build. | 1495 force_build: Force a local build. |
| 1496 test_run_multiplier: Factor by which to multiply the number of test runs |
| 1497 and the timeout period specified in self.opts. |
1487 | 1498 |
1488 Returns: | 1499 Returns: |
1489 On success, a tuple containing the results of the performance test. | 1500 On success, a tuple containing the results of the performance test. |
1490 Otherwise, a tuple with the error message. | 1501 Otherwise, a tuple with the error message. |
1491 """ | 1502 """ |
1492 logging.info('Running RunTest with rev "%s", command "%s"', | 1503 logging.info('Running RunTest with rev "%s", command "%s"', |
1493 revision, command) | 1504 revision, command) |
1494 # Decide which sync program to use. | 1505 # Decide which sync program to use. |
1495 sync_client = None | 1506 sync_client = None |
1496 if depot == 'chromium' or depot == 'android-chrome': | 1507 if depot == 'chromium' or depot == 'android-chrome': |
(...skipping 21 matching lines...) |
1518 depot, revision=revision_to_build, create_patch=create_patch) | 1529 depot, revision=revision_to_build, create_patch=create_patch) |
1519 if not build_success: | 1530 if not build_success: |
1520 return ('Failed to build revision: [%s]' % str(revision), | 1531 return ('Failed to build revision: [%s]' % str(revision), |
1521 BUILD_RESULT_FAIL) | 1532 BUILD_RESULT_FAIL) |
1522 after_build_time = time.time() | 1533 after_build_time = time.time() |
1523 | 1534 |
1524 # Possibly alter the command. | 1535 # Possibly alter the command. |
1525 command = self.GetCompatibleCommand(command, revision, depot) | 1536 command = self.GetCompatibleCommand(command, revision, depot) |
1526 | 1537 |
1527 # Run the command and get the results. | 1538 # Run the command and get the results. |
1528 results = self.RunPerformanceTestAndParseResults(command, metric) | 1539 results = self.RunPerformanceTestAndParseResults( |
| 1540 command, metric, test_run_multiplier=test_run_multiplier) |
1529 | 1541 |
1530 # Restore build output directory once the tests are done, to avoid | 1542 # Restore build output directory once the tests are done, to avoid |
1531 # any discrepancies. | 1543 # any discrepancies. |
1532 if self.IsDownloadable(depot) and revision: | 1544 if self.IsDownloadable(depot) and revision: |
1533 self.BackupOrRestoreOutputDirectory(restore=True) | 1545 self.BackupOrRestoreOutputDirectory(restore=True) |
1534 | 1546 |
1535 # A value other than 0 indicates that the test couldn't be run, and results | 1547 # A value other than 0 indicates that the test couldn't be run, and results |
1536 # should also include an error message. | 1548 # should also include an error message. |
1537 if results[1] != 0: | 1549 if results[1] != 0: |
1538 return results | 1550 return results |
(...skipping 893 matching lines...) |
2432 | 2444 |
2433 # If the build is broken, remove it and redo search. | 2445 # If the build is broken, remove it and redo search. |
2434 revision_states.pop(next_revision_index) | 2446 revision_states.pop(next_revision_index) |
2435 | 2447 |
2436 max_revision -= 1 | 2448 max_revision -= 1 |
2437 | 2449 |
2438 if self.opts.output_buildbot_annotations: | 2450 if self.opts.output_buildbot_annotations: |
2439 self.printer.PrintPartialResults(bisect_state) | 2451 self.printer.PrintPartialResults(bisect_state) |
2440 bisect_utils.OutputAnnotationStepClosed() | 2452 bisect_utils.OutputAnnotationStepClosed() |
2441 | 2453 |
| 2454 |
| 2455 self._ConfidenceExtraTestRuns(min_revision_state, max_revision_state, |
| 2456 command_to_run, metric) |
2442 results = BisectResults(bisect_state, self.depot_registry, self.opts, | 2457 results = BisectResults(bisect_state, self.depot_registry, self.opts, |
2443 self.warnings) | 2458 self.warnings) |
2444 | 2459 |
2445 self._GatherResultsFromRevertedCulpritCL( | 2460 self._GatherResultsFromRevertedCulpritCL( |
2446 results, target_depot, command_to_run, metric) | 2461 results, target_depot, command_to_run, metric) |
2447 | 2462 |
2448 return results | 2463 return results |
2449 else: | 2464 else: |
2450 # Weren't able to sync and retrieve the revision range. | 2465 # Weren't able to sync and retrieve the revision range. |
2451 error = ('An error occurred attempting to retrieve revision range: ' | 2466 error = ('An error occurred attempting to retrieve revision range: ' |
2452 '[%s..%s]' % (good_revision, bad_revision)) | 2467 '[%s..%s]' % (good_revision, bad_revision)) |
2453 return BisectResults(error=error) | 2468 return BisectResults(error=error) |
2454 | 2469 |
| 2470 def _ConfidenceExtraTestRuns(self, good_state, bad_state, command_to_run, |
| 2471 metric): |
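| """Re-runs the test on the last known good and first known bad revisions |
| to collect extra samples (scaled by BORDER_REVISIONS_EXTRA_RUNS) so that |
| the confidence score for the suspected culprit rests on more data.""" |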
| 2472 if (bool(good_state.passed) != bool(bad_state.passed) |
| 2473 and good_state.passed not in ('Skipped', 'Build Failed') |
| 2474 and bad_state.passed not in ('Skipped', 'Build Failed')): |
| 2475 for state in (good_state, bad_state): |
| 2476 run_results = self.RunTest( |
| 2477 state.revision, |
| 2478 state.depot, |
| 2479 command_to_run, |
| 2480 metric, |
| 2481 test_run_multiplier=BORDER_REVISIONS_EXTRA_RUNS) |
| 2482 # TODO: Check that extending these values is the right way to merge runs. |
| 2483 state.value['values'].extend(run_results[0]['values']) |
| 2484 |
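A note on why the extra border runs are worth the builder time: the standard error of a mean shrinks with the square root of the sample count, so the illustrative arithmetic below shows roughly how much the estimate tightens. The actual confidence computation lives in BisectResults, outside this hunk.

  import math
  BORDER_REVISIONS_EXTRA_RUNS = 2                     # as defined above
  base_runs = 10                                      # hypothetical repeat count
  total_runs = base_runs * (1 + BORDER_REVISIONS_EXTRA_RUNS)   # 10 + 20 = 30
  shrink = math.sqrt(float(base_runs) / total_runs)   # ~0.58
  # The standard error on the border revisions drops to roughly 58% of what
  # the base repeat count alone would give.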
2455 | 2485 |
2456 def _IsPlatformSupported(): | 2486 def _IsPlatformSupported(): |
2457 """Checks that this platform and build system are supported. | 2487 """Checks that this platform and build system are supported. |
2458 | 2488 |
2459 Args: | 2489 Args: |
2460 opts: The options parsed from the command line. | 2490 opts: The options parsed from the command line. |
2461 | 2491 |
2462 Returns: | 2492 Returns: |
2463 True if the platform and build system are supported. | 2493 True if the platform and build system are supported. |
2464 """ | 2494 """ |
(...skipping 355 matching lines...) |
2820 # bugs. If you change this, please update the perf dashboard as well. | 2850 # bugs. If you change this, please update the perf dashboard as well. |
2821 bisect_utils.OutputAnnotationStepStart('Results') | 2851 bisect_utils.OutputAnnotationStepStart('Results') |
2822 print 'Runtime Error: %s' % e | 2852 print 'Runtime Error: %s' % e |
2823 if opts.output_buildbot_annotations: | 2853 if opts.output_buildbot_annotations: |
2824 bisect_utils.OutputAnnotationStepClosed() | 2854 bisect_utils.OutputAnnotationStepClosed() |
2825 return 1 | 2855 return 1 |
2826 | 2856 |
2827 | 2857 |
2828 if __name__ == '__main__': | 2858 if __name__ == '__main__': |
2829 sys.exit(main()) | 2859 sys.exit(main()) |