OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
7 | 7 |
8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
(...skipping 59 matching lines...)
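To make the search strategy described in the docstring concrete, here is a minimal sketch of bisection over an ordered revision list. It is an illustration only, not this script's implementation; is_good stands in for the sync/build/run-and-compare step the real tool performs per revision.

def bisect_revisions(revisions, is_good):
  """Sketch of binary search over revisions (oldest first).

  revisions[0] is the last known-good revision and revisions[-1] is the
  first known-bad revision; is_good(rev) runs the perf test at rev and
  compares the metric against the known-good baseline.
  """
  lo, hi = 0, len(revisions) - 1
  while hi - lo > 1:
    mid = (lo + hi) / 2          # integer division in Python 2
    if is_good(revisions[mid]):
      lo = mid                   # culprit is later than mid
    else:
      hi = mid                   # culprit is mid or earlier
  return revisions[hi]           # first bad revision

With N candidate revisions this needs roughly log2(N) build-and-test cycles rather than N.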
70 # Maximum time in seconds to wait after posting build request to the try server. | 70 # Maximum time in seconds to wait after posting build request to the try server. |
71 # TODO: Change these values based on the actual time taken by buildbots on | 71 # TODO: Change these values based on the actual time taken by buildbots on |
72 # the try server. | 72 # the try server. |
73 MAX_MAC_BUILD_TIME = 14400 | 73 MAX_MAC_BUILD_TIME = 14400 |
74 MAX_WIN_BUILD_TIME = 14400 | 74 MAX_WIN_BUILD_TIME = 14400 |
75 MAX_LINUX_BUILD_TIME = 14400 | 75 MAX_LINUX_BUILD_TIME = 14400 |
76 | 76 |
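As an illustration of how such a time cap might be enforced, a hypothetical polling helper is sketched below; is_build_ready and the poll interval are assumptions for illustration, not part of this script.

import time

def WaitForBuild(is_build_ready, max_wait=MAX_LINUX_BUILD_TIME,
                 poll_interval=60):
  # Hypothetical helper: poll the try server / build archive until the
  # requested build appears, giving up after max_wait seconds (4 hours
  # with the constants above).
  deadline = time.time() + max_wait
  while time.time() < deadline:
    if is_build_ready():
      return True
    time.sleep(poll_interval)
  return False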
77 # The confidence percentage we require to consider the initial range a | 77 # The confidence percentage we require to consider the initial range a |
78 # regression based on the test results of the initial good and bad revisions. | 78 # regression based on the test results of the initial good and bad revisions. |
79 REGRESSION_CONFIDENCE = 80 | 79 REGRESSION_CONFIDENCE = 80 |
80 # How many times to repeat the test on the last known good and first known bad | |
81 # revisions in order to assess a more accurate confidence score in the | |
82 # regression culprit. | |
83 BORDER_REVISIONS_EXTRA_RUNS = 2 | |
80 | 84 |
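For a rough sense of scale (using hypothetical option values, not this script's defaults), the repeat count and time budget on the boundary revisions grow by a factor of 1 + BORDER_REVISIONS_EXTRA_RUNS, matching the arithmetic in RunPerformanceTestAndParseResults further down:

# Hypothetical option values, for illustration only.
repeat_test_count = 20   # normal repeats per revision
max_time_minutes = 25    # normal per-revision time budget

factor = 1 + BORDER_REVISIONS_EXTRA_RUNS          # 1 + 2 = 3
repeat_count = repeat_test_count * factor         # 60 repeats
time_limit = max_time_minutes * factor            # 75 minutes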
81 # Patch template to add a new file, DEPS.sha, under the src folder. | 85 # Patch template to add a new file, DEPS.sha, under the src folder. |
82 # This file contains the SHA1 value of the DEPS changes made while bisecting | 86 # This file contains the SHA1 value of the DEPS changes made while bisecting |
83 # dependency repositories. This patch is sent along with the DEPS patch to the try server. | 87 # dependency repositories. This patch is sent along with the DEPS patch to the try server. |
84 # When a build request is posted with a patch, the bisect builders on the try server, | 88 # When a build request is posted with a patch, the bisect builders on the try server, |
85 # once the build is produced, read the SHA1 value from this file and append it | 89 # once the build is produced, read the SHA1 value from this file and append it |
86 # to the build archive filename. | 90 # to the build archive filename. |
87 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha | 91 DEPS_SHA_PATCH = """diff --git DEPS.sha DEPS.sha |
88 new file mode 100644 | 92 new file mode 100644 |
89 --- /dev/null | 93 --- /dev/null |
(...skipping 1136 matching lines...)
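A minimal sketch of the flow this comment describes, under the assumption that the patch template carries a single SHA1 placeholder and that the builder appends the value to the archive name; the helper names and filename pattern are hypothetical, not the try server's actual scheme.

import hashlib
import os

def FillDepsShaPatch(deps_content):
  # Hypothetical bisect-side step: hash the modified DEPS text and fill it
  # into the patch template (assumes a %(deps_sha)s slot in the template).
  return DEPS_SHA_PATCH % {'deps_sha': hashlib.sha1(deps_content).hexdigest()}

def ArchiveName(base_name, checkout_dir):
  # Hypothetical builder-side step: if DEPS.sha exists in the checkout,
  # append its contents to the build archive filename.
  sha_file = os.path.join(checkout_dir, 'DEPS.sha')
  if os.path.exists(sha_file):
    with open(sha_file) as f:
      return '%s_%s.zip' % (base_name, f.read().strip())
  return '%s.zip' % base_name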
1226 return command_to_run.replace(cmd_browser, | 1230 return command_to_run.replace(cmd_browser, |
1227 'android-chromium-testshell') | 1231 'android-chromium-testshell') |
1228 elif (commit_position >= 276628 and | 1232 elif (commit_position >= 276628 and |
1229 cmd_browser == 'android-chromium-testshell'): | 1233 cmd_browser == 'android-chromium-testshell'): |
1230 return command_to_run.replace(cmd_browser, | 1234 return command_to_run.replace(cmd_browser, |
1231 'android-chrome-shell') | 1235 'android-chrome-shell') |
1232 return command_to_run | 1236 return command_to_run |
1233 | 1237 |
1234 def RunPerformanceTestAndParseResults( | 1238 def RunPerformanceTestAndParseResults( |
1235 self, command_to_run, metric, reset_on_first_run=False, | 1239 self, command_to_run, metric, reset_on_first_run=False, |
1236 upload_on_last_run=False, results_label=None): | 1240 upload_on_last_run=False, results_label=None, extra_test_runs=0): |
1237 """Runs a performance test on the current revision and parses the results. | 1241 """Runs a performance test on the current revision and parses the results. |
1238 | 1242 |
1239 Args: | 1243 Args: |
1240 command_to_run: The command to be run to execute the performance test. | 1244 command_to_run: The command to be run to execute the performance test. |
1241 metric: The metric to parse out from the results of the performance test. | 1245 metric: The metric to parse out from the results of the performance test. |
1242 This is the result chart name and trace name, separated by slash. | 1246 This is the result chart name and trace name, separated by slash. |
1243 May be None for perf try jobs. | 1247 May be None for perf try jobs. |
1244 reset_on_first_run: If True, pass the flag --reset-results on first run. | 1248 reset_on_first_run: If True, pass the flag --reset-results on first run. |
1245 upload_on_last_run: If True, pass the flag --upload-results on last run. | 1249 upload_on_last_run: If True, pass the flag --upload-results on last run. |
1246 results_label: A value for the option flag --results-label. | 1250 results_label: A value for the option flag --results-label. |
1247 The arguments reset_on_first_run, upload_on_last_run and results_label | 1251 The arguments reset_on_first_run, upload_on_last_run and results_label |
1248 are all ignored if the test is not a Telemetry test. | 1252 are all ignored if the test is not a Telemetry test. |
1253 extra_test_runs: Factor by which to increase the number of test repeats | |
1254 and the timeout period specified in self.opts. | |
qyearsley 2015/01/16 18:37:03: Possible suggestion: Rename this to "repeat_count_
RobertoCN 2015/01/30 21:23:32: Done.
1249 | 1255 |
1250 Returns: | 1256 Returns: |
1251 (values dict, 0) if --debug_ignore_perf_test was passed. | 1257 (values dict, 0) if --debug_ignore_perf_test was passed. |
1252 (values dict, 0, test output) if the test was run successfully. | 1258 (values dict, 0, test output) if the test was run successfully. |
1253 (error message, -1) if the test couldn't be run. | 1259 (error message, -1) if the test couldn't be run. |
1254 (error message, -1, test output) if the test ran but there was an error. | 1260 (error message, -1, test output) if the test ran but there was an error. |
1255 """ | 1261 """ |
1256 success_code, failure_code = 0, -1 | 1262 success_code, failure_code = 0, -1 |
1257 | 1263 |
1258 if self.opts.debug_ignore_perf_test: | 1264 if self.opts.debug_ignore_perf_test: |
(...skipping 21 matching lines...)
1280 if not _GenerateProfileIfNecessary(args): | 1286 if not _GenerateProfileIfNecessary(args): |
1281 err_text = 'Failed to generate profile for performance test.' | 1287 err_text = 'Failed to generate profile for performance test.' |
1282 return (err_text, failure_code) | 1288 return (err_text, failure_code) |
1283 | 1289 |
1284 is_telemetry = bisect_utils.IsTelemetryCommand(command_to_run) | 1290 is_telemetry = bisect_utils.IsTelemetryCommand(command_to_run) |
1285 | 1291 |
1286 start_time = time.time() | 1292 start_time = time.time() |
1287 | 1293 |
1288 metric_values = [] | 1294 metric_values = [] |
1289 output_of_all_runs = '' | 1295 output_of_all_runs = '' |
1290 for i in xrange(self.opts.repeat_test_count): | 1296 repeat_count = self.opts.repeat_test_count * (1 + extra_test_runs) |
1297 for i in xrange(repeat_count): | |
1291 # Can ignore the return code since if the tests fail, it won't return 0. | 1298 # Can ignore the return code since if the tests fail, it won't return 0. |
1292 current_args = copy.copy(args) | 1299 current_args = copy.copy(args) |
1293 if is_telemetry: | 1300 if is_telemetry: |
1294 if i == 0 and reset_on_first_run: | 1301 if i == 0 and reset_on_first_run: |
1295 current_args.append('--reset-results') | 1302 current_args.append('--reset-results') |
1296 if i == self.opts.repeat_test_count - 1 and upload_on_last_run: | 1303 if i == self.opts.repeat_test_count - 1 and upload_on_last_run: |
1297 current_args.append('--upload-results') | 1304 current_args.append('--upload-results') |
1298 if results_label: | 1305 if results_label: |
1299 current_args.append('--results-label=%s' % results_label) | 1306 current_args.append('--results-label=%s' % results_label) |
1300 try: | 1307 try: |
(...skipping 20 matching lines...)
1321 metric_values.append(math_utils.Mean( | 1328 metric_values.append(math_utils.Mean( |
1322 _ParseMetricValuesFromOutput(metric, output))) | 1329 _ParseMetricValuesFromOutput(metric, output))) |
1323 # If we're bisecting on a metric (i.e., changes in the mean or | 1330 # If we're bisecting on a metric (i.e., changes in the mean or |
1324 # standard deviation) and no metric values are produced, bail out. | 1331 # standard deviation) and no metric values are produced, bail out. |
1325 if not metric_values: | 1332 if not metric_values: |
1326 break | 1333 break |
1327 elif self._IsBisectModeReturnCode(): | 1334 elif self._IsBisectModeReturnCode(): |
1328 metric_values.append(return_code) | 1335 metric_values.append(return_code) |
1329 | 1336 |
1330 elapsed_minutes = (time.time() - start_time) / 60.0 | 1337 elapsed_minutes = (time.time() - start_time) / 60.0 |
1331 if elapsed_minutes >= self.opts.max_time_minutes: | 1338 time_limit = self.opts.max_time_minutes * (1 + extra_test_runs) |
1339 if elapsed_minutes >= time_limit: | |
1332 break | 1340 break |
1333 | 1341 |
1334 if metric and len(metric_values) == 0: | 1342 if metric and len(metric_values) == 0: |
1335 err_text = 'Metric %s was not found in the test output.' % metric | 1343 err_text = 'Metric %s was not found in the test output.' % metric |
1336 # TODO(qyearsley): Consider also getting and displaying a list of metrics | 1344 # TODO(qyearsley): Consider also getting and displaying a list of metrics |
1337 # that were found in the output here. | 1345 # that were found in the output here. |
1338 return (err_text, failure_code, output_of_all_runs) | 1346 return (err_text, failure_code, output_of_all_runs) |
1339 | 1347 |
1340 # If we're bisecting on return codes, we're really just looking for zero vs | 1348 # If we're bisecting on return codes, we're really just looking for zero vs |
1341 # non-zero. | 1349 # non-zero. |
(...skipping 84 matching lines...)
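The comment above implies that the raw codes get collapsed before statistics are computed; a plausible standalone sketch of that step follows (the elided code in the diff may differ).

def NormalizeReturnCodes(metric_values):
  # Only zero vs. non-zero matters when bisecting on return codes, so
  # collapse every code to 0 (pass) or 1 (fail) before comparing runs.
  return [0 if v == 0 else 1 for v in metric_values]

# NormalizeReturnCodes([0, 0, 2, 0]) -> [0, 0, 1, 0]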
1426 output = bisect_utils.CheckRunGit(cmd) | 1434 output = bisect_utils.CheckRunGit(cmd) |
1427 | 1435 |
1428 files = output.splitlines() | 1436 files = output.splitlines() |
1429 | 1437 |
1430 if len(files) == 1 and files[0] == 'DEPS': | 1438 if len(files) == 1 and files[0] == 'DEPS': |
1431 return True | 1439 return True |
1432 | 1440 |
1433 return False | 1441 return False |
1434 | 1442 |
1435 def RunTest(self, revision, depot, command, metric, skippable=False, | 1443 def RunTest(self, revision, depot, command, metric, skippable=False, |
1436 skip_sync=False, create_patch=False, force_build=False): | 1444 skip_sync=False, create_patch=False, force_build=False, |
1445 extra_test_runs=0): | |
1437 """Performs a full sync/build/run of the specified revision. | 1446 """Performs a full sync/build/run of the specified revision. |
1438 | 1447 |
1439 Args: | 1448 Args: |
1440 revision: The revision to sync to. | 1449 revision: The revision to sync to. |
1441 depot: The depot that's being used at the moment (src, webkit, etc.) | 1450 depot: The depot that's being used at the moment (src, webkit, etc.) |
1442 command: The command to execute the performance test. | 1451 command: The command to execute the performance test. |
1443 metric: The performance metric being tested. | 1452 metric: The performance metric being tested. |
1444 skip_sync: Skip the sync step. | 1453 skip_sync: Skip the sync step. |
1445 create_patch: Create a patch with any locally modified files. | 1454 create_patch: Create a patch with any locally modified files. |
1446 force_build: Force a local build. | 1455 force_build: Force a local build. |
1456 extra_test_runs: Factor by which to increase the given number of runs and | |
1457 the set timeout period. | |
1447 | 1458 |
1448 Returns: | 1459 Returns: |
1449 On success, a tuple containing the results of the performance test. | 1460 On success, a tuple containing the results of the performance test. |
1450 Otherwise, a tuple with the error message. | 1461 Otherwise, a tuple with the error message. |
1451 """ | 1462 """ |
1452 logging.info('Running RunTest with rev "%s", command "%s"', | 1463 logging.info('Running RunTest with rev "%s", command "%s"', |
1453 revision, command) | 1464 revision, command) |
1454 # Decide which sync program to use. | 1465 # Decide which sync program to use. |
1455 sync_client = None | 1466 sync_client = None |
1456 if depot == 'chromium' or depot == 'android-chrome': | 1467 if depot == 'chromium' or depot == 'android-chrome': |
(...skipping 21 matching lines...)
1478 depot, revision=revision_to_build, create_patch=create_patch) | 1489 depot, revision=revision_to_build, create_patch=create_patch) |
1479 if not build_success: | 1490 if not build_success: |
1480 return ('Failed to build revision: [%s]' % str(revision), | 1491 return ('Failed to build revision: [%s]' % str(revision), |
1481 BUILD_RESULT_FAIL) | 1492 BUILD_RESULT_FAIL) |
1482 after_build_time = time.time() | 1493 after_build_time = time.time() |
1483 | 1494 |
1484 # Possibly alter the command. | 1495 # Possibly alter the command. |
1485 command = self.GetCompatibleCommand(command, revision, depot) | 1496 command = self.GetCompatibleCommand(command, revision, depot) |
1486 | 1497 |
1487 # Run the command and get the results. | 1498 # Run the command and get the results. |
1488 results = self.RunPerformanceTestAndParseResults(command, metric) | 1499 results = self.RunPerformanceTestAndParseResults( |
1500 command, metric, extra_test_runs=extra_test_runs) | |
1489 | 1501 |
1490 # Restore build output directory once the tests are done, to avoid | 1502 # Restore build output directory once the tests are done, to avoid |
1491 # any discrepancies. | 1503 # any discrepancies. |
1492 if self.IsDownloadable(depot) and revision: | 1504 if self.IsDownloadable(depot) and revision: |
1493 self.BackupOrRestoreOutputDirectory(restore=True) | 1505 self.BackupOrRestoreOutputDirectory(restore=True) |
1494 | 1506 |
1495 # A value other than 0 indicates that the test couldn't be run, and results | 1507 # A value other than 0 indicates that the test couldn't be run, and results |
1496 # should also include an error message. | 1508 # should also include an error message. |
1497 if results[1] != 0: | 1509 if results[1] != 0: |
1498 return results | 1510 return results |
(...skipping 893 matching lines...)
2392 | 2404 |
2393 # If the build is broken, remove it and redo search. | 2405 # If the build is broken, remove it and redo search. |
2394 revision_states.pop(next_revision_index) | 2406 revision_states.pop(next_revision_index) |
2395 | 2407 |
2396 max_revision -= 1 | 2408 max_revision -= 1 |
2397 | 2409 |
2398 if self.opts.output_buildbot_annotations: | 2410 if self.opts.output_buildbot_annotations: |
2399 self.printer.PrintPartialResults(bisect_state) | 2411 self.printer.PrintPartialResults(bisect_state) |
2400 bisect_utils.OutputAnnotationStepClosed() | 2412 bisect_utils.OutputAnnotationStepClosed() |
2401 | 2413 |
2414 | |
2415 self._ConfidenceExtraTestRuns(min_revision_state, max_revision_state, | |
2416 command_to_run, metric) | |
2402 results = BisectResults(bisect_state, self.depot_registry, self.opts, | 2417 results = BisectResults(bisect_state, self.depot_registry, self.opts, |
2403 self.warnings) | 2418 self.warnings) |
2404 | 2419 |
2405 self._GatherResultsFromRevertedCulpritCL( | 2420 self._GatherResultsFromRevertedCulpritCL( |
2406 results, target_depot, command_to_run, metric) | 2421 results, target_depot, command_to_run, metric) |
2407 | 2422 |
2408 return results | 2423 return results |
2409 else: | 2424 else: |
2410 # Weren't able to sync and retrieve the revision range. | 2425 # Weren't able to sync and retrieve the revision range. |
2411 error = ('An error occurred attempting to retrieve revision range: ' | 2426 error = ('An error occurred attempting to retrieve revision range: ' |
2412 '[%s..%s]' % (good_revision, bad_revision)) | 2427 '[%s..%s]' % (good_revision, bad_revision)) |
2413 return BisectResults(error=error) | 2428 return BisectResults(error=error) |
2414 | 2429 |
2430 def _ConfidenceExtraTestRuns(self, good_state, bad_state, command_to_run, | |
2431 metric): | |
2432 if(bool(good_state.passed) != bool(bad_state.passed) | |
qyearsley 2015/01/16 18:37:03: Add space after "if"
RobertoCN 2015/01/30 21:23:31: Done.
2433 and good_state.passed not in ('Skipped', 'Build Failed') | |
2434 and bad_state.passed not in ('Skipped', 'Build Failed')): | |
2435 for state in (good_state, bad_state): | |
2436 run_results = self.RunTest( | |
2437 state.revision, | |
2438 state.depot, | |
2439 command_to_run, | |
2440 metric, | |
2441 extra_test_runs=BORDER_REVISIONS_EXTRA_RUNS | |
2442 ) | |
qyearsley 2015/01/16 18:37:03: Since this is a function call, the convention is t
RobertoCN 2015/01/30 21:23:32: Done.
2443 # Is extend the right thing to do here? | |
2444 state.value['values'].extend(run_results[0]['values']) | |
2445 # Get new confidence score | |
RobertoCN 2015/01/14 18:50:47: This comment will be removed.
RobertoCN 2015/01/30 21:23:32: Done.
2446 | |
2415 | 2447 |
2416 def _IsPlatformSupported(): | 2448 def _IsPlatformSupported(): |
2417 """Checks that this platform and build system are supported. | 2449 """Checks that this platform and build system are supported. |
2418 | 2450 |
2419 Args: | 2451 Args: |
2420 opts: The options parsed from the command line. | 2452 opts: The options parsed from the command line. |
2421 | 2453 |
2422 Returns: | 2454 Returns: |
2423 True if the platform and build system are supported. | 2455 True if the platform and build system are supported. |
2424 """ | 2456 """ |
(...skipping 355 matching lines...)
2780 # bugs. If you change this, please update the perf dashboard as well. | 2812 # bugs. If you change this, please update the perf dashboard as well. |
2781 bisect_utils.OutputAnnotationStepStart('Results') | 2813 bisect_utils.OutputAnnotationStepStart('Results') |
2782 print 'Runtime Error: %s' % e | 2814 print 'Runtime Error: %s' % e |
2783 if opts.output_buildbot_annotations: | 2815 if opts.output_buildbot_annotations: |
2784 bisect_utils.OutputAnnotationStepClosed() | 2816 bisect_utils.OutputAnnotationStepClosed() |
2785 return 1 | 2817 return 1 |
2786 | 2818 |
2787 | 2819 |
2788 if __name__ == '__main__': | 2820 if __name__ == '__main__': |
2789 sys.exit(main()) | 2821 sys.exit(main()) |