Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
| 7 | 7 |
| 8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
| 9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
| 10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 49 | 49 |
| 50 sys.path.append(os.path.join( | 50 sys.path.append(os.path.join( |
| 51 os.path.dirname(__file__), os.path.pardir, 'telemetry')) | 51 os.path.dirname(__file__), os.path.pardir, 'telemetry')) |
| 52 | 52 |
| 53 import bisect_utils | 53 import bisect_utils |
| 54 import builder | 54 import builder |
| 55 import math_utils | 55 import math_utils |
| 56 import request_build | 56 import request_build |
| 57 import source_control as source_control_module | 57 import source_control as source_control_module |
| 58 import ttest | 58 import ttest |
| 59 from telemetry.util import cloud_storage | 59 from telemetry.util import cloud_storage # pylint: disable=F0401 |
| 60 | 60 |
| 61 # Below is the map of "depot" names to information about each depot. Each depot | 61 # Below is the map of "depot" names to information about each depot. Each depot |
| 62 # is a repository, and in the process of bisecting, revision ranges in these | 62 # is a repository, and in the process of bisecting, revision ranges in these |
| 63 # repositories may also be bisected. | 63 # repositories may also be bisected. |
| 64 # | 64 # |
| 65 # Each depot information dictionary may contain: | 65 # Each depot information dictionary may contain: |
| 66 # src: Path to the working directory. | 66 # src: Path to the working directory. |
| 67 # recurse: True if this repository will get bisected. | 67 # recurse: True if this repository will get bisected. |
| 68 # depends: A list of other repositories that are actually part of the same | 68 # depends: A list of other repositories that are actually part of the same |
| 69 # repository in svn. If the repository has any dependent repositories | 69 # repository in svn. If the repository has any dependent repositories |
| (...skipping 791 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 861 if step_count: | 861 if step_count: |
| 862 step_perf_time_avg = step_perf_time_avg / step_count | 862 step_perf_time_avg = step_perf_time_avg / step_count |
| 863 step_build_time_avg = step_build_time_avg / step_count | 863 step_build_time_avg = step_build_time_avg / step_count |
| 864 print | 864 print |
| 865 print 'Average build time : %s' % datetime.timedelta( | 865 print 'Average build time : %s' % datetime.timedelta( |
| 866 seconds=int(step_build_time_avg)) | 866 seconds=int(step_build_time_avg)) |
| 867 print 'Average test time : %s' % datetime.timedelta( | 867 print 'Average test time : %s' % datetime.timedelta( |
| 868 seconds=int(step_perf_time_avg)) | 868 seconds=int(step_perf_time_avg)) |
| 869 | 869 |
| 870 | 870 |
| 871 def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good): | 871 class BisectResults(object): |
| 872 """Compiles a list of other possible regressions from the revision data. | 872 """This class holds the results of the bisect.""" |
| 873 | 873 |
| 874 Args: | 874 def __init__(self, bisect_perf_metrics, source_control): |
| 875 revision_data_sorted: Sorted list of (revision, revision data) pairs. | 875 self._bisect_perf_metrics = bisect_perf_metrics |
|
qyearsley
2014/09/19 07:35:16
Initial thought: BisectPerformanceMetrics is a mas
Sergiy Byelozyorov
2014/09/19 13:36:22
Done.
| |
| 876 bad_greater_than_good: Whether the result value at the "bad" revision is | 876 self.revision_data = {} |
| 877 numerically greater than the result value at the "good" revision. | 877 self.error = None |
| 878 | 878 self._source_control = source_control |
| 879 Returns: | 879 |
| 880 A list of [current_rev, previous_rev, confidence] for other places where | 880 @staticmethod |
| 881 there may have been a regression. | 881 def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good): |
| 882 """ | 882 """Compiles a list of other possible regressions from the revision data. |
| 883 other_regressions = [] | 883 |
| 884 previous_values = [] | 884 Args: |
| 885 previous_id = None | 885 revision_data_sorted: Sorted list of (revision, revision data) pairs. |
| 886 for current_id, current_data in revision_data_sorted: | 886 bad_greater_than_good: Whether the result value at the "bad" revision is |
| 887 current_values = current_data['value'] | 887 numerically greater than the result value at the "good" revision. |
| 888 if current_values: | 888 |
| 889 current_values = current_values['values'] | 889 Returns: |
| 890 if previous_values: | 890 A list of [current_rev, previous_rev, confidence] for other places where |
| 891 confidence = ConfidenceScore(previous_values, [current_values]) | 891 there may have been a regression. |
| 892 mean_of_prev_runs = math_utils.Mean(sum(previous_values, [])) | 892 """ |
| 893 mean_of_current_runs = math_utils.Mean(current_values) | 893 other_regressions = [] |
| 894 | 894 previous_values = [] |
| 895 # Check that the potential regression is in the same direction as | 895 previous_id = None |
| 896 # the overall regression. If the mean of the previous runs < the | 896 for current_id, current_data in revision_data_sorted: |
| 897 # mean of the current runs, this local regression is in same | 897 current_values = current_data['value'] |
| 898 # direction. | 898 if current_values: |
| 899 prev_less_than_current = mean_of_prev_runs < mean_of_current_runs | 899 current_values = current_values['values'] |
| 900 is_same_direction = (prev_less_than_current if | 900 if previous_values: |
| 901 bad_greater_than_good else not prev_less_than_current) | 901 confidence = ConfidenceScore(previous_values, [current_values]) |
| 902 | 902 mean_of_prev_runs = math_utils.Mean(sum(previous_values, [])) |
| 903 # Only report potential regressions with high confidence. | 903 mean_of_current_runs = math_utils.Mean(current_values) |
| 904 if is_same_direction and confidence > 50: | 904 |
| 905 other_regressions.append([current_id, previous_id, confidence]) | 905 # Check that the potential regression is in the same direction as |
| 906 previous_values.append(current_values) | 906 # the overall regression. If the mean of the previous runs < the |
| 907 previous_id = current_id | 907 # mean of the current runs, this local regression is in same |
| 908 return other_regressions | 908 # direction. |
| 909 prev_less_than_current = mean_of_prev_runs < mean_of_current_runs | |
| 910 is_same_direction = (prev_less_than_current if | |
| 911 bad_greater_than_good else not prev_less_than_current) | |
| 912 | |
| 913 # Only report potential regressions with high confidence. | |
| 914 if is_same_direction and confidence > 50: | |
| 915 other_regressions.append([current_id, previous_id, confidence]) | |
| 916 previous_values.append(current_values) | |
| 917 previous_id = current_id | |
| 918 return other_regressions | |
| 919 | |
| 920 def GetResultsDict(self): | |
| 921 """Returns a dictionary with the following fields | |
|
qyearsley
2014/09/19 07:35:16
The first line of a docstring is generally a self-
Sergiy Byelozyorov
2014/09/19 13:36:22
Done.
| |
| 922 | |
| 923 'first_working_revision': First good revision. | |
| 924 'last_broken_revision': Last bad revision. | |
| 925 'culprit_revisions': A list of revisions, which contain the bad change | |
| 926 introducing the failure. | |
| 927 'other_regressions': A list of tuples representing other regressions, which | |
| 928 may have occured. | |
| 929 'regression_size': For performance bisects, this is a relative change of the | |
| 930 mean metric value. For other bisects this field always | |
| 931 contains 'zero-to-nonzero'. | |
| 932 'regression_std_err': For performance bisects, it is a pooled standard | |
| 933 error for groups of good and bad runs. Not used for | |
| 934 other bisects. | |
| 935 'confidence': For performance bisects, it is a confidence that the good and | |
| 936 bad runs are distinct groups. Not used for non-performance | |
| 937 bisects. | |
|
qyearsley
2014/09/19 07:35:16
I think this formatting might look better if line
Sergiy Byelozyorov
2014/09/19 13:36:22
Done.
| |
| 938 | |
| 939 'revision_data_stored': dict mapping revision ids to data about that | |
| 940 revision. Each piece of revision data consists of a dict with the | |
| 941 following keys: | |
| 942 | |
| 943 'passed': Represents whether the performance test was successful at | |
| 944 that revision. Possible values include: 1 (passed), 0 (failed), | |
| 945 '?' (skipped), 'F' (build failed). | |
| 946 'depot': The depot that this revision is from (i.e. WebKit) | |
| 947 'external': If the revision is a 'src' revision, 'external' contains | |
| 948 the revisions of each of the external libraries. | |
| 949 'sort': A sort value for sorting the dict in order of commits. | |
| 950 | |
| 951 For example: | |
| 952 { | |
| 953 'CL #1': | |
| 954 { | |
| 955 'passed': False, | |
| 956 'depot': 'chromium', | |
| 957 'external': None, | |
| 958 'sort': 0 | |
| 959 } | |
| 960 } | |
| 961 """ | |
| 962 revision_data_sorted = sorted(self.revision_data.iteritems(), | |
| 963 key = lambda x: x[1]['sort']) | |
| 964 | |
| 965 # Find range where it possibly broke. | |
| 966 first_working_revision = None | |
| 967 first_working_revision_index = -1 | |
| 968 last_broken_revision = None | |
| 969 last_broken_revision_index = -1 | |
| 970 | |
| 971 culprit_revisions = [] | |
| 972 other_regressions = [] | |
| 973 regression_size = 0.0 | |
| 974 regression_std_err = 0.0 | |
| 975 confidence = 0.0 | |
| 976 | |
| 977 for i in xrange(len(revision_data_sorted)): | |
| 978 k, v = revision_data_sorted[i] | |
| 979 if v['passed'] == 1: | |
| 980 if not first_working_revision: | |
| 981 first_working_revision = k | |
| 982 first_working_revision_index = i | |
| 983 | |
| 984 if not v['passed']: | |
| 985 last_broken_revision = k | |
| 986 last_broken_revision_index = i | |
| 987 | |
| 988 if last_broken_revision != None and first_working_revision != None: | |
| 989 broken_means = [] | |
| 990 for i in xrange(0, last_broken_revision_index + 1): | |
| 991 if revision_data_sorted[i][1]['value']: | |
| 992 broken_means.append(revision_data_sorted[i][1]['value']['values']) | |
| 993 | |
| 994 working_means = [] | |
| 995 for i in xrange(first_working_revision_index, len(revision_data_sorted)): | |
| 996 if revision_data_sorted[i][1]['value']: | |
| 997 working_means.append(revision_data_sorted[i][1]['value']['values']) | |
| 998 | |
| 999 # Flatten the lists to calculate mean of all values. | |
| 1000 working_mean = sum(working_means, []) | |
| 1001 broken_mean = sum(broken_means, []) | |
| 1002 | |
| 1003 # Calculate the approximate size of the regression | |
| 1004 mean_of_bad_runs = math_utils.Mean(broken_mean) | |
| 1005 mean_of_good_runs = math_utils.Mean(working_mean) | |
| 1006 | |
| 1007 regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs, | |
| 1008 mean_of_bad_runs) | |
| 1009 if math.isnan(regression_size): | |
| 1010 regression_size = 'zero-to-nonzero' | |
| 1011 | |
| 1012 regression_std_err = math.fabs(math_utils.PooledStandardError( | |
| 1013 [working_mean, broken_mean]) / | |
| 1014 max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0 | |
| 1015 | |
| 1016 # Give a "confidence" in the bisect. At the moment we use how distinct the | |
| 1017 # values are before and after the last broken revision, and how noisy the | |
| 1018 # overall graph is. | |
| 1019 confidence = ConfidenceScore(working_means, broken_means) | |
| 1020 | |
| 1021 culprit_revisions = [] | |
| 1022 | |
| 1023 cwd = os.getcwd() | |
| 1024 self._bisect_perf_metrics.ChangeToDepotWorkingDirectory( | |
| 1025 self.revision_data[last_broken_revision]['depot']) | |
| 1026 | |
| 1027 if self.revision_data[last_broken_revision]['depot'] == 'cros': | |
| 1028 # Want to get a list of all the commits and what depots they belong | |
| 1029 # to so that we can grab info about each. | |
| 1030 cmd = ['repo', 'forall', '-c', | |
| 1031 'pwd ; git log --pretty=oneline --before=%d --after=%d' % ( | |
| 1032 last_broken_revision, first_working_revision + 1)] | |
| 1033 output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd) | |
| 1034 | |
| 1035 changes = [] | |
| 1036 assert not return_code, ('An error occurred while running ' | |
| 1037 '"%s"' % ' '.join(cmd)) | |
| 1038 last_depot = None | |
| 1039 cwd = os.getcwd() | |
| 1040 for l in output.split('\n'): | |
| 1041 if l: | |
| 1042 # Output will be in form: | |
| 1043 # /path_to_depot | |
| 1044 # /path_to_other_depot | |
| 1045 # <SHA1> | |
| 1046 # /path_again | |
| 1047 # <SHA1> | |
| 1048 # etc. | |
| 1049 if l[0] == '/': | |
| 1050 last_depot = l | |
| 1051 else: | |
| 1052 contents = l.split(' ') | |
| 1053 if len(contents) > 1: | |
| 1054 changes.append([last_depot, contents[0]]) | |
| 1055 for c in changes: | |
| 1056 os.chdir(c[0]) | |
| 1057 info = self._source_control.QueryRevisionInfo(c[1]) | |
| 1058 culprit_revisions.append((c[1], info, None)) | |
| 1059 else: | |
| 1060 for i in xrange(last_broken_revision_index, len(revision_data_sorted)): | |
| 1061 k, v = revision_data_sorted[i] | |
| 1062 if k == first_working_revision: | |
| 1063 break | |
| 1064 self._bisect_perf_metrics.ChangeToDepotWorkingDirectory(v['depot']) | |
| 1065 info = self._source_control.QueryRevisionInfo(k) | |
| 1066 culprit_revisions.append((k, info, v['depot'])) | |
| 1067 os.chdir(cwd) | |
| 1068 | |
| 1069 # Check for any other possible regression ranges. | |
| 1070 other_regressions = self._FindOtherRegressions( | |
| 1071 revision_data_sorted, mean_of_bad_runs > mean_of_good_runs) | |
| 1072 | |
| 1073 return { | |
| 1074 'first_working_revision': first_working_revision, | |
| 1075 'last_broken_revision': last_broken_revision, | |
| 1076 'culprit_revisions': culprit_revisions, | |
| 1077 'other_regressions': other_regressions, | |
| 1078 'regression_size': regression_size, | |
| 1079 'regression_std_err': regression_std_err, | |
| 1080 'confidence': confidence, | |
| 1081 'revision_data_sorted': revision_data_sorted | |
| 1082 } | |
| 909 | 1083 |
| 910 | 1084 |
| 911 class BisectPerformanceMetrics(object): | 1085 class BisectPerformanceMetrics(object): |
| 912 """This class contains functionality to perform a bisection of a range of | 1086 """This class contains functionality to perform a bisection of a range of |
| 913 revisions to narrow down where performance regressions may have occurred. | 1087 revisions to narrow down where performance regressions may have occurred. |
| 914 | 1088 |
| 915 The main entry-point is the Run method. | 1089 The main entry-point is the Run method. |
| 916 """ | 1090 """ |
| 917 | 1091 |
| 918 def __init__(self, source_control, opts): | 1092 def __init__(self, source_control, opts): |
| (...skipping 1402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2321 intermediate revisions to determine the CL where the performance regression | 2495 intermediate revisions to determine the CL where the performance regression |
| 2322 occurred. | 2496 occurred. |
| 2323 | 2497 |
| 2324 Args: | 2498 Args: |
| 2325 command_to_run: Specify the command to execute the performance test. | 2499 command_to_run: Specify the command to execute the performance test. |
| 2326 good_revision: Number/tag of the known good revision. | 2500 good_revision: Number/tag of the known good revision. |
| 2327 bad_revision: Number/tag of the known bad revision. | 2501 bad_revision: Number/tag of the known bad revision. |
| 2328 metric: The performance metric to monitor. | 2502 metric: The performance metric to monitor. |
| 2329 | 2503 |
| 2330 Returns: | 2504 Returns: |
| 2331 A dict with 2 members, 'revision_data' and 'error'. On success, | 2505 A BisectResults object. |
| 2332 'revision_data' will contain a dict mapping revision ids to | |
| 2333 data about that revision. Each piece of revision data consists of a | |
| 2334 dict with the following keys: | |
| 2335 | |
| 2336 'passed': Represents whether the performance test was successful at | |
| 2337 that revision. Possible values include: 1 (passed), 0 (failed), | |
| 2338 '?' (skipped), 'F' (build failed). | |
| 2339 'depot': The depot that this revision is from (i.e. WebKit) | |
| 2340 'external': If the revision is a 'src' revision, 'external' contains | |
| 2341 the revisions of each of the external libraries. | |
| 2342 'sort': A sort value for sorting the dict in order of commits. | |
| 2343 | |
| 2344 For example: | |
| 2345 { | |
| 2346 'error':None, | |
| 2347 'revision_data': | |
| 2348 { | |
| 2349 'CL #1': | |
| 2350 { | |
| 2351 'passed': False, | |
| 2352 'depot': 'chromium', | |
| 2353 'external': None, | |
| 2354 'sort': 0 | |
| 2355 } | |
| 2356 } | |
| 2357 } | |
| 2358 | |
| 2359 If an error occurred, the 'error' field will contain the message and | |
| 2360 'revision_data' will be empty. | |
| 2361 """ | 2506 """ |
| 2362 results = { | 2507 results = BisectResults(self, self.source_control) |
| 2363 'revision_data' : {}, | |
| 2364 'error' : None, | |
| 2365 } | |
| 2366 | 2508 |
| 2367 # Choose depot to bisect first | 2509 # Choose depot to bisect first |
| 2368 target_depot = 'chromium' | 2510 target_depot = 'chromium' |
| 2369 if self.opts.target_platform == 'cros': | 2511 if self.opts.target_platform == 'cros': |
| 2370 target_depot = 'cros' | 2512 target_depot = 'cros' |
| 2371 elif self.opts.target_platform == 'android-chrome': | 2513 elif self.opts.target_platform == 'android-chrome': |
| 2372 target_depot = 'android-chrome' | 2514 target_depot = 'android-chrome' |
| 2373 | 2515 |
| 2374 cwd = os.getcwd() | 2516 cwd = os.getcwd() |
| 2375 self.ChangeToDepotWorkingDirectory(target_depot) | 2517 self.ChangeToDepotWorkingDirectory(target_depot) |
| 2376 | 2518 |
| 2377 # If they passed SVN revisions, we can try match them to git SHA1 hashes. | 2519 # If they passed SVN revisions, we can try match them to git SHA1 hashes. |
| 2378 bad_revision = self.source_control.ResolveToRevision( | 2520 bad_revision = self.source_control.ResolveToRevision( |
| 2379 bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100) | 2521 bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100) |
| 2380 good_revision = self.source_control.ResolveToRevision( | 2522 good_revision = self.source_control.ResolveToRevision( |
| 2381 good_revision_in, target_depot, DEPOT_DEPS_NAME, -100) | 2523 good_revision_in, target_depot, DEPOT_DEPS_NAME, -100) |
| 2382 | 2524 |
| 2383 os.chdir(cwd) | 2525 os.chdir(cwd) |
| 2384 if bad_revision is None: | 2526 if bad_revision is None: |
| 2385 results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in | 2527 results.error = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in |
| 2386 return results | 2528 return results |
| 2387 | 2529 |
| 2388 if good_revision is None: | 2530 if good_revision is None: |
| 2389 results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in | 2531 results.error = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in |
| 2390 return results | 2532 return results |
| 2391 | 2533 |
| 2392 # Check that they didn't accidentally swap good and bad revisions. | 2534 # Check that they didn't accidentally swap good and bad revisions. |
| 2393 if not self.CheckIfRevisionsInProperOrder( | 2535 if not self.CheckIfRevisionsInProperOrder( |
| 2394 target_depot, good_revision, bad_revision): | 2536 target_depot, good_revision, bad_revision): |
| 2395 results['error'] = ('bad_revision < good_revision, did you swap these ' | 2537 results.error = ('bad_revision < good_revision, did you swap these ' |
| 2396 'by mistake?') | 2538 'by mistake?') |
| 2397 return results | 2539 return results |
| 2398 bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange( | 2540 bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange( |
| 2399 bad_revision, good_revision, good_revision_in) | 2541 bad_revision, good_revision, good_revision_in) |
| 2400 if self.opts.output_buildbot_annotations: | 2542 if self.opts.output_buildbot_annotations: |
| 2401 bisect_utils.OutputAnnotationStepStart('Gathering Revisions') | 2543 bisect_utils.OutputAnnotationStepStart('Gathering Revisions') |
| 2402 | 2544 |
| 2403 cannot_bisect = self.CanPerformBisect(good_revision, bad_revision) | 2545 cannot_bisect = self.CanPerformBisect(good_revision, bad_revision) |
| 2404 if cannot_bisect: | 2546 if cannot_bisect: |
| 2405 results['error'] = cannot_bisect.get('error') | 2547 results.error = cannot_bisect.get('error') |
| 2406 return results | 2548 return results |
| 2407 | 2549 |
| 2408 print 'Gathering revision range for bisection.' | 2550 print 'Gathering revision range for bisection.' |
| 2409 # Retrieve a list of revisions to do bisection on. | 2551 # Retrieve a list of revisions to do bisection on. |
| 2410 src_revision_list = self.GetRevisionList( | 2552 src_revision_list = self.GetRevisionList( |
| 2411 target_depot, bad_revision, good_revision) | 2553 target_depot, bad_revision, good_revision) |
| 2412 | 2554 |
| 2413 if self.opts.output_buildbot_annotations: | 2555 if self.opts.output_buildbot_annotations: |
| 2414 bisect_utils.OutputAnnotationStepClosed() | 2556 bisect_utils.OutputAnnotationStepClosed() |
| 2415 | 2557 |
| 2416 if src_revision_list: | 2558 if src_revision_list: |
| 2417 # revision_data will store information about a revision such as the | 2559 # revision_data will store information about a revision such as the |
| 2418 # depot it came from, the webkit/V8 revision at that time, | 2560 # depot it came from, the webkit/V8 revision at that time, |
| 2419 # performance timing, build state, etc... | 2561 # performance timing, build state, etc... |
| 2420 revision_data = results['revision_data'] | 2562 revision_data = results.revision_data |
| 2421 | 2563 |
| 2422 # revision_list is the list we're binary searching through at the moment. | 2564 # revision_list is the list we're binary searching through at the moment. |
| 2423 revision_list = [] | 2565 revision_list = [] |
| 2424 | 2566 |
| 2425 sort_key_ids = 0 | 2567 sort_key_ids = 0 |
| 2426 | 2568 |
| 2427 for current_revision_id in src_revision_list: | 2569 for current_revision_id in src_revision_list: |
| 2428 sort_key_ids += 1 | 2570 sort_key_ids += 1 |
| 2429 | 2571 |
| 2430 revision_data[current_revision_id] = { | 2572 revision_data[current_revision_id] = { |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 2453 bad_results, good_results = self.GatherReferenceValues(good_revision, | 2595 bad_results, good_results = self.GatherReferenceValues(good_revision, |
| 2454 bad_revision, | 2596 bad_revision, |
| 2455 command_to_run, | 2597 command_to_run, |
| 2456 metric, | 2598 metric, |
| 2457 target_depot) | 2599 target_depot) |
| 2458 | 2600 |
| 2459 if self.opts.output_buildbot_annotations: | 2601 if self.opts.output_buildbot_annotations: |
| 2460 bisect_utils.OutputAnnotationStepClosed() | 2602 bisect_utils.OutputAnnotationStepClosed() |
| 2461 | 2603 |
| 2462 if bad_results[1]: | 2604 if bad_results[1]: |
| 2463 results['error'] = ('An error occurred while building and running ' | 2605 results.error = ('An error occurred while building and running ' |
| 2464 'the \'bad\' reference value. The bisect cannot continue without ' | 2606 'the \'bad\' reference value. The bisect cannot continue without ' |
| 2465 'a working \'bad\' revision to start from.\n\nError: %s' % | 2607 'a working \'bad\' revision to start from.\n\nError: %s' % |
| 2466 bad_results[0]) | 2608 bad_results[0]) |
| 2467 return results | 2609 return results |
| 2468 | 2610 |
| 2469 if good_results[1]: | 2611 if good_results[1]: |
| 2470 results['error'] = ('An error occurred while building and running ' | 2612 results.error = ('An error occurred while building and running ' |
| 2471 'the \'good\' reference value. The bisect cannot continue without ' | 2613 'the \'good\' reference value. The bisect cannot continue without ' |
| 2472 'a working \'good\' revision to start from.\n\nError: %s' % | 2614 'a working \'good\' revision to start from.\n\nError: %s' % |
| 2473 good_results[0]) | 2615 good_results[0]) |
| 2474 return results | 2616 return results |
| 2475 | 2617 |
| 2476 | 2618 |
| 2477 # We need these reference values to determine if later runs should be | 2619 # We need these reference values to determine if later runs should be |
| 2478 # classified as pass or fail. | 2620 # classified as pass or fail. |
| 2479 known_bad_value = bad_results[0] | 2621 known_bad_value = bad_results[0] |
| 2480 known_good_value = good_results[0] | 2622 known_good_value = good_results[0] |
| 2481 | 2623 |
| 2482 # Can just mark the good and bad revisions explicitly here since we | 2624 # Can just mark the good and bad revisions explicitly here since we |
| 2483 # already know the results. | 2625 # already know the results. |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2528 break | 2670 break |
| 2529 | 2671 |
| 2530 earliest_revision = max_revision_data['external'][external_depot] | 2672 earliest_revision = max_revision_data['external'][external_depot] |
| 2531 latest_revision = min_revision_data['external'][external_depot] | 2673 latest_revision = min_revision_data['external'][external_depot] |
| 2532 | 2674 |
| 2533 new_revision_list = self.PrepareToBisectOnDepot( | 2675 new_revision_list = self.PrepareToBisectOnDepot( |
| 2534 external_depot, latest_revision, earliest_revision, | 2676 external_depot, latest_revision, earliest_revision, |
| 2535 previous_revision) | 2677 previous_revision) |
| 2536 | 2678 |
| 2537 if not new_revision_list: | 2679 if not new_revision_list: |
| 2538 results['error'] = ('An error occurred attempting to retrieve ' | 2680 results.error = ('An error occurred attempting to retrieve ' |
| 2539 'revision range: [%s..%s]' % | 2681 'revision range: [%s..%s]' % |
| 2540 (earliest_revision, latest_revision)) | 2682 (earliest_revision, latest_revision)) |
| 2541 return results | 2683 return results |
| 2542 | 2684 |
| 2543 _AddRevisionsIntoRevisionData( | 2685 _AddRevisionsIntoRevisionData( |
| 2544 new_revision_list, external_depot, min_revision_data['sort'], | 2686 new_revision_list, external_depot, min_revision_data['sort'], |
| 2545 revision_data) | 2687 revision_data) |
| 2546 | 2688 |
| 2547 # Reset the bisection and perform it on the newly inserted | 2689 # Reset the bisection and perform it on the newly inserted |
| 2548 # changelists. | 2690 # changelists. |
| 2549 revision_list = new_revision_list | 2691 revision_list = new_revision_list |
| 2550 min_revision = 0 | 2692 min_revision = 0 |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2609 # If the build is broken, remove it and redo search. | 2751 # If the build is broken, remove it and redo search. |
| 2610 revision_list.pop(next_revision_index) | 2752 revision_list.pop(next_revision_index) |
| 2611 | 2753 |
| 2612 max_revision -= 1 | 2754 max_revision -= 1 |
| 2613 | 2755 |
| 2614 if self.opts.output_buildbot_annotations: | 2756 if self.opts.output_buildbot_annotations: |
| 2615 self._PrintPartialResults(results) | 2757 self._PrintPartialResults(results) |
| 2616 bisect_utils.OutputAnnotationStepClosed() | 2758 bisect_utils.OutputAnnotationStepClosed() |
| 2617 else: | 2759 else: |
| 2618 # Weren't able to sync and retrieve the revision range. | 2760 # Weren't able to sync and retrieve the revision range. |
| 2619 results['error'] = ('An error occurred attempting to retrieve revision ' | 2761 results.error = ('An error occurred attempting to retrieve revision ' |
| 2620 'range: [%s..%s]' % (good_revision, bad_revision)) | 2762 'range: [%s..%s]' % (good_revision, bad_revision)) |
| 2621 | 2763 |
| 2622 return results | 2764 return results |
| 2623 | 2765 |
| 2624 def _PrintPartialResults(self, results_dict): | 2766 def _PrintPartialResults(self, results): |
| 2625 revision_data = results_dict['revision_data'] | 2767 results_dict = results.GetResultsDict() |
| 2626 revision_data_sorted = sorted(revision_data.iteritems(), | 2768 self._PrintTestedCommitsTable(results_dict['revision_data_sorted'], |
| 2627 key = lambda x: x[1]['sort']) | |
| 2628 results_dict = self._GetResultsDict(revision_data, revision_data_sorted) | |
| 2629 | |
| 2630 self._PrintTestedCommitsTable(revision_data_sorted, | |
| 2631 results_dict['first_working_revision'], | 2769 results_dict['first_working_revision'], |
| 2632 results_dict['last_broken_revision'], | 2770 results_dict['last_broken_revision'], |
| 2633 100, final_step=False) | 2771 100, final_step=False) |
| 2634 | 2772 |
| 2635 def _ConfidenceLevelStatus(self, results_dict): | 2773 def _ConfidenceLevelStatus(self, results_dict): |
| 2636 if not results_dict['confidence']: | 2774 if not results_dict['confidence']: |
| 2637 return None | 2775 return None |
| 2638 confidence_status = 'Successful with %(level)s confidence%(warning)s.' | 2776 confidence_status = 'Successful with %(level)s confidence%(warning)s.' |
| 2639 if results_dict['confidence'] >= HIGH_CONFIDENCE: | 2777 if results_dict['confidence'] >= HIGH_CONFIDENCE: |
| 2640 level = 'high' | 2778 level = 'high' |
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2793 if not previous_link: | 2931 if not previous_link: |
| 2794 previous_link = previous_id | 2932 previous_link = previous_id |
| 2795 | 2933 |
| 2796 print ' %8s %70s %s' % ( | 2934 print ' %8s %70s %s' % ( |
| 2797 current_data['depot'], current_link, | 2935 current_data['depot'], current_link, |
| 2798 ('%d%%' % confidence).center(10, ' ')) | 2936 ('%d%%' % confidence).center(10, ' ')) |
| 2799 print ' %8s %70s' % ( | 2937 print ' %8s %70s' % ( |
| 2800 previous_data['depot'], previous_link) | 2938 previous_data['depot'], previous_link) |
| 2801 print | 2939 print |
| 2802 | 2940 |
  def _GetResultsDict(self, revision_data, revision_data_sorted):
    """Computes the bisect results: regression range, size and culprits.

    Args:
      revision_data: Dict mapping revision id -> info dict containing at
          least 'passed', 'value' and 'depot' entries.
      revision_data_sorted: The same data as a list of (revision, info)
          pairs, sorted in commit order.

    Returns:
      A dict with the keys 'first_working_revision',
      'last_broken_revision', 'culprit_revisions', 'other_regressions',
      'regression_size', 'regression_std_err' and 'confidence'.
    """
    # Find range where it possibly broke.
    first_working_revision = None
    first_working_revision_index = -1
    last_broken_revision = None
    last_broken_revision_index = -1

    # Defaults returned unchanged when no working/broken boundary is found.
    culprit_revisions = []
    other_regressions = []
    regression_size = 0.0
    regression_std_err = 0.0
    confidence = 0.0

    for i in xrange(len(revision_data_sorted)):
      k, v = revision_data_sorted[i]
      # NOTE(review): 'passed' may hold non-bool status values (callers
      # print non-bool statuses verbatim), so == 1 matches only True/1.
      if v['passed'] == 1:
        # NOTE(review): truthiness test assumes revision ids are never
        # falsy (e.g. 0) — confirm.
        if not first_working_revision:
          first_working_revision = k
          first_working_revision_index = i

      if not v['passed']:
        last_broken_revision = k
        last_broken_revision_index = i

    if last_broken_revision != None and first_working_revision != None:
      # Collect per-revision value lists on each side of the boundary:
      # everything up to the last broken revision vs. everything from the
      # first working revision onwards.
      broken_means = []
      for i in xrange(0, last_broken_revision_index + 1):
        if revision_data_sorted[i][1]['value']:
          broken_means.append(revision_data_sorted[i][1]['value']['values'])

      working_means = []
      for i in xrange(first_working_revision_index, len(revision_data_sorted)):
        if revision_data_sorted[i][1]['value']:
          working_means.append(revision_data_sorted[i][1]['value']['values'])

      # Flatten the lists to calculate mean of all values.
      working_mean = sum(working_means, [])
      broken_mean = sum(broken_means, [])

      # Calculate the approximate size of the regression
      mean_of_bad_runs = math_utils.Mean(broken_mean)
      mean_of_good_runs = math_utils.Mean(working_mean)

      regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
                                                        mean_of_bad_runs)
      # A NaN relative change means a zero-to-nonzero transition; report
      # that case with a descriptive string instead of a percentage.
      if math.isnan(regression_size):
        regression_size = 'zero-to-nonzero'

      # Pooled standard error as a percentage of the smaller mean, with the
      # denominator clamped away from zero to avoid division by zero.
      regression_std_err = math.fabs(math_utils.PooledStandardError(
          [working_mean, broken_mean]) /
          max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

      # Give a "confidence" in the bisect. At the moment we use how distinct the
      # values are before and after the last broken revision, and how noisy the
      # overall graph is.
      confidence = ConfidenceScore(working_means, broken_means)

      culprit_revisions = []

      # Remember the starting directory; the queries below chdir into the
      # relevant depot checkouts and we restore cwd at the end.
      cwd = os.getcwd()
      self.ChangeToDepotWorkingDirectory(
          revision_data[last_broken_revision]['depot'])

      if revision_data[last_broken_revision]['depot'] == 'cros':
        # Want to get a list of all the commits and what depots they belong
        # to so that we can grab info about each.
        # NOTE(review): %d formatting implies cros revisions are numeric
        # (timestamp-like) here — confirm.
        cmd = ['repo', 'forall', '-c',
            'pwd ; git log --pretty=oneline --before=%d --after=%d' % (
            last_broken_revision, first_working_revision + 1)]
        output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd)

        changes = []
        assert not return_code, ('An error occurred while running '
                                 '"%s"' % ' '.join(cmd))
        last_depot = None
        cwd = os.getcwd()
        for l in output.split('\n'):
          if l:
            # Output will be in form:
            # /path_to_depot
            # /path_to_other_depot
            # <SHA1>
            # /path_again
            # <SHA1>
            # etc.
            if l[0] == '/':
              last_depot = l
            else:
              contents = l.split(' ')
              if len(contents) > 1:
                changes.append([last_depot, contents[0]])
        # Query commit info for each (depot_path, sha1) pair found above.
        for c in changes:
          os.chdir(c[0])
          info = self.source_control.QueryRevisionInfo(c[1])
          culprit_revisions.append((c[1], info, None))
      else:
        # Walk from the last broken revision up to (but excluding) the
        # first working one; every commit in between is a culprit candidate.
        for i in xrange(last_broken_revision_index, len(revision_data_sorted)):
          k, v = revision_data_sorted[i]
          if k == first_working_revision:
            break
          self.ChangeToDepotWorkingDirectory(v['depot'])
          info = self.source_control.QueryRevisionInfo(k)
          culprit_revisions.append((k, info, v['depot']))
      os.chdir(cwd)

      # Check for any other possible regression ranges.
      other_regressions = _FindOtherRegressions(
          revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)

    return {
        'first_working_revision': first_working_revision,
        'last_broken_revision': last_broken_revision,
        'culprit_revisions': culprit_revisions,
        'other_regressions': other_regressions,
        'regression_size': regression_size,
        'regression_std_err': regression_std_err,
        'confidence': confidence,
    }
| 2921 | |
| 2922 def _CheckForWarnings(self, results_dict): | 2941 def _CheckForWarnings(self, results_dict): |
| 2923 if len(results_dict['culprit_revisions']) > 1: | 2942 if len(results_dict['culprit_revisions']) > 1: |
| 2924 self.warnings.append('Due to build errors, regression range could ' | 2943 self.warnings.append('Due to build errors, regression range could ' |
| 2925 'not be narrowed down to a single commit.') | 2944 'not be narrowed down to a single commit.') |
| 2926 if self.opts.repeat_test_count == 1: | 2945 if self.opts.repeat_test_count == 1: |
| 2927 self.warnings.append('Tests were only set to run once. This may ' | 2946 self.warnings.append('Tests were only set to run once. This may ' |
| 2928 'be insufficient to get meaningful results.') | 2947 'be insufficient to get meaningful results.') |
| 2929 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE: | 2948 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE: |
| 2930 self.warnings.append('Confidence is not high. Try bisecting again ' | 2949 self.warnings.append('Confidence is not high. Try bisecting again ' |
| 2931 'with increased repeat_count, larger range, or ' | 2950 'with increased repeat_count, larger range, or ' |
| 2932 'on another metric.') | 2951 'on another metric.') |
| 2933 if not results_dict['confidence']: | 2952 if not results_dict['confidence']: |
| 2934 self.warnings.append('Confidence score is 0%. Try bisecting again on ' | 2953 self.warnings.append('Confidence score is 0%. Try bisecting again on ' |
| 2935 'another platform or another metric.') | 2954 'another platform or another metric.') |
| 2936 | 2955 |
| 2937 def FormatAndPrintResults(self, bisect_results): | 2956 def FormatAndPrintResults(self, bisect_results): |
| 2938 """Prints the results from a bisection run in a readable format. | 2957 """Prints the results from a bisection run in a readable format. |
| 2939 | 2958 |
| 2940 Args: | 2959 Args: |
| 2941 bisect_results: The results from a bisection test run. | 2960 bisect_results: The results from a bisection test run. |
| 2942 """ | 2961 """ |
| 2943 revision_data = bisect_results['revision_data'] | 2962 results_dict = bisect_results.GetResultsDict() |
| 2944 revision_data_sorted = sorted(revision_data.iteritems(), | |
| 2945 key = lambda x: x[1]['sort']) | |
| 2946 results_dict = self._GetResultsDict(revision_data, revision_data_sorted) | |
| 2947 | 2963 |
| 2948 self._CheckForWarnings(results_dict) | 2964 self._CheckForWarnings(results_dict) |
| 2949 | 2965 |
| 2950 if self.opts.output_buildbot_annotations: | 2966 if self.opts.output_buildbot_annotations: |
| 2951 bisect_utils.OutputAnnotationStepStart('Build Status Per Revision') | 2967 bisect_utils.OutputAnnotationStepStart('Build Status Per Revision') |
| 2952 | 2968 |
| 2953 print | 2969 print |
| 2954 print 'Full results of bisection:' | 2970 print 'Full results of bisection:' |
| 2955 for current_id, current_data in revision_data_sorted: | 2971 for current_id, current_data in results_dict['revision_data_sorted']: |
| 2956 build_status = current_data['passed'] | 2972 build_status = current_data['passed'] |
| 2957 | 2973 |
| 2958 if type(build_status) is bool: | 2974 if type(build_status) is bool: |
| 2959 if build_status: | 2975 if build_status: |
| 2960 build_status = 'Good' | 2976 build_status = 'Good' |
| 2961 else: | 2977 else: |
| 2962 build_status = 'Bad' | 2978 build_status = 'Bad' |
| 2963 | 2979 |
| 2964 print ' %20s %40s %s' % (current_data['depot'], | 2980 print ' %20s %40s %s' % (current_data['depot'], |
| 2965 current_id, build_status) | 2981 current_id, build_status) |
| 2966 print | 2982 print |
| 2967 | 2983 |
| 2968 if self.opts.output_buildbot_annotations: | 2984 if self.opts.output_buildbot_annotations: |
| 2969 bisect_utils.OutputAnnotationStepClosed() | 2985 bisect_utils.OutputAnnotationStepClosed() |
| 2970 # The perf dashboard scrapes the "results" step in order to comment on | 2986 # The perf dashboard scrapes the "results" step in order to comment on |
| 2971 # bugs. If you change this, please update the perf dashboard as well. | 2987 # bugs. If you change this, please update the perf dashboard as well. |
| 2972 bisect_utils.OutputAnnotationStepStart('Results') | 2988 bisect_utils.OutputAnnotationStepStart('Results') |
| 2973 | 2989 |
| 2974 self._PrintBanner(results_dict) | 2990 self._PrintBanner(results_dict) |
| 2975 self._PrintWarnings() | 2991 self._PrintWarnings() |
| 2976 | 2992 |
| 2977 if results_dict['culprit_revisions'] and results_dict['confidence']: | 2993 if results_dict['culprit_revisions'] and results_dict['confidence']: |
| 2978 for culprit in results_dict['culprit_revisions']: | 2994 for culprit in results_dict['culprit_revisions']: |
| 2979 cl, info, depot = culprit | 2995 cl, info, depot = culprit |
| 2980 self._PrintRevisionInfo(cl, info, depot) | 2996 self._PrintRevisionInfo(cl, info, depot) |
| 2981 if results_dict['other_regressions']: | 2997 if results_dict['other_regressions']: |
| 2982 self._PrintOtherRegressions(results_dict['other_regressions'], | 2998 self._PrintOtherRegressions(results_dict['other_regressions'], |
| 2983 revision_data) | 2999 results_dict['revision_data']) |
| 2984 self._PrintTestedCommitsTable(revision_data_sorted, | 3000 self._PrintTestedCommitsTable(results_dict['revision_data_sorted'], |
| 2985 results_dict['first_working_revision'], | 3001 results_dict['first_working_revision'], |
| 2986 results_dict['last_broken_revision'], | 3002 results_dict['last_broken_revision'], |
| 2987 results_dict['confidence']) | 3003 results_dict['confidence']) |
| 2988 _PrintStepTime(revision_data_sorted) | 3004 _PrintStepTime(results_dict['revision_data_sorted']) |
| 2989 self._PrintReproSteps() | 3005 self._PrintReproSteps() |
| 2990 _PrintThankYou() | 3006 _PrintThankYou() |
| 2991 if self.opts.output_buildbot_annotations: | 3007 if self.opts.output_buildbot_annotations: |
| 2992 bisect_utils.OutputAnnotationStepClosed() | 3008 bisect_utils.OutputAnnotationStepClosed() |
| 2993 | 3009 |
| 2994 def _PrintBanner(self, results_dict): | 3010 def _PrintBanner(self, results_dict): |
| 2995 if self._IsBisectModeReturnCode(): | 3011 if self._IsBisectModeReturnCode(): |
| 2996 metrics = 'N/A' | 3012 metrics = 'N/A' |
| 2997 change = 'Yes' | 3013 change = 'Yes' |
| 2998 else: | 3014 else: |
| (...skipping 390 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3389 if (not source_control.IsInProperBranch() and | 3405 if (not source_control.IsInProperBranch() and |
| 3390 not opts.debug_ignore_sync and | 3406 not opts.debug_ignore_sync and |
| 3391 not opts.working_directory): | 3407 not opts.working_directory): |
| 3392 raise RuntimeError('You must switch to master branch to run bisection.') | 3408 raise RuntimeError('You must switch to master branch to run bisection.') |
| 3393 bisect_test = BisectPerformanceMetrics(source_control, opts) | 3409 bisect_test = BisectPerformanceMetrics(source_control, opts) |
| 3394 try: | 3410 try: |
| 3395 bisect_results = bisect_test.Run(opts.command, | 3411 bisect_results = bisect_test.Run(opts.command, |
| 3396 opts.bad_revision, | 3412 opts.bad_revision, |
| 3397 opts.good_revision, | 3413 opts.good_revision, |
| 3398 opts.metric) | 3414 opts.metric) |
| 3399 if bisect_results['error']: | 3415 if bisect_results.error: |
| 3400 raise RuntimeError(bisect_results['error']) | 3416 raise RuntimeError(bisect_results.error) |
| 3401 bisect_test.FormatAndPrintResults(bisect_results) | 3417 bisect_test.FormatAndPrintResults(bisect_results) |
| 3402 return 0 | 3418 return 0 |
| 3403 finally: | 3419 finally: |
| 3404 bisect_test.PerformCleanup() | 3420 bisect_test.PerformCleanup() |
| 3405 except RuntimeError, e: | 3421 except RuntimeError, e: |
| 3406 if opts.output_buildbot_annotations: | 3422 if opts.output_buildbot_annotations: |
| 3407 # The perf dashboard scrapes the "results" step in order to comment on | 3423 # The perf dashboard scrapes the "results" step in order to comment on |
| 3408 # bugs. If you change this, please update the perf dashboard as well. | 3424 # bugs. If you change this, please update the perf dashboard as well. |
| 3409 bisect_utils.OutputAnnotationStepStart('Results') | 3425 bisect_utils.OutputAnnotationStepStart('Results') |
| 3410 print 'Error: %s' % e.message | 3426 print 'Error: %s' % e.message |
| 3411 if opts.output_buildbot_annotations: | 3427 if opts.output_buildbot_annotations: |
| 3412 bisect_utils.OutputAnnotationStepClosed() | 3428 bisect_utils.OutputAnnotationStepClosed() |
| 3413 return 1 | 3429 return 1 |
| 3414 | 3430 |
| 3415 | 3431 |
if __name__ == '__main__':
  # Propagate main()'s return code (0 on success, 1 on bisect error) as
  # the process exit status.
  sys.exit(main())
| OLD | NEW |