OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Performance Test Bisect Tool | 6 """Performance Test Bisect Tool |
7 | 7 |
8 This script bisects a series of changelists using binary search. It starts at | 8 This script bisects a series of changelists using binary search. It starts at |
9 a bad revision where a performance metric has regressed, and asks for a last | 9 a bad revision where a performance metric has regressed, and asks for a last |
10 known-good revision. It will then binary search across this revision range by | 10 known-good revision. It will then binary search across this revision range by |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
49 | 49 |
50 sys.path.append(os.path.join( | 50 sys.path.append(os.path.join( |
51 os.path.dirname(__file__), os.path.pardir, 'telemetry')) | 51 os.path.dirname(__file__), os.path.pardir, 'telemetry')) |
52 | 52 |
53 import bisect_utils | 53 import bisect_utils |
54 import builder | 54 import builder |
55 import math_utils | 55 import math_utils |
56 import request_build | 56 import request_build |
57 import source_control as source_control_module | 57 import source_control as source_control_module |
58 import ttest | 58 import ttest |
59 from telemetry.util import cloud_storage | 59 from telemetry.util import cloud_storage # pylint: disable=F0401 |
60 | 60 |
61 # Below is the map of "depot" names to information about each depot. Each depot | 61 # Below is the map of "depot" names to information about each depot. Each depot |
62 # is a repository, and in the process of bisecting, revision ranges in these | 62 # is a repository, and in the process of bisecting, revision ranges in these |
63 # repositories may also be bisected. | 63 # repositories may also be bisected. |
64 # | 64 # |
65 # Each depot information dictionary may contain: | 65 # Each depot information dictionary may contain: |
66 # src: Path to the working directory. | 66 # src: Path to the working directory. |
67 # recurse: True if this repository will get bisected. | 67 # recurse: True if this repository will get bisected. |
68 # depends: A list of other repositories that are actually part of the same | 68 # depends: A list of other repositories that are actually part of the same |
69 # repository in svn. If the repository has any dependent repositories | 69 # repository in svn. If the repository has any dependent repositories |
(...skipping 791 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
861 if step_count: | 861 if step_count: |
862 step_perf_time_avg = step_perf_time_avg / step_count | 862 step_perf_time_avg = step_perf_time_avg / step_count |
863 step_build_time_avg = step_build_time_avg / step_count | 863 step_build_time_avg = step_build_time_avg / step_count |
864 print | 864 print |
865 print 'Average build time : %s' % datetime.timedelta( | 865 print 'Average build time : %s' % datetime.timedelta( |
866 seconds=int(step_build_time_avg)) | 866 seconds=int(step_build_time_avg)) |
867 print 'Average test time : %s' % datetime.timedelta( | 867 print 'Average test time : %s' % datetime.timedelta( |
868 seconds=int(step_perf_time_avg)) | 868 seconds=int(step_perf_time_avg)) |
869 | 869 |
870 | 870 |
871 def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good): | 871 class BisectResults(object): |
872 """Compiles a list of other possible regressions from the revision data. | 872 """This class holds the results of the bisect.""" |
873 | 873 |
874 Args: | 874 def __init__(self, bisect_perf_metrics, source_control): |
875 revision_data_sorted: Sorted list of (revision, revision data) pairs. | 875 self._bisect_perf_metrics = bisect_perf_metrics |
qyearsley
2014/09/19 07:35:16
Initial thought: BisectPerformanceMetrics is a mas
Sergiy Byelozyorov
2014/09/19 13:36:22
Done.
| |
876 bad_greater_than_good: Whether the result value at the "bad" revision is | 876 self.revision_data = {} |
877 numerically greater than the result value at the "good" revision. | 877 self.error = None |
878 | 878 self._source_control = source_control |
879 Returns: | 879 |
880 A list of [current_rev, previous_rev, confidence] for other places where | 880 @staticmethod |
881 there may have been a regression. | 881 def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good): |
882 """ | 882 """Compiles a list of other possible regressions from the revision data. |
883 other_regressions = [] | 883 |
884 previous_values = [] | 884 Args: |
885 previous_id = None | 885 revision_data_sorted: Sorted list of (revision, revision data) pairs. |
886 for current_id, current_data in revision_data_sorted: | 886 bad_greater_than_good: Whether the result value at the "bad" revision is |
887 current_values = current_data['value'] | 887 numerically greater than the result value at the "good" revision. |
888 if current_values: | 888 |
889 current_values = current_values['values'] | 889 Returns: |
890 if previous_values: | 890 A list of [current_rev, previous_rev, confidence] for other places where |
891 confidence = ConfidenceScore(previous_values, [current_values]) | 891 there may have been a regression. |
892 mean_of_prev_runs = math_utils.Mean(sum(previous_values, [])) | 892 """ |
893 mean_of_current_runs = math_utils.Mean(current_values) | 893 other_regressions = [] |
894 | 894 previous_values = [] |
895 # Check that the potential regression is in the same direction as | 895 previous_id = None |
896 # the overall regression. If the mean of the previous runs < the | 896 for current_id, current_data in revision_data_sorted: |
897 # mean of the current runs, this local regression is in same | 897 current_values = current_data['value'] |
898 # direction. | 898 if current_values: |
899 prev_less_than_current = mean_of_prev_runs < mean_of_current_runs | 899 current_values = current_values['values'] |
900 is_same_direction = (prev_less_than_current if | 900 if previous_values: |
901 bad_greater_than_good else not prev_less_than_current) | 901 confidence = ConfidenceScore(previous_values, [current_values]) |
902 | 902 mean_of_prev_runs = math_utils.Mean(sum(previous_values, [])) |
903 # Only report potential regressions with high confidence. | 903 mean_of_current_runs = math_utils.Mean(current_values) |
904 if is_same_direction and confidence > 50: | 904 |
905 other_regressions.append([current_id, previous_id, confidence]) | 905 # Check that the potential regression is in the same direction as |
906 previous_values.append(current_values) | 906 # the overall regression. If the mean of the previous runs < the |
907 previous_id = current_id | 907 # mean of the current runs, this local regression is in same |
908 return other_regressions | 908 # direction. |
909 prev_less_than_current = mean_of_prev_runs < mean_of_current_runs | |
910 is_same_direction = (prev_less_than_current if | |
911 bad_greater_than_good else not prev_less_than_current) | |
912 | |
913 # Only report potential regressions with high confidence. | |
914 if is_same_direction and confidence > 50: | |
915 other_regressions.append([current_id, previous_id, confidence]) | |
916 previous_values.append(current_values) | |
917 previous_id = current_id | |
918 return other_regressions | |
919 | |
920 def GetResultsDict(self): | |
921 """Returns a dictionary with the following fields | |
qyearsley
2014/09/19 07:35:16
The first line of a docstring is generally a self-
Sergiy Byelozyorov
2014/09/19 13:36:22
Done.
| |
922 | |
923 'first_working_revision': First good revision. | |
924 'last_broken_revision': Last bad revision. | |
925 'culprit_revisions': A list of revisions, which contain the bad change | |
926 introducing the failure. | |
927 'other_regressions': A list of tuples representing other regressions, which | |
928 may have occured. | |
929 'regression_size': For performance bisects, this is a relative change of the | |
930 mean metric value. For other bisects this field always | |
931 contains 'zero-to-nonzero'. | |
932 'regression_std_err': For performance bisects, it is a pooled standard | |
933 error for groups of good and bad runs. Not used for | |
934 other bisects. | |
935 'confidence': For performance bisects, it is a confidence that the good and | |
936 bad runs are distinct groups. Not used for non-performance | |
937 bisects. | |
qyearsley
2014/09/19 07:35:16
I think this formatting might look better if line
Sergiy Byelozyorov
2014/09/19 13:36:22
Done.
| |
938 | |
939 'revision_data_stored': dict mapping revision ids to data about that | |
940 revision. Each piece of revision data consists of a dict with the | |
941 following keys: | |
942 | |
943 'passed': Represents whether the performance test was successful at | |
944 that revision. Possible values include: 1 (passed), 0 (failed), | |
945 '?' (skipped), 'F' (build failed). | |
946 'depot': The depot that this revision is from (i.e. WebKit) | |
947 'external': If the revision is a 'src' revision, 'external' contains | |
948 the revisions of each of the external libraries. | |
949 'sort': A sort value for sorting the dict in order of commits. | |
950 | |
951 For example: | |
952 { | |
953 'CL #1': | |
954 { | |
955 'passed': False, | |
956 'depot': 'chromium', | |
957 'external': None, | |
958 'sort': 0 | |
959 } | |
960 } | |
961 """ | |
962 revision_data_sorted = sorted(self.revision_data.iteritems(), | |
963 key = lambda x: x[1]['sort']) | |
964 | |
965 # Find range where it possibly broke. | |
966 first_working_revision = None | |
967 first_working_revision_index = -1 | |
968 last_broken_revision = None | |
969 last_broken_revision_index = -1 | |
970 | |
971 culprit_revisions = [] | |
972 other_regressions = [] | |
973 regression_size = 0.0 | |
974 regression_std_err = 0.0 | |
975 confidence = 0.0 | |
976 | |
977 for i in xrange(len(revision_data_sorted)): | |
978 k, v = revision_data_sorted[i] | |
979 if v['passed'] == 1: | |
980 if not first_working_revision: | |
981 first_working_revision = k | |
982 first_working_revision_index = i | |
983 | |
984 if not v['passed']: | |
985 last_broken_revision = k | |
986 last_broken_revision_index = i | |
987 | |
988 if last_broken_revision != None and first_working_revision != None: | |
989 broken_means = [] | |
990 for i in xrange(0, last_broken_revision_index + 1): | |
991 if revision_data_sorted[i][1]['value']: | |
992 broken_means.append(revision_data_sorted[i][1]['value']['values']) | |
993 | |
994 working_means = [] | |
995 for i in xrange(first_working_revision_index, len(revision_data_sorted)): | |
996 if revision_data_sorted[i][1]['value']: | |
997 working_means.append(revision_data_sorted[i][1]['value']['values']) | |
998 | |
999 # Flatten the lists to calculate mean of all values. | |
1000 working_mean = sum(working_means, []) | |
1001 broken_mean = sum(broken_means, []) | |
1002 | |
1003 # Calculate the approximate size of the regression | |
1004 mean_of_bad_runs = math_utils.Mean(broken_mean) | |
1005 mean_of_good_runs = math_utils.Mean(working_mean) | |
1006 | |
1007 regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs, | |
1008 mean_of_bad_runs) | |
1009 if math.isnan(regression_size): | |
1010 regression_size = 'zero-to-nonzero' | |
1011 | |
1012 regression_std_err = math.fabs(math_utils.PooledStandardError( | |
1013 [working_mean, broken_mean]) / | |
1014 max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0 | |
1015 | |
1016 # Give a "confidence" in the bisect. At the moment we use how distinct the | |
1017 # values are before and after the last broken revision, and how noisy the | |
1018 # overall graph is. | |
1019 confidence = ConfidenceScore(working_means, broken_means) | |
1020 | |
1021 culprit_revisions = [] | |
1022 | |
1023 cwd = os.getcwd() | |
1024 self._bisect_perf_metrics.ChangeToDepotWorkingDirectory( | |
1025 self.revision_data[last_broken_revision]['depot']) | |
1026 | |
1027 if self.revision_data[last_broken_revision]['depot'] == 'cros': | |
1028 # Want to get a list of all the commits and what depots they belong | |
1029 # to so that we can grab info about each. | |
1030 cmd = ['repo', 'forall', '-c', | |
1031 'pwd ; git log --pretty=oneline --before=%d --after=%d' % ( | |
1032 last_broken_revision, first_working_revision + 1)] | |
1033 output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd) | |
1034 | |
1035 changes = [] | |
1036 assert not return_code, ('An error occurred while running ' | |
1037 '"%s"' % ' '.join(cmd)) | |
1038 last_depot = None | |
1039 cwd = os.getcwd() | |
1040 for l in output.split('\n'): | |
1041 if l: | |
1042 # Output will be in form: | |
1043 # /path_to_depot | |
1044 # /path_to_other_depot | |
1045 # <SHA1> | |
1046 # /path_again | |
1047 # <SHA1> | |
1048 # etc. | |
1049 if l[0] == '/': | |
1050 last_depot = l | |
1051 else: | |
1052 contents = l.split(' ') | |
1053 if len(contents) > 1: | |
1054 changes.append([last_depot, contents[0]]) | |
1055 for c in changes: | |
1056 os.chdir(c[0]) | |
1057 info = self._source_control.QueryRevisionInfo(c[1]) | |
1058 culprit_revisions.append((c[1], info, None)) | |
1059 else: | |
1060 for i in xrange(last_broken_revision_index, len(revision_data_sorted)): | |
1061 k, v = revision_data_sorted[i] | |
1062 if k == first_working_revision: | |
1063 break | |
1064 self._bisect_perf_metrics.ChangeToDepotWorkingDirectory(v['depot']) | |
1065 info = self._source_control.QueryRevisionInfo(k) | |
1066 culprit_revisions.append((k, info, v['depot'])) | |
1067 os.chdir(cwd) | |
1068 | |
1069 # Check for any other possible regression ranges. | |
1070 other_regressions = self._FindOtherRegressions( | |
1071 revision_data_sorted, mean_of_bad_runs > mean_of_good_runs) | |
1072 | |
1073 return { | |
1074 'first_working_revision': first_working_revision, | |
1075 'last_broken_revision': last_broken_revision, | |
1076 'culprit_revisions': culprit_revisions, | |
1077 'other_regressions': other_regressions, | |
1078 'regression_size': regression_size, | |
1079 'regression_std_err': regression_std_err, | |
1080 'confidence': confidence, | |
1081 'revision_data_sorted': revision_data_sorted | |
1082 } | |
909 | 1083 |
910 | 1084 |
911 class BisectPerformanceMetrics(object): | 1085 class BisectPerformanceMetrics(object): |
912 """This class contains functionality to perform a bisection of a range of | 1086 """This class contains functionality to perform a bisection of a range of |
913 revisions to narrow down where performance regressions may have occurred. | 1087 revisions to narrow down where performance regressions may have occurred. |
914 | 1088 |
915 The main entry-point is the Run method. | 1089 The main entry-point is the Run method. |
916 """ | 1090 """ |
917 | 1091 |
918 def __init__(self, source_control, opts): | 1092 def __init__(self, source_control, opts): |
(...skipping 1402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2321 intermediate revisions to determine the CL where the performance regression | 2495 intermediate revisions to determine the CL where the performance regression |
2322 occurred. | 2496 occurred. |
2323 | 2497 |
2324 Args: | 2498 Args: |
2325 command_to_run: Specify the command to execute the performance test. | 2499 command_to_run: Specify the command to execute the performance test. |
2326 good_revision: Number/tag of the known good revision. | 2500 good_revision: Number/tag of the known good revision. |
2327 bad_revision: Number/tag of the known bad revision. | 2501 bad_revision: Number/tag of the known bad revision. |
2328 metric: The performance metric to monitor. | 2502 metric: The performance metric to monitor. |
2329 | 2503 |
2330 Returns: | 2504 Returns: |
2331 A dict with 2 members, 'revision_data' and 'error'. On success, | 2505 A BisectResults object. |
2332 'revision_data' will contain a dict mapping revision ids to | |
2333 data about that revision. Each piece of revision data consists of a | |
2334 dict with the following keys: | |
2335 | |
2336 'passed': Represents whether the performance test was successful at | |
2337 that revision. Possible values include: 1 (passed), 0 (failed), | |
2338 '?' (skipped), 'F' (build failed). | |
2339 'depot': The depot that this revision is from (i.e. WebKit) | |
2340 'external': If the revision is a 'src' revision, 'external' contains | |
2341 the revisions of each of the external libraries. | |
2342 'sort': A sort value for sorting the dict in order of commits. | |
2343 | |
2344 For example: | |
2345 { | |
2346 'error':None, | |
2347 'revision_data': | |
2348 { | |
2349 'CL #1': | |
2350 { | |
2351 'passed': False, | |
2352 'depot': 'chromium', | |
2353 'external': None, | |
2354 'sort': 0 | |
2355 } | |
2356 } | |
2357 } | |
2358 | |
2359 If an error occurred, the 'error' field will contain the message and | |
2360 'revision_data' will be empty. | |
2361 """ | 2506 """ |
2362 results = { | 2507 results = BisectResults(self, self.source_control) |
2363 'revision_data' : {}, | |
2364 'error' : None, | |
2365 } | |
2366 | 2508 |
2367 # Choose depot to bisect first | 2509 # Choose depot to bisect first |
2368 target_depot = 'chromium' | 2510 target_depot = 'chromium' |
2369 if self.opts.target_platform == 'cros': | 2511 if self.opts.target_platform == 'cros': |
2370 target_depot = 'cros' | 2512 target_depot = 'cros' |
2371 elif self.opts.target_platform == 'android-chrome': | 2513 elif self.opts.target_platform == 'android-chrome': |
2372 target_depot = 'android-chrome' | 2514 target_depot = 'android-chrome' |
2373 | 2515 |
2374 cwd = os.getcwd() | 2516 cwd = os.getcwd() |
2375 self.ChangeToDepotWorkingDirectory(target_depot) | 2517 self.ChangeToDepotWorkingDirectory(target_depot) |
2376 | 2518 |
2377 # If they passed SVN revisions, we can try match them to git SHA1 hashes. | 2519 # If they passed SVN revisions, we can try match them to git SHA1 hashes. |
2378 bad_revision = self.source_control.ResolveToRevision( | 2520 bad_revision = self.source_control.ResolveToRevision( |
2379 bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100) | 2521 bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100) |
2380 good_revision = self.source_control.ResolveToRevision( | 2522 good_revision = self.source_control.ResolveToRevision( |
2381 good_revision_in, target_depot, DEPOT_DEPS_NAME, -100) | 2523 good_revision_in, target_depot, DEPOT_DEPS_NAME, -100) |
2382 | 2524 |
2383 os.chdir(cwd) | 2525 os.chdir(cwd) |
2384 if bad_revision is None: | 2526 if bad_revision is None: |
2385 results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in | 2527 results.error = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in |
2386 return results | 2528 return results |
2387 | 2529 |
2388 if good_revision is None: | 2530 if good_revision is None: |
2389 results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in | 2531 results.error = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in |
2390 return results | 2532 return results |
2391 | 2533 |
2392 # Check that they didn't accidentally swap good and bad revisions. | 2534 # Check that they didn't accidentally swap good and bad revisions. |
2393 if not self.CheckIfRevisionsInProperOrder( | 2535 if not self.CheckIfRevisionsInProperOrder( |
2394 target_depot, good_revision, bad_revision): | 2536 target_depot, good_revision, bad_revision): |
2395 results['error'] = ('bad_revision < good_revision, did you swap these ' | 2537 results.error = ('bad_revision < good_revision, did you swap these ' |
2396 'by mistake?') | 2538 'by mistake?') |
2397 return results | 2539 return results |
2398 bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange( | 2540 bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange( |
2399 bad_revision, good_revision, good_revision_in) | 2541 bad_revision, good_revision, good_revision_in) |
2400 if self.opts.output_buildbot_annotations: | 2542 if self.opts.output_buildbot_annotations: |
2401 bisect_utils.OutputAnnotationStepStart('Gathering Revisions') | 2543 bisect_utils.OutputAnnotationStepStart('Gathering Revisions') |
2402 | 2544 |
2403 cannot_bisect = self.CanPerformBisect(good_revision, bad_revision) | 2545 cannot_bisect = self.CanPerformBisect(good_revision, bad_revision) |
2404 if cannot_bisect: | 2546 if cannot_bisect: |
2405 results['error'] = cannot_bisect.get('error') | 2547 results.error = cannot_bisect.get('error') |
2406 return results | 2548 return results |
2407 | 2549 |
2408 print 'Gathering revision range for bisection.' | 2550 print 'Gathering revision range for bisection.' |
2409 # Retrieve a list of revisions to do bisection on. | 2551 # Retrieve a list of revisions to do bisection on. |
2410 src_revision_list = self.GetRevisionList( | 2552 src_revision_list = self.GetRevisionList( |
2411 target_depot, bad_revision, good_revision) | 2553 target_depot, bad_revision, good_revision) |
2412 | 2554 |
2413 if self.opts.output_buildbot_annotations: | 2555 if self.opts.output_buildbot_annotations: |
2414 bisect_utils.OutputAnnotationStepClosed() | 2556 bisect_utils.OutputAnnotationStepClosed() |
2415 | 2557 |
2416 if src_revision_list: | 2558 if src_revision_list: |
2417 # revision_data will store information about a revision such as the | 2559 # revision_data will store information about a revision such as the |
2418 # depot it came from, the webkit/V8 revision at that time, | 2560 # depot it came from, the webkit/V8 revision at that time, |
2419 # performance timing, build state, etc... | 2561 # performance timing, build state, etc... |
2420 revision_data = results['revision_data'] | 2562 revision_data = results.revision_data |
2421 | 2563 |
2422 # revision_list is the list we're binary searching through at the moment. | 2564 # revision_list is the list we're binary searching through at the moment. |
2423 revision_list = [] | 2565 revision_list = [] |
2424 | 2566 |
2425 sort_key_ids = 0 | 2567 sort_key_ids = 0 |
2426 | 2568 |
2427 for current_revision_id in src_revision_list: | 2569 for current_revision_id in src_revision_list: |
2428 sort_key_ids += 1 | 2570 sort_key_ids += 1 |
2429 | 2571 |
2430 revision_data[current_revision_id] = { | 2572 revision_data[current_revision_id] = { |
(...skipping 22 matching lines...) Expand all Loading... | |
2453 bad_results, good_results = self.GatherReferenceValues(good_revision, | 2595 bad_results, good_results = self.GatherReferenceValues(good_revision, |
2454 bad_revision, | 2596 bad_revision, |
2455 command_to_run, | 2597 command_to_run, |
2456 metric, | 2598 metric, |
2457 target_depot) | 2599 target_depot) |
2458 | 2600 |
2459 if self.opts.output_buildbot_annotations: | 2601 if self.opts.output_buildbot_annotations: |
2460 bisect_utils.OutputAnnotationStepClosed() | 2602 bisect_utils.OutputAnnotationStepClosed() |
2461 | 2603 |
2462 if bad_results[1]: | 2604 if bad_results[1]: |
2463 results['error'] = ('An error occurred while building and running ' | 2605 results.error = ('An error occurred while building and running ' |
2464 'the \'bad\' reference value. The bisect cannot continue without ' | 2606 'the \'bad\' reference value. The bisect cannot continue without ' |
2465 'a working \'bad\' revision to start from.\n\nError: %s' % | 2607 'a working \'bad\' revision to start from.\n\nError: %s' % |
2466 bad_results[0]) | 2608 bad_results[0]) |
2467 return results | 2609 return results |
2468 | 2610 |
2469 if good_results[1]: | 2611 if good_results[1]: |
2470 results['error'] = ('An error occurred while building and running ' | 2612 results.error = ('An error occurred while building and running ' |
2471 'the \'good\' reference value. The bisect cannot continue without ' | 2613 'the \'good\' reference value. The bisect cannot continue without ' |
2472 'a working \'good\' revision to start from.\n\nError: %s' % | 2614 'a working \'good\' revision to start from.\n\nError: %s' % |
2473 good_results[0]) | 2615 good_results[0]) |
2474 return results | 2616 return results |
2475 | 2617 |
2476 | 2618 |
2477 # We need these reference values to determine if later runs should be | 2619 # We need these reference values to determine if later runs should be |
2478 # classified as pass or fail. | 2620 # classified as pass or fail. |
2479 known_bad_value = bad_results[0] | 2621 known_bad_value = bad_results[0] |
2480 known_good_value = good_results[0] | 2622 known_good_value = good_results[0] |
2481 | 2623 |
2482 # Can just mark the good and bad revisions explicitly here since we | 2624 # Can just mark the good and bad revisions explicitly here since we |
2483 # already know the results. | 2625 # already know the results. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2528 break | 2670 break |
2529 | 2671 |
2530 earliest_revision = max_revision_data['external'][external_depot] | 2672 earliest_revision = max_revision_data['external'][external_depot] |
2531 latest_revision = min_revision_data['external'][external_depot] | 2673 latest_revision = min_revision_data['external'][external_depot] |
2532 | 2674 |
2533 new_revision_list = self.PrepareToBisectOnDepot( | 2675 new_revision_list = self.PrepareToBisectOnDepot( |
2534 external_depot, latest_revision, earliest_revision, | 2676 external_depot, latest_revision, earliest_revision, |
2535 previous_revision) | 2677 previous_revision) |
2536 | 2678 |
2537 if not new_revision_list: | 2679 if not new_revision_list: |
2538 results['error'] = ('An error occurred attempting to retrieve ' | 2680 results.error = ('An error occurred attempting to retrieve ' |
2539 'revision range: [%s..%s]' % | 2681 'revision range: [%s..%s]' % |
2540 (earliest_revision, latest_revision)) | 2682 (earliest_revision, latest_revision)) |
2541 return results | 2683 return results |
2542 | 2684 |
2543 _AddRevisionsIntoRevisionData( | 2685 _AddRevisionsIntoRevisionData( |
2544 new_revision_list, external_depot, min_revision_data['sort'], | 2686 new_revision_list, external_depot, min_revision_data['sort'], |
2545 revision_data) | 2687 revision_data) |
2546 | 2688 |
2547 # Reset the bisection and perform it on the newly inserted | 2689 # Reset the bisection and perform it on the newly inserted |
2548 # changelists. | 2690 # changelists. |
2549 revision_list = new_revision_list | 2691 revision_list = new_revision_list |
2550 min_revision = 0 | 2692 min_revision = 0 |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2609 # If the build is broken, remove it and redo search. | 2751 # If the build is broken, remove it and redo search. |
2610 revision_list.pop(next_revision_index) | 2752 revision_list.pop(next_revision_index) |
2611 | 2753 |
2612 max_revision -= 1 | 2754 max_revision -= 1 |
2613 | 2755 |
2614 if self.opts.output_buildbot_annotations: | 2756 if self.opts.output_buildbot_annotations: |
2615 self._PrintPartialResults(results) | 2757 self._PrintPartialResults(results) |
2616 bisect_utils.OutputAnnotationStepClosed() | 2758 bisect_utils.OutputAnnotationStepClosed() |
2617 else: | 2759 else: |
2618 # Weren't able to sync and retrieve the revision range. | 2760 # Weren't able to sync and retrieve the revision range. |
2619 results['error'] = ('An error occurred attempting to retrieve revision ' | 2761 results.error = ('An error occurred attempting to retrieve revision ' |
2620 'range: [%s..%s]' % (good_revision, bad_revision)) | 2762 'range: [%s..%s]' % (good_revision, bad_revision)) |
2621 | 2763 |
2622 return results | 2764 return results |
2623 | 2765 |
2624 def _PrintPartialResults(self, results_dict): | 2766 def _PrintPartialResults(self, results): |
2625 revision_data = results_dict['revision_data'] | 2767 results_dict = results.GetResultsDict() |
2626 revision_data_sorted = sorted(revision_data.iteritems(), | 2768 self._PrintTestedCommitsTable(results_dict['revision_data_sorted'], |
2627 key = lambda x: x[1]['sort']) | |
2628 results_dict = self._GetResultsDict(revision_data, revision_data_sorted) | |
2629 | |
2630 self._PrintTestedCommitsTable(revision_data_sorted, | |
2631 results_dict['first_working_revision'], | 2769 results_dict['first_working_revision'], |
2632 results_dict['last_broken_revision'], | 2770 results_dict['last_broken_revision'], |
2633 100, final_step=False) | 2771 100, final_step=False) |
2634 | 2772 |
2635 def _ConfidenceLevelStatus(self, results_dict): | 2773 def _ConfidenceLevelStatus(self, results_dict): |
2636 if not results_dict['confidence']: | 2774 if not results_dict['confidence']: |
2637 return None | 2775 return None |
2638 confidence_status = 'Successful with %(level)s confidence%(warning)s.' | 2776 confidence_status = 'Successful with %(level)s confidence%(warning)s.' |
2639 if results_dict['confidence'] >= HIGH_CONFIDENCE: | 2777 if results_dict['confidence'] >= HIGH_CONFIDENCE: |
2640 level = 'high' | 2778 level = 'high' |
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2793 if not previous_link: | 2931 if not previous_link: |
2794 previous_link = previous_id | 2932 previous_link = previous_id |
2795 | 2933 |
2796 print ' %8s %70s %s' % ( | 2934 print ' %8s %70s %s' % ( |
2797 current_data['depot'], current_link, | 2935 current_data['depot'], current_link, |
2798 ('%d%%' % confidence).center(10, ' ')) | 2936 ('%d%%' % confidence).center(10, ' ')) |
2799 print ' %8s %70s' % ( | 2937 print ' %8s %70s' % ( |
2800 previous_data['depot'], previous_link) | 2938 previous_data['depot'], previous_link) |
2801 print | 2939 print |
2802 | 2940 |
2803 def _GetResultsDict(self, revision_data, revision_data_sorted): | |
2804 # Find range where it possibly broke. | |
2805 first_working_revision = None | |
2806 first_working_revision_index = -1 | |
2807 last_broken_revision = None | |
2808 last_broken_revision_index = -1 | |
2809 | |
2810 culprit_revisions = [] | |
2811 other_regressions = [] | |
2812 regression_size = 0.0 | |
2813 regression_std_err = 0.0 | |
2814 confidence = 0.0 | |
2815 | |
2816 for i in xrange(len(revision_data_sorted)): | |
2817 k, v = revision_data_sorted[i] | |
2818 if v['passed'] == 1: | |
2819 if not first_working_revision: | |
2820 first_working_revision = k | |
2821 first_working_revision_index = i | |
2822 | |
2823 if not v['passed']: | |
2824 last_broken_revision = k | |
2825 last_broken_revision_index = i | |
2826 | |
2827 if last_broken_revision != None and first_working_revision != None: | |
2828 broken_means = [] | |
2829 for i in xrange(0, last_broken_revision_index + 1): | |
2830 if revision_data_sorted[i][1]['value']: | |
2831 broken_means.append(revision_data_sorted[i][1]['value']['values']) | |
2832 | |
2833 working_means = [] | |
2834 for i in xrange(first_working_revision_index, len(revision_data_sorted)): | |
2835 if revision_data_sorted[i][1]['value']: | |
2836 working_means.append(revision_data_sorted[i][1]['value']['values']) | |
2837 | |
2838 # Flatten the lists to calculate mean of all values. | |
2839 working_mean = sum(working_means, []) | |
2840 broken_mean = sum(broken_means, []) | |
2841 | |
2842 # Calculate the approximate size of the regression | |
2843 mean_of_bad_runs = math_utils.Mean(broken_mean) | |
2844 mean_of_good_runs = math_utils.Mean(working_mean) | |
2845 | |
2846 regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs, | |
2847 mean_of_bad_runs) | |
2848 if math.isnan(regression_size): | |
2849 regression_size = 'zero-to-nonzero' | |
2850 | |
2851 regression_std_err = math.fabs(math_utils.PooledStandardError( | |
2852 [working_mean, broken_mean]) / | |
2853 max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0 | |
2854 | |
2855 # Give a "confidence" in the bisect. At the moment we use how distinct the | |
2856 # values are before and after the last broken revision, and how noisy the | |
2857 # overall graph is. | |
2858 confidence = ConfidenceScore(working_means, broken_means) | |
2859 | |
2860 culprit_revisions = [] | |
2861 | |
2862 cwd = os.getcwd() | |
2863 self.ChangeToDepotWorkingDirectory( | |
2864 revision_data[last_broken_revision]['depot']) | |
2865 | |
2866 if revision_data[last_broken_revision]['depot'] == 'cros': | |
2867 # Want to get a list of all the commits and what depots they belong | |
2868 # to so that we can grab info about each. | |
2869 cmd = ['repo', 'forall', '-c', | |
2870 'pwd ; git log --pretty=oneline --before=%d --after=%d' % ( | |
2871 last_broken_revision, first_working_revision + 1)] | |
2872 output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd) | |
2873 | |
2874 changes = [] | |
2875 assert not return_code, ('An error occurred while running ' | |
2876 '"%s"' % ' '.join(cmd)) | |
2877 last_depot = None | |
2878 cwd = os.getcwd() | |
2879 for l in output.split('\n'): | |
2880 if l: | |
2881 # Output will be in form: | |
2882 # /path_to_depot | |
2883 # /path_to_other_depot | |
2884 # <SHA1> | |
2885 # /path_again | |
2886 # <SHA1> | |
2887 # etc. | |
2888 if l[0] == '/': | |
2889 last_depot = l | |
2890 else: | |
2891 contents = l.split(' ') | |
2892 if len(contents) > 1: | |
2893 changes.append([last_depot, contents[0]]) | |
2894 for c in changes: | |
2895 os.chdir(c[0]) | |
2896 info = self.source_control.QueryRevisionInfo(c[1]) | |
2897 culprit_revisions.append((c[1], info, None)) | |
2898 else: | |
2899 for i in xrange(last_broken_revision_index, len(revision_data_sorted)): | |
2900 k, v = revision_data_sorted[i] | |
2901 if k == first_working_revision: | |
2902 break | |
2903 self.ChangeToDepotWorkingDirectory(v['depot']) | |
2904 info = self.source_control.QueryRevisionInfo(k) | |
2905 culprit_revisions.append((k, info, v['depot'])) | |
2906 os.chdir(cwd) | |
2907 | |
2908 # Check for any other possible regression ranges. | |
2909 other_regressions = _FindOtherRegressions( | |
2910 revision_data_sorted, mean_of_bad_runs > mean_of_good_runs) | |
2911 | |
2912 return { | |
2913 'first_working_revision': first_working_revision, | |
2914 'last_broken_revision': last_broken_revision, | |
2915 'culprit_revisions': culprit_revisions, | |
2916 'other_regressions': other_regressions, | |
2917 'regression_size': regression_size, | |
2918 'regression_std_err': regression_std_err, | |
2919 'confidence': confidence, | |
2920 } | |
2921 | |
2922 def _CheckForWarnings(self, results_dict): | 2941 def _CheckForWarnings(self, results_dict): |
2923 if len(results_dict['culprit_revisions']) > 1: | 2942 if len(results_dict['culprit_revisions']) > 1: |
2924 self.warnings.append('Due to build errors, regression range could ' | 2943 self.warnings.append('Due to build errors, regression range could ' |
2925 'not be narrowed down to a single commit.') | 2944 'not be narrowed down to a single commit.') |
2926 if self.opts.repeat_test_count == 1: | 2945 if self.opts.repeat_test_count == 1: |
2927 self.warnings.append('Tests were only set to run once. This may ' | 2946 self.warnings.append('Tests were only set to run once. This may ' |
2928 'be insufficient to get meaningful results.') | 2947 'be insufficient to get meaningful results.') |
2929 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE: | 2948 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE: |
2930 self.warnings.append('Confidence is not high. Try bisecting again ' | 2949 self.warnings.append('Confidence is not high. Try bisecting again ' |
2931 'with increased repeat_count, larger range, or ' | 2950 'with increased repeat_count, larger range, or ' |
2932 'on another metric.') | 2951 'on another metric.') |
2933 if not results_dict['confidence']: | 2952 if not results_dict['confidence']: |
2934 self.warnings.append('Confidence score is 0%. Try bisecting again on ' | 2953 self.warnings.append('Confidence score is 0%. Try bisecting again on ' |
2935 'another platform or another metric.') | 2954 'another platform or another metric.') |
2936 | 2955 |
2937 def FormatAndPrintResults(self, bisect_results): | 2956 def FormatAndPrintResults(self, bisect_results): |
2938 """Prints the results from a bisection run in a readable format. | 2957 """Prints the results from a bisection run in a readable format. |
2939 | 2958 |
2940 Args: | 2959 Args: |
2941 bisect_results: The results from a bisection test run. | 2960 bisect_results: The results from a bisection test run. |
2942 """ | 2961 """ |
2943 revision_data = bisect_results['revision_data'] | 2962 results_dict = bisect_results.GetResultsDict() |
2944 revision_data_sorted = sorted(revision_data.iteritems(), | |
2945 key = lambda x: x[1]['sort']) | |
2946 results_dict = self._GetResultsDict(revision_data, revision_data_sorted) | |
2947 | 2963 |
2948 self._CheckForWarnings(results_dict) | 2964 self._CheckForWarnings(results_dict) |
2949 | 2965 |
2950 if self.opts.output_buildbot_annotations: | 2966 if self.opts.output_buildbot_annotations: |
2951 bisect_utils.OutputAnnotationStepStart('Build Status Per Revision') | 2967 bisect_utils.OutputAnnotationStepStart('Build Status Per Revision') |
2952 | 2968 |
2953 print | 2969 print |
2954 print 'Full results of bisection:' | 2970 print 'Full results of bisection:' |
2955 for current_id, current_data in revision_data_sorted: | 2971 for current_id, current_data in results_dict['revision_data_sorted']: |
2956 build_status = current_data['passed'] | 2972 build_status = current_data['passed'] |
2957 | 2973 |
2958 if type(build_status) is bool: | 2974 if type(build_status) is bool: |
2959 if build_status: | 2975 if build_status: |
2960 build_status = 'Good' | 2976 build_status = 'Good' |
2961 else: | 2977 else: |
2962 build_status = 'Bad' | 2978 build_status = 'Bad' |
2963 | 2979 |
2964 print ' %20s %40s %s' % (current_data['depot'], | 2980 print ' %20s %40s %s' % (current_data['depot'], |
2965 current_id, build_status) | 2981 current_id, build_status) |
2966 print | 2982 print |
2967 | 2983 |
2968 if self.opts.output_buildbot_annotations: | 2984 if self.opts.output_buildbot_annotations: |
2969 bisect_utils.OutputAnnotationStepClosed() | 2985 bisect_utils.OutputAnnotationStepClosed() |
2970 # The perf dashboard scrapes the "results" step in order to comment on | 2986 # The perf dashboard scrapes the "results" step in order to comment on |
2971 # bugs. If you change this, please update the perf dashboard as well. | 2987 # bugs. If you change this, please update the perf dashboard as well. |
2972 bisect_utils.OutputAnnotationStepStart('Results') | 2988 bisect_utils.OutputAnnotationStepStart('Results') |
2973 | 2989 |
2974 self._PrintBanner(results_dict) | 2990 self._PrintBanner(results_dict) |
2975 self._PrintWarnings() | 2991 self._PrintWarnings() |
2976 | 2992 |
2977 if results_dict['culprit_revisions'] and results_dict['confidence']: | 2993 if results_dict['culprit_revisions'] and results_dict['confidence']: |
2978 for culprit in results_dict['culprit_revisions']: | 2994 for culprit in results_dict['culprit_revisions']: |
2979 cl, info, depot = culprit | 2995 cl, info, depot = culprit |
2980 self._PrintRevisionInfo(cl, info, depot) | 2996 self._PrintRevisionInfo(cl, info, depot) |
2981 if results_dict['other_regressions']: | 2997 if results_dict['other_regressions']: |
2982 self._PrintOtherRegressions(results_dict['other_regressions'], | 2998 self._PrintOtherRegressions(results_dict['other_regressions'], |
2983 revision_data) | 2999 results_dict['revision_data']) |
2984 self._PrintTestedCommitsTable(revision_data_sorted, | 3000 self._PrintTestedCommitsTable(results_dict['revision_data_sorted'], |
2985 results_dict['first_working_revision'], | 3001 results_dict['first_working_revision'], |
2986 results_dict['last_broken_revision'], | 3002 results_dict['last_broken_revision'], |
2987 results_dict['confidence']) | 3003 results_dict['confidence']) |
2988 _PrintStepTime(revision_data_sorted) | 3004 _PrintStepTime(results_dict['revision_data_sorted']) |
2989 self._PrintReproSteps() | 3005 self._PrintReproSteps() |
2990 _PrintThankYou() | 3006 _PrintThankYou() |
2991 if self.opts.output_buildbot_annotations: | 3007 if self.opts.output_buildbot_annotations: |
2992 bisect_utils.OutputAnnotationStepClosed() | 3008 bisect_utils.OutputAnnotationStepClosed() |
2993 | 3009 |
2994 def _PrintBanner(self, results_dict): | 3010 def _PrintBanner(self, results_dict): |
2995 if self._IsBisectModeReturnCode(): | 3011 if self._IsBisectModeReturnCode(): |
2996 metrics = 'N/A' | 3012 metrics = 'N/A' |
2997 change = 'Yes' | 3013 change = 'Yes' |
2998 else: | 3014 else: |
(...skipping 390 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3389 if (not source_control.IsInProperBranch() and | 3405 if (not source_control.IsInProperBranch() and |
3390 not opts.debug_ignore_sync and | 3406 not opts.debug_ignore_sync and |
3391 not opts.working_directory): | 3407 not opts.working_directory): |
3392 raise RuntimeError('You must switch to master branch to run bisection.') | 3408 raise RuntimeError('You must switch to master branch to run bisection.') |
3393 bisect_test = BisectPerformanceMetrics(source_control, opts) | 3409 bisect_test = BisectPerformanceMetrics(source_control, opts) |
3394 try: | 3410 try: |
3395 bisect_results = bisect_test.Run(opts.command, | 3411 bisect_results = bisect_test.Run(opts.command, |
3396 opts.bad_revision, | 3412 opts.bad_revision, |
3397 opts.good_revision, | 3413 opts.good_revision, |
3398 opts.metric) | 3414 opts.metric) |
3399 if bisect_results['error']: | 3415 if bisect_results.error: |
3400 raise RuntimeError(bisect_results['error']) | 3416 raise RuntimeError(bisect_results.error) |
3401 bisect_test.FormatAndPrintResults(bisect_results) | 3417 bisect_test.FormatAndPrintResults(bisect_results) |
3402 return 0 | 3418 return 0 |
3403 finally: | 3419 finally: |
3404 bisect_test.PerformCleanup() | 3420 bisect_test.PerformCleanup() |
3405 except RuntimeError, e: | 3421 except RuntimeError, e: |
3406 if opts.output_buildbot_annotations: | 3422 if opts.output_buildbot_annotations: |
3407 # The perf dashboard scrapes the "results" step in order to comment on | 3423 # The perf dashboard scrapes the "results" step in order to comment on |
3408 # bugs. If you change this, please update the perf dashboard as well. | 3424 # bugs. If you change this, please update the perf dashboard as well. |
3409 bisect_utils.OutputAnnotationStepStart('Results') | 3425 bisect_utils.OutputAnnotationStepStart('Results') |
3410 print 'Error: %s' % e.message | 3426 print 'Error: %s' % e.message |
3411 if opts.output_buildbot_annotations: | 3427 if opts.output_buildbot_annotations: |
3412 bisect_utils.OutputAnnotationStepClosed() | 3428 bisect_utils.OutputAnnotationStepClosed() |
3413 return 1 | 3429 return 1 |
3414 | 3430 |
3415 | 3431 |
# Script entry point: propagate main()'s return code as the process
# exit status when run directly (0 on success, 1 on bisect error).
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |