Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2352)

Side by Side Diff: tools/auto_bisect/bisect_perf_regression.py

Issue 554283003: Refactored bisect results dicts into a separate class (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Set correct upstream branch Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Performance Test Bisect Tool 6 """Performance Test Bisect Tool
7 7
8 This script bisects a series of changelists using binary search. It starts at 8 This script bisects a series of changelists using binary search. It starts at
9 a bad revision where a performance metric has regressed, and asks for a last 9 a bad revision where a performance metric has regressed, and asks for a last
10 known-good revision. It will then binary search across this revision range by 10 known-good revision. It will then binary search across this revision range by
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 49
50 sys.path.append(os.path.join( 50 sys.path.append(os.path.join(
51 os.path.dirname(__file__), os.path.pardir, 'telemetry')) 51 os.path.dirname(__file__), os.path.pardir, 'telemetry'))
52 52
53 import bisect_utils 53 import bisect_utils
54 import builder 54 import builder
55 import math_utils 55 import math_utils
56 import request_build 56 import request_build
57 import source_control as source_control_module 57 import source_control as source_control_module
58 import ttest 58 import ttest
59 from telemetry.util import cloud_storage 59 from telemetry.util import cloud_storage # pylint: disable=F0401
60 60
61 # Below is the map of "depot" names to information about each depot. Each depot 61 # Below is the map of "depot" names to information about each depot. Each depot
62 # is a repository, and in the process of bisecting, revision ranges in these 62 # is a repository, and in the process of bisecting, revision ranges in these
63 # repositories may also be bisected. 63 # repositories may also be bisected.
64 # 64 #
65 # Each depot information dictionary may contain: 65 # Each depot information dictionary may contain:
66 # src: Path to the working directory. 66 # src: Path to the working directory.
67 # recurse: True if this repository will get bisected. 67 # recurse: True if this repository will get bisected.
68 # depends: A list of other repositories that are actually part of the same 68 # depends: A list of other repositories that are actually part of the same
69 # repository in svn. If the repository has any dependent repositories 69 # repository in svn. If the repository has any dependent repositories
(...skipping 791 matching lines...) Expand 10 before | Expand all | Expand 10 after
861 if step_count: 861 if step_count:
862 step_perf_time_avg = step_perf_time_avg / step_count 862 step_perf_time_avg = step_perf_time_avg / step_count
863 step_build_time_avg = step_build_time_avg / step_count 863 step_build_time_avg = step_build_time_avg / step_count
864 print 864 print
865 print 'Average build time : %s' % datetime.timedelta( 865 print 'Average build time : %s' % datetime.timedelta(
866 seconds=int(step_build_time_avg)) 866 seconds=int(step_build_time_avg))
867 print 'Average test time : %s' % datetime.timedelta( 867 print 'Average test time : %s' % datetime.timedelta(
868 seconds=int(step_perf_time_avg)) 868 seconds=int(step_perf_time_avg))
869 869
870 870
871 def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good): 871 class BisectResults(object):
872 """Compiles a list of other possible regressions from the revision data. 872 """This class holds the results of the bisect."""
873 873
874 Args: 874 def __init__(self, bisect_perf_metrics, source_control):
875 revision_data_sorted: Sorted list of (revision, revision data) pairs. 875 self._bisect_perf_metrics = bisect_perf_metrics
qyearsley 2014/09/19 07:35:16 Initial thought: BisectPerformanceMetrics is a mas
Sergiy Byelozyorov 2014/09/19 13:36:22 Done.
876 bad_greater_than_good: Whether the result value at the "bad" revision is 876 self.revision_data = {}
877 numerically greater than the result value at the "good" revision. 877 self.error = None
878 878 self._source_control = source_control
879 Returns: 879
880 A list of [current_rev, previous_rev, confidence] for other places where 880 @staticmethod
881 there may have been a regression. 881 def _FindOtherRegressions(revision_data_sorted, bad_greater_than_good):
882 """ 882 """Compiles a list of other possible regressions from the revision data.
883 other_regressions = [] 883
884 previous_values = [] 884 Args:
885 previous_id = None 885 revision_data_sorted: Sorted list of (revision, revision data) pairs.
886 for current_id, current_data in revision_data_sorted: 886 bad_greater_than_good: Whether the result value at the "bad" revision is
887 current_values = current_data['value'] 887 numerically greater than the result value at the "good" revision.
888 if current_values: 888
889 current_values = current_values['values'] 889 Returns:
890 if previous_values: 890 A list of [current_rev, previous_rev, confidence] for other places where
891 confidence = ConfidenceScore(previous_values, [current_values]) 891 there may have been a regression.
892 mean_of_prev_runs = math_utils.Mean(sum(previous_values, [])) 892 """
893 mean_of_current_runs = math_utils.Mean(current_values) 893 other_regressions = []
894 894 previous_values = []
895 # Check that the potential regression is in the same direction as 895 previous_id = None
896 # the overall regression. If the mean of the previous runs < the 896 for current_id, current_data in revision_data_sorted:
897 # mean of the current runs, this local regression is in same 897 current_values = current_data['value']
898 # direction. 898 if current_values:
899 prev_less_than_current = mean_of_prev_runs < mean_of_current_runs 899 current_values = current_values['values']
900 is_same_direction = (prev_less_than_current if 900 if previous_values:
901 bad_greater_than_good else not prev_less_than_current) 901 confidence = ConfidenceScore(previous_values, [current_values])
902 902 mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
903 # Only report potential regressions with high confidence. 903 mean_of_current_runs = math_utils.Mean(current_values)
904 if is_same_direction and confidence > 50: 904
905 other_regressions.append([current_id, previous_id, confidence]) 905 # Check that the potential regression is in the same direction as
906 previous_values.append(current_values) 906 # the overall regression. If the mean of the previous runs < the
907 previous_id = current_id 907 # mean of the current runs, this local regression is in the same
908 return other_regressions 908 # direction.
909 prev_less_than_current = mean_of_prev_runs < mean_of_current_runs
910 is_same_direction = (prev_less_than_current if
911 bad_greater_than_good else not prev_less_than_current)
912
913 # Only report potential regressions with high confidence.
914 if is_same_direction and confidence > 50:
915 other_regressions.append([current_id, previous_id, confidence])
916 previous_values.append(current_values)
917 previous_id = current_id
918 return other_regressions
919
920 def GetResultsDict(self):
921 """Returns a dictionary with the following fields
qyearsley 2014/09/19 07:35:16 The first line of a docstring is generally a self-
Sergiy Byelozyorov 2014/09/19 13:36:22 Done.
922
923 'first_working_revision': First good revision.
924 'last_broken_revision': Last bad revision.
925 'culprit_revisions': A list of revisions, which contain the bad change
926 introducing the failure.
927 'other_regressions': A list of tuples representing other regressions, which
928 may have occurred.
929 'regression_size': For performance bisects, this is a relative change of the
930 mean metric value. For other bisects this field always
931 contains 'zero-to-nonzero'.
932 'regression_std_err': For performance bisects, it is a pooled standard
933 error for groups of good and bad runs. Not used for
934 other bisects.
935 'confidence': For performance bisects, it is a confidence that the good and
936 bad runs are distinct groups. Not used for non-performance
937 bisects.
qyearsley 2014/09/19 07:35:16 I think this formatting might look better if line
Sergiy Byelozyorov 2014/09/19 13:36:22 Done.
938
939 'revision_data_stored': dict mapping revision ids to data about that
940 revision. Each piece of revision data consists of a dict with the
941 following keys:
942
943 'passed': Represents whether the performance test was successful at
944 that revision. Possible values include: 1 (passed), 0 (failed),
945 '?' (skipped), 'F' (build failed).
946 'depot': The depot that this revision is from (i.e. WebKit)
947 'external': If the revision is a 'src' revision, 'external' contains
948 the revisions of each of the external libraries.
949 'sort': A sort value for sorting the dict in order of commits.
950
951 For example:
952 {
953 'CL #1':
954 {
955 'passed': False,
956 'depot': 'chromium',
957 'external': None,
958 'sort': 0
959 }
960 }
961 """
962 revision_data_sorted = sorted(self.revision_data.iteritems(),
963 key = lambda x: x[1]['sort'])
964
965 # Find range where it possibly broke.
966 first_working_revision = None
967 first_working_revision_index = -1
968 last_broken_revision = None
969 last_broken_revision_index = -1
970
971 culprit_revisions = []
972 other_regressions = []
973 regression_size = 0.0
974 regression_std_err = 0.0
975 confidence = 0.0
976
977 for i in xrange(len(revision_data_sorted)):
978 k, v = revision_data_sorted[i]
979 if v['passed'] == 1:
980 if not first_working_revision:
981 first_working_revision = k
982 first_working_revision_index = i
983
984 if not v['passed']:
985 last_broken_revision = k
986 last_broken_revision_index = i
987
988 if last_broken_revision != None and first_working_revision != None:
989 broken_means = []
990 for i in xrange(0, last_broken_revision_index + 1):
991 if revision_data_sorted[i][1]['value']:
992 broken_means.append(revision_data_sorted[i][1]['value']['values'])
993
994 working_means = []
995 for i in xrange(first_working_revision_index, len(revision_data_sorted)):
996 if revision_data_sorted[i][1]['value']:
997 working_means.append(revision_data_sorted[i][1]['value']['values'])
998
999 # Flatten the lists to calculate mean of all values.
1000 working_mean = sum(working_means, [])
1001 broken_mean = sum(broken_means, [])
1002
1003 # Calculate the approximate size of the regression
1004 mean_of_bad_runs = math_utils.Mean(broken_mean)
1005 mean_of_good_runs = math_utils.Mean(working_mean)
1006
1007 regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
1008 mean_of_bad_runs)
1009 if math.isnan(regression_size):
1010 regression_size = 'zero-to-nonzero'
1011
1012 regression_std_err = math.fabs(math_utils.PooledStandardError(
1013 [working_mean, broken_mean]) /
1014 max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0
1015
1016 # Give a "confidence" in the bisect. At the moment we use how distinct the
1017 # values are before and after the last broken revision, and how noisy the
1018 # overall graph is.
1019 confidence = ConfidenceScore(working_means, broken_means)
1020
1021 culprit_revisions = []
1022
1023 cwd = os.getcwd()
1024 self._bisect_perf_metrics.ChangeToDepotWorkingDirectory(
1025 self.revision_data[last_broken_revision]['depot'])
1026
1027 if self.revision_data[last_broken_revision]['depot'] == 'cros':
1028 # Want to get a list of all the commits and what depots they belong
1029 # to so that we can grab info about each.
1030 cmd = ['repo', 'forall', '-c',
1031 'pwd ; git log --pretty=oneline --before=%d --after=%d' % (
1032 last_broken_revision, first_working_revision + 1)]
1033 output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd)
1034
1035 changes = []
1036 assert not return_code, ('An error occurred while running '
1037 '"%s"' % ' '.join(cmd))
1038 last_depot = None
1039 cwd = os.getcwd()
1040 for l in output.split('\n'):
1041 if l:
1042 # Output will be in form:
1043 # /path_to_depot
1044 # /path_to_other_depot
1045 # <SHA1>
1046 # /path_again
1047 # <SHA1>
1048 # etc.
1049 if l[0] == '/':
1050 last_depot = l
1051 else:
1052 contents = l.split(' ')
1053 if len(contents) > 1:
1054 changes.append([last_depot, contents[0]])
1055 for c in changes:
1056 os.chdir(c[0])
1057 info = self._source_control.QueryRevisionInfo(c[1])
1058 culprit_revisions.append((c[1], info, None))
1059 else:
1060 for i in xrange(last_broken_revision_index, len(revision_data_sorted)):
1061 k, v = revision_data_sorted[i]
1062 if k == first_working_revision:
1063 break
1064 self._bisect_perf_metrics.ChangeToDepotWorkingDirectory(v['depot'])
1065 info = self._source_control.QueryRevisionInfo(k)
1066 culprit_revisions.append((k, info, v['depot']))
1067 os.chdir(cwd)
1068
1069 # Check for any other possible regression ranges.
1070 other_regressions = self._FindOtherRegressions(
1071 revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)
1072
1073 return {
1074 'first_working_revision': first_working_revision,
1075 'last_broken_revision': last_broken_revision,
1076 'culprit_revisions': culprit_revisions,
1077 'other_regressions': other_regressions,
1078 'regression_size': regression_size,
1079 'regression_std_err': regression_std_err,
1080 'confidence': confidence,
1081 'revision_data_sorted': revision_data_sorted
1082 }
909 1083
910 1084
911 class BisectPerformanceMetrics(object): 1085 class BisectPerformanceMetrics(object):
912 """This class contains functionality to perform a bisection of a range of 1086 """This class contains functionality to perform a bisection of a range of
913 revisions to narrow down where performance regressions may have occurred. 1087 revisions to narrow down where performance regressions may have occurred.
914 1088
915 The main entry-point is the Run method. 1089 The main entry-point is the Run method.
916 """ 1090 """
917 1091
918 def __init__(self, source_control, opts): 1092 def __init__(self, source_control, opts):
(...skipping 1402 matching lines...) Expand 10 before | Expand all | Expand 10 after
2321 intermediate revisions to determine the CL where the performance regression 2495 intermediate revisions to determine the CL where the performance regression
2322 occurred. 2496 occurred.
2323 2497
2324 Args: 2498 Args:
2325 command_to_run: Specify the command to execute the performance test. 2499 command_to_run: Specify the command to execute the performance test.
2326 good_revision: Number/tag of the known good revision. 2500 good_revision: Number/tag of the known good revision.
2327 bad_revision: Number/tag of the known bad revision. 2501 bad_revision: Number/tag of the known bad revision.
2328 metric: The performance metric to monitor. 2502 metric: The performance metric to monitor.
2329 2503
2330 Returns: 2504 Returns:
2331 A dict with 2 members, 'revision_data' and 'error'. On success, 2505 A BisectResults object.
2332 'revision_data' will contain a dict mapping revision ids to
2333 data about that revision. Each piece of revision data consists of a
2334 dict with the following keys:
2335
2336 'passed': Represents whether the performance test was successful at
2337 that revision. Possible values include: 1 (passed), 0 (failed),
2338 '?' (skipped), 'F' (build failed).
2339 'depot': The depot that this revision is from (i.e. WebKit)
2340 'external': If the revision is a 'src' revision, 'external' contains
2341 the revisions of each of the external libraries.
2342 'sort': A sort value for sorting the dict in order of commits.
2343
2344 For example:
2345 {
2346 'error':None,
2347 'revision_data':
2348 {
2349 'CL #1':
2350 {
2351 'passed': False,
2352 'depot': 'chromium',
2353 'external': None,
2354 'sort': 0
2355 }
2356 }
2357 }
2358
2359 If an error occurred, the 'error' field will contain the message and
2360 'revision_data' will be empty.
2361 """ 2506 """
2362 results = { 2507 results = BisectResults(self, self.source_control)
2363 'revision_data' : {},
2364 'error' : None,
2365 }
2366 2508
2367 # Choose depot to bisect first 2509 # Choose depot to bisect first
2368 target_depot = 'chromium' 2510 target_depot = 'chromium'
2369 if self.opts.target_platform == 'cros': 2511 if self.opts.target_platform == 'cros':
2370 target_depot = 'cros' 2512 target_depot = 'cros'
2371 elif self.opts.target_platform == 'android-chrome': 2513 elif self.opts.target_platform == 'android-chrome':
2372 target_depot = 'android-chrome' 2514 target_depot = 'android-chrome'
2373 2515
2374 cwd = os.getcwd() 2516 cwd = os.getcwd()
2375 self.ChangeToDepotWorkingDirectory(target_depot) 2517 self.ChangeToDepotWorkingDirectory(target_depot)
2376 2518
2377 # If they passed SVN revisions, we can try match them to git SHA1 hashes. 2519 # If they passed SVN revisions, we can try match them to git SHA1 hashes.
2378 bad_revision = self.source_control.ResolveToRevision( 2520 bad_revision = self.source_control.ResolveToRevision(
2379 bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100) 2521 bad_revision_in, target_depot, DEPOT_DEPS_NAME, 100)
2380 good_revision = self.source_control.ResolveToRevision( 2522 good_revision = self.source_control.ResolveToRevision(
2381 good_revision_in, target_depot, DEPOT_DEPS_NAME, -100) 2523 good_revision_in, target_depot, DEPOT_DEPS_NAME, -100)
2382 2524
2383 os.chdir(cwd) 2525 os.chdir(cwd)
2384 if bad_revision is None: 2526 if bad_revision is None:
2385 results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in 2527 results.error = 'Couldn\'t resolve [%s] to SHA1.' % bad_revision_in
2386 return results 2528 return results
2387 2529
2388 if good_revision is None: 2530 if good_revision is None:
2389 results['error'] = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in 2531 results.error = 'Couldn\'t resolve [%s] to SHA1.' % good_revision_in
2390 return results 2532 return results
2391 2533
2392 # Check that they didn't accidentally swap good and bad revisions. 2534 # Check that they didn't accidentally swap good and bad revisions.
2393 if not self.CheckIfRevisionsInProperOrder( 2535 if not self.CheckIfRevisionsInProperOrder(
2394 target_depot, good_revision, bad_revision): 2536 target_depot, good_revision, bad_revision):
2395 results['error'] = ('bad_revision < good_revision, did you swap these ' 2537 results.error = ('bad_revision < good_revision, did you swap these '
2396 'by mistake?') 2538 'by mistake?')
2397 return results 2539 return results
2398 bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange( 2540 bad_revision, good_revision = self.NudgeRevisionsIfDEPSChange(
2399 bad_revision, good_revision, good_revision_in) 2541 bad_revision, good_revision, good_revision_in)
2400 if self.opts.output_buildbot_annotations: 2542 if self.opts.output_buildbot_annotations:
2401 bisect_utils.OutputAnnotationStepStart('Gathering Revisions') 2543 bisect_utils.OutputAnnotationStepStart('Gathering Revisions')
2402 2544
2403 cannot_bisect = self.CanPerformBisect(good_revision, bad_revision) 2545 cannot_bisect = self.CanPerformBisect(good_revision, bad_revision)
2404 if cannot_bisect: 2546 if cannot_bisect:
2405 results['error'] = cannot_bisect.get('error') 2547 results.error = cannot_bisect.get('error')
2406 return results 2548 return results
2407 2549
2408 print 'Gathering revision range for bisection.' 2550 print 'Gathering revision range for bisection.'
2409 # Retrieve a list of revisions to do bisection on. 2551 # Retrieve a list of revisions to do bisection on.
2410 src_revision_list = self.GetRevisionList( 2552 src_revision_list = self.GetRevisionList(
2411 target_depot, bad_revision, good_revision) 2553 target_depot, bad_revision, good_revision)
2412 2554
2413 if self.opts.output_buildbot_annotations: 2555 if self.opts.output_buildbot_annotations:
2414 bisect_utils.OutputAnnotationStepClosed() 2556 bisect_utils.OutputAnnotationStepClosed()
2415 2557
2416 if src_revision_list: 2558 if src_revision_list:
2417 # revision_data will store information about a revision such as the 2559 # revision_data will store information about a revision such as the
2418 # depot it came from, the webkit/V8 revision at that time, 2560 # depot it came from, the webkit/V8 revision at that time,
2419 # performance timing, build state, etc... 2561 # performance timing, build state, etc...
2420 revision_data = results['revision_data'] 2562 revision_data = results.revision_data
2421 2563
2422 # revision_list is the list we're binary searching through at the moment. 2564 # revision_list is the list we're binary searching through at the moment.
2423 revision_list = [] 2565 revision_list = []
2424 2566
2425 sort_key_ids = 0 2567 sort_key_ids = 0
2426 2568
2427 for current_revision_id in src_revision_list: 2569 for current_revision_id in src_revision_list:
2428 sort_key_ids += 1 2570 sort_key_ids += 1
2429 2571
2430 revision_data[current_revision_id] = { 2572 revision_data[current_revision_id] = {
(...skipping 22 matching lines...) Expand all
2453 bad_results, good_results = self.GatherReferenceValues(good_revision, 2595 bad_results, good_results = self.GatherReferenceValues(good_revision,
2454 bad_revision, 2596 bad_revision,
2455 command_to_run, 2597 command_to_run,
2456 metric, 2598 metric,
2457 target_depot) 2599 target_depot)
2458 2600
2459 if self.opts.output_buildbot_annotations: 2601 if self.opts.output_buildbot_annotations:
2460 bisect_utils.OutputAnnotationStepClosed() 2602 bisect_utils.OutputAnnotationStepClosed()
2461 2603
2462 if bad_results[1]: 2604 if bad_results[1]:
2463 results['error'] = ('An error occurred while building and running ' 2605 results.error = ('An error occurred while building and running '
2464 'the \'bad\' reference value. The bisect cannot continue without ' 2606 'the \'bad\' reference value. The bisect cannot continue without '
2465 'a working \'bad\' revision to start from.\n\nError: %s' % 2607 'a working \'bad\' revision to start from.\n\nError: %s' %
2466 bad_results[0]) 2608 bad_results[0])
2467 return results 2609 return results
2468 2610
2469 if good_results[1]: 2611 if good_results[1]:
2470 results['error'] = ('An error occurred while building and running ' 2612 results.error = ('An error occurred while building and running '
2471 'the \'good\' reference value. The bisect cannot continue without ' 2613 'the \'good\' reference value. The bisect cannot continue without '
2472 'a working \'good\' revision to start from.\n\nError: %s' % 2614 'a working \'good\' revision to start from.\n\nError: %s' %
2473 good_results[0]) 2615 good_results[0])
2474 return results 2616 return results
2475 2617
2476 2618
2477 # We need these reference values to determine if later runs should be 2619 # We need these reference values to determine if later runs should be
2478 # classified as pass or fail. 2620 # classified as pass or fail.
2479 known_bad_value = bad_results[0] 2621 known_bad_value = bad_results[0]
2480 known_good_value = good_results[0] 2622 known_good_value = good_results[0]
2481 2623
2482 # Can just mark the good and bad revisions explicitly here since we 2624 # Can just mark the good and bad revisions explicitly here since we
2483 # already know the results. 2625 # already know the results.
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
2528 break 2670 break
2529 2671
2530 earliest_revision = max_revision_data['external'][external_depot] 2672 earliest_revision = max_revision_data['external'][external_depot]
2531 latest_revision = min_revision_data['external'][external_depot] 2673 latest_revision = min_revision_data['external'][external_depot]
2532 2674
2533 new_revision_list = self.PrepareToBisectOnDepot( 2675 new_revision_list = self.PrepareToBisectOnDepot(
2534 external_depot, latest_revision, earliest_revision, 2676 external_depot, latest_revision, earliest_revision,
2535 previous_revision) 2677 previous_revision)
2536 2678
2537 if not new_revision_list: 2679 if not new_revision_list:
2538 results['error'] = ('An error occurred attempting to retrieve ' 2680 results.error = ('An error occurred attempting to retrieve '
2539 'revision range: [%s..%s]' % 2681 'revision range: [%s..%s]' %
2540 (earliest_revision, latest_revision)) 2682 (earliest_revision, latest_revision))
2541 return results 2683 return results
2542 2684
2543 _AddRevisionsIntoRevisionData( 2685 _AddRevisionsIntoRevisionData(
2544 new_revision_list, external_depot, min_revision_data['sort'], 2686 new_revision_list, external_depot, min_revision_data['sort'],
2545 revision_data) 2687 revision_data)
2546 2688
2547 # Reset the bisection and perform it on the newly inserted 2689 # Reset the bisection and perform it on the newly inserted
2548 # changelists. 2690 # changelists.
2549 revision_list = new_revision_list 2691 revision_list = new_revision_list
2550 min_revision = 0 2692 min_revision = 0
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
2609 # If the build is broken, remove it and redo search. 2751 # If the build is broken, remove it and redo search.
2610 revision_list.pop(next_revision_index) 2752 revision_list.pop(next_revision_index)
2611 2753
2612 max_revision -= 1 2754 max_revision -= 1
2613 2755
2614 if self.opts.output_buildbot_annotations: 2756 if self.opts.output_buildbot_annotations:
2615 self._PrintPartialResults(results) 2757 self._PrintPartialResults(results)
2616 bisect_utils.OutputAnnotationStepClosed() 2758 bisect_utils.OutputAnnotationStepClosed()
2617 else: 2759 else:
2618 # Weren't able to sync and retrieve the revision range. 2760 # Weren't able to sync and retrieve the revision range.
2619 results['error'] = ('An error occurred attempting to retrieve revision ' 2761 results.error = ('An error occurred attempting to retrieve revision '
2620 'range: [%s..%s]' % (good_revision, bad_revision)) 2762 'range: [%s..%s]' % (good_revision, bad_revision))
2621 2763
2622 return results 2764 return results
2623 2765
2624 def _PrintPartialResults(self, results_dict): 2766 def _PrintPartialResults(self, results):
2625 revision_data = results_dict['revision_data'] 2767 results_dict = results.GetResultsDict()
2626 revision_data_sorted = sorted(revision_data.iteritems(), 2768 self._PrintTestedCommitsTable(results_dict['revision_data_sorted'],
2627 key = lambda x: x[1]['sort'])
2628 results_dict = self._GetResultsDict(revision_data, revision_data_sorted)
2629
2630 self._PrintTestedCommitsTable(revision_data_sorted,
2631 results_dict['first_working_revision'], 2769 results_dict['first_working_revision'],
2632 results_dict['last_broken_revision'], 2770 results_dict['last_broken_revision'],
2633 100, final_step=False) 2771 100, final_step=False)
2634 2772
2635 def _ConfidenceLevelStatus(self, results_dict): 2773 def _ConfidenceLevelStatus(self, results_dict):
2636 if not results_dict['confidence']: 2774 if not results_dict['confidence']:
2637 return None 2775 return None
2638 confidence_status = 'Successful with %(level)s confidence%(warning)s.' 2776 confidence_status = 'Successful with %(level)s confidence%(warning)s.'
2639 if results_dict['confidence'] >= HIGH_CONFIDENCE: 2777 if results_dict['confidence'] >= HIGH_CONFIDENCE:
2640 level = 'high' 2778 level = 'high'
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
2793 if not previous_link: 2931 if not previous_link:
2794 previous_link = previous_id 2932 previous_link = previous_id
2795 2933
2796 print ' %8s %70s %s' % ( 2934 print ' %8s %70s %s' % (
2797 current_data['depot'], current_link, 2935 current_data['depot'], current_link,
2798 ('%d%%' % confidence).center(10, ' ')) 2936 ('%d%%' % confidence).center(10, ' '))
2799 print ' %8s %70s' % ( 2937 print ' %8s %70s' % (
2800 previous_data['depot'], previous_link) 2938 previous_data['depot'], previous_link)
2801 print 2939 print
2802 2940
2803 def _GetResultsDict(self, revision_data, revision_data_sorted):
2804 # Find range where it possibly broke.
2805 first_working_revision = None
2806 first_working_revision_index = -1
2807 last_broken_revision = None
2808 last_broken_revision_index = -1
2809
2810 culprit_revisions = []
2811 other_regressions = []
2812 regression_size = 0.0
2813 regression_std_err = 0.0
2814 confidence = 0.0
2815
2816 for i in xrange(len(revision_data_sorted)):
2817 k, v = revision_data_sorted[i]
2818 if v['passed'] == 1:
2819 if not first_working_revision:
2820 first_working_revision = k
2821 first_working_revision_index = i
2822
2823 if not v['passed']:
2824 last_broken_revision = k
2825 last_broken_revision_index = i
2826
2827 if last_broken_revision != None and first_working_revision != None:
2828 broken_means = []
2829 for i in xrange(0, last_broken_revision_index + 1):
2830 if revision_data_sorted[i][1]['value']:
2831 broken_means.append(revision_data_sorted[i][1]['value']['values'])
2832
2833 working_means = []
2834 for i in xrange(first_working_revision_index, len(revision_data_sorted)):
2835 if revision_data_sorted[i][1]['value']:
2836 working_means.append(revision_data_sorted[i][1]['value']['values'])
2837
2838 # Flatten the lists to calculate mean of all values.
2839 working_mean = sum(working_means, [])
2840 broken_mean = sum(broken_means, [])
2841
2842 # Calculate the approximate size of the regression
2843 mean_of_bad_runs = math_utils.Mean(broken_mean)
2844 mean_of_good_runs = math_utils.Mean(working_mean)
2845
2846 regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
2847 mean_of_bad_runs)
2848 if math.isnan(regression_size):
2849 regression_size = 'zero-to-nonzero'
2850
2851 regression_std_err = math.fabs(math_utils.PooledStandardError(
2852 [working_mean, broken_mean]) /
2853 max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0
2854
2855 # Give a "confidence" in the bisect. At the moment we use how distinct the
2856 # values are before and after the last broken revision, and how noisy the
2857 # overall graph is.
2858 confidence = ConfidenceScore(working_means, broken_means)
2859
2860 culprit_revisions = []
2861
2862 cwd = os.getcwd()
2863 self.ChangeToDepotWorkingDirectory(
2864 revision_data[last_broken_revision]['depot'])
2865
2866 if revision_data[last_broken_revision]['depot'] == 'cros':
2867 # Want to get a list of all the commits and what depots they belong
2868 # to so that we can grab info about each.
2869 cmd = ['repo', 'forall', '-c',
2870 'pwd ; git log --pretty=oneline --before=%d --after=%d' % (
2871 last_broken_revision, first_working_revision + 1)]
2872 output, return_code = bisect_utils.RunProcessAndRetrieveOutput(cmd)
2873
2874 changes = []
2875 assert not return_code, ('An error occurred while running '
2876 '"%s"' % ' '.join(cmd))
2877 last_depot = None
2878 cwd = os.getcwd()
2879 for l in output.split('\n'):
2880 if l:
2881 # Output will be in form:
2882 # /path_to_depot
2883 # /path_to_other_depot
2884 # <SHA1>
2885 # /path_again
2886 # <SHA1>
2887 # etc.
2888 if l[0] == '/':
2889 last_depot = l
2890 else:
2891 contents = l.split(' ')
2892 if len(contents) > 1:
2893 changes.append([last_depot, contents[0]])
2894 for c in changes:
2895 os.chdir(c[0])
2896 info = self.source_control.QueryRevisionInfo(c[1])
2897 culprit_revisions.append((c[1], info, None))
2898 else:
2899 for i in xrange(last_broken_revision_index, len(revision_data_sorted)):
2900 k, v = revision_data_sorted[i]
2901 if k == first_working_revision:
2902 break
2903 self.ChangeToDepotWorkingDirectory(v['depot'])
2904 info = self.source_control.QueryRevisionInfo(k)
2905 culprit_revisions.append((k, info, v['depot']))
2906 os.chdir(cwd)
2907
2908 # Check for any other possible regression ranges.
2909 other_regressions = _FindOtherRegressions(
2910 revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)
2911
2912 return {
2913 'first_working_revision': first_working_revision,
2914 'last_broken_revision': last_broken_revision,
2915 'culprit_revisions': culprit_revisions,
2916 'other_regressions': other_regressions,
2917 'regression_size': regression_size,
2918 'regression_std_err': regression_std_err,
2919 'confidence': confidence,
2920 }
2921
2922 def _CheckForWarnings(self, results_dict): 2941 def _CheckForWarnings(self, results_dict):
2923 if len(results_dict['culprit_revisions']) > 1: 2942 if len(results_dict['culprit_revisions']) > 1:
2924 self.warnings.append('Due to build errors, regression range could ' 2943 self.warnings.append('Due to build errors, regression range could '
2925 'not be narrowed down to a single commit.') 2944 'not be narrowed down to a single commit.')
2926 if self.opts.repeat_test_count == 1: 2945 if self.opts.repeat_test_count == 1:
2927 self.warnings.append('Tests were only set to run once. This may ' 2946 self.warnings.append('Tests were only set to run once. This may '
2928 'be insufficient to get meaningful results.') 2947 'be insufficient to get meaningful results.')
2929 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE: 2948 if 0 < results_dict['confidence'] < HIGH_CONFIDENCE:
2930 self.warnings.append('Confidence is not high. Try bisecting again ' 2949 self.warnings.append('Confidence is not high. Try bisecting again '
2931 'with increased repeat_count, larger range, or ' 2950 'with increased repeat_count, larger range, or '
2932 'on another metric.') 2951 'on another metric.')
2933 if not results_dict['confidence']: 2952 if not results_dict['confidence']:
2934 self.warnings.append('Confidence score is 0%. Try bisecting again on ' 2953 self.warnings.append('Confidence score is 0%. Try bisecting again on '
2935 'another platform or another metric.') 2954 'another platform or another metric.')
2936 2955
2937 def FormatAndPrintResults(self, bisect_results): 2956 def FormatAndPrintResults(self, bisect_results):
2938 """Prints the results from a bisection run in a readable format. 2957 """Prints the results from a bisection run in a readable format.
2939 2958
2940 Args: 2959 Args:
2941 bisect_results: The results from a bisection test run. 2960 bisect_results: The results from a bisection test run.
2942 """ 2961 """
2943 revision_data = bisect_results['revision_data'] 2962 results_dict = bisect_results.GetResultsDict()
2944 revision_data_sorted = sorted(revision_data.iteritems(),
2945 key = lambda x: x[1]['sort'])
2946 results_dict = self._GetResultsDict(revision_data, revision_data_sorted)
2947 2963
2948 self._CheckForWarnings(results_dict) 2964 self._CheckForWarnings(results_dict)
2949 2965
2950 if self.opts.output_buildbot_annotations: 2966 if self.opts.output_buildbot_annotations:
2951 bisect_utils.OutputAnnotationStepStart('Build Status Per Revision') 2967 bisect_utils.OutputAnnotationStepStart('Build Status Per Revision')
2952 2968
2953 print 2969 print
2954 print 'Full results of bisection:' 2970 print 'Full results of bisection:'
2955 for current_id, current_data in revision_data_sorted: 2971 for current_id, current_data in results_dict['revision_data_sorted']:
2956 build_status = current_data['passed'] 2972 build_status = current_data['passed']
2957 2973
2958 if type(build_status) is bool: 2974 if type(build_status) is bool:
2959 if build_status: 2975 if build_status:
2960 build_status = 'Good' 2976 build_status = 'Good'
2961 else: 2977 else:
2962 build_status = 'Bad' 2978 build_status = 'Bad'
2963 2979
2964 print ' %20s %40s %s' % (current_data['depot'], 2980 print ' %20s %40s %s' % (current_data['depot'],
2965 current_id, build_status) 2981 current_id, build_status)
2966 print 2982 print
2967 2983
2968 if self.opts.output_buildbot_annotations: 2984 if self.opts.output_buildbot_annotations:
2969 bisect_utils.OutputAnnotationStepClosed() 2985 bisect_utils.OutputAnnotationStepClosed()
2970 # The perf dashboard scrapes the "results" step in order to comment on 2986 # The perf dashboard scrapes the "results" step in order to comment on
2971 # bugs. If you change this, please update the perf dashboard as well. 2987 # bugs. If you change this, please update the perf dashboard as well.
2972 bisect_utils.OutputAnnotationStepStart('Results') 2988 bisect_utils.OutputAnnotationStepStart('Results')
2973 2989
2974 self._PrintBanner(results_dict) 2990 self._PrintBanner(results_dict)
2975 self._PrintWarnings() 2991 self._PrintWarnings()
2976 2992
2977 if results_dict['culprit_revisions'] and results_dict['confidence']: 2993 if results_dict['culprit_revisions'] and results_dict['confidence']:
2978 for culprit in results_dict['culprit_revisions']: 2994 for culprit in results_dict['culprit_revisions']:
2979 cl, info, depot = culprit 2995 cl, info, depot = culprit
2980 self._PrintRevisionInfo(cl, info, depot) 2996 self._PrintRevisionInfo(cl, info, depot)
2981 if results_dict['other_regressions']: 2997 if results_dict['other_regressions']:
2982 self._PrintOtherRegressions(results_dict['other_regressions'], 2998 self._PrintOtherRegressions(results_dict['other_regressions'],
2983 revision_data) 2999 results_dict['revision_data'])
2984 self._PrintTestedCommitsTable(revision_data_sorted, 3000 self._PrintTestedCommitsTable(results_dict['revision_data_sorted'],
2985 results_dict['first_working_revision'], 3001 results_dict['first_working_revision'],
2986 results_dict['last_broken_revision'], 3002 results_dict['last_broken_revision'],
2987 results_dict['confidence']) 3003 results_dict['confidence'])
2988 _PrintStepTime(revision_data_sorted) 3004 _PrintStepTime(results_dict['revision_data_sorted'])
2989 self._PrintReproSteps() 3005 self._PrintReproSteps()
2990 _PrintThankYou() 3006 _PrintThankYou()
2991 if self.opts.output_buildbot_annotations: 3007 if self.opts.output_buildbot_annotations:
2992 bisect_utils.OutputAnnotationStepClosed() 3008 bisect_utils.OutputAnnotationStepClosed()
2993 3009
2994 def _PrintBanner(self, results_dict): 3010 def _PrintBanner(self, results_dict):
2995 if self._IsBisectModeReturnCode(): 3011 if self._IsBisectModeReturnCode():
2996 metrics = 'N/A' 3012 metrics = 'N/A'
2997 change = 'Yes' 3013 change = 'Yes'
2998 else: 3014 else:
(...skipping 390 matching lines...) Expand 10 before | Expand all | Expand 10 after
3389 if (not source_control.IsInProperBranch() and 3405 if (not source_control.IsInProperBranch() and
3390 not opts.debug_ignore_sync and 3406 not opts.debug_ignore_sync and
3391 not opts.working_directory): 3407 not opts.working_directory):
3392 raise RuntimeError('You must switch to master branch to run bisection.') 3408 raise RuntimeError('You must switch to master branch to run bisection.')
3393 bisect_test = BisectPerformanceMetrics(source_control, opts) 3409 bisect_test = BisectPerformanceMetrics(source_control, opts)
3394 try: 3410 try:
3395 bisect_results = bisect_test.Run(opts.command, 3411 bisect_results = bisect_test.Run(opts.command,
3396 opts.bad_revision, 3412 opts.bad_revision,
3397 opts.good_revision, 3413 opts.good_revision,
3398 opts.metric) 3414 opts.metric)
3399 if bisect_results['error']: 3415 if bisect_results.error:
3400 raise RuntimeError(bisect_results['error']) 3416 raise RuntimeError(bisect_results.error)
3401 bisect_test.FormatAndPrintResults(bisect_results) 3417 bisect_test.FormatAndPrintResults(bisect_results)
3402 return 0 3418 return 0
3403 finally: 3419 finally:
3404 bisect_test.PerformCleanup() 3420 bisect_test.PerformCleanup()
3405 except RuntimeError, e: 3421 except RuntimeError, e:
3406 if opts.output_buildbot_annotations: 3422 if opts.output_buildbot_annotations:
3407 # The perf dashboard scrapes the "results" step in order to comment on 3423 # The perf dashboard scrapes the "results" step in order to comment on
3408 # bugs. If you change this, please update the perf dashboard as well. 3424 # bugs. If you change this, please update the perf dashboard as well.
3409 bisect_utils.OutputAnnotationStepStart('Results') 3425 bisect_utils.OutputAnnotationStepStart('Results')
3410 print 'Error: %s' % e.message 3426 print 'Error: %s' % e.message
3411 if opts.output_buildbot_annotations: 3427 if opts.output_buildbot_annotations:
3412 bisect_utils.OutputAnnotationStepClosed() 3428 bisect_utils.OutputAnnotationStepClosed()
3413 return 1 3429 return 1
3414 3430
3415 3431
if __name__ == '__main__':
  # Propagate main()'s return code (0 on success, 1 on handled error)
  # as the process exit status.
  sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | tools/auto_bisect/bisect_perf_regression_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698