Index: tools/bisect-perf-regression.py
diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py
index 62d991c9c503f7aef18c694f748758c77de7ae4e..85e4977bace673ae0ff98de62aa2613962f57397 100755
--- a/tools/bisect-perf-regression.py
+++ b/tools/bisect-perf-regression.py
@@ -2341,22 +2341,16 @@ class BisectPerformanceMetrics(object):
   def _PrintOtherRegressions(self, other_regressions, revision_data):
     print
     print 'Other regressions may have occurred:'
+    print ' %8s %82s %10s' % ('Depot'.center(8, ' '),
+        'Range'.center(82, ' '), 'Confidence'.center(10, ' '))
     for regression in other_regressions:
-      current_id, previous_id, percent_change, deviations = regression
+      current_id, previous_id, confidence = regression
       current_data = revision_data[current_id]
       previous_data = revision_data[previous_id]

-      if deviations is None:
-        deviations = 'N/A'
-      else:
-        deviations = '%.2f' % deviations
-
-      if percent_change is None:
-        percent_change = 0
-
-      print ' %8s %s [%.2f%%, %s x std.dev]' % (
-          previous_data['depot'], previous_id, 100 * percent_change, deviations)
-      print ' %8s %s' % (current_data['depot'], current_id)
+      print ' %8s %s..%s %s' % (
+          current_data['depot'], current_id, previous_id,
+          ('%d%%' % confidence).center(10, ' '))
     print

   def _PrintStepTime(self, revision_data_sorted):
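
For readers previewing the new layout: two 40-character git hashes joined by '..' fill the 82-character Range column exactly. A minimal standalone sketch of the formatting above, with a fabricated depot name, hashes, and confidence value:

# Preview of the new table layout; all values below are made up.
print ' %8s %82s %10s' % ('Depot'.center(8, ' '),
    'Range'.center(82, ' '), 'Confidence'.center(10, ' '))
print ' %8s %s..%s %s' % ('chromium',
    'a' * 40, 'b' * 40,  # stand-ins for real revision hashes
    ('%d%%' % 73).center(10, ' '))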
@@ -2384,6 +2378,65 @@ class BisectPerformanceMetrics(object):
     for w in self.warnings:
       print ' !!! %s' % w

+  def _FindOtherRegressions(self, revision_data_sorted, bad_greater_than_good):
+    other_regressions = []
+    previous_values = []
+    previous_id = None
+    for current_id, current_data in revision_data_sorted:
+      current_values = current_data['value']
+      if current_values:
+        current_values = current_values['values']
+        if previous_values:
+          confidence = self._CalculateConfidence(previous_values,
+              [current_values])
+          mean_of_prev_runs = CalculateTruncatedMean(
+              sum(previous_values, []), 0)
+          mean_of_current_runs = CalculateTruncatedMean(current_values, 0)
+
+          # Check that the potential regression is in the same direction as
+          # the overall regression. If the mean of the previous runs is less
+          # than the mean of the current runs, this local regression is in
+          # the same direction.
+          prev_less_than_current = mean_of_prev_runs < mean_of_current_runs
+          is_same_direction = (prev_less_than_current if
+              bad_greater_than_good else not prev_less_than_current)
+
+          # Only report potential regressions with high confidence.
+          if is_same_direction and confidence > 50:
+            other_regressions.append([current_id, previous_id, confidence])
+        previous_values.append(current_values)
+        previous_id = current_id
+    return other_regressions
+
+  def _CalculateConfidence(self, working_means, broken_means):
+    bounds_working = []
+    bounds_broken = []
+    for m in working_means:
+      current_mean = CalculateTruncatedMean(m, 0)
+      if bounds_working:
+        bounds_working[0] = min(current_mean, bounds_working[0])
+        bounds_working[1] = max(current_mean, bounds_working[1])
+      else:
+        bounds_working = [current_mean, current_mean]
+    for m in broken_means:
+      current_mean = CalculateTruncatedMean(m, 0)
+      if bounds_broken:
+        bounds_broken[0] = min(current_mean, bounds_broken[0])
+        bounds_broken[1] = max(current_mean, bounds_broken[1])
+      else:
+        bounds_broken = [current_mean, current_mean]
+    dist_between_groups = min(math.fabs(bounds_broken[1] - bounds_working[0]),
+        math.fabs(bounds_broken[0] - bounds_working[1]))
+    working_mean = sum(working_means, [])
+    broken_mean = sum(broken_means, [])
+    std_dev_working_group = CalculateStandardDeviation(working_mean)
+    std_dev_broken_group = CalculateStandardDeviation(broken_mean)
+
+    confidence = (dist_between_groups / (
+        max(0.0001, (std_dev_broken_group + std_dev_working_group))))
+    confidence = int(min(1.0, max(confidence, 0.0)) * 100.0)
+    return confidence
+
   def _GetResultsDict(self, revision_data, revision_data_sorted):
     # Find range where it possibly broke.
     first_working_revision = None
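
In words, `_CalculateConfidence` above reports how cleanly the two groups separate: the gap between the groups' per-revision mean ranges, divided by the pooled noise (the sum of the two standard deviations), clamped to [0, 1] and scaled to a percentage. A worked sketch with fabricated numbers (plain arithmetic stands in for CalculateTruncatedMean and CalculateStandardDeviation):

import math

# Fabricated inputs:
#   working_means = [[10.0, 10.2], [10.1, 9.9]]  -> per-revision means 10.1, 10.0
#   broken_means  = [[12.0, 12.2], [12.1, 11.9]] -> per-revision means 12.1, 12.0
# so bounds_working = [10.0, 10.1] and bounds_broken = [12.0, 12.1].
dist_between_groups = min(math.fabs(12.1 - 10.0),
                          math.fabs(12.0 - 10.1))  # 1.9

# Each group's pooled raw values have a standard deviation of ~0.112, so the
# combined noise term is ~0.224 and the ratio ~8.5, which the clamp caps at 1.0.
confidence = int(min(1.0, max(dist_between_groups / 0.224, 0.0)) * 100.0)
print confidence  # 100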
@@ -2403,27 +2456,19 @@ class BisectPerformanceMetrics(object):
           last_broken_revision_index = i

     if last_broken_revision != None and first_working_revision != None:
-      bounds_broken = [revision_data[last_broken_revision]['value']['mean'],
-          revision_data[last_broken_revision]['value']['mean']]
-      broken_mean = []
+      broken_means = []
       for i in xrange(0, last_broken_revision_index + 1):
         if revision_data_sorted[i][1]['value']:
-          bounds_broken[0] = min(bounds_broken[0],
-              revision_data_sorted[i][1]['value']['mean'])
-          bounds_broken[1] = max(bounds_broken[1],
-              revision_data_sorted[i][1]['value']['mean'])
-          broken_mean.extend(revision_data_sorted[i][1]['value']['values'])
-
-      bounds_working = [revision_data[first_working_revision]['value']['mean'],
-          revision_data[first_working_revision]['value']['mean']]
-      working_mean = []
+          broken_means.append(revision_data_sorted[i][1]['value']['values'])
+
+      working_means = []
       for i in xrange(first_working_revision_index, len(revision_data_sorted)):
         if revision_data_sorted[i][1]['value']:
-          bounds_working[0] = min(bounds_working[0],
-              revision_data_sorted[i][1]['value']['mean'])
-          bounds_working[1] = max(bounds_working[1],
-              revision_data_sorted[i][1]['value']['mean'])
-          working_mean.extend(revision_data_sorted[i][1]['value']['values'])
+          working_means.append(revision_data_sorted[i][1]['value']['values'])
+
+      # Flatten the lists to calculate the mean of all values.
+      working_mean = sum(working_means, [])
+      broken_mean = sum(broken_means, [])

       # Calculate the approximate size of the regression
       mean_of_bad_runs = CalculateTruncatedMean(broken_mean, 0.0)
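
The `sum(working_means, [])` idiom above concatenates the per-revision lists into one flat list by repeated list addition; a quick illustration with fabricated values:

broken_means = [[12.0, 12.2], [11.9]]  # made-up per-revision run values
broken_mean = sum(broken_means, [])    # [] is the start value for list +
assert broken_mean == [12.0, 12.2, 11.9]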
@@ -2439,14 +2484,7 @@ class BisectPerformanceMetrics(object):
       # Give a "confidence" in the bisect. At the moment we use how distinct the
       # values are before and after the last broken revision, and how noisy the
       # overall graph is.
-      dist_between_groups = min(math.fabs(bounds_broken[1] - bounds_working[0]),
-          math.fabs(bounds_broken[0] - bounds_working[1]))
-      len_working_group = CalculateStandardDeviation(working_mean)
-      len_broken_group = CalculateStandardDeviation(broken_mean)
-
-      confidence = (dist_between_groups / (
-          max(0.0001, (len_broken_group + len_working_group ))))
-      confidence = int(min(1.0, max(confidence, 0.0)) * 100.0)
+      confidence = self._CalculateConfidence(working_means, broken_means)

       culprit_revisions = []

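
One property of the refactored helper worth keeping in mind: the distance-to-noise ratio is clamped before scaling, so any separation beyond the combined standard deviations reports as 100%, and the result never leaves [0, 100]. A tiny sketch of just that clamp, with fabricated ratios:

# int(min(1.0, max(ratio, 0.0)) * 100.0) caps and floors the confidence.
for ratio in (8.5, 0.4, 0.0):
  print int(min(1.0, max(ratio, 0.0)) * 100.0)  # 100, 40, 0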
@@ -2497,39 +2535,8 @@ class BisectPerformanceMetrics(object):
         os.chdir(cwd)

       # Check for any other possible regression ranges
-      good_std_dev = revision_data[first_working_revision]['value']['std_err']
-      good_mean = revision_data[first_working_revision]['value']['mean']
-      bad_mean = revision_data[last_broken_revision]['value']['mean']
-      prev_revision_data = revision_data_sorted[0][1]
-      prev_revision_id = revision_data_sorted[0][0]
-      other_regressions = []
-      for current_id, current_data in revision_data_sorted:
-        if current_data['value']:
-          prev_mean = prev_revision_data['value']['mean']
-          cur_mean = current_data['value']['mean']
-
-          if good_std_dev:
-            deviations = math.fabs(prev_mean - cur_mean) / good_std_dev
-          else:
-            deviations = None
-
-          if good_mean:
-            percent_change = (prev_mean - cur_mean) / good_mean
-
-            # If the "good" valuse are supposed to be higher than the "bad"
-            # values (ie. scores), flip the sign of the percent change so that
-            # a positive value always represents a regression.
-            if bad_mean < good_mean:
-              percent_change *= -1.0
-          else:
-            percent_change = None
-
-          if deviations >= 1.5 or percent_change > 0.01:
-            if current_id != first_working_revision:
-              other_regressions.append(
-                  [current_id, prev_revision_id, percent_change, deviations])
-          prev_revision_data = current_data
-          prev_revision_id = current_id
+      other_regressions = self._FindOtherRegressions(revision_data_sorted,
+          mean_of_bad_runs > mean_of_good_runs)

       # Check for warnings:
       if len(culprit_revisions) > 1:
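
Taken together, the replacement scan works like this: walk the sorted revisions, accumulate each earlier revision's runs, and ask `_CalculateConfidence` how cleanly the accumulated prefix separates from the current revision, keeping same-direction steps above 50% confidence. A simplified standalone model of that scan (plain means and a stand-in confidence function instead of the script's CalculateTruncatedMean and _CalculateConfidence; all values fabricated):

import math

def _mean(values):
  return sum(values) / float(len(values))

def _std_dev(values):
  m = _mean(values)
  return math.sqrt(sum((v - m) ** 2 for v in values) / float(len(values)))

def _confidence(prev_groups, current_groups):
  # Same shape as _CalculateConfidence: gap between the groups' per-group
  # mean bounds, divided by the combined standard deviation, as a percent.
  prev_means = [_mean(g) for g in prev_groups]
  cur_means = [_mean(g) for g in current_groups]
  dist = min(math.fabs(max(cur_means) - min(prev_means)),
             math.fabs(min(cur_means) - max(prev_means)))
  noise = _std_dev(sum(prev_groups, [])) + _std_dev(sum(current_groups, []))
  return int(min(1.0, max(dist / max(0.0001, noise), 0.0)) * 100.0)

# Fabricated sorted revision data with a clear step up between r2 and r3.
revisions = [('r1', [10.0, 10.1]), ('r2', [10.2, 9.9]),
             ('r3', [14.0, 14.2]), ('r4', [14.1, 13.9])]

other_regressions = []
previous_values = []
previous_id = None
for current_id, current_values in revisions:
  if previous_values:
    confidence = _confidence(previous_values, [current_values])
    # bad > good in this toy data, so a step up is the overall direction.
    same_direction = _mean(sum(previous_values, [])) < _mean(current_values)
    if same_direction and confidence > 50:
      other_regressions.append([current_id, previous_id, confidence])
  previous_values.append(current_values)
  previous_id = current_id

print other_regressions  # [['r3', 'r2', 100]]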
|