Index: tools/bisect-perf-regression.py
diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py
index 014e646d074e65e5ee5707ae200f588c71bbcf1f..e2832ba7182a992e7e2c951a9d4ba6ccb7914aba 100755
--- a/tools/bisect-perf-regression.py
+++ b/tools/bisect-perf-regression.py
@@ -32,7 +32,6 @@ An example usage (using git hashes):
 -g 1f6e67861535121c5c819c16a666f2436c207e7b\
 -b b732f23b4f81c382db0b23b9035f3dadc7d925bb\
 -m shutdown/simple-user-quit
-
 """

 import copy
@@ -53,6 +52,7 @@ import zipfile
 sys.path.append(os.path.join(os.path.dirname(__file__), 'telemetry'))

 from auto_bisect import bisect_utils
+from auto_bisect import math_utils
 from auto_bisect import post_perf_builder_job as bisect_builder
 from auto_bisect import source_control as source_control_module
 from telemetry.util import cloud_storage
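The newly imported auto_bisect/math_utils module is not included in this patch. Judging from the call sites later in the diff, it needs to expose at least Mean, StandardDeviation, StandardError, TruncatedMean, RelativeChange and PooledStandardError. A minimal sketch of that interface, with bodies mirroring the helpers removed below (the real module may differ):

# Sketch only; assumes math_utils mirrors the helpers removed in this patch.
import math


def Mean(values):
  # The removed CalculateMean computed this as TruncatedMean(values, 0.0),
  # which is just the arithmetic mean.
  return reduce(lambda x, y: float(x) + float(y), values) / len(values)


def StandardDeviation(values):
  # Sample standard deviation, as in the removed CalculateStandardDeviation.
  if len(values) == 1:
    return 0.0
  mean = Mean(values)
  squared_differences = [(float(x) - mean) ** 2 for x in values]
  variance = sum(squared_differences) / (len(values) - 1)
  return math.sqrt(variance)


def StandardError(values):
  # Standard error of the mean, as in the removed CalculateStandardError.
  if len(values) <= 1:
    return 0.0
  return StandardDeviation(values) / math.sqrt(len(values))


def RelativeChange(before, after):
  # Change of |after| relative to |before|; NaN for a zero-to-nonzero change.
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  return math.fabs((after - before) / before)

TruncatedMean and PooledStandardError are assumed to move over in the same way; their original definitions appear in the hunks below.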
|
@@ -259,57 +259,7 @@ def _AddAdditionalDepotInfo(depot_info):
 DEPOT_NAMES = DEPOT_DEPS_NAME.keys()


-def CalculateTruncatedMean(data_set, truncate_percent):
-  """Calculates the truncated mean of a set of values.
-
-  Note that this isn't just the mean of the set of values with the highest
-  and lowest values discarded; the non-discarded values are also weighted
-  differently depending how many values are discarded.
-
-  Args:
-    data_set: Non-empty list of values.
-    truncate_percent: The % from the upper and lower portions of the data set
-        to discard, expressed as a value in [0, 1].
-
-  Returns:
-    The truncated mean as a float.
-
-  Raises:
-    TypeError: The data set was empty after discarding values.
-  """
-  if len(data_set) > 2:
-    data_set = sorted(data_set)
-
-    discard_num_float = len(data_set) * truncate_percent
-    discard_num_int = int(math.floor(discard_num_float))
-    kept_weight = len(data_set) - discard_num_float * 2
-
-    data_set = data_set[discard_num_int:len(data_set)-discard_num_int]
-
-    weight_left = 1.0 - (discard_num_float - discard_num_int)
-
-    if weight_left < 1:
-      # If the % to discard leaves a fractional portion, need to weight those
-      # values.
-      unweighted_vals = data_set[1:len(data_set)-1]
-      weighted_vals = [data_set[0], data_set[len(data_set)-1]]
-      weighted_vals = [w * weight_left for w in weighted_vals]
-      data_set = weighted_vals + unweighted_vals
-  else:
-    kept_weight = len(data_set)
-
-  truncated_mean = reduce(lambda x, y: float(x) + float(y),
-                          data_set) / kept_weight
-
-  return truncated_mean
-
-
-def CalculateMean(values):
-  """Calculates the arithmetic mean of a list of values."""
-  return CalculateTruncatedMean(values, 0.0)
-
-
-def CalculateConfidence(good_results_lists, bad_results_lists):
+def ConfidenceScore(good_results_lists, bad_results_lists):
   """Calculates a confidence percentage.

   This is calculated based on how distinct the "good" and "bad" values are,
|
@@ -330,8 +280,8 @@ def CalculateConfidence(good_results_lists, bad_results_lists):
     A number in the range [0, 100].
   """
   # Get the distance between the two groups.
-  means_good = map(CalculateMean, good_results_lists)
-  means_bad = map(CalculateMean, bad_results_lists)
+  means_good = map(math_utils.Mean, good_results_lists)
+  means_bad = map(math_utils.Mean, bad_results_lists)
   bounds_good = (min(means_good), max(means_good))
   bounds_bad = (min(means_bad), max(means_bad))
   dist_between_groups = min(
|
@@ -341,8 +291,8 @@ def CalculateConfidence(good_results_lists, bad_results_lists):
   # Get the sum of the standard deviations of the two groups.
   good_results_flattened = sum(good_results_lists, [])
   bad_results_flattened = sum(bad_results_lists, [])
-  stddev_good = CalculateStandardDeviation(good_results_flattened)
-  stddev_bad = CalculateStandardDeviation(bad_results_flattened)
+  stddev_good = math_utils.StandardDeviation(good_results_flattened)
+  stddev_bad = math_utils.StandardDeviation(bad_results_flattened)
   stddev_sum = stddev_good + stddev_bad

   confidence = dist_between_groups / (max(0.0001, stddev_sum))
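For intuition, a worked example of the distance/noise ratio computed in this hunk, with invented numbers (the docstring promises a value in [0, 100]; any scaling or clamping happens on code these hunks do not show):

# Distinct, noise-free groups:
#   good_results_lists = [[1.0, 1.0]], bad_results_lists = [[2.0, 2.0]]
#   means_good = [1.0], means_bad = [2.0], dist_between_groups = 1.0
#   stddev_sum = 0.0, so the ratio is 1.0 / 0.0001 = 10000 -> high confidence.
#
# Overlapping, noisy groups:
#   good_results_lists = [[1.0, 3.0]], bad_results_lists = [[2.0, 4.0]]
#   means_good = [2.0], means_bad = [3.0], dist_between_groups = 1.0
#   stddev_sum = sqrt(2) + sqrt(2) ~= 2.83, ratio ~= 0.35 -> low confidence.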
|
@@ -350,71 +300,6 @@ def CalculateConfidence(good_results_lists, bad_results_lists):
   return confidence


-def CalculateStandardDeviation(values):
-  """Calculates the sample standard deviation of the given list of values."""
-  if len(values) == 1:
-    return 0.0
-
-  mean = CalculateMean(values)
-  differences_from_mean = [float(x) - mean for x in values]
-  squared_differences = [float(x * x) for x in differences_from_mean]
-  variance = sum(squared_differences) / (len(values) - 1)
-  std_dev = math.sqrt(variance)
-
-  return std_dev
-
-
-def CalculateRelativeChange(before, after):
-  """Returns the relative change of before and after, relative to before.
-
-  There are several different ways to define relative difference between
-  two numbers; sometimes it is defined as relative to the smaller number,
-  or to the mean of the two numbers. This version returns the difference
-  relative to the first of the two numbers.
-
-  Args:
-    before: A number representing an earlier value.
-    after: Another number, representing a later value.
-
-  Returns:
-    A non-negative floating point number; 0.1 represents a 10% change.
-  """
-  if before == after:
-    return 0.0
-  if before == 0:
-    return float('nan')
-  difference = after - before
-  return math.fabs(difference / before)
-
-
-def CalculatePooledStandardError(work_sets):
-  numerator = 0.0
-  denominator1 = 0.0
-  denominator2 = 0.0
-
-  for current_set in work_sets:
-    std_dev = CalculateStandardDeviation(current_set)
-    numerator += (len(current_set) - 1) * std_dev ** 2
-    denominator1 += len(current_set) - 1
-    denominator2 += 1.0 / len(current_set)
-
-  if denominator1:
-    return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
-  return 0.0
-
-
-def CalculateStandardError(values):
-  """Calculates the standard error of a list of values."""
-  if len(values) <= 1:
-    return 0.0
-
-  std_dev = CalculateStandardDeviation(values)
-
-  return std_dev / math.sqrt(len(values))
-
-
-
-
 def GetSHA1HexDigest(contents):
   """Returns secured hash containing hexadecimal for the given contents."""
   return hashlib.sha1(contents).hexdigest()
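These helpers are removed here and, per the new import, presumably reappear in auto_bisect/math_utils. The least documented of them, CalculatePooledStandardError, combines the per-set sample variances into a single standard error; a worked trace with invented numbers:

# work_sets = [[1.0, 3.0], [2.0, 4.0]]  (invented)
#   each set: len = 2, sample stddev = sqrt(2)
#   numerator    = (2 - 1) * 2 + (2 - 1) * 2 = 4
#   denominator1 = (2 - 1) + (2 - 1)         = 2
#   denominator2 = 1/2 + 1/2                 = 1
#   result = sqrt(4 / 2) * sqrt(1) = sqrt(2) ~= 1.41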
|
@@ -1981,10 +1866,10 @@ class BisectPerformanceMetrics(object):
       print
     else:
       # Need to get the average value if there were multiple values.
-      truncated_mean = CalculateTruncatedMean(metric_values,
-          self.opts.truncate_percent)
-      standard_err = CalculateStandardError(metric_values)
-      standard_dev = CalculateStandardDeviation(metric_values)
+      truncated_mean = math_utils.TruncatedMean(
+          metric_values, self.opts.truncate_percent)
+      standard_err = math_utils.StandardError(metric_values)
+      standard_dev = math_utils.StandardDeviation(metric_values)

       if self._IsBisectModeStandardDeviation():
         metric_values = [standard_dev]
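Assuming math_utils.TruncatedMean keeps the semantics of the removed CalculateTruncatedMean, the fractional-weighting case it handles for this call looks like this (invented numbers):

# TruncatedMean([1, 2, 5, 10], 0.2):
#   discard_num_float = 4 * 0.2 = 0.8, discard_num_int = 0
#   kept_weight = 4 - 2 * 0.8 = 2.4
#   weight_left = 1.0 - (0.8 - 0) = 0.2, so the two extreme values are
#   down-weighted: data_set -> [1 * 0.2, 10 * 0.2, 2, 5] = [0.2, 2.0, 2, 5]
#   result = (0.2 + 2.0 + 2 + 5) / 2.4 = 9.2 / 2.4 ~= 3.83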
|
@@ -3174,9 +3059,9 @@ class BisectPerformanceMetrics(object):
       if current_values:
         current_values = current_values['values']
         if previous_values:
-          confidence = CalculateConfidence(previous_values, [current_values])
-          mean_of_prev_runs = CalculateMean(sum(previous_values, []))
-          mean_of_current_runs = CalculateMean(current_values)
+          confidence = ConfidenceScore(previous_values, [current_values])
+          mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
+          mean_of_current_runs = math_utils.Mean(current_values)

           # Check that the potential regression is in the same direction as
           # the overall regression. If the mean of the previous runs < the
|
@@ -3228,22 +3113,22 @@ class BisectPerformanceMetrics(object):
       broken_mean = sum(broken_means, [])

       # Calculate the approximate size of the regression
-      mean_of_bad_runs = CalculateMean(broken_mean)
-      mean_of_good_runs = CalculateMean(working_mean)
+      mean_of_bad_runs = math_utils.Mean(broken_mean)
+      mean_of_good_runs = math_utils.Mean(working_mean)

-      regression_size = 100 * CalculateRelativeChange(mean_of_good_runs,
+      regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
                                                       mean_of_bad_runs)
       if math.isnan(regression_size):
         regression_size = 'zero-to-nonzero'

-      regression_std_err = math.fabs(CalculatePooledStandardError(
+      regression_std_err = math.fabs(math_utils.PooledStandardError(
           [working_mean, broken_mean]) /
           max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

       # Give a "confidence" in the bisect. At the moment we use how distinct the
       # values are before and after the last broken revision, and how noisy the
       # overall graph is.
-      confidence = CalculateConfidence(working_means, broken_means)
+      confidence = ConfidenceScore(working_means, broken_means)

       culprit_revisions = []

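With invented numbers, the regression-size computation just above works out as:

# mean_of_good_runs = 200.0, mean_of_bad_runs = 230.0  (invented)
#   math_utils.RelativeChange(200.0, 230.0) = |230.0 - 200.0| / 200.0 = 0.15
#   regression_size = 100 * 0.15 = 15  (reported as a 15% regression)
# A zero mean_of_good_runs with a nonzero mean_of_bad_runs yields NaN, which
# the isnan() branch above reports as 'zero-to-nonzero'.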
|
@@ -3771,5 +3656,6 @@ def main():
       bisect_utils.OutputAnnotationStepClosed()
   return 1

+
 if __name__ == '__main__':
   sys.exit(main())