Chromium Code Reviews

Unified Diff: tools/bisect-perf-regression.py

Issue 417013003: Move statistical functions in bisect script to their own module. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Respond to nit. Created 6 years, 5 months ago.
Index: tools/bisect-perf-regression.py
diff --git a/tools/bisect-perf-regression.py b/tools/bisect-perf-regression.py
index 014e646d074e65e5ee5707ae200f588c71bbcf1f..e2832ba7182a992e7e2c951a9d4ba6ccb7914aba 100755
--- a/tools/bisect-perf-regression.py
+++ b/tools/bisect-perf-regression.py
@@ -32,7 +32,6 @@ An example usage (using git hashes):
-g 1f6e67861535121c5c819c16a666f2436c207e7b\
-b b732f23b4f81c382db0b23b9035f3dadc7d925bb\
-m shutdown/simple-user-quit
-
"""
import copy
@@ -53,6 +52,7 @@ import zipfile
sys.path.append(os.path.join(os.path.dirname(__file__), 'telemetry'))
from auto_bisect import bisect_utils
+from auto_bisect import math_utils
from auto_bisect import post_perf_builder_job as bisect_builder
from auto_bisect import source_control as source_control_module
from telemetry.util import cloud_storage
@@ -259,57 +259,7 @@ def _AddAdditionalDepotInfo(depot_info):
DEPOT_NAMES = DEPOT_DEPS_NAME.keys()
-def CalculateTruncatedMean(data_set, truncate_percent):
- """Calculates the truncated mean of a set of values.
-
- Note that this isn't just the mean of the set of values with the highest
- and lowest values discarded; the non-discarded values are also weighted
- differently depending how many values are discarded.
-
- Args:
- data_set: Non-empty list of values.
- truncate_percent: The % from the upper and lower portions of the data set
- to discard, expressed as a value in [0, 1].
-
- Returns:
- The truncated mean as a float.
-
- Raises:
- TypeError: The data set was empty after discarding values.
- """
- if len(data_set) > 2:
- data_set = sorted(data_set)
-
- discard_num_float = len(data_set) * truncate_percent
- discard_num_int = int(math.floor(discard_num_float))
- kept_weight = len(data_set) - discard_num_float * 2
-
- data_set = data_set[discard_num_int:len(data_set)-discard_num_int]
-
- weight_left = 1.0 - (discard_num_float - discard_num_int)
-
- if weight_left < 1:
- # If the % to discard leaves a fractional portion, need to weight those
- # values.
- unweighted_vals = data_set[1:len(data_set)-1]
- weighted_vals = [data_set[0], data_set[len(data_set)-1]]
- weighted_vals = [w * weight_left for w in weighted_vals]
- data_set = weighted_vals + unweighted_vals
- else:
- kept_weight = len(data_set)
-
- truncated_mean = reduce(lambda x, y: float(x) + float(y),
- data_set) / kept_weight
-
- return truncated_mean
-
-
-def CalculateMean(values):
- """Calculates the arithmetic mean of a list of values."""
- return CalculateTruncatedMean(values, 0.0)
-
-
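For reference while reviewing, a minimal standalone sketch of the interpolated truncated mean that the removed code computed and that math_utils.TruncatedMean is expected to replicate; the function name and the example values are illustrative only, not part of the patch.

```python
import math

def truncated_mean_sketch(data_set, truncate_percent):
  """Interpolated truncated mean: discard truncate_percent of the values from
  each end; when that fraction is not a whole number of values, the outermost
  kept values only contribute the leftover fractional weight."""
  if len(data_set) > 2:
    data_set = sorted(data_set)
  discard = len(data_set) * truncate_percent       # may be fractional
  whole = int(math.floor(discard))
  kept_weight = len(data_set) - 2 * discard
  data_set = data_set[whole:len(data_set) - whole]
  edge_weight = 1.0 - (discard - whole)
  if edge_weight < 1:
    # The two outermost remaining values only count for the fractional part.
    total = sum(data_set[1:-1]) + edge_weight * (data_set[0] + data_set[-1])
  else:
    total = sum(data_set)
    kept_weight = len(data_set)
  return float(total) / kept_weight

# truncated_mean_sketch([1, 2, 3, 4, 100], 0.25) drops 1 and 100 entirely,
# gives 2 and 4 a weight of 0.75 each, and returns 3.0; a truncate_percent of
# 0.0 reduces to the plain arithmetic mean, which is how Mean was defined.
```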
-def CalculateConfidence(good_results_lists, bad_results_lists):
+def ConfidenceScore(good_results_lists, bad_results_lists):
"""Calculates a confidence percentage.
This is calculated based on how distinct the "good" and "bad" values are,
@@ -330,8 +280,8 @@ def CalculateConfidence(good_results_lists, bad_results_lists):
A number in the range [0, 100].
"""
# Get the distance between the two groups.
- means_good = map(CalculateMean, good_results_lists)
- means_bad = map(CalculateMean, bad_results_lists)
+ means_good = map(math_utils.Mean, good_results_lists)
+ means_bad = map(math_utils.Mean, bad_results_lists)
bounds_good = (min(means_good), max(means_good))
bounds_bad = (min(means_bad), max(means_bad))
dist_between_groups = min(
@@ -341,8 +291,8 @@ def CalculateConfidence(good_results_lists, bad_results_lists):
# Get the sum of the standard deviations of the two groups.
good_results_flattened = sum(good_results_lists, [])
bad_results_flattened = sum(bad_results_lists, [])
- stddev_good = CalculateStandardDeviation(good_results_flattened)
- stddev_bad = CalculateStandardDeviation(bad_results_flattened)
+ stddev_good = math_utils.StandardDeviation(good_results_flattened)
+ stddev_bad = math_utils.StandardDeviation(bad_results_flattened)
stddev_sum = stddev_good + stddev_bad
confidence = dist_between_groups / (max(0.0001, stddev_sum))
@@ -350,71 +300,6 @@ def CalculateConfidence(good_results_lists, bad_results_lists):
return confidence
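To make the renamed ConfidenceScore easier to follow, here is a self-contained sketch of the signal-to-noise idea it implements: the distance between the closest per-run means of the two groups, divided by the summed sample standard deviations. The final clamping and scaling to a percentage is not visible in this hunk, so that last line is an assumption, and the helper names are illustrative.

```python
import math

def _mean(values):
  return sum(values) / float(len(values))

def _sample_stddev(values):
  if len(values) < 2:
    return 0.0
  m = _mean(values)
  return math.sqrt(sum((x - m) ** 2 for x in values) / (len(values) - 1))

def confidence_score_sketch(good_results_lists, bad_results_lists):
  """Roughly how distinct the 'good' and 'bad' groups are, as a percentage."""
  means_good = [_mean(v) for v in good_results_lists]
  means_bad = [_mean(v) for v in bad_results_lists]
  # Signal: distance between the closest edges of the two groups of means.
  dist_between_groups = min(
      math.fabs(max(means_bad) - min(means_good)),
      math.fabs(min(means_bad) - max(means_good)))
  # Noise: sum of the sample standard deviations of the flattened groups.
  stddev_sum = (_sample_stddev(sum(good_results_lists, [])) +
                _sample_stddev(sum(bad_results_lists, [])))
  confidence = dist_between_groups / max(0.0001, stddev_sum)
  # Assumed final step: clamp the ratio to [0, 1] and express it as a percent.
  return 100 * min(1.0, max(0.0, confidence))
```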
-def CalculateStandardDeviation(values):
- """Calculates the sample standard deviation of the given list of values."""
- if len(values) == 1:
- return 0.0
-
- mean = CalculateMean(values)
- differences_from_mean = [float(x) - mean for x in values]
- squared_differences = [float(x * x) for x in differences_from_mean]
- variance = sum(squared_differences) / (len(values) - 1)
- std_dev = math.sqrt(variance)
-
- return std_dev
-
-
-def CalculateRelativeChange(before, after):
- """Returns the relative change of before and after, relative to before.
-
- There are several different ways to define relative difference between
- two numbers; sometimes it is defined as relative to the smaller number,
- or to the mean of the two numbers. This version returns the difference
- relative to the first of the two numbers.
-
- Args:
- before: A number representing an earlier value.
- after: Another number, representing a later value.
-
- Returns:
- A non-negative floating point number; 0.1 represents a 10% change.
- """
- if before == after:
- return 0.0
- if before == 0:
- return float('nan')
- difference = after - before
- return math.fabs(difference / before)
-
-
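A quick worked example of the relative-change convention described above; the timings are made up.

```python
# Hypothetical timings: the metric moved from 200 ms to 260 ms.
before, after = 200.0, 260.0
relative_change = abs((after - before) / before)   # 0.3, i.e. a 30% change
# before == after yields 0.0; before == 0 with after != 0 yields NaN, which
# the bisect report later labels 'zero-to-nonzero'.
```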
-def CalculatePooledStandardError(work_sets):
- numerator = 0.0
- denominator1 = 0.0
- denominator2 = 0.0
-
- for current_set in work_sets:
- std_dev = CalculateStandardDeviation(current_set)
- numerator += (len(current_set) - 1) * std_dev ** 2
- denominator1 += len(current_set) - 1
- denominator2 += 1.0 / len(current_set)
-
- if denominator1:
- return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
- return 0.0
-
-
-def CalculateStandardError(values):
- """Calculates the standard error of a list of values."""
- if len(values) <= 1:
- return 0.0
-
- std_dev = CalculateStandardDeviation(values)
-
- return std_dev / math.sqrt(len(values))
-
-
-
-
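The removed pooled-standard-error helper has no docstring, so a sketch of the formula it applies (and that math_utils.PooledStandardError presumably keeps) may help: the pooled sample standard deviation across the sets, scaled by the square root of the summed reciprocal sample sizes. The names below are illustrative.

```python
import math

def _sample_stddev(values):
  if len(values) < 2:
    return 0.0
  m = sum(values) / float(len(values))
  return math.sqrt(sum((x - m) ** 2 for x in values) / (len(values) - 1))

def pooled_standard_error_sketch(work_sets):
  # sqrt(sum((n_i - 1) * s_i^2) / sum(n_i - 1)) * sqrt(sum(1 / n_i)),
  # where n_i and s_i are the size and sample std dev of each work set.
  numerator = sum((len(s) - 1) * _sample_stddev(s) ** 2 for s in work_sets)
  degrees = sum(len(s) - 1 for s in work_sets)
  size_term = sum(1.0 / len(s) for s in work_sets)
  if not degrees:
    return 0.0
  return math.sqrt(numerator / degrees) * math.sqrt(size_term)

# The plain standard error removed above is just _sample_stddev(v) / sqrt(len(v)).
```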
def GetSHA1HexDigest(contents):
"""Returns secured hash containing hexadecimal for the given contents."""
return hashlib.sha1(contents).hexdigest()
@@ -1981,10 +1866,10 @@ class BisectPerformanceMetrics(object):
print
else:
# Need to get the average value if there were multiple values.
- truncated_mean = CalculateTruncatedMean(metric_values,
- self.opts.truncate_percent)
- standard_err = CalculateStandardError(metric_values)
- standard_dev = CalculateStandardDeviation(metric_values)
+ truncated_mean = math_utils.TruncatedMean(
+ metric_values, self.opts.truncate_percent)
+ standard_err = math_utils.StandardError(metric_values)
+ standard_dev = math_utils.StandardDeviation(metric_values)
if self._IsBisectModeStandardDeviation():
metric_values = [standard_dev]
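As a usage illustration of the new module at this call site, with hypothetical per-run values and a made-up truncate_percent:

```python
from auto_bisect import math_utils

# Hypothetical values for one revision; truncate_percent is made up here.
metric_values = [210.0, 212.0, 208.0, 260.0, 209.0]
truncate_percent = 0.25

truncated_mean = math_utils.TruncatedMean(metric_values, truncate_percent)
standard_err = math_utils.StandardError(metric_values)
standard_dev = math_utils.StandardDeviation(metric_values)
# The outlier 260.0 is discarded by the truncated mean but still inflates the
# standard deviation, which is the value used in standard-deviation bisect mode.
```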
@@ -3174,9 +3059,9 @@ class BisectPerformanceMetrics(object):
if current_values:
current_values = current_values['values']
if previous_values:
- confidence = CalculateConfidence(previous_values, [current_values])
- mean_of_prev_runs = CalculateMean(sum(previous_values, []))
- mean_of_current_runs = CalculateMean(current_values)
+ confidence = ConfidenceScore(previous_values, [current_values])
+ mean_of_prev_runs = math_utils.Mean(sum(previous_values, []))
+ mean_of_current_runs = math_utils.Mean(current_values)
# Check that the potential regression is in the same direction as
# the overall regression. If the mean of the previous runs < the
@@ -3228,22 +3113,22 @@ class BisectPerformanceMetrics(object):
broken_mean = sum(broken_means, [])
# Calculate the approximate size of the regression
- mean_of_bad_runs = CalculateMean(broken_mean)
- mean_of_good_runs = CalculateMean(working_mean)
+ mean_of_bad_runs = math_utils.Mean(broken_mean)
+ mean_of_good_runs = math_utils.Mean(working_mean)
- regression_size = 100 * CalculateRelativeChange(mean_of_good_runs,
+ regression_size = 100 * math_utils.RelativeChange(mean_of_good_runs,
mean_of_bad_runs)
if math.isnan(regression_size):
regression_size = 'zero-to-nonzero'
- regression_std_err = math.fabs(CalculatePooledStandardError(
+ regression_std_err = math.fabs(math_utils.PooledStandardError(
[working_mean, broken_mean]) /
max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0
# Give a "confidence" in the bisect. At the moment we use how distinct the
# values are before and after the last broken revision, and how noisy the
# overall graph is.
- confidence = CalculateConfidence(working_means, broken_means)
+ confidence = ConfidenceScore(working_means, broken_means)
culprit_revisions = []
@@ -3771,5 +3656,6 @@ def main():
bisect_utils.OutputAnnotationStepClosed()
return 1
+
if __name__ == '__main__':
sys.exit(main())